/usr/share/pyshared/dhm/strtools.py is in python-dhm 0.6-3build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# strtools.py
#
# Copyright 2003 Wichert Akkerman <wichert@deephackmode.org>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# String handling tools
"""String handling tools
Utility functions for some standard string processing tasks
"""
__docformat__ = "epytext en"
import codecs, re
class TokenizeError(Exception):
    """Raised by L{Tokenize} when its input cannot be split into tokens."""
def Tokenize(str, whitespace=" \t\r\n", quotes="\"", escapes="\\"):
    r"""Split a string into tokens, honouring quotes and escapes.

    Tokens are separated by runs of unquoted whitespace.  Quote
    characters group text (including whitespace) into a single token and
    are themselves dropped.  An escape character causes the character
    that follows it to be copied into the token verbatim.

    Leading, trailing and repeated whitespace never produce tokens.  A
    quoted section that contains no characters does not produce a token
    of its own.

        >>> import dhm.strtools
        >>> dhm.strtools.Tokenize("this is a test")
        ['this', 'is', 'a', 'test']
        >>> dhm.strtools.Tokenize("this \"is a\" test")
        ['this', 'is a', 'test']
        >>> dhm.strtools.Tokenize("this \\\"is\\\" a test")
        ['this', '"is"', 'a', 'test']
        >>> dhm.strtools.Tokenize("  padded  ")
        ['padded']
        >>> dhm.strtools.Tokenize("this \"is a test")
        Traceback (most recent call last):
            ...
        dhm.strtools.TokenizeError: Unexpected end of string in quoted text

    @param str: string to tokenize
    @type str: string
    @param whitespace: whitespace characters separating tokens
    @type whitespace: string
    @param quotes: legal quoting characters
    @type quotes: string
    @param escapes: characters which can escape the character after them
    @type escapes: string
    @return: list of tokens
    @rtype: sequence of strings
    @raise TokenizeError: if the string ends directly after an escape
      character, or while a quoted section is still open
    """
    # The parameter keeps its historical name ``str`` (which shadows the
    # builtin) so that keyword callers keep working; it is only read here.
    tokens = []
    pending = []    # characters of the token being built; empty = no token
    quote = None    # the quote character that is currently open, if any
    pos = 0
    end = len(str)

    # Walk the input by index instead of repeatedly slicing off the first
    # character, which copied the remaining string on every step.
    while pos < end:
        char = str[pos]
        if char == quote:
            # Closing quote: leave quoted mode, the quote itself is dropped.
            quote = None
        elif quote is None and char in quotes:
            # Opening quote: remember which character has to close it.
            quote = char
        elif char in whitespace:
            if quote is not None:
                # Whitespace inside quotes is ordinary token content.
                pending.append(char)
            elif pending:
                # Unquoted whitespace terminates the current token.  When
                # no token is in progress (leading or repeated whitespace)
                # there is nothing to emit.
                tokens.append("".join(pending))
                pending = []
        elif char in escapes:
            # Copy the next character verbatim, whatever it is.
            pos += 1
            if pos >= end:
                raise TokenizeError("Unexpected end of string")
            pending.append(str[pos])
        else:
            pending.append(char)
        pos += 1

    if quote is not None:
        raise TokenizeError("Unexpected end of string in quoted text")
    if pending:
        tokens.append("".join(pending))
    return tokens
def RegexFilter(regexp, *args):
    """Collect the regexp groups of every argument that matches.

    Each string in C{args} is searched with C{regexp}.  Strings without
    a match are skipped; for each matching string the tuple of its match
    groups is added to the result, in argument order.

        >>> import strtools
        >>> strtools.RegexFilter("([^=]*)=(.*)", "username=wichert", "# a comment", "password=secret")
        [('username', 'wichert'), ('password', 'secret')]

    @param regexp: regular expression to look for
    @type regexp: string
    @param args: strings to filter
    @type args: string argument list
    @return: selected data
    @rtype: list of tuples of matched strings
    """
    search = re.compile(regexp).search
    return [found.groups() for found in map(search, args) if found]
def CodecFile(fo, encoding="utf-8", errors="strict"):
    """Return a new file object for a special codec.

    This function wraps a file object in a C{codecs.StreamReaderWriter}
    for the given encoding: data read through the wrapper is decoded
    with that codec and data written through it is encoded with it.

    @param fo: file to wrap
    @type fo: file object instance
    @param encoding: name of the encoding to use
    @type encoding: string
    @param errors: error handling scheme handed to the codec, for
      example C{"strict"}, C{"ignore"} or C{"replace"}
    @type errors: string
    @return: file object with proper encoding; its C{encoding} attribute
      is set to the C{encoding} argument
    @rtype: C{codecs.StreamReaderWriter} instance
    @raise LookupError: if no codec is registered for C{encoding}
    """
    # Ask for the stream classes directly rather than unpacking the whole
    # codec tuple and discarding the unused encode/decode functions.
    reader = codecs.getreader(encoding)
    writer = codecs.getwriter(encoding)
    srw = codecs.StreamReaderWriter(fo, reader, writer, errors)
    # Record the encoding on the wrapper so callers can see which one is
    # in effect.
    srw.encoding = encoding
    return srw
|