This file is indexed.

/usr/share/pyshared/supybot/utils/str.py is in supybot 0.83.4.1.ds-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
###
# Copyright (c) 2002-2005, Jeremiah Fincher
# Copyright (c) 2008-2009, James Vega
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Simple utility functions related to strings.
"""

import re
import new
import sys
import string
import textwrap

from iter import all, any
from structures import TwoWayDictionary

curry = new.instancemethod
chars = string.maketrans('', '')

def rsplit(s, sep=None, maxsplit=-1):
    """Equivalent to str.split, except splitting from the right."""
    if sys.version_info < (2, 4, 0):
        if sep is not None:
            sep = sep[::-1]
        L = s[::-1].split(sep, maxsplit)
        L.reverse()
        return [s[::-1] for s in L]
    else:
        return s.rsplit(sep, maxsplit)

def normalizeWhitespace(s):
    """Normalizes the whitespace in a string; \s+ becomes one space."""
    return ' '.join(s.split())

def distance(s, t):
    """Returns the levenshtein edit distance between two strings."""
    n = len(s)
    m = len(t)
    if n == 0:
        return m
    elif m == 0:
        return n
    d = []
    for i in xrange(n+1):
        d.append([])
        for j in xrange(m+1):
            d[i].append(0)
            d[0][j] = j
        d[i][0] = i
    for i in xrange(1, n+1):
        cs = s[i-1]
        for j in xrange(1, m+1):
            ct = t[j-1]
            cost = int(cs != ct)
            d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)
    return d[n][m]

_soundextrans = string.maketrans(string.ascii_uppercase,
                                 '01230120022455012623010202')
_notUpper = chars.translate(chars, string.ascii_uppercase)
def soundex(s, length=4):
    """Returns the soundex hash of a given string."""
    s = s.upper() # Make everything uppercase.
    s = s.translate(chars, _notUpper) # Delete non-letters.
    if not s:
        raise ValueError, 'Invalid string for soundex: %s'
    firstChar = s[0] # Save the first character.
    s = s.translate(_soundextrans) # Convert to soundex numbers.
    s = s.lstrip(s[0]) # Remove all repeated first characters.
    L = [firstChar]
    for c in s:
        if c != L[-1]:
            L.append(c)
    L = [c for c in L if c != '0'] + (['0']*(length-1))
    s = ''.join(L)
    return length and s[:length] or s.rstrip('0')

def dqrepr(s):
    """Returns a repr() of s guaranteed to be in double quotes."""
    # The wankers-that-be decided not to use double-quotes anymore in 2.3.
    # return '"' + repr("'\x00" + s)[6:]
    return '"%s"' % s.encode('string_escape').replace('"', '\\"')

def quoted(s):
    """Returns a quoted s."""
    return '"%s"' % s

_openers = '{[(<'
_closers = '}])>'
def _getSep(s, allowBraces=False):
    if len(s) < 2:
        raise ValueError, 'string given to _getSep is too short: %r' % s
    if allowBraces:
        braces = _closers
    else:
        braces = _openers + _closers
    if s.startswith('m') or s.startswith('s'):
        separator = s[1]
    else:
        separator = s[0]
    if separator.isalnum() or separator in braces:
        raise ValueError, \
              'Invalid separator: separator must not be alphanumeric or in ' \
              '"%s"' % braces
    return separator

def perlReToPythonRe(s):
    """Converts a string representation of a Perl regular expression (i.e.,
    m/^foo$/i or /foo|bar/) to a Python regular expression.
    """
    opener = closer = _getSep(s, True)
    if opener in '{[(<':
        closer = _closers[_openers.index(opener)]
    opener = re.escape(opener)
    closer = re.escape(closer)
    matcher = re.compile(r'm?%s((?:\\.|[^\\])*)%s(.*)' % (opener, closer))
    try:
        (regexp, flags) = matcher.match(s).groups()
    except AttributeError: # Unpack list of wrong size.
        raise ValueError, 'Must be of the form m/.../ or /.../'
    regexp = regexp.replace('\\'+opener, opener)
    if opener != closer:
        regexp = regexp.replace('\\'+closer, closer)
    flag = 0
    try:
        for c in flags.upper():
            flag |= getattr(re, c)
    except AttributeError:
        raise ValueError, 'Invalid flag: %s' % c
    try:
        return re.compile(regexp, flag)
    except re.error, e:
        raise ValueError, str(e)

def perlReToReplacer(s):
    """Converts a string representation of a Perl regular expression (i.e.,
    s/foo/bar/g or s/foo/bar/i) to a Python function doing the equivalent
    replacement.
    """
    sep = _getSep(s)
    escaped = re.escape(sep)
    matcher = re.compile(r's%s((?:\\.|[^\\])*)%s((?:\\.|[^\\])*)%s(.*)'
                         % (escaped, escaped, escaped))
    try:
        (regexp, replace, flags) = matcher.match(s).groups()
    except AttributeError: # Unpack list of wrong size.
        raise ValueError, 'Must be of the form s/.../.../'
    regexp = regexp.replace('\x08', r'\b')
    replace = replace.replace('\\'+sep, sep)
    for i in xrange(10):
        replace = replace.replace(chr(i), r'\%s' % i)
    g = False
    if 'g' in flags:
        g = True
        flags = filter('g'.__ne__, flags)
    r = perlReToPythonRe(sep.join(('', regexp, flags)))
    if g:
        return curry(r.sub, replace)
    else:
        return lambda s: r.sub(replace, s, 1)

_perlVarSubstituteRe = re.compile(r'\$\{([^}]+)\}|\$([a-zA-Z][a-zA-Z0-9]*)')
def perlVariableSubstitute(vars, text):
    def replacer(m):
        (braced, unbraced) = m.groups()
        var = braced or unbraced
        try:
            x = vars[var]
            if callable(x):
                return x()
            else:
                return str(x)
        except KeyError:
            if braced:
                return '${%s}' % braced
            else:
                return '$' + unbraced
    return _perlVarSubstituteRe.sub(replacer, text)

def commaAndify(seq, comma=',', And='and'):
    """Given a a sequence, returns an English clause for that sequence.

    I.e., given [1, 2, 3], returns '1, 2, and 3'
    """
    L = list(seq)
    if len(L) == 0:
        return ''
    elif len(L) == 1:
        return ''.join(L) # We need this because it raises TypeError.
    elif len(L) == 2:
        L.insert(1, And)
        return ' '.join(L)
    else:
        L[-1] = '%s %s' % (And, L[-1])
        sep = '%s ' % comma
        return sep.join(L)

_unCommaTheRe = re.compile(r'(.*),\s*(the)$', re.I)
def unCommaThe(s):
    """Takes a string of the form 'foo, the' and turns it into 'the foo'."""
    m = _unCommaTheRe.match(s)
    if m is not None:
        return '%s %s' % (m.group(2), m.group(1))
    else:
        return s

def ellipsisify(s, n):
    """Returns a shortened version of s.  Produces up to the first n chars at
    the nearest word boundary.
    """
    if len(s) <= n:
        return s
    else:
        return (textwrap.wrap(s, n-3)[0] + '...')

plurals = TwoWayDictionary({})
def matchCase(s1, s2):
    """Matches the case of s1 in s2"""
    if s1.isupper():
        return s2.upper()
    else:
        L = list(s2)
        for (i, char) in enumerate(s1[:len(s2)]):
            if char.isupper():
                L[i] = L[i].upper()
        return ''.join(L)

consonants = 'bcdfghjklmnpqrstvwxz'
_pluralizeRegex = re.compile('[%s]y$' % consonants)
def pluralize(s):
    """Returns the plural of s.  Put any exceptions to the general English
    rule of appending 's' in the plurals dictionary.
    """
    lowered = s.lower()
    # Exception dictionary
    if lowered in plurals:
        return matchCase(s, plurals[lowered])
    # Words ending with 'ch', 'sh' or 'ss' such as 'punch(es)', 'fish(es)
    # and miss(es)
    elif any(lowered.endswith, ['x', 'ch', 'sh', 'ss']):
        return matchCase(s, s+'es')
    # Words ending with a consonant followed by a 'y' such as
    # 'try (tries)' or 'spy (spies)'
    elif _pluralizeRegex.search(lowered):
        return matchCase(s, s[:-1] + 'ies')
    # In all other cases, we simply add an 's' to the base word
    else:
        return matchCase(s, s+'s')

_depluralizeRegex = re.compile('[%s]ies' % consonants)
def depluralize(s):
    """Returns the singular of s."""
    lowered = s.lower()
    if lowered in plurals:
        return matchCase(s, plurals[lowered])
    elif any(lowered.endswith, ['ches', 'shes', 'sses']):
        return s[:-2]
    elif re.search(_depluralizeRegex, lowered):
        return s[:-3] + 'y'
    else:
        if lowered.endswith('s'):
            return s[:-1] # Chop off 's'.
        else:
            return s # Don't know what to do.

def nItems(n, item, between=None):
    """Works like this:

    >>> nItems(1, 'clock')
    '1 clock'

    >>> nItems(10, 'clock')
    '10 clocks'

    >>> nItems(10, 'clock', between='grandfather')
    '10 grandfather clocks'
    """
    assert isinstance(n, int) or isinstance(n, long), \
           'The order of the arguments to nItems changed again, sorry.'
    if between is None:
        if n != 1:
            return format('%s %p', n, item)
        else:
            return format('%s %s', n, item)
    else:
        if n != 1:
            return format('%s %s %p', n, between, item)
        else:
            return format('%s %s %s', n, between, item)

def ordinal(i):
    """Returns i + the ordinal indicator for the number.

    Example: ordinal(3) => '3rd'
    """
    i = int(i)
    if i % 100 in (11,12,13):
        return '%sth' % i
    ord = 'th'
    test = i % 10
    if test == 1:
        ord = 'st'
    elif test == 2:
        ord = 'nd'
    elif test == 3:
        ord = 'rd'
    return '%s%s' % (i, ord)

def be(i):
    """Returns the form of the verb 'to be' based on the number i."""
    if i == 1:
        return 'is'
    else:
        return 'are'

def has(i):
    """Returns the form of the verb 'to have' based on the number i."""
    if i == 1:
        return 'has'
    else:
        return 'have'

def toBool(s):
    s = s.strip().lower()
    if s in ('true', 'on', 'enable', 'enabled', '1'):
        return True
    elif s in ('false', 'off', 'disable', 'disabled', '0'):
        return False
    else:
        raise ValueError, 'Invalid string for toBool: %s' % quoted(s)

# When used with Supybot, this is overriden when supybot.conf is loaded
def timestamp(t):
    if t is None:
        t = time.time()
    return time.ctime(t)

_formatRe = re.compile('%((?:\d+)?\.\d+f|[bfhiLnpqrstu%])')
def format(s, *args, **kwargs):
    """w00t.

    %: literal %.
    i: integer
    s: string
    f: float
    r: repr
    b: form of the verb 'to be' (takes an int)
    h: form of the verb 'to have' (takes an int)
    L: commaAndify (takes a list of strings or a tuple of ([strings], and))
    p: pluralize (takes a string)
    q: quoted (takes a string)
    n: nItems (takes a 2-tuple of (n, item) or a 3-tuple of (n, between, item))
    t: time, formatted (takes an int)
    u: url, wrapped in braces (this should be configurable at some point)
    """
    args = list(args)
    args.reverse() # For more efficient popping.
    def sub(match):
        char = match.group(1)
        if char == 's':
            return str(args.pop())
        elif char == 'i':
            # XXX Improve me!
            return str(args.pop())
        elif char.endswith('f'):
            return ('%'+char) % args.pop()
        elif char == 'b':
            return be(args.pop())
        elif char == 'h':
            return has(args.pop())
        elif char == 'L':
            t = args.pop()
            if isinstance(t, list):
                return commaAndify(t)
            elif isinstance(t, tuple) and len(t) == 2:
                if not isinstance(t[0], list):
                    raise ValueError, \
                          'Invalid list for %%L in format: %s' % t
                if not isinstance(t[1], basestring):
                    raise ValueError, \
                          'Invalid string for %%L in format: %s' % t
                return commaAndify(t[0], And=t[1])
            else:
                raise ValueError, 'Invalid value for %%L in format: %s' % t
        elif char == 'p':
            return pluralize(args.pop())
        elif char == 'q':
            return quoted(args.pop())
        elif char == 'r':
            return repr(args.pop())
        elif char == 'n':
            t = args.pop()
            if not isinstance(t, (tuple, list)):
                raise ValueError, 'Invalid value for %%n in format: %s' % t
            if len(t) == 2:
                return nItems(*t)
            elif len(t) == 3:
                return nItems(t[0], t[2], between=t[1])
            else:
                raise ValueError, 'Invalid value for %%n in format: %s' % t
        elif char == 't':
            return timestamp(args.pop())
        elif char == 'u':
            return '<%s>' % args.pop()
        elif char == '%':
            return '%'
        else:
            raise ValueError, 'Invalid char in sub (in format).'
    try:
        return _formatRe.sub(sub, s)
    except IndexError:
        raise ValueError, 'Extra format chars in format spec: %r' % s

# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: