This file is indexed.

/usr/share/pyshared/twill/utils.py is in python-twill 0.9-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
"""
Various ugly utility functions for twill.

Apart from various simple utility functions, twill's robust parsing
code is implemented in the ConfigurableParsingFactory class.
"""

from cStringIO import StringIO
import os
import base64

import subprocess

import mechanize
import mechanize
from mechanize._util import time
from mechanize._http import HTTPRefreshProcessor
from mechanize import BrowserStateError
from errors import TwillException

class ResultWrapper:
    """
    Uniform container for the outcome of a page fetch: the HTTP status
    code, the URL and the page body.  Returned by 'journey'-wrapped
    functions, regardless of which library produced the result.
    """
    def __init__(self, http_code, url, page):
        # A missing status code is recorded as 200; anything else is
        # coerced to an integer.
        if http_code is None:
            http_code = 200
        else:
            http_code = int(http_code)

        self.http_code = http_code
        self.url = url
        self.page = page

    def get_url(self):
        """Return the URL stored at construction time."""
        return self.url

    def get_http_code(self):
        """Return the integer HTTP status code."""
        return self.http_code

    def get_page(self):
        """Return the page content stored at construction time."""
        return self.page

def trunc(s, length):
    """
    Shorten the string s so that it is at most 'length' characters long.

    A false value (None, '') yields ''.  A string that already fits is
    returned unchanged.  Otherwise the first (length-4) characters are
    kept and ' ...' is appended, giving exactly 'length' characters.

    If 'length' is smaller than the 4-character ' ...' marker, there is
    no room for the marker, so the string is simply cut to 'length'
    characters (zero or negative lengths yield '').
    """
    if not s:
        return ''

    if len(s) <= length:
        return s

    if length < 4:
        # s[:length-4] would be a *negative* slice here and could return
        # something longer than the input; hard-cut instead.
        return s[:max(length, 0)]

    return s[:length - 4] + ' ...'

def print_form(n, f, OUT):
    """
    Pretty-print the given form, assigned # n, to the file-like object OUT.

    n is the zero-based position of the form (it is displayed as n + 1),
    f is the form object (its 'name' and 'controls' attributes are used),
    and OUT receives all of the output, including the trailing blank line.

    One row is written per control: its 1-based position, its 1-based
    position among the clickable controls (blank for non-clickable
    ones), then its name, type, id and current value, each truncated to
    fit its column.
    """
    if f.name:
        print>>OUT, '\nForm name=%s (#%d)' % (f.name, n + 1)
    else:
        print>>OUT, '\nForm #%d' % (n + 1,)

    if f.controls:
        print>>OUT, "## ## __Name__________________ __Type___ __ID________ __Value__________________"

    # number the clickable controls 1, 2, ... in order of appearance.
    submit_indices = {}
    next_submit = 1
    for c in f.controls:
        if c.is_of_kind('clickable'):
            submit_indices[c] = next_submit
            next_submit += 1

    for position, field in enumerate(f.controls):
        if hasattr(field, 'items'):
            # list-like control: show the selection and the choices.
            items = [ i.name for i in field.items ]
            value_displayed = "%s of %s" % (field.value, items)
        else:
            value_displayed = "%s" % (field.value,)

        if field.is_of_kind('clickable'):
            submit_index = "%-2s" % (submit_indices[field],)
        else:
            submit_index = "  "
        strings = ("%-2s" % (position + 1,),
                   submit_index,
                   "%-24s %-9s" % (trunc(str(field.name), 24),
                                   trunc(field.type, 9)),
                   "%-12s" % (trunc(field.id or "(None)", 12),),
                   trunc(value_displayed, 40),
                   )
        for s in strings:
            print>>OUT, s,
        print>>OUT, ''

    # the closing blank line belongs with the rest of the listing, so it
    # goes to OUT rather than to sys.stdout.
    print>>OUT, ''

def make_boolean(value):
    """
    Interpret the input value as a boolean.

    The value is converted to a string, lower-cased and stripped of
    surrounding whitespace.  Recognised spellings are:

       'true'  / 'false'
       '+'     / '-'
       'on'    / 'off'
       any integer literal (zero is False, everything else is True)

    Anything else raises TwillException.
    """
    text = str(value).lower().strip()

    # the fixed keywords; none of these parses as an integer, so looking
    # them up before trying int() does not affect the outcome.
    keywords = {
        'true': True, 'false': False,
        '+': True, '-': False,
        'on': True, 'off': False,
    }
    if text in keywords:
        return keywords[text]

    # 0/nonzero
    try:
        number = int(text)
    except ValueError:
        pass
    else:
        return number != 0

    raise TwillException("unable to convert '%s' into true/false" % (text,))

def set_form_control_value(control, val):
    """
    Assign 'val' to a form control, taking the kind of control into account.

     * A checkbox control holding a single checkbox is switched on or off
       according to make_boolean(val).
     * Any other list control (including an ambiguous group of
       checkboxes) has one item selected or deselected: a leading '-'
       deselects, a leading '+' or no prefix selects.  The item is
       looked up by name first and then by label;
       mechanize.ItemNotFoundError is raised if the label lookup finds
       nothing or is ambiguous.
     * Every other control simply gets 'val' as its value.
    """
    if isinstance(control, mechanize.CheckboxControl):
        try:
            lone_checkbox = control.get()
            lone_checkbox.selected = make_boolean(val)
            return
        except mechanize.AmbiguityError:
            # several checkboxes share this control; treat it like any
            # other list control below.
            pass

    if not isinstance(control, mechanize.ListControl):
        control.value = val
        return

    # work out whether the item is to be selected or deselected, and
    # drop the +/- prefix if there is one.
    select = not val.startswith('-')
    if not select or val.startswith('+'):
        val = val[1:]

    # locate the item: by name first, by label as a fallback.
    try:
        item = control.get(name=val)
    except mechanize.ItemNotFoundError:
        try:
            item = control.get(label=val)
        except mechanize.AmbiguityError:
            raise mechanize.ItemNotFoundError('multiple matches to value/label "%s" in list control' % (val,))
        except mechanize.ItemNotFoundError:
            raise mechanize.ItemNotFoundError('cannot find value/label "%s" in list control' % (val,))

    if select:
        item.selected = 1
    else:
        item.selected = 0

def _all_the_same_submit(matches):
    """
    Decide whether a list of controls is really one and the same submit
    control: every control must be of type 'submit' or 'hidden', and
    they must agree on both name and value.

    An empty list yields True.
    """
    ref_name = None
    ref_value = None
    for ctl in matches:
        if ctl.type not in ('submit', 'hidden'):
            return False

        if ref_name is None:
            # no reference name recorded yet: adopt this control's
            # name/value as the one to compare against.
            ref_name, ref_value = ctl.name, ctl.value
            continue

        if ctl.name != ref_name or ctl.value != ref_value:
            return False

    return True

def _all_the_same_checkbox(matches):
    """
    Decide whether a list of controls is really one and the same
    checkbox: every control must be of type 'checkbox' or 'hidden', and
    they must all share one name.

    Hidden controls are accepted because forms often pair a checkbox
    with a hidden control of the same name, so that a value is still
    submitted when the user leaves the box unchecked.

    An empty list yields True.
    """
    ref_name = None
    for ctl in matches:
        if ctl.type not in ('checkbox', 'hidden'):
            return False

        if ref_name is None:
            # no reference name recorded yet: adopt this control's name.
            ref_name = ctl.name
            continue

        if ctl.name != ref_name:
            return False

    return True

def unique_match(matches):
    """
    Return True if the list of matched controls can be treated as a
    single control: there is exactly one of them, or they all belong to
    the same checkbox, or they all belong to the same submit control.
    """
    if len(matches) == 1:
        return True
    if _all_the_same_checkbox(matches):
        return True
    return _all_the_same_submit(matches)

#
# stuff to run 'tidy'...
#

# command line used to invoke tidy; the HTML is fed in on stdin.
_tidy_cmd = ["tidy", "-q", "-ashtml"]

# set to False the first time launching tidy fails with OSError, so that
# later calls do not try to launch it again.
_tidy_exists = True

def run_tidy(html):
    """
    Run the 'tidy' command-line program on the given HTML string.

    Return a 2-tuple (output, errors), where 'output' is what tidy wrote
    to stdout and 'errors' is what it wrote to stderr.  (None, None) is
    returned if 'tidy' doesn't exist or could not be run.

    Raises TwillException if tidy could not be run and the 'require_tidy'
    option is set.
    """
    global _tidy_exists

    from twill.commands import _options
    require_tidy = _options.get('require_tidy')

    clean_html = None
    errors = None

    #
    # run the command, if we think it exists
    #

    if _tidy_exists:
        try:
            process = subprocess.Popen(_tidy_cmd, stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE, bufsize=0,
                                       shell=False)

            # both results are assigned together, and only once tidy has
            # actually run, so its stderr reaches the caller intact.
            (clean_html, errors) = process.communicate(html)
        except OSError:
            # remember the failure so we don't keep trying.
            _tidy_exists = False

    if require_tidy and clean_html is None:
        raise TwillException("tidy does not exist and require_tidy is set")

    return (clean_html, errors)

class ConfigurableParsingFactory(mechanize.Factory):
    """
    A parsing factory whose behaviour follows twill's config options.

    Each time a response is installed, two options are consulted:

      'use_BeautifulSoup' -- parse with the BeautifulSoup-based factory
                             instead of the regular one;
      'use_tidy'          -- clean up the HTML with tidy before parsing.

    All parsing results (links, forms, title, ...) are delegated to
    whichever underlying factory was selected.
    """

    def __init__(self):
        self.basic_factory = mechanize.DefaultFactory()
        self.soup_factory = mechanize.RobustFactory()

        # start out with no response and no active factory.
        self.set_response(None)

    def set_request_class(self, request_class):
        """Pass the request class on to both underlying factories."""
        for delegate in (self.basic_factory, self.soup_factory):
            delegate.set_request_class(request_class)

    def set_response(self, response):
        """
        Install a new response: choose the parser, clean up the HTML and
        hand the result to the chosen factory.  A false response clears
        the active factory and the cached HTML/URL.
        """
        if response:
            if self.use_BS():
                self.factory = self.soup_factory
            else:
                self.factory = self.basic_factory
            self.factory.set_response(self._cleanup_html(response))
        else:
            self.factory = None
            self._orig_html = None
            self._html = None
            self._url = None

    def links(self):
        """Links, as reported by the active factory."""
        return self.factory.links()

    def forms(self):
        """Forms, as reported by the active factory."""
        return self.factory.forms()

    def get_global_form(self):
        """The active factory's global form."""
        return self.factory.global_form
    global_form = property(get_global_form)

    def _get_title(self):
        return self.factory.title
    title = property(_get_title)

    def _get_encoding(self):
        return self.factory.encoding
    encoding = property(_get_encoding)

    def _get_is_html(self):
        return self.factory.is_html
    is_html = property(_get_is_html)

    def _cleanup_html(self, response):
        """
        Read the response body, remember the original HTML and URL,
        optionally run the HTML through tidy, and return a new response
        built from the resulting HTML with the original headers, URL,
        code and message.
        """
        response.seek(0)
        self._orig_html = response.read()
        self._url = response.geturl()
        response.seek(0)          # leave the original response rewound

        self._html = self._orig_html

        from twill.commands import _options
        if _options.get('use_tidy'):
            tidied, _errors = run_tidy(self._html)
            # keep the original HTML if tidy produced nothing.
            if tidied:
                self._html = tidied

        return mechanize.make_response(self._html, response._headers.items(),
                                       response._url, response.code,
                                       response.msg)

    def use_BS(self):
        """Return the current value of the 'use_BeautifulSoup' option."""
        from twill.commands import _options
        return _options.get('use_BeautifulSoup')

###

class FixedHTTPBasicAuthHandler(mechanize.HTTPBasicAuthHandler):
    """
    Fix a bug that exists through Python 2.4 (but NOT in 2.5!)

    The password lookup below is keyed on the request's full URL; that
    argument is the part marked as changed by the original author (CTB).
    """
    def retry_http_basic_auth(self, host, req, realm):
        """
        Retry 'req' with a Basic Authorization header for 'realm'.

        Returns the response from re-opening the request, or None if no
        password is known for this realm/URL or if the very same
        credentials were already sent on this request.
        """
        user,pw = self.passwd.find_user_password(realm, req.get_full_url())
        # ----------------------------------------------^^^^^^^^^^^^^^^^^^ CTB
        if pw is None:
            return None

        raw = "%s:%s" % (user, pw)
        # b64encode, unlike encodestring, never inserts line breaks, so
        # long user/password pairs still yield a single-line header.
        auth = 'Basic %s' % base64.b64encode(raw).strip()
        if req.headers.get(self.auth_header, None) == auth:
            # these credentials have already been tried; don't loop.
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
    

###

# set to True to trace the handling of 'refresh' headers on OUT.
_debug_print_refresh = False
class FunctioningHTTPRefreshProcessor(HTTPRefreshProcessor):
    """
    Fix an issue where the 'content' component of the http-equiv=refresh
    tag may not contain 'url='.  CTB hack.

    Accepted header forms are '<pause>; url=<target>' and
    '<pause>; <target>'.  The text in front of an '=' is only treated as
    a key when it is 'url' (case-insensitive), so a bare target that
    contains a query string is kept intact.
    """
    def http_response(self, request, response):
        """
        Follow a 'refresh' header on a 200 response, if the
        'acknowledge_equiv_refresh' option is set.

        Returns the response produced by the opener's "refresh" error
        handling when the refresh is followed; otherwise returns the
        response unchanged (no refresh header, no ';' in the header, an
        unparseable pause, or a pause longer than self.max_time).
        """
        from twill.commands import OUT, _options
        do_refresh = _options.get('acknowledge_equiv_refresh')

        code, msg, hdrs = response.code, response.msg, response.info()

        if not (code == 200 and hdrs.has_key("refresh") and do_refresh):
            return response

        refresh = hdrs.getheaders("refresh")[0]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: code 200, hdrs has 'refresh'"
            print>>OUT, "equiv-refresh DEBUG: refresh header is", refresh

        i = refresh.find(";")
        if i == -1:
            # a pause with no target: nothing to follow.
            return response

        pause_spec, newurl_spec = refresh[:i], refresh[i+1:]

        # the pause is normally an integer, but fractional values do
        # occur; anything that is not a number means a malformed header,
        # which is ignored rather than allowed to abort the request.
        try:
            pause = int(pause_spec)
        except ValueError:
            try:
                pause = float(pause_spec)
            except ValueError:
                print>>OUT, "Ignoring malformed HTTP-EQUIV=REFRESH header: %s" % (refresh,)
                return response

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: pause:", pause
            print>>OUT, "equiv-refresh DEBUG: new url:", newurl_spec

        # strip an optional 'url=' prefix.  Only an '=' preceded by the
        # key 'url' counts; any other '=' is part of the target itself.
        newurl = newurl_spec.strip()
        j = newurl.find("=")
        if j != -1 and newurl[:j].strip().lower() == "url":
            newurl = newurl[j+1:].strip()

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: final url:", newurl

        print>>OUT, "Following HTTP-EQUIV=REFRESH to %s" % (newurl,)

        if (self.max_time is None) or (pause <= self.max_time):
            # CTB hack: the pause is deliberately NOT honoured (no
            # sleep); the refresh is followed immediately.
            hdrs["location"] = newurl
            # hardcoded http is NOT a bug
            response = self.parent.error(
                "http", request, response,
                "refresh", msg, hdrs)

        return response

    https_response = http_response

####

class HistoryStack(mechanize._mechanize.History):
    """
    A mechanize History that can also be measured with len() and
    indexed, by delegating to its '_history' attribute (presumably the
    list maintained by the mechanize base class -- confirm there).
    """
    def __len__(self):
        # number of entries currently held in self._history.
        return len(self._history)
    def __getitem__(self, i):
        # the i'th entry of self._history.
        return self._history[i]
    
####

def _is_valid_filename(f):
    """
    Return False for names that end in '~', '.bak' or '.old', and True
    for every other name.
    """
    for suffix in ('~', '.bak', '.old'):
        if f.endswith(suffix):
            return False
    return True

def gather_filenames(arglist):
    """
    Expand a list of file and directory names into a flat list of files.

    Arguments that are not directories are passed through untouched, in
    order.  Each directory argument is replaced by the sorted paths of
    all files found anywhere beneath it, leaving out names rejected by
    _is_valid_filename.
    """
    collected = []

    for entry in arglist:
        if not os.path.isdir(entry):
            collected.append(entry)
            continue

        # walk the whole tree below this directory.
        found = [os.path.join(root, name)
                 for (root, _subdirs, names) in os.walk(entry)
                 for name in names
                 if _is_valid_filename(name)]
        found.sort()
        collected.extend(found)

    return collected