This file is indexed.

/usr/share/pyshared/quixote/http_request.py is in python-quixote1 1.2-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
"""quixote.http_request
$HeadURL: svn+ssh://svn/repos/trunk/quixote/http_request.py $
$Id: http_request.py 25234 2004-09-30 17:36:19Z nascheme $

Provides the HTTPRequest class and related code for parsing HTTP
requests, such as the FileUpload class.

Derived from Zope's HTTPRequest module (hence the different
copyright and license from the rest of Quixote).
"""

##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

__revision__ = "$Id: http_request.py 25234 2004-09-30 17:36:19Z nascheme $"

import re
import time
import urlparse, urllib
from cgi import FieldStorage
from types import ListType

from quixote.http_response import HTTPResponse
from quixote.html import html_quote


# Various regexes for parsing specific bits of HTTP, all from RFC 2616.

# These are needed by 'get_encoding()', to parse the "Accept-Encoding"
# header.  LWS is linear whitespace; the latter two assume that LWS
# has been removed.
_http_lws_re = re.compile("(\r\n)?[ \t]+")
_http_list_re = re.compile(r",+")
_http_encoding_re = re.compile(r"([^;]+)(;q=([\d.]+))?$")

# These are needed by 'guess_browser_version()', for parsing the
# "User-Agent" header.
#   token = 1*<any CHAR except CTLs or separators>
#   CHAR = any 7-bit US ASCII character (0-127)
#   separators are  ( ) < > @ , ; : \ " / [ ] ? = { }
#
# The user_agent RE is a simplification; it only looks for one "product",
# possibly followed by a comment.
_http_token_pat = r'[^\x00-\x20\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=\{\}\x7F-\xFF]+'
_http_product_pat = r'(%s)(?:/(%s))?' % (_http_token_pat, _http_token_pat)
_http_product_re = re.compile(_http_product_pat)
_comment_delim_re = re.compile(r';\s*')


def get_content_type(environ):
    ctype = environ.get("CONTENT_TYPE")
    if ctype:
        return ctype.split(";")[0]
    else:
        return None


class HTTPRequest:
    """
    Model a single HTTP request and all associated data: environment
    variables, form variables, cookies, etc.

    To access environment variables associated with the request, use
    get_environ(): eg. request.get_environ('SERVER_PORT', 80).

    To access form variables, use get_form_var(), eg.
    request.get_form_var("name").

    To access cookies, use get_cookie().

    Various bits and pieces of the requested URL can be accessed with
    get_url(), get_path(), get_server()

    The HTTPResponse object corresponding to this request is available
    in the 'response' attribute.  This is rarely needed: eg. to send an
    error response, you should raise one of the exceptions in errors.py;
    to send a redirect, you should use the request's redirect() method,
    which lets you specify relative URLs.  However, if you need to tweak
    the response object in other ways, you can do so via 'response'.
    Just keep in mind that Quixote discards the original response object
    when handling an exception.
    """

    def __init__(self, stdin, environ, content_type=None):
        self.stdin = stdin
        self.environ = environ
        if content_type is None:
            self.content_type = get_content_type(environ)
        else:
            self.content_type = content_type
        self.form = {}
        self.session = None
        self.response = HTTPResponse()
        self.start_time = None

        # The strange treatment of SERVER_PORT_SECURE is because IIS
        # sets this environment variable to "0" for non-SSL requests
        # (most web servers -- well, Apache at least -- simply don't set
        # it in that case).
        if (environ.get('HTTPS', 'off').lower() == 'on' or
            environ.get('SERVER_PORT_SECURE', '0') != '0'):
            self.scheme = "https"
        else:
            self.scheme = "http"

        k = self.environ.get('HTTP_COOKIE', '')
        if k:
            self.cookies = parse_cookie(k)
        else:
            self.cookies = {}

        # IIS breaks PATH_INFO because it leaves in the path to
        # the script, so SCRIPT_NAME is "/cgi-bin/q.py" and PATH_INFO
        # is "/cgi-bin/q.py/foo/bar".  The following code fixes
        # PATH_INFO to the expected value "/foo/bar".
        web_server = environ.get('SERVER_SOFTWARE', 'unknown')
        if web_server.find('Microsoft-IIS') != -1:
            script = environ['SCRIPT_NAME']
            path = environ['PATH_INFO']
            if path.startswith(script):
                path = path[len(script):]
                self.environ['PATH_INFO'] = path

    def add_form_value(self, key, value):
        if self.form.has_key(key):
            found = self.form[key]
            if type(found) is ListType:
                found.append(value)
            else:
                found = [found, value]
                self.form[key] = found
        else:
            self.form[key] = value

    def process_inputs(self):
        """Process request inputs.
        """
        self.start_time = time.time()
        if self.get_method() != 'GET':
            # Avoid consuming the contents of stdin unless we're sure
            # there's actually form data.
            if self.content_type == "multipart/form-data":
                raise RuntimeError(
                    "cannot handle multipart/form-data requests")
            elif self.content_type == "application/x-www-form-urlencoded":
                fp = self.stdin
            else:
                return
        else:
            fp = None

        fs = FieldStorage(fp=fp, environ=self.environ, keep_blank_values=1)
        if fs.list:
            for item in fs.list:
                self.add_form_value(item.name, item.value)

    def get_header(self, name, default=None):
        """get_header(name : string, default : string = None) -> string

        Return the named HTTP header, or an optional default argument
        (or None) if the header is not found.  Note that both original
        and CGI-ified header names are recognized, e.g. 'Content-Type',
        'CONTENT_TYPE' and 'HTTP_CONTENT_TYPE' should all return the
        Content-Type header, if available.
        """
        environ = self.environ
        name = name.replace("-", "_").upper()
        val = environ.get(name)
        if val is not None:
            return val
        if name[:5] != 'HTTP_':
            name = 'HTTP_' + name
        return environ.get(name, default)

    def get_cookie(self, cookie_name, default=None):
        return self.cookies.get(cookie_name, default)

    def get_form_var(self, var_name, default=None):
        return self.form.get(var_name, default)

    def get_method(self):
        """Returns the HTTP method for this request
        """
        return self.environ.get('REQUEST_METHOD', 'GET')

    def formiter(self):
        return self.form.iteritems()

    def get_scheme(self):
        return self.scheme

    # The following environment variables are useful for reconstructing
    # the original URL, all of which are specified by CGI 1.1:
    #
    #   SERVER_NAME            "www.example.com"
    #   SCRIPT_NAME            "/q"
    #   PATH_INFO              "/debug/dump_sessions"
    #   QUERY_STRING           "session_id=10.27.8.40...."

    def get_server(self):
        """get_server() -> string

        Return the server name with an optional port number, eg.
        "www.example.com" or "foo.bar.com:8000".
        """
        http_host = self.environ.get("HTTP_HOST")
        if http_host:
            return http_host
        server_name = self.environ["SERVER_NAME"].strip()
        server_port = self.environ.get("SERVER_PORT")
        if (not server_port or
            (self.get_scheme() == "http" and server_port == "80") or
            (self.get_scheme() == "https" and server_port == "443")):
            return server_name
        else:
            return server_name + ":" + server_port

    def get_path(self, n=0):
        """get_path(n : int = 0) -> string

        Return the path of the current request, chopping off 'n' path
        components from the right.  Eg. if the path is "/bar/baz/qux",
        n=0 would return "/bar/baz/qux" and n=2 would return "/bar".
        Note that the query string, if any, is not included.

        A path with a trailing slash should just be considered as having
        an empty last component.  Eg. if the path is "/bar/baz/", then:
          get_path(0) == "/bar/baz/"
          get_path(1) == "/bar/baz"
          get_path(2) == "/bar"

        If 'n' is negative, then components from the left of the path
        are returned.  Continuing the above example,
          get_path(-1) = "/bar"
          get_path(-2) = "/bar/baz"
          get_path(-3) = "/bar/baz/"

        Raises ValueError if absolute value of n is larger than the number of
        path components."""

        path_info = self.environ.get('PATH_INFO', '')
        path = self.environ['SCRIPT_NAME'] + path_info
        if n == 0:
            return path
        else:
            path_comps = path.split('/')
            if abs(n) > len(path_comps)-1:
                raise ValueError, "n=%d too big for path '%s'" % (n, path)
            if n > 0:
                return '/'.join(path_comps[:-n])
            elif n < 0:
                return '/'.join(path_comps[:-n+1])
            else:
                assert 0, "Unexpected value for n (%s)" % n

    def get_url(self, n=0):
        """get_url(n : int = 0) -> string

        Return the URL of the current request, chopping off 'n' path
        components from the right.  Eg. if the URL is
        "http://foo.com/bar/baz/qux", n=2 would return
        "http://foo.com/bar".  Does not include the query string (if
        any).
        """
        return "%s://%s%s" % (self.get_scheme(), self.get_server(),
                              urllib.quote(self.get_path(n)))

    def get_environ(self, key, default=None):
        """get_environ(key : string) -> string

        Fetch a CGI environment variable from the request environment.
        See http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        for the variables specified by the CGI standard.
        """
        return self.environ.get(key, default)

    def get_encoding(self, encodings):
        """get_encoding(encodings : [string]) -> string

        Parse the "Accept-encoding" header. 'encodings' is a list of
        encodings supported by the server sorted in order of preference.
        The return value is one of 'encodings' or None if the client
        does not accept any of the encodings.
        """
        accept_encoding = self.get_header("accept-encoding") or ""
        found_encodings = self._parse_pref_header(accept_encoding)
        if found_encodings:
            for encoding in encodings:
                if found_encodings.has_key(encoding):
                    return encoding
        return None

    def get_accepted_types(self):
        """get_accepted_types() : {string:float}
        Return a dictionary mapping MIME types the client will accept
        to the corresponding quality value (1.0 if no value was specified).
        """
        accept_types = self.environ.get('HTTP_ACCEPT', "")
        return self._parse_pref_header(accept_types)


    def _parse_pref_header(self, S):
        """_parse_pref_header(S:string) : {string:float}
        Parse a list of HTTP preferences (content types, encodings) and
        return a dictionary mapping strings to the quality value.
        """

        found = {}
        # remove all linear whitespace
        S = _http_lws_re.sub("", S)
        for coding in _http_list_re.split(S):
            m = _http_encoding_re.match(coding)
            if m:
                encoding = m.group(1).lower()
                q = m.group(3) or 1.0
                try:
                    q = float(q)
                except ValueError:
                    continue
                if encoding == "*":
                    continue # stupid, ignore it
                if q > 0:
                    found[encoding] = q
        return found


    def dump_html(self):
        row_fmt=('<tr valign="top"><th align="left">%s</th><td>%s</td></tr>')
        lines = ["<h3>form</h3>",
                 "<table>"]

        for k,v in self.form.items():
            lines.append(row_fmt % (html_quote(k), html_quote(v)))
        lines += ["</table>",
                  "<h3>cookies</h3>",
                  "<table>"]
        for k,v in self.cookies.items():
            lines.append(row_fmt % (html_quote(k), html_quote(v)))

        lines += ["</table>",
                  "<h3>environ</h3>"
                  "<table>"]
        for k,v in self.environ.items():
            lines.append(row_fmt % (html_quote(k), html_quote(str(v))))
        lines.append("</table>")

        return "\n".join(lines)

    def dump(self):
        result=[]
        row='%-15s %s'

        result.append("Form:")
        L = self.form.items() ; L.sort()
        for k,v in L:
            result.append(row % (k,v))

        result.append("")
        result.append("Cookies:")
        L = self.cookies.items() ; L.sort()
        for k,v in L:
            result.append(row % (k,v))


        result.append("")
        result.append("Environment:")
        L = self.environ.items() ; L.sort()
        for k,v in L:
            result.append(row % (k,v))
        return "\n".join(result)

    def guess_browser_version(self):
        """guess_browser_version() -> (name : string, version : string)

        Examine the User-agent request header to try to figure out what
        the current browser is.  Returns either (name, version) where
        each element is a string, (None, None) if we couldn't parse the
        User-agent header at all, or (name, None) if we got the name but
        couldn't figure out the version.

        Handles Microsoft's little joke of pretending to be Mozilla,
        eg. if the "User-Agent" header is
          Mozilla/5.0 (compatible; MSIE 5.5)
        returns ("MSIE", "5.5").  Konqueror does the same thing, and
        it's handled the same way.
        """
        ua = self.get_header('user-agent')
        if ua is None:
            return (None, None)

        # The syntax for "User-Agent" in RFC 2616 is fairly simple:
        #
        #  User-Agent      = "User-Agent" ":" 1*( product | comment )
        #  product         = token ["/" product-version ]
        #  product-version = token
        #  comment         = "(" *( ctext | comment ) ")"
        #  ctext           = <any TEXT excluding "(" and ")">
        #  token           = 1*<any CHAR except CTLs or tspecials>
        #  tspecials       = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" |
        #                    "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" |
        #                    "}" | SP | HT
        #
        # This function handles the most-commonly-used subset of this syntax,
        # namely
        #   User-Agent = "User-Agent" ":" product 1*SP [comment]
        # ie. one product string followed by an optional comment;
        # anything after that first comment is ignored.  This should be
        # enough to distinguish Mozilla/Netscape, MSIE, Opera, and
        # Konqueror.

        m = _http_product_re.match(ua)
        if not m:
            import sys
            sys.stderr.write("couldn't parse User-Agent header: %r\n" % ua)
            return (None, None)

        name, version = m.groups()
        ua = ua[m.end():].lstrip()

        if ua.startswith('('):
            # we need to handle nested comments since MSIE uses them
            depth = 1
            chars = []
            for c in ua[1:]:
                if c == '(':
                    depth += 1
                elif c == ')':
                    depth -= 1
                    if depth == 0:
                        break
                elif depth == 1:
                    # nested comments are discarded
                    chars.append(c)
            comment = ''.join(chars)
        else:
            comment = ''
        if comment:
            comment_chunks = _comment_delim_re.split(comment)
        else:
            comment_chunks = []

        if ("compatible" in comment_chunks and
            len(comment_chunks) > 1 and comment_chunks[1]):
            # A-ha!  Someone is kidding around, pretending to be what
            # they are not.  Most likely MSIE masquerading as Mozilla,
            # but lots of other clients (eg. Konqueror) do the same.
            real_ua = comment_chunks[1]
            if "/" in real_ua:
                (name, version) = real_ua.split("/", 1)
            else:
                if real_ua.startswith("MSIE") and ' ' in real_ua:
                    (name, version) = real_ua.split(" ", 1)
                else:
                    name = real_ua
                    version = None
            return (name, version)

        # Either nobody is pulling our leg, or we didn't find anything
        # that looks vaguely like a user agent in the comment.  So use
        # what we found outside the comment, ie. what the spec says we
        # should use (sigh).
        return (name, version)

    # guess_browser_version ()

    def redirect(self, location, permanent=0):
        """redirect(location : string, permanent : boolean = false)
           -> string

        Create a redirection response.  If the location is relative, then it
        will automatically be made absolute.  The return value is an HTML
        document indicating the new URL (useful if the client browser does
        not honor the redirect).
        """
        location = urlparse.urljoin(self.get_url(), location)
        return self.response.redirect(location, permanent)


_qparm_re = re.compile(r'([\0- ]*'
                       r'([^\0- ;,=\"]+)="([^"]*)"'
                       r'([\0- ]*[;,])?[\0- ]*)')
_parm_re = re.compile(r'([\0- ]*'
                      r'([^\0- ;,="]+)=([^\0- ;,"]*)'
                      r'([\0- ]*[;,])?[\0- ]*)')

def parse_cookie(text):
    result = {}

    pos = 0
    while 1:
        mq = _qparm_re.match(text, pos)
        m = _parm_re.match(text, pos)
        if mq is not None:
            # Match quoted correct cookies
            name = mq.group(2)
            value = mq.group(3)
            pos = mq.end()
        elif m is not None:
            # Match evil MSIE cookies ;)
            name = m.group(2)
            value = m.group(3)
            pos = m.end()
        else:
            # this may be an invalid cookie.
            # We'll simply bail without raising an error
            # if the cookie is invalid.
            return result

        if not result.has_key(name):
            result[name] = value

    return result