/usr/share/pyshared/quixote/http_request.py is in python-quixote1 1.2-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 | 
"""quixote.http_request
$HeadURL: svn+ssh://svn/repos/trunk/quixote/http_request.py $
$Id: http_request.py 25234 2004-09-30 17:36:19Z nascheme $
Provides the HTTPRequest class and related code for parsing HTTP
requests, such as the FileUpload class.
Derived from Zope's HTTPRequest module (hence the different
copyright and license from the rest of Quixote).
"""
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__revision__ = "$Id: http_request.py 25234 2004-09-30 17:36:19Z nascheme $"
import re
import time
import urlparse, urllib
from cgi import FieldStorage
from types import ListType
from quixote.http_response import HTTPResponse
from quixote.html import html_quote
# Various regexes for parsing specific bits of HTTP, all from RFC 2616.
#
# The first three are used by '_parse_pref_header()', which backs both
# 'get_encoding()' (the "Accept-Encoding" header) and
# 'get_accepted_types()' (the "Accept" header).  LWS is linear
# whitespace; the latter two assume that LWS has already been removed
# from the header value.

# Linear whitespace: an optional CRLF followed by one or more spaces/tabs.
_http_lws_re = re.compile("(\r\n)?[ \t]+")

# List separator: one or more consecutive commas.
_http_list_re = re.compile(r",+")

# One list element: group 1 is everything before the first ';',
# group 3 is the quality value from an optional trailing ";q=...".
_http_encoding_re = re.compile(r"([^;]+)(;q=([\d.]+))?$")

# These are needed by 'guess_browser_version()', for parsing the
# "User-Agent" header.
#   token = 1*<any CHAR except CTLs or separators>
#   CHAR = any 7-bit US ASCII character (0-127)
#   separators are ( ) < > @ , ; : \ " / [ ] ? = { }
#
# The user_agent RE is a simplification; it only looks for one "product",
# possibly followed by a comment.

# A token: one or more characters outside \x00-\x20 (controls and space),
# the separators listed above, and \x7F-\xFF.
_http_token_pat = r'[^\x00-\x20\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=\{\}\x7F-\xFF]+'

# A product: a token (group 1) optionally followed by "/" and a second
# token (group 2, None when absent).
_http_product_pat = r'(%s)(?:/(%s))?' % (_http_token_pat, _http_token_pat)
_http_product_re = re.compile(_http_product_pat)

# Delimiter between the chunks of a User-Agent comment: a semicolon
# followed by optional whitespace.
_comment_delim_re = re.compile(r';\s*')
def get_content_type(environ):
    """get_content_type(environ : mapping) -> string | None

    Return the media type from the CGI 'CONTENT_TYPE' variable with any
    parameters (everything from the first ';' on) removed, or None if
    the variable is missing or empty.

    Whitespace around the media type is stripped, so that a header such
    as "text/html ; charset=utf-8" yields "text/html" rather than
    "text/html " (which would never compare equal to a media type
    literal).
    """
    ctype = environ.get("CONTENT_TYPE")
    if not ctype:
        return None
    return ctype.split(";")[0].strip()
class HTTPRequest:
    """
    Model a single HTTP request and all associated data: environment
    variables, form variables, cookies, etc.

    To access environment variables associated with the request, use
    get_environ(): eg. request.get_environ('SERVER_PORT', 80).

    To access form variables, use get_form_var(), eg.
    request.get_form_var("name").

    To access cookies, use get_cookie().

    Various bits and pieces of the requested URL can be accessed with
    get_url(), get_path(), get_server()

    The HTTPResponse object corresponding to this request is available
    in the 'response' attribute.  This is rarely needed: eg. to send an
    error response, you should raise one of the exceptions in errors.py;
    to send a redirect, you should use the request's redirect() method,
    which lets you specify relative URLs.  However, if you need to tweak
    the response object in other ways, you can do so via 'response'.
    Just keep in mind that Quixote discards the original response object
    when handling an exception.
    """

    def __init__(self, stdin, environ, content_type=None):
        """__init__(stdin : file, environ : mapping,
                    content_type : string = None)

        'stdin' is the stream the request body is read from and
        'environ' is the CGI environment of the request.  If
        'content_type' is not given it is derived from 'environ'.

        No form data is parsed here; call process_inputs() for that.
        """
        self.stdin = stdin
        self.environ = environ
        if content_type is None:
            self.content_type = get_content_type(environ)
        else:
            self.content_type = content_type
        self.form = {}
        self.session = None
        self.response = HTTPResponse()
        self.start_time = None

        # The strange treatment of SERVER_PORT_SECURE is because IIS
        # sets this environment variable to "0" for non-SSL requests
        # (most web servers -- well, Apache at least -- simply don't set
        # it in that case).
        if (environ.get('HTTPS', 'off').lower() == 'on' or
                environ.get('SERVER_PORT_SECURE', '0') != '0'):
            self.scheme = "https"
        else:
            self.scheme = "http"

        k = self.environ.get('HTTP_COOKIE', '')
        if k:
            self.cookies = parse_cookie(k)
        else:
            self.cookies = {}

        # IIS breaks PATH_INFO because it leaves in the path to
        # the script, so SCRIPT_NAME is "/cgi-bin/q.py" and PATH_INFO
        # is "/cgi-bin/q.py/foo/bar".  The following code fixes
        # PATH_INFO to the expected value "/foo/bar".
        web_server = environ.get('SERVER_SOFTWARE', 'unknown')
        if web_server.find('Microsoft-IIS') != -1:
            # Either variable may be missing; treat that as empty, the
            # same way get_path() treats a missing PATH_INFO, rather
            # than failing with KeyError while constructing the request.
            script = environ.get('SCRIPT_NAME', '')
            path = environ.get('PATH_INFO', '')
            if script and path.startswith(script):
                self.environ['PATH_INFO'] = path[len(script):]

    def add_form_value(self, key, value):
        """add_form_value(key : string, value : any)

        Record one form value.  The first value seen for 'key' is stored
        as is; when further values arrive for the same key, the entry
        becomes a list holding all of them in arrival order.
        """
        if key in self.form:
            found = self.form[key]
            if isinstance(found, list):
                found.append(value)
            else:
                found = [found, value]
                self.form[key] = found
        else:
            self.form[key] = value

    def process_inputs(self):
        """Process request inputs.

        Records the start time of the request and fills in 'self.form'
        from the query string (GET and HEAD requests) or from a
        URL-encoded request body (any other method).  Requests with any
        other body type are left unparsed.

        Raises RuntimeError for a "multipart/form-data" body, which this
        class cannot handle.
        """
        self.start_time = time.time()
        # HEAD must be processed exactly like GET (only the response
        # body is omitted), so its query string is parsed as well.
        if self.get_method() in ('GET', 'HEAD'):
            fp = None
        # Avoid consuming the contents of stdin unless we're sure
        # there's actually form data.
        elif self.content_type == "multipart/form-data":
            raise RuntimeError(
                "cannot handle multipart/form-data requests")
        elif self.content_type == "application/x-www-form-urlencoded":
            fp = self.stdin
        else:
            return

        fs = FieldStorage(fp=fp, environ=self.environ, keep_blank_values=1)
        if fs.list:
            for item in fs.list:
                self.add_form_value(item.name, item.value)

    def get_header(self, name, default=None):
        """get_header(name : string, default : string = None) -> string

        Return the named HTTP header, or an optional default argument
        (or None) if the header is not found.  Note that both original
        and CGI-ified header names are recognized, e.g. 'Content-Type',
        'CONTENT_TYPE' and 'HTTP_CONTENT_TYPE' should all return the
        Content-Type header, if available.
        """
        environ = self.environ
        name = name.replace("-", "_").upper()
        val = environ.get(name)
        if val is not None:
            return val
        if name[:5] != 'HTTP_':
            name = 'HTTP_' + name
        return environ.get(name, default)

    def get_cookie(self, cookie_name, default=None):
        """get_cookie(cookie_name : string, default : any = None) -> string

        Return the value of the named cookie, or 'default' if the
        request did not carry it.
        """
        return self.cookies.get(cookie_name, default)

    def get_form_var(self, var_name, default=None):
        """get_form_var(var_name : string, default : any = None) -> any

        Return the value of the named form variable, or 'default' if it
        was not submitted.  The value is a list if the variable was
        submitted more than once (see add_form_value()).
        """
        return self.form.get(var_name, default)

    def get_method(self):
        """Returns the HTTP method for this request
        """
        return self.environ.get('REQUEST_METHOD', 'GET')

    def formiter(self):
        """Return an iterator over the (name, value) pairs of the form.
        """
        return self.form.iteritems()

    def get_scheme(self):
        """Return the URL scheme of the request: "http" or "https".
        """
        return self.scheme

    # The following environment variables are useful for reconstructing
    # the original URL, all of which are specified by CGI 1.1:
    #
    #   SERVER_NAME  "www.example.com"
    #   SCRIPT_NAME  "/q"
    #   PATH_INFO    "/debug/dump_sessions"
    #   QUERY_STRING "session_id=10.27.8.40...."

    def get_server(self):
        """get_server() -> string

        Return the server name with an optional port number, eg.
        "www.example.com" or "foo.bar.com:8000".

        The "Host" request header is used verbatim when present;
        otherwise the result is built from SERVER_NAME and SERVER_PORT,
        leaving the port off when it is the default for the scheme.
        """
        http_host = self.environ.get("HTTP_HOST")
        if http_host:
            return http_host
        server_name = self.environ["SERVER_NAME"].strip()
        server_port = self.environ.get("SERVER_PORT")
        if (not server_port or
                (self.get_scheme() == "http" and server_port == "80") or
                (self.get_scheme() == "https" and server_port == "443")):
            return server_name
        else:
            return server_name + ":" + server_port

    def get_path(self, n=0):
        """get_path(n : int = 0) -> string

        Return the path of the current request, chopping off 'n' path
        components from the right.  Eg. if the path is "/bar/baz/qux",
        n=0 would return "/bar/baz/qux" and n=2 would return "/bar".
        Note that the query string, if any, is not included.

        A path with a trailing slash should just be considered as having
        an empty last component.  Eg. if the path is "/bar/baz/", then:
          get_path(0) == "/bar/baz/"
          get_path(1) == "/bar/baz"
          get_path(2) == "/bar"

        If 'n' is negative, then components from the left of the path
        are returned.  Continuing the above example,
          get_path(-1) = "/bar"
          get_path(-2) = "/bar/baz"
          get_path(-3) = "/bar/baz/"

        Raises ValueError if absolute value of n is larger than the number of
        path components."""
        path_info = self.environ.get('PATH_INFO', '')
        path = self.environ['SCRIPT_NAME'] + path_info
        if n == 0:
            return path

        path_comps = path.split('/')
        # The first component is the empty string before the leading
        # slash, hence the "- 1".
        if abs(n) > len(path_comps) - 1:
            raise ValueError("n=%d too big for path '%s'" % (n, path))
        if n > 0:
            return '/'.join(path_comps[:-n])
        # n < 0: keep the leading empty component plus -n real ones.
        return '/'.join(path_comps[:1 - n])

    def get_url(self, n=0):
        """get_url(n : int = 0) -> string

        Return the URL of the current request, chopping off 'n' path
        components from the right.  Eg. if the URL is
        "http://foo.com/bar/baz/qux", n=2 would return
        "http://foo.com/bar".  Does not include the query string (if
        any).
        """
        return "%s://%s%s" % (self.get_scheme(), self.get_server(),
                              urllib.quote(self.get_path(n)))

    def get_environ(self, key, default=None):
        """get_environ(key : string, default : any = None) -> string

        Fetch a CGI environment variable from the request environment,
        returning 'default' if it is not set.
        See http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        for the variables specified by the CGI standard.
        """
        return self.environ.get(key, default)

    def get_encoding(self, encodings):
        """get_encoding(encodings : [string]) -> string

        Parse the "Accept-encoding" header.  'encodings' is a list of
        encodings supported by the server sorted in order of preference.
        The return value is one of 'encodings' or None if the client
        does not accept any of the encodings.
        """
        accept_encoding = self.get_header("accept-encoding") or ""
        found_encodings = self._parse_pref_header(accept_encoding)
        if found_encodings:
            for encoding in encodings:
                if encoding in found_encodings:
                    return encoding
        return None

    def get_accepted_types(self):
        """get_accepted_types() : {string:float}

        Return a dictionary mapping MIME types the client will accept
        to the corresponding quality value (1.0 if no value was specified).
        """
        accept_types = self.environ.get('HTTP_ACCEPT', "")
        return self._parse_pref_header(accept_types)

    def _parse_pref_header(self, S):
        """_parse_pref_header(S:string) : {string:float}

        Parse a list of HTTP preferences (content types, encodings) and
        return a dictionary mapping strings to the quality value.

        Names are lower-cased.  Entries that cannot be parsed, the
        bare wildcard "*", and entries whose quality value is not
        greater than zero are left out of the result.
        """
        found = {}
        # remove all linear whitespace
        S = _http_lws_re.sub("", S)
        for coding in _http_list_re.split(S):
            m = _http_encoding_re.match(coding)
            if m:
                encoding = m.group(1).lower()
                q = m.group(3) or 1.0
                try:
                    q = float(q)
                except ValueError:
                    continue
                if encoding == "*":
                    continue # stupid, ignore it
                if q > 0:
                    found[encoding] = q
        return found

    def dump_html(self):
        """dump_html() -> string

        Return an HTML fragment with one table each for the form
        variables, the cookies and the environment of this request.
        Names and values are passed through html_quote().
        """
        row_fmt = ('<tr valign="top"><th align="left">%s</th><td>%s</td></tr>')
        lines = ["<h3>form</h3>",
                 "<table>"]
        for k, v in self.form.items():
            lines.append(row_fmt % (html_quote(k), html_quote(v)))
        lines += ["</table>",
                  "<h3>cookies</h3>",
                  "<table>"]
        for k, v in self.cookies.items():
            lines.append(row_fmt % (html_quote(k), html_quote(v)))
        # Heading and table tag are separate items, like the two
        # sections above, so that each ends up on its own output line.
        lines += ["</table>",
                  "<h3>environ</h3>",
                  "<table>"]
        for k, v in self.environ.items():
            lines.append(row_fmt % (html_quote(k), html_quote(str(v))))
        lines.append("</table>")
        return "\n".join(lines)

    def dump(self):
        """dump() -> string

        Return a plain-text listing of the form variables, cookies and
        environment of this request, each section sorted by name and
        separated from the next by an empty line.
        """
        result = []
        row = '%-15s %s'

        result.append("Form:")
        for k, v in sorted(self.form.items()):
            result.append(row % (k, v))

        result.append("")
        result.append("Cookies:")
        for k, v in sorted(self.cookies.items()):
            result.append(row % (k, v))

        result.append("")
        result.append("Environment:")
        for k, v in sorted(self.environ.items()):
            result.append(row % (k, v))
        return "\n".join(result)

    def guess_browser_version(self):
        """guess_browser_version() -> (name : string, version : string)

        Examine the User-agent request header to try to figure out what
        the current browser is.  Returns either (name, version) where
        each element is a string, (None, None) if we couldn't parse the
        User-agent header at all, or (name, None) if we got the name but
        couldn't figure out the version.

        Handles Microsoft's little joke of pretending to be Mozilla,
        eg. if the "User-Agent" header is
          Mozilla/5.0 (compatible; MSIE 5.5)
        returns ("MSIE", "5.5").  Konqueror does the same thing, and
        it's handled the same way.
        """
        ua = self.get_header('user-agent')
        if ua is None:
            return (None, None)

        # The syntax for "User-Agent" in RFC 2616 is fairly simple:
        #
        #   User-Agent      = "User-Agent" ":" 1*( product | comment )
        #   product         = token ["/" product-version ]
        #   product-version = token
        #   comment         = "(" *( ctext | comment ) ")"
        #   ctext           = <any TEXT excluding "(" and ")">
        #   token           = 1*<any CHAR except CTLs or tspecials>
        #   tspecials       = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" |
        #                     "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" |
        #                     "}" | SP | HT
        #
        # This function handles the most-commonly-used subset of this syntax,
        # namely
        #   User-Agent = "User-Agent" ":" product 1*SP [comment]
        # ie. one product string followed by an optional comment;
        # anything after that first comment is ignored.  This should be
        # enough to distinguish Mozilla/Netscape, MSIE, Opera, and
        # Konqueror.
        m = _http_product_re.match(ua)
        if not m:
            import sys
            sys.stderr.write("couldn't parse User-Agent header: %r\n" % ua)
            return (None, None)

        name, version = m.groups()
        ua = ua[m.end():].lstrip()

        if ua.startswith('('):
            # we need to handle nested comments since MSIE uses them
            depth = 1
            chars = []
            for c in ua[1:]:
                if c == '(':
                    depth += 1
                elif c == ')':
                    depth -= 1
                    if depth == 0:
                        break
                elif depth == 1:
                    # nested comments are discarded
                    chars.append(c)
            comment = ''.join(chars)
        else:
            comment = ''

        if comment:
            comment_chunks = _comment_delim_re.split(comment)
        else:
            comment_chunks = []

        if ("compatible" in comment_chunks and
                len(comment_chunks) > 1 and comment_chunks[1]):
            # A-ha!  Someone is kidding around, pretending to be what
            # they are not.  Most likely MSIE masquerading as Mozilla,
            # but lots of other clients (eg. Konqueror) do the same.
            real_ua = comment_chunks[1]
            if "/" in real_ua:
                (name, version) = real_ua.split("/", 1)
            else:
                if real_ua.startswith("MSIE") and ' ' in real_ua:
                    (name, version) = real_ua.split(" ", 1)
                else:
                    name = real_ua
                    version = None
            return (name, version)

        # Either nobody is pulling our leg, or we didn't find anything
        # that looks vaguely like a user agent in the comment.  So use
        # what we found outside the comment, ie. what the spec says we
        # should use (sigh).
        return (name, version)

    def redirect(self, location, permanent=0):
        """redirect(location : string, permanent : boolean = false)
           -> string

        Create a redirection response.  If the location is relative, then it
        will automatically be made absolute.  The return value is an HTML
        document indicating the new URL (useful if the client browser does
        not honor the redirect).
        """
        location = urlparse.urljoin(self.get_url(), location)
        return self.response.redirect(location, permanent)
# One cookie pair whose value is enclosed in double quotes, ie.
# name="value".  Group 2 is the name and group 3 the value without the
# quotes.  "[\0- ]" is the character range from NUL to space; the pair
# may be surrounded by such characters and followed by one ';' or ','.
_qparm_re = re.compile(r'([\0- ]*'
                       r'([^\0- ;,=\"]+)="([^"]*)"'
                       r'([\0- ]*[;,])?[\0- ]*)')

# Same as above for an unquoted value, ie. name=value, where the value
# runs up to the next NUL-to-space character, ';', ',' or '"'.
_parm_re = re.compile(r'([\0- ]*'
                      r'([^\0- ;,="]+)=([^\0- ;,"]*)'
                      r'([\0- ]*[;,])?[\0- ]*)')


def parse_cookie(text):
    """parse_cookie(text : string) -> {string : string}

    Parse the value of a "Cookie" header into a dictionary mapping
    cookie names to values.  Values may be quoted (the quotes are
    dropped) or unquoted.

    If a name occurs more than once, the first value wins.  Parsing
    stops silently at the first position that does not look like a
    cookie pair; whatever was parsed up to there is returned and no
    error is raised.
    """
    result = {}
    pos = 0
    while 1:
        # Match quoted correct cookies first, then fall back to the
        # unquoted form (evil MSIE cookies ;).
        m = _qparm_re.match(text, pos)
        if m is None:
            m = _parm_re.match(text, pos)
        if m is None:
            # this may be an invalid cookie.
            # We'll simply bail without raising an error
            # if the cookie is invalid.
            return result

        name = m.group(2)
        pos = m.end()
        if name not in result:
            result[name] = m.group(3)
|