/usr/share/pyshared/quixote/http_request.py is in python-quixote 2.7~b2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""quixote.http_request
Provides the HTTPRequest class and related code for parsing HTTP
requests, such as the Upload class.
"""
import re
import string
import os
import tempfile
import urllib
import rfc822
from StringIO import StringIO
import quixote
from quixote.http_response import HTTPResponse
from quixote.errors import RequestError
# Various regexes for parsing specific bits of HTTP, all from RFC 2616.

# These are needed by 'get_encoding()', to parse the "Accept-Encoding"
# header.  LWS is linear whitespace; the latter two assume that LWS
# has been removed.
_http_lws_re = re.compile(r"(\r\n)?[ \t]+")
_http_list_re = re.compile(r",+")
_http_encoding_re = re.compile(r"([^;]+)(;q=([\d.]+))?$")

# These are needed by 'guess_browser_version()', for parsing the
# "User-Agent" header.
#   token = 1*<any CHAR except CTLs or separators>
#   CHAR = any 7-bit US ASCII character (0-127)
#   separators are ( ) < > @ , ; : \ " / [ ] ? = { }
#
# The user_agent RE is a simplification; it only looks for one "product",
# possibly followed by a comment.
_http_token_pat = r"[\w!#$%&'*+.^`|~-]+"
_http_product_pat = r'(%s)(?:/(%s))?' % (_http_token_pat, _http_token_pat)
_http_product_re = re.compile(_http_product_pat)
_comment_delim_re = re.compile(r';\s*')
def get_content_type(environ):
    """Return the media type from the CGI CONTENT_TYPE variable.

    Any parameters (e.g. ";charset=...") are stripped off.  Returns
    None when the environment carries no (or an empty) Content-Type.
    """
    ctype = environ.get("CONTENT_TYPE")
    if not ctype:
        return None
    return ctype.split(";")[0]
def _decode_string(s, charset):
    """Decode the byte string 's' using 'charset'.

    Raises RequestError when the charset is unknown or the bytes do
    not decode under it.
    """
    if quixote.DEFAULT_CHARSET == 'iso-8859-1' and charset == 'iso-8859-1':
        # To avoid breaking applications that are not Unicode-safe, return
        # a str instance in this case.
        return s
    try:
        return s.decode(charset)
    except LookupError:
        raise RequestError('unknown charset %r' % charset)
    except UnicodeDecodeError:
        raise RequestError('invalid %r encoded string' % charset)
def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.
    """
    # Use a list comprehension rather than map(): map() returns a lazy
    # iterator on Python 3, which has no pop() method, and the
    # comprehension is the idiomatic form anyway.
    parts = [p.strip() for p in line.split(';')]
    key = parts.pop(0).lower()
    pdict = {}
    for p in parts:
        i = p.find('=')
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i+1:].strip()
            # strip surrounding double quotes from quoted parameter values
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
            pdict[name] = value
    return key, pdict
def parse_content_disposition(full_cdisp):
    """Split a Content-Disposition header into (name, filename).

    Raises RequestError unless the disposition is "form-data" with a
    non-empty "name" parameter; "filename" may be None.
    """
    cdisp, params = parse_header(full_cdisp)
    name = params.get('name')
    if cdisp != 'form-data' or not name:
        raise RequestError('expected Content-Disposition: form-data '
                           'with a "name" parameter: got %r' % full_cdisp)
    return (name, params.get('filename'))
def parse_query(qs, charset):
    """(qs: string) -> {key:string, string|[string]}

    Parse a query given as a string argument and return a dictionary.
    Repeated field names accumulate their values into a list.
    """
    fields = {}
    for chunk in qs.split('&'):
        if not chunk:
            continue
        # a chunk without '=' yields an empty value
        name, _, value = chunk.partition('=')
        # '+' encodes a space in form data; unquote resolves %XX escapes
        name = _decode_string(urllib.unquote(name.replace('+', ' ')), charset)
        value = _decode_string(urllib.unquote(value.replace('+', ' ')), charset)
        _add_field_value(fields, name, value)
    return fields
def _add_field_value(fields, name, value):
if name in fields:
values = fields[name]
if not isinstance(values, list):
fields[name] = values = [values]
values.append(value)
else:
fields[name] = value
class HTTPRequest:
    """
    Model a single HTTP request and all associated data: environment
    variables, form variables, cookies, etc.

    To access environment variables associated with the request, use
    get_environ(): eg. request.get_environ('SERVER_PORT', 80).

    To access form variables, use get_field(), eg.
    request.get_field("name").

    To access cookies, use get_cookie().

    Various bits and pieces of the requested URL can be accessed with
    get_url(), get_path(), get_server()

    The HTTPResponse object corresponding to this request is available
    in the 'response' attribute.  This is rarely needed: eg. to send an
    error response, you should raise one of the exceptions in errors.py;
    to send a redirect, you should use the quixote.redirect() function,
    which lets you specify relative URLs.  However, if you need to tweak
    the response object in other ways, you can do so via 'response'.
    Just keep in mind that Quixote discards the original response object
    when handling an exception.
    """

    DEFAULT_CHARSET = None # defaults to quixote.DEFAULT_CHARSET

    def __init__(self, stdin, environ):
        self.stdin = stdin
        self.environ = environ
        self.form = {}
        self.session = None
        self.charset = self.DEFAULT_CHARSET or quixote.DEFAULT_CHARSET
        self.response = HTTPResponse()

        # The strange treatment of SERVER_PORT_SECURE is because IIS
        # sets this environment variable to "0" for non-SSL requests
        # (most web servers -- well, Apache at least -- simply don't set
        # it in that case).
        if (environ.get('HTTPS', 'off').lower() in ('on', 'yes', '1') or
            environ.get('SERVER_PORT_SECURE', '0') != '0'):
            self.scheme = "https"
        else:
            self.scheme = "http"

        k = self.environ.get('HTTP_COOKIE', '')
        if k:
            self.cookies = parse_cookies(k)
        else:
            self.cookies = {}

        # IIS breaks PATH_INFO because it leaves in the path to
        # the script, so SCRIPT_NAME is "/cgi-bin/q.py" and PATH_INFO
        # is "/cgi-bin/q.py/foo/bar".  The following code fixes
        # PATH_INFO to the expected value "/foo/bar".
        web_server = environ.get('SERVER_SOFTWARE', 'unknown')
        if web_server.find('Microsoft-IIS') != -1:
            script = environ['SCRIPT_NAME']
            path = environ['PATH_INFO']
            if path.startswith(script):
                path = path[len(script):]
                self.environ['PATH_INFO'] = path

    def process_inputs(self):
        """Parse the query string and request body into self.form."""
        query = self.get_query()
        if query:
            self.form.update(parse_query(query, self.charset))
        length = self.environ.get('CONTENT_LENGTH') or "0"
        try:
            length = int(length)
        except ValueError:
            raise RequestError('invalid content-length header')
        ctype = self.environ.get("CONTENT_TYPE")
        if ctype:
            ctype, ctype_params = parse_header(ctype)
            if ctype == 'application/x-www-form-urlencoded':
                self._process_urlencoded(length, ctype_params)
            elif ctype == 'multipart/form-data':
                self._process_multipart(length, ctype_params)

    def _process_urlencoded(self, length, params):
        """Parse an application/x-www-form-urlencoded body into self.form."""
        query = self.stdin.read(length)
        if len(query) != length:
            raise RequestError('unexpected end of request body')
        # Use the declared charset if it's provided (most browsers don't
        # provide it to avoid breaking old HTTP servers).
        charset = params.get('charset', self.charset)
        self.form.update(parse_query(query, charset))

    def _process_multipart(self, length, params):
        """Parse a multipart/form-data body into self.form."""
        boundary = params.get('boundary')
        if not boundary:
            raise RequestError('multipart/form-data missing boundary')
        charset = params.get('charset')
        mimeinput = MIMEInput(self.stdin, boundary, length)
        try:
            for line in mimeinput.readpart():
                pass # discard lines up to first boundary
            while mimeinput.moreparts():
                self._process_multipart_body(mimeinput, charset)
        except EOFError:
            raise RequestError('unexpected end of multipart/form-data')

    def _process_multipart_body(self, mimeinput, charset):
        """Read one multipart body part and store it as a form field
        (or an Upload instance when a filename is given)."""
        headers = StringIO()
        lines = mimeinput.readpart()
        for line in lines:
            headers.write(line)
            if line == '\r\n':
                break
        headers.seek(0)
        headers = rfc822.Message(headers)
        ctype, ctype_params = parse_header(headers.get('content-type', ''))
        if ctype and 'charset' in ctype_params:
            charset = ctype_params['charset']
        cdisp, cdisp_params = parse_header(headers.get('content-disposition',
                                                       ''))
        if not cdisp:
            raise RequestError('expected Content-Disposition header')
        name = cdisp_params.get('name')
        filename = cdisp_params.get('filename')
        if not (cdisp == 'form-data' and name):
            raise RequestError('expected Content-Disposition: form-data '
                               'with a "name" parameter: got %r' %
                               headers.get('content-disposition', ''))
        # FIXME: should really to handle Content-Transfer-Encoding and other
        # MIME complexity here.  See RFC2048 for the full horror story.
        if filename:
            # it might be large file upload so use a temporary file
            upload = Upload(filename, ctype, charset)
            upload.receive(lines)
            _add_field_value(self.form, name, upload)
        else:
            value = _decode_string(''.join(lines), charset or self.charset)
            _add_field_value(self.form, name, value)

    def get_header(self, name, default=None):
        """get_header(name : string, default : string = None) -> string

        Return the named HTTP header, or an optional default argument
        (or None) if the header is not found.  Note that both original
        and CGI-ified header names are recognized, e.g. 'Content-Type',
        'CONTENT_TYPE' and 'HTTP_CONTENT_TYPE' should all return the
        Content-Type header, if available.
        """
        environ = self.environ
        name = name.replace("-", "_").upper()
        val = environ.get(name)
        if val is not None:
            return val
        if name[:5] != 'HTTP_':
            name = 'HTTP_' + name
        return environ.get(name, default)

    def get_cookie(self, cookie_name, default=None):
        """Return the value of the named cookie, or 'default'."""
        return self.cookies.get(cookie_name, default)

    def get_cookies(self):
        """Return the {name: value} dictionary of all request cookies."""
        return self.cookies

    def get_field(self, name, default=None):
        """Return the value of the named form field, or 'default'."""
        return self.form.get(name, default)

    def get_fields(self):
        """Return the dictionary of all form fields."""
        return self.form

    def get_method(self):
        """Returns the HTTP method for this request
        """
        return self.environ.get('REQUEST_METHOD', 'GET')

    def formiter(self):
        """Return an iterator over (name, value) form field pairs."""
        return self.form.iteritems()

    def get_scheme(self):
        """Return the URL scheme, "http" or "https"."""
        return self.scheme

    # The following environment variables are useful for reconstructing
    # the original URL, all of which are specified by CGI 1.1:
    #
    #   SERVER_NAME            "www.example.com"
    #   SCRIPT_NAME            "/q"
    #   PATH_INFO              "/debug/dump_sessions"
    #   QUERY_STRING           "session_id=10.27.8.40...."

    def get_server(self):
        """get_server() -> string

        Return the server name with an optional port number, eg.
        "www.example.com" or "foo.bar.com:8000".
        """
        http_host = self.environ.get("HTTP_HOST")
        if http_host:
            return http_host
        server_name = self.environ["SERVER_NAME"].strip()
        server_port = self.environ.get("SERVER_PORT")
        # omit the port when it is the default for the scheme
        if (not server_port or
            (self.get_scheme() == "http" and server_port == "80") or
            (self.get_scheme() == "https" and server_port == "443")):
            return server_name
        else:
            return server_name + ":" + server_port

    def get_path(self, n=0):
        """get_path(n : int = 0) -> string

        Return the path of the current request, chopping off 'n' path
        components from the right.  Eg. if the path is "/bar/baz/qux",
        n=0 would return "/bar/baz/qux" and n=2 would return "/bar".
        Note that the query string, if any, is not included.

        A path with a trailing slash should just be considered as having
        an empty last component.  Eg. if the path is "/bar/baz/", then:
          get_path(0) == "/bar/baz/"
          get_path(1) == "/bar/baz"
          get_path(2) == "/bar"

        If 'n' is negative, then components from the left of the path
        are returned.  Continuing the above example,
          get_path(-1) = "/bar"
          get_path(-2) = "/bar/baz"
          get_path(-3) = "/bar/baz/"

        Raises ValueError if absolute value of n is larger than the number of
        path components."""
        path_info = self.environ.get('PATH_INFO', '')
        path = self.environ['SCRIPT_NAME'] + path_info
        if n == 0:
            return path
        else:
            path_comps = path.split('/')
            if abs(n) > len(path_comps)-1:
                # call form of raise, valid on Python 2 and 3
                raise ValueError("n=%d too big for path '%s'" % (n, path))
            if n > 0:
                return '/'.join(path_comps[:-n])
            elif n < 0:
                return '/'.join(path_comps[:-n+1])
            else:
                assert 0, "Unexpected value for n (%s)" % n

    def get_query(self):
        """() -> string

        Return the query component of the URL.
        """
        return self.environ.get('QUERY_STRING', '')

    def get_path_query(self):
        """() -> string

        Return the path and the query string (if any).
        """
        path = self.get_path()
        query = self.get_query()
        if query:
            path += '?' + query
        return path

    def get_url(self, n=0):
        """get_url(n : int = 0) -> string

        Return the URL of the current request, chopping off 'n' path
        components from the right.  Eg. if the URL is
        "http://foo.com/bar/baz/qux", n=2 would return
        "http://foo.com/bar".  Does not include the query string (if
        any).
        """
        return "%s://%s%s" % (self.get_scheme(), self.get_server(),
                              urllib.quote(self.get_path(n)))

    def get_environ(self, key, default=None):
        """get_environ(key : string) -> string

        Fetch a CGI environment variable from the request environment.
        See http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        for the variables specified by the CGI standard.
        """
        return self.environ.get(key, default)

    def get_encoding(self, encodings):
        """get_encoding(encodings : [string]) -> string

        Parse the "Accept-encoding" header. 'encodings' is a list of
        encodings supported by the server sorted in order of preference.

        The return value is one of 'encodings' or None if the client
        does not accept any of the encodings.
        """
        accept_encoding = self.get_header("accept-encoding") or ""
        found_encodings = self._parse_pref_header(accept_encoding)
        if found_encodings:
            for encoding in encodings:
                if encoding in found_encodings:
                    return encoding
        return None

    def get_accepted_types(self):
        """get_accepted_types() : {string:float}

        Return a dictionary mapping MIME types the client will accept
        to the corresponding quality value (1.0 if no value was specified).
        """
        accept_types = self.environ.get('HTTP_ACCEPT', "")
        return self._parse_pref_header(accept_types)

    def _parse_pref_header(self, S):
        """_parse_pref_header(S:string) : {string:float}

        Parse a list of HTTP preferences (content types, encodings) and
        return a dictionary mapping strings to the quality value.
        """
        found = {}
        # remove all linear whitespace
        S = _http_lws_re.sub("", S)
        for coding in _http_list_re.split(S):
            m = _http_encoding_re.match(coding)
            if m:
                encoding = m.group(1).lower()
                q = m.group(3) or 1.0
                try:
                    q = float(q)
                except ValueError:
                    continue
                if encoding == "*":
                    continue # stupid, ignore it
                if q > 0:
                    found[encoding] = q
        return found

    def dump(self):
        """Return a plain-text dump of the form, cookies and environment."""
        result = []
        row = '%-15s %s'

        result.append("Form:")
        for k, v in sorted(self.form.items()):
            result.append(row % (k, v))

        result.append("")
        result.append("Cookies:")
        for k, v in sorted(self.cookies.items()):
            result.append(row % (k, v))

        result.append("")
        result.append("Environment:")
        for k, v in sorted(self.environ.items()):
            result.append(row % (k, v))
        return "\n".join(result)

    def guess_browser_version(self):
        """guess_browser_version() -> (name : string, version : string)

        Examine the User-agent request header to try to figure out what
        the current browser is.  Returns either (name, version) where
        each element is a string, (None, None) if we couldn't parse the
        User-agent header at all, or (name, None) if we got the name but
        couldn't figure out the version.

        Handles Microsoft's little joke of pretending to be Mozilla,
        eg. if the "User-Agent" header is
          Mozilla/5.0 (compatible; MSIE 5.5)
        returns ("MSIE", "5.5").  Konqueror does the same thing, and
        it's handled the same way.
        """
        ua = self.get_header('user-agent')
        if ua is None:
            return (None, None)

        # The syntax for "User-Agent" in RFC 2616 is fairly simple:
        #
        #  User-Agent      = "User-Agent" ":" 1*( product | comment )
        #  product         = token ["/" product-version ]
        #  product-version = token
        #  comment         = "(" *( ctext | comment ) ")"
        #  ctext           = <any TEXT excluding "(" and ")">
        #  token           = 1*<any CHAR except CTLs or tspecials>
        #  tspecials       = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" |
        #                    "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" |
        #                    "}" | SP | HT
        #
        # This function handles the most-commonly-used subset of this syntax,
        # namely
        #   User-Agent = "User-Agent" ":" product 1*SP [comment]
        # ie. one product string followed by an optional comment;
        # anything after that first comment is ignored.  This should be
        # enough to distinguish Mozilla/Netscape, MSIE, Opera, and
        # Konqueror.
        m = _http_product_re.match(ua)
        if not m:
            import sys
            sys.stderr.write("couldn't parse User-Agent header: %r\n" % ua)
            return (None, None)
        name, version = m.groups()
        ua = ua[m.end():].lstrip()

        if ua.startswith('('):
            # we need to handle nested comments since MSIE uses them
            depth = 1
            chars = []
            for c in ua[1:]:
                if c == '(':
                    depth += 1
                elif c == ')':
                    depth -= 1
                    if depth == 0:
                        break
                elif depth == 1:
                    # nested comments are discarded
                    chars.append(c)
            comment = ''.join(chars)
        else:
            comment = ''
        if comment:
            comment_chunks = _comment_delim_re.split(comment)
        else:
            comment_chunks = []

        if ("compatible" in comment_chunks and
            len(comment_chunks) > 1 and comment_chunks[1]):
            # A-ha!  Someone is kidding around, pretending to be what
            # they are not.  Most likely MSIE masquerading as Mozilla,
            # but lots of other clients (eg. Konqueror) do the same.
            real_ua = comment_chunks[1]
            if "/" in real_ua:
                (name, version) = real_ua.split("/", 1)
            else:
                if real_ua.startswith("MSIE") and ' ' in real_ua:
                    (name, version) = real_ua.split(" ", 1)
                else:
                    name = real_ua
                    version = None
            return (name, version)

        # Either nobody is pulling our leg, or we didn't find anything
        # that looks vaguely like a user agent in the comment.  So use
        # what we found outside the comment, ie. what the spec says we
        # should use (sigh).
        return (name, version)

    # guess_browser_version ()
# See RFC 2109 for details.  Note that this parser is more liberal.
_COOKIE_RE = re.compile(r"""
                \s*
                (?P<name>[^=;,\s]+)
                \s*
                (
                    =
                    \s*
                    (
                        (?P<qvalue> "(\\[\x00-\x7f] | [^"])*")
                        |
                        (?P<value> [^";,\s]*)
                    )
                )?
                \s*
                [;,]?
                """, re.VERBOSE)

def parse_cookies(text):
    """Parse an HTTP Cookie header into a {name: value} dictionary."""
    cookies = {}
    for match in _COOKIE_RE.finditer(text):
        name = match.group('name')
        if name.startswith('$'):
            # discard, we don't handle per cookie attributes (e.g. $Path)
            continue
        quoted = match.group('qvalue')
        if quoted:
            # strip the surrounding quotes and unescape backslash pairs
            cookies[name] = re.sub(r'\\(.)', r'\1', quoted)[1:-1]
        else:
            cookies[name] = match.group('value') or ''
    return cookies
SAFE_CHARS = string.letters + string.digits + "-@&+=_., "
_safe_trans = None

def make_safe_filename(s):
    """Return 's' with every character outside SAFE_CHARS replaced by '_'.

    The 256-entry translation table is built lazily on first use and
    cached in the module-level '_safe_trans'.
    """
    global _safe_trans
    if _safe_trans is None:
        table = ["_"] * 256
        for ch in SAFE_CHARS:
            table[ord(ch)] = ch
        _safe_trans = "".join(table)
    return s.translate(_safe_trans)
class Upload:
    r"""
    Represents a single uploaded file.  Uploaded files live in the
    filesystem, *not* in memory.

      fp
        an open file containing the content of the upload.  The file pointer
        points to the beginning of the file
      orig_filename
        the complete filename supplied by the user-agent in the
        request that uploaded this file.  Depending on the browser,
        this might have the complete path of the original file
        on the client system, in the client system's syntax -- eg.
        "C:\foo\bar\upload_this" or "/foo/bar/upload_this" or
        "foo:bar:upload_this".
      base_filename
        the base component of orig_filename, shorn of MS-DOS,
        Mac OS, and Unix path components and with "unsafe"
        characters neutralized (see make_safe_filename())
      content_type
        the content type provided by the user-agent in the request
        that uploaded this file.
      charset
        the charset provided by the user-agent
    """

    def __init__(self, orig_filename, content_type=None, charset=None):
        if orig_filename:
            self.orig_filename = orig_filename
            # find the last path separator of each convention; the checks
            # below prefer MS-DOS, then Mac OS, then Unix syntax
            bspos = orig_filename.rfind("\\")
            cpos = orig_filename.rfind(":")
            spos = orig_filename.rfind("/")
            if bspos != -1:         # eg. "\foo\bar" or "D:\ding\dong"
                filename = orig_filename[bspos+1:]
            elif cpos != -1:        # eg. "C:foo" or ":ding:dong:foo"
                filename = orig_filename[cpos+1:]
            elif spos != -1:        # eg. "foo/bar/baz" or "/tmp/blah"
                filename = orig_filename[spos+1:]
            else:
                filename = orig_filename
            self.base_filename = make_safe_filename(filename)
        else:
            self.orig_filename = None
            self.base_filename = None
        self.content_type = content_type
        self.charset = charset
        # set by receive(); None until the body has been spooled
        self.fp = None

    def receive(self, lines):
        # spool the (possibly large) upload body to an anonymous
        # temporary file, then rewind so callers can read from the start
        self.fp = tempfile.TemporaryFile("w+b")
        for line in lines:
            self.fp.write(line)
        self.fp.seek(0)

    def __str__(self):
        return str(self.orig_filename)

    def __repr__(self):
        return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self)

    def read(self, n):
        return self.fp.read(n)

    def readline(self):
        return self.fp.readline()

    def readlines(self):
        return self.fp.readlines()

    def __iter__(self):
        return iter(self.fp)

    def close(self):
        self.fp.close()

    def get_size(self):
        """Return the size of the file, in bytes.
        """
        if self.fp is None:
            return 0
        else:
            # stat the open file rather than reading it into memory
            return os.fstat(self.fp.fileno()).st_size
class LineInput:
    r"""
    A wrapper for an input stream that has the following properties:

    * lines are terminated by \r\n

    * lines shorter than 'maxlength' are always returned unbroken

    * lines longer than 'maxlength' are broken but the pair of
      characters \r\n are never split

    * no more than 'length' characters are read from the underlying
      stream

    * if the underlying stream does not produce at least 'length'
      characters then EOFError is raised
    """
    def __init__(self, fp, length):
        self.fp = fp
        self.length = length  # characters still permitted to be read from fp
        self.buf = ''         # data read from fp but not yet handed out

    def readline(self, maxlength=4096):
        """Return the next line (or a chunk of at most 'maxlength'
        characters); return '' once the input is exhausted."""
        # top up the buffer, never holding more than 'maxlength' characters
        wanted = min(self.length, maxlength - len(self.buf))
        if wanted > 0:
            self.length -= wanted
            assert self.length >= 0
            chunk = self.fp.read(wanted)
            if len(chunk) != wanted:
                raise EOFError('unexpected end of input')
            self.buf += chunk

        data = self.buf
        pos = data.find('\r\n')
        if pos >= 0:
            # complete CRLF-terminated line available
            end = pos + 2
            self.buf = data[end:]
            return data[:end]
        if data.endswith('\r'):
            # avoid splitting CR LF pairs: hold the CR back
            self.buf = '\r'
            return data[:-1]
        self.buf = ''
        return data
class MIMEInput:
    """
    Split a MIME input stream into parts.  Note that this class does not
    handle headers, transfer encoding, etc.
    """

    def __init__(self, fp, boundary, length):
        # LineInput enforces the Content-Length limit and keeps \r\n
        # pairs intact across reads
        self.lineinput = LineInput(fp, length)
        # matches both an intermediate boundary ("--boundary\r\n") and
        # the final one ("--boundary--\r\n"); group(1) is set for the final
        self.pat = re.compile(r'--%s(--)?[ \t]*\r\n' % re.escape(boundary))
        self.done = False

    def moreparts(self):
        """Return true if there are more parts to be read."""
        return not self.done

    def readpart(self):
        """Generate all the lines up to a MIME boundary.  Note that you
        must exhaust the generator before calling this function again."""
        assert not self.done
        # lines are yielded one step late so that the CRLF terminating
        # the line just before a boundary can be stripped off
        last_line = ''
        while 1:
            line = self.lineinput.readline()
            if not line:
                # Hit EOF -- nothing more to read.  This should *not* happen
                # in a well-formed MIME message.
                raise EOFError('MIME boundary not found (end of input)')
            # a boundary is only recognized at the start of a line, i.e.
            # when the previous chunk ended a line (or at the very start)
            if last_line.endswith('\r\n') or last_line == '':
                m = self.pat.match(line)
                if m:
                    # If we hit the boundary line, return now. Forget
                    # the current line *and* the CRLF ending of the
                    # previous line.
                    if m.group(1):
                        # hit final boundary
                        self.done = True
                    yield last_line[:-2]
                    return
            if last_line:
                yield last_line
            last_line = line
|