# /usr/lib/python2.7/dist-packages/quixote/upload.py is in python-quixote1 1.2-6.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
"""quixote.upload
$HeadURL: svn+ssh://svn/repos/trunk/quixote/upload.py $
$Id: upload.py 25234 2004-09-30 17:36:19Z nascheme $
Code for handling HTTP upload requests. Provides HTTPUploadRequest, a
subclass of HTTPRequest that is created when handling an HTTP request
whose Content-Type is "multipart/form-data". Also provides the Upload
class, which is used as the form value for "file upload" variables.
"""
__revision__ = "$Id: upload.py 25234 2004-09-30 17:36:19Z nascheme $"
import os, string
import errno
from cgi import parse_header
from rfc822 import Message
from time import time, strftime, localtime
from quixote.http_request import HTTPRequest
from quixote.errors import RequestError
from quixote.config import ConfigError
CRLF = "\r\n"
LF = "\n"


def read_mime_part(file, boundary, lines=None, ofile=None):
    """
    Consume one MIME part from 'file', stopping just after the next
    boundary line derived from 'boundary'.

    file
      object with a readline() method returning successive lines
    boundary
      the bare boundary token; the lines actually searched for are
      "--" + boundary (more parts follow) and "--" + boundary + "--"
      (final boundary)
    lines
      optional list; every content line is appended to it with its
      line ending removed
    ofile
      optional object with a write() method; every content line is
      written to it with the original line endings kept, except that
      the line ending immediately before the boundary is dropped

    The boundary line itself is never saved or written.  If neither
    'lines' nor 'ofile' is given, the data read is simply thrown away.

    Returns 1 if nothing more should be read from 'file' (final
    boundary seen, or EOF reached), and 0 if an ordinary boundary was
    hit and further parts follow.
    """
    # Approach borrowed from read_lines_to_outerboundary() in cgi.py.
    part_boundary = "--" + boundary
    final_boundary = part_boundary + "--"

    # XXX uploads may be arbitrary binary data, where a "line" can be
    # very long (a ~1 MB line has been seen in a real .GDS file; 5-10k
    # is more typical).  Fixed-size chunked reads would avoid that but
    # make locating the boundary much harder.

    # Line ending of the previously emitted line.  It is written out
    # only once we know another content line follows, so the ending
    # that separates the content from the boundary is never emitted.
    pending_eol = ""
    while True:
        raw = file.readline()
        if not raw:
            # EOF: should not happen in a well-formed message, but
            # treat it as "nothing more to read".
            return 1

        # Split the line into its text and its (remembered) ending.
        for eol in (CRLF, LF):
            if raw[-len(eol):] == eol:
                text = raw[:-len(eol)]
                break
        else:
            eol = ""
            text = raw

        if text == final_boundary:
            return 1        # final boundary -- no more to read
        if text == part_boundary:
            return 0        # boundary hit, but more parts to come

        if lines is not None:
            lines.append(text)
        if ofile is not None:
            ofile.write(pending_eol + text)
        pending_eol = eol
# Characters allowed to survive in a sanitized filename.  Built from
# string.ascii_letters rather than string.letters: the latter is
# locale-dependent in Python 2, so the "safe" set could silently grow
# to include high-byte characters after a locale.setlocale() call.
SAFE_CHARS = string.ascii_letters + string.digits + "-@&+=_., "
_SAFE_SET = frozenset(SAFE_CHARS)


def make_safe(s):
    """
    Return a copy of 's' in which every character not listed in
    SAFE_CHARS has been replaced by an underscore.

    The length of the result always equals the length of 's'.  Path
    separators ("/", "\\", ":") are not in SAFE_CHARS and are therefore
    neutralized.  Both byte strings and unicode strings are accepted;
    any non-ASCII character is replaced.
    """
    # Joining on s[:0] (an empty string of the same type as 's') keeps
    # the result type equal to the input type, even for empty input.
    return s[:0].join([c if c in _SAFE_SET else "_" for c in s])
class Upload:
    r"""
    Represents a single uploaded file.  Uploaded files live in the
    filesystem, *not* in memory -- this is not a file-like object!  It's
    just a place to store a couple of filenames.  Specifically, feel
    free to access the following instance attributes:

      orig_filename
        the complete filename supplied by the user-agent in the
        request that uploaded this file.  Depending on the browser,
        this might have the complete path of the original file
        on the client system, in the client system's syntax -- eg.
        "C:\foo\bar\upload_this" or "/foo/bar/upload_this" or
        "foo:bar:upload_this".  None if no filename was supplied.
      base_filename
        the base component of orig_filename, shorn of MS-DOS,
        Mac OS, and Unix path components and with "unsafe"
        characters neutralized (see make_safe()).  None if no
        filename was supplied.
      tmp_filename
        where you'll actually find the file on the current system;
        None until receive() has completed
      content_type
        the content type provided by the user-agent in the request
        that uploaded this file.
    """

    def __init__(self, orig_filename, content_type=None):
        """
        Record the client-supplied filename and content type.  An
        empty or None 'orig_filename' leaves both filename attributes
        set to None.
        """
        if orig_filename:
            self.orig_filename = orig_filename
            bspos = orig_filename.rfind("\\")
            cpos = orig_filename.rfind(":")
            spos = orig_filename.rfind("/")
            # The separators are tried in a fixed priority order
            # (backslash, then colon, then slash); whatever separator
            # characters remain in the result are neutralized by
            # make_safe() below.
            if bspos != -1:                 # eg. "\foo\bar" or "D:\ding\dong"
                filename = orig_filename[bspos+1:]
            elif cpos != -1:                # eg. "C:foo" or ":ding:dong:foo"
                filename = orig_filename[cpos+1:]
            elif spos != -1:                # eg. "foo/bar/baz" or "/tmp/blah"
                filename = orig_filename[spos+1:]
            else:
                filename = orig_filename

            self.base_filename = make_safe(filename)
        else:
            self.orig_filename = None
            self.base_filename = None

        self.content_type = content_type
        self.tmp_filename = None

    def __str__(self):
        return str(self.orig_filename)

    def __repr__(self):
        return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self)

    def _open(self, dir):
        """
        Generate a unique filename in 'dir'.  Open and return a
        writeable file object from it.

        Returns a (file object, filename) tuple.  Any OSError other
        than a filename collision is propagated to the caller.
        """
        # O_EXCL makes creation fail if the name already exists, so a
        # successful open guarantees the name is ours.
        flags = os.O_WRONLY|os.O_CREAT|os.O_EXCL
        try:
            flags |= os.O_BINARY        # for Windows
        except AttributeError:
            pass

        tstamp = strftime("%Y%m%d.%H%M%S", localtime(time()))
        counter = 0
        while 1:
            filename = "upload.%s.%s" % (tstamp, counter)
            filename = os.path.join(dir, filename)
            try:
                fd = os.open(filename, flags)
            except OSError as err:
                if err.errno == errno.EEXIST:
                    # Filename collision -- try again
                    counter += 1
                else:
                    # Bomb on any other error.
                    raise
            else:
                # Opened the file just fine; it now exists so no other
                # process or thread will be able to grab that filename.
                break

        # Wrap a file object around the file descriptor.
        return (os.fdopen(fd, "wb"), filename)

    def receive(self, file, boundary, dir):
        """
        Copy one MIME part from 'file' into a freshly created temporary
        file in 'dir' and remember its name in 'tmp_filename'.

        Returns the value of read_mime_part(): true if there is nothing
        more to read from 'file'.
        """
        (ofile, filename) = self._open(dir)
        try:
            done = read_mime_part(file, boundary, ofile=ofile)
        finally:
            # Always release the file handle, even if reading the
            # request body fails part-way through.
            ofile.close()
        self.tmp_filename = filename
        return done

    def get_size(self):
        """get_size() : int

        Return the size of the file, measured in bytes, or None if
        the file doesn't exist (including the case where nothing has
        been received yet and 'tmp_filename' is still None).
        """
        if self.tmp_filename is None:
            return None
        try:
            stats = os.stat(self.tmp_filename)
        except OSError as err:
            if err.errno == errno.ENOENT:
                return None
            raise
        return stats.st_size
class CountingFile:
    """Wrapper around a file object that keeps a running total of
    how much data has been read through it.

    Only read() and readline() are provided; there is no seek(), since
    the wrapper is meant for HTTPUploadRequest reading an unseekable
    file (stdin).  The total is the sum of len() of everything
    returned so far and is available from get_bytesread().
    """

    def __init__(self, file):
        self.__file = file
        self.__bytesread = 0

    def __tally(self, data):
        # Add the length of 'data' to the running total and hand the
        # data back unchanged.
        self.__bytesread += len(data)
        return data

    def read(self, nbytes):
        """Read up to 'nbytes' from the underlying file, counting them."""
        return self.__tally(self.__file.read(nbytes))

    def readline(self):
        """Read one line from the underlying file, counting its length."""
        return self.__tally(self.__file.readline())

    def get_bytesread(self):
        """Return the total amount of data read so far."""
        return self.__bytesread
class HTTPUploadRequest(HTTPRequest):
    """
    Represents a single HTTP request with Content-Type
    "multipart/form-data", which is used for HTTP uploads.  (It's
    actually possible for any HTML form to specify an encoding type of
    "multipart/form-data", even if there are no file uploads in that
    form.  In that case, you'll still get an HTTPUploadRequest object --
    but since this is a subclass of HTTPRequest, that shouldn't cause
    you any problems.)

    When processing the upload request, any uploaded files are stored
    under a temporary filename in the directory specified by the
    'upload_dir' instance attribute (which is normally set, by
    Publisher, from the UPLOAD_DIR configuration variable).
    HTTPUploadRequest then creates an Upload object which contains the
    various filenames for this upload.

    Other form variables are stored as usual in the 'form' dictionary,
    to be fetched later with get_form_var().  Uploaded files can also be
    accessed via get_form_var(), which returns the Upload object created
    at upload-time, rather than a string.

    Eg. if your upload form contains this:
      <input type="file" name="upload">

    then, when processing the form, you might do this:
      upload = request.get_form_var("upload")

    after which you could open the uploaded file immediately:
      file = open(upload.tmp_filename)

    or move it to a more permanent home before doing anything with it:
      permanent_name = os.path.join(permanent_upload_dir,
                                    upload.base_filename)
      os.rename(upload.tmp_filename, permanent_name)
    """

    def __init__(self, stdin, environ, content_type=None):
        HTTPRequest.__init__(self, stdin, environ, content_type)
        self.upload_dir = None
        # Permission bits used if the upload directory has to be
        # created (see check_upload_dir()).
        self.upload_dir_mode = 0o775

    def set_upload_dir(self, dir, mode=None):
        """
        Set the directory that uploaded files are written to.  If
        'mode' is given it replaces the mode used when creating that
        directory; otherwise the current mode is kept.
        """
        self.upload_dir = dir
        if mode is not None:
            self.upload_dir_mode = mode

    def parse_content_type(self):
        """
        Parse the request's Content-Type header and return a
        (content type, boundary) tuple.

        Raises RequestError if the header is missing, is not
        "multipart/form-data", or carries no 'boundary' parameter.
        """
        full_ctype = self.get_header('Content-Type')
        if full_ctype is None:
            raise RequestError("no Content-Type header")

        (ctype, ctype_params) = parse_header(full_ctype)
        boundary = ctype_params.get('boundary')
        if not (ctype == "multipart/form-data" and boundary):
            raise RequestError("expected Content-Type: multipart/form-data "
                               "with a 'boundary' parameter: got %r"
                               % full_ctype)

        return (ctype, boundary)

    def parse_content_disposition(self, full_cdisp):
        """
        Parse a body part's Content-Disposition header value and return
        a (name, filename) tuple; 'filename' is None when the header
        has no such parameter.

        Raises RequestError unless the disposition is "form-data" with
        a non-empty 'name' parameter.
        """
        (cdisp, cdisp_params) = parse_header(full_cdisp)
        name = cdisp_params.get("name")
        if not (cdisp == "form-data" and name):
            raise RequestError("expected Content-Disposition: form-data "
                               "with a 'name' parameter: got %r" % full_cdisp)

        return (name, cdisp_params.get("filename"))

    def check_upload_dir(self):
        """
        Make sure the upload directory exists, creating it with mode
        'upload_dir_mode' if necessary.  Any OSError from os.mkdir()
        is propagated, except the case where the directory was created
        concurrently by someone else.
        """
        if os.path.isdir(self.upload_dir):
            return

        # NOTE(review): this message goes to stdout -- presumably the
        # publisher redirects stdout to a log; confirm.
        print("creating %s with mode %o" % (self.upload_dir,
                                            self.upload_dir_mode))
        try:
            os.mkdir(self.upload_dir, self.upload_dir_mode)
        except OSError as err:
            # Another process or thread may have created the directory
            # between the isdir() test and the mkdir() call; that is
            # not an error.
            if not (err.errno == errno.EEXIST
                    and os.path.isdir(self.upload_dir)):
                raise

    def handle_upload(self, name, filename, file, boundary, content_type):
        """
        Store one uploaded file: read its body part from 'file' into a
        temporary file under 'upload_dir' and register the resulting
        Upload object as the value of form variable 'name'.

        Returns true if there is nothing more to read from 'file'.
        Raises ConfigError if 'upload_dir' has not been set.
        """
        if self.upload_dir is None:
            raise ConfigError("upload_dir not set")
        upload = Upload(filename, content_type)
        self.check_upload_dir()
        done = upload.receive(file, boundary, self.upload_dir)
        self.add_form_value(name, upload)
        return done

    def handle_regular_var(self, name, file, boundary):
        """
        Read one ordinary (non-file) form variable from 'file' and
        register it under 'name'.  A multi-line value is stored with
        its lines joined by "\\n"; an empty part yields "".

        Returns true if there is nothing more to read from 'file'.
        """
        lines = []
        done = read_mime_part(file, boundary, lines=lines)
        # For a single line, join() simply yields that line.
        value = "\n".join(lines)
        self.add_form_value(name, value)
        return done

    def parse_body(self, file, boundary):
        """
        Parse every body part that follows the first boundary line,
        dispatching to handle_upload() for parts that carry a filename
        and to handle_regular_var() for all others.

        Raises RequestError if a part lacks a Content-Disposition
        header.
        """
        done = 0
        while not done:
            # Each part starts with its own block of headers.
            headers = Message(file)
            cdisp = headers.get('content-disposition')
            if not cdisp:
                raise RequestError("expected Content-Disposition header "
                                   "in body sub-part")
            (name, filename) = self.parse_content_disposition(cdisp)
            if filename:
                content_type = headers.get('content-type')
                done = self.handle_upload(name, filename, file,
                                          boundary, content_type)
            else:
                done = self.handle_regular_var(name, file, boundary)

    def check_length_read(self, file):
        """
        Verify that the amount of data read through 'file' (which must
        provide get_bytesread()) matches the Content-Length header, if
        the request has one.

        Raises RequestError on a mismatch or if the header is not a
        valid integer.
        """
        # XXX if we want to worry about disk free space, this should
        # be done *before* parsing the body!
        clen = self.get_header("Content-Length")
        if clen is None:
            return
        try:
            expected = int(clen)
        except ValueError:
            # A garbage header is a client error, not a server crash.
            raise RequestError("invalid Content-Length header: %r" % clen)
        total_bytes = file.get_bytesread()
        if total_bytes != expected:
            raise RequestError(
                "upload request length mismatch: expected %d bytes, got %d"
                % (expected, total_bytes))

    def process_inputs(self):
        """
        Parse the multipart request body read from 'self.stdin',
        populating the form variables (and writing any uploaded files
        to 'upload_dir').  Also records the start time in
        'self.start_time'.
        """
        self.start_time = time()

        # Parse Content-Type header -- mainly to get the 'boundary'
        # parameter.  Barf if not there or unexpected type.
        (ctype, boundary) = self.parse_content_type()

        # The meat of the body starts after the first occurrence of
        # the boundary, so read up to that point.
        file = CountingFile(self.stdin)
        read_mime_part(file, boundary)

        # Parse the parts of the message, ie. the form variables.  Some of
        # these will presumably be "file upload" variables, so need to be
        # treated specially.
        self.parse_body(file, boundary)

        # Ensure that we read exactly as many bytes as were promised
        # by the Content-Length header.
        self.check_length_read(file)
|