/usr/share/pyshared/landscape/lib/fetch.py is in landscape-common 12.04.3-0ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
import os
import sys
from optparse import OptionParser
from StringIO import StringIO
from twisted.internet.threads import deferToThread
from twisted.internet.defer import DeferredList
class FetchError(Exception):
    """Base class for the errors raised by the fetch helpers in this module."""
    pass
class HTTPCodeError(FetchError):
    """Raised when the server answers with a non-200 HTTP status code.

    @param http_code: The HTTP status code the server returned.
    @param body: The body of the error response, kept for inspection.
    """

    def __init__(self, http_code, body):
        self.http_code = http_code
        self.body = body

    def __repr__(self):
        code = self.http_code
        return "<HTTPCodeError http_code=%d>" % code

    def __str__(self):
        code = self.http_code
        return "Server returned HTTP code %d" % code
class PyCurlError(FetchError):
    """Raised when pycurl reports a transfer-level error.

    @param error_code: The numeric curl error code (e.g. 60 for a peer
        certificate verification failure).
    @param message: The human-readable error string reported by curl.
    """

    def __init__(self, error_code, message):
        self.error_code = error_code
        self._message = message
        # Populate self.args so repr() matches reality and the exception
        # round-trips through pickling.  The original never called
        # Exception.__init__, leaving args empty while __repr__ claimed
        # "<PyCurlError args=(...)>".
        self.args = (error_code, message)

    def __str__(self):
        return "Error %d: %s" % (self.error_code, self.message)

    def __repr__(self):
        return "<PyCurlError args=(%d, '%s')>" % (self.error_code,
                                                  self.message)

    @property
    def message(self):
        # Stored under a private name because BaseException.message was
        # deprecated in Python 2.6; the property preserves the old API.
        return self._message
def fetch(url, post=False, data="", headers=None, cainfo=None, curl=None,
          connect_timeout=30, total_timeout=600, insecure=False):
    """Retrieve a URL and return the content.

    @param url: The url to be fetched.
    @param post: If true, the POST method will be used (defaults to GET).
    @param data: Data to be sent to the server as the POST content.
    @param headers: Dictionary of header => value entries to be used
        on the request (defaults to no extra headers).
    @param cainfo: Path to the file with CA certificates.
    @param curl: Optionally, an existing pycurl.Curl instance to reuse
        instead of creating a new one.
    @param connect_timeout: Seconds to wait for the connection to be
        established before giving up.
    @param total_timeout: Abort the transfer if it stays below one byte
        per second for this many seconds (curl's low-speed timeout).
    @param insecure: If true, perform curl using insecure option which will
        not attempt to verify authenticity of the peer's certificate.
        (Used during autodiscovery)

    @raises PyCurlError: If the transfer itself fails.
    @raises HTTPCodeError: If the server responds with a non-200 code.
    """
    import pycurl  # Imported lazily so the module loads without pycurl.

    # The original signature used the mutable default headers={}; a None
    # sentinel avoids sharing one dict across calls while keeping the
    # "no extra headers" behavior identical.
    if headers is None:
        headers = {}

    # Renamed from "output"/"input", which shadowed the input() builtin
    # and read backwards (the "output" buffer was actually what we send).
    post_body = StringIO(data)   # curl reads the POST payload from here
    response = StringIO()        # curl writes the response body here

    if curl is None:
        curl = pycurl.Curl()

    if post:
        curl.setopt(pycurl.POST, True)
        if data:
            curl.setopt(pycurl.POSTFIELDSIZE, len(data))
            curl.setopt(pycurl.READFUNCTION, post_body.read)

    if cainfo and url.startswith("https:"):
        curl.setopt(pycurl.CAINFO, cainfo)

    if headers:
        # Sorted for a deterministic header order on the wire.
        curl.setopt(pycurl.HTTPHEADER,
                    ["%s: %s" % pair for pair in sorted(headers.items())])

    if insecure:
        curl.setopt(pycurl.SSL_VERIFYPEER, False)

    curl.setopt(pycurl.URL, str(url))
    curl.setopt(pycurl.FOLLOWLOCATION, True)
    curl.setopt(pycurl.MAXREDIRS, 5)
    curl.setopt(pycurl.CONNECTTIMEOUT, connect_timeout)
    # Abort if the transfer drops below 1 byte/s for total_timeout seconds.
    curl.setopt(pycurl.LOW_SPEED_LIMIT, 1)
    curl.setopt(pycurl.LOW_SPEED_TIME, total_timeout)
    curl.setopt(pycurl.NOSIGNAL, 1)
    curl.setopt(pycurl.WRITEFUNCTION, response.write)
    curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)
    curl.setopt(pycurl.ENCODING, "gzip,deflate")

    try:
        curl.perform()
    except pycurl.error as e:  # "as" form works on Python 2.6+ and 3.x
        raise PyCurlError(e.args[0], e.args[1])

    body = response.getvalue()
    http_code = curl.getinfo(pycurl.HTTP_CODE)
    if http_code != 200:
        raise HTTPCodeError(http_code, body)
    return body
def test(args):
    """Command-line driver: fetch one URL and print its body.

    Accepts --post, --data and --cainfo options followed by a single URL.
    """
    parser = OptionParser()
    parser.add_option("--post", action="store_true")
    parser.add_option("--data", default="")
    parser.add_option("--cainfo")
    options, (url,) = parser.parse_args(args)
    body = fetch(url, post=options.post, data=options.data,
                 cainfo=options.cainfo)
    # Parenthesized single-expression print behaves identically on
    # Python 2 and 3.
    print(body)
def fetch_async(*args, **kwargs):
    """Retrieve a URL without blocking, by running L{fetch} in a thread.

    All arguments are forwarded verbatim to L{fetch}.

    @return: A C{Deferred} resulting in the URL content.
    """
    return deferToThread(fetch, *args, **kwargs)
def fetch_many_async(urls, callback=None, errback=None, **kwargs):
    """
    Retrieve a list of URLs asynchronously.

    @param callback: Optionally, a function that will be fired one time for
        each successful URL, and will be passed its content and the URL
        itself.
    @param errback: Optionally, a function that will be fired one time for
        each failing URL, and will be passed the failure and the URL itself.
    @return: A C{DeferredList} whose callback chain will be fired as soon as
        all downloads have terminated. If an error occurs, the errback chain
        of the C{DeferredList} will be fired immediatly.
    """

    def start_download(url):
        # Kick off one asynchronous fetch and wire up the per-URL hooks.
        deferred = fetch_async(url, **kwargs)
        if callback:
            deferred.addCallback(callback, url)
        if errback:
            deferred.addErrback(errback, url)
        return deferred

    deferreds = [start_download(url) for url in urls]
    return DeferredList(deferreds, fireOnOneErrback=True, consumeErrors=True)
def url_to_filename(url, directory=None):
    """Return the last component of the given C{url}.

    @param url: The URL to get the filename from.
    @param directory: Optionally a path to prepend to the returned filename.
    @note: Any trailing slash in the C{url} will be removed
    """
    trimmed = url.rstrip("/")
    # Only the final path segment is needed, so split once from the right.
    filename = trimmed.rsplit("/", 1)[-1]
    if directory is None:
        return filename
    return os.path.join(directory, filename)
def fetch_to_files(urls, directory, logger=None, **kwargs):
    """
    Retrieve a list of URLs and save their content as files in a directory.

    @param urls: The list URLs to fetch.
    @param directory: The directory to save the files to, the name of the
        file will equal the last fragment of the URL.
    @param logger: Optional function to be used to log errors for failed
        URLs.
    @return: The C{DeferredList} from L{fetch_many_async}, firing once all
        downloads have terminated.
    """

    def write(data, url):
        filename = url_to_filename(url, directory=directory)
        # "with" guarantees the descriptor is closed even if write() raises
        # (the original open/write/close sequence leaked it on error).
        with open(filename, "w") as fd:
            fd.write(data)

    def log_error(failure, url):
        if logger:
            logger("Couldn't fetch file from %s (%s)" % (
                url, str(failure.value)))
        # Return the failure so the DeferredList still sees the error.
        return failure

    return fetch_many_async(urls, callback=write, errback=log_error, **kwargs)
# Allow exercising this module directly from the command line, e.g.:
#   python fetch.py [--post] [--data=...] [--cainfo=...] URL
if __name__ == "__main__":
    test(sys.argv[1:])