/usr/lib/python2.7/dist-packages/s3transfer/download.py is in python-s3transfer 0.1.13-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import logging
import os
import socket
import math
import threading
import heapq
from botocore.compat import six
from botocore.exceptions import IncompleteReadError
from urllib3.exceptions import \
ReadTimeoutError
from s3transfer.compat import SOCKET_ERROR
from s3transfer.compat import seekable
from s3transfer.exceptions import RetriesExceededError
from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
from s3transfer.utils import random_file_extension
from s3transfer.utils import get_callbacks
from s3transfer.utils import invoke_progress_callbacks
from s3transfer.utils import calculate_range_parameter
from s3transfer.utils import FunctionContainer
from s3transfer.utils import CountCallbackInvoker
from s3transfer.utils import StreamReaderProgress
from s3transfer.utils import DeferredOpenFile
from s3transfer.tasks import Task
from s3transfer.tasks import SubmissionTask
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Exception types that GetObjectTask catches in order to retry a download
# attempt rather than fail on the first error.
S3_RETRYABLE_ERRORS = (
socket.timeout, SOCKET_ERROR, ReadTimeoutError, IncompleteReadError
)
class DownloadOutputManager(object):
    """Base class describing how downloaded data reaches its destination.

    DownloadSubmissionTask relies on a manager to:

    * provide the file-like object that downloaded data is written to
    * provide the task to run once everything downloaded has been written

    Every kind of download target needs its own answers, so concrete
    managers subclass this class and override its public methods.
    """

    def __init__(self, osutil, transfer_coordinator, io_executor):
        self._osutil = osutil
        self._transfer_coordinator = transfer_coordinator
        self._io_executor = io_executor

    @classmethod
    def is_compatible(cls, download_target, osutil):
        """Report whether this manager can handle the download target

        :param download_target: The target that the download will write
            data to.
        :param osutil: The os utility to be used for the transfer

        :returns: True if the manager can handle the type of target
            specified, otherwise False.
        """
        raise NotImplementedError('must implement is_compatible()')

    def get_download_task_tag(self):
        """Return the tag (if any) to attach to every GetObjectTask

        :rtype: s3transfer.futures.TaskTag
        :returns: The tag to associate all GetObjectTasks with. The base
            implementation uses no tag and returns None.
        """
        return None

    def get_fileobj_for_io_writes(self, transfer_future):
        """Return the file-like object that io writes should target

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The future associated with the download
            request

        :returns: A file-like object to write to
        """
        raise NotImplementedError('must implement get_fileobj_for_io_writes()')

    def queue_file_io_task(self, fileobj, data, offset):
        """Submit an IO write for the given data to the IO executor.

        The write task is built with get_io_write_task() and handed to the
        transfer coordinator together with the IO executor. Subclasses may
        override this to defer submission when necessary.
        """
        write_task = self.get_io_write_task(fileobj, data, offset)
        self._transfer_coordinator.submit(self._io_executor, write_task)

    def get_io_write_task(self, fileobj, data, offset):
        """Build an IO write task for one piece of downloaded data

        The returned task may be run right away or submitted to the IO
        executor.

        :type fileobj: file-like object
        :param fileobj: The file-like object to write to

        :type data: bytes
        :param data: The data to write out

        :type offset: integer
        :param offset: The offset to write the data to in the file-like
            object

        :returns: An IO task to be used to write data to a file-like object
        """
        write_kwargs = {'fileobj': fileobj, 'data': data, 'offset': offset}
        return IOWriteTask(
            self._transfer_coordinator, main_kwargs=write_kwargs)

    def get_final_io_task(self):
        """Return the last io task of the download

        The IO executor needs a final task that signals the transfer is
        done so that all done callbacks can be run.

        :rtype: s3transfer.tasks.Task
        :returns: A final task to be completed in the io executor
        """
        raise NotImplementedError(
            'must implement get_final_io_task()')

    def _get_fileobj_from_filename(self, filename):
        """Return a deferred-open binary write handle for ``filename``."""
        deferred_file = DeferredOpenFile(
            filename, mode='wb', open_function=self._osutil.open)
        # Registered as a failure cleanup so the handle gets closed if
        # anything goes wrong during the transfer.
        self._transfer_coordinator.add_failure_cleanup(deferred_file.close)
        return deferred_file
class DownloadFilenameOutputManager(DownloadOutputManager):
    """Output manager for downloads whose target is a filename.

    Data is written to a temporary file next to the final location; the
    final io task renames the temporary file to the requested filename.
    """

    def __init__(self, osutil, transfer_coordinator, io_executor):
        super(DownloadFilenameOutputManager, self).__init__(
            osutil, transfer_coordinator, io_executor)
        # All three are populated by get_fileobj_for_io_writes().
        self._final_filename = None
        self._temp_filename = None
        self._temp_fileobj = None

    @classmethod
    def is_compatible(cls, download_target, osutil):
        """Return True when the download target is a string (a path)."""
        return isinstance(download_target, six.string_types)

    def get_fileobj_for_io_writes(self, transfer_future):
        """Create and return the temporary file object to write into.

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The future whose call args carry the
            destination filename

        :returns: The file object for the temporary file
        """
        final_filename = transfer_future.meta.call_args.fileobj
        self._final_filename = final_filename
        # Temporary name: final name + extension separator + random suffix.
        self._temp_filename = (
            final_filename + os.extsep + random_file_extension())
        self._temp_fileobj = self._get_temp_fileobj()
        return self._temp_fileobj

    def get_final_io_task(self):
        """Return the task that renames the temporary file into place.

        This should be the last task needed to complete the download.
        """
        rename_kwargs = {
            'fileobj': self._temp_fileobj,
            'final_filename': self._final_filename,
            'osutil': self._osutil
        }
        return IORenameFileTask(
            transfer_coordinator=self._transfer_coordinator,
            main_kwargs=rename_kwargs,
            is_final=True
        )

    def _get_temp_fileobj(self):
        """Open the temporary file and register its removal on failure."""
        temp_fileobj = self._get_fileobj_from_filename(self._temp_filename)
        self._transfer_coordinator.add_failure_cleanup(
            self._osutil.remove_file, self._temp_filename)
        return temp_fileobj
class DownloadSeekableOutputManager(DownloadOutputManager):
    """Output manager for downloads into a caller-provided seekable object."""

    @classmethod
    def is_compatible(cls, download_target, osutil):
        """Return the result of ``seekable()`` for the download target."""
        return seekable(download_target)

    def get_fileobj_for_io_writes(self, transfer_future):
        """Return the file object that was supplied with the request."""
        call_args = transfer_future.meta.call_args
        return call_args.fileobj

    def get_final_io_task(self):
        """Return a no-op task marking the end of the download.

        The task signals when all of the io writes have finished so done
        callbacks can be called.
        """
        coordinator = self._transfer_coordinator
        return CompleteDownloadNOOPTask(transfer_coordinator=coordinator)
class DownloadNonSeekableOutputManager(DownloadOutputManager):
    """Output manager for downloads into a writable, non-seekable stream.

    Because the stream cannot seek, data must be written strictly in
    order. Incoming data is routed through a DeferQueue, which only
    releases writes once they are contiguous with what was already
    released.
    """

    def __init__(self, osutil, transfer_coordinator, io_executor,
                 defer_queue=None):
        """
        :param osutil: The os utility associated to the transfer
        :param transfer_coordinator: The coordinator of the transfer
        :param io_executor: The executor that io write tasks are submitted to
        :param defer_queue: The queue used to order writes. A new
            DeferQueue is created when none is given.
        """
        super(DownloadNonSeekableOutputManager, self).__init__(
            osutil, transfer_coordinator, io_executor)
        if defer_queue is None:
            defer_queue = DeferQueue()
        self._defer_queue = defer_queue
        self._io_submit_lock = threading.Lock()

    @classmethod
    def is_compatible(cls, download_target, osutil):
        """Return True when the download target has a ``write`` attribute."""
        return hasattr(download_target, 'write')

    def get_download_task_tag(self):
        """Return the tag that marks GetObjectTasks as in-memory downloads."""
        return IN_MEMORY_DOWNLOAD_TAG

    def get_fileobj_for_io_writes(self, transfer_future):
        """Return the stream that was supplied with the request."""
        return transfer_future.meta.call_args.fileobj

    def get_final_io_task(self):
        """Return a no-op task marking the end of the download."""
        return CompleteDownloadNOOPTask(
            transfer_coordinator=self._transfer_coordinator)

    def queue_file_io_task(self, fileobj, data, offset):
        """Queue the writes that the incoming data makes available.

        The data is handed to the defer queue, and every write the queue
        releases is submitted to the IO executor with that write's own data
        and offset. The incoming ``offset`` may differ from the offsets of
        the released writes, so it is never forwarded.

        :param fileobj: The stream to write to
        :param data: The newly downloaded data
        :param offset: The offset of ``data`` within the object
        """
        # The lock spans both the dequeue and the submissions, presumably so
        # that concurrent callers cannot interleave their released writes.
        with self._io_submit_lock:
            for write in self._defer_queue.request_writes(offset, data):
                logger.debug("Queueing IO offset %s for fileobj: %s",
                             write['offset'], fileobj)
                super(
                    DownloadNonSeekableOutputManager, self).queue_file_io_task(
                        fileobj, write['data'], write['offset'])

    def get_io_write_task(self, fileobj, data, offset):
        """Return a streaming write task; ``offset`` is accepted but unused.

        The stream is not seekable, so the task simply appends ``data``.
        """
        return IOStreamingWriteTask(
            self._transfer_coordinator,
            main_kwargs={
                'fileobj': fileobj,
                'data': data,
            }
        )
class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
    """Output manager for filenames that the os utility reports as special.

    The file is opened by name and then handled like any other
    non-seekable stream; the final io task closes it.
    """

    def __init__(self, osutil, transfer_coordinator, io_executor,
                 defer_queue=None):
        super(DownloadSpecialFilenameOutputManager, self).__init__(
            osutil, transfer_coordinator, io_executor, defer_queue)
        # Populated by get_fileobj_for_io_writes().
        self._fileobj = None

    @classmethod
    def is_compatible(cls, download_target, osutil):
        """Return True for a string target that osutil deems a special file."""
        if not isinstance(download_target, six.string_types):
            return False
        return osutil.is_special_file(download_target)

    def get_fileobj_for_io_writes(self, transfer_future):
        """Open the target filename and return the resulting file object."""
        target_filename = transfer_future.meta.call_args.fileobj
        self._fileobj = self._get_fileobj_from_filename(target_filename)
        return self._fileobj

    def get_final_io_task(self):
        """Return the task that closes the file once the transfer is done."""
        close_kwargs = {'fileobj': self._fileobj}
        return IOCloseTask(
            transfer_coordinator=self._transfer_coordinator,
            is_final=True,
            main_kwargs=close_kwargs)
class DownloadSubmissionTask(SubmissionTask):
    """Task for submitting tasks to execute a download"""

    def _get_download_output_manager_cls(self, transfer_future, osutil):
        """Pick the output manager class matching the download target

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The transfer future for the request

        :type osutil: s3transfer.utils.OSUtils
        :param osutil: The os utility associated to the transfer

        :rtype: class of DownloadOutputManager
        :returns: The first class, in resolution order, that reports itself
            compatible with the target.

        :raises RuntimeError: If no manager class is compatible.
        """
        # The first compatible class wins, so the order is significant.
        candidate_classes = (
            DownloadSpecialFilenameOutputManager,
            DownloadFilenameOutputManager,
            DownloadSeekableOutputManager,
            DownloadNonSeekableOutputManager,
        )
        target = transfer_future.meta.call_args.fileobj
        for manager_cls in candidate_classes:
            if manager_cls.is_compatible(target, osutil):
                return manager_cls
        raise RuntimeError(
            'Output %s of type: %s is not supported.' % (
                target, type(target)))

    def _submit(self, client, config, osutil, request_executor, io_executor,
                transfer_future, bandwidth_limiter=None):
        """
        :param client: The client associated with the transfer manager

        :type config: s3transfer.manager.TransferConfig
        :param config: The transfer config associated with the transfer
            manager

        :type osutil: s3transfer.utils.OSUtil
        :param osutil: The os utility associated to the transfer manager

        :type request_executor: s3transfer.futures.BoundedExecutor
        :param request_executor: The request executor associated with the
            transfer manager

        :type io_executor: s3transfer.futures.BoundedExecutor
        :param io_executor: The io executor associated with the
            transfer manager

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The transfer future associated with the
            transfer request that tasks are being submitted for

        :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
        :param bandwidth_limiter: The bandwidth limiter to use when
            downloading streams
        """
        meta = transfer_future.meta
        if meta.size is None:
            # No size was provided with the request, so ask S3 for it.
            call_args = meta.call_args
            head_response = client.head_object(
                Bucket=call_args.bucket,
                Key=call_args.key,
                **call_args.extra_args
            )
            meta.provide_transfer_size(head_response['ContentLength'])

        manager_cls = self._get_download_output_manager_cls(
            transfer_future, osutil)
        download_output_manager = manager_cls(
            osutil, self._transfer_coordinator, io_executor)

        # Objects below the threshold use a single GetObject; anything
        # else is fetched with ranged requests.
        if meta.size < config.multipart_threshold:
            submit_request = self._submit_download_request
        else:
            submit_request = self._submit_ranged_download_request
        submit_request(
            client, config, osutil, request_executor, io_executor,
            download_output_manager, transfer_future, bandwidth_limiter)

    def _submit_download_request(self, client, config, osutil,
                                 request_executor, io_executor,
                                 download_output_manager, transfer_future,
                                 bandwidth_limiter):
        """Submit one task that downloads the whole object."""
        call_args = transfer_future.meta.call_args

        # File handle that the downloaded contents are written to.
        fileobj = download_output_manager.get_fileobj_for_io_writes(
            transfer_future)

        progress_callbacks = get_callbacks(transfer_future, 'progress')

        # Tag (if any) for the get object task.
        get_object_tag = download_output_manager.get_download_task_tag()

        # Task to run once the download is complete.
        final_task = download_output_manager.get_final_io_task()

        task_kwargs = {
            'client': client,
            'bucket': call_args.bucket,
            'key': call_args.key,
            'fileobj': fileobj,
            'extra_args': call_args.extra_args,
            'callbacks': progress_callbacks,
            'max_attempts': config.num_download_attempts,
            'download_output_manager': download_output_manager,
            'io_chunksize': config.io_chunksize,
            'bandwidth_limiter': bandwidth_limiter
        }
        download_task = ImmediatelyWriteIOGetObjectTask(
            transfer_coordinator=self._transfer_coordinator,
            main_kwargs=task_kwargs,
            done_callbacks=[final_task]
        )
        self._transfer_coordinator.submit(
            request_executor, download_task, tag=get_object_tag)

    def _submit_ranged_download_request(self, client, config, osutil,
                                        request_executor, io_executor,
                                        download_output_manager,
                                        transfer_future,
                                        bandwidth_limiter):
        """Submit one ranged GetObjectTask per part of the object."""
        call_args = transfer_future.meta.call_args

        progress_callbacks = get_callbacks(transfer_future, 'progress')

        # File handle that the downloaded contents are written to.
        fileobj = download_output_manager.get_fileobj_for_io_writes(
            transfer_future)

        # Number of parts, rounding up for a trailing partial part.
        part_size = config.multipart_chunksize
        num_parts = int(
            math.ceil(transfer_future.meta.size / float(part_size)))

        # Tag (if any) for the get object tasks.
        get_object_tag = download_output_manager.get_download_task_tag()

        # Invoker that submits the final io task once every part has
        # called decrement and the invoker has been finalized.
        submit_final_task = self._get_final_io_task_submission_callback(
            download_output_manager, io_executor
        )
        finalize_download_invoker = CountCallbackInvoker(submit_final_task)

        for part_index in range(num_parts):
            range_parameter = calculate_range_parameter(
                part_size, part_index, num_parts)

            # Start from the Range entry and layer the caller's extra args
            # on top of it.
            extra_args = {'Range': range_parameter}
            extra_args.update(call_args.extra_args)

            finalize_download_invoker.increment()

            part_kwargs = {
                'client': client,
                'bucket': call_args.bucket,
                'key': call_args.key,
                'fileobj': fileobj,
                'extra_args': extra_args,
                'callbacks': progress_callbacks,
                'max_attempts': config.num_download_attempts,
                'start_index': part_index * part_size,
                'download_output_manager': download_output_manager,
                'io_chunksize': config.io_chunksize,
                'bandwidth_limiter': bandwidth_limiter
            }
            part_task = GetObjectTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs=part_kwargs,
                done_callbacks=[finalize_download_invoker.decrement]
            )
            self._transfer_coordinator.submit(
                request_executor, part_task, tag=get_object_tag)

        finalize_download_invoker.finalize()

    def _get_final_io_task_submission_callback(self, download_manager,
                                               io_executor):
        """Wrap submission of the final io task in a callable container."""
        final_task = download_manager.get_final_io_task()
        return FunctionContainer(
            self._transfer_coordinator.submit, io_executor, final_task)

    def _calculate_range_param(self, part_size, part_index, num_parts):
        """Return the Range header value for one part.

        The last part gets an open-ended range; every other part ends at
        the byte just before the next part starts.
        """
        start_range = part_index * part_size
        is_last_part = part_index == num_parts - 1
        end_range = '' if is_last_part else start_range + part_size - 1
        return 'bytes=%s-%s' % (start_range, end_range)
class GetObjectTask(Task):
    """Task that downloads (part of) an object and queues it for writing."""

    def _main(self, client, bucket, key, fileobj, extra_args, callbacks,
              max_attempts, download_output_manager, io_chunksize,
              start_index=0, bandwidth_limiter=None):
        """Downloads an object and places content into io queue

        :param client: The client to use when calling GetObject
        :param bucket: The bucket to download from
        :param key: The key to download from
        :param fileobj: The file handle to write content to
        :param extra_args: Any extra arguments to include in GetObject request
        :param callbacks: List of progress callbacks to invoke on download
        :param max_attempts: The number of retries to do when downloading
        :param download_output_manager: The download output manager associated
            with the current download.
        :param io_chunksize: The size of each io chunk to read from the
            download stream and queue in the io queue.
        :param start_index: The location in the file to start writing the
            content of the key to.
        :param bandwidth_limiter: The bandwidth limiter to use when throttling
            the downloading of data in streams.

        :raises RetriesExceededError: If every attempt failed with one of
            the S3_RETRYABLE_ERRORS; it wraps the last such exception.
        """
        last_exception = None
        for i in range(max_attempts):
            # Reset before entering the try block: the except handler reads
            # current_index, and a retryable error may be raised before any
            # data is read (for example by get_object itself). Assigning it
            # inside the try left it unbound on a first-attempt failure and
            # stale on later ones.
            current_index = start_index
            try:
                response = client.get_object(
                    Bucket=bucket, Key=key, **extra_args)
                streaming_body = StreamReaderProgress(
                    response['Body'], callbacks)
                if bandwidth_limiter:
                    streaming_body = \
                        bandwidth_limiter.get_bandwith_limited_stream(
                            streaming_body, self._transfer_coordinator)

                chunks = DownloadChunkIterator(streaming_body, io_chunksize)
                for chunk in chunks:
                    # If the transfer is done because of a cancellation
                    # or error somewhere else, stop trying to submit more
                    # data to be written and break out of the download.
                    if self._transfer_coordinator.done():
                        return
                    self._handle_io(
                        download_output_manager, fileobj, chunk,
                        current_index
                    )
                    current_index += len(chunk)
                return
            except S3_RETRYABLE_ERRORS as e:
                logger.debug("Retrying exception caught (%s), "
                             "retrying request, (attempt %s / %s)", e, i,
                             max_attempts, exc_info=True)
                last_exception = e
                # Also invoke the progress callbacks to indicate that we
                # are trying to download the stream again and all progress
                # for this GetObject has been lost. The amount is the
                # negative of what this attempt had handed off so far.
                invoke_progress_callbacks(
                    callbacks, start_index - current_index)
                continue
        raise RetriesExceededError(last_exception)

    def _handle_io(self, download_output_manager, fileobj, chunk, index):
        """Hand one chunk to the output manager to be queued for writing."""
        download_output_manager.queue_file_io_task(fileobj, chunk, index)
class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
    """GetObjectTask that writes each chunk as soon as it is read

    When only one thread downloads the object, routing every chunk
    through an IO queue and executor is needless overhead, so the write
    task is built and invoked on the spot.
    """

    def _handle_io(self, download_output_manager, fileobj, chunk, index):
        """Build the write task for the chunk and run it right away."""
        write_task = download_output_manager.get_io_write_task(
            fileobj, chunk, index)
        write_task()
class IOWriteTask(Task):
    """Task that writes a piece of data at a given offset of a file."""

    def _main(self, fileobj, data, offset):
        """Seek to the offset and write the data there

        :param fileobj: The file handle to write content to
        :param data: The data to write
        :param offset: The offset to write the data to.
        """
        # Position the handle first; the write then lands at ``offset``.
        fileobj.seek(offset)
        fileobj.write(data)
class IOStreamingWriteTask(Task):
    """Task that appends data to a stream that cannot seek."""

    def _main(self, fileobj, data):
        """Write data straight to the fileobj

        No seek is performed beforehand; the data goes wherever the
        fileobj currently writes.

        :param fileobj: The fileobj to write content to
        :param data: The data to write
        """
        fileobj.write(data)
class IORenameFileTask(Task):
    """Task that moves a finished temporary file to its final filename

    :param fileobj: The file handle that content was written to.
    :param final_filename: The final name of the file to rename to
        upon completion of writing the contents.
    :param osutil: OS utility
    """

    def _main(self, fileobj, final_filename, osutil):
        # Close the handle before the file is renamed.
        fileobj.close()
        temp_filename = fileobj.name
        osutil.rename_file(temp_filename, final_filename)
class IOCloseTask(Task):
    """Task that closes a file after the download has completed

    :param fileobj: The fileobj to close.
    """

    def _main(self, fileobj):
        # Closing the handle is the only work this task does.
        fileobj.close()
class CompleteDownloadNOOPTask(Task):
    """Do-nothing task whose completion marks the download as finished

    Unlike a plain Task construction here, ``is_final`` defaults to True
    because this task should always be the last one.
    """

    def __init__(self, transfer_coordinator, main_kwargs=None,
                 pending_main_kwargs=None, done_callbacks=None,
                 is_final=True):
        # Every argument is forwarded unchanged; only the is_final default
        # differs.
        init_kwargs = {
            'transfer_coordinator': transfer_coordinator,
            'main_kwargs': main_kwargs,
            'pending_main_kwargs': pending_main_kwargs,
            'done_callbacks': done_callbacks,
            'is_final': is_final,
        }
        super(CompleteDownloadNOOPTask, self).__init__(**init_kwargs)

    def _main(self):
        """Intentionally does nothing."""
        pass
class DownloadChunkIterator(object):
    """Iterator that yields a readable body in fixed-size reads."""

    def __init__(self, body, chunksize):
        """Iterator to chunk out a downloaded S3 stream

        :param body: A readable file-like object
        :param chunksize: The amount to read each time
        """
        self._body = body
        self._chunksize = chunksize
        # Number of reads performed so far; lets the first read be told
        # apart from later ones.
        self._num_reads = 0

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next chunk read from the body.

        An empty first read is still returned, so that an empty object
        yields exactly one (empty) chunk; any later empty read ends the
        iteration.
        """
        data = self._body.read(self._chunksize)
        self._num_reads += 1
        is_first_read = self._num_reads == 1
        if not data and not is_first_read:
            raise StopIteration()
        return data

    # Python 2 iterator protocol name.
    next = __next__
class DeferQueue(object):
    """IO queue that holds writes back until they can be issued in order.

    One instance tracks the IO data of a *single* fileobj. Data may be
    sent in any order; a write is only released once it starts exactly
    where the previously released data ended (beginning at offset 0).
    """

    def __init__(self):
        # Min-heap of (offset, data) pairs waiting to be released.
        self._writes = []
        # Offsets currently on the heap, used to spot duplicates.
        self._pending_offsets = set()
        # Offset at which the next released write must start.
        self._next_offset = 0

    def request_writes(self, offset, data):
        """Add new data and return every write that is now releasable.

        Adding and retrieving happen in a single call so that a caller
        holding a lock only has to take it once rather than for separate
        put() and get() calls.

        :param offset: The offset of ``data`` within the output
        :param data: The data to be written at ``offset``

        :returns: A list of ``{'offset': ..., 'data': ...}`` dicts in
            ascending offset order; empty when nothing can be released
            yet or when the request is ignored.
        """
        # An offset below the release point was already seen, which can
        # happen when a retry re-sends earlier data. An offset already on
        # the heap is a duplicate; what is queued is preferred. Both are
        # ignored.
        already_released = offset < self._next_offset
        if already_released or offset in self._pending_offsets:
            return []

        heapq.heappush(self._writes, (offset, data))
        self._pending_offsets.add(offset)

        ready = []
        while self._writes and self._writes[0][0] == self._next_offset:
            ready_offset, ready_data = heapq.heappop(self._writes)
            ready.append({'offset': ready_offset, 'data': ready_data})
            self._pending_offsets.remove(ready_offset)
            self._next_offset += len(ready_data)
        return ready
|