/usr/share/pyshared/scrapy/shell.py is part of the python-scrapy 0.14.4-1 package.

This file is owned by root:root, with mode 0o644.

The contents of the file are shown below.

"""
Scrapy Shell

See documentation in docs/topics/shell.rst
"""

import signal

from twisted.internet import reactor, threads
from w3lib.url import any_to_uri

from scrapy.item import BaseItem
from scrapy.spider import BaseSpider
from scrapy.selector import XPathSelector, XmlXPathSelector, HtmlXPathSelector
from scrapy.utils.spider import create_spider_for_request
from scrapy.utils.misc import load_object
from scrapy.utils.request import request_deferred
from scrapy.utils.response import open_in_browser
from scrapy.utils.console import start_python_console
from scrapy.settings import Settings
from scrapy.http import Request, Response, HtmlResponse, XmlResponse

class Shell(object):

    # Only instances of these classes are listed by shelp() (see
    # _is_relevant below).
    relevant_classes = (BaseSpider, Request, Response, BaseItem, \
        XPathSelector, Settings)

    def __init__(self, crawler, update_vars=None, inthread=False, code=None):
        self.crawler = crawler
        self.update_vars = update_vars or (lambda x: None)
        self.item_class = load_object(crawler.settings['DEFAULT_ITEM_CLASS'])
        self.spider = None
        self.inthread = inthread
        self.code = code
        self.vars = {}

    def start(self, *a, **kw):
        # prevent an accidental Ctrl-C key press from shutting down the engine
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        if self.inthread:
            return threads.deferToThread(self._start, *a, **kw)
        else:
            self._start(*a, **kw)

    def _start(self, url=None, request=None, response=None, spider=None):
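        # Fetch the given URL/request (or accept a ready response), bind
        # the shell variables, then eval the code snippet if one was given,
        # or start an interactive console otherwise.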
        if url:
            self.fetch(url, spider)
        elif request:
            self.fetch(request, spider)
        elif response:
            request = response.request
            self.populate_vars(response, request, spider)
        else:
            self.populate_vars()
        if self.code:
            print eval(self.code, globals(), self.vars)
        else:
            start_python_console(self.vars)

    def _schedule(self, request, spider):
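        # Schedule the request with the crawler engine and return a
        # Deferred that fires with (response, spider) once downloaded.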
        spider = self._open_spider(request, spider)
        d = request_deferred(request)
        d.addCallback(lambda x: (x, spider))
        self.crawler.engine.crawl(request, spider)
        return d

    def _open_spider(self, request, spider):
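        # Reuse the spider opened earlier in this session if there is one;
        # otherwise pick (or create) a spider for the request and open it
        # in the engine, keeping it open even when idle.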
        if self.spider:
            return self.spider
        if spider is None:
            spider = create_spider_for_request(self.crawler.spiders, request, \
                BaseSpider('default'), log_multiple=True)
        spider.set_crawler(self.crawler)
        self.crawler.engine.open_spider(spider, close_if_idle=False)
        self.spider = spider
        return spider

    def fetch(self, request_or_url, spider=None):
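        # Download a request (or URL) through the running engine, blocking
        # this shell thread until the reactor delivers the response, then
        # refresh the shell variables.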
        if isinstance(request_or_url, Request):
            request = request_or_url
            url = request.url
        else:
            url = any_to_uri(request_or_url)
            request = Request(url, dont_filter=True)
            request.meta['handle_httpstatus_all'] = True
        response, spider = threads.blockingCallFromThread(reactor, \
            self._schedule, request, spider)
        self.populate_vars(response, request, spider)

    def populate_vars(self, response=None, request=None, spider=None):
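        # (Re)bind the objects exposed as local variables in the shell;
        # the xxs/hxs selectors are set only for XML/HTML responses.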
        self.vars['item'] = self.item_class()
        self.vars['settings'] = self.crawler.settings
        self.vars['spider'] = spider
        self.vars['request'] = request
        self.vars['response'] = response
        self.vars['xxs'] = XmlXPathSelector(response) \
            if isinstance(response, XmlResponse) else None
        self.vars['hxs'] = HtmlXPathSelector(response) \
            if isinstance(response, HtmlResponse) else None
        if self.inthread:
            self.vars['fetch'] = self.fetch
        self.vars['view'] = open_in_browser
        self.vars['shelp'] = self.print_help
        self.update_vars(self.vars)
        if not self.code:
            self.print_help()

    def print_help(self):
        self.p("Available Scrapy objects:")
        for k, v in sorted(self.vars.iteritems()):
            if self._is_relevant(v):
                self.p("  %-10s %s" % (k, v))
        self.p("Useful shortcuts:")
        self.p("  shelp()           Shell help (print this help)")
        if self.inthread:
            self.p("  fetch(req_or_url) Fetch request (or URL) and update local objects")
        self.p("  view(response)    View response in a browser")

    def p(self, line=''):
        print "[s] %s" % line

    def _is_relevant(self, value):
        return isinstance(value, self.relevant_classes)


def inspect_response(response, spider=None):
    """Open a shell to inspect the given response"""
    from scrapy.project import crawler
    Shell(crawler).start(response=response, spider=spider)
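
For reference, a minimal usage sketch (not part of the packaged file above): inspect_response() is meant to be called from a spider callback, pausing the crawl and opening this shell with the live response bound as a shell variable. The spider name and URL here are placeholders.

from scrapy.spider import BaseSpider
from scrapy.shell import inspect_response

class MySpider(BaseSpider):
    # hypothetical spider, for illustration only
    name = 'example'
    start_urls = ['http://www.example.com/']

    def parse(self, response):
        # Pauses the crawl and opens the interactive shell; 'response',
        # 'hxs', 'request', etc. are available (see populate_vars above).
        inspect_response(response, self)

The standalone shell is normally started with the "scrapy shell <url>" command. Note that fetch() is only exposed when the shell runs in a separate thread (inthread=True), since it must block on the Twisted reactor.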