This file is indexed.

/usr/share/doc/python-scrapy-doc/html/index.html is in python-scrapy-doc 1.0.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <!-- Character encoding and viewport are declared before anything else. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Scrapy documentation &mdash; Scrapy  documentation</title>

  <!-- Theme stylesheet. -->
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />

  <!-- Document relations: this page is the top of the set; the overview page comes next. -->
  <link rel="top" title="Scrapy  documentation" href="#"/>
  <link rel="next" title="Scrapy at a glance" href="intro/overview.html"/>

  <!-- Loaded in the head, ahead of the body markup. -->
  <script src="_static/js/modernizr.min.js"></script>
</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search">
          

          
            <a href="#" class="icon icon-home"> Scrapy
          

          
          </a>

          
            
            
          

          
<div role="search">
  <!-- Sidebar search form: sends the query to search.html as GET parameters. -->
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <!-- A placeholder alone is not a reliable accessible name, so the field is
         also named with aria-label. type="text" is kept unchanged so existing
         theme selectors that may target it keep matching. -->
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <!-- Hidden parameters submitted together with the query. -->
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
                <ul>
<li class="toctree-l1"><a class="reference internal" href="intro/overview.html">Scrapy at a glance</a></li>
<li class="toctree-l1"><a class="reference internal" href="intro/install.html">Installation guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="intro/tutorial.html">Scrapy Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="intro/examples.html">Examples</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="topics/commands.html">Command line tool</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/spiders.html">Spiders</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/selectors.html">Selectors</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/items.html">Items</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/loaders.html">Item Loaders</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/shell.html">Scrapy shell</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/item-pipeline.html">Item Pipeline</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/feed-exports.html">Feed exports</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/request-response.html">Requests and Responses</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/link-extractors.html">Link Extractors</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/settings.html">Settings</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/exceptions.html">Exceptions</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="topics/logging.html">Logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/stats.html">Stats Collection</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/email.html">Sending e-mail</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/telnetconsole.html">Telnet Console</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/webservice.html">Web Service</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/debug.html">Debugging Spiders</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/contracts.html">Spiders Contracts</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/practices.html">Common Practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/broad-crawls.html">Broad Crawls</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/firefox.html">Using Firefox for scraping</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/firebug.html">Using Firebug for scraping</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/leaks.html">Debugging memory leaks</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/media-pipeline.html">Downloading and processing files and images</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/ubuntu.html">Ubuntu packages</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/deploy.html">Deploying Spiders</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/autothrottle.html">AutoThrottle extension</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/benchmarking.html">Benchmarking</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/jobs.html">Jobs: pausing and resuming crawls</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="topics/architecture.html">Architecture overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/downloader-middleware.html">Downloader Middleware</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/spider-middleware.html">Spider Middleware</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/extensions.html">Extensions</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/api.html">Core API</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/signals.html">Signals</a></li>
<li class="toctree-l1"><a class="reference internal" href="topics/exporters.html">Item Exporters</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="news.html">Release notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="contributing.html">Contributing to Scrapy</a></li>
<li class="toctree-l1"><a class="reference internal" href="versioning.html">Versioning and API Stability</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="#">Scrapy</a>
      </nav>


      
      <div class="wy-nav-content">
        <div class="rst-content">
          

 



<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="#">Docs</a> &raquo;</li>
      
    <li>Scrapy  documentation</li>
      <li class="wy-breadcrumbs-aside">
        
          
            <a href="_sources/index.txt" rel="nofollow"> View page source</a>
          
        
      </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="scrapy-version-documentation">
<span id="topics-index"></span><h1>Scrapy  documentation<a class="headerlink" href="#scrapy-version-documentation" title="Permalink to this headline"></a></h1>
<p>This documentation contains everything you need to know about Scrapy.</p>
<div class="section" id="getting-help">
<h2>Getting help<a class="headerlink" href="#getting-help" title="Permalink to this headline"></a></h2>
<p>Having trouble? We&#8217;d like to help!</p>
<ul class="simple">
<li>Try the <a class="reference internal" href="faq.html"><span class="doc">FAQ</span></a> &#8211; it&#8217;s got answers to some common questions.</li>
<li>Looking for specific information? Try the <a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a> or <a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a>.</li>
<li>Search for information in the <a class="reference external" href="https://groups.google.com/forum/#!forum/scrapy-users">archives of the scrapy-users mailing list</a>, or
<a class="reference external" href="https://groups.google.com/forum/#!forum/scrapy-users">post a question</a>.</li>
<li>Ask a question in the <a class="reference external" href="irc://irc.freenode.net/scrapy">#scrapy IRC channel</a>.</li>
<li>Report bugs with Scrapy in our <a class="reference external" href="https://github.com/scrapy/scrapy/issues">issue tracker</a>.</li>
</ul>
</div>
<div class="section" id="first-steps">
<h2>First steps<a class="headerlink" href="#first-steps" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
</div>
<dl class="docutils">
<dt><a class="reference internal" href="intro/overview.html"><span class="doc">Scrapy at a glance</span></a></dt>
<dd>Understand what Scrapy is and how it can help you.</dd>
<dt><a class="reference internal" href="intro/install.html"><span class="doc">Installation guide</span></a></dt>
<dd>Get Scrapy installed on your computer.</dd>
<dt><a class="reference internal" href="intro/tutorial.html"><span class="doc">Scrapy Tutorial</span></a></dt>
<dd>Write your first Scrapy project.</dd>
<dt><a class="reference internal" href="intro/examples.html"><span class="doc">Examples</span></a></dt>
<dd>Learn more by playing with a pre-made Scrapy project.</dd>
</dl>
</div>
<div class="section" id="basic-concepts">
<span id="section-basics"></span><h2>Basic concepts<a class="headerlink" href="#basic-concepts" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
</div>
<dl class="docutils">
<dt><a class="reference internal" href="topics/commands.html"><span class="doc">Command line tool</span></a></dt>
<dd>Learn about the command-line tool used to manage your Scrapy project.</dd>
<dt><a class="reference internal" href="topics/spiders.html"><span class="doc">Spiders</span></a></dt>
<dd>Write the rules to crawl your websites.</dd>
<dt><a class="reference internal" href="topics/selectors.html"><span class="doc">Selectors</span></a></dt>
<dd>Extract the data from web pages using XPath.</dd>
<dt><a class="reference internal" href="topics/shell.html"><span class="doc">Scrapy shell</span></a></dt>
<dd>Test your extraction code in an interactive environment.</dd>
<dt><a class="reference internal" href="topics/items.html"><span class="doc">Items</span></a></dt>
<dd>Define the data you want to scrape.</dd>
<dt><a class="reference internal" href="topics/loaders.html"><span class="doc">Item Loaders</span></a></dt>
<dd>Populate your items with the extracted data.</dd>
<dt><a class="reference internal" href="topics/item-pipeline.html"><span class="doc">Item Pipeline</span></a></dt>
<dd>Post-process and store your scraped data.</dd>
<dt><a class="reference internal" href="topics/feed-exports.html"><span class="doc">Feed exports</span></a></dt>
<dd>Output your scraped data using different formats and storages.</dd>
<dt><a class="reference internal" href="topics/request-response.html"><span class="doc">Requests and Responses</span></a></dt>
<dd>Understand the classes used to represent HTTP requests and responses.</dd>
<dt><a class="reference internal" href="topics/link-extractors.html"><span class="doc">Link Extractors</span></a></dt>
<dd>Convenient classes to extract links to follow from pages.</dd>
<dt><a class="reference internal" href="topics/settings.html"><span class="doc">Settings</span></a></dt>
<dd>Learn how to configure Scrapy and see all <a class="reference internal" href="topics/settings.html#topics-settings-ref"><span class="std std-ref">available settings</span></a>.</dd>
<dt><a class="reference internal" href="topics/exceptions.html"><span class="doc">Exceptions</span></a></dt>
<dd>See all available exceptions and their meaning.</dd>
</dl>
</div>
<div class="section" id="built-in-services">
<h2>Built-in services<a class="headerlink" href="#built-in-services" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
</div>
<dl class="docutils">
<dt><a class="reference internal" href="topics/logging.html"><span class="doc">Logging</span></a></dt>
<dd>Learn how to use Python&#8217;s built-in logging with Scrapy.</dd>
<dt><a class="reference internal" href="topics/stats.html"><span class="doc">Stats Collection</span></a></dt>
<dd>Collect statistics about your scraping crawler.</dd>
<dt><a class="reference internal" href="topics/email.html"><span class="doc">Sending e-mail</span></a></dt>
<dd>Send email notifications when certain events occur.</dd>
<dt><a class="reference internal" href="topics/telnetconsole.html"><span class="doc">Telnet Console</span></a></dt>
<dd>Inspect a running crawler using a built-in Python console.</dd>
<dt><a class="reference internal" href="topics/webservice.html"><span class="doc">Web Service</span></a></dt>
<dd>Monitor and control a crawler using a web service.</dd>
</dl>
</div>
<div class="section" id="solving-specific-problems">
<h2>Solving specific problems<a class="headerlink" href="#solving-specific-problems" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
</div>
<dl class="docutils">
<dt><a class="reference internal" href="faq.html"><span class="doc">Frequently Asked Questions</span></a></dt>
<dd>Get answers to the most frequently asked questions.</dd>
<dt><a class="reference internal" href="topics/debug.html"><span class="doc">Debugging Spiders</span></a></dt>
<dd>Learn how to debug common problems of your Scrapy spider.</dd>
<dt><a class="reference internal" href="topics/contracts.html"><span class="doc">Spiders Contracts</span></a></dt>
<dd>Learn how to use contracts for testing your spiders.</dd>
<dt><a class="reference internal" href="topics/practices.html"><span class="doc">Common Practices</span></a></dt>
<dd>Get familiar with some common Scrapy practices.</dd>
<dt><a class="reference internal" href="topics/broad-crawls.html"><span class="doc">Broad Crawls</span></a></dt>
<dd>Tune Scrapy for crawling a lot of domains in parallel.</dd>
<dt><a class="reference internal" href="topics/firefox.html"><span class="doc">Using Firefox for scraping</span></a></dt>
<dd>Learn how to scrape with Firefox and some useful add-ons.</dd>
<dt><a class="reference internal" href="topics/firebug.html"><span class="doc">Using Firebug for scraping</span></a></dt>
<dd>Learn how to scrape efficiently using Firebug.</dd>
<dt><a class="reference internal" href="topics/leaks.html"><span class="doc">Debugging memory leaks</span></a></dt>
<dd>Learn how to find and get rid of memory leaks in your crawler.</dd>
<dt><a class="reference internal" href="topics/media-pipeline.html"><span class="doc">Downloading and processing files and images</span></a></dt>
<dd>Download files and/or images associated with your scraped items.</dd>
<dt><a class="reference internal" href="topics/ubuntu.html"><span class="doc">Ubuntu packages</span></a></dt>
<dd>Install the latest Scrapy packages easily on Ubuntu.</dd>
<dt><a class="reference internal" href="topics/deploy.html"><span class="doc">Deploying Spiders</span></a></dt>
<dd>Deploy your Scrapy spiders and run them on a remote server.</dd>
<dt><a class="reference internal" href="topics/autothrottle.html"><span class="doc">AutoThrottle extension</span></a></dt>
<dd>Adjust crawl rate dynamically based on load.</dd>
<dt><a class="reference internal" href="topics/benchmarking.html"><span class="doc">Benchmarking</span></a></dt>
<dd>Check how Scrapy performs on your hardware.</dd>
<dt><a class="reference internal" href="topics/jobs.html"><span class="doc">Jobs: pausing and resuming crawls</span></a></dt>
<dd>Learn how to pause and resume crawls for large spiders.</dd>
</dl>
</div>
<div class="section" id="extending-scrapy">
<span id="id1"></span><h2>Extending Scrapy<a class="headerlink" href="#extending-scrapy" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
</div>
<dl class="docutils">
<dt><a class="reference internal" href="topics/architecture.html"><span class="doc">Architecture overview</span></a></dt>
<dd>Understand the Scrapy architecture.</dd>
<dt><a class="reference internal" href="topics/downloader-middleware.html"><span class="doc">Downloader Middleware</span></a></dt>
<dd>Customize how pages get requested and downloaded.</dd>
<dt><a class="reference internal" href="topics/spider-middleware.html"><span class="doc">Spider Middleware</span></a></dt>
<dd>Customize the input and output of your spiders.</dd>
<dt><a class="reference internal" href="topics/extensions.html"><span class="doc">Extensions</span></a></dt>
<dd>Extend Scrapy with your custom functionality.</dd>
<dt><a class="reference internal" href="topics/api.html"><span class="doc">Core API</span></a></dt>
<dd>Use it in extensions and middlewares to extend Scrapy functionality.</dd>
<dt><a class="reference internal" href="topics/signals.html"><span class="doc">Signals</span></a></dt>
<dd>See all available signals and how to work with them.</dd>
<dt><a class="reference internal" href="topics/exporters.html"><span class="doc">Item Exporters</span></a></dt>
<dd>Quickly export your scraped items to a file (XML, CSV, etc).</dd>
</dl>
</div>
<div class="section" id="all-the-rest">
<h2>All the rest<a class="headerlink" href="#all-the-rest" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
</div>
<dl class="docutils">
<dt><a class="reference internal" href="news.html"><span class="doc">Release notes</span></a></dt>
<dd>See what has changed in recent Scrapy versions.</dd>
<dt><a class="reference internal" href="contributing.html"><span class="doc">Contributing to Scrapy</span></a></dt>
<dd>Learn how to contribute to the Scrapy project.</dd>
<dt><a class="reference internal" href="versioning.html"><span class="doc">Versioning and API Stability</span></a></dt>
<dd>Understand Scrapy versioning and API stability.</dd>
</dl>
</div>
</div>


           </div>
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="intro/overview.html" class="btn btn-neutral float-right" title="Scrapy at a glance" accesskey="n">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2008-2016, Scrapy developers.
      Last updated on July 11, 2016.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  

    <script type="text/javascript">
        // Page-level options published as a global variable. Presumably read
        // by the Sphinx scripts loaded after this block (TODO confirm against
        // doctools.js).
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT: './',
            VERSION: '',
            COLLAPSE_INDEX: false,
            FILE_SUFFIX: '.html',
            HAS_SOURCE: true
        };
    </script>
      <script type="text/javascript" src="_static/jquery.js"></script>
      <script type="text/javascript" src="_static/underscore.js"></script>
      <script type="text/javascript" src="_static/doctools.js"></script>

  

  
  
    <script type="text/javascript" src="_static/js/theme.js"></script>
  

  
  
  <script type="text/javascript">
      // Turn on the theme's sticky navigation once the DOM is ready.
      jQuery(document).ready(function () {
          SphinxRtdTheme.StickyNav.enable();
      });
  </script>
   

</body>
</html>