This file is indexed.

/usr/lib/python2.7/dist-packages/scrapy/templates/spiders/crawl.tmpl is in python-scrapy 1.0.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from $project_name.items import ${ProjectName}Item


class $classname(CrawlSpider):
    name = '$name'
    allowed_domains = ['$domain']
    start_urls = ['http://www.$domain/']

    rules = (
        Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        i = ${ProjectName}Item()
        #i['domain_id'] = response.xpath('//input[@id="sid"]/@value').extract()
        #i['name'] = response.xpath('//div[@id="name"]').extract()
        #i['description'] = response.xpath('//div[@id="description"]').extract()
        return i