This file is indexed.

/usr/lib/python3/dist-packages/geopy/geocoders/wiki_semantic.py is in python3-geopy 0.95.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
import xml.dom.minidom
from urllib.request import urlopen
from geopy.geocoders.base import Geocoder
from geopy.point import Point
from geopy.location import Location
from geopy import util

# BeautifulSoup is treated as an optional dependency: when the import fails,
# only a warning is logged and the name ``BeautifulSoup`` stays undefined, so
# any later call that references it will raise NameError.
# NOTE(review): if ``util.logger`` is a stdlib ``logging.Logger``, ``warn`` is
# a deprecated alias of ``warning`` -- confirm before changing.
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    util.logger.warn("BeautifulSoup was not found. " \
          "The SemanticMediaWiki geocoder will not work.")

# Fall back to ``sets.Set`` when no built-in ``set`` name is available.
# NOTE(review): presumably a leftover for very old interpreters; verify that
# this guard is still needed.
try:
    set
except NameError:
    from sets import Set as set

class SemanticMediaWiki(Geocoder):
    """Geocoder that reads coordinates from a Semantic MediaWiki RDF export.

    A wiki page is fetched, the RDF export advertised in its ``<head>`` is
    downloaded, and the configured semantic attributes are scanned for a
    value that ``util.parse_geo`` can turn into a latitude/longitude pair.
    When no attribute yields coordinates, the configured relations are
    followed to other pages and the search is repeated there.
    """

    def __init__(self, format_url, attributes=None, relations=None,
                 prefer_semantic=False, transform_string=None):
        """Store the geocoder configuration.

        format_url -- ``%``-style format string turned into a page URL by
            ``get_url``.
        attributes -- iterable of semantic attribute names to inspect
            (``None`` is treated as "no attributes").
        relations -- iterable of semantic relation names to follow
            (``None`` is treated as "no relations").
        prefer_semantic -- stored on the instance; not read by any method
            of this class.
        transform_string -- optional callable applied to the query string
            before it is substituted into ``format_url``.
        """
        self.format_url = format_url
        self.attributes = attributes
        self.relations = relations
        self.prefer_semantic = prefer_semantic
        self.transform_string = transform_string

    def get_url(self, string):
        """Return the page URL for ``string``.

        ``transform_string`` is applied first when one was configured; with
        the default of ``None`` the string is used unchanged.
        """
        if self.transform_string is not None:
            string = self.transform_string(string)
        return self.format_url % string

    def parse_rdf_link(self, page, mime_type='application/rdf+xml'):
        """Parse the URL of the RDF link from the <head> of ``page``.

        ``page`` may be raw markup (or anything else ``BeautifulSoup``
        accepts) or an already parsed ``BeautifulSoup`` document.  Returns
        ``None`` when the page has no ``<head>``, no matching
        ``<link rel="alternate">`` or an empty/missing ``href``.
        """
        # Do not re-parse a document that the caller already parsed.
        if isinstance(page, BeautifulSoup):
            soup = page
        else:
            soup = BeautifulSoup(page)

        head = soup.head
        if head is None:
            return None

        link = head.find('link', rel='alternate', type=mime_type)
        if link is None:
            return None
        return link.get('href') or None

    def parse_rdf_things(self, data):
        """Parse the ``smw:Thing`` elements of an RDF export.

        ``data`` is the RDF/XML document as a string or bytes.

        Returns a ``(thing_map, thing)`` tuple.  ``thing_map`` maps each
        thing's ``rdf:about`` value to the ``rdf:resource`` of its first
        ``smw:hasArticle`` child.  ``thing`` is the first ``smw:Thing``
        element in document order, or ``None`` when there is none.
        """
        dom = xml.dom.minidom.parseString(data)
        thing_map = {}
        thing = None
        # Walk the things back to front so that the loop variable ends on
        # the first thing in the document, and so that earlier things win
        # when two of them share the same ``rdf:about``.
        for thing in reversed(dom.getElementsByTagName('smw:Thing')):
            if not thing.hasAttribute('rdf:about'):
                continue
            articles = thing.getElementsByTagName('smw:hasArticle')
            # Things without a usable article link cannot be mapped to a URL.
            if not articles or not articles[0].hasAttribute('rdf:resource'):
                continue
            name = thing.getAttribute('rdf:about')
            thing_map[name] = articles[0].getAttribute('rdf:resource')

        return (thing_map, thing)

    def transform_semantic(self, string):
        """Normalize semantic attribute and relation names by replacing spaces
        with underscores and capitalizing the result."""
        return string.replace(' ', '_').capitalize()

    def get_relations(self, thing, relations=None):
        """Yield ``(relation, resource)`` pairs found below ``thing``.

        ``relations`` defaults to the relations configured on the instance.
        Each name is normalized with ``transform_semantic`` and looked up as
        a ``relation:<Name>`` element; ``resource`` is that element's
        ``rdf:resource`` attribute.
        """
        if relations is None:
            relations = self.relations or ()

        for relation in relations:
            relation = self.transform_semantic(relation)
            for node in thing.getElementsByTagName('relation:' + relation):
                # A relation without a target resource cannot be followed.
                if not node.hasAttribute('rdf:resource'):
                    continue
                yield (relation, node.getAttribute('rdf:resource'))

    def get_attributes(self, thing, attributes=None):
        """Yield ``(attribute, value)`` pairs found below ``thing``.

        ``attributes`` defaults to the attributes configured on the
        instance.  Each name is normalized with ``transform_semantic`` and
        looked up as an ``attribute:<Name>`` element; ``value`` is the
        stripped node value of that element's first child.
        """
        if attributes is None:
            attributes = self.attributes or ()

        for attribute in attributes:
            attribute = self.transform_semantic(attribute)
            for node in thing.getElementsByTagName('attribute:' + attribute):
                child = node.firstChild
                # Skip empty elements and children that carry no text.
                if child is None or child.nodeValue is None:
                    continue
                yield (attribute, child.nodeValue.strip())

    def get_thing_label(self, thing):
        """Return the ``rdfs:label`` text of ``thing`` via ``util.get_first_text``."""
        return util.get_first_text(thing, 'rdfs:label')

    def geocode_url(self, url, attempted=None):
        """Geocode the wiki page at ``url``.

        ``attempted`` is the set of URLs already visited during this lookup;
        it is updated in place and used to avoid cyclic relationships.

        Returns ``(name, (latitude, longitude))``.  All three values are
        ``None`` when the page advertises no RDF export or the export
        contains no ``smw:Thing``.  The coordinates are ``None`` when
        neither the attributes nor the followed relations produce a pair.
        """
        # Local import: only this method needs it.
        from urllib.parse import urljoin

        if attempted is None:
            attempted = set()
        # Remember the page itself so a relation cycle cannot fetch it again.
        attempted.add(url)

        util.logger.debug("Fetching %s..." % url)
        with urlopen(url) as response:
            page = response.read()

        rdf_url = self.parse_rdf_link(page)
        if rdf_url is None:
            return (None, (None, None))
        # The link's href may be relative to the page it was found on.
        rdf_url = urljoin(url, rdf_url)

        util.logger.debug("Fetching %s..." % rdf_url)
        with urlopen(rdf_url) as response:
            data = response.read()

        things, thing = self.parse_rdf_things(data)
        if thing is None:
            return (None, (None, None))
        name = self.get_thing_label(thing)

        # Stay well-defined when no attribute is present at all.
        latitude = longitude = None
        for attribute, value in self.get_attributes(thing):
            latitude, longitude = util.parse_geo(value)
            if None not in (latitude, longitude):
                break

        if None in (latitude, longitude):
            for relation, resource in self.get_relations(thing):
                # Prefer the article URL of a known thing; otherwise use the
                # resource identifier itself as the URL.
                next_url = things.get(resource, resource)
                if next_url in attempted:  # Avoid cyclic relationships.
                    continue
                attempted.add(next_url)
                name, (latitude, longitude) = self.geocode_url(next_url,
                                                               attempted)
                if None not in (name, latitude, longitude):
                    break

        return (name, (latitude, longitude))