/usr/lib/python3/dist-packages/geopy/util.py is in python3-geopy 0.95.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from sys import version_info
import re
import logging
import html.entities
import xml.dom.minidom
from xml.parsers.expat import ExpatError
# Types this module treats as plain numbers.  Decimal is appended only when
# the decimal module can be imported.
# NOTE(review): `int` appears twice -- presumably residue of an automated
# Python 2 ``(int, long, ...)`` conversion; the duplicate is harmless for
# isinstance() checks and is kept so the tuple's value does not change.
NUMBER_TYPES = (int, int, float)
try:
    from decimal import Decimal
except ImportError:
    pass
else:
    NUMBER_TYPES = NUMBER_TYPES + (Decimal,)
class NullHandler(logging.Handler):
    """Logging handler that discards every record handed to it."""

    def emit(self, record):
        """Accept ``record`` and deliberately do nothing with it."""
        return None
# Attach a discard-everything handler to the 'geopy' logger, then expose that
# same logger object as the module-level ``logger`` (getLogger returns the
# same instance for the same name).
logging.getLogger('geopy').addHandler(NullHandler())
logger = logging.getLogger('geopy')
def pairwise(seq):
    """
    Yield each pair of neighbouring items of ``seq`` as a 2-tuple.

    ``seq`` must support ``len()`` and integer indexing.  A sequence with
    fewer than two items yields nothing.
    """
    for right in range(1, len(seq)):
        yield seq[right - 1], seq[right]
def join_filter(sep, seq, pred=bool):
    """
    Join the items of ``seq`` that satisfy ``pred`` using ``sep``.

    Each kept item is converted with ``str()`` before joining.  With the
    default predicate, falsy items (``None``, ``''``, ``0`` ...) are dropped.
    """
    glue = sep.join
    pieces = []
    for item in seq:
        if pred(item):
            pieces.append(str(item))
    return glue(pieces)
def get_encoding(page, contents=None):
    """
    Work out the character encoding of a fetched page.

    The ``charset`` parameter reported by ``page.headers.get_param`` wins
    when it is present and non-empty.  Otherwise, if ``contents`` is given,
    it is parsed as XML and the encoding recorded on the parsed document is
    used.

    Returns the encoding name, or None when neither source yields one
    (including when ``contents`` is not well-formed XML).
    """
    # This module imports html.entities, which only exists on Python 3, so
    # the former Python 2 ``getparam`` branch could never run.
    charset = page.headers.get_param("charset")
    if charset:
        return charset
    if contents:
        # NOTE(review): contents presumably comes from a remote server;
        # xml.dom.minidom is documented as not secure against maliciously
        # constructed data -- confirm that is acceptable for the callers.
        try:
            return xml.dom.minidom.parseString(contents).encoding
        except ExpatError:
            # Not XML: fall through and report "unknown".
            pass
    return None
def decode_page(page):
    """
    Read ``page`` and return its body decoded to ``str``.

    The body is obtained with ``page.read()``.  The encoding comes from
    ``get_encoding(page, contents)``, falling back to iso-8859-1 when that
    returns nothing.
    """
    contents = page.read()
    # HTTP 1.1 defines iso-8859-1 as the 'implied' encoding if none is given
    encoding = get_encoding(page, contents) or 'iso-8859-1'
    # This module imports html.entities, which only exists on Python 3, so
    # the former Python 2 re-encoding branch could never run.
    return str(contents, encoding=encoding)
def get_first_text(node, tag_names, strip=None):
    """
    Return the stripped text of the first matching element under ``node``.

    ``tag_names`` is a single tag name or an iterable of names tried in
    order.  The first name for which ``node.getElementsByTagName`` finds
    anything decides the result: the ``nodeValue`` of that element's first
    child, with ``strip`` passed on to ``str.strip``.

    Returns None when ``node`` is falsy, when no name matches, or when the
    matched element has no first child or that child carries no text value.
    The caller's ``tag_names`` is never modified.
    """
    if isinstance(tag_names, str):
        tag_names = [tag_names]
    if not node:
        return None
    # Iterate instead of pop(0): popping emptied the caller's list and
    # failed outright on tuples.
    for tag_name in tag_names:
        matches = node.getElementsByTagName(tag_name)
        if not matches:
            continue
        child = matches[0].firstChild
        if not child:
            return None
        value = child.nodeValue
        # A first child that is itself an element has nodeValue None;
        # report "no text" rather than failing on None.strip().
        if value is None:
            return None
        return value.strip(strip)
    return None
# NOTE(review): this repeats a join_filter definition that appears earlier in
# this file; being later, this is the one that stays bound to the name.
def join_filter(sep, seq, pred=bool):
    """
    Return the ``str()`` of every item of ``seq`` accepted by ``pred``,
    joined with ``sep``.
    """
    return sep.join(list(map(str, (item for item in seq if pred(item)))))
import re, html.entities
def unescape(text):
    """
    Replace HTML or XML character references and entities in a text string.

    Numeric references (``&#65;``, ``&#x41;``) and named entities found in
    ``html.entities.name2codepoint`` (``&amp;``) are replaced by the
    character they denote.  Anything that cannot be resolved -- an unknown
    name, malformed digits, or a number that ``chr()`` rejects -- is left
    in the text unchanged.
    """
    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                if ref[:3] == "&#x":
                    return chr(int(ref[3:-1], 16))
                return chr(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: digits are malformed or the code point is out
                # of range.  OverflowError: the number is too large for
                # chr() to accept at all (e.g. a 20-digit reference).
                pass
        else:
            # named entity
            try:
                return chr(html.entities.name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave as is

    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    return re.sub(r"&#?\w+;", fixup, text)
# Publish ``reversed`` as a module-level name: the built-in when the
# interpreter provides one, otherwise an index-based generator fallback.
try:
    reversed = reversed
except NameError:
    def reversed(seq):
        """Yield the items of ``seq`` from last to first, by index."""
        for index in range(len(seq) - 1, -1, -1):
            yield seq[index]
|