/usr/lib/python3/dist-packages/textile/utils.py is in python3-textile 1:3.0.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | from __future__ import unicode_literals
import six
try:
import regex as re
except ImportError:
import re
from six.moves import urllib, html_parser
urlparse = urllib.parse.urlparse
HTMLParser = html_parser.HTMLParser
from collections import OrderedDict
from xml.etree import ElementTree
from textile.regex_strings import valign_re_s, halign_re_s
def decode_high(text):
    """Decode a numeric HTML character reference.

    ``text`` is the code point (an int or a string of digits); it is wrapped
    as ``&#<text>;`` and the resulting entity is unescaped to the character
    it stands for.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is the
    # supported replacement. Python 2 has no ``html`` module, so fall back to
    # the parser method there.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape
    return unescape('&#{0};'.format(text))
def encode_high(text):
    """Return the integer code point of the single character ``text``.

    The number is what goes between ``&#`` and ``;`` in a numeric HTML
    entity.
    """
    code_point = ord(text)
    return code_point
def encode_html(text, quotes=True):
    """Return text that's safe for an HTML attribute.

    ``&``, ``<`` and ``>`` are always replaced by their entities. When
    ``quotes`` is true, single and double quotes are replaced by numeric
    entities as well.
    """
    # The ampersand must be handled first, otherwise the ampersands
    # introduced by the later replacements would be escaped a second time.
    replacements = [
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
    ]
    if quotes:
        replacements.extend([
            ("'", '&#39;'),
            ('"', '&#34;'),
        ])
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
def generate_tag(tag, content, attributes=None):
    """Generate a complete html tag using the ElementTree module.

    ``tag`` and ``content`` are strings, ``attributes`` is a dictionary (or
    None for a tag without attributes). If ``tag`` is empty, ``content`` is
    returned on its own as text. As a convenience, if the content is ' /', a
    self-closing tag is generated.
    """
    content = six.text_type(content)
    if not tag:
        return content

    # ElementTree.Element() requires a dict for ``attrib`` and raises
    # TypeError on None, so the documented default must be normalised here.
    if attributes is None:
        attributes = {}

    # In PY2, ElementTree tostringlist only works with bytes, not with
    # unicode().
    enc = 'UTF-8' if six.PY2 else 'unicode'

    element = ElementTree.Element(tag, attrib=attributes)
    # FIXME: Kind of an ugly hack.  There *must* be a cleaner way.  Adding
    # the text by assigning it to element.text results in non-ascii text
    # being html-entity encoded.  Not bad, but not entirely matching
    # php-textile either.  Instead, the empty element is serialised into its
    # pieces and the content is spliced in just before the last piece.
    pieces = ElementTree.tostringlist(element, encoding=enc, method='html')
    if six.PY2:
        pieces = [piece.decode(enc) for piece in pieces]
    pieces.insert(len(pieces) - 1, content)
    return ''.join(pieces)
def has_raw_text(text):
    """Tell whether ``text`` contains anything not already wrapped in a
    block-level tag."""
    # The php version orders this list of tags differently.  What matters
    # here is that ``pre`` comes before ``p``; otherwise the regex module
    # matches only the "p" of "pre" and pre blocks are not recognised.
    block_re = re.compile(
        r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
        re.S)
    void_re = re.compile(r'<(hr|br)[^>]*?/>')

    # Drop complete block elements first, then self-closed hr/br tags;
    # whatever survives is raw text.
    leftover = block_re.sub('', text.strip()).strip()
    leftover = void_re.sub('', leftover)
    return leftover != ''
def is_rel_url(url):
    """Return True when ``url`` has neither a scheme nor a network
    location, i.e. it is a relative url."""
    parts = urlparse(url)
    scheme, netloc = parts[0], parts[1]
    return not (scheme or netloc)
def is_valid_url(url):
    """Return True when parsing ``url`` yields an empty scheme, False
    otherwise."""
    return urlparse(url).scheme == ''
def list_type(list_string):
    """Map a list marker to its one-letter list kind.

    Returns 'u' when ``list_string`` starts with '*', 'o' when it starts
    with '#', and 'd' for anything else.
    """
    if list_string.startswith('*'):
        return 'u'
    if list_string.startswith('#'):
        return 'o'
    return 'd'
def normalize_newlines(string):
    """Strip ``string`` and bring its line endings into a canonical form.

    CRLF and bare CR become LF, lines holding only spaces or tabs are
    emptied, and a space is appended after a double quote that ends the
    text.
    """
    whitespace_only_line = re.compile(r'^[ \t]*\n', flags=re.M)

    text = re.sub(r'\r\n?', '\n', string.strip())
    text = whitespace_only_line.sub('\n', text)
    return re.sub(r'"$', '" ', text)
def parse_attributes(block_attributes, element=None, include_id=True):
    """Parse a textile attribute string into an OrderedDict of attributes.

    ``block_attributes`` is the raw attribute string; an empty value yields
    an empty mapping. ``element`` enables element-specific syntax: span
    numbers for 'td', vertical alignment for 'td' and 'tr', and span/width
    for 'col'. An id found in the class part is only emitted when
    ``include_id`` is true.

    Keys are inserted in the order colspan, style, class, id, lang, rowspan,
    span, width, and only for values that are non-empty.
    """
    attributes = OrderedDict()
    if not block_attributes:
        return attributes

    vertical = {'^': 'top', '-': 'middle', '~': 'bottom'}
    horizontal = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
    styles = []
    css_class = lang = colspan = rowspan = element_id = span = width = ''
    remaining = block_attributes

    if element == 'td':
        # A backslash followed by digits is the column span, a slash
        # followed by digits the row span.
        found = re.search(r'\\(\d+)', remaining)
        if found:
            colspan = found.group(1)
        found = re.search(r'/(\d+)', remaining)
        if found:
            rowspan = found.group(1)

    if element in ('td', 'tr'):
        found = re.search(r'(^{0})'.format(valign_re_s), remaining)
        if found:
            styles.append(
                "vertical-align:{0}".format(vertical[found.group(1)]))

    # {...} holds raw style declarations separated by semicolons.
    found = re.search(r'\{([^}]*)\}', remaining)
    if found:
        styles.extend(found.group(1).rstrip(';').split(';'))
        remaining = remaining.replace(found.group(0), '')

    # [...] holds the language.
    found = re.search(r'\[([^\]]+)\]', remaining, re.U)
    if found:
        lang = found.group(1)
        remaining = remaining.replace(found.group(0), '')

    # (...) holds the class, possibly followed by "#id" (split off below).
    found = re.search(r'\(([^()]+)\)', remaining, re.U)
    if found:
        css_class = found.group(1)
        remaining = remaining.replace(found.group(0), '')

    # With the class removed, a run of parentheses is padding: one em per
    # parenthesis, opening ones on the left and closing ones on the right.
    found = re.search(r'([(]+)', remaining)
    if found:
        styles.append("padding-left:{0}em".format(len(found.group(1))))
        remaining = remaining.replace(found.group(0), '')

    found = re.search(r'([)]+)', remaining)
    if found:
        styles.append("padding-right:{0}em".format(len(found.group(1))))
        remaining = remaining.replace(found.group(0), '')

    found = re.search(r'({0})'.format(halign_re_s), remaining)
    if found:
        styles.append("text-align:{0}".format(horizontal[found.group(1)]))

    found = re.search(r'^(.*)#(.*)$', css_class)
    if found:
        css_class, element_id = found.group(1), found.group(2)

    if element == 'col':
        # Every part of this pattern is optional, so the match always
        # succeeds; groups that did not take part come back as None.
        span, width = re.match(
            r'(?:\\(\d+)\.?)?\s*(\d+)?', remaining).groups()

    style_value = ''
    if styles:
        # Previous splits that created the style list may have introduced
        # extra whitespace into the list elements.  Clean it up.
        style_value = '{0};'.format("; ".join(s.strip() for s in styles))

    candidates = (
        ('colspan', colspan),
        ('style', style_value),
        ('class', css_class),
        ('id', element_id if include_id else ''),
        ('lang', lang),
        ('rowspan', rowspan),
        ('span', span),
        ('width', width),
    )
    for key, value in candidates:
        if value:
            attributes[key] = value
    return attributes
def pba(block_attributes, element=None, include_id=True):
    """Parse block attributes and render them as an html attribute string.

    The result is either the empty string or a string with a leading space
    followed by space-separated ``name="value"`` pairs, in the order
    parse_attributes produced them.
    """
    parsed = parse_attributes(block_attributes, element, include_id)
    if not parsed:
        return ''
    # Giving every pair its own leading space yields the same text as
    # joining the pairs with spaces and prefixing the result with one.
    return ''.join(
        ' {0}="{1}"'.format(name, value) for name, value in parsed.items())
|