/usr/lib/python2.7/dist-packages/sphinx/versioning.py is in python-sphinx 1.6.7-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | # -*- coding: utf-8 -*-
"""
sphinx.versioning
~~~~~~~~~~~~~~~~~
Implements the low-level algorithms Sphinx uses for the versioning of
doctrees.
:copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from uuid import uuid4
from operator import itemgetter
from itertools import product
from six import iteritems
from six.moves import range, zip_longest
if False:
# For type annotation
from typing import Any, Iterator # NOQA
from docutils import nodes # NOQA
try:
import Levenshtein
IS_SPEEDUP = True
except ImportError:
IS_SPEEDUP = False
# anything below that ratio is considered equal/changed
VERSIONING_RATIO = 65
def add_uids(doctree, condition):
# type: (nodes.Node, Any) -> Iterator[nodes.Node]
"""Add a unique id to every node in the `doctree` which matches the
condition and yield the nodes.
:param doctree:
A :class:`docutils.nodes.document` instance.
:param condition:
A callable which returns either ``True`` or ``False`` for a given node.
"""
for node in doctree.traverse(condition):
node.uid = uuid4().hex
yield node
def merge_doctrees(old, new, condition):
# type: (nodes.Node, nodes.Node, Any) -> Iterator[nodes.Node]
"""Merge the `old` doctree with the `new` one while looking at nodes
matching the `condition`.
Each node which replaces another one or has been added to the `new` doctree
will be yielded.
:param condition:
A callable which returns either ``True`` or ``False`` for a given node.
"""
old_iter = old.traverse(condition)
new_iter = new.traverse(condition)
old_nodes = []
new_nodes = []
ratios = {}
seen = set()
# compare the nodes each doctree in order
for old_node, new_node in zip_longest(old_iter, new_iter):
if old_node is None:
new_nodes.append(new_node)
continue
if not getattr(old_node, 'uid', None):
# maybe config.gettext_uuid has been changed.
old_node.uid = uuid4().hex
if new_node is None:
old_nodes.append(old_node)
continue
ratio = get_ratio(old_node.rawsource, new_node.rawsource)
if ratio == 0:
new_node.uid = old_node.uid
seen.add(new_node)
else:
ratios[old_node, new_node] = ratio
old_nodes.append(old_node)
new_nodes.append(new_node)
# calculate the ratios for each unequal pair of nodes, should we stumble
# on a pair which is equal we set the uid and add it to the seen ones
for old_node, new_node in product(old_nodes, new_nodes):
if new_node in seen or (old_node, new_node) in ratios:
continue
ratio = get_ratio(old_node.rawsource, new_node.rawsource)
if ratio == 0:
new_node.uid = old_node.uid
seen.add(new_node)
else:
ratios[old_node, new_node] = ratio
# choose the old node with the best ratio for each new node and set the uid
# as long as the ratio is under a certain value, in which case we consider
# them not changed but different
ratios = sorted(iteritems(ratios), key=itemgetter(1)) # type: ignore
for (old_node, new_node), ratio in ratios:
if new_node in seen:
continue
else:
seen.add(new_node)
if ratio < VERSIONING_RATIO:
new_node.uid = old_node.uid
else:
new_node.uid = uuid4().hex
yield new_node
# create new uuids for any new node we left out earlier, this happens
# if one or more nodes are simply added.
for new_node in set(new_nodes) - seen:
new_node.uid = uuid4().hex
yield new_node
def get_ratio(old, new):
# type: (unicode, unicode) -> float
"""Return a "similiarity ratio" (in percent) representing the similarity
between the two strings where 0 is equal and anything above less than equal.
"""
if not all([old, new]):
return VERSIONING_RATIO
if IS_SPEEDUP:
return Levenshtein.distance(old, new) / (len(old) / 100.0)
else:
return levenshtein_distance(old, new) / (len(old) / 100.0)
def levenshtein_distance(a, b):
# type: (unicode, unicode) -> int
"""Return the Levenshtein edit distance between two strings *a* and *b*."""
if a == b:
return 0
if len(a) < len(b):
a, b = b, a
if not a:
return len(b)
previous_row = range(len(b) + 1)
for i, column1 in enumerate(a):
current_row = [i + 1]
for j, column2 in enumerate(b):
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (column1 != column2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row # type: ignore
return previous_row[-1]
|