/usr/share/pyshared/Bio/triefind.py is in python-biopython 1.58-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | """
Given a trie, find all occurrences of a word in the trie in a string.
Like searching a string for a substring, except that the substring is
any word in a trie.
Functions:
match Find longest key in a trie matching the beginning of the string.
match_all Find all keys in a trie matching the beginning of the string.
find Find keys in a trie matching anywhere in a string.
find_words Find keys in a trie matching whole words in a string.
"""
import string
import re
def match(string, trie):
"""match(string, trie) -> longest key or None
Find the longest key in the trie that matches the beginning of the
string.
"""
longest = None
for i in range(len(string)):
substr = string[:i+1]
if not trie.has_prefix(substr):
break
if trie.has_key(substr):
longest = substr
return longest
def match_all(string, trie):
"""match_all(string, trie) -> list of keys
Find all the keys in the trie that matches the beginning of the
string.
"""
matches = []
for i in range(len(string)):
substr = string[:i+1]
if not trie.has_prefix(substr):
break
if trie.has_key(substr):
matches.append(substr)
return matches
def find(string, trie):
"""find(string, trie) -> list of tuples (key, start, end)
Find all the keys in the trie that match anywhere in the string.
"""
results = []
start = 0 # index to start the search
while start < len(string):
# Look for a match.
keys = match_all(string[start:], trie)
for key in keys:
results.append((key, start, start+len(key)))
start += 1
return results
DEFAULT_BOUNDARY_CHARS = string.punctuation + string.whitespace
def find_words(string, trie):
"""find_words(string, trie) -> list of tuples (key, start, end)
Find all the keys in the trie that match full words in the string.
Word boundaries are defined as any punctuation or whitespace.
"""
_boundary_re = re.compile(r"[%s]+" % re.escape(DEFAULT_BOUNDARY_CHARS))
results = []
start = 0 # index of word boundary
while start < len(string):
# Look for a match.
keys = match_all(string[start:], trie)
for key in keys:
l = len(key)
# Make sure it ends at a boundary.
if start+l == len(string) or \
_boundary_re.match(string[start+l]):
results.append((key, start, start+l))
# Move forward to the next boundary.
m = _boundary_re.search(string, start)
if m is None:
break
start = m.end()
return results
|