/usr/lib/python2.7/dist-packages/pynlpl/fsa.py is in python-pynlpl 1.1.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | #---------------------------------------------------------------
# PyNLPl - Finite State Automata
# by Maarten van Gompel
# Centre for Language Studies
# Radboud University Nijmegen
# http://proycon.github.com/folia
# http://www.github.com/proycon/pynlpl
# proycon AT anaproy DOT nl
#
# Partially based/inspired on code by Xiayun Sun (https://github.com/xysun/regex)
#
# Licensed under GPLv3
#
#----------------------------------------------------------------
from __future__ import print_function, unicode_literals, division, absolute_import
import sys
class State(object):
def __init__(self, **kwargs):
if 'epsilon' in kwargs:
self.epsilon = kwargs['epsilon'] # epsilon-closure (lis of states)
else:
self.epsilon = [] # epsilon-closure
if 'transitions' in kwargs:
self.transitions = kwargs['transitions']
else:
self.transitions = [] #(matchitem, matchfunction(value), state)
if 'final' in kwargs:
self.final = bool(kwargs['final']) # ending state
else:
self.final = False
self.transitioned = None #will be a tuple (state, matchitem) indicating how this state was reached
class NFA(object):
"""Non-deterministic finite state automaton. Can be used to model DFAs as well if your state transitions are not ambiguous and epsilon is empty."""
def __init__(self, initialstate):
self.initialstate = initialstate
def run(self, sequence, mustmatchall=False,debug=False):
def add(state, states):
"""add state and recursively add epsilon transitions"""
assert isinstance(state, State)
if state in states:
return
states.add(state)
for eps in state.epsilon: #recurse into epsilon transitions
add(eps, states)
current_states = set()
add(self.initialstate, current_states)
if debug: print("Starting run, current states: ", repr(current_states),file=sys.stderr)
for offset, value in enumerate(sequence):
if not current_states: break
if debug: print("Value: ", repr(value),file=sys.stderr)
next_states = set()
for state in current_states:
for matchitem, matchfunction, trans_state in state.transitions:
if matchfunction(value):
trans_state.transitioned = (state, matchitem)
add(trans_state, next_states)
current_states = next_states
if debug: print("Current states: ", repr(current_states),file=sys.stderr)
if not mustmatchall:
for s in current_states:
if s.final:
if debug: print("Final state reached",file=sys.stderr)
yield offset+1
if mustmatchall:
for s in current_states:
if s.final:
if debug: print("Final state reached",file=sys.stderr)
yield offset+1
def match(self, sequence):
try:
return next(self.run(sequence,True)) == len(sequence)
except StopIteration:
return False
def find(self, sequence, debug=False):
l = len(sequence)
for i in range(0,l):
for length in self.run(sequence[i:], False, debug):
yield sequence[i:i+length]
def __iter__(self):
return iter(self._states(self.initialstate))
def _states(self, state, processedstates=[]): #pylint: disable=dangerous-default-value
"""Iterate over all states in no particular order"""
processedstates.append(state)
for nextstate in state.epsilon:
if not nextstate in processedstates:
self._states(nextstate, processedstates)
for _, nextstate in state.transitions:
if not nextstate in processedstates:
self._states(nextstate, processedstates)
return processedstates
def __repr__(self):
out = []
for state in self:
staterep = repr(state)
if state is self.initialstate:
staterep += " (INITIAL)"
for nextstate in state.epsilon:
nextstaterep = repr(nextstate)
if nextstate.final:
nextstaterep += " (FINAL)"
out.append( staterep + " -e-> " + nextstaterep )
for item, _, nextstate in state.transitions:
nextstaterep = repr(nextstate)
if nextstate.final:
nextstaterep += " (FINAL)"
out.append( staterep + " -(" + repr(item) + ")-> " + nextstaterep )
return "\n".join(out)
|