#!/usr/bin/python ## Copyright (C) 2004 Crutcher Dunnavant ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #!-- ================================================================== --# # Pattern: a(b){3,4}c # Key: # (X) := 'if (X)' # [X] := 'do (X)' # Graph: # +--(c0<4)--+ # | | # v | # | # s0 -('a')-> s1 -> s2 -('b')-> s3 -(c0>=2)-> s4 -('c')-> s5 # [c0=0] [s0++] NORMAL=0 LOOP_INIT=1 LOOP_SENTRY=2 regex = { 'accept-states': [5], 'counters' : 1, 'states': [ # State 0 { 'type': NORMAL, 'accept': 0, 'epsilon-transitions': [], 'test-transitions': [ ('a', 1), ] }, # State 1 { 'type': LOOP_INIT, 'counter': 0, 'next-state': 2 }, # State 2 { 'type': NORMAL, 'accept': 0, 'epsilon-transitions': [], 'test-transitions': [('b', 3)] }, # State 3 { 'type': LOOP_SENTRY, 'counter': 0, 'min': 3, 'max': 4, 'loop-next': 2, 'loop-break': 4 }, # State 4 { 'type': NORMAL, 'accept': 0, 'epsilon-transitions': [], 'test-transitions': [('c', 5)] }, # State 5 { 'type': NORMAL, 'accept': 1, 'epsilon-transitions': [], 'test-transitions': [] }, ], } #!-- ================================================================== --# class StateTable: def __init__(self): self.table = [] def addState(self, i, state): if len(self.table) > i: step_table = self.table[i] elif len(self.table) == i: step_table = [] self.table.append(step_table) else: raise ValueException, "Invalid step %d" % i if state not in step_table: print "\t\tadding state (%d, %s)" % (i, state) step_table.append(state) else: print "\t\tstate (%d, %d) already present" \ % (i, state) def __str__(self): s = "" for i in range(len(self.table)): s = s + ("State %d: %s\n" \ % (i, str(self.table[i]))) return s #!-- ================================================================== --# def process_regex(regex, input): print "Processing Regex on input \"%s\"" % input st = StateTable() # i == the number of characters of input consumed so far i = 0 init_counters = map(lambda x: 0, range(regex['counters'])) # We start at state 0, with no characters consumed. st.addState(i, (0, init_counters)) match = None while i < len(input) and len(st.table) > i: next_char = input[i] print "step %d, next_char = '%s'" % (i, next_char) for (j, counters) in st.table[i]: print "\tvisiting", (i, (j, counters)) state = regex['states'][j] if state['type'] == NORMAL: if state['accept']: match = input[:i] for k in state['epsilon-transitions']: st.addState(i, (k, counters)) for (t, k) in state['test-transitions']: if next_char == t: st.addState \ (i + 1, (k, counters)) elif state['type'] == LOOP_INIT: l = counters[:] l[state['counter']] = 0 st.addState \ (i, (state['next-state'], l)) elif state['type'] == LOOP_SENTRY: l = counters[:] c = l[state['counter']] c += 1 l[state['counter']] = c if state['min'] < 0 or c >= state['min']: st.addState \ (i, (state['loop-break'], l)) if state['max'] < 0 or c < state['max']: st.addState \ (i, (state['loop-next'], l)) i = i + 1 # If we ate all the input, scan the last state set for accepters. if i == len(input) and len(st.table) > i: for (j, counters) in st.table[i]: state = regex['states'][j] if state['type'] == NORMAL: if state['accept']: match = input[:i] print print "State Table Dump:" print st return match #!-- ================================================================== --# import sys def main(): if len(sys.argv) > 1: input = sys.argv[1] else: input = "abbbc monkey" match = process_regex(regex, input) if match: print "matched \"%s\"" % match else: print "no match" if __name__ == "__main__": main()