#!/usr/bin/python ## Copyright (C) 2004 Crutcher Dunnavant ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #!-- ================================================================== --# # Pattern: a(b)*c # Graph: # +----------------(e)-----------------+ # | | # v | # | # [s0] -('a')-> [s1] -(e)-> [s2] -('b')-> [s3] -(e)-> [s4] -('c')-> [s5] # | # | ^ # | | # +----------------(e)-----------------+ regex = { 'accept-states': [5], 'states': [ # State 0 { 'epsilon-transitions': [], 'test-transitions': [ ('a', 1), ] }, # State 1 { 'epsilon-transitions': [2, 4], 'test-transitions': [] }, # State 2 { 'epsilon-transitions': [], 'test-transitions': [ ('b', 3) ] }, # State 3 { 'epsilon-transitions': [4], 'test-transitions': [] }, # State 4 { 'epsilon-transitions': [1], 'test-transitions': [ ('c', 5) ] }, # State 5 { 'epsilon-transitions': [], 'test-transitions': [] }, ], } #!-- ================================================================== --# class StateTable: def __init__(self): self.table = [] def addState(self, i, state): if len(self.table) > i: step_table = self.table[i] elif len(self.table) == i: step_table = [] self.table.append(step_table) else: raise ValueException, "Invalid step %d" % i if state not in step_table: print "\t\tadding state (%d, %d)" % (i, state) step_table.append(state) else: print "\t\tstate (%d, %d) already present" \ % (i, state) def __str__(self): s = "" for i in range(len(self.table)): s = s + ("State %d: %s\n" \ % (i, str(self.table[i]))) return s #!-- ================================================================== --# def process_regex(regex, input): print "Processing Regex on input \"%s\"" % input st = StateTable() # i == the number of characters of input consumed so far i = 0 # We start at state 0, with no characters consumed. st.addState(i, 0) states = regex['states'] while i < len(input) and len(st.table) > i: next_char = input[i] print "step %d, next_char = '%s'" % (i, next_char) for state in st.table[i]: print "\tvisiting", (i, state) for s in states[state]['epsilon-transitions']: st.addState(i, s) for (t, s) in states[state]['test-transitions']: if next_char == t: st.addState(i + 1, s) i = i + 1 print print "State Table Dump:" print st match = None for i in range(len(st.table)): for s in st.table[i]: if s in regex['accept-states']: match = input[:i] return match #!-- ================================================================== --# import sys def main(): if len(sys.argv) > 1: input = sys.argv[1] else: input = "abbbc monkey" match = process_regex(regex, input) if match: print "matched \"%s\"" % match else: print "no match" if __name__ == "__main__": main()