#!/usr/bin/python ## Copyright (C) 2004 Crutcher Dunnavant ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #!-- ================================================================== --# # Pattern: a(b){3,4}c # Key: # (X) := 'if (X)' # [X] := 'do (X)' # Graph: # +--(k0<4)--+ # | | # v | # | # s0 -('a')-> s1 -> s2 -('b')-> s3 -(k0>=2)-> s4 -('k')-> s5 # [k0=0] [k0++] NORMAL=0 LOOP_INIT=1 LOOP_SENTRY=2 regex = { 'counters' : 1, 'char-classes': [ (1, 97, 97), (1, 98, 98), (1, 99, 99), ], 'states': [ # State 0 { 'type': NORMAL, 'accept': 0, 'epsilon-transitions': [], 'test-transitions': [ (0, 1), ] }, # State 1 { 'type': LOOP_INIT, 'counter': 0, 'next-state': 2 }, # State 2 { 'type': NORMAL, 'accept': 0, 'epsilon-transitions': [], 'test-transitions': [ (1, 3) ] }, # State 3 { 'type': LOOP_SENTRY, 'counter': 0, 'min': 3, 'max': 4, 'loop-next': 2, 'loop-break': 4 }, # State 4 { 'type': NORMAL, 'accept': 0, 'epsilon-transitions': [], 'test-transitions': [ (2, 5) ] }, # State 5 { 'type': NORMAL, 'accept': 1, 'epsilon-transitions': [], 'test-transitions': [] }, ], } #!-- ================================================================== --# class StateTable: def __init__(self): self.table = [] def addState(self, i, state): if len(self.table) > i: step_table = self.table[i] elif len(self.table) == i: step_table = [] self.table.append(step_table) else: raise ValueException, "Invalid step %d" % i if state not in step_table: print "\t\tadding state (%d, %s)" % (i, state) step_table.append(state) else: print "\t\tstate (%d, %d) already present" \ % (i, state) def __str__(self): s = "" for i in range(len(self.table)): s = s + ("State %d: %s\n" \ % (i, str(self.table[i]))) return s #!-- ================================================================== --# def test_char(char, class_desc): c = ord(char) n = class_desc[0] for i in range(n): start = class_desc[(i*2) + 1] stop = class_desc[(i*2) + 2] if c >= start and c <= stop: return True return False def process_regex(regex, input): print "Processing Regex on input \"%s\"" % input st = StateTable() # i == the number of characters of input consumed so far i = 0 init_counters = map(lambda x: 0, range(regex['counters'])) # We start at state 0, with no characters consumed. st.addState(i, (0, init_counters)) match = None while i < len(input) and len(st.table) > i: next_char = input[i] print "step %d, next_char = '%s'" % (i, next_char) for (j, counters) in st.table[i]: print "\tvisiting", (i, (j, counters)) state = regex['states'][j] if state['type'] == NORMAL: if state['accept']: match = input[:i] for k in state['epsilon-transitions']: st.addState(i, (k, counters)) for (t, k) in state['test-transitions']: char_class = regex['char-classes'][t] if test_char(next_char, char_class): st.addState \ (i + 1, (k, counters)) elif state['type'] == LOOP_INIT: l = counters[:] l[state['counter']] = 0 st.addState \ (i, (state['next-state'], l)) elif state['type'] == LOOP_SENTRY: l = counters[:] k = l[state['counter']] k += 1 l[state['counter']] = k if k >= state['min']: st.addState \ (i, (state['loop-break'], l)) # A max of 0 would never be visited, so # this value is used to indicate that there # is no upper bound. if state['max'] == 0 or k < state['max']: st.addState \ (i, (state['loop-next'], l)) i = i + 1 # If we ate all the input, scan the last state set for accepters. if i == len(input) and len(st.table) > i: for (j, counters) in st.table[i]: state = regex['states'][j] if state['type'] == NORMAL: if state['accept']: match = input[:i] print print "State Table Dump:" print st return match #!-- ================================================================== --# import sys def main(): if len(sys.argv) > 1: input = sys.argv[1] else: input = "abbbc monkey" match = process_regex(regex, input) if match: print "matched \"%s\"" % match else: print "no match" if __name__ == "__main__": main()