Changeset 14577 for lang/python
- Timestamp:
- 06/25/08 10:55:19 (5 months ago)
- Location:
- lang/python/dfareg/dfareg
- Files:
-
- 1 added
- 5 modified
-
__init__.py (modified) (1 diff)
-
algebra.py (modified) (3 diffs)
-
lexer.py (modified) (2 diffs)
-
nfa.py (modified) (1 diff)
-
nfa2dfa.py (modified) (1 diff)
-
nfabuilder.py (added)
Legend:
- Unmodified
- Added
- Removed
-
lang/python/dfareg/dfareg/__init__.py
r12561 r14577 2 2 # -*- coding: utf-8 -*- 3 3 from dfa import * 4 from nfa2dfa import nfa2dfa 5 import lexer 4 6 5 import nfa 6 import lexer 7 def strict_match(regexp, string): 8 nfastate = lexer.compile_to_nfa(regexp) 9 automaton = nfa.NoneterministicFiniteAutomaton(nfastate) 10 return automaton.doesAccept(string) 7 def strict_match_nfa(regexp, string): 8 nfa = lexer.compile_to_nfa(regexp) 9 return nfa.get_runtime().does_accept(string) 10 11 def strict_match_dfa(regexp, string): 12 nfa = lexer.compile_to_nfa(regexp) 13 dfa = nfa2dfa(nfa) 14 return dfa.get_runtime().does_accept(string) 15 16 strict_match = strict_match_dfa 11 17 12 18 def print_nfa(regexp): 13 nfastate = lexer.compile_to_nfa(regexp) 14 for s in nfastate.all_states(): 19 nfa = lexer.compile_to_nfa(regexp) 20 print "[START]" 21 print nfa.start 22 print "[STATES]" 23 for s in nfa.states: 15 24 print s 25 print "[ACCEPTS]" 26 for s in nfa.accepts: 27 print s 28 print "[TRANSITION]" 29 print "not printable" 30 # for k, v in nfafrag.map.iteritems(): 31 # print k, v 16 32 17 33 -
lang/python/dfareg/dfareg/algebra.py
r13020 r14577 2 2 # -*- coding: utf-8 -*- 3 3 4 class AbstractSet(object): 5 def issubset(self, set_): 6 for elem in self: 7 if elem not in set_: return False 8 return True 9 10 4 11 # ベキ集合 5 class PowerSet( object):12 class PowerSet(AbstractSet): 6 13 def __init__(self, a_set): 7 14 try: … … 17 24 except TypeError, te: 18 25 # a_set wasn't a set. 19 return 026 return False 20 27 21 28 def __iter__(self): … … 44 51 45 52 def power(a_set): 53 """ 54 >>> from dfareg import algebra 55 >>> ps = algebra.power([1,2,3]) 56 >>> set([]) in ps 57 True 58 >>> set([1,2,3,4]) in ps 59 False 60 >>> set([1,2,3]) in ps 61 True 62 >>> list(ps) 63 [frozenset([2]), frozenset([3]), frozenset([1, 2]), frozenset([]), frozenset([2, 3]), frozenset([1]), frozenset([1, 3]), frozenset([1, 2, 3])] 64 """ 46 65 return PowerSet(a_set) 66 67 68 class SubsetsIncludingElem(AbstractSet): 69 """ 70 the set of subsets of "super_" including an element of "sub". 71 72 >>> from dfareg import algebra 73 >>> s = algebra.subsets_including_elem(set([1,2,3]), set([1,3])) 74 >>> for i in s: 75 ... print i 76 ... 77 frozenset([3]) 78 frozenset([1, 2]) 79 frozenset([2, 3]) 80 frozenset([1]) 81 frozenset([1, 3]) 82 frozenset([1, 2, 3]) 83 >>> set([1,2]) in s 84 True 85 >>> set([2]) in s 86 False 87 """ 88 def __init__(self, super_, sub): 89 self.super_ = super_ 90 self.sub = sub 91 def __contains__(self, a_set): 92 try: 93 if not a_set.issubset(self.super_): 94 return False 95 if a_set & self.sub: 96 return True 97 except TypeError, te: 98 # a_set wasn't a set. 99 return False 100 def __iter__(self): 101 return iter( 102 set_ 103 for set_ in power(self.super_) 104 if set_ in self 105 ) 106 107 108 def subsets_including_elem(super_, sub): 109 """ 110 >>> from dfareg import algebra 111 >>> sie = algebra.subsets_including_elem(set([1,2,3,4]), set([2,3])) 112 >>> set([1,2]) in sie 113 True 114 >>> set([1,4]) in sie 115 False 116 >>> set([3]) in sie 117 True 118 >>> set([5]) in sie 119 False 120 """ 121 return SubsetsIncludingElem(super_, sub) 122 123 124 class UnicodeSet(AbstractSet): 125 """ 126 ユニコード文字の集合 127 """ 128 def __contains__(self, val): 129 # ordできるものなら文字と見なす 130 try: 131 ord(val) 132 return True 133 except: 134 return False 135 136 unicodeset = UnicodeSet() 137 138 139 def expand_set(set_, func, super_): 140 """ 141 expand set_ with func in super_. 142 func: set_ -> power(super_) 143 144 >>> from dfareg import algebra 145 >>> algebra.expand_set(set([2]), lambda x: set([x+1]), set([1,2,3])) 146 set([2, 3]) 147 >>> algebra.expand_set(set([2]), lambda x: set([x-1]), set([1,2,3])) 148 set([1, 2]) 149 >>> algebra.expand_set(set([4]), lambda x: set([x-1, x*2]), set([1,2,3,4,5,6,7,8,9,10])) 150 set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 151 >>> algebra.expand_set(set([1]), lambda x: set([x*3, x*2]), set([1,2,3,4,5,6,7,8,9,10])) 152 set([1, 2, 3, 4, 6, 8, 9]) 153 """ 154 que = set( set_ ) 155 returns = set() 156 while que: 157 elem = que.pop() 158 if elem in returns: 159 continue 160 returns.add(elem) 161 nexts = func(elem) & super_ 162 que |= nexts 163 164 return returns -
lang/python/dfareg/dfareg/lexer.py
r12561 r14577 3 3 #あ(い\|う|え)*お 4 4 5 from nfa import NFAState, NoneterministicFiniteAutomaton 5 from nfabuilder import NFABuilder 6 import algebra 6 7 7 8 VALUE = 0 … … 59 60 60 61 61 # NFAを組み立てる 62 def assemble_value(value): 63 a = NFAState() 64 b = NFAState(isAcceptState = True) 65 a.connect(value, b) 66 return a 62 class AssembleNFA(object): 63 """ 64 NFAを組み立てる 67 65 68 def assemble_union(n1, n2): 69 a = NFAState()70 a.connect(None, n1)71 a.connect(None, n2)72 return a66 E = E union T | T 67 T = T concat S | S 68 S = S * | S 69 F = ( E ) | 文字 70 """ 73 71 74 def assemble_concat(n1, n2): 75 #repr(n1) 76 #repr(n2) 77 #print "CONCAT", n1, n2 # ←バグル。なんで??? 78 for state in n1.all_accept_states(): 79 #repr(state) 80 #print "ACCEPT", state 81 state.connect(None, n2) 82 state.isAcceptState = False 83 return n1 72 def __init__(self, talkens): 73 self.stack = list(talkens) 74 self.builder = NFABuilder(algebra.unicodeset) 75 76 def _assemble_value(self, value): 77 frag = self.builder.new_fragment() 78 a = frag.new_state() 79 b = frag.new_state() 80 frag.connect(a, value, b) 81 frag.accepts.add(b) 82 frag.start = a 83 return frag 84 85 def _assemble_union(self, frag1, frag2): 86 frag = self.builder.merge_fragment(frag1, frag2) 87 a = frag.new_state() 88 frag.connect(a, None, frag1.start) 89 frag.connect(a, None, frag2.start) 90 frag.start = a 91 92 return frag 84 93 85 94 86 def assemble_star(n): 87 for state in n.all_accept_states(): 88 state.connect(None, n) 89 a = NFAState(isAcceptState = True) 90 a.connect(None, n) 91 return a 95 def _assemble_concat(self, frag1, frag2): 96 frag = self.builder.merge_fragment(frag1, frag2) 97 98 for state in frag1.accepts: 99 frag.connect(state, None, frag2.start) 100 frag.accepts.remove(state) 101 102 frag.start = frag1.start 103 104 return frag 92 105 93 106 94 # E = E union T | T 95 # T = T concat S | S 96 # S = S * | S 97 # F = ( E ) | 文字 107 def _assemble_star(self, frag): 108 for state in frag.accepts: 109 frag.connect(state, None, frag.start) 98 110 99 stack = [] 111 a = frag.new_state() 112 frag.accepts.add(a) 113 frag.connect(a, None, frag.start) 100 114 101 def expr(): 102 c = term() 103 while len(stack) > 0 and stack[0].kind == OPE_UNION: 104 stack.pop(0) 105 c = assemble_union(c, term()) 106 return c 115 frag.start = a 107 116 108 def term(): 109 c = star() 110 while len(stack) > 0 and stack[0].kind == OPE_CONCAT: 111 stack.pop(0) 112 c = assemble_concat(c, star()) 113 return c 117 return frag 114 118 115 def star():116 c = fact()117 while len(stack) > 0 and stack[0].kind == OPE_STAR:118 stack.pop(0)119 c = assemble_star(c)120 return c121 119 122 def fact(): 123 t = stack.pop(0) 124 if t.kind == VALUE: 125 return assemble_value(t.value) 126 if t.kind == LPAREN: 127 c = expr() 128 if not stack.pop(0).kind == RPAREN: 129 raise "invalid paren" 120 def _expr(self): 121 c = self._term() 122 while len(self.stack) > 0 and self.stack[0].kind == OPE_UNION: 123 self.stack.pop(0) 124 c = self._assemble_union(c, self._term()) 130 125 return c 131 raise "invalid: %s" % t 126 127 def _term(self): 128 c = self._star() 129 while len(self.stack) > 0 and self.stack[0].kind == OPE_CONCAT: 130 self.stack.pop(0) 131 c = self._assemble_concat(c, self._star()) 132 return c 133 134 def _star(self): 135 c = self._fact() 136 while len(self.stack) > 0 and self.stack[0].kind == OPE_STAR: 137 self.stack.pop(0) 138 c = self._assemble_star(c) 139 return c 140 141 def _fact(self): 142 t = self.stack.pop(0) 143 if t.kind == VALUE: 144 return self._assemble_value(t.value) 145 if t.kind == LPAREN: 146 c = self._expr() 147 if not self.stack.pop(0).kind == RPAREN: 148 raise "invalid paren" 149 return c 150 raise "invalid: %s" % t 151 152 def assemble(self): 153 root_fragment = self._expr() 154 nfa = self.builder.build(root_fragment) 155 156 return nfa 132 157 133 158 def compile_to_nfa(regexp): 134 for s in parse(regexp):135 stack.append(s)136 return expr();159 talkens = parse(regexp) 160 assembler = AssembleNFA(talkens) 161 return assembler.assemble() 137 162 138 163 -
lang/python/dfareg/dfareg/nfa.py
r12561 r14577 2 2 # -*- coding: utf-8 -*- 3 3 4 class NoneterministicFiniteAutomaton(object): 5 def __init__(self, start_state): 6 self._start_state = start_state 7 self.current_state_set = None 8 self.reset() 4 from algebra import expand_set 9 5 10 def transition(self, alphabet): 11 next_state_set = set() 12 for state in self.current_state_set: 13 next_state_set = next_state_set.union( 14 state.next_state_set(alphabet)) 15 self.current_state_set = next_state_set 6 class NFARuntime(object): 7 def __init__(self, NFA): 8 self.NFA = NFA 9 self.cur_states = self._expand_epsilon( set([ self.NFA.start ]) ) 16 10 17 def doesAccept(self, input): 11 def _expand_epsilon(self, states): 12 """ 13 与えられた状態集合からε(None)で到達できるsetを得る 14 """ 15 return expand_set( 16 states, 17 lambda e: self.NFA.transition(e, None), 18 self.NFA.states 19 ) 20 21 def do_transition(self, char): 22 if not char in self.NFA.alphabet: 23 raise "bad char: %s" % char 24 25 next_states = set() 26 for state in self.cur_states: 27 next_states = next_states.union( 28 self.NFA.transition(state, char) 29 ) 30 next_states = self._expand_epsilon(next_states) 31 32 self.cur_states = next_states 33 34 if not self.cur_states.issubset(self.NFA.states): 35 raise "bad state. (maybe bad transition function.)" 36 37 def is_accept_state(self): 38 for state in self.cur_states: 39 if state in self.NFA.accepts: return True 40 return False 41 42 def does_accept(self, input): 43 """ 44 Check whether an input is accepted by this automaton. 45 """ 18 46 for alphabet in input: 19 self.transition(alphabet) 20 ret = False 21 for state in self.current_state_set: 22 if state.isAcceptState: 23 ret = True 24 break 25 self.reset() 26 return ret 27 28 def reset(self): 29 initial_set = set([self._start_state]) 30 initial_set = initial_set.union( 31 self._start_state.epsilon_reachable_set()) 32 self.current_state_set = initial_set 33 34 def __repr__(self): 35 ret = repr(self._start_state) + "\n" 36 # ret += "current state %d\n" % self.currentState.id 37 return ret 38 39 class NFAState(object): 40 idCount = 0 41 @classmethod 42 def uniqueID(clazz): 43 NFAState.idCount += 1 44 return NFAState.idCount 45 46 def __init__(self, isAcceptState = False): 47 self.id = NFAState.uniqueID() 48 self.isAcceptState = isAcceptState 49 self._arrows = {} 50 51 def connect(self, alphabet, state): 52 bag = self._arrows.setdefault(alphabet, set()) 53 bag.add(state) 54 55 def _all_states(self, set): 56 if self in set: return [] 57 set.add(self) 58 ret = [self] 59 bags = (bag for bag in self._arrows.itervalues()) 60 for b in bags: 61 for elem in b: 62 ret += elem._all_states(set) 63 return ret 64 65 def all_states(self): 66 return self._all_states(set()) 67 68 def all_accept_states(self): 69 states = self.all_states() 70 return filter(lambda s : s.isAcceptState, states) 47 self.do_transition(alphabet) 48 return self.is_accept_state() 71 49 72 50 51 class NondeterministicFiniteAutomaton(object): 52 """ 53 An nondeterministic finite automaton implementation. 54 It's just for fun. So, you shouldn't use this in your products. 55 """ 56 def __init__(self, 57 states , # a finite set of states 58 alphabet , # a finite set called the alphabet 59 transition , # a transition function 60 start , # a start state 61 accepts , # a set of accept states 62 ): 63 """ 64 Instanciate new deterministic finite automaton 65 from 5-tuple. 66 """ 67 self.states = states 68 self.alphabet = alphabet 69 self.transition = transition 70 self.start = start 71 self.accepts = accepts 73 72 74 def next_state_set(self, alphabet): 75 ret = self._arrows.get(alphabet, set()) 76 extension = set() 77 for state in ret: 78 extension = extension.union( state.epsilon_reachable_set() ) 79 ret = ret.union( extension ) 80 return ret 73 self._check_parameter() 81 74 82 def epsilon_reachable_set(self): 83 return self._epsilon_reachable_set(set()) 75 def _check_parameter(self): 76 """ 77 check whether params inputted are correct type. 78 """ 79 if not self.start in self.states: 80 raise "bad start parameter." 81 if not self.accepts.issubset(self.states): 82 raise "bad accepts parameter." 84 83 85 def _epsilon_reachable_set(self, ret): 86 for k, v in self._arrows.iteritems(): 87 if not k == None: 88 continue 89 for state in v: 90 if state in ret: 91 continue 92 ret.add(state) 93 state._epsilon_reachable_set(ret) 94 return ret 95 96 # ↓以下、斎木的にしない方がいい。直せ。 97 # _printed = {} 98 # def __repr__(self): 99 # if NFAState._printed.has_key(self.id): 100 # return "<%d>" % self.id 101 # NFAState._printed[self.id] = True 102 103 # ret = u"" 104 # for k, v in self._arrows.iteritems(): 105 # status = [] 106 # for elem in v: 107 # status.append(repr(elem).decode('utf-8')) 108 # ret = u"%s(%s=>%s)" % (ret, k, u','.join(status)) 109 # accept = u"" 110 # if self.isAcceptState: accept = u"(o)" 111 # return ( u"[%sid-%d:%s]" % (accept, self.id, ret) ).encode("utf-8") 112 113 def __repr__(self): 114 ret = u"" 115 if self.isAcceptState: ret += u"*" 116 ret += "%s\n" % self.id 117 for k, v in self._arrows.iteritems(): 118 ids = [] 119 for elem in v: # v is bag 120 ids.append( str(elem.id) ) 121 ret += u"\t%s => %s\n" % (k, u','.join(ids) ) 122 123 return ret.encode('utf-8') 84 def get_runtime(self): 85 return NFARuntime(self) -
lang/python/dfareg/dfareg/nfa2dfa.py
r12561 r14577 2 2 # -*- coding: utf-8 -*- 3 3 4 import lexer 5 from dfa import DFAState4 from dfa import DeterministicFiniteAutomaton 5 from algebra import power, subsets_including_elem, expand_set 6 6 7 def _epsilon_extension(nfa_states, ret): 8 for st in nfa_states: 9 if st in ret: 10 continue 11 ret.add(st) 12 _epsilon_extension(st.epsilon_reachable_set(), ret) 7 def nfa2dfa(nfa): 8 def epsilon_expand(set_): 9 return expand_set( 10 set_, 11 lambda e: nfa.transition(e, None), 12 nfa.states 13 ) 13 14 14 def nfaset2dfakey(set): 15 ids = [e.id for e in set] 16 ids.sort() 17 return "-".join( [str(id) for id in ids] ) 15 def transition(set_, alpha): 16 ret = set() 17 for elem in set_: 18 ret |= epsilon_expand( nfa.transition(elem, alpha) ) 19 return ret 18 20 19 def epsilon_extension(nfa_states): 20 # εで行ける範囲のsetを集める 21 ret = set() 22 _epsilon_extension(nfa_states, ret) 23 return ret 24 25 def is_accept_set(start_set): 26 for nfa in start_set: 27 if nfa.isAcceptState: return True 28 return False 29 30 _dfa_state_map = {} 31 def nfa2dfa(nfa_state): 32 # 初期状態を作る 33 start_set = epsilon_extension( set([nfa_state]) ) 34 start_state_key = nfaset2dfakey(start_set) 35 start_state = DFAState( 36 isAcceptState = is_accept_set(start_set) 37 ) 38 print start_state 39 _dfa_state_map[start_state_key] = start_state 40 41 # NFAの状態遷移関数を変換 42 for nfa_state in nfa_state.all_states(): 43 44 45 21 return DeterministicFiniteAutomaton( 22 power(nfa.states), 23 nfa.alphabet, 24 transition, 25 epsilon_expand( set([ nfa.start ]) ), 26 subsets_including_elem(nfa.states, nfa.accepts) 27 ) 46 28 47 29
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)