pretty.ml 6.37 KB
Newer Older
1
2
3
4
5
6
type 'a regexp = 
  | Empty
  | Epsilon
  | Seq of 'a regexp * 'a regexp
  | Alt of 'a regexp * 'a regexp
  | Star of 'a regexp
7
  | Plus of 'a regexp
8
9
  | Trans of 'a

10
11
12
13
14
type 'a re =
  | RSeq of 'a re list
  | RAlt of 'a re list
  | RTrans of 'a
  | RStar of 'a re
15

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
  | RPlus of 'a re


module Decompile(H : Hashtbl.S)(S : Set.OrderedType) = struct
  let rec compare s1 s2 = 
    if s1 == s2 then 0 
    else match (s1,s2) with
      | RSeq x, RSeq y | RAlt x, RAlt y -> compare_list x y
      | RSeq _, _ -> -1 | _, RSeq _ -> 1
      | RAlt _, _ -> -1 | _, RAlt _ -> 1
      | RTrans x, RTrans y -> S.compare x y
      | RTrans _, _ -> -1 | _, RTrans _ -> 1
      | RStar x, RStar y | RPlus x, RPlus y -> compare x y
      | RStar _, _ -> -1 | _, RStar _ -> 1
  and compare_list l1 l2 = match (l1,l2) with
    | x1::y1, x2::y2 -> 
	let c = compare x1 x2 in if c = 0 then compare_list y1 y2 else c
    | [], [] -> 0
    | [], _ -> -1 | _, [] -> 1

  let rec dump ppf = function
    | RSeq l -> Format.fprintf ppf "Seq(%a)" dump_list l
    | RAlt l -> Format.fprintf ppf "Alt(%a)" dump_list l
    | RStar r -> Format.fprintf ppf "Star(%a)" dump r
    | RPlus r -> Format.fprintf ppf "Plus(%a)" dump r
    | RTrans x -> Format.fprintf ppf "Trans"
  and dump_list ppf = function
    | [] -> ()
    | [h] ->  Format.fprintf ppf "%a" dump h
    | h::t ->  Format.fprintf ppf "%a,%a" dump h dump_list t

  let rec factor accu l1 l2 = match (l1,l2) with
    | (x1::y1,x2::y2) when compare x1 x2 = 0 -> factor (x1::accu) y1 y2 
    | (l1,l2) -> (accu,l1,l2)
   

  let rec regexp = function
    | RSeq l ->
	let rec aux = function 
	    | [h] -> regexp h 
	    | h::t -> Seq (regexp h,aux t) 
	    | [] -> Epsilon in
	aux l
    | RAlt l ->
	let rec aux = function 
	    | [h] -> regexp h 
	    | h::t -> Alt (regexp h,aux t) 
	    | [] -> Empty in
	aux l
    | RTrans x -> Trans x
    | RStar r -> Star (regexp r)
    | RPlus r -> Plus (regexp r)

  let epsilon = RSeq []
  let empty = RAlt []

  let rec nullable = function
    | RAlt l -> List.exists nullable l
    | RSeq l -> List.for_all nullable l
    | RPlus r -> nullable r
    | RStar _ -> true
    | RTrans _ -> false

  let has_epsilon =
    List.exists (function RSeq [] -> true | _ -> false)
81

82
83
84
85
86
87
88
89
90
91
92
  let remove_epsilon =
    List.filter (function RSeq [] -> false | _ -> true)

  let rec merge l1 l2 = match (l1,l2) with
    | x1::y1, x2::y2 ->
	let c = compare x1 x2 in
	if c = 0 then x1::(merge y1 y2)
	else if c < 0 then x1::(merge y1 l2)
	else x2::(merge l1 y2)
    | [], l | l,[] -> l

93
94
95
96
97
98
99
100
101
102
  let rec sub l1 l2 =
    (compare l1 l2 = 0) ||
    match (l1,l2) with
      | RSeq [x], y -> sub x y
      | RPlus x, (RStar y | RPlus y) -> sub x y
      | RSeq (x::y), (RPlus z | RStar z) -> 
	  (sub x z) && (sub (RSeq y) (RStar z))
      | x, (RStar y | RPlus y) -> sub x y
      | _ -> false

103
104
105
106
107
108
109

  let rec absorb_epsilon = function
    | RPlus r :: l -> RStar r :: l
    | (r :: _) as l when nullable r -> l
    | r :: l -> r :: (absorb_epsilon l)
    | [] -> [ epsilon ]

110
111
112
113
114
115
116
117
  let rec simplify_alt accu = function
    | [] -> List.rev accu
    | x::rest -> 
	if (List.exists (sub x) accu) || (List.exists (sub x) rest)
	then simplify_alt accu rest
	else simplify_alt (x::accu) rest
	

118
119
120
121
122
123
124
125
  let alt s1 s2 =
    let s1 = match s1 with RAlt x -> x | x -> [x] in
    let s2 = match s2 with RAlt x -> x | x -> [x] in
    let l = merge s1 s2 in
    let l = 
      if has_epsilon l 
      then absorb_epsilon (remove_epsilon l)
      else l in
126
    let l = simplify_alt [] l in
127
128
129
130
    match l with
      | [x] -> x
      | l -> RAlt l

131
132
133
134
135
136
137
138
139
140
141
  let rec simplify_seq = function
    | RStar x :: ((RStar y | RPlus y) :: _ as rest) 
	when compare x y = 0 ->
	simplify_seq rest
    | RPlus x :: (RPlus y :: _ as rest) 
	when compare x y = 0 ->
	simplify_seq (x :: rest)
    | RPlus x :: (RStar y :: rest) when compare x y = 0 ->
	simplify_seq (RPlus y :: rest)
    | x :: rest -> x :: (simplify_seq rest)
    | [] -> []
142
143
144
145
146
147
148
149
150
151

  let rec seq s1 s2 =
    match (s1,s2) with
      | RAlt [], _ | _, RAlt [] -> epsilon
      | RSeq [], x | x, RSeq [] -> x
      | _ ->
	  let s1 = match s1 with RSeq x -> x | x -> [x] in
	  let s2 = match s2 with RSeq x -> x | x -> [x] in
	  find_plus [] (s1 @ s2)
  and find_plus before = function
152
153
    | [] -> 
	(match before with [h] -> h | l -> RSeq (simplify_seq (List.rev l)))
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
    | (RStar s)::after ->
	let star = match s with RSeq x -> x | x -> [x] in
	let (right,star',after') = factor [] star after in
	let (left,star'',before') = factor [] (List.rev star') before in
	(match star'' with
	   | [] ->
	       let s = find_plus [] (left @ (List.rev right)) in
	       find_plus ((RPlus s)::before') after'
	   | _  -> 
	       find_plus ((RStar s)::before) after)
    | x::after -> find_plus (x::before) after

  let star = function
    | RAlt [] | RSeq [] -> epsilon
    | RStar _ as s -> s
169
    | RPlus s -> RStar s
170
    | s -> RStar s
171
172
173

  type 'a slot = { 
    mutable weight : int;
174
175
176
    mutable outg : ('a slot * 'a re) list;
    mutable inc  : ('a slot * 'a re) list;
    mutable self : 'a re;
177
178
    mutable ok   : bool
  }
179
180
  let alloc_slot () = 
    { weight = 0; outg = []; inc = []; self = empty; ok = false }
181
182
183
184
185
186
187

  let decompile trans n0 =
    let slot_table = H.create 121 in
    let slots = ref [] in
    let slot n =
      try H.find slot_table n
      with Not_found -> 
188
	let s = alloc_slot () in
189
190
191
192
193
194
195
196
197
	H.add slot_table n s;
	slots := s :: !slots;
	s in

    let add_trans s1 s2 t =
      if s1 == s2 
      then s1.self <- alt s1.self t
      else (s1.outg <- (s2,t) :: s1.outg; s2.inc <- (s1,t) :: s2.inc) in

198
199
    let final = alloc_slot () in
    let initial = alloc_slot () in
200
201
202
203
204
205

    let rec conv n =
      let s = slot n in
      if not s.ok then (
	s.ok <- true;
	let (tr,f) = trans n in
206
207
	if f then add_trans s final epsilon;
	List.iter (fun (l,dst) -> add_trans s (conv dst) (RTrans l)) tr;
208
209
210
211
212
213
214
215
216
217
218
219
220
221
      );
      s in

    let elim s =
      s.weight <- (-1);
      let loop = star s.self in
      List.iter 
	(fun (s1,t1) -> if s1.weight >= 0 then 
	   List.iter 
	     (fun (s2,t2) -> if s2.weight >= 0 then 
		add_trans s1 s2 (seq t1 (seq loop t2)))
	     s.outg
	) s.inc in

222
    add_trans initial (conv n0) epsilon;
223
224
225
    List.iter 
      (fun s -> s.weight <- List.length s.inc * List.length s.outg)
      !slots;
226
227
    let slots = 
      List.sort (fun s1 s2 -> Pervasives.compare s1.weight s2.weight) !slots in
228
    List.iter elim slots;
229
230
    let r = 
      List.fold_left 
231
232
	(fun accu (s,t) -> 
	   if s == final then alt accu t else accu)
233
234
235
	empty
	initial.outg in
    regexp r
236
end