Commit 7099feb8 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2005-02-23 23:22:59 by afrisch] Backtrack new decompilation of regexps

Original author: afrisch
Date: 2005-02-23 23:22:59+00:00
parent 0b5c3342
...@@ -7,14 +7,7 @@ type 'a regexp = ...@@ -7,14 +7,7 @@ type 'a regexp =
| Plus of 'a regexp | Plus of 'a regexp
| Trans of 'a | Trans of 'a
(*
type 'a re =
| RSeq of 'a re list
| RAlt of 'a re list
| RTrans of 'a
| RStar of 'a re
| RPlus of 'a re
*)
module type S = sig module type S = sig
type t type t
...@@ -25,6 +18,8 @@ end ...@@ -25,6 +18,8 @@ end
module Decompile(H : Hashtbl.S)(S : S) = struct module Decompile(H : Hashtbl.S)(S : S) = struct
(* Now attempt to simplify regexp. Does not work.... disabled *)
module A = struct
type atom = type atom =
| AStar of trie | AStar of trie
| APlus of trie | APlus of trie
...@@ -39,7 +34,7 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -39,7 +34,7 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
uid *) uid *)
type 'a re = trie type re = trie
...@@ -51,6 +46,12 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -51,6 +46,12 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
| AEps -> true | AEps -> true
| ABranch (_,_,_,n,_,_) -> n | ABranch (_,_,_,n,_,_) -> n
let nullable_atom = function
| AStar _ -> true
| APlus t -> assert(not (nullable t)); false
| ATrans _ -> false
let nullable_atom_list = List.exists nullable_atom
(* (*
let size = function let size = function
| AEmpty -> 0 | AEmpty -> 0
...@@ -116,16 +117,18 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -116,16 +117,18 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
with Not_found -> with Not_found ->
let h = T.hash b in let h = T.hash b in
incr uid; incr uid;
let x = ABranch (a,ay,an,nullable an,h,!uid) in let nullable =
nullable an || ((nullable ay) && (nullable_atom_list a)) in
let x = ABranch (a,ay,an,nullable,h,!uid) in
HT.add branches b x; HT.add branches b x;
x x
let branch a ay an = let branch a ay an =
assert (List.length a > 0); (* assert (List.length a > 0);
match ay,an with match ay,an with
| ABranch (b,by,bn,_,_,_), AEmpty -> branch0 (a @ b) by bn | ABranch (b,by,bn,_,_,_), AEmpty -> branch0 (a @ b) by bn
| AEmpty, AEmpty -> AEmpty | AEmpty, AEmpty -> AEmpty
| _ -> branch0 a ay an | _ -> *) branch0 a ay an
let rec opt = function let rec opt = function
| ABranch (a,ay,an,_,_,_) -> branch0 a ay (opt an) | ABranch (a,ay,an,_,_,_) -> branch0 a ay (opt an)
...@@ -154,6 +157,9 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -154,6 +157,9 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
| AEmpty | AEps -> AEps | AEmpty | AEps -> AEps
| t -> branch0 [ AStar t ] r AEmpty | t -> branch0 [ AStar t ] r AEmpty
let plus x =
if nullable x then AStar x else APlus x
(* (AB)*A ==> A(BA)* (* (AB)*A ==> A(BA)*
BA(BA)* ==> (BA)+ *) BA(BA)* ==> (BA)+ *)
let rec create_plus ctx = function let rec create_plus ctx = function
...@@ -165,7 +171,7 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -165,7 +171,7 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
let rec aux accu = function let rec aux accu = function
| ctx,[] -> | ctx,[] ->
create_plus create_plus
(APlus (apply_factor accu AEps) :: ctx) (plus (apply_factor accu AEps) :: ctx)
follow follow
| a::b,c::d when equal_atom a c -> aux (a::accu) (b,d) | a::b,c::d when equal_atom a c -> aux (a::accu) (b,d)
| _ -> create_plus (AStar x :: ctx) follow | _ -> create_plus (AStar x :: ctx) follow
...@@ -192,22 +198,24 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -192,22 +198,24 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
| AEps,t | t,AEps -> opt t | AEps,t | t,AEps -> opt t
| ABranch (_,_,_,_,_,id1), ABranch (_,_,_,_,_,id2) when id1 = id2 -> t1 | ABranch (_,_,_,_,_,id1), ABranch (_,_,_,_,_,id2) when id1 = id2 -> t1
| ABranch (al,ay,an,_,_,_), ABranch (bl,by,bn,_,_,_) -> | ABranch (al,ay,an,_,_,_), ABranch (bl,by,bn,_,_,_) ->
(* br al ay (alt an t2) *)
let (accu,_,al,bl) = factor [] [] al bl in let (accu,_,al,bl) = factor [] [] al bl in
match accu with match accu with
| [] -> | [] ->
(* let u = br al ay (alt an t2) (* let u = br al ay (alt an t2)
and v = br bl by (alt bn t1) in and v = br bl by (alt bn t1) in
choose u v *) choose u v *)
br al ay (alt an t2) branch al ay (alt an t2)
| _ -> | _ ->
let t1 = br al ay AEps in let t1 = br al ay AEps in
let t2 = br bl by AEps in let t2 = br bl by AEps in
branch accu (alt t1 t2) (alt an bn) branch accu (alt t1 t2) (alt an bn)
and br a ay an = and br a ay an =
match a with (* match a with
| [] -> alt ay an | [] -> alt ay an
| l -> branch a ay an | l -> *) branch a ay an
and seq t1 t2 = match t1,t2 with and seq t1 t2 = match t1,t2 with
| AEmpty,_|_,AEmpty -> AEmpty | AEmpty,_|_,AEmpty -> AEmpty
...@@ -245,18 +253,15 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -245,18 +253,15 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
if (size r' < size r) then minim_trie r' else r if (size r' < size r) then minim_trie r' else r
let rec regexp r = let rec regexp r =
let r = minim_trie r in (* let r = minim_trie r in *)
match r with match r with
| AEmpty -> Empty | AEmpty -> Empty
| AEps -> Epsilon | AEps -> Epsilon
| ABranch (a,ay,an,_,_,_) when ay == an -> | ABranch (a,ay,an,_,_,_) when ay == an ->
let a = create_plus [] a in (* let a = create_plus [] a in *)
rseq (ralt (regexp_atom_list a) Epsilon) (regexp ay)
| ABranch (a,ay,an,_,_,_) when ay == an ->
let a = create_plus [] a in
rseq (ralt (regexp_atom_list a) Epsilon) (regexp ay) rseq (ralt (regexp_atom_list a) Epsilon) (regexp ay)
| ABranch (a,ay,an,_,_,_) -> | ABranch (a,ay,an,_,_,_) ->
let a = create_plus [] a in (* let a = create_plus [] a in *)
ralt (rseq (regexp_atom_list a) (regexp ay)) (regexp an) ralt (rseq (regexp_atom_list a) (regexp ay)) (regexp an)
and regexp_atom_list = function and regexp_atom_list = function
...@@ -267,7 +272,24 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -267,7 +272,24 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
| APlus t -> Plus (regexp t) | APlus t -> Plus (regexp t)
| ATrans t -> Trans t | ATrans t -> Trans t
(* let () = () and (* Hack to avoid "let regexp ..." (ulex construction) *)
regexp r =
(* Need to clear hashtable because S.t objects might have different
meaning across calls *)
let re = regexp r in
HT.clear branches;
re
end
module B = struct
type re =
| RSeq of re list
| RAlt of re list
| RTrans of S.t
| RStar of re
| RPlus of re
let rec compare s1 s2 = let rec compare s1 s2 =
if s1 == s2 then 0 if s1 == s2 then 0
else match (s1,s2) with else match (s1,s2) with
...@@ -319,6 +341,7 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -319,6 +341,7 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
let epsilon = RSeq [] let epsilon = RSeq []
let empty = RAlt [] let empty = RAlt []
let rtrans t = RTrans t
let rec nullable = function let rec nullable = function
| RAlt l -> List.exists nullable l | RAlt l -> List.exists nullable l
...@@ -431,14 +454,15 @@ module Decompile(H : Hashtbl.S)(S : S) = struct ...@@ -431,14 +454,15 @@ module Decompile(H : Hashtbl.S)(S : S) = struct
| RStar _ as s -> s | RStar _ as s -> s
| RPlus s -> RStar s | RPlus s -> RStar s
| s -> RStar s | s -> RStar s
*) end
open B
type 'a slot = { type slot = {
mutable weight : int; mutable weight : int;
mutable outg : ('a slot * 'a re) list; mutable outg : (slot * re) list;
mutable inc : ('a slot * 'a re) list; mutable inc : (slot * re) list;
mutable self : 'a re; mutable self : re;
mutable ok : bool mutable ok : bool
} }
let alloc_slot () = let alloc_slot () =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment