Commit 3dec78bd authored by Pietro Abate's avatar Pietro Abate

[r2005-02-23 14:27:37 by afrisch] Various

Original author: afrisch
Date: 2005-02-23 14:27:38+00:00
parent f2878070
......@@ -7,16 +7,267 @@ type 'a regexp =
| Plus of 'a regexp
| Trans of 'a
(*
type 'a re =
| RSeq of 'a re list
| RAlt of 'a re list
| RTrans of 'a
| RStar of 'a re
| RPlus of 'a re
*)
module type S = sig
type t
val equal: t -> t -> bool
val compare: t -> t -> int
val hash: t -> int
end
module Decompile(H : Hashtbl.S)(S : S) = struct
type atom =
| AStar of trie
| APlus of trie
| ATrans of S.t
and trie =
| AEmpty
| AEps
| ABranch of atom list * trie * trie * bool * int * int
(* Branching atom, left, right,
nullable,
hash,
uid *)
type 'a re = trie
let empty = AEmpty
let epsilon = AEps
let nullable = function
| AEmpty -> false
| AEps -> true
| ABranch (_,_,_,n,_,_) -> n
(*
let size = function
| AEmpty -> 0
| AEps -> 0
| ABranch (_,_,_,_,_,_,sz) -> sz
*)
let compare_trie t1 t2 = match t1,t2 with
| AEmpty, AEmpty | AEps, AEps -> 0
| AEmpty, _ -> -1 | _,AEmpty -> 1
| AEps, _ -> -1 | _, AEps -> 1
| ABranch (_,_,_,_,_,id1), ABranch (_,_,_,_,_,id2) -> id1 - id2
let equal_atom a1 a2 = match a1,a2 with
| AStar t1, AStar t2 | APlus t1, APlus t2 -> t1 == t2
| ATrans t1, ATrans t2 -> S.equal t1 t2
| _ -> false
let rec equal_atom_list a1 a2 = match a1,a2 with
| [],[] -> true
| hd1::tl1,hd2::tl2 -> equal_atom hd1 hd2 && equal_atom_list tl1 tl2
| _ -> false
let compare_atom a1 a2 = match a1,a2 with
| AStar t1, AStar t2 | APlus t1, APlus t2 -> compare_trie t1 t2
| AStar _, _ -> -1 | _, AStar _ -> 1
| APlus _, _ -> -1 | _, APlus _ -> 1
| ATrans t1, ATrans t2 -> S.compare t1 t2
let hash_trie = function
| AEmpty -> 0
| AEps -> 1
| ABranch (_,_,_,_,h,_) -> h
let hash_atom = function
| AStar t -> 17 * (hash_trie t)
| APlus t -> 1 + 17 * (hash_trie t)
| ATrans t -> 2 + 17 * (S.hash t)
let rec hash_atom_list = function
| hd::tl -> hash_atom hd + 17 * (hash_atom_list tl)
| [] -> 0
module T = struct
type t = atom list * trie * trie * int
let equal (a,ay,an,_) (b,by,bn,_) =
(equal_atom_list a b) && (ay == by) && (an == bn)
let hash (a,ay,an,h) =
h
end
module HT = Hashtbl.Make(T)
let branches = HT.create 17
let uid = ref 0
let branch0 a ay an =
let h = hash_atom_list a + 17 * (hash_trie ay) + 257 * (hash_trie an) in
let b = (a,ay,an,h) in
try HT.find branches b
with Not_found ->
let h = T.hash b in
incr uid;
let x = ABranch (a,ay,an,nullable an,h,!uid) in
HT.add branches b x;
x
let branch a ay an =
assert (List.length a > 0);
match ay,an with
| ABranch (b,by,bn,_,_,_), AEmpty -> branch0 (a @ b) by bn
| AEmpty, AEmpty -> AEmpty
| _ -> branch0 a ay an
module Decompile(H : Hashtbl.S)(S : Set.OrderedType) = struct
let rec opt = function
| ABranch (a,ay,an,_,_,_) -> branch0 a ay (opt an)
| AEmpty -> AEps
| t -> t
let rec factor accu ctx x y = match x,y with
| hd1::tl1, hd2::tl2 when equal_atom hd1 hd2 ->
factor (hd1::accu) (hd1::ctx) tl1 tl2
| _ -> List.rev accu, ctx,x,y
let rec get_seq accu = function
| ABranch (a,AEps,AEmpty,_,_,_) -> Some a
| AEps -> Some []
| _ -> None
let get_seq = get_seq []
let apply_factor f r =
branch0 f r AEmpty
let apply_ctx ctx r =
List.fold_right (fun a r -> branch0 a r AEmpty) ctx r
let star x r = match x with
| AEmpty | AEps -> AEps
| t -> branch0 [ AStar t ] r AEmpty
(* (AB)*A ==> A(BA)*
BA(BA)* ==> (BA)+ *)
let rec create_plus ctx = function
| AStar x :: follow ->
(match get_seq x with
| Some s ->
let (accu,ctx,s,follow) = factor [] ctx s follow in
let s = s @ accu in
let rec aux accu = function
| ctx,[] ->
create_plus
(APlus (apply_factor accu AEps) :: ctx)
follow
| a::b,c::d when equal_atom a c -> aux (a::accu) (b,d)
| _ -> create_plus (AStar x :: ctx) follow
in
aux [] (ctx,s)
| None -> create_plus (AStar x :: ctx) follow)
| x :: follow -> create_plus (x :: ctx) follow
| [] -> List.rev ctx
let rec size = function
| AEps -> 1
| AEmpty -> 0
| ABranch (a,ay,an,_,_,_) ->
if (ay == an) then 1 + (size ay)
else 3 + (size ay) + (size an)
let choose u v =
if size u > size v then v else u
let rec alt t1 t2 = match t1,t2 with
| AEmpty,t | t,AEmpty -> t
| AEps,t | t,AEps -> opt t
| ABranch (_,_,_,_,_,id1), ABranch (_,_,_,_,_,id2) when id1 = id2 -> t1
| ABranch (al,ay,an,_,_,_), ABranch (bl,by,bn,_,_,_) ->
let (accu,_,al,bl) = factor [] [] al bl in
match accu with
| [] ->
(* let u = br al ay (alt an t2)
and v = br bl by (alt bn t1) in
choose u v *)
br al ay (alt an t2)
| _ ->
let t1 = br al ay AEps in
let t2 = br bl by AEps in
branch accu (alt t1 t2) (alt an bn)
and br a ay an =
match a with
| [] -> alt ay an
| l -> branch a ay an
and seq t1 t2 = match t1,t2 with
| AEmpty,_|_,AEmpty -> AEmpty
| AEps,t | t,AEps -> t
| ABranch (a,ay,an,_,_,_), t2 ->
(* (alt
(branch a (seq ay t2) AEmpty)
(seq an t2) )
*)
(branch a (seq ay t2) (seq an t2))
let rtrans t = branch [ATrans t] AEps AEmpty
let star = function
| AEmpty | AEps -> AEps
| t -> branch [AStar t] AEps AEmpty
let rseq r1 r2 = match r1,r2 with
| Epsilon, z | z, Epsilon -> z
| Empty, _ | _, Empty -> Empty
| x,y -> Seq (x,y)
let ralt r1 r2 = match r1,r2 with
| Empty, z | z, Empty -> z
| x,y -> Alt (x,y)
let rec minim = function
| AEmpty -> AEmpty
| AEps -> AEps
| ABranch (a,ay,(ABranch (b,by,bn,_,_,_) as an),_,_,_) as br
when ay != an ->
choose (branch b (minim by) (branch a (minim ay) bn)) br
| br -> br
let rec minim_trie r =
let r' = minim r in
if (size r' < size r) then minim_trie r' else r
let rec regexp r =
let r = minim_trie r in
match r with
| AEmpty -> Empty
| AEps -> Epsilon
| ABranch (a,ay,an,_,_,_) when ay == an ->
let a = create_plus [] a in
rseq (ralt (regexp_atom_list a) Epsilon) (regexp ay)
| ABranch (a,ay,an,_,_,_) when ay == an ->
let a = create_plus [] a in
rseq (ralt (regexp_atom_list a) Epsilon) (regexp ay)
| ABranch (a,ay,an,_,_,_) ->
let a = create_plus [] a in
ralt (rseq (regexp_atom_list a) (regexp ay)) (regexp an)
and regexp_atom_list = function
| hd::tl -> rseq (regexp_atom hd) (regexp_atom_list tl)
| [] -> Epsilon
and regexp_atom = function
| AStar t -> Star (regexp t)
| APlus t -> Plus (regexp t)
| ATrans t -> Trans t
(*
let rec compare s1 s2 =
if s1 == s2 then 0
else match (s1,s2) with
......@@ -90,6 +341,19 @@ module Decompile(H : Hashtbl.S)(S : Set.OrderedType) = struct
else x2::(merge l1 y2)
| [], l | l,[] -> l
let sort l =
let rec initlist = function
| [] -> []
| e::rest -> [e] :: initlist rest in
let rec merge2 = function
| l1::l2::rest -> merge l1 l2 :: merge2 rest
| x -> x in
let rec mergeall = function
| [] -> []
| [l] -> l
| llist -> mergeall (merge2 llist) in
mergeall (initlist l)
let rec sub l1 l2 =
(compare l1 l2 = 0) ||
match (l1,l2) with
......@@ -113,7 +377,6 @@ module Decompile(H : Hashtbl.S)(S : Set.OrderedType) = struct
if (List.exists (sub x) accu) || (List.exists (sub x) rest)
then simplify_alt accu rest
else simplify_alt (x::accu) rest
let alt s1 s2 =
let s1 = match s1 with RAlt x -> x | x -> [x] in
......@@ -168,6 +431,8 @@ module Decompile(H : Hashtbl.S)(S : Set.OrderedType) = struct
| RStar _ as s -> s
| RPlus s -> RStar s
| s -> RStar s
*)
type 'a slot = {
mutable weight : int;
......@@ -204,7 +469,7 @@ module Decompile(H : Hashtbl.S)(S : Set.OrderedType) = struct
s.ok <- true;
let (tr,f) = trans n in
if f then add_trans s final epsilon;
List.iter (fun (l,dst) -> add_trans s (conv dst) (RTrans l)) tr;
List.iter (fun (l,dst) -> add_trans s (conv dst) (rtrans l)) tr;
);
s in
......
......@@ -9,7 +9,14 @@ type 'a regexp =
| Plus of 'a regexp
| Trans of 'a
module Decompile(H : Hashtbl.S)(S : Set.OrderedType)
module type S = sig
type t
val equal: t -> t -> bool
val compare: t -> t -> int
val hash: t -> int
end
module Decompile(X : Hashtbl.S)(S : S)
: sig
val decompile: (H.key -> (S.t * H.key) list * bool) -> H.key -> S.t regexp
val decompile: (X.key -> (S.t * X.key) list * bool) -> X.key -> S.t regexp
end
......@@ -262,13 +262,6 @@ let get_qual name table get_name =
try Ns.QName.equal (get_name x) name
with Invalid_argument _ -> false)
table
let get_unqual name table get_name =
List.find
(fun x ->
try Utf8.equal (snd (get_name x)) name
with Invalid_argument _ -> false)
table
let get_type name schema = get_qual name schema.types name_of_type_definition
let get_attribute name schema =
......@@ -300,37 +293,6 @@ let get_component kind name schema =
| Some `Model_group -> mod_group ()
| None -> tries [ elt; typ; att; att_group; mod_group ]
let get_type name schema =
get_unqual name schema.types name_of_type_definition
let get_attribute name schema =
get_unqual name schema.attributes name_of_attribute_declaration
let get_element name schema =
get_unqual name schema.elements name_of_element_declaration
let get_attribute_group name schema =
get_unqual name schema.attribute_groups name_of_attribute_group_definition
let get_model_group name schema =
get_unqual name schema.model_groups name_of_model_group_definition
(* policy for unqualified schema component resolution. The order should
* be consistent with Typer.find_schema_descr *)
let get_unqual_component kind name schema =
let rec tries = function
| [] -> raise Not_found
| hd :: tl -> (try hd () with Not_found -> tries tl)
in
let elt () = Element (get_element name schema) in
let typ () = Type (get_type name schema) in
let att () = Attribute (get_attribute name schema) in
let att_group () = Attribute_group (get_attribute_group name schema) in
let mod_group () = Model_group (get_model_group name schema) in
match kind with
| Some `Element -> elt ()
| Some `Type -> typ ()
| Some `Attribute -> att ()
| Some `Attribute_group -> att_group ()
| Some `Model_group -> mod_group ()
| None -> tries [ elt; typ; att; att_group; mod_group ]
let string_of_component_kind (kind: component_kind) =
match kind with
| Some `Type -> "type"
......
......@@ -52,7 +52,6 @@ val get_model_group: Ns.qname -> schema -> model_group_definition
*)
val get_component: component_kind -> Ns.qname -> schema -> component
val get_unqual_component: component_kind -> Utf8.t -> schema -> component
val iter_types: schema -> (type_definition -> unit) -> unit
val iter_attributes: schema -> (attribute_declaration -> unit) -> unit
......
......@@ -468,7 +468,7 @@ let schema_of_uri uri =
| CT_empty ->
CT_model (particle, mixed)
| CT_model (p, _) ->
let model = Sequence (p::[particle]) in
let model = Sequence [p;particle] in
CT_model (particle_model 1 (Some 1) model, mixed)
| CT_simple _ -> assert false
in
......@@ -516,7 +516,7 @@ let schema_of_uri uri =
and parse_particle n =
let min, max = parse_min_max n in
let model mg = particle_model min max mg in
let elt e n = particle min max (Elt e) [ n ] false in
let elt e n = particle min max (Elt e) [ n ] (min = 0) in
match _tag n with
| "xsd:element" ->
(match _may_qname_attr "ref" n with
......
......@@ -10,6 +10,8 @@ open Value
module QTable = Hashtbl.Make(Ns.QName)
let ppf = Format.std_formatter
(** {2 Misc} *)
let empty_string = string_utf8 (Utf8.mk "")
......@@ -389,6 +391,13 @@ and validate_content_type ctx content_type =
get ctx
and validate_particle ctx particle =
(*
Format.fprintf ppf "Particle first";
List.iter (fun n -> Format.fprintf ppf "%a;" Ns.QName.print n)
particle.part_first;
Format.fprintf ppf "@.";
*)
let rec validate_once ~cont_ok ~cont_failure =
do_pcdata ctx;
match peek ctx with
......@@ -396,7 +405,8 @@ and validate_particle ctx particle =
when List.exists (Ns.QName.equal qname) particle.part_first ->
validate_term ctx particle.part_term;
cont_ok ()
| ev -> cont_failure ev
| ev ->
cont_failure ev
in
let rec required = function
| 0 -> ()
......@@ -404,8 +414,10 @@ and validate_particle ctx particle =
validate_once
~cont_ok:(fun () -> required (pred n))
~cont_failure:(fun event ->
error ~ctx (sprintf "Unexpected content: %s"
(string_of_event event)))
if particle.part_nullable then ()
else
error ~ctx (sprintf "Unexpected content: %s"
(string_of_event event)))
in
let rec optional = function
| None ->
......
......@@ -1339,25 +1339,29 @@ struct
let is_regexp t = subtype t seqs_descr
module S = struct
type t = { id : int;
type nd = { id : int;
mutable def : d list;
mutable state : [ `Expand | `None | `Marked | `Named of U.t ] }
and d =
| Name of U.t
| Regexp of t Pretty.regexp
| Regexp of nd Pretty.regexp
| Atomic of (Format.formatter -> unit)
| Pair of t * t
| Pair of nd * nd
| Char of Chars.V.t
| Xml of [ `Tag of (Format.formatter -> unit) | `Type of t ] * t * t
| Record of (bool * t) label_map * bool * bool
| Arrows of (t * t) list * (t * t) list
| Neg of t
| Abs of t
| Xml of [ `Tag of (Format.formatter -> unit) | `Type of nd ] * nd * nd
| Record of (bool * nd) label_map * bool * bool
| Arrows of (nd * nd) list * (nd * nd) list
| Neg of nd
| Abs of nd
let compare x y = x.id - y.id
module S = struct
type t = nd
let compare x y = x.id - y.id
let hash x = x.id
let equal x y = x.id = y.id
end
module Decompile = Pretty.Decompile(DescrHash)(S)
open S
module DescrPairMap = Map.Make(Custom.Pair(Descr)(Descr))
......@@ -1487,7 +1491,6 @@ struct
slot
and decompile d =
let r =
Decompile.decompile
(fun t ->
let tr = Product.get t in
......@@ -1495,8 +1498,7 @@ struct
let tr = List.map (fun (l,t) -> prepare l, t) tr in
tr, Atoms.contains nil_atom t.atoms)
d in
r
d
let gen = ref 0
......
(* TODO:
- rewrite type-checking of operators to propagate constraint
- optimize computation of pattern free variables
- check whether it is worth using recursive hash-consing internally
*)
......@@ -622,60 +621,93 @@ module IType = struct
else mk (IAnd (p1,p2))
type regexp =
| PEpsilon
| PElem of node
| PGuard of node
| PSeq of regexp * regexp
| PAlt of regexp * regexp
| PSeq of regexp list
| PAlt of regexp list
| PStar of regexp
| PWeakStar of regexp
let rec nullable = function
| PElem _ -> false
| PSeq rl -> List.for_all nullable rl
| PAlt rl -> List.exists nullable rl
| PStar _ | PWeakStar _ | PGuard _ -> true
let eps = PSeq []
let emp = PAlt []
let seq r1 r2 =
let r1 = match r1 with PSeq l -> l | x -> [ x ] in
let r2 = match r2 with PSeq l -> l | x -> [ x ] in
match r1 @ r2 with
| [ x ] -> x
| l -> PSeq l
let alt r1 r2 =
let r1 = match r1 with PAlt l -> l | x -> [ x ] in
let r2 = match r2 with PAlt l -> l | x -> [ x ] in
match r1 @ r2 with
| [ x ] -> x
| l -> PAlt l
let rec merge_alt = function
| PElem p::PElem q::l -> merge_alt (PElem (ior p q) :: l)
| r::l -> r::(merge_alt l)
| [] -> []
let rec remove_regexp r q = match r with
| PEpsilon ->
q
| PElem p ->
mk (ITimes (p, q))
| PGuard p ->
iand p q
| PSeq (r1,r2) ->
remove_regexp r1 (remove_regexp r2 q)
| PAlt (r1,r2) ->
ior (remove_regexp r1 q) (remove_regexp r2 q)
| PSeq l ->
List.fold_right (fun r a -> remove_regexp r a) l q
| PAlt rl ->
let rl = merge_alt rl in
List.fold_left (fun a r -> ior a (remove_regexp r q)) iempty rl
| PStar r ->
let x = mk_delayed () in
let res = ior x q in
x.desc <- ILink (remove_regexp2 r res iempty);
x.desc <- ILink (remove_regexp_nullable r res iempty);
res
| PWeakStar r ->
let x = mk_delayed () in
let res = ior q x in
x.desc <- ILink (remove_regexp2 r res iempty);
x.desc <- ILink (remove_regexp_nullable r res iempty);
res
and remove_regexp_nullable r q_nonempty q_empty =
if nullable r then remove_regexp2 r q_nonempty q_empty
else remove_regexp r q_nonempty
and remove_regexp2 r q_nonempty q_empty =
if q_nonempty == q_empty then remove_regexp r q_empty
(* Assume r is nullable *)
if q_nonempty == q_empty then remove_regexp r q_nonempty
else match r with
| PEpsilon ->
| PSeq [] ->
q_empty
| PElem p ->
mk (ITimes (p, q_nonempty))
assert false
| PGuard p ->
iand p q_empty
| PSeq (r1,r2) ->
remove_regexp2 r1
(remove_regexp2 r2 q_nonempty q_nonempty)
(remove_regexp2 r2 q_nonempty q_empty)
| PAlt (r1,r2) ->
ior
(remove_regexp2 r1 q_nonempty q_empty)
(remove_regexp2 r2 q_nonempty q_empty)
| PSeq (r::rl) ->
remove_regexp2 r
(remove_regexp (PSeq rl) q_nonempty)
(remove_regexp2 (PSeq rl) q_nonempty q_empty)
| PAlt rl ->
let rl = merge_alt rl in
List.fold_left
(fun a r -> ior a (remove_regexp_nullable r q_nonempty q_empty))
iempty rl
| PStar r ->
let x = mk_delayed () in
x.desc <- ILink (remove_regexp2 r (ior x q_nonempty) iempty);
x.desc <- ILink (remove_regexp_nullable r (ior x q_nonempty) iempty);
ior x q_empty
| PWeakStar r ->
let x = mk_delayed () in
x.desc <- ILink (remove_regexp2 r (ior q_nonempty x) iempty);
x.desc <- ILink (remove_regexp_nullable r (ior q_nonempty x) iempty);
ior q_empty x