Commit 01a9eb8f authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2005-02-21 17:32:57 by afrisch] New compilation for types -- does not detect...

[r2005-02-21 17:32:57 by afrisch] New compilation for types -- does not detect ill-formed recursion yet

Original author: afrisch
Date: 2005-02-21 17:32:57+00:00
parent 097b5407
......@@ -299,6 +299,13 @@ module Map = struct
| [],[] -> ()
| _ -> assert false
let rec may_collide f exn l1 l2 =
match (l1,l2) with
| (x1,y1)::l1, (x2,y2)::l2 when X.compare x1 x2 = 0 ->
f y1 y2; may_collide f exn l1 l2
| [], [] -> ()
| _ -> raise exn
let rec map f = function
| (x,y)::l -> (x, f y)::(map f l)
| [] -> []
......
......@@ -56,6 +56,7 @@ sig
val map_from_slist: (X.t -> 'a) -> t -> 'a map
val collide: ('a -> 'b -> unit) -> 'a map -> 'b map -> unit
val may_collide: ('a -> 'b -> unit) -> exn -> 'a map -> 'b map -> unit
val map: ('a -> 'b) -> 'a map -> 'b map
val mapi: (X.t -> 'a -> 'b) -> 'a map -> 'b map
val constant: 'a -> t -> 'a map
......
......@@ -219,10 +219,6 @@ let schema_model_groups =
State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
(* raise Not_found *)
let get_schema_fwd = ref (fun _ -> assert false)
let find_schema_descr_uri kind uri (name : Ns.qname) =
......@@ -256,6 +252,532 @@ let find_schema_descr env kind schema name =
find_schema_descr_uri kind uri name
module IType = struct
type node = {
mutable desc: desc;
mutable smallhash: int; (* Local hash *)
mutable rechash: int; (* Global (recursive) hash *)
mutable sid: int; (* Sequential id used to compute rechash *)
mutable t: Types.t option;
mutable tnode: Types.Node.t option;
mutable p: Patterns.descr option;
mutable pnode: Patterns.node option;
mutable fv: fv option
}
and desc =
| ILink of node
| IType of Types.descr * int
| IOr of node * node
| IAnd of node * node
| IDiff of node * node
| ITimes of node * node
| IXml of node * node
| IArrow of node * node
| IOptional of node
| IRecord of bool * (node * node option) label_map
| ICapture of id
| IConstant of id * Types.const
let rec node_temp = {
desc = ILink node_temp;
smallhash = 0; rechash = 0; sid = 0;
t = None; tnode = None; p = None; pnode = None;
fv = None
}
let rec hash0 n = match n.desc with
| ILink n -> hash0 n
| IType (t,h) -> 1 + 17 * h
| IOr _ -> 2
| IAnd _ -> 3
| IDiff _ -> 4
| ITimes _ -> 5
| IXml _ -> 6
| IArrow _ -> 7
| IOptional _ -> 8
| IRecord _ -> 9
| ICapture x -> 10 + 17*(Id.hash x)
| IConstant (x,_) -> 11 + 17*(Id.hash x)
let hash0_field = function
| (p, Some e) -> 1 + 17 * hash0 p + 257 * hash0 e
| (p, None) -> 2 + 17 * hash0 p
let rec hash1 n = match n.desc with
| ILink n -> hash1 n
| IType (t,h) -> 1 + 17 * h
| IOr (p1,p2) -> 2 + 17 * hash0 p1 + 257 * hash0 p2
| IAnd (p1,p2) -> 3 + 17 * hash0 p1 + 257 * hash0 p2
| IDiff (p1,p2) -> 4 + 17 * hash0 p1 + 257 * hash0 p2
| ITimes (p1,p2) -> 5 + 17 * hash0 p1 + 257 * hash0 p2
| IXml (p1,p2) -> 6 + 17 * hash0 p1 + 257 * hash0 p2
| IArrow (p1,p2) -> 7 + 17 * hash0 p1 + 257 * hash0 p2
| IOptional p -> 8 + 17 * hash0 p
| IRecord (o,r)->9+(if o then 17 else 0)+257*(LabelMap.hash hash0_field r)
| ICapture x -> 10 + 17 * (Id.hash x)
| IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257*(Types.Const.hash c)
let smallhash n =
if n.smallhash !=0 then n.smallhash
else (let h = hash1 n in n.smallhash <- h; h)
let rec repr = function
| { desc = ILink n } -> repr n
| n -> n
let back = ref []
let link x y = match x,y with
| { t = None } as x, y
| y, ({ t = None } as x) -> back := (x,x.desc) :: !back; x.desc <- ILink y
| _ -> assert false
exception Unify
let rec unify x y =
if x == y then ()
else let x = repr x and y = repr y in if x == y then ()
(* else if (smallhash x != smallhash y) then raise Unify *)
else match x.desc,y.desc with
| IType (tx,_), IType (ty,_) when Types.equal tx ty ->
link x y
| IOr (x1,x2), IOr (y1,y2)
| IAnd (x1,x2), IAnd (y1,y2)
| IDiff (x1,x2), IDiff (y1,y2)
| ITimes (x1,x2), ITimes (y1,y2)
| IXml (x1,x2), IXml (y1,y2)
| IArrow (x1,x2), IArrow (y1,y2) ->
link x y; unify x1 y1; unify x2 y2
| IOptional x1, IOptional y1 ->
link x y; unify x1 y1
| IRecord (xo,xr), IRecord (yo,yr) when xo == yo ->
link x y; LabelMap.may_collide unify_field Unify xr yr
| ICapture xv, ICapture yv when Id.equal xv yv -> ()
| IConstant (xv,xc), IConstant (yv,yc) when
Id.equal xv yv && Types.Const.equal xc yc -> ()
| _ -> raise Unify
and unify_field f1 f2 = match f1,f2 with
| (p1, Some e1), (p2, Some e2) -> unify p1 p2; unify e1 e2
| (p1, None), (p2, None) -> unify p1 p2
| _ -> raise Unify
let may_unify x y =
try unify x y; back := []; true
with Unify ->
List.iter (fun (x,xd) -> x.desc <- xd) !back; back := []; false
module SmallHash = Hashtbl.Make(
struct
type t = node
let equal = may_unify
let hash = smallhash
end
)
let iter_field f = function
| (x, Some y) -> f x; f y
| (x, None) -> f x
let iter f = function
| IOr (x,y) | IAnd (x,y) | IDiff (x,y)
| ITimes (x,y) | IXml (x,y) | IArrow (x,y) -> f x; f y
| IOptional x -> f x
| IRecord (_,r) -> LabelMap.iter (iter_field f) r
| _ -> ()
let minimize ((mem,add) as h) =
let rec aux n =
let n = repr n in
if mem n then () else (add n (); if n.t == None then iter aux n.desc)
in aux
let to_clear = ref []
let sid = ref 0
let rec rechash n =
let n = repr n in
if (n.sid != 0) then 17 * n.sid
else begin incr sid; n.sid <- !sid; to_clear := n :: !to_clear;
match n.desc with
| ILink _ -> assert false
| IType (t,h) -> 1 + 17 * h
| IOr (p1,p2) -> 2 + 17 * rechash p1 + 257 * rechash p2
| IAnd (p1,p2) -> 3 + 17 * rechash p1 + 257 * rechash p2
| IDiff (p1,p2) -> 4 + 17 * rechash p1 + 257 * rechash p2
| ITimes (p1,p2) -> 5 + 17 * rechash p1 + 257 * rechash p2
| IXml (p1,p2) -> 6 + 17 * rechash p1 + 257 * rechash p2
| IArrow (p1,p2) -> 7 + 17 * rechash p1 + 257 * rechash p2
| IOptional p -> 8 + 17 * rechash p
| IRecord(o,r)->9+(if o then 17 else 0)+257*(LabelMap.hash rechash_field r)
| ICapture x -> 10 + 17 * (Id.hash x)
| IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257*(Types.Const.hash c)
end
and rechash_field = function
| (p, Some e) -> 1 + 17 * rechash p + 257 * rechash e
| (p, None) -> 2 + 17 * rechash p
let clear () =
sid := 0; List.iter (fun x -> x.sid <- 0) !to_clear
let rechash n =
let n = repr n in
if (n.rechash != 0) then n.rechash
else (let h = rechash n in clear (); n.rechash <- h; h)
module RecHash = Hashtbl.Make(
struct
type t = node
let equal = may_unify
let hash = smallhash
end
)
let gtable = RecHash.create 17577
let internalize n =
let local = SmallHash.create 67 in
minimize (SmallHash.mem local, SmallHash.add local) n;
minimize (RecHash.mem gtable, RecHash.add gtable) n
(* Compute free variables *)
let fv n =
let fv = ref IdSet.empty in
let rec aux n =
let n = repr n in
if (n.sid = 0) then (
n.sid <- 1;
to_clear := n :: !to_clear;
match n.fv, n.desc with
| Some x, _ -> fv := IdSet.cup !fv x
| None, (ICapture x | IConstant (x,_)) -> fv := IdSet.add x !fv
| None, d -> iter aux d
)
in
match n.fv with
| Some x -> x
| None -> aux n; clear (); n.fv <- Some !fv; !fv
(* To the internal representation *)
let rec typ n =
let n = repr n in
match n.t with
| Some t -> t
| None -> let t = compute_typ n.desc in n.t <- Some t; t
and compute_typ = function
| IType (t,_) -> t
| IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
| IAnd (s1,s2) -> Types.cap (typ s1) (typ s2)
| IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
| ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
| IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
| IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
| IOptional s -> Types.Record.or_absent (typ s)
| IRecord (o,r) -> Types.record' (o, LabelMap.map compute_typ_field r)
| ILink _ -> assert false
| ICapture _ | IConstant (_,_) -> assert false
and compute_typ_field = function
| (s, None) -> typ_node s
| (s, Some _) ->
raise (Patterns.Error "Or-else clauses are not allowed in types")
and typ_node n =
match n.tnode with
| Some t -> t
| None ->
let x = Types.make () in
n.tnode <- Some x;
Types.define x (typ n);
x
let rec pat n =
let n = repr n in
if IdSet.is_empty (fv n)
then Patterns.constr (typ n)
else match n.p with
| Some p -> p
| None -> let p = compute_pat n.desc in n.p <- Some p; p
and compute_pat = function
| IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
| IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
| IDiff (s1,s2) when IdSet.is_empty (fv s2) ->
let s2 = Types.neg (typ s2) in
Patterns.cap (pat s1) (Patterns.constr s2)
| IDiff _ ->
raise (Patterns.Error "Differences are not allowed in patterns")
| ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
| IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
| IOptional _ ->
raise (Patterns.Error "Optional fields are not allowed in record patterns")
| IRecord (o,r) ->
let pats = ref [] in
let aux l = function
| (s,None) ->
if IdSet.is_empty (fv s) then typ_node s
else
( pats := Patterns.record l (pat_node s) :: !pats;
Types.any_node )
| (s,Some e) ->
if IdSet.is_empty (fv s) then
raise (Patterns.Error "Or-else clauses are not allowed in types")
else
( pats := Patterns.cup
(Patterns.record l (pat_node s))
(pat e) :: !pats;
Types.Record.any_or_absent_node )
in
let constr = Types.record' (o,LabelMap.mapi aux r) in
List.fold_left Patterns.cap (Patterns.constr constr) !pats
(* TODO: can avoid constr when o=true, and all fields have fv *)
| ICapture x -> Patterns.capture x
| IConstant (x,c) -> Patterns.constant x c
| IArrow _ ->
raise (Patterns.Error "Arrows are not allowed in patterns")
| IType _ | ILink _ -> assert false
and pat_node n =
match n.pnode with
| Some p -> p
| None ->
let x = Patterns.make (fv n) in
try
n.pnode <- Some x;
Patterns.define x (pat n);
x
with exn -> n.pnode <- None; raise exn
(* From AST to the intermediate representation *)
type penv = {
penv_tenv : t;
penv_derec : node Env.t;
}
let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }
let mk d = { node_temp with desc = d }
let mk_delayed () = { node_temp with desc = ILink node_temp }
let itype t = mk (IType (t, Types.hash t))
let iempty = itype Types.empty
let ior p1 p2 =
if p1 == iempty then p2
else if p2 == iempty then p1
else mk (IOr (p1,p2))
let iand p1 p2 =
if (p1 == iempty) || (p2 == iempty) then iempty
else mk (IAnd (p1,p2))
type regexp =
| PEpsilon
| PElem of node
| PGuard of node
| PSeq of regexp * regexp
| PAlt of regexp * regexp
| PStar of regexp
| PWeakStar of regexp
let rec remove_regexp r q = match r with
| PEpsilon ->
q
| PElem p ->
mk (ITimes (p, q))
| PGuard p ->
iand p q
| PSeq (r1,r2) ->
remove_regexp r1 (remove_regexp r2 q)
| PAlt (r1,r2) ->
ior (remove_regexp r1 q) (remove_regexp r2 q)
| PStar r ->
let x = mk_delayed () in
let res = ior x q in
x.desc <- ILink (remove_regexp2 r res iempty);
res
| PWeakStar r ->
let x = mk_delayed () in
let res = ior q x in
x.desc <- ILink (remove_regexp2 r res iempty);
res
and remove_regexp2 r q_nonempty q_empty =
if q_nonempty == q_empty then remove_regexp r q_empty
else match r with
| PEpsilon ->
q_empty
| PElem p ->
mk (ITimes (p, q_nonempty))
| PGuard p ->
iand p q_empty
| PSeq (r1,r2) ->
remove_regexp2 r1
(remove_regexp2 r2 q_nonempty q_nonempty)
(remove_regexp2 r2 q_nonempty q_empty)
| PAlt (r1,r2) ->
ior
(remove_regexp2 r1 q_nonempty q_empty)
(remove_regexp2 r2 q_nonempty q_empty)
| PStar r ->
let x = mk_delayed () in
x.desc <- ILink (remove_regexp2 r (ior x q_nonempty) iempty);
ior x q_empty
| PWeakStar r ->
let x = mk_delayed () in
x.desc <- ILink (remove_regexp2 r (ior q_nonempty x) iempty);
ior q_empty x
let cst_nil = Types.Atom Sequence.nil_atom
let capture_all vars p =
IdSet.fold (fun p x -> iand p (mk (ICapture x))) p vars
let termin b vars p =
if b then p
else IdSet.fold
(fun p x -> PSeq (p, PGuard (mk (IConstant (x,cst_nil))))) p vars
let rexp r = remove_regexp r (itype Sequence.nil_type)
let rec derecurs env p = match p.descr with
| PatVar v -> derecurs_var env p.loc v
| SchemaVar (kind, schema_name, component_name) ->
let name = qname env.penv_tenv p.loc component_name in
itype (find_schema_descr env.penv_tenv kind schema_name name)
| Recurs (p,b) -> derecurs (derecurs_def env b) p
| Internal t -> itype t
| NsT ns ->
itype (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
| Or (p1,p2) -> mk (IOr (derecurs env p1, derecurs env p2))
| And (p1,p2) -> mk (IAnd (derecurs env p1, derecurs env p2))
| Diff (p1,p2) -> mk (IDiff (derecurs env p1, derecurs env p2))
| Prod (p1,p2) -> mk (ITimes (derecurs env p1, derecurs env p2))
| XmlT (p1,p2) -> mk (IXml (derecurs env p1, derecurs env p2))
| Arrow (p1,p2) -> mk (IArrow (derecurs env p1, derecurs env p2))
| Optional p -> mk (IOptional (derecurs env p))
| Record (o,r) ->
let aux = function
| (p,Some e) -> (derecurs env p, Some (derecurs env e))
| (p,None) -> derecurs env p, None in
mk (IRecord (o, parse_record env.penv_tenv p.loc aux r))
| Constant (x,c) -> mk (IConstant (x,const env.penv_tenv p.loc c))
| Cst c -> itype (Types.constant (const env.penv_tenv p.loc c))
| Regexp r ->
let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
rexp r
and derecurs_regexp vars b rvars f env = function
(* - vars: seq variables to be propagated top-down and added
to each captured element
- b: below a star ?
- rvars: seq variables that appear on the right of the regexp
- f: tail position
returns the set of seq variable of the regexp minus rvars
(they have already been terminated if not below a star)
*)
| Epsilon ->
PEpsilon, IdSet.empty
| Elem p ->
PElem (capture_all vars (derecurs env p)), IdSet.empty
| Guard p ->
PGuard (derecurs env p), IdSet.empty
| Seq (p1,p2) ->
let (p2,v2) = derecurs_regexp vars b rvars f env p2 in
let (p1,v1) = derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
PSeq (p1,p2), IdSet.cup v1 v2
| Alt (p1,p2) ->
let (p1,v1) = derecurs_regexp vars b rvars f env p1
and (p2,v2) = derecurs_regexp vars b rvars f env p2 in
PAlt (termin b (IdSet.diff v2 v1) p1, termin b (IdSet.diff v1 v2) p2),
IdSet.cup v1 v2
| Star p ->
let (p,v) = derecurs_regexp vars true rvars false env p in
termin b v (PStar p), v
| WeakStar p ->
let (p,v) = derecurs_regexp vars true rvars false env p in
termin b v (PWeakStar p), v
| SeqCapture (x,p) ->
let vars = if f then vars else IdSet.add x vars in
let after = IdSet.mem rvars x in
let rvars = IdSet.add x rvars in
let (p,v) = derecurs_regexp vars b rvars false env p in
(if f
then PSeq (PGuard (mk (ICapture x)), p)
else termin (after || b) (IdSet.singleton x) p),
(if after then v else IdSet.add x v)
and derecurs_var env loc v =
match Ns.split_qname v with
| "", v ->
let v = ident v in
(try Env.find v env.penv_derec
with Not_found ->
try itype (find_type v env.penv_tenv)
with Not_found -> mk (ICapture v))
| cu, v ->
try
let cu = U.mk cu in
itype (find_type_global loc cu (ident v) env.penv_tenv)
with Not_found ->
raise_loc_generic loc
("Unbound external type " ^ cu ^ ":" ^ (U.to_string v))
and derecurs_def env b =
let b = List.map (fun (v,p) -> (v,p,mk_delayed ())) b in
let n =
List.fold_left (fun env (v,p,s) -> Env.add v s env) env.penv_derec b in
let env = { env with penv_derec = n } in
List.iter (fun (v,p,s) -> s.desc <- ILink (derecurs env p)) b;
env
let check_no_capture loc s =
match IdSet.pick s with
| Some x ->
raise_loc_generic loc
("Capture variable not allowed: " ^ (Ident.to_string x))
| None -> ()
let typ env t =
let d = derecurs (penv env) t in
check_no_capture t.loc (fv d);
typ_node d
let pat env t = pat_node (derecurs (penv env) t)
module Ids = Set.Make(Id)
let type_defs env b =
ignore
(List.fold_left
(fun seen (v,p) ->
if Ids.mem v seen then
raise_loc_generic p.loc
("Multiple definitions for the type identifer " ^
(Ident.to_string v));
Ids.add v seen
) Ids.empty b);
let penv = derecurs_def (penv env) b in
let b = List.map (fun (v,p) -> (v,p,derecurs penv p)) b in
let b =
List.map
(fun (v,p,s) ->
check_no_capture p.loc (fv s);
let t = Types.descr (typ_node s) in
if (p.loc <> noloc) && (Types.is_empty t) then
warning p.loc
("This definition yields an empty type for " ^ (Ident.to_string v));
(v,t)) b in
List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
b
end
(*
(* Eliminate Recursion, propagate Sequence Capture Variables *)
(* We use two intermediate representation from AST types/patterns
......@@ -963,6 +1485,43 @@ let pat env p =
| Location (loc,_,exn) when loc == noloc ->
raise (Location (p.loc, `Full, exn))
*)
(* New implem *)
let compile = ()
let typ = IType.typ
let pat = IType.pat
let type_defs = IType.type_defs
(*
let get_schema uri = assert false
let get_schema_names t = assert false
*)