Commit c1ccf440 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2005-02-24 12:42:45 by afrisch] Simplify handling of recursion

Original author: afrisch
Date: 2005-02-24 12:42:46+00:00
parent edd1b1d1
...@@ -103,7 +103,7 @@ let name_of_attribute_use { attr_decl = { attr_name = name } } = name ...@@ -103,7 +103,7 @@ let name_of_attribute_use { attr_decl = { attr_name = name } } = name
let name_of_attribute_group_definition ag = ag.ag_name let name_of_attribute_group_definition ag = ag.ag_name
let name_of_model_group_definition mg = mg.mg_name let name_of_model_group_definition mg = mg.mg_name
let name_of_particle = function let name_of_particle = function
| { part_term = Elt e } -> name_of_element_declaration (Lazy.force e) | { part_term = Elt e } -> name_of_element_declaration e
| _ -> assert false | _ -> assert false
let variety_of_simple_type_definition = function let variety_of_simple_type_definition = function
| (Primitive name) as st -> Atomic (lazy (Simple st)) | (Primitive name) as st -> Atomic (lazy (Simple st))
...@@ -397,5 +397,5 @@ and print_particle_list ppf = function ...@@ -397,5 +397,5 @@ and print_particle_list ppf = function
and print_particle ppf p = and print_particle ppf p =
print_term ppf p.part_term print_term ppf p.part_term
and print_term ppf = function and print_term ppf = function
| Elt e -> Format.fprintf ppf "E%i" ((Lazy.force e).elt_uid) | Elt e -> Format.fprintf ppf "E%i" e.elt_uid
| Model m -> print_model_group ppf m | Model m -> print_model_group ppf m
...@@ -25,28 +25,6 @@ let particle_model min max mg = ...@@ -25,28 +25,6 @@ let particle_model min max mg =
(nullable_of_model_group mg) (nullable_of_model_group mg)
let xsd = Schema_xml.xsd
(*
let fake_type_def =
Complex
{ ct_uid = -1;
ct_name = Some (xsd, Utf8.mk " FAKE TYP ");
ct_typdef = AnyType;
ct_deriv = `Restriction;
ct_attrs = [];
ct_content = CT_empty }
let fake_elt_decl =
{ elt_uid = -2;
elt_name = (xsd, Utf8.mk " FAKE ELT ");
elt_typdef = ref fake_type_def;
elt_cstr = None }
let is_fake_type_def = (==) fake_type_def
let is_fake_elt_decl = (==) fake_elt_decl
*)
let (^^) x y = Utf8.concat x y
(* element and complex type constructors which take cares of unique id *) (* element and complex type constructors which take cares of unique id *)
let element, complex = let element, complex =
let counter = ref 0 in let counter = ref 0 in
...@@ -73,8 +51,10 @@ let split s = pcre_split ~rex:space_RE s ...@@ -73,8 +51,10 @@ let split s = pcre_split ~rex:space_RE s
let unqualify s = snd (Ns.split_qname s) let unqualify s = snd (Ns.split_qname s)
let hashtbl_deref tbl = QTable.fold (fun _ v acc -> (Lazy.force v) :: acc) tbl [] let hashtbl_deref tbl =
let hashtbl_values tbl = QTable.fold (fun _ v acc -> v :: acc) tbl [] QTable.fold (fun _ v acc -> (Lazy.force v) :: acc) tbl []
let hashtbl_values tbl =
QTable.fold (fun _ v acc -> v :: acc) tbl []
let parse_facets base n = let parse_facets base n =
let validate_base_type v = let validate_base_type v =
...@@ -188,7 +168,8 @@ let schema_of_uri uri = ...@@ -188,7 +168,8 @@ let schema_of_uri uri =
let attr_elems = QTable.create 17 let attr_elems = QTable.create 17
and attr_group_elems = QTable.create 17 and attr_group_elems = QTable.create 17
and model_group_elems = QTable.create 17 in and model_group_elems = QTable.create 17
and elts_elems = QTable.create 17 in
let resolve k t1 t2 f qname = let resolve k t1 t2 f qname =
try QTable.find t1 qname try QTable.find t1 qname
...@@ -235,9 +216,7 @@ let schema_of_uri uri = ...@@ -235,9 +216,7 @@ let schema_of_uri uri =
resolve_typ qname resolve_typ qname
and resolve_elt qname = and resolve_elt qname =
try QTable.find elts qname resolve "element" elts elts_elems (parse_elt_decl true) qname
with Not_found ->
failwith ("Cannot find element " ^ (Ns.QName.to_string qname))
and resolve_att qname = and resolve_att qname =
resolve "attribute" attrs attr_elems (parse_att_decl true) qname resolve "attribute" attrs attr_elems (parse_att_decl true) qname
...@@ -523,7 +502,7 @@ let schema_of_uri uri = ...@@ -523,7 +502,7 @@ let schema_of_uri uri =
| Some ref -> elt (resolve_elt ref) ref | Some ref -> elt (resolve_elt ref) ref
| None -> | None ->
let decl = parse_elt_decl false n in let decl = parse_elt_decl false n in
elt (lazy decl) (name_of_element_declaration decl)) elt decl (name_of_element_declaration decl))
| "xsd:group" -> model (resolve_model_group (_qname_attr "ref" n)).mg_def | "xsd:group" -> model (resolve_model_group (_qname_attr "ref" n)).mg_def
| "xsd:all" | "xsd:sequence" | "xsd:choice" -> | "xsd:all" | "xsd:sequence" | "xsd:choice" ->
model (parse_model_group n) model (parse_model_group n)
...@@ -563,32 +542,34 @@ let schema_of_uri uri = ...@@ -563,32 +542,34 @@ let schema_of_uri uri =
in in
(* First pass: allocate slots for global elements and types, let check_redef n table kind =
perform inclusion *) let name = get_name n in
if (QTable.mem elts name) then
validation_error ("Redefinition of " ^ kind ^ " " ^
Ns.QName.to_string name)
else name
in
let rec register n = function let rec register n = function
| "xsd:element" -> | "xsd:element" ->
let name = get_name n in let name = check_redef n elts "element" in
if (QTable.mem elts name) then QTable.add elts_elems name n;
validation_error ("Redefinition of element " ^ Ns.QName.to_string name); todo := (fun () -> ignore (resolve_elt name)):: !todo
let l = lazy (parse_elt_decl true n) in
QTable.add elts name l
| ("xsd:simpleType" | "xsd:complexType") as s -> | ("xsd:simpleType" | "xsd:complexType") as s ->
let name = get_name n in let name = check_redef n elts "type" in
if (QTable.mem typs name) then let l = if s="xsd:simpleType" then lazy (parse_simple_type_def n)
validation_error ("Redefinition of type " ^ Ns.QName.to_string name); else lazy (parse_complex_type_def n) in
let l = if s="xsd:simpleType" then lazy (parse_simple_type_def n) QTable.add typs name l
else lazy (parse_complex_type_def n) in
QTable.add typs name l
| "xsd:attribute" -> | "xsd:attribute" ->
let name = get_name n in let name = check_redef n attr_elems "attribute" in
QTable.add attr_elems name n; QTable.add attr_elems name n;
todo := (fun () -> ignore (resolve_att name)):: !todo; todo := (fun () -> ignore (resolve_att name)):: !todo;
| "xsd:attributeGroup" -> | "xsd:attributeGroup" ->
let name = get_name n in let name = check_redef n attr_group_elems "attribute group" in
QTable.add attr_group_elems name n; QTable.add attr_group_elems name n;
todo := (fun () -> ignore (resolve_att_group name)):: !todo todo := (fun () -> ignore (resolve_att_group name)):: !todo
| "xsd:group" -> | "xsd:group" ->
let name = get_name n in let name = check_redef n model_group_elems "model group" in
QTable.add model_group_elems name n; QTable.add model_group_elems name n;
todo := (fun () -> ignore (resolve_model_group name)):: !todo todo := (fun () -> ignore (resolve_model_group name)):: !todo
| "xsd:include" -> | "xsd:include" ->
...@@ -628,7 +609,7 @@ let schema_of_uri uri = ...@@ -628,7 +609,7 @@ let schema_of_uri uri =
targetNamespace = ns; targetNamespace = ns;
types = hashtbl_deref typs; types = hashtbl_deref typs;
attributes = hashtbl_values attrs; attributes = hashtbl_values attrs;
elements = hashtbl_deref elts; elements = hashtbl_values elts;
attribute_groups = hashtbl_values attr_groups; attribute_groups = hashtbl_values attr_groups;
model_groups = hashtbl_values model_groups model_groups = hashtbl_values model_groups
} }
......
...@@ -67,7 +67,7 @@ and attribute_use = ...@@ -67,7 +67,7 @@ and attribute_use =
attr_use_cstr : value_constraint option } attr_use_cstr : value_constraint option }
and term = and term =
| Elt of element_declaration Lazy.t | Elt of element_declaration
| Model of model_group | Model of model_group
and model_group = and model_group =
......
...@@ -67,7 +67,7 @@ and attribute_use = ...@@ -67,7 +67,7 @@ and attribute_use =
attr_use_cstr : value_constraint option } attr_use_cstr : value_constraint option }
and term = and term =
| Elt of element_declaration Lazy.t | Elt of element_declaration
| Model of model_group | Model of model_group
and model_group = and model_group =
......
...@@ -439,8 +439,7 @@ and validate_particle ctx particle = ...@@ -439,8 +439,7 @@ and validate_particle ctx particle =
and validate_term ctx term = and validate_term ctx term =
match term with match term with
| Elt elt_decl_ref -> append ctx | Elt elt_decl_ref -> append ctx (validate_element ctx elt_decl_ref)
(validate_element ctx (Lazy.force elt_decl_ref))
| Model model_group -> validate_model_group ctx model_group | Model model_group -> validate_model_group ctx model_group
and validate_choice ctx particles = and validate_choice ctx particles =
......
...@@ -663,15 +663,18 @@ module IType = struct ...@@ -663,15 +663,18 @@ module IType = struct
| l -> PAlt l | l -> PAlt l
let rec merge_alt = function let rec merge_alt = function
| PElem p::PElem q::l when (has_no_fv p) && (has_no_fv q) -> | PElem p::PElem q::l -> merge_alt (PElem (ior p q) :: l)
merge_alt (PElem (ior p q) :: l)
(* Need the guard because of
[ (x&Int|_) R' ] which is produced from [ (x::Int|_) R ]
Might weaken it to (fv p = fv q)
*)
| r::l -> r::(merge_alt l) | r::l -> r::(merge_alt l)
| [] -> [] | [] -> []
(* Works only for types, not patterns, because
[ (x&Int|_) R' ] is possible *)
let rec simplify_regexp = function
| PSeq l -> PSeq (List.map simplify_regexp l)
| PAlt l -> PAlt (merge_alt (List.map simplify_regexp l))
| PStar r | PWeakStar r -> PStar (simplify_regexp r)
| x -> x
let rec print_regexp ppf = function let rec print_regexp ppf = function
| PElem _ -> Format.fprintf ppf "Elem" | PElem _ -> Format.fprintf ppf "Elem"
| PGuard _ -> Format.fprintf ppf "Guard" | PGuard _ -> Format.fprintf ppf "Guard"
...@@ -682,7 +685,8 @@ module IType = struct ...@@ -682,7 +685,8 @@ module IType = struct
and print_regexp_list ppf l = and print_regexp_list ppf l =
List.iter (fun x -> Format.fprintf ppf "%a;" print_regexp x) l List.iter (fun x -> Format.fprintf ppf "%a;" print_regexp x) l
let rec remove_regexp r q = match r with let rec remove_regexp r q =
match r with
| PElem p -> | PElem p ->
mk (ITimes (p, q)) mk (ITimes (p, q))
| PGuard p -> | PGuard p ->
...@@ -690,7 +694,6 @@ module IType = struct ...@@ -690,7 +694,6 @@ module IType = struct
| PSeq l -> | PSeq l ->
List.fold_right (fun r a -> remove_regexp r a) l q List.fold_right (fun r a -> remove_regexp r a) l q
| PAlt rl -> | PAlt rl ->
let rl = merge_alt rl in
List.fold_left (fun a r -> ior a (remove_regexp r q)) iempty rl List.fold_left (fun a r -> ior a (remove_regexp r q)) iempty rl
| PStar r -> | PStar r ->
let x = mk_delayed () in let x = mk_delayed () in
...@@ -722,7 +725,6 @@ module IType = struct ...@@ -722,7 +725,6 @@ module IType = struct
(remove_regexp (PSeq rl) q_nonempty) (remove_regexp (PSeq rl) q_nonempty)
(remove_regexp2 (PSeq rl) q_nonempty q_empty) (remove_regexp2 (PSeq rl) q_nonempty q_empty)
| PAlt rl -> | PAlt rl ->
let rl = merge_alt rl in
List.fold_left List.fold_left
(fun a r -> ior a (remove_regexp_nullable r q_nonempty q_empty)) (fun a r -> ior a (remove_regexp_nullable r q_nonempty q_empty))
iempty rl iempty rl
...@@ -1560,6 +1562,9 @@ module Schema_converter = ...@@ -1560,6 +1562,9 @@ module Schema_converter =
let is_xsd (ns,l) local = let is_xsd (ns,l) local =
(Ns.equal ns xsd) && (String.compare (Utf8.get_str l) local = 0) (Ns.equal ns xsd) && (String.compare (Utf8.get_str l) local = 0)
let complex_memo = Hashtbl.create 213
let rexp re = rexp (simplify_regexp re)
(* auxiliary functions *) (* auxiliary functions *)
let nil_type = itype Sequence.nil_type let nil_type = itype Sequence.nil_type
...@@ -1609,7 +1614,7 @@ module Schema_converter = ...@@ -1609,7 +1614,7 @@ module Schema_converter =
(* conversion functions *) (* conversion functions *)
let loop_detect = ref [] let loop_detect = ref []
let rec cd_type_of_simple_type ~schema = function let rec cd_type_of_simple_type = function
| Primitive name | Derived (Some name, _, _, _) | Primitive name | Derived (Some name, _, _, _)
when Schema_builtin.is_builtin name -> when Schema_builtin.is_builtin name ->
itype (Schema_builtin.cd_type_of_builtin name) itype (Schema_builtin.cd_type_of_builtin name)
...@@ -1630,73 +1635,69 @@ module Schema_converter = ...@@ -1630,73 +1635,69 @@ module Schema_converter =
| _ -> assert false) | _ -> assert false)
| Derived (_, List item, facets, _) -> | Derived (_, List item, facets, _) ->
mk_seq_derecurs mk_seq_derecurs
~base:(PElem (cd_type_of_simple_type_ref ~schema item)) facets ~base:(PElem (cd_type_of_simple_type_ref item)) facets
| Derived (_, Union items, facets, _) -> | Derived (_, Union items, facets, _) ->
(match List.map (cd_type_of_simple_type_ref ~schema) items with (match List.map cd_type_of_simple_type_ref items with
| [] -> assert false (* vacuum union *) | [] -> assert false (* vacuum union *)
| [t] -> t (* useless union *) | [t] -> t (* useless union *)
| hd::tl -> List.fold_left (fun acc x -> ior x acc) hd tl) | hd::tl -> List.fold_left (fun acc x -> ior x acc) hd tl)
| Derived (_,Restrict,_,_) as st -> | Derived (_,Restrict,_,_) as st ->
cd_type_of_simple_type ~schema (Schema_common.normalize_simple_type st) cd_type_of_simple_type (Schema_common.normalize_simple_type st)
and cd_type_of_simple_type_ref ~schema r = and cd_type_of_simple_type_ref r =
if List.memq r !loop_detect then failwith "Loop between simple types" if List.memq r !loop_detect then failwith "Loop between simple types"
else else
(loop_detect := r :: !loop_detect; (loop_detect := r :: !loop_detect;
let res = let res =
cd_type_of_simple_type ~schema (Schema_common.get_simple_type r) cd_type_of_simple_type (Schema_common.get_simple_type r)
in in
loop_detect := List.tl !loop_detect; loop_detect := List.tl !loop_detect;
res) res)
let complex_memo = Hashtbl.create 213
let element_memo = Hashtbl.create 213
let rec regexp_of_term ~schema = function let rec regexp_of_term = function
| Model group -> regexp_of_model_group ~schema group | Model group -> regexp_of_model_group group
| Elt decl -> PElem (cd_type_of_elt_decl ~schema (Lazy.force decl)) | Elt decl -> PElem (cd_type_of_elt_decl decl)
and regexp_of_model_group ~schema = function and regexp_of_model_group = function
| Choice l -> | Choice l ->
List.fold_left List.fold_left
(fun acc particle -> (fun acc particle ->
alt acc (regexp_of_particle ~schema particle)) alt acc (regexp_of_particle particle))
emp l emp l
| All l | Sequence l -> | All l | Sequence l ->
List.fold_left List.fold_left
(fun acc particle -> (fun acc particle ->
seq acc (regexp_of_particle ~schema particle)) seq acc (regexp_of_particle particle))
eps l eps l
and regexp_of_particle ~schema p = and regexp_of_particle p =
mk_len_regexp ?min:(Some p.part_min) ?max:p.part_max mk_len_regexp ?min:(Some p.part_min) ?max:p.part_max
(regexp_of_term ~schema p.part_term) (regexp_of_term p.part_term)
(** @return a pair composed by a type for the attributes (a record) and a and resolve_complex ct =
type for the content model (a sequence) *)
and cd_type_of_complex_type' ~schema ct =
try Hashtbl.find complex_memo ct.ct_uid try Hashtbl.find complex_memo ct.ct_uid
with Not_found -> with Not_found ->
let slot = mk_delayed () in let slot = delayed noloc in
Hashtbl.add complex_memo ct.ct_uid slot; Hashtbl.add complex_memo ct.ct_uid slot;
let content_ast_node = slot.desc <- compute_complex ct;
match ct.ct_content with
| CT_empty -> itype Sequence.nil_type
| CT_simple st -> cd_type_of_simple_type_ref ~schema st
| CT_model (particle, mixed) ->
(* Format.fprintf Format.std_formatter "CT_model particle=%a@."
Schema_common.print_particle particle; *)
let regexp = regexp_of_particle ~schema particle in
let regexp = if mixed then mix_regexp regexp else regexp in
rexp regexp
in
slot.desc <-
ITimes
(cd_type_of_attr_uses ~schema ct.ct_attrs, content_ast_node);
slot slot
and compute_complex ct =
let content_ast_node = match ct.ct_content with
| CT_empty -> itype Sequence.nil_type
| CT_simple st -> cd_type_of_simple_type_ref st
| CT_model (particle, mixed) ->
let regexp = regexp_of_particle particle in
let regexp = if mixed then mix_regexp regexp else regexp in
rexp regexp
in
ITimes (cd_type_of_attr_uses ct.ct_attrs, content_ast_node);
(** @return a closed record *) (** @return a closed record *)
and cd_type_of_attr_uses ~schema attr_uses = and cd_type_of_attr_uses attr_uses =
let fields = let fields =
List.map List.map
(fun at -> (fun at ->
...@@ -1705,20 +1706,20 @@ module Schema_converter = ...@@ -1705,20 +1706,20 @@ module Schema_converter =
| Some (`Fixed v) -> | Some (`Fixed v) ->
let v = Lazy.force v in let v = Lazy.force v in
itype (Types.constant (Value.inv_const v)) itype (Types.constant (Value.inv_const v))
| _ -> cd_type_of_simple_type_ref ~schema at.attr_decl.attr_typdef | _ -> cd_type_of_simple_type_ref at.attr_decl.attr_typdef
in in
let r = if at.attr_required then r else mk (IOptional r) in let r = if at.attr_required then r else mk (IOptional r) in
(LabelPool.mk at.attr_decl.attr_name, (r,None))) (LabelPool.mk at.attr_decl.attr_name, (r,None)))
attr_uses in attr_uses in
mk (IRecord (false, LabelMap.from_list_disj fields)) mk (IRecord (false, LabelMap.from_list_disj fields))
and cd_type_of_att_decl ~schema att = and cd_type_of_att_decl att =
let r = cd_type_of_simple_type_ref ~schema att.attr_typdef in let r = cd_type_of_simple_type_ref att.attr_typdef in
mk (IRecord (false, mk (IRecord (false,
LabelMap.from_list_disj LabelMap.from_list_disj
[(LabelPool.mk att.attr_name, (r,None))])) [(LabelPool.mk att.attr_name, (r,None))]))
and cd_type_of_elt_decl ~schema elt = and cd_type_of_elt_decl elt =
let atom_type = let atom_type =
itype (Types.atom (Atoms.atom (Atoms.V.of_qname elt.elt_name))) itype (Types.atom (Atoms.atom (Atoms.V.of_qname elt.elt_name)))
in in
...@@ -1734,32 +1735,31 @@ module Schema_converter = ...@@ -1734,32 +1735,31 @@ module Schema_converter =
| Simple st -> | Simple st ->
mk (ITimes mk (ITimes
(itype Types.empty_closed_record, (itype Types.empty_closed_record,
cd_type_of_simple_type ~schema st)) cd_type_of_simple_type st))
| Complex ct -> cd_type_of_complex_type' ~schema ct) | Complex ct -> resolve_complex ct)
in in
mk (IXml (atom_type, content)) mk (IXml (atom_type, content))
let cd_type_of_complex_type ~schema ct = let cd_type_of_complex_type ct = mk (IXml (itype Types.any, resolve_complex ct))
mk (IXml (itype Types.any, cd_type_of_complex_type' ~schema ct))
let cd_type_of_model_group ~schema g = let cd_type_of_model_group g =
rexp (regexp_of_model_group ~schema g) rexp (regexp_of_model_group g)
let typ r = IType.typ_descr r let typ r =
check_delayed ();
IType.typ_descr r
(* Schema_converter interface implementation. (* Schema_converter interface implementation.
* Shadows previous definitions. * Shadows previous definitions.
*) *)
let cd_type_of_type_def ~schema = function let cd_type_of_type_def = function
| AnyType -> Schema_builtin.cd_type_of_builtin (xsd, U.mk "anyType") | AnyType -> Schema_builtin.cd_type_of_builtin (xsd, U.mk "anyType")
| Simple st -> typ (cd_type_of_simple_type ~schema st) | Simple st -> typ (cd_type_of_simple_type st)
| Complex ct -> typ (cd_type_of_complex_type ~schema ct) | Complex ct -> typ (cd_type_of_complex_type ct)
let cd_type_of_elt_decl ~schema x = typ (cd_type_of_elt_decl ~schema x) let cd_type_of_elt_decl x = typ (cd_type_of_elt_decl x)
let cd_type_of_att_decl ~schema x = typ (cd_type_of_att_decl ~schema x) let cd_type_of_att_decl x = typ (cd_type_of_att_decl x)
let cd_type_of_attr_uses ~schema x = typ (cd_type_of_attr_uses ~schema x) let cd_type_of_attr_uses x = typ (cd_type_of_attr_uses x)
let cd_type_of_model_group ~schema x = let cd_type_of_model_group x = typ (cd_type_of_model_group x)
typ (cd_type_of_model_group ~schema x)
end end
let get_schema_names env = UEnv.fold (fun n _ acc -> n :: acc) env.schemas [] let get_schema_names env = UEnv.fold (fun n _ acc -> n :: acc) env.schemas []
...@@ -1781,36 +1781,35 @@ let load_schema schema_name uri = ...@@ -1781,36 +1781,35 @@ let load_schema schema_name uri =
Types.Print.register_global (U.mk_latin1 n) cd_type; Types.Print.register_global (U.mk_latin1 n) cd_type;
Format.fprintf Format.std_formatter "Registering schema %s: %s" kind n; Format.fprintf Format.std_formatter "Registering schema %s: %s" kind n;
(* if debug_schema then
Types.Print.print Format.std_formatter cd_type; *)
Format.fprintf Format.std_formatter "@." Format.fprintf Format.std_formatter "@."
end end
in in
Hashtbl.add !schemas uri schema; Hashtbl.add !schemas uri schema;
List.iter (* Schema types -> CDuce types *) List.iter (* Schema types -> CDuce types *)
(fun type_def -> (fun type_def ->
let name = Schema_common.name_of_type_definition type_def in let name = Schema_common.name_of_type_definition type_def in
let cd_type = Schema_converter.cd_type_of_type_def ~schema type_def in let cd_type = Schema_converter.cd_type_of_type_def type_def in
log_schema_component "type" uri name cd_type; log_schema_component "type" uri name cd_type;
Hashtbl.add !schema_types (uri, name) cd_type) Hashtbl.add !schema_types (uri, name) cd_type)
schema.Schema_types.types; schema.Schema_types.types;
List.iter (* Schema attributes -> CDuce types *) List.iter (* Schema attributes -> CDuce types *)
(fun att_decl -> (fun att_decl ->
let cd_type = Schema_converter.cd_type_of_att_decl ~schema att_decl in