Commit 74f6ff55 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2004-10-19 11:14:38 by afrisch] Rework XML Schema stuff...

Original author: afrisch
Date: 2004-10-19 11:14:39+00:00
parent 0b91ebba
......@@ -175,6 +175,9 @@ OBJECTS = \
driver/librarian.cmo \
driver/cduce.cmo
schema/schema_types.ml: schema/schema_types.mli
cp $^ $@
ML_INTERFACE_OBJS = \
ocamliface/caml_cduce.cmo ocamliface/mltypes.cmo ocamliface/mlstub.cmo
......
......@@ -22,22 +22,22 @@ let no_facets = {
*)
}
let name_of_element_declaration (_, name, _, _) = name
let name_of_element_declaration elt = elt.elt_name
let name_of_simple_type_definition = function
| Primitive name -> name
| Derived (Some name, _, _, _) -> name
| _ -> raise (Invalid_argument "anonymous simple type definition")
let name_of_complex_type_definition = function
| _, Some name, _, _, _, _ -> name
| { ct_name = Some name } -> name
| _ -> raise (Invalid_argument "anonymous complex type definition")
let name_of_type_definition = function
| AnyType -> Encodings.Utf8.mk "xsd:anyType"
| Simple st -> name_of_simple_type_definition st
| Complex ct -> name_of_complex_type_definition ct
let name_of_attribute_declaration (name, _, _) = name
let name_of_attribute_use (_, (name, _, _), _) = name
let name_of_attribute_group_definition = fst
let name_of_model_group_definition = fst
let name_of_attribute_declaration a = a.attr_name
let name_of_attribute_use { attr_decl = { attr_name = name } } = name
let name_of_attribute_group_definition ag = ag.ag_name
let name_of_model_group_definition mg = mg.mg_name
let name_of_particle = function
| (_, _, Elt elt_decl_ref, _) -> name_of_element_declaration !elt_decl_ref
| _ -> assert false
......@@ -52,7 +52,7 @@ let complex_type_of_type = function
| _ -> raise (Invalid_argument "complex_type_of_type")
let content_type_of_type = function
| AnyType -> assert false
| Complex (_, _, _, _, _, ct) -> ct
| Complex { ct_content = ct } -> ct
| Simple st -> CT_simple st
let facets_of_simple_type_definition = function
| Primitive _ -> no_facets
......@@ -135,22 +135,23 @@ let print_simple_type fmt = function
Format.fprintf fmt "%a'" Encodings.Utf8.dump name
| Derived (None, _, _, _) -> Format.fprintf fmt "unnamed'"
let print_complex_type fmt = function
| (id, Some name, _, _, _, _) ->
| { ct_uid = id; ct_name = Some name } ->
Format.fprintf fmt "%d:%a" id Encodings.Utf8.dump name
| (id, None, _, _, _, _) -> Format.fprintf fmt "%d:unnamed'" id
| { ct_uid = id } ->
Format.fprintf fmt "%d:unnamed'" id
let print_type fmt = function
| AnyType -> Format.fprintf fmt "xsd:anyType"
| Simple t -> Format.fprintf fmt "S:%a" print_simple_type t
| Complex t -> Format.fprintf fmt "C:%a" print_complex_type t
let print_attribute fmt (name, t, _) =
let print_attribute fmt { attr_name = name; attr_typdef = t } =
Format.fprintf fmt "@@%a:%a" Utf8.dump name print_simple_type t
let print_element fmt (id, name, _, _) =
let print_element fmt { elt_uid = id; elt_name = name } =
Format.fprintf fmt "E:%d:<%a>" id Utf8.dump name
let print_attributes fmt = List.iter (Format.fprintf fmt "%a" print_attribute)
let print_attribute_group fmt (name, _) =
Format.fprintf fmt "{agroup:%a}" Utf8.dump name
let print_model_group fmt (name, _) =
Format.fprintf fmt "{mgroup:%a}" Utf8.dump name
let print_attribute_group fmt ag =
Format.fprintf fmt "{agroup:%a}" Utf8.dump ag.ag_name
let print_model_group fmt mg =
Format.fprintf fmt "{mgroup:%a}" Utf8.dump mg.mg_name
let print_schema fmt schema =
let defined_types = (* filter out built-in types *)
List.filter
......
......@@ -56,13 +56,21 @@ module NodeSet = Set.Make (OrderedNode)
(* element and complex type constructors which take cares of unique id *)
let element, complex =
let counter = ref 0 in
let element name (type_def: type_definition ref) constr =
let element name (type_def: type_definition) constr =
incr counter;
!counter, name, type_def, constr
{ elt_uid = !counter;
elt_name = name;
elt_typdef = type_def;
elt_cstr = constr }
in
let complex name (type_def: type_definition) deriv attrs ct =
incr counter;
!counter, name, type_def, deriv, attrs, ct
{ ct_uid = !counter;
ct_name = name;
ct_typdef = type_def;
ct_deriv = deriv;
ct_attrs = attrs;
ct_content = ct }
in
(element, complex)
......@@ -214,7 +222,7 @@ let parse_elt_value_constraint type_def n =
debug_print ~n "Schema_parser.parse_elt_value_constraint";
let validate_value =
match type_def with
| Simple st_def | Complex (_, _, _, _, _, CT_simple st_def) ->
| Simple st_def | Complex { ct_content = CT_simple st_def } ->
validate_simple_type st_def
| _ -> validate_simple_type (Primitive (Utf8.mk "xsd:string"))
in
......@@ -242,25 +250,27 @@ let find_simple_type (resolver: resolver) n =
let parse_att_decl (resolver: resolver) n =
debug_print ~n "Schema_parser.parse_att_decl";
resolver#see n;
let name = _attribute "name" n in
let type_def = find_simple_type resolver n in
let value_constr = parse_att_value_constraint type_def n in
name, type_def, value_constr
let typdef = find_simple_type resolver n in
{ attr_name = _attribute "name" n;
attr_typdef = typdef;
attr_cstr = parse_att_value_constraint typdef n }
let parse_attribute_use (resolver: resolver) n =
debug_print ~n "Schema_parser.parse_attribute_use";
let required =
(_has_attribute "use" n) && (_attribute "use" n = Utf8.mk "required")
in
let (name, type_def, value_constr) as att_decl =
let att_decl =
if _has_attribute "ref" n then
resolver#resolve_att (_attribute "ref" n)
else
let (name, type_def, constr) = parse_att_decl resolver n in
(name, type_def, None) (* forget attribute value constraint *)
let a = parse_att_decl resolver n in
{ a with attr_cstr = None } (* forget attribute value constraint *)
in
let value_constr = parse_att_value_constraint type_def n in
required, att_decl, value_constr
let value_constr = parse_att_value_constraint att_decl.attr_typdef n in
{ attr_required = required;
attr_decl = att_decl;
attr_use_cstr = value_constr }
let parse_attribute_uses (resolver: resolver) derivation_type base n =
debug_print ~n "Schema_parser.parse_attribute_uses";
......@@ -271,13 +281,14 @@ let parse_attribute_uses (resolver: resolver) derivation_type base n =
List.concat (List.map
(fun att_group ->
if _has_attribute "ref" att_group then
snd (resolver#resolve_att_group (_attribute "ref" att_group))
let ag = resolver#resolve_att_group (_attribute "ref" att_group) in
ag.ag_def
else [])
(_elements "xsd:attributeGroup" n))
in
let uses3 = (* attribute uses from base type *)
match base with
| Complex (_, _, _, _, uses, _) ->
| Complex { ct_attrs = uses } ->
(match derivation_type with
| `Extension -> uses
| `Restriction ->
......@@ -333,7 +344,7 @@ let rec parse_complex_type (resolver: resolver) n =
match derivation_type with
| `Restriction ->
(match !base with
| Complex (_, _, _, _, _, (CT_simple base)) ->
| Complex { ct_content = CT_simple base } ->
let base =
if _has_element "xsd:simpleType" derivation then
parse_simple_type resolver
......@@ -354,7 +365,7 @@ let rec parse_complex_type (resolver: resolver) n =
| _ -> assert false)
| `Extension ->
(match !base with
| Complex (_, _, _, _, _, (CT_simple base)) -> CT_simple base
| Complex { ct_content = CT_simple base } -> CT_simple base
| Simple simple_type_def -> CT_simple simple_type_def
| _ -> assert false)
in
......@@ -424,7 +435,7 @@ and parse_elt_decl (resolver: resolver) n: element_declaration =
let name = _attribute "name" n in
let type_def = find_element_type resolver n in
let value_constr = parse_elt_value_constraint type_def n in
element name (ref type_def) value_constr
element name type_def value_constr
(* look for a type definition, try "simpleType" child, try "complexType"
* child, try "type" attribute, return anyType *)
......@@ -453,10 +464,8 @@ and parse_particle (resolver: resolver) n =
in
(min, max, Elt elt_decl, first)
| T_element "xsd:group" ->
let model_group =
snd (resolver#resolve_model_group (_attribute "ref" n))
in
(min, max, Model model_group, first_of_model_group model_group)
let mg = resolver#resolve_model_group (_attribute "ref" n) in
(min, max, Model mg.mg_def, first_of_model_group mg.mg_def)
| T_element "xsd:all" | T_element "xsd:sequence" | T_element "xsd:choice" ->
let model_group = parse_model_group resolver n in
(min, max, Model model_group, first_of_model_group model_group)
......@@ -481,10 +490,12 @@ and parse_att_group (resolver: resolver) n =
List.map (parse_attribute_use resolver) (_elements "xsd:attribute" n)
in
let uses2 =
List.concat (List.map (fun name -> snd (resolver#resolve_att_group name))
(List.map (_attribute "ref") (_elements "xsd:attributeGroup" n)))
List.concat
(List.map
(fun name -> (resolver#resolve_att_group name).ag_def)
(List.map (_attribute "ref") (_elements "xsd:attributeGroup" n)))
in
name, (uses1 @ uses2)
{ ag_name = name; ag_def = uses1 @ uses2 }
let parse_model_group_def (resolver: resolver) n =
debug_print ~n "Schema_parser.parse_model_group_def";
......@@ -494,15 +505,25 @@ let parse_model_group_def (resolver: resolver) n =
_element' ["xsd:all"; "xsd:choice"; "xsd:sequence"] n
in
let model_group = parse_model_group resolver model_group_node in
name, model_group
{ mg_name = name; mg_def = model_group }
(** @param root schema document root node *)
class lazy_resolver =
let fake_type_def =
Complex (~-1, Some (Utf8.mk " FAKE TYP "), AnyType, `Restriction, [],
CT_empty)
Complex
{ ct_uid = -1;
ct_name = Some (Utf8.mk " FAKE TYP ");
ct_typdef = AnyType;
ct_deriv = `Restriction;
ct_attrs = [];
ct_content = CT_empty }
in
let fake_elt_decl =
{ elt_uid = -2;
elt_name = Utf8.mk " FAKE ELT ";
elt_typdef = fake_type_def;
elt_cstr = None }
in
let fake_elt_decl = ~-2, Utf8.mk " FAKE ELT ", ref fake_type_def, None in
let is_fake_type_def = (==) fake_type_def in
let is_fake_elt_decl = (==) fake_elt_decl in
let validation_error s = raise (XSD_validation_error s) in
......
(**
Internal representation of an XML Schema.
(** Types used by all the Schema modules.
This module comes in .mli part only, hence no value and/or exceptions are
available here. See Schema_common.
Note: this module comes in .mli part only, hence no value and/or
exceptions are available here. See Schema_common.
*)
(**
Glossary:
XSD XML Schema Document
PSV Post Schema Validation
PSVI Post Schema Validation Infoset
XSD XML Schema Document
PSV Post Schema Validation
PSVI Post Schema Validation Infoset
*)
open Encodings
......@@ -54,14 +55,14 @@ and variety =
| Union of simple_type_definition list
type attribute_declaration =
Utf8.t * (* name *)
simple_type_definition * (* type *)
value_constraint option
{ attr_name : Utf8.t;
attr_typdef : simple_type_definition;
attr_cstr : value_constraint option }
type attribute_use =
bool * (* required *)
attribute_declaration *
value_constraint option
{ attr_required : bool;
attr_decl : attribute_declaration;
attr_use_cstr : value_constraint option }
(* first construction as per predictive parsing. None stands for epsilon, Some
* Utf8.t stands for a start tag of identical name *)
......@@ -90,18 +91,18 @@ and particle =
first
and element_declaration =
int * (* unique id *)
Utf8.t * (* name *)
type_definition ref * (* type *)
value_constraint option
{ elt_uid: int;
elt_name: Utf8.t;
mutable elt_typdef: type_definition;
elt_cstr: value_constraint option }
and complex_type_definition =
int * (* unique id *)
Utf8.t option * (* name *)
type_definition * (* base *)
derivation_type *
attribute_use list *
content_type
{ ct_uid: int;
ct_name: Utf8.t option;
ct_typdef: type_definition;
ct_deriv: derivation_type;
ct_attrs: attribute_use list;
ct_content: content_type }
and type_definition =
| AnyType
......@@ -109,12 +110,12 @@ and type_definition =
| Complex of complex_type_definition
type model_group_definition =
Utf8.t * (* name *)
model_group
{ mg_name : Utf8.t;
mg_def : model_group }
type attribute_group_definition =
Utf8.t * (* name *)
attribute_use list
{ ag_name : Utf8.t;
ag_def : attribute_use list }
type schema = {
targetNamespace: Ns.t;
......
(**
Internal representation of an XML Schema.
(** Types used by all the Schema modules.
This module comes in .mli part only, hence no value and/or exceptions are
available here. See Schema_common.
Note: this module comes in .mli part only, hence no value and/or
exceptions are available here. See Schema_common.
*)
(**
Glossary:
XSD XML Schema Document
PSV Post Schema Validation
PSVI Post Schema Validation Infoset
XSD XML Schema Document
PSV Post Schema Validation
PSVI Post Schema Validation Infoset
*)
open Encodings
......@@ -54,14 +55,14 @@ and variety =
| Union of simple_type_definition list
type attribute_declaration =
Utf8.t * (* name *)
simple_type_definition * (* type *)
value_constraint option
{ attr_name : Utf8.t;
attr_typdef : simple_type_definition;
attr_cstr : value_constraint option }
type attribute_use =
bool * (* required *)
attribute_declaration *
value_constraint option
{ attr_required : bool;
attr_decl : attribute_declaration;
attr_use_cstr : value_constraint option }
(* first construction as per predictive parsing. None stands for epsilon, Some
* Utf8.t stands for a start tag of identical name *)
......@@ -90,18 +91,18 @@ and particle =
first
and element_declaration =
int * (* unique id *)
Utf8.t * (* name *)
type_definition ref * (* type *)
value_constraint option
{ elt_uid: int;
elt_name: Utf8.t;
mutable elt_typdef: type_definition;
elt_cstr: value_constraint option }
and complex_type_definition =
int * (* unique id *)
Utf8.t option * (* name *)
type_definition * (* base *)
derivation_type *
attribute_use list *
content_type
{ ct_uid: int;
ct_name: Utf8.t option;
ct_typdef: type_definition;
ct_deriv: derivation_type;
ct_attrs: attribute_use list;
ct_content: content_type }
and type_definition =
| AnyType
......@@ -109,12 +110,12 @@ and type_definition =
| Complex of complex_type_definition
type model_group_definition =
Utf8.t * (* name *)
model_group
{ mg_name : Utf8.t;
mg_def : model_group }
type attribute_group_definition =
Utf8.t * (* name *)
attribute_use list
{ ag_name : Utf8.t;
ag_def : attribute_use list }
type schema = {
targetNamespace: Ns.t;
......
......@@ -268,7 +268,8 @@ let validate_attribute_uses context attr_uses =
let rec aux () = (* look for attribute events and fill "attrs" *)
match context#peek with
| E_attribute (qname, value) ->
let (_, (_, st_def, _), constr) = (* attribute use *)
let { attr_decl = { attr_typdef = st_def };
attr_use_cstr = constr } =
try
Hashtbl.find tbl qname
with Not_found ->
......@@ -287,23 +288,22 @@ let validate_attribute_uses context attr_uses =
in
aux ();
Hashtbl.iter
(fun qname (required, _, constr) ->
if required then (* check for missing required attributes *)
(fun qname at ->
if at.attr_required then (* check for missing required attributes *)
validation_error ~context (sprintf "Required attribute %s is missing"
(Ns.QName.to_string qname))
else (* add default values *)
match constr with
match at.attr_use_cstr with
| Some (`Default v) -> attrs := (qname, v) :: !attrs
| _ -> ())
tbl;
Value.vrecord !attrs
let rec validate_element (context: validation_context) decl =
let (_, name, type_def_ref, constr) = decl in
context#expect_start_tag name;
let (attrs, content) = validate_type context !type_def_ref in
let rec validate_element (context: validation_context) elt =
context#expect_start_tag elt.elt_name;
let (attrs, content) = validate_type context elt.elt_typdef in
let content = (* use default if needed and check fixed constraints *)
match constr with
match elt.elt_cstr with
| Some (`Default v) when Value.equal content empty_string -> v
| Some (`Fixed v) ->
check_fixed ~context v content;
......@@ -311,9 +311,9 @@ let rec validate_element (context: validation_context) decl =
| _ -> content
in
let element =
Value.Xml (Value.Atom (Atoms.V.mk context#ns name), attrs, content)
Value.Xml (Value.Atom (Atoms.V.mk context#ns elt.elt_name), attrs, content)
in
context#expect_end_tag name;
context#expect_end_tag elt.elt_name;
element
and validate_type context = function
......@@ -323,10 +323,9 @@ and validate_type context = function
validate_complex_type (context :> validation_context) ct_def
(** @return Value.t * Value.t (* attrs, content *) *)
and validate_complex_type context ct_def =
let (_, _, _, _, attr_uses, content_type) = ct_def in
let attrs = validate_attribute_uses context attr_uses in
let content = Value.sequence (validate_content_type context content_type) in
and validate_complex_type context ct =
let attrs = validate_attribute_uses context ct.ct_attrs in
let content = Value.sequence (validate_content_type context ct.ct_content) in
(attrs, content)
(** @return Value.t list *)
......@@ -587,8 +586,7 @@ let validate_attribute decl schema value =
in
Value.vrecord fields
let validate_attribute_group def schema value =
let (_, attr_uses) = def in
let validate_attribute_group { ag_def = attr_uses } schema value =
let stream =
match value with
| Record _ ->
......@@ -604,11 +602,11 @@ let validate_attribute_group def schema value =
in
validate_attribute_uses (new context ~stream ~schema) attr_uses
let validate_model_group def schema value =
let validate_model_group { mg_def = mg } schema value =
if not (Value.is_seq value) then
validation_error
"Only sequence values could be validated against model groups";
let stream = stream_of_value (Value.Xml (foo_atom, empty_record, value)) in
Stream.junk stream;
Value.sequence (validate_model_group (new context ~stream ~schema) (snd def))
Value.sequence (validate_model_group (new context ~stream ~schema) mg)
......@@ -1532,6 +1532,7 @@ module Schema_converter =
PSeq (acc, regexp_of_particle ~schema particle))
(regexp_of_particle ~schema hd) tl
(*
and regexp_of_content_type ~schema = function
| CT_empty -> PEpsilon
| CT_simple st -> PElem (cd_type_of_simple_type ~schema st)
......@@ -1542,21 +1543,22 @@ module Schema_converter =
mix_regexp regexp
end else
regexp
*)
and regexp_of_particle ~schema (min, max, term, _) =
mk_len_regexp ?min:(Some min) ?max (regexp_of_term ~schema term)
(** @return a pair composed by a type for the attributes (a record) and a
type for the content model (a sequence) *)
and cd_type_of_complex_type' ~schema (id, name, _, _, attr_uses, content) =
and cd_type_of_complex_type' ~schema ct =
try
PAlias (Hashtbl.find complex_memo id)
PAlias (Hashtbl.find complex_memo ct.ct_uid)
with Not_found ->
let slot = mk_derecurs_slot noloc in
Hashtbl.add complex_memo id slot;
let content_re = regexp_of_content_type ~schema content in
Hashtbl.add complex_memo ct.ct_uid slot;
(* let content_re = regexp_of_content_type ~schema ct.ct_content in*)
let content_ast_node =
match content with
match ct.ct_content with
| CT_empty -> PType Sequence.nil_type
| CT_simple st -> cd_type_of_simple_type ~schema st
| CT_model (particle, mixed) ->
......@@ -1566,39 +1568,41 @@ module Schema_converter =
PRegexp (regexp, PType Sequence.nil_type)
in
slot.pdescr <-
PTimes (cd_type_of_attr_uses ~schema attr_uses, content_ast_node);
PTimes (cd_type_of_attr_uses ~schema ct.ct_attrs, content_ast_node);
PAlias slot
(** @return a closed record *)
and cd_type_of_attr_uses ~schema attr_uses =
let fields =
List.map
(fun (required, (name, st, _), constr) ->
(fun at ->
let r =
match constr with
match at.attr_use_cstr with
| Some (`Fixed v) -> PType (Types.constant (Value.inv_const v))
| _ -> cd_type_of_simple_type ~schema st
| _ -> cd_type_of_simple_type ~schema at.attr_decl.attr_typdef
in
let r = if required then r else POptional r in
(LabelPool.mk (Ns.empty, name), r))
let r = if at.attr_required then r else POptional r in
(LabelPool.mk (Ns.empty, at.attr_decl.attr_name), r))
attr_uses in
PRecord (false, LabelMap.from_list_disj fields)
and cd_type_of_att_decl ~schema (name, st, _) =
let r = cd_type_of_simple_type ~schema st in
and cd_type_of_att_decl ~schema att =
let r = cd_type_of_simple_type ~schema att.attr_typdef in