Commit 1b8969be authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2005-03-05 21:43:26 by afrisch] Empty log message

Original author: afrisch
Date: 2005-03-05 21:43:26+00:00
parent 7b945035
(** Components of XML Schema
Reference: http://www.w3.org/TR/xmlschema-1/
*)
open Ns
open Encodings
type xs_nonNegativeInteger = Big_int.big_int
type white_space_handling = [ `Preserve | `Replace | `Collapse ]
type facets = {
length: (xs_nonNegativeInteger * bool) option; (* length, fixed *)
minLength: (xs_nonNegativeInteger * bool) option; (* length, fixed *)
maxLength: (xs_nonNegativeInteger * bool) option; (* length, fixed *)
enumeration: Value.ValueSet.t option;
whiteSpace: white_space_handling * bool; (* handling, fixed *)
maxInclusive: (Value.t * bool) option; (* max, fixed *)
maxExclusive: (Value.t * bool) option; (* max, fixed *)
minInclusive: (Value.t * bool) option; (* min, fixed *)
minExclusive: (Value.t * bool) option; (* min, fixed *)
}
type value_constraint =
| No_constraint
| Default of Utf8.t
| Fixed of Utf8.t
type 'a ptr = 'a option ref
type attribute_declaration = {
ad_name: qname;
ad_type: simple_type_definition ptr;
ad_cstr: value_constraint;
}
and element_declaration = {
ed_name: qname;
ed_type: type_definition;
ed_cstr: value_constraint;
ed_nillable: bool;
}
and complex_type_definition = {
xt_name: qname;
xt_base: type_definition;
xt_derivation: derivation_method;
xt_attrs: attribute_use list;
xt_wild: ns_wildcard option;
xt_ct: content_type;
}
and simple_type_definition = {
st_name: qname option;
st_variety: variety;
}
and attribute_use = {
au_required: bool;
au_decl: attribute_declaration;
au_cstr: value_constraint;
}
and attribute_group_definition = {
ag_name: qname;
ag_attrs: attribute_use list;
ag_wild: ns_wildcard option;
}
and model_group_definition = {
mg_name: qname;
mg_model: model_group
}
and particle = {
p_min: Big_int.big_int;
p_max: Big_int.big_int option;
p_term: term;
}
and wildcard = {
wc_ns: ns_wildcard;
wc_process: [ `Skip | `Lax | `Strict ]
}
and schema = {
sch_types: type_definition list;
sch_attributes: attribute_declaration list;
sch_elements: element_declaration list;
sch_att_groups: attribute_group_definition list;
sch_model_groups: model_group_definition list
}
and type_definition =
| Simple of simple_type_definition ptr
| Complex of complex_type_definition ptr
and term =
| Model_group of model_group
| Wildcard of wildcard
| Element of element_declaration
and model_group =
| All of particle list
| Choice of particle list
| Sequence of particle list
and derivation_method =
| Extension
| Restriction
and content_type =
| Ct_empty
| Ct_simple of simple_type_definition
| Ct_model of mixed * particle
and mixed =
| Mixed
| Element_only
and ns_wildcard =
| W_any
| W_ns of Ns.t list
| W_not of Ns.t
and variety =
| VRestriction of simple_type_definition ptr * facets
| VList of simple_type_definition ptr
| VUnion of simple_type_definition ptr list
open Schema_components
open Encodings
module U = Utf8
let xsd = Ns.mk_ascii "http://www.w3.org/2001/XMLSchema"
let (!!) s = (xsd, U.mk s)
let (@@) s = (Ns.empty, U.mk s)
let _name = (@@) "name"
let _type = (@@) "type"
let _default = (@@) "default"
let _fixed = (@@) "fixed"
let _base = (@@) "base"
let _itemType = (@@) "itemType"
let _elementFormDefault = (@@) "elementFormDefault"
let _attributeFormDefault = (@@) "attributeFormDefault"
let _targetNamespace = (@@) "targetNamespace"
let _memberTypes = (@@) "memberTypes"
let _simpleType = (!!) "simpleType"
let _complexType = (!!) "complexType"
let _restriction = (!!) "restriction"
let _list = (!!) "list"
let _union = (!!) "union"
let _attribute = (!!) "attribute"
let _element = (!!) "element"
let _attributeGroup = (!!) "attributeGroup"
let _group = (!!) "group"
let (//) x f = f x
type xml_node = {
ns_table: Ns.table;
tag : Ns.qname;
attrs : (Ns.qname * U.t) list;
children : xml_node list
}
type env = {
target_ns : Ns.t;
attr_qual : bool;
elt_qual : bool;
type_defs : (Ns.qname * type_definition) list;
attr_decls : (Ns.qname * attribute_declaration ptr) list;
elt_decls : (Ns.qname * element_declaration ptr) list;
mg_defs : (Ns.qname * model_group_definition ptr) list;
ag_defs : (Ns.qname * attribute_group_definition ptr) list;
}
let empty_facets =
{ length = None;
minLength = None;
maxLength = None;
enumeration = None;
whiteSpace = `Preserve, false;
maxInclusive = None;
maxExclusive = None;
minInclusive = None;
minExclusive = None }
let rec simple_ur_type =
{ st_name = None;
st_variety = VUnion [] }
let ur_type_ptr = Complex (ref None)
let anyType = ref None
let () =
anyType := Some
{ xt_name = (!!) "anyType";
xt_base = Complex anyType;
xt_derivation = Restriction;
xt_attrs = [];
xt_wild = Some W_any;
xt_ct = Ct_model
(Mixed, { p_min = Big_int.unit_big_int;
p_max = Some Big_int.unit_big_int;
p_term = Model_group
(Sequence [
{ p_min = Big_int.zero_big_int;
p_max = None;
p_term = Wildcard {
wc_ns = W_any;
wc_process = `Strict; (* ??? *)
} } ])} )
}
let ptr x = ref (Some x)
let str_of_qname (ns,l) =
"{" ^ U.get_str (Ns.value ns) ^ "}:" ^ U.get_str l
let simple_ur_type_ptr = ptr simple_ur_type
let mk_qname (env,s) = (env.target_ns, s)
let mk_qname_option a = Some (mk_qname a)
let rec drop_initial_ws s i =
if (i = String.length s) then i
else match s.[i] with
| '\009' | '\010' | '\013' | '\032' -> drop_initial_ws s (succ i)
| _ -> i
let rec drop_final_ws s i =
if (i = 0) then i
else match s.[pred i] with
| '\009' | '\010' | '\013' | '\032' -> drop_final_ws s (pred i)
| _ -> i
let normalize s =
let s = U.get_str s in
let j = drop_final_ws s (String.length s) in
if (j = 0) then U.empty
else
let i = drop_initial_ws s 0 in
U.mk (String.sub s i (j-i))
let resolve_qname n s =
Ns.map_tag n.ns_table (normalize s)
let resolve_qnames n slist =
(* todo ! *)
[]
let need_attribute attr _ =
failwith ("Missing attribute " ^ attr)
let error msg _ =
failwith ("Error: " ^ msg)
let print_qname (ns,l) =
print_endline ("Looking for " ^ (U.get_str l))
let norm_attr attr found ?notfound ((env,n) as arg) =
print_qname attr;
try
let v = List.assoc attr n.attrs in
let v = normalize v in
found (env,v)
with Not_found ->
match notfound with
| Some f -> f arg
| None -> failwith ("Need attribute " ^ U.get_str (snd attr))
let qname_attr attr found ?notfound ((env,n) as arg) =
norm_attr attr
(fun (env,v) -> found (env,resolve_qname n v))
?notfound arg
let child tag found ?notfound ((env,n) as arg) =
print_qname tag;
try
let n = List.find (fun n -> n.tag = tag) n.children in
found (env,n)
with Not_found ->
match notfound with
| Some f -> f arg
| None -> failwith ("Need child " ^ U.get_str (snd tag))
let children tag found ((env,n) as arg) =
let c = List.filter (fun n -> n.tag = tag) n.children in
List.map (fun n -> found (env,n)) c
let qnames_attr attr found ?notfound ((env,n) as arg) =
norm_attr attr
(fun (env,v) -> found (env,resolve_qnames n v))
?notfound arg
let cst x _ = x
let local (_,l) = U.get_str l
let top_name = norm_attr _name mk_qname
let opt_name = norm_attr _name mk_qname_option ~notfound:(cst None)
let value_constraint =
norm_attr _default
(fun (_,s) -> Default s)
~notfound:
(norm_attr _fixed
(fun (_,s) -> Fixed s)
~notfound:(cst No_constraint))
let set_ref ((env,n) as arg) resolve decl =
let name = arg // top_name in
let r = resolve (env,name) in
let x = decl arg in
r := Some x
let rec toplevel_attribute_declaration arg =
{ ad_name = arg // top_name;
ad_type =
arg //
child _simpleType simple_type
~notfound:
(qname_attr _type resolve_simple_type
~notfound:(cst simple_ur_type_ptr));
ad_cstr = arg // value_constraint
}
and toplevel_element_declaration arg =
{ ed_name = arg // top_name;
ed_type =
arg //
child _simpleType (fun arg -> Simple (simple_type arg))
~notfound:
(child _complexType complex_type
~notfound:
(qname_attr _type resolve_type
~notfound:(cst ur_type_ptr)));
ed_cstr = arg // value_constraint;
ed_nillable = false; (* TODO *)
}
and simple_type arg =
let name = arg // opt_name in
let v =
child _restriction restriction
~notfound:
(child _list list
~notfound:
(child _union union ?notfound:None))
arg in
ptr
{ st_name = name;
st_variety = v }
and complex_type arg =
failwith "complex_type"
and restriction arg =
let base =
qname_attr _base
resolve_simple_type
~notfound:(child _simpleType simple_type)
arg in
VRestriction (base,empty_facets)
and list arg =
let item_type =
qname_attr _itemType
resolve_simple_type
~notfound:(child _simpleType simple_type)
arg in
VList item_type
and union arg =
let member_types =
qnames_attr _memberTypes
resolve_simple_types
~notfound:(children _simpleType simple_type) arg in
VUnion member_types
and toplevel ((env,n) as arg) =
match local n.tag with
| "attribute" ->
set_ref arg resolve_attribute toplevel_attribute_declaration
| "element" ->
set_ref arg resolve_element toplevel_element_declaration
| s -> failwith s
and schema n =
let arg = (),n in
let qual a =
norm_attr a
(fun v -> local v = "qualified")
~notfound:(cst false)
arg in
let env = {
target_ns = norm_attr _targetNamespace
(fun (_,ns) -> Ns.mk ns) ~notfound:(cst Ns.empty) arg;
attr_qual = qual _attributeFormDefault;
elt_qual= qual _elementFormDefault;
type_defs = [];
attr_decls = [];
elt_decls = [];
mg_defs = [];
ag_defs = []
} in
let names a f = List.map f (children a top_name (env,n)) in
let names_none a = names a (fun name -> (name, ref None)) in
let env =
{ env with
type_defs = (
names _simpleType (fun name -> (name,Simple (ref None))) @
names _complexType (fun name -> (name,Simple (ref None))) );
attr_decls = names_none _attribute;
elt_decls = names_none _element;
mg_defs = names_none _group;
ag_defs = names_none _attributeGroup
} in
List.iter (fun x -> toplevel (env,x)) n.children;
Printf.printf "# type defs: %d\n" (List.length env.type_defs);
Printf.printf "# mg defs: %d\n" (List.length env.mg_defs);
Printf.printf "# ag defs: %d\n" (List.length env.ag_defs);
Printf.printf "# attr decls: %d\n" (List.length env.attr_decls);
Printf.printf "# elt decls: %d\n" (List.length env.elt_decls);
()
and resolve_simple_type arg =
match resolve_type arg with
| Simple t -> t
| Complex _ ->
failwith "Complex type used where simple type expected"
and resolve_attribute (env,x) =
print_endline ("Resolve attribute " ^ (str_of_qname x));
List.assoc x env.attr_decls
and resolve_element (env,x) =
print_endline ("Resolve element " ^ (str_of_qname x));
List.assoc x env.elt_decls
and resolve_type (env,x) =
print_endline ("Resolve type " ^ (str_of_qname x));
List.assoc x env.type_defs
and resolve_simple_types (env,args) =
List.map (fun a -> resolve_simple_type (env,a)) args
(***************************************)
type stack =
| Start of Ns.table * Ns.qname * (Ns.qname * U.t) list * Ns.table * stack
| Element of xml_node * stack
| SEmpty
let stack = ref SEmpty
let ns_table = ref Ns.empty_table
let rec create_elt accu = function
| Element (x,st) -> create_elt (x::accu) st
| Start (t,name,att,table,st) ->
let elt = {
ns_table = t;
tag = name;
attrs = att;
children = accu
} in
stack := Element (elt,st);
ns_table := table
| SEmpty -> assert false
let start_element_handler name att =
let (table,name,att) = Ns.process_start_tag !ns_table name att in
stack := Start (table,name,att,!ns_table, !stack);
ns_table := table
let end_element_handler _ =
create_elt [] !stack
open Pxp_yacc
open Pxp_lexer_types
open Pxp_types
open Pxp_ev_parser
let pxp_config =
{ default_config with
encoding = `Enc_utf8;
store_element_positions = false;
drop_ignorable_whitespace = true
}
let pxp_handle_event = function
| E_start_tag (name,att,_,_) -> start_element_handler name att
| E_end_tag (_,_) -> end_element_handler ()
| _ -> ()
let load_xml s =
ns_table := Ns.empty_table;
stack := SEmpty;
let src = from_file s in
let mgr = create_entity_manager pxp_config src in
process_entity pxp_config (`Entry_document[`Extend_dtd_fully]) mgr
pxp_handle_event;
match !stack with
| Element (x,SEmpty) -> stack := SEmpty; x
| _ -> failwith "No XML stream to parse"
let print_qname ppf (_,s) =
Format.fprintf ppf "%s" (U.get_str s)
let rec print_node x ppf n =
Format.fprintf ppf "%s%a%a@.%a"
x
print_qname n.tag
print_attrs n.attrs
(print_children (x ^ " ")) n.children
and print_attrs ppf a =
List.iter
(fun (n,v) -> Format.fprintf ppf " %a=%s" print_qname n (U.get_str v))
a
and print_children x ppf l =
List.iter (print_node x ppf) l
let () =
let xml = load_xml "tests/schema/mails.xsd" in
Format.fprintf Format.std_formatter "%a@." (print_node "") xml;
let _ = schema xml in
()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment