Commit 514f5e6b authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2003-10-24 09:38:50 by szach] - changed schema internal representation (more compact, support

  {attribute,model} groups)

- rewritten parser from scratch, support almost all XSD features

- added support for user defined simple types

- rewritten from scratch simple type validation

- commented out, for the moment, schema complex type validation

Original author: szach
Date: 2003-10-24 09:38:50+00:00
parent 14407337
open Printf
open Schema_types
open Schema_common
(* TODO dates: boundary checks (e.g. 95/26/2003) *)
(* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation
*)
(* auxiliary stuff *)
(** {2 Aux/Misc stuff} *)
let regexp' s = Pcre.regexp ~flags:[`UTF8] s
let xml_S_RE = regexp' "[ \\t\\r\\n]+"
(* split a string at XML recommendation "S" production boundaries *)
let split_xml_S s = Pcre.split ~rex:xml_S_RE s
let norm_RE = regexp' "[\\t\\r\\n]"
let char_of_hex =
let int_of_hex_char = function
......@@ -36,15 +43,18 @@ let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw
let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw
let gMonth_RE_raw = "--(\\d{2})--(%s)?"
(** {2 CDuce types} *)
let positive_field = false, "positive", Builtin_defs.bool
let year_field = false, "year", Builtin_defs.int
let month_field = false, "month", Builtin_defs.int
let day_field = false, "day", Builtin_defs.int
let hour_field = false, "hour", Builtin_defs.int
let minute_field = false, "minute", Builtin_defs.int
let second_field = false, "second", Builtin_defs.int ;; (* TODO this should be a decimal *)
(* some cduce types corresponding to schema ones *)
let second_field = false, "second", Builtin_defs.int
(* TODO this should be a decimal *)
let time_type_fields = [ hour_field; minute_field; second_field ]
let date_type_fields = [ year_field; month_field; day_field ]
(* TODO the constraint that at least one part should be present isn't easily
expressible with CDuce types *)
......@@ -57,16 +67,11 @@ let duration_type = Types.rec_of_list' [
true, "minute", Builtin_defs.int;
true, "second", Builtin_defs.int; (* TODO this should be a decimal *)
]
let timezone_type = Types.rec_of_list' [
false, "positive", Builtin_defs.bool;
hour_field; minute_field
]
let timezone_type_fields = [ true, "timezone", timezone_type ]
let time_type_fields = [ hour_field; minute_field; second_field ]
let date_type_fields = [ year_field; month_field; day_field ]
let time_type = Types.rec_of_list' (time_type_fields @ timezone_type_fields)
let date_type = Types.rec_of_list' (positive_field :: date_type_fields)
let dateTime_type =
......@@ -85,39 +90,16 @@ let negativeInteger_type = Builtin_defs.neg_int
let nonNegativeInteger_type = Builtin_defs.non_neg_int
let positiveInteger_type = Builtin_defs.pos_int
(* validation functions: string -> Value.t *)
let string_list_type = Sequence.star Builtin_defs.string
(** {2 Validation functions (string -> Value.t)} *)
let validate_string = Value.string_latin1
let parse_sign = function "+" | "" -> Value.vtrue | _ -> Value.vfalse
let validate_integer s =
try
Value.Integer (Intervals.V.mk s)
with Failure _ -> simple_type_error ~typ:"integer" ~value:s
let validate_interval interval type_name s =
let integer =
try
Intervals.V.mk s
with Failure _ -> simple_type_error ~typ:type_name ~value:s
in
if Intervals.contains integer interval then
Value.Integer integer
else
simple_type_error ~typ:type_name ~value:s
let validate_nonPositiveInteger =
validate_interval (Intervals.left Intervals.V.zero) "nonPositiveInteger"
let validate_negativeInteger =
validate_interval (Intervals.left Intervals.V.minus_one) "negativeInteger"
let validate_nonNegativeInteger =
validate_interval (Intervals.right Intervals.V.zero) "nonNegativeInteger"
let validate_positiveInteger =
validate_interval (Intervals.right Intervals.V.one) "positiveInteger"
let validate_bool = function
| "true" | "1" -> Value.vtrue
| "false" | "0" -> Value.vfalse
| v -> simple_type_error ~typ:"boolean" ~value:v
let parse_sign = function "+" | "" -> Value.vtrue | _ -> Value.vfalse
let parse_date =
let rex = Pcre.regexp (add_limits date_RE_raw) in
......@@ -152,6 +134,7 @@ let parse_timezone =
| _ ->
["positive", parse_sign subs.(3);
"hour", validate_integer subs.(4);
"minute", validate_integer subs.(5)]
(* parse a timezone from a string, if it's empty return the empty list,
otherwise return a list containing a pair <"timezone", timezone value> *)
......@@ -159,6 +142,39 @@ let parse_timezone' = function
| "" -> []
| v -> [ "timezone", Value.vrecord (parse_timezone v) ]
let validate_string s =
Value.string_utf8 (Encodings.Utf8.mk s)
let validate_normalizedString s =
validate_string (normalize_white_space `Replace s)
let validate_token s =
validate_string (normalize_white_space `Collapse s)
let validate_token_list s =
Value.sequence (List.map validate_token (split_xml_S s))
let validate_interval interval type_name s =
let integer =
try
Intervals.V.mk s
with Failure _ -> simple_type_error ~typ:type_name ~value:s
in
if Intervals.contains integer interval then
Value.Integer integer
else
simple_type_error ~typ:type_name ~value:s
let validate_nonPositiveInteger =
validate_interval (Intervals.left Intervals.V.zero) "nonPositiveInteger"
let validate_negativeInteger =
validate_interval (Intervals.left Intervals.V.minus_one) "negativeInteger"
let validate_nonNegativeInteger =
validate_interval (Intervals.right Intervals.V.zero) "nonNegativeInteger"
let validate_positiveInteger =
validate_interval (Intervals.right Intervals.V.one) "positiveInteger"
let validate_bool = function
| "true" | "1" -> Value.vtrue
| "false" | "0" -> Value.vfalse
| v -> simple_type_error ~typ:"boolean" ~value:v
let validate_duration =
let rex = Pcre.regexp
"^([+-])?P((\\d+)Y)?((\\d+)M)?((\\d+)D)?(T((\\d+)H)?((\\d+)M)?((\\d+)S)?)?$"
......@@ -314,7 +330,6 @@ let validate_hexBinary s =
aux 0;
validate_string res
(* TODO test base64Binary simple type! *)
let validate_base64Binary s = validate_string (Netencoding.Base64.decode s)
let validate_anyURI s =
......@@ -323,49 +338,133 @@ let validate_anyURI s =
Neturl.ip_url_syntax s))
with Neturl.Malformed_URL -> simple_type_error ~typ:"anyURI" ~value:s
let builtins = [
"xsd:string", (Builtin_defs.string, validate_string);
"xsd:integer", (Builtin_defs.int, validate_integer);
"xsd:nonPositiveInteger",
(nonPositiveInteger_type, validate_nonPositiveInteger);
"xsd:negativeInteger", (negativeInteger_type, validate_negativeInteger);
"xsd:nonNegativeInteger",
(nonNegativeInteger_type, validate_nonNegativeInteger);
"xsd:positiveInteger", (positiveInteger_type, validate_positiveInteger);
"xsd:boolean", (Builtin_defs.bool, validate_bool);
"xsd:hexBinary", (Builtin_defs.string, validate_hexBinary);
"xsd:base64Binary", (Builtin_defs.string, validate_base64Binary);
"xsd:anyURI", (Builtin_defs.string, validate_anyURI);
(* TODO anyType: is this useful? *)
"xsd:anyType", (Types.any, (fun _ -> assert false));
(* TODO anySimpleType: is ok as a string? *)
"xsd:anySimpleType", (Builtin_defs.string, validate_string);
"xsd:duration", (duration_type, validate_duration);
"xsd:dateTime", (dateTime_type, validate_dateTime);
"xsd:time", (time_type, validate_time);
"xsd:date", (date_type, validate_date);
"xsd:gYearMonth", (gYearMonth_type, validate_gYearMonth);
"xsd:gYear", (gYear_type, validate_gYear);
"xsd:gMonthDay", (gMonthDay_type, validate_gMonthDay);
"xsd:gDay", (gDay_type, validate_gDay);
"xsd:gMonth", (gMonth_type, validate_gMonth);
]
(* module's interface implementation *)
let names = List.sort compare (List.map fst builtins)
let len = List.length builtins
let cd_types = Hashtbl.create len
let validators = Hashtbl.create len
let __validate_fun_of_builtin = Hashtbl.find validators
let cd_type_of_builtin = Hashtbl.find cd_types
let fill () =
List.iter
(fun (name, (typ, validator)) ->
Hashtbl.add cd_types name typ;
Hashtbl.add validators name validator)
builtins
let _ = fill ()
(** {2 API backend} *)
let builtins = Hashtbl.create 50
let reg name spec = Hashtbl.add builtins name spec
let alias alias name =
Hashtbl.add builtins alias
(let (st_def, descr, validator) = Hashtbl.find builtins name in
let new_def =
match st_def with
| Primitive _ -> Primitive alias
| Derived (_, variety, facets, base) ->
Derived (Some alias, variety, facets, base)
in
(new_def, descr, validator))
let restrict' name basename new_facets =
let (base, _, _) = Hashtbl.find builtins basename in
let variety = variety_of_simple_type_definition base in
let facets =
merge_facets (facets_of_simple_type_definition base) new_facets
in
Derived (Some name, variety, facets, base)
let list' name itemname =
let (base, _, _) = Hashtbl.find builtins itemname in
Derived (Some name, List base, no_facets, base)
let fill () = (* fill "builtins" hashtbl *)
(* TODO missing built-in simple types: xsd:float, xsd:double, xsd:QName,
* xsd:NOTATION, xsd:decimal, xsd:long, xsd:int: xsd:short, xsd:byte
*)
let zero = Value.Integer Intervals.V.zero in
let one = Value.Integer (Intervals.V.succ Intervals.V.zero) in
let minus_one = Value.Integer (Intervals.V.pred Intervals.V.zero) in
(* primitive builtins *)
reg "xsd:anySimpleType"
(Primitive "xsd:anySimpleType", Builtin_defs.string, validate_string);
alias "xsd:anyType" "xsd:anySimpleType";
reg "xsd:string"
(Primitive "xsd:string", Builtin_defs.string, validate_string);
reg "xsd:integer"
(* not a primitive for Schema, but a primitive for us since we have neither
* xsd:decimal nor {total,fraction}Digits support *)
(Primitive "xsd:integer", Builtin_defs.int, validate_integer);
reg "xsd:boolean"
(Primitive "xsd:boolean", Builtin_defs.bool, validate_bool);
reg "xsd:hexBinary"
(Primitive "xsd:hexBinary", Builtin_defs.string, validate_hexBinary);
reg "xsd:base64Binary"
(Primitive "xsd:base64Binary", Builtin_defs.string, validate_base64Binary);
reg "xsd:anyURI"
(Primitive "xsd:anyURI", Builtin_defs.string, validate_anyURI);
reg "xsd:duration"
(Primitive "xsd:duration", duration_type, validate_duration);
reg "xsd:dateTime"
(Primitive "xsd:dateTime", dateTime_type, validate_dateTime);
reg "xsd:time"
(Primitive "xsd:time", time_type, validate_time);
reg "xsd:date"
(Primitive "xsd:date", date_type, validate_date);
reg "xsd:gYearMonth"
(Primitive "xsd:gYearMonth", gYearMonth_type, validate_gYearMonth);
reg "xsd:gYear"
(Primitive "xsd:gYear", gYear_type, validate_gYear);
reg "xsd:gMonthDay"
(Primitive "xsd:gMonthDay", gMonthDay_type, validate_gMonthDay);
reg "xsd:gDay"
(Primitive "xsd:gDay", gDay_type, validate_gDay);
reg "xsd:gMonth"
(Primitive "xsd:gMonth", gMonth_type, validate_gMonth);
(* derived builtins *)
reg "xsd:nonPositiveInteger"
(restrict' "xsd:nonPositiveInteger" "xsd:integer"
{ no_facets with maxInclusive = Some (zero, false) },
nonPositiveInteger_type, validate_nonPositiveInteger);
reg "xsd:negativeInteger"
(restrict' "xsd:negativeInteger" "xsd:nonPositiveInteger"
{ no_facets with maxInclusive = Some (minus_one, false) },
negativeInteger_type, validate_negativeInteger);
reg "xsd:nonNegativeInteger"
(restrict' "xsd:nonNegativeInteger" "xsd:integer"
{ no_facets with minInclusive = Some (zero, false) },
nonNegativeInteger_type, validate_nonNegativeInteger);
reg "xsd:positiveInteger"
(restrict' "xsd:positiveInteger" "xsd:nonNegativeInteger"
{ no_facets with minInclusive = Some (one, false) },
positiveInteger_type, validate_positiveInteger);
reg "xsd:normalizedString"
(restrict' "xsd:normalizedString" "xsd:string"
{ no_facets with whiteSpace = `Replace, false },
Builtin_defs.string, validate_normalizedString);
reg "xsd:token"
(restrict' "xsd:token" "xsd:normalizedString"
{ no_facets with whiteSpace = `Collapse, false },
Builtin_defs.string, validate_token);
alias "xsd:language" "xsd:token";
alias "xsd:Name" "xsd:token";
alias "xsd:NMTOKEN" "xsd:token";
alias "xsd:NCName" "xsd:token";
alias "xsd:ID" "xsd:token";
alias "xsd:IDREF" "xsd:token";
alias "xsd:ENTITY" "xsd:token";
reg "xsd:NMTOKENS"
(list' "xsd:NMTOKENS" "xsd:token",
string_list_type, validate_token_list);
alias "xsd:IDREFS" "xsd:NMTOKENS";
alias "xsd:ENTITIES" "xsd:NMTOKENS"
let _ = try fill () with Not_found -> assert false
(** {2 API} *)
let is_builtin = Hashtbl.mem builtins
let iter_builtin f =
Hashtbl.iter (fun _ (type_def, _, _) -> f type_def) builtins
let lookup name = Hashtbl.find builtins name
let fst (x,_,_) = x
let snd (_,y,_) = y
let trd (_,_,z) = z
let get_builtin name = fst (lookup name)
let cd_type_of_builtin name = snd (lookup name)
let validator_of_builtin name = trd (lookup name)
val names: string list
(** all schema simple type names used in this API are prefixed with
* Schema_xml.xsd_prefix *)
val cd_type_of_builtin: string -> Types.descr
(* exporting some validation functions that are useful while parsing facets,
the other are accessible using the __validate_fun_of_builtin function *)
val is_builtin: string -> bool
val get_builtin: string -> Schema_types.simple_type_definition
val iter_builtin: (Schema_types.simple_type_definition -> unit) -> unit
val validate_nonNegativeInteger: string -> Value.t
val validate_positiveInteger: string -> Value.t
(** not to be used: use Schema_validator.validator_of_simple_type instead *)
val __validate_fun_of_builtin: string -> (string -> Value.t)
val cd_type_of_builtin: string -> Types.descr
val validator_of_builtin: string -> (string -> Value.t)
open Printf
open Schema_types
let no_facets = {
length = None;
minLength = None;
maxLength = None;
(* pattern = []; *)
enumeration = None;
whiteSpace = `Collapse, true;
maxInclusive = None;
maxExclusive = None;
minInclusive = None;
minExclusive = None;
(*
totalDigits = None;
fractionDigits = None;
*)
}
let name_of_element_declaration (_, name, _, _) = name
let name_of_simple_type_definition = function
| Primitive name -> name
| Derived (Some name, _, _, _) -> name
| _ -> raise (Invalid_argument "anonymous simple type definition")
let name_of_complex_type_definition = function
| _, Some name, _, _, _, _ -> name
| _ -> raise (Invalid_argument "anonymous complex type definition")
let name_of_type_definition = function
| AnyType -> "xsd:anyType"
| Simple st -> name_of_simple_type_definition st
| Complex ct -> name_of_complex_type_definition ct
let name_of_attribute_declaration (name, _, _) = name
let name_of_attribute_use (_, (name, _, _), _) = name
let variety_of_simple_type_definition = function
| (Primitive name) as st -> Atomic st
| Derived (_, variety, _, _) -> variety
let simple_type_of_type = function
| Simple s -> s
| _ -> raise (Invalid_argument "simple_type_of_type")
let complex_type_of_type = function
| Complex c -> c
| _ -> raise (Invalid_argument "complex_type_of_type")
let content_type_of_type = function
| AnyType -> assert false
| Complex (_, _, _, _, _, ct) -> ct
| Simple st -> CT_simple st
let facets_of_simple_type_definition = function
| Primitive _ -> no_facets
| Derived (_, _, facets, _) -> facets
let iter_types schema f = List.iter f schema.types
let iter_attributes schema f = List.iter f schema.attributes
let iter_elements schema f = List.iter f schema.elements
let iter_attribute_groups schema f = List.iter f schema.attribute_groups
let iter_model_groups schema f = List.iter f schema.model_groups
exception XSD_validation_error of string
exception XSI_validation_error of string
let regexp' s = Pcre.regexp ~flags:[`UTF8] s
let rec normalize_white_space =
let ws_RE = regexp' "[\t\r\n]" in
let spaces_RE = regexp' "[ ]+" in
let margins_RE = regexp' "^ (.*) $" in
fun handling s ->
match handling with
| `Preserve -> s
| `Replace -> Pcre.replace ~rex:ws_RE ~templ:" " s
| `Collapse ->
let s' =
Pcre.replace ~rex:spaces_RE ~templ:" "
(normalize_white_space `Replace s)
in
Pcre.replace ~rex:margins_RE ~templ:"$1" s'
let anySimpleType = Primitive "xsd:anySimpleType"
let anyType = AnyType
let get_interval facets =
(* ASSUMPTION:
* not (facets.minInclusive = Some _ && facets.minExclusive = Some _)
* not (facets.maxInclusive = Some _ && facets.maxExclusive = Some _)
* Value.t is an integer! (no other intervals are actually supported
* by the CDuce type system)
*)
let min =
match facets.minInclusive, facets.minExclusive with
| Some (Value.Integer i, _), None -> Some i
| None, Some (Value.Integer i, _) -> Some (Intervals.V.succ i)
| None, None -> None
| _ -> assert false
in
let max =
match facets.maxInclusive, facets.maxExclusive with
| Some (Value.Integer i, _), None -> Some i
| None, Some (Value.Integer i, _) -> Some (Intervals.V.pred i)
| None, None -> None
| _ -> assert false
in
match min, max with
| Some min, Some max -> Intervals.bounded min max
| Some min, None -> Intervals.right min
| None, Some max -> Intervals.left max
| None, None -> Intervals.any
let print_simple_type fmt = function
| Primitive name -> Format.fprintf fmt "%s" name
| Derived (Some name, _, _, _) -> Format.fprintf fmt "%s'" name
| Derived (None, _, _, _) -> Format.fprintf fmt "unnamed'"
let print_complex_type fmt = function
| (id, Some name, _, _, _, _) -> Format.fprintf fmt "%d:%s" id name
| (id, None, _, _, _, _) -> Format.fprintf fmt "%d:unnamed'" id
let print_type fmt = function
| AnyType -> Format.fprintf fmt "xsd:anyType"
| Simple t -> Format.fprintf fmt "S:%a" print_simple_type t
| Complex t -> Format.fprintf fmt "C:%a" print_complex_type t
let print_attribute fmt (name, t, _) =
Format.fprintf fmt "@@%s:%a" name print_simple_type t
let print_element fmt (id, name, _, _) = Format.fprintf fmt "E:%d:<%s>" id name
let print_attributes fmt = List.iter (Format.fprintf fmt "%a" print_attribute)
let print_attribute_group fmt (name, _) = Format.fprintf fmt "{agroup:%s}" name
let print_model_group fmt (name, _) = Format.fprintf fmt "{mgroup:%s}" name
let print_schema fmt schema =
let defined_types = (* filter out built-in types *)
List.filter
(fun def -> not (Schema_xml.has_xsd_prefix (name_of_type_definition def)))
schema.types
in
if defined_types <> [] then begin
Format.fprintf fmt "Types: ";
List.iter (fun c -> print_type fmt c; Format.fprintf fmt " ")
defined_types;
Format.fprintf fmt "\n"
end;
if schema.attributes <> [] then begin
Format.fprintf fmt "Attributes: ";
List.iter (fun c -> print_attribute fmt c; Format.fprintf fmt " ")
schema.attributes;
Format.fprintf fmt "\n"
end;
if schema.elements <> [] then begin
Format.fprintf fmt "Elements: ";
List.iter (fun c -> print_element fmt c; Format.fprintf fmt " ")
schema.elements;
Format.fprintf fmt "\n"
end;
if schema.attribute_groups <> [] then begin
Format.fprintf fmt "Attribute groups: ";
List.iter (fun c -> print_attribute_group fmt c; Format.fprintf fmt " ")
schema.attribute_groups;
Format.fprintf fmt "\n"
end;
if schema.model_groups <> [] then begin
Format.fprintf fmt "Model groups: ";
List.iter (fun c -> print_model_group fmt c; Format.fprintf fmt " ")
schema.model_groups;
Format.fprintf fmt "\n"
end
(** naive implementation: doesn't follow XML Schema constraints on facets
* merging. Here all new facets override old ones *)
let merge_facets old_facets new_facets =
let maxInclusive, maxExclusive =
match new_facets.maxInclusive, new_facets.maxExclusive with
| None, None -> old_facets.maxInclusive, old_facets.maxExclusive
| Some _, Some _ -> assert false
| v -> v
in
let minInclusive, minExclusive =
match new_facets.minInclusive, new_facets.minExclusive with
| None, None -> old_facets.minInclusive, old_facets.minExclusive
| Some _, Some _ -> assert false
| v -> v
in
{ old_facets with
length =
(match new_facets.length with
| None -> old_facets.length
| v -> v);
minLength =
(match new_facets.minLength with
| None -> old_facets.minLength
| v -> v);
maxLength =
(match new_facets.maxLength with
| None -> old_facets.maxLength
| v -> v);
enumeration =
(match new_facets.enumeration with
| None -> old_facets.enumeration
| v -> v);
whiteSpace = new_facets.whiteSpace;
maxInclusive = maxInclusive;
maxExclusive = maxExclusive;
minInclusive = minInclusive;
minExclusive = minExclusive;
}
let restrict base new_facets new_name =
let variety = variety_of_simple_type_definition base in
let facets =
merge_facets (facets_of_simple_type_definition base) new_facets
in
Derived (new_name, variety, facets, base)
(** Schema common functionalities depending only on Schema_types *)
open Schema_types
(** validation failure *)
exception XSD_validation_error of string
exception XSI_validation_error of string
(** {2 XSD printer *)
val print_schema : Format.formatter -> schema -> unit
val print_type : Format.formatter -> type_definition -> unit
val print_attribute : Format.formatter -> attribute_declaration -> unit
val print_element : Format.formatter -> element_declaration -> unit
val print_attribute_group :
Format.formatter -> attribute_group_definition -> unit
val print_model_group : Format.formatter -> model_group_definition -> unit
val print_simple_type : Format.formatter -> simple_type_definition -> unit
val print_complex_type : Format.formatter -> complex_type_definition -> unit
(*
val print_ct : Format.formatter -> content_type -> unit
val print_particle : Format.formatter -> particle -> unit