Commit 2da3d5a2 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2003-06-12 11:54:45 by cvscast] Merging schema

Original author: cvscast
Date: 2003-06-12 11:54:49+00:00
parent 616aecd7
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="root" type="rootT"/>
<xsd:complexType name="rootT">
<xsd:choice minOccurs="2" maxOccurs="unbounded">
<xsd:element name="foo" type="xsd:string" />
<xsd:element name="bar" type="xsd:string" />
</xsd:choice>
</xsd:complexType>
</xsd:schema>
#!/bin/sh
# to be run from CDuce source tree root
VALIDATE="./validate"
ROOT="schema/regtest"
LOG="$ROOT/regtest.log"
if [ -f "$ROOT/$1.xsd" ]; then
$VALIDATE $ROOT/$1.xsd $ROOT/$1.xml
else
> $LOG
date | tee $LOG
for f in $ROOT/*.xsd; do
echo -n "$(basename $f) ... " | tee -a $LOG
if ($VALIDATE $f $(echo "$f" | sed 's/xsd$/xml/') &> /dev/null); then
echo "OK" | tee -a $LOG
else
echo "FAILURE" | tee -a $LOG
fi
done
fi
<time>13:20:00-05:00</time>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="time" type="xsd:time"/>
</xsd:schema>
<root>
<bar />
<foo />
<foo />
<bar />
<bar />
<foo />
</root>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="root" type="rootT" />
<xsd:complexType name="rootT">
<xsd:choice minOccurs="2" maxOccurs="unbounded">
<xsd:element name="bar" type="xsd:string" />
<xsd:element name="foo" type="xsd:string" />
</xsd:choice>
</xsd:complexType>
</xsd:schema>
open Printf ;;
open Schema_types ;;
(* TODO dates: boundary checks (e.g. 95/26/2003) *)
(* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation
*)
(* auxiliary stuff *)
let char_of_hex =
let int_of_hex_char = function
| '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 | '6' -> 6
| '7' -> 7 | '8' -> 8 | '9' -> 9 | 'a' | 'A' -> 10 | 'b' | 'B' -> 11
| 'c' | 'C' -> 12 | 'd' | 'D' -> 13 | 'e' | 'E' -> 14 | 'f' | 'F' -> 15
| _ -> assert false
in
(* most significative, least significative *)
fun ms ls -> Char.unsafe_chr (int_of_hex_char ms * 16 + int_of_hex_char ls)
;;
let strip_parens s = Pcre.replace ~pat:"[()]" s ;;
let add_limits s = "^" ^ s ^ "$" ;;
let simple_type_error ~typ ~value =
raise (XSI_validation_error (sprintf "'%s' isn't a valid xsd:%s" value typ))
;;
(* regular expressions used to validate built-in types *)
let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))" ;;
let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})" ;;
let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})" ;;
let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw ;;
let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw ;;
let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw ;;
let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw ;;
let gMonth_RE_raw = "--(\\d{2})--(%s)?" ;;
let positive_field = false, "positive", Builtin_defs.bool ;;
let year_field = false, "year", Builtin_defs.int ;;
let month_field = false, "month", Builtin_defs.int ;;
let day_field = false, "day", Builtin_defs.int ;;
let hour_field = false, "hour", Builtin_defs.int ;;
let minute_field = false, "minute", Builtin_defs.int ;;
let second_field = false, "second", Builtin_defs.int ;; (* TODO this should be a decimal *)
(* some cduce types corresponding to schema ones *)
(* TODO the constraint that at least one part should be present isn't easily
expressible with CDuce types *)
let duration_type = Types.rec_of_list' [
positive_field;
true, "year", Builtin_defs.int;
true, "month", Builtin_defs.int;
true, "day", Builtin_defs.int;
true, "hour", Builtin_defs.int;
true, "minute", Builtin_defs.int;
true, "second", Builtin_defs.int; (* TODO this should be a decimal *)
] ;;
let timezone_type = Types.rec_of_list' [
false, "positive", Builtin_defs.bool;
hour_field; minute_field
] ;;
let timezone_type_fields = [ true, "timezone", timezone_type ] ;;
let time_type_fields = [ hour_field; minute_field; second_field ] ;;
let date_type_fields = [ year_field; month_field; day_field ] ;;
let time_type = Types.rec_of_list' (time_type_fields @ timezone_type_fields) ;;
let date_type = Types.rec_of_list' (positive_field :: date_type_fields) ;;
let dateTime_type =
Types.rec_of_list' (positive_field ::
(date_type_fields @ time_type_fields @ timezone_type_fields))
;;
let gYearMonth_type = Types.rec_of_list' [
positive_field; year_field; month_field
] ;;
let gYear_type = Types.rec_of_list' [ positive_field; year_field ] ;;
let gMonthDay_type = Types.rec_of_list' [ month_field; day_field ] ;;
let gDay_type = Types.rec_of_list' [ day_field ] ;;
let gMonth_type = Types.rec_of_list' [ month_field ] ;;
let nonPositiveInteger_type = Builtin_defs.non_pos_int ;;
let negativeInteger_type = Builtin_defs.neg_int ;;
let nonNegativeInteger_type = Builtin_defs.non_neg_int ;;
let positiveInteger_type = Builtin_defs.pos_int ;;
(* validation functions: string -> Value.t *)
let validate_string = Value.string_latin1 ;;
let validate_integer s =
try
Value.Integer (Intervals.mk s)
with Failure _ -> simple_type_error ~typ:"integer" ~value:s
;;
let validate_interval interval type_name s =
let integer =
try
Intervals.mk s
with Failure _ -> simple_type_error ~typ:type_name ~value:s
in
if Intervals.contains integer interval then
Value.Integer integer
else
simple_type_error ~typ:type_name ~value:s
;;
let validate_nonPositiveInteger =
validate_interval (Intervals.left (Intervals.mk "0")) "nonPositiveInteger"
;;
let validate_negativeInteger =
validate_interval (Intervals.left (Intervals.mk "-1")) "negativeInteger"
;;
let validate_nonNegativeInteger =
validate_interval (Intervals.right (Intervals.mk "0")) "nonNegativeInteger"
;;
let validate_positiveInteger =
validate_interval (Intervals.right (Intervals.mk "1")) "positiveInteger"
;;
let validate_bool = function
| "true" | "1" -> Value.vtrue
| "false" | "0" -> Value.vfalse
| v -> simple_type_error ~typ:"boolean" ~value:v
;;
let parse_sign = function "+" | "" -> Value.vtrue | _ -> Value.vfalse ;;
let parse_date =
let rex = Pcre.regexp (add_limits date_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"date" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
[ "year", validate_integer subs.(1);
"month", validate_integer subs.(2);
"day", validate_integer subs.(3) ]
;;
let parse_time =
let rex = Pcre.regexp (add_limits time_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"time" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
[ "hour", validate_integer subs.(1);
"minute", validate_integer subs.(2);
"second", validate_integer subs.(3) ]
;;
let parse_timezone =
let rex = Pcre.regexp (add_limits timezone_RE_raw) in
fun s ->
let abort () =
raise (XSI_validation_error (sprintf "'%s' isn't a valid timezone" s))
in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
match subs.(1) with
| "Z" ->
["positive", Value.vtrue;
"hour", validate_integer "0";
"minute", validate_integer "0"]
| _ ->
["positive", parse_sign subs.(3);
"hour", validate_integer subs.(4);
"minute", validate_integer subs.(5)]
;;
(* parse a timezone from a string, if it's empty return the empty list,
otherwise return a list containing a pair <"timezone", timezone value> *)
let parse_timezone' = function
| "" -> []
| v -> [ "timezone", Value.vrecord (parse_timezone v) ]
;;
let validate_duration =
let rex = Pcre.regexp
"^([+-])?P((\\d+)Y)?((\\d+)M)?((\\d+)D)?(T((\\d+)H)?((\\d+)M)?((\\d+)S)?)?$"
in
fun s ->
let abort () = simple_type_error ~typ:"duration" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields =
["positive", parse_sign subs.(1) ] @
(match subs.(3) with "" -> [] | v -> ["year", validate_integer v]) @
(match subs.(5) with "" -> [] | v -> ["month", validate_integer v]) @
(match subs.(7) with "" -> [] | v -> ["day", validate_integer v]) @
(match subs.(10) with "" -> [] | v -> ["hour", validate_integer v]) @
(match subs.(12) with "" -> [] | v -> ["minute", validate_integer v]) @
(match subs.(14) with "" -> [] | v -> ["second", validate_integer v])
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_dateTime =
let rex = Pcre.regexp (sprintf "^([+-])?(%s)T(%s)(%s)?$"
(strip_parens date_RE_raw) (strip_parens time_RE_raw)
(strip_parens timezone_RE_raw))
in
fun s ->
let abort () = simple_type_error ~typ:"dateTime" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields =
[ "positive", parse_sign subs.(1) ] @
parse_date subs.(2) @
parse_time subs.(3) @
parse_timezone' subs.(4)
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gYearMonth =
let rex = Pcre.regexp (add_limits gYearMonth_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"gYearMonth" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields = [
"positive", parse_sign subs.(1);
"year", validate_integer subs.(2);
"month", validate_integer subs.(3)
] @ parse_timezone' subs.(4)
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gYear =
let rex = Pcre.regexp (add_limits gYear_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"gYear" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields = [
"positive", parse_sign subs.(1);
"year", validate_integer subs.(2);
] @ parse_timezone' subs.(3)
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gMonthDay =
let rex = Pcre.regexp (add_limits gMonthDay_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"gMonthDay" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields = [
"month", validate_integer subs.(1);
"day", validate_integer subs.(2);
] @ parse_timezone' subs.(3)
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gDay =
let rex = Pcre.regexp (add_limits gDay_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"gDay" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields =
("day", validate_integer subs.(1)) :: (parse_timezone' subs.(2))
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gMonth =
let rex = Pcre.regexp (add_limits gMonth_RE_raw) in
fun s ->
let abort () = simple_type_error ~typ:"gMonth" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields =
("month", validate_integer subs.(1)) :: (parse_timezone' subs.(2))
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_time =
let rex = Pcre.regexp (sprintf "^(%s)(%s)?$" (strip_parens time_RE_raw)
(strip_parens timezone_RE_raw))
in
fun s ->
let abort () = simple_type_error ~typ:"time" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields =
parse_time subs.(1) @
(match subs.(2) with
| "" -> []
| v -> [ "timezone", Value.vrecord (parse_timezone v) ])
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_date =
let rex = Pcre.regexp (sprintf "^(-)?(%s)(%s)?$" (strip_parens date_RE_raw)
(strip_parens timezone_RE_raw))
in
fun s ->
let abort () = simple_type_error ~typ:"date" ~value:s in
let subs = try Pcre.extract ~rex s with Not_found -> abort () in
try
let fields =
[ "positive", parse_sign subs.(1) ] @
parse_date subs.(2) @
(match subs.(3) with
| "" -> []
| v -> [ "timezone", Value.vrecord (parse_timezone v) ])
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_hexBinary s =
let len = String.length s in
if len mod 2 <> 0 then
simple_type_error ~typ:"hexBinary" ~value:s;
let res = String.create (len / 2) in
let rec aux idx =
if idx < len then begin
String.unsafe_set res (idx / 2)
(char_of_hex (String.unsafe_get s idx) (String.unsafe_get s (idx + 1)));
aux (idx + 2)
end
in
aux 0;
validate_string res
;;
(* TODO test base64Binary simple type! *)
let validate_base64Binary s = validate_string (Netencoding.Base64.decode s) ;;
let validate_anyURI s =
try
validate_string (Neturl.string_of_url (Neturl.url_of_string
Neturl.ip_url_syntax s))
with Neturl.Malformed_URL -> simple_type_error ~typ:"anyURI" ~value:s
let builtins = [
"xsd:string", (Builtin_defs.string, validate_string);
"xsd:integer", (Builtin_defs.int, validate_integer);
"xsd:nonPositiveInteger",
(nonPositiveInteger_type, validate_nonPositiveInteger);
"xsd:negativeInteger", (negativeInteger_type, validate_negativeInteger);
"xsd:nonNegativeInteger",
(nonNegativeInteger_type, validate_nonNegativeInteger);
"xsd:positiveInteger", (positiveInteger_type, validate_positiveInteger);
"xsd:boolean", (Builtin_defs.bool, validate_bool);
"xsd:hexBinary", (Builtin_defs.string, validate_hexBinary);
"xsd:base64Binary", (Builtin_defs.string, validate_base64Binary);
"xsd:anyURI", (Builtin_defs.string, validate_anyURI);
(* TODO anyType: is this useful? *)
"xsd:anyType", (Types.any, (fun _ -> assert false));
(* TODO anySimpleType: is ok as a string? *)
"xsd:anySimpleType", (Builtin_defs.string, validate_string);
"xsd:duration", (duration_type, validate_duration);
"xsd:dateTime", (dateTime_type, validate_dateTime);
"xsd:time", (time_type, validate_time);
"xsd:date", (date_type, validate_date);
"xsd:gYearMonth", (gYearMonth_type, validate_gYearMonth);
"xsd:gYear", (gYear_type, validate_gYear);
"xsd:gMonthDay", (gMonthDay_type, validate_gMonthDay);
"xsd:gDay", (gDay_type, validate_gDay);
"xsd:gMonth", (gMonth_type, validate_gMonth);
] ;;
(* module's interface implementation *)
let names = List.sort compare (List.map fst builtins) ;;
let len = List.length builtins ;;
let cd_types = Hashtbl.create len ;;
let validators = Hashtbl.create len ;;
let __validate_fun_of_builtin = Hashtbl.find validators ;;
let cd_type_of_builtin = Hashtbl.find cd_types ;;
let fill () =
List.iter
(fun (name, (typ, validator)) ->
Hashtbl.add cd_types name typ;
Hashtbl.add validators name validator)
builtins
;;
fill () ;;
val names: string list
val cd_type_of_builtin: string -> Types.descr
(* exporting some validation functions that are useful while parsing facets,
the other are accessible using the __validate_fun_of_builtin function *)
val validate_nonNegativeInteger: string -> Value.t
val validate_positiveInteger: string -> Value.t
(** not to be used: use Schema_validator.validator_of_simple_type instead *)
val __validate_fun_of_builtin: string -> (string -> Value.t)
open Printf ;;
open Pxp_document ;;
open Schema_types ;;
(* TODO when looking for xsd:{restriction,extension,...} has_element is used so
actually is possible that both more of them are provided.
IDEA: validate schema document using DTD for Schemas? *)
exception Not_implemented of string ;;
let debug = true ;;
let hashtbl_values tbl = Hashtbl.fold (fun _ valu acc -> valu :: acc) tbl [] ;;
let rec filter_out_none = function (* not tail recursive *)
| [] -> []
| Some v :: tl -> v :: filter_out_none tl
| None :: tl -> filter_out_none tl
;;
let get_facet_nodes n = [] (* TODO facets *) ;;
let restrict_simple_type base_type_def facets = base_type_def ;;
let get_minOccurs n =
try
int_of_string n#extension#minOccurs
with Not_found -> 1
;;
let get_maxOccurs n =
try
(match n#extension#maxOccurs with
| "unbounded" -> None
| s -> Some (int_of_string s))
with Not_found -> Some 1
;;
let content_type_of_def = function
| S def -> CT_simple def
| C (CBuilt_in _) -> assert false
| C (CUser_defined (_, _, _, _, ct)) -> ct
;;
let parse_facet resolver base_type_def n =
let validate_base_type =
Schema_validator.validate_simple_type base_type_def
in
let value =
try
n#extension#value
with Not_found ->
raise (XSD_validation_error "Missing required 'value' attribute")
in
let fixed =
try
bool_of_string n#extension#fixed
with
| Not_found -> false
| Invalid_argument "bool_of_string" ->
raise (XSD_validation_error (sprintf
"Invalid value for 'fixed' attribute: '%s'" n#extension#fixed))
in
match n#node_type with
| T_element "xsd:length" ->
let length =
Value.get_int (Schema_builtin.validate_nonNegativeInteger value)
in
F_length (length, fixed)
| T_element "xsd:minLength" ->
let length =
Value.get_int (Schema_builtin.validate_nonNegativeInteger value)
in
F_minLength (length, fixed)
| T_element "xsd:maxLength" ->
let length =
Value.get_int (Schema_builtin.validate_nonNegativeInteger value)
in
F_maxLength (length, fixed)
| T_element "xsd:pattern" -> (* TODO Schema regexp <> PCRE regexp :-(( *)
F_pattern (Pcre.regexp value)
| T_element "xsd:enumeration" ->
F_enumeration (ValueSet.singleton (validate_base_type value))
| T_element "xsd:whiteSpace" ->
F_whiteSpace
((match value with
| "collapse" -> WS_collapse
| "preserve" -> WS_preserve
| "replace" -> WS_replace
| _ ->
raise (XSD_validation_error (sprintf
"'%s' isn't a valid whiteSpace value" value))),
fixed)
| T_element "xsd:maxInclusive" ->
F_maxInclusive (validate_base_type value, fixed)
| T_element "xsd:maxExclusive" ->
F_maxExclusive (validate_base_type value, fixed)
| T_element "xsd:minInclusive" ->
F_minInclusive (validate_base_type value, fixed)
| T_element "xsd:minExclusive" ->
F_minExclusive (validate_base_type value, fixed)
| T_element "xsd:totalDigits" ->
let digits =
Value.get_int (Schema_builtin.validate_positiveInteger value)
in
F_totalDigits (digits, fixed)
| T_element "xsd:fractionDigits" ->
let digits =
Value.get_int (Schema_builtin.validate_nonNegativeInteger value)
in
F_fractionDigits (digits, fixed)
| T_element unexpected ->
raise (XSD_validation_error (sprintf "'%s' isn't a valid facet element"
unexpected))
| _ -> assert false
;;