Commit 5c728dda authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2003-06-13 10:02:51 by cvscast] - removed unneeded refs from Schema_types

- use iter_xml instead of explode_rev to generate streams of PXP
  events from a CDuce value

Original author: cvscast
Date: 2003-06-13 10:02:51+00:00
parent b1974d71
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<!-- mutual recursion between two elements -->
<xsd:element name="foo">
<xsd:complexType>
<xsd:choice>
......
......@@ -3,15 +3,28 @@
VALIDATE="./validate"
ROOT="schema/regtest"
LOG="$ROOT/regtest.log"
if [ -f "$ROOT/$1.xsd" ]; then
$VALIDATE $ROOT/$1.xsd $ROOT/$1.xml
if [ -f "$ROOT/$1" ]; then
if (echo $1 | grep "\.xsd" > /dev/null) then
$VALIDATE $ROOT/$1 $ROOT/`echo "$1" | sed 's/xsd$/xml/'`
elif (echo $1 | grep "\.cd" > /dev/null) then
./cduce $ROOT/$1
else
echo "Don't know what to do with '$1'."
fi
else
touch $LOG
date | tee $LOG
for f in $ROOT/*.xsd; do
for f in $ROOT/*.xsd; do # test each .xsd using "validate"
echo -n "`basename $f` ... " | tee -a $LOG
if ($VALIDATE $f `echo "$f" | sed 's/xsd$/xml/'` > /dev/null 2>&1);
then
if ($VALIDATE $f `echo "$f" | sed 's/xsd$/xml/'` > /dev/null 2>&1); then
echo "OK" | tee -a $LOG
else
echo "FAILURE" | tee -a $LOG
fi
done
for s in $ROOT/*.cd; do # run CDuce scripts
echo -n "`basename $s` ... " | tee -a $LOG
if (./cduce $s > /dev/null 2>&1); then
echo "OK" | tee -a $LOG
else
echo "FAILURE" | tee -a $LOG
......
open Printf ;;
open Schema_types ;;
open Printf
open Schema_types
(* TODO dates: boundary checks (e.g. 95/26/2003) *)
(* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation
......@@ -17,33 +17,31 @@ let char_of_hex =
in
(* most significative, least significative *)
fun ms ls -> Char.unsafe_chr (int_of_hex_char ms * 16 + int_of_hex_char ls)
;;
let strip_parens s = Pcre.replace ~pat:"[()]" s ;;
let add_limits s = "^" ^ s ^ "$" ;;
let strip_parens s = Pcre.replace ~pat:"[()]" s
let add_limits s = "^" ^ s ^ "$"
let simple_type_error ~typ ~value =
raise (XSI_validation_error (sprintf "'%s' isn't a valid xsd:%s" value typ))
;;
(* regular expressions used to validate built-in types *)
let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))" ;;
let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})" ;;
let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})" ;;
let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw ;;
let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw ;;
let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw ;;
let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw ;;
let gMonth_RE_raw = "--(\\d{2})--(%s)?" ;;
let positive_field = false, "positive", Builtin_defs.bool ;;
let year_field = false, "year", Builtin_defs.int ;;
let month_field = false, "month", Builtin_defs.int ;;
let day_field = false, "day", Builtin_defs.int ;;
let hour_field = false, "hour", Builtin_defs.int ;;
let minute_field = false, "minute", Builtin_defs.int ;;
let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))"
let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})"
let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})"
let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw
let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw
let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw
let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw
let gMonth_RE_raw = "--(\\d{2})--(%s)?"
let positive_field = false, "positive", Builtin_defs.bool
let year_field = false, "year", Builtin_defs.int
let month_field = false, "month", Builtin_defs.int
let day_field = false, "day", Builtin_defs.int
let hour_field = false, "hour", Builtin_defs.int
let minute_field = false, "minute", Builtin_defs.int
let second_field = false, "second", Builtin_defs.int ;; (* TODO this should be a decimal *)
(* some cduce types corresponding to schema ones *)
......@@ -58,45 +56,43 @@ let duration_type = Types.rec_of_list' [
true, "hour", Builtin_defs.int;
true, "minute", Builtin_defs.int;
true, "second", Builtin_defs.int; (* TODO this should be a decimal *)
] ;;
]
let timezone_type = Types.rec_of_list' [
false, "positive", Builtin_defs.bool;
hour_field; minute_field
] ;;
]
let timezone_type_fields = [ true, "timezone", timezone_type ] ;;
let time_type_fields = [ hour_field; minute_field; second_field ] ;;
let date_type_fields = [ year_field; month_field; day_field ] ;;
let timezone_type_fields = [ true, "timezone", timezone_type ]
let time_type_fields = [ hour_field; minute_field; second_field ]
let date_type_fields = [ year_field; month_field; day_field ]
let time_type = Types.rec_of_list' (time_type_fields @ timezone_type_fields) ;;
let date_type = Types.rec_of_list' (positive_field :: date_type_fields) ;;
let time_type = Types.rec_of_list' (time_type_fields @ timezone_type_fields)
let date_type = Types.rec_of_list' (positive_field :: date_type_fields)
let dateTime_type =
Types.rec_of_list' (positive_field ::
(date_type_fields @ time_type_fields @ timezone_type_fields))
;;
let gYearMonth_type = Types.rec_of_list' [
positive_field; year_field; month_field
] ;;
let gYear_type = Types.rec_of_list' [ positive_field; year_field ] ;;
let gMonthDay_type = Types.rec_of_list' [ month_field; day_field ] ;;
let gDay_type = Types.rec_of_list' [ day_field ] ;;
let gMonth_type = Types.rec_of_list' [ month_field ] ;;
]
let gYear_type = Types.rec_of_list' [ positive_field; year_field ]
let gMonthDay_type = Types.rec_of_list' [ month_field; day_field ]
let gDay_type = Types.rec_of_list' [ day_field ]
let gMonth_type = Types.rec_of_list' [ month_field ]
let nonPositiveInteger_type = Builtin_defs.non_pos_int ;;
let negativeInteger_type = Builtin_defs.neg_int ;;
let nonNegativeInteger_type = Builtin_defs.non_neg_int ;;
let positiveInteger_type = Builtin_defs.pos_int ;;
let nonPositiveInteger_type = Builtin_defs.non_pos_int
let negativeInteger_type = Builtin_defs.neg_int
let nonNegativeInteger_type = Builtin_defs.non_neg_int
let positiveInteger_type = Builtin_defs.pos_int
(* validation functions: string -> Value.t *)
let validate_string = Value.string_latin1 ;;
let validate_string = Value.string_latin1
let validate_integer s =
try
Value.Integer (Intervals.mk s)
with Failure _ -> simple_type_error ~typ:"integer" ~value:s
;;
let validate_interval interval type_name s =
let integer =
try
......@@ -107,27 +103,21 @@ let validate_interval interval type_name s =
Value.Integer integer
else
simple_type_error ~typ:type_name ~value:s
;;
let validate_nonPositiveInteger =
validate_interval (Intervals.left (Intervals.mk "0")) "nonPositiveInteger"
;;
let validate_negativeInteger =
validate_interval (Intervals.left (Intervals.mk "-1")) "negativeInteger"
;;
let validate_nonNegativeInteger =
validate_interval (Intervals.right (Intervals.mk "0")) "nonNegativeInteger"
;;
let validate_positiveInteger =
validate_interval (Intervals.right (Intervals.mk "1")) "positiveInteger"
;;
let validate_bool = function
| "true" | "1" -> Value.vtrue
| "false" | "0" -> Value.vfalse
| v -> simple_type_error ~typ:"boolean" ~value:v
;;
let parse_sign = function "+" | "" -> Value.vtrue | _ -> Value.vfalse ;;
let parse_sign = function "+" | "" -> Value.vtrue | _ -> Value.vfalse
let parse_date =
let rex = Pcre.regexp (add_limits date_RE_raw) in
......@@ -137,7 +127,6 @@ let parse_date =
[ "year", validate_integer subs.(1);
"month", validate_integer subs.(2);
"day", validate_integer subs.(3) ]
;;
let parse_time =
let rex = Pcre.regexp (add_limits time_RE_raw) in
......@@ -147,7 +136,6 @@ let parse_time =
[ "hour", validate_integer subs.(1);
"minute", validate_integer subs.(2);
"second", validate_integer subs.(3) ]
;;
let parse_timezone =
let rex = Pcre.regexp (add_limits timezone_RE_raw) in
......@@ -165,13 +153,11 @@ let parse_timezone =
["positive", parse_sign subs.(3);
"hour", validate_integer subs.(4);
"minute", validate_integer subs.(5)]
;;
(* parse a timezone from a string, if it's empty return the empty list,
otherwise return a list containing a pair <"timezone", timezone value> *)
let parse_timezone' = function
| "" -> []
| v -> [ "timezone", Value.vrecord (parse_timezone v) ]
;;
let validate_duration =
let rex = Pcre.regexp
......@@ -192,7 +178,6 @@ let validate_duration =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_dateTime =
let rex = Pcre.regexp (sprintf "^([+-])?(%s)T(%s)(%s)?$"
......@@ -211,7 +196,6 @@ let validate_dateTime =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gYearMonth =
let rex = Pcre.regexp (add_limits gYearMonth_RE_raw) in
......@@ -227,7 +211,6 @@ let validate_gYearMonth =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gYear =
let rex = Pcre.regexp (add_limits gYear_RE_raw) in
......@@ -242,7 +225,6 @@ let validate_gYear =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gMonthDay =
let rex = Pcre.regexp (add_limits gMonthDay_RE_raw) in
......@@ -257,7 +239,6 @@ let validate_gMonthDay =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gDay =
let rex = Pcre.regexp (add_limits gDay_RE_raw) in
......@@ -270,7 +251,6 @@ let validate_gDay =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_gMonth =
let rex = Pcre.regexp (add_limits gMonth_RE_raw) in
......@@ -283,7 +263,6 @@ let validate_gMonth =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_time =
let rex = Pcre.regexp (sprintf "^(%s)(%s)?$" (strip_parens time_RE_raw)
......@@ -301,7 +280,6 @@ let validate_time =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_date =
let rex = Pcre.regexp (sprintf "^(-)?(%s)(%s)?$" (strip_parens date_RE_raw)
......@@ -320,7 +298,6 @@ let validate_date =
in
Value.vrecord fields
with XSI_validation_error _ -> abort ()
;;
let validate_hexBinary s =
let len = String.length s in
......@@ -336,10 +313,9 @@ let validate_hexBinary s =
in
aux 0;
validate_string res
;;
(* TODO test base64Binary simple type! *)
let validate_base64Binary s = validate_string (Netencoding.Base64.decode s) ;;
let validate_base64Binary s = validate_string (Netencoding.Base64.decode s)
let validate_anyURI s =
try
......@@ -373,16 +349,16 @@ let builtins = [
"xsd:gMonthDay", (gMonthDay_type, validate_gMonthDay);
"xsd:gDay", (gDay_type, validate_gDay);
"xsd:gMonth", (gMonth_type, validate_gMonth);
] ;;
]
(* module's interface implementation *)
let names = List.sort compare (List.map fst builtins) ;;
let len = List.length builtins ;;
let cd_types = Hashtbl.create len ;;
let validators = Hashtbl.create len ;;
let __validate_fun_of_builtin = Hashtbl.find validators ;;
let cd_type_of_builtin = Hashtbl.find cd_types ;;
let names = List.sort compare (List.map fst builtins)
let len = List.length builtins
let cd_types = Hashtbl.create len
let validators = Hashtbl.create len
let __validate_fun_of_builtin = Hashtbl.find validators
let cd_type_of_builtin = Hashtbl.find cd_types
let fill () =
List.iter
......@@ -390,7 +366,6 @@ let fill () =
Hashtbl.add cd_types name typ;
Hashtbl.add validators name validator)
builtins
;;
fill () ;;
let _ = fill ()
open Printf ;;
open Pxp_document ;;
open Schema_types ;;
open Printf
open Pxp_document
open Schema_types
(* TODO when looking for xsd:{restriction,extension,...} has_element is used so
actually is possible that both more of them are provided.
IDEA: validate schema document using DTD for Schemas? *)
exception Not_implemented of string ;;
exception Not_implemented of string
let debug = true ;;
let debug = true
let hashtbl_values tbl = Hashtbl.fold (fun _ valu acc -> valu :: acc) tbl [] ;;
let hashtbl_values tbl = Hashtbl.fold (fun _ valu acc -> valu :: acc) tbl []
let rec filter_out_none = function (* not tail recursive *)
| [] -> []
| Some v :: tl -> v :: filter_out_none tl
| None :: tl -> filter_out_none tl
;;
let get_facet_nodes n = [] (* TODO facets *) ;;
let restrict_simple_type base_type_def facets = base_type_def ;;
let get_facet_nodes n = [] (* TODO facets *)
let restrict_simple_type base_type_def facets = base_type_def
let get_minOccurs n =
try
int_of_string n#extension#minOccurs
with Not_found -> 1
;;
let get_maxOccurs n =
try
(match n#extension#maxOccurs with
| "unbounded" -> None
| s -> Some (int_of_string s))
with Not_found -> Some 1
;;
let content_type_of_def = function
| S def -> CT_simple def
| C (CBuilt_in _) -> assert false
| C (CUser_defined (_, _, _, _, _, ct)) -> ct
;;
let parse_facet resolver base_type_def n =
let validate_base_type =
......@@ -110,13 +106,11 @@ let parse_facet resolver base_type_def n =
raise (XSD_validation_error (sprintf "'%s' isn't a valid facet element"
unexpected))
| _ -> assert false
;;
let parse_simple_type resolver n =
assert (n#node_type = T_element "xsd:simpleType");
SBuilt_in "FAKE" (* TODO facets *)
(* FINQUI *)
;;
(** @return a value_constraint option from a attribute node *)
let constr_of_attr_node n validate =
......@@ -135,7 +129,6 @@ let constr_of_attr_node n validate =
with XSI_validation_error _ ->
raise (XSD_validation_error ("Invalid value for constraint on \
attribute " ^ n#extension#name))
;;
let parse_att_decl resolver n =
let name = n#extension#name in
......@@ -148,16 +141,17 @@ let parse_att_decl resolver n =
(try
(match !(resolver#resolve_typ n#extension#typ) with
| S st -> st
| C _ -> failwith "Attributes can only assume simple type values")
| C _ ->
raise (XSD_validation_error
"Attributes can only assume simple type values"))
with Not_found -> SBuilt_in "xsd:anySimpleType"))
in
let self_validate =
Schema_validator.validate_simple_type simple_type_def
in
let constr = constr_of_attr_node n self_validate in
name, ref simple_type_def, constr
name, simple_type_def, constr
| _ -> assert false (* you have to use parse_attribute_use *)
;;
(** @return an attribute_use option. None means that the attribute is
prohibited *)
......@@ -170,8 +164,8 @@ let parse_attribute_use resolver n =
else begin (* attribute not prohibited *)
let required = try n#extension#required with Not_found -> false in
if n#extension#has_attribute "ref" then begin (* "ref" attribute *)
let att_decl = !(resolver#resolve_att n#extension#ref) in
let simple_type_def = match att_decl with (_, std, _) -> !std in
let att_decl = resolver#resolve_att n#extension#ref in
let simple_type_def = match att_decl with (_, std, _) -> std in
let self_validate =
Schema_validator.validate_simple_type simple_type_def
in
......@@ -186,18 +180,19 @@ let parse_attribute_use resolver n =
(try
(match !(resolver#resolve_typ n#extension#typ) with
| S st -> st
| C _ -> failwith "Attributes can only assume simple type values")
| C _ ->
raise (XSD_validation_error
"Attributes can only assume simple type values"))
with Not_found -> SBuilt_in "xsd:anySimpleType"))
in
let self_validate =
Schema_validator.validate_simple_type simple_type_def
in
let att_decl = name, ref simple_type_def, None in
let att_decl = name, simple_type_def, None in
let constr = constr_of_attr_node n self_validate in
Some (required, att_decl, constr)
end
end
;;
(** @return a list of attribute uses from a xsd:restriction node wrt a base
type definition *)
......@@ -221,7 +216,6 @@ let attribute_uses_of_restriction ~resolver ~n ~base =
in
(* remove prohibited from embedded attribute list *)
filter_out_none (snd (List.split embedded)) @ from_base
;;
(** @return a list of attribute uses from a xsd:extension node wrt a base type
definition *)
......@@ -235,7 +229,6 @@ let attribute_uses_of_extension ~resolver ~n ~base =
| _ -> []
in
filter_out_none embedded @ from_base
;;
let counter = ref 0
......@@ -264,10 +257,10 @@ let rec parse_complex_type resolver n =
(find_element "xsd:simpleType" restriction)
with Not_found -> base
in
CT_simple (restrict_simple_type base (get_facet_nodes n))
CT_simple (restrict_simple_type base (get_facet_nodes n))
| _ -> assert false)
in
cuser_defined name base Restriction attribute_uses content_type
cuser_defined name !base Restriction attribute_uses content_type
end else if content#extension#has_element "xsd:extension" then begin
(* simpleContent, extension *)
let extension = find_element "xsd:extension" content in
......@@ -281,7 +274,7 @@ let rec parse_complex_type resolver n =
| S simple_type_def -> CT_simple simple_type_def
| _ -> assert false)
in
cuser_defined name base Extension attribute_uses content_type
cuser_defined name !base Extension attribute_uses content_type
end else
(* simpleContent, neither extension nor restriction *)
raise (XSD_validation_error "Neither <extension> nor <restriction> \
......@@ -303,11 +296,13 @@ let rec parse_complex_type resolver n =
(try content#extension#mixed with Not_found ->
(try restriction#extension#mixed with Not_found -> false))
in
CT_model
(parse_particle resolver restriction#extension#find_term, mixed)
(try
CT_model
(parse_particle resolver restriction#extension#find_term, mixed)
with Not_found -> raise (XSD_validation_error "Can't find term"))
end
in
cuser_defined name base Restriction attribute_uses content_type
cuser_defined name !base Restriction attribute_uses content_type
end else if content#extension#has_element "xsd:extension" then begin
(* complexContent, extension *)
let extension = find_element "xsd:extension" content in
......@@ -324,18 +319,22 @@ let rec parse_complex_type resolver n =
if extension#extension#has_no_term then
base_ct
else
let term = lazy (
try
extension#extension#find_term
with Not_found -> raise (XSD_validation_error ("Can't find term"))
) in
match base_ct with
| CT_empty ->
CT_model
(parse_particle resolver extension#extension#find_term, mixed)
CT_model (parse_particle resolver (Lazy.force term), mixed)
| CT_model (p, _) ->
CT_model
((1, Some 1, Sequence
(p::[parse_particle resolver extension#extension#find_term])),
((1, Some 1,
Sequence (p :: [parse_particle resolver (Lazy.force term)])),
mixed)
| _ -> assert false
| CT_simple _ -> assert false
in
cuser_defined name base Extension attribute_uses content_type
cuser_defined name !base Extension attribute_uses content_type
end else
(* complexContent, neither extension nor restriction *)
raise (XSD_validation_error "Neither <extension> nor <restriction> \
......@@ -355,7 +354,7 @@ let rec parse_complex_type resolver n =
CT_model (parse_particle resolver n#extension#find_term, mixed)
end
in
cuser_defined name base Restriction attribute_uses content_type
cuser_defined name !base Restriction attribute_uses content_type
end
and parse_elt_decl resolver n =
......@@ -403,28 +402,32 @@ and parse_particle resolver n =
Choice (List.map (parse_particle resolver) n#extension#find_terms)
| _ -> assert false
;;
module OrderedNode =
struct
type t = Schema_xml.schema_extension node
let compare = Pxp_document.compare
end
;;
module NodeSet = Set.Make (OrderedNode) ;;
module NodeSet = Set.Make (OrderedNode)
(* lazy resolver: resolve types/elements/attributes as soon as it encounter
references to them. DOESN'T WORK WITH RECURSIVE ENTITIES [ probably it loops ]
@param node schema document root node
*)
class lazy_resolver node =
class lazy_resolver =
let fake_type_def = C (CBuilt_in " FAKE TYP ") in
let fake_elt_decl = " FAKE ELT ", ref fake_type_def, None in
let is_fake_type_def = (=) fake_type_def in
let is_fake_elt_decl = (=) fake_elt_decl in
fun node ->
object (self)
val typs = Hashtbl.create 17
val attrs = Hashtbl.create 17
val elts = Hashtbl.create 17
val mutable seen_nodes = NodeSet.empty
initializer (* register built-in types *)
......@@ -435,7 +438,7 @@ class lazy_resolver node =
method private register_typ' node name def =
if Hashtbl.mem typs name then
failwith ("Redefinition of type: " ^ name);
raise (XSD_validation_error ("Redefinition of type: " ^ name));
if debug then
(Format.fprintf Format.std_formatter
"\nSchema_parser: registering TYPE %s:\n%a\n"
......@@ -446,7 +449,7 @@ class lazy_resolver node =
method private register_elt' node name decl =
if Hashtbl.mem elts name then
failwith ("Redefinition of element: " ^ name);
raise (XSD_validation_error ("Redefinition of element: " ^ name));
if debug then
(Format.fprintf Format.std_formatter
"\nSchema_parser: registering ELEMENT %s:\n%a\n"
......@@ -457,11 +460,11 @@ class lazy_resolver node =
method private register_att' node name decl =
if Hashtbl.mem attrs name then
failwith ("Redefinition of attribute: " ^ name);
raise (XSD_validation_error ("Redefinition of attribute: " ^ name));
if debug then
(Format.fprintf Format.std_formatter
"\nSchema_parser: registering ATTRIBUTE %s:\n%a\n"