Commit 9fb48689 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2003-11-26 16:23:06 by szach] first real life implementation of schema validations

Original author: szach
Date: 2003-11-26 16:23:06+00:00
parent 1ba2b247
......@@ -11,6 +11,52 @@ open Value
(** {2 Misc} *)
let empty_string = Value.string_utf8 (Utf8.mk "")
let hashtbl_is_empty tbl =
let empty = ref true in
(try
Hashtbl.iter (fun _ _ -> empty := false; raise Exit) tbl
with Exit -> ());
!empty
let string_of_value value =
let buf = Buffer.create 1024 in
let fmt = Format.formatter_of_buffer buf in
Value.print fmt value;
Buffer.contents buf
let foo_qname = Ns.empty, Utf8.mk ""
let ptbl_of_particles particles =
let tbl = Hashtbl.create 20 in
List.iter (* fill table *)
(* ASSUMPTION: firsts are disjoing as per UPA Schema constraint *)
(fun p ->
List.iter
(function None -> () | Some tag -> Hashtbl.add tbl tag p)
(first_of_particle p))
particles;
tbl
(** Validation context *)
class type validation_context =
object
(* if ns isn't given, targetNamespace of the schema is used *)
method expect_start_tag: ?ns:Ns.t -> Utf8.t -> unit
method expect_end_tag: ?ns:Ns.t -> Utf8.t -> unit
method expect_any_start_tag: Ns.qname
method expect_any_end_tag: Ns.qname
method get_string: Utf8.t
method junk: unit
method peek: event
method set_mixed: bool -> unit
method mixed: bool
method ns: Ns.t
end
let validation_error ?context s = raise (XSI_validation_error s)
let validation_error_exemplar = XSI_validation_error ""
......@@ -33,13 +79,6 @@ let rec tries funs exn arg =
let space_RE = pcre_regexp " "
let split = pcre_split ~rex:space_RE
(** Validation context *)
class type validation_context =
object
method expect_start_tag: Utf8.t -> unit
method expect_end_tag: Utf8.t -> unit
end
(** {2 Facets validation} *)
module Schema_facets:
......@@ -179,57 +218,393 @@ let rec validate_simple_type def v =
Schema_facets.facets_valid facets value;
value)
(*
let validate_element context decl =
let name = name_of_element_declaration decl in
(* wrapper for validate_simple_type which works on contexts *)
let validate_simple_type_wrapper context st_def =
validate_simple_type st_def (Value.string_utf8 context#get_string)
(** {2 Complex type validation} *)
let rec validate_any_type (context: validation_context) =
(* assumption: attribute events (if any) come first *)
let attrs = ref [] in
let cont = ref [] in
let rec aux () =
match context#peek with
| E_start_tag (ns, name) ->
context#junk;
let (attrs, content) = validate_any_type context in
let element =
Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)
in
context#expect_end_tag ~ns name;
cont := element :: !cont;
aux ()
| E_end_tag _ -> (Value.vrecord !attrs, Value.sequence (List.rev !cont))
| E_attribute (qname, value) ->
context#junk;
attrs := (qname, Value.string_utf8 value) :: !attrs;
aux ()
| E_char_data utf8_data ->
context#junk;
cont := Value.string_utf8 utf8_data :: !cont;
aux ()
in
aux ()
let check_fixed ~context fixed value =
if not (Value.equal fixed value) then
validation_error ~context (sprintf "Expected fixed value: %s; found %s"
(string_of_value fixed) (string_of_value value))
let validate_attribute_uses context attr_uses =
let tbl = Hashtbl.create 11 in
List.iter
(fun use -> Hashtbl.add tbl (Ns.empty, name_of_attribute_use use) use)
attr_uses;
let attrs = ref [] in
let rec aux () = (* look for attribute events and fill "attrs" *)
match context#peek with
| E_attribute (qname, value) ->
let (_, (_, st_def, _), constr) = (* attribute use *)
try
Hashtbl.find tbl qname
with Not_found ->
validation_error ~context (sprintf "Unexpected attribute: %s"
(Ns.QName.to_string qname))
in
let value = validate_simple_type st_def (Value.string_utf8 value) in
(match constr with (* check fixed constraint *)
| Some (`Fixed v) -> check_fixed ~context v value
| _ -> ());
attrs := (qname, value) :: !attrs;
Hashtbl.remove tbl qname;
context#junk;
aux ()
| _ -> ()
in
aux ();
Hashtbl.iter
(fun qname (required, _, constr) ->
if required then (* check for missing required attributes *)
validation_error ~context (sprintf "Required attribute %s is missing"
(Ns.QName.to_string qname))
else (* add default values *)
match constr with
| Some (`Default v) -> attrs := (qname, v) :: !attrs
| _ -> ())
tbl;
Value.vrecord !attrs
let rec validate_element (context: validation_context) decl =
let (_, name, type_def_ref, constr) = decl in
context#expect_start_tag name;
context#expect_end_tag name
let (attrs, content) = validate_type context !type_def_ref in
let content = (* use default if needed and check fixed constraints *)
match constr with
| Some (`Default v) when Value.equal content empty_string -> v
| Some (`Fixed v) ->
check_fixed ~context v content;
content
| _ -> content
in
let element =
Value.Xml (Value.Atom (Atoms.V.mk context#ns name), attrs, content)
in
context#expect_end_tag name;
element
and validate_type context = function
| AnyType -> validate_any_type (context :> validation_context)
| Simple st_def -> (Value.nil, validate_simple_type_wrapper context st_def)
| Complex ct_def ->
validate_complex_type (context :> validation_context) ct_def
class context ~value ~schema =
(** @return Value.t * Value.t (* attrs, content *) *)
and validate_complex_type context ct_def =
let (_, _, _, _, attr_uses, content_type) = ct_def in
let attrs = validate_attribute_uses context attr_uses in
let content = Value.sequence (validate_content_type context content_type) in
(attrs, content)
(** @return Value.t list *)
and validate_content_type context content_type =
match content_type with
| CT_empty -> []
| CT_simple st_def -> [ validate_simple_type_wrapper context st_def ]
| CT_model (particle, mixed) ->
context#set_mixed mixed;
validate_particle context particle
(** @return Value.t list *)
and validate_particle context particle =
let (min, max, term, first) = particle in
let content = ref [] in
let push v = content := v :: !content in
let rec validate_once ~cont_ok ~cont_failure =
match context#peek with
| E_start_tag (ns, tag) as event when Ns.equal ns context#ns ->
if is_in_first tag first then begin
List.iter push (validate_term context term);
cont_ok ()
end else
cont_failure event
| E_char_data utf8_data when context#mixed ->
push (Value.string_utf8 utf8_data);
context#junk;
validate_once ~cont_ok ~cont_failure
| ev -> cont_failure ev
in
let rec required = function
| v when Intervals.V.equal v Intervals.V.zero -> ()
| n (* when n > 0 *) ->
validate_once
~cont_ok:(fun () -> required (Intervals.V.pred n))
~cont_failure:(fun event ->
validation_error ~context (sprintf "Unexpected content: %s"
(string_of_event event)))
in
let rec optional = function
| None ->
validate_once
~cont_ok:(fun () -> optional None)
~cont_failure:(fun _ -> ())
| Some v when Intervals.V.equal v Intervals.V.zero -> ()
| Some n (* when n > 0 *) ->
validate_once
~cont_ok:(fun () -> optional (Some (Intervals.V.pred n)))
~cont_failure:(fun _ -> ())
in
let rec trailing_cdata () =
match context#peek with
| E_char_data utf8_data ->
push (Value.string_utf8 utf8_data);
context#junk;
trailing_cdata ()
| _ -> ()
in
required min;
optional
(match max with None -> None | Some v -> Some (Intervals.V.sub v min));
if context#mixed then trailing_cdata ();
List.rev !content
(** @return Value.t list *)
and validate_term context term =
match term with
| Elt elt_decl_ref -> [ validate_element context !elt_decl_ref ]
| Model model_group -> validate_model_group context model_group
(** @return (Value.t list * Utf8.t)
* 2nd value is the key for tbl that return the particle effectively used for
* validation *)
and validate_choice context tbl =
let backlog = ref [] in
let push v = backlog := v :: !backlog in
let rec next_tag () =
match context#peek with
| E_char_data utf8_data when context#mixed ->
push (Value.string_utf8 utf8_data);
context#junk;
next_tag ()
| E_char_data utf8_data (* when not context#mixed *) ->
validation_error ~context
(sprintf "Unexpected char data in non-mixed content: %s"
(Utf8.get_str utf8_data))
| E_start_tag qname -> qname
| ev ->
validation_error ~context
(sprintf "Unexpected content: %s" (string_of_event ev))
in
let (ns, tag) = next_tag () in
if Ns.equal ns context#ns then
try
let particle = Hashtbl.find tbl tag in
((List.rev !backlog) @ (validate_particle context particle), tag)
with Not_found ->
validation_error ~context (sprintf "Unexpected element %s"
(Ns.QName.to_string (ns, tag)))
else (* wrong namespace *)
validation_error ~context
(sprintf "Element from unexpected namespace: %s"
(Ns.QName.to_string (ns, tag)))
(** @return Value.t list *)
and validate_model_group context model_group =
match model_group with
| All particles ->
let tbl = ptbl_of_particles particles in
let contents = ref [] in
let rec aux () =
if hashtbl_is_empty tbl then
List.concat (List.rev !contents)
else begin
let (content, key) = validate_choice context tbl in
contents := content :: !contents;
Hashtbl.remove tbl key;
aux ()
end
in
aux ()
| Choice particles ->
fst (validate_choice context (ptbl_of_particles particles))
| Sequence particles ->
List.concat (List.map (validate_particle context) particles)
(** {2 Context implementation} *)
class context ~stream ~schema =
object (self)
val stream = stream_of_value value
val mutable mixed = false
method mixed = mixed
method set_mixed v = mixed <- v
method private next =
try
Stream.next stream
with Stream.Failure ->
self#error "Unexpected end of stream"
self#error "Unexpected end of stream";
(* just to cheat with the type checker, above function wont return *)
Stream.next stream
method peek =
match Stream.peek stream with
| None ->
self#error "Unexpected end of stream";
(* just to cheat with the type checker as above *)
Stream.next stream
| Some e -> e
method junk = Stream.junk stream
method get_string =
let buf = Buffer.create 1024 in
let rec aux () =
match self#peek with
| E_char_data data ->
Buffer.add_string buf (Utf8.get_str data);
self#junk;
aux ()
| _ -> Utf8.mk (Buffer.contents buf)
in
aux ()
method expect_start_tag name =
let expected = (schema.targetNamespace, name) in
method private error s = ignore (validation_error ~context:self s)
method expect_start_tag ?ns name =
let ns = match ns with Some ns -> ns | _ -> schema.targetNamespace in
let expected = (ns, name) in
match self#next with
| E_start_tag found ->
if not (Ns.QName.equal expected found) then
self#error (sprintf "Start tag error: expected %a, found %a"
Ns.QName.to_string expected Ns.QName.to_string found)
self#error (sprintf "Start tag error: expected %s, found %s"
(Ns.QName.to_string expected) (Ns.QName.to_string found))
| ev ->
self#error (sprintf "Expected start tag (%a), found %a"
NS.QName.to_string expected string_of_event ev)
method expect_end_tag name =
let expected = (schema.targetNamespace, name) in
self#error (sprintf "Expected start tag (%s), found %s"
(Ns.QName.to_string expected) (string_of_event ev))
method expect_end_tag ?ns name =
let ns = match ns with Some ns -> ns | _ -> schema.targetNamespace in
let expected = (ns, name) in
match self#next with
| E_end_tag found ->
if not (Ns.QName.equal expected found) then
self#error (sprintf "Start tag error: expected %a, found %a"
Ns.QName.to_string expected Ns.QName.to_string found)
self#error (sprintf "Start tag error: expected %s, found %s"
(Ns.QName.to_string expected) (Ns.QName.to_string found))
| ev ->
self#error (sprintf "Expected end tag (%a), found %a"
NS.QName.to_string expected string_of_event ev)
self#error (sprintf "Expected end tag (%s), found %s"
(Ns.QName.to_string expected) (string_of_event ev))
method expect_any_start_tag =
match self#next with
| E_start_tag tag -> tag
| ev ->
self#error (sprintf "Expected start tag, found %s"
(string_of_event ev));
foo_qname (* useless *)
method expect_any_end_tag =
match self#next with
| E_end_tag tag -> tag
| ev ->
self#error (sprintf "Expected end tag, found %s"
(string_of_event ev));
foo_qname (* useless *)
method ns = schema.targetNamespace
end
*)
(** {2 API} *)
(*
let validate_element decl schema value =
validate_element decl (new context ~value ~schema)
*)
let validate_type _ _ = assert false
let validate_attribute _ _ = assert false
let validate_element _ _ = assert false
let validate_attribute_group _ _ = assert false
let validate_model_group _ _ = assert false
validate_element (new context ~stream:(stream_of_value value) ~schema) decl
let validate_type def schema value =
match def with
| AnyType -> value (* shortcut *)
| Simple st_def ->
if not (is_str value) then
validation_error
"Only string values could be validate against simple types";
validate_simple_type st_def value (* shortcut *)
| Complex ct_def ->
let context = new context ~stream:(stream_of_value value) ~schema in
let start_tag = context#expect_any_start_tag in
let (attrs, content) = validate_complex_type context ct_def in
let end_tag = context#expect_any_end_tag in
assert (start_tag = end_tag);
let (ns, name) = start_tag in
Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)
let validate_attribute decl schema value =
(match value with
| Record _ -> ()
| _ ->
validation_error
"Only record values could be validated against attributes");
let (name, st_def, constr) = decl in
let qname = (schema.targetNamespace, name) in
let fields = Value.get_fields value in
let found = ref false in
let rec aux = function
| [] -> []
| (qname', value) :: rest when qname' = qname ->
(qname', validate_simple_type st_def value) :: aux rest
| field :: rest -> field :: aux rest
in
let fields = aux (Value.get_fields value) in
let fields =
if not !found then
match constr with
| Some (`Default v) -> (qname, v) :: fields
| _ ->
validation_error (sprintf
"Attribute %s was not found and no default value was provided"
(Ns.QName.to_string qname))
else
fields
in
Value.vrecord fields
let validate_attribute_group def schema value =
let (_, attr_uses) = def in
let stream =
match value with
| Record _ ->
Stream.of_list
(List.map
(fun (qname, v) ->
E_attribute (qname, fst (Value.get_string_utf8 v)))
(Value.get_fields value))
| _ ->
validation_error
"Only record values could be validated against attribute groups"
in
validate_attribute_uses (new context ~stream ~schema) attr_uses
let validate_model_group def schema value =
if not (Value.is_seq value) then
validation_error
"Only sequence values could be validated against model groups";
let stream =
stream_of_value (Value.Xml (Value.Absent, Value.Absent, value))
in
Stream.junk stream;
Value.sequence (validate_model_group (new context ~stream ~schema) (snd def))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment