Commit 529e07bc authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2005-02-22 17:46:40 by afrisch] rework validator

Original author: afrisch
Date: 2005-02-22 17:46:40+00:00
parent 923baaa2
......@@ -29,40 +29,120 @@ let string_of_value value =
let foo_qname = Ns.empty, Utf8.mk ""
(** Validation context *)
class type validation_context =
object
(* if ns isn't given, targetNamespace of the schema is used *)
method expect_start_tag: Ns.qname -> unit
method expect_end_tag: Ns.qname -> unit
method expect_any_start_tag: Ns.qname
method expect_any_end_tag: Ns.qname
method get_string: Utf8.t
method junk: unit
method peek: event
method set_mixed: bool -> unit
method mixed: bool
end
let validation_error ?context s = raise (XSI_validation_error s)
let validation_error_exemplar = XSI_validation_error ""
let compare_exn e1 e2 =
(* comparison function on exceptions; include all validation error
* exceptions in an equivalence class *)
match e1, e2 with
| XSI_validation_error _, XSI_validation_error _ -> 0
| e1, e2 -> Pervasives.compare e1 e2
let rec tries funs exn arg =
type context = {
ctx_stream: event Stream.t;
ctx_schema: schema;
mutable ctx_mixed: bool;
mutable ctx_current: Value.t;
}
let subctx mixed ctx = { ctx with ctx_current = Value.nil; ctx_mixed = mixed }
let get ctx = ctx.ctx_current
let rec only_ws s i =
(i = 0) ||
(let i = pred i in match (String.unsafe_get s i) with
| ' ' | '\t' | '\n' | '\r' -> only_ws s i
| _ -> false)
let only_ws s =
let s = Utf8.get_str s in
only_ws s (String.length s)
let error s = raise (XSI_validation_error s)
let concat ctx v = ctx.ctx_current <- Value.concat ctx.ctx_current v
let append ctx v = ctx.ctx_current <- Value.append ctx.ctx_current v
let xml qname attrs content =
Value.Xml (Value.Atom (Atoms.V.of_qname qname), attrs, content)
let peek ctx =
match Stream.peek ctx.ctx_stream with
| None -> error "Unexpected end of stream"
| Some e -> e
let next ctx =
try Stream.next ctx.ctx_stream
with Stream.Failure -> error "Unexpected end of stream"
let junk ctx =
Stream.junk ctx.ctx_stream
let get_string ctx =
let b = Buffer.create 67 in
let rec aux () =
match peek ctx with
| E_char_data s ->
junk ctx;
Buffer.add_string b (Utf8.get_str s);
aux ()
| E_start_tag _ ->
error "XML element found in simple content"
| _ -> ()
in
aux ();
Utf8.mk (Buffer.contents b)
let rec copy_pcdata ctx =
match peek ctx with
| E_char_data s ->
junk ctx;
concat ctx (string_utf8 s);
copy_pcdata ctx
| _ -> ()
let rec ignore_ws ctx =
match peek ctx with
| E_char_data s when only_ws s ->
junk ctx;
ignore_ws ctx
| E_char_data _ ->
error "Unexpected char data in non-mixed content"
| _ -> ()
let do_pcdata ctx =
if ctx.ctx_mixed then copy_pcdata ctx else ignore_ws ctx
let expect_end_tag ctx =
match next ctx with
| E_end_tag _ -> ()
| ev -> error (sprintf "Expected end tag, found %s" (string_of_event ev))
let expect_start_tag ctx tag =
match next ctx with
| E_start_tag t when Ns.QName.equal t tag -> ()
| ev -> error (sprintf "Expected tag %s, found %s"
(Ns.QName.to_string tag) (string_of_event ev))
let expect_any_start_tag ctx =
match next ctx with
| E_start_tag t -> t
| ev -> error (sprintf "Expected start tag, found %s"
(string_of_event ev))
let get_attributes ctx =
let rec aux attrs =
match peek ctx with
| E_attribute (qname, value) ->
junk ctx;
aux ((qname,value)::attrs)
| _ -> attrs
in
aux []
let rec tries funs arg =
match funs with
| [] -> raise Not_found
| f :: tl ->
try
print_endline "try"; flush stdout;
f arg
with e when compare_exn e exn = 0 ->
tries tl exn arg
with XSI_validation_error _ ->
tries tl arg
let space_RE = pcre_regexp " "
let split = pcre_split ~rex:space_RE
......@@ -162,199 +242,166 @@ end
(** {2 Simple type validation} *)
let rec validate_simple_type def v =
let s =
match get_string_utf8 v with
| utf8_string, rest when rest |=| nil -> utf8_string
| _ -> validation_error "string expected"
in
let rec validate_simple_type def s =
match def with
| Primitive name | Derived (Some name, _, _, _)
when Schema_builtin.is_builtin name ->
(try
Schema_builtin.validate_builtin name s
with Schema_builtin.Schema_builtin_error name ->
validation_error (sprintf "%s isn't a valid %s"
error (sprintf "%s isn't a valid %s"
(Utf8.to_string s) name))
| Primitive _ -> assert false
| Derived (_, Atomic primitive, facets, base) ->
let literal = normalize_white_space (fst facets.whiteSpace) s in
let value = validate_simple_type_ref base(*primitive*)(*???*)
(string_utf8 literal) in
literal in
Schema_facets.facets_valid facets value;
value
| Derived (_, List item, facets, base) ->
let literal = normalize_white_space (fst facets.whiteSpace) s in
let items =
List.map (validate_simple_type_ref item)
(List.map string_utf8 (split literal))
in
let items = List.map (validate_simple_type_ref item) (split literal) in
let value = Value.sequence items in
Schema_facets.facets_valid facets value;
value
| Derived (_, Union members, facets, base) ->
let value = tries (List.map validate_simple_type_ref members)
validation_error_exemplar
(string_utf8 s) in
print_endline ("union:" ^ (Utf8.to_string s)); flush stdout;
let value = tries (List.map validate_simple_type_ref members) s in
print_endline "after union"; flush stdout;
Schema_facets.facets_valid facets value;
print_endline "ok"; flush stdout;
value
| Derived (_, Restrict, _,_) as st ->
(* TODO: compute the restriction statically ... *)
let st = normalize_simple_type st in
validate_simple_type st v
validate_simple_type st s
and validate_simple_type_ref def v =
validate_simple_type (get_simple_type def) v
and validate_simple_type_ref def s =
validate_simple_type (get_simple_type def) s
(* wrapper for validate_simple_type which works on contexts *)
let validate_simple_type_wrapper context st_def =
validate_simple_type st_def (string_utf8 context#get_string)
(* wrapper for validate_simple_type which works on contexts *)
let validate_simple_type_wrapper ctx st_def =
validate_simple_type st_def (get_string ctx)
(** {2 Complex type validation} *)
let rec validate_any_type (context: validation_context) =
let rec validate_any_type ctx =
(* assumption: attribute events (if any) come first *)
let attrs = ref [] in
let cont = ref Value.nil in
let rec aux () =
match context#peek with
let attrs = get_attributes ctx in
let attrs = List.map (fun (n,v) -> (n,Value.string_utf8 v)) attrs in
let ctx = subctx true ctx in
let rec aux attrs =
copy_pcdata ctx;
match peek ctx with
| E_start_tag qname ->
context#junk;
let (attrs, content) = validate_any_type context in
let element =
Value.Xml (Value.Atom (Atoms.V.of_qname qname), attrs, content)
in
context#expect_end_tag qname;
cont := Value.append !cont element;
aux ()
| E_end_tag _ -> (Value.vrecord !attrs, !cont)
| E_attribute (qname, value) ->
context#junk;
attrs := (qname, Value.string_utf8 value) :: !attrs;
aux ()
| E_char_data utf8_data ->
context#junk;
cont := Value.concat !cont (string_utf8 utf8_data);
junk ctx;
let (attrs, content) = validate_any_type ctx in
expect_end_tag ctx;
append ctx (xml qname attrs content);
aux ()
| E_end_tag _ -> ()
| _ -> assert false
in
aux ()
aux ();
(Value.vrecord attrs, get ctx)
let check_fixed ~context fixed value =
let check_fixed ~ctx fixed value =
if not (Value.equal fixed value) then
validation_error ~context (sprintf "Expected fixed value: %s; found %s"
error ~ctx (sprintf "Expected fixed value: %s; found %s"
(string_of_value fixed) (string_of_value value))
let next_pcdata context =
let rec aux accu =
match context#peek with
| E_char_data utf8_data when context#mixed ->
context#junk;
aux (Value.concat accu (string_utf8 utf8_data))
| E_char_data utf8_data ->
validation_error ~context
(sprintf "Unexpected char data in non-mixed content: %s"
(Utf8.get_str utf8_data))
| _ -> accu
in
aux Value.nil
let next_tag context =
match context#peek with
let next_tag ctx =
match peek ctx with
| E_start_tag qname -> qname
| _ -> raise Not_found
let validate_attribute_uses context attr_uses =
let validate_attribute_uses ctx attr_uses =
let tbl = QTable.create 11 in
List.iter
(fun use -> QTable.add tbl (name_of_attribute_use use) use)
attr_uses;
let attrs = ref [] in
let rec aux () = (* look for attribute events and fill "attrs" *)
match context#peek with
| E_attribute (qname, value) ->
let { attr_decl = { attr_typdef = st_def };
attr_use_cstr = constr } =
try QTable.find tbl qname
with Not_found ->
validation_error ~context (sprintf "Unexpected attribute: %s"
(Ns.QName.to_string qname))
in
let value = validate_simple_type_ref st_def (Value.string_utf8 value) in
(match constr with (* check fixed constraint *)
| Some (`Fixed v) -> check_fixed ~context (Lazy.force v) value
| _ -> ());
attrs := (qname, value) :: !attrs;
QTable.remove tbl qname;
context#junk;
aux ()
| _ -> ()
let attrs =
List.map
(fun (qname, value) ->
let { attr_decl = { attr_typdef = st_def };
attr_use_cstr = constr } =
try QTable.find tbl qname
with Not_found ->
error (sprintf "Unexpected attribute: %s"
(Ns.QName.to_string qname))
in
let value = validate_simple_type_ref st_def value in
(match constr with (* check fixed constraint *)
| Some (`Fixed v) -> check_fixed ~ctx (Lazy.force v) value
| _ -> ());
QTable.remove tbl qname;
(qname, value)
) (get_attributes ctx);
in
aux ();
let attrs = ref attrs in
QTable.iter
(fun qname at ->
if at.attr_required then (* check for missing required attributes *)
validation_error ~context (sprintf "Required attribute %s is missing"
(Ns.QName.to_string qname))
else (* add default values *)
match at.attr_use_cstr with
| Some (`Default v) -> attrs := (qname, (Lazy.force v)) :: !attrs
| _ -> ())
if at.attr_required then (* check for missing required attributes *)
error (sprintf "Required attribute %s is missing"
(Ns.QName.to_string qname))
else (* add default values *)
match at.attr_use_cstr with
| Some (`Default v) -> attrs := (qname, (Lazy.force v)) :: !attrs
| _ -> ())
tbl;
Value.vrecord !attrs
let rec validate_element (context: validation_context) elt =
context#expect_start_tag elt.elt_name;
let (attrs, content) = validate_type_ref context elt.elt_typdef in
let rec validate_element ctx elt =
expect_start_tag ctx elt.elt_name;
let (attrs, content) = validate_type_ref ctx elt.elt_typdef in
let content = (* use default if needed and check fixed constraints *)
match elt.elt_cstr with
| Some (`Default v) when Value.equal content empty_string -> Lazy.force v
| Some (`Fixed v) ->
check_fixed ~context (Lazy.force v) content;
check_fixed ~ctx (Lazy.force v) content;
content
| _ -> content
in
let element =
Value.Xml (Value.Atom (Atoms.V.of_qname elt.elt_name), attrs, content)
in
context#expect_end_tag elt.elt_name;
element
expect_end_tag ctx;
xml elt.elt_name attrs content
and validate_type context = function
| AnyType -> validate_any_type context
| Simple st_def -> (empty_record, validate_simple_type_wrapper context st_def)
| Complex ct_def -> validate_complex_type context ct_def
and validate_type ctx = function
| AnyType -> validate_any_type ctx
| Simple st_def -> (empty_record, validate_simple_type_wrapper ctx st_def)
| Complex ct_def -> validate_complex_type ctx ct_def
and validate_type_ref context x =
validate_type context (Lazy.force x)
and validate_type_ref ctx x =
validate_type ctx (Lazy.force x)
(** @return Value.t * Value.t (* attrs, content *) *)
and validate_complex_type context ct =
let attrs = validate_attribute_uses context ct.ct_attrs in
let content = validate_content_type context ct.ct_content in
and validate_complex_type ctx ct =
let attrs = validate_attribute_uses ctx ct.ct_attrs in
let content = validate_content_type ctx ct.ct_content in
(attrs, content)
and validate_content_type context content_type : Value.t =
and validate_content_type ctx content_type =
match content_type with
| CT_empty -> Value.nil
| CT_simple st_def -> Value.sequence [ validate_simple_type_wrapper context (get_simple_type st_def) ]
| CT_empty ->
Value.nil
| CT_simple st_def ->
Value.sequence [ validate_simple_type_wrapper ctx (get_simple_type st_def) ]
| CT_model (particle, mixed) ->
context#set_mixed mixed;
validate_particle context particle
let mixold = ctx.ctx_mixed in
let ctx = subctx mixed ctx in
validate_particle ctx particle;
get ctx
and validate_particle context particle =
let content = ref Value.nil in
let concat v = content := Value.concat !content v in
and validate_particle ctx particle =
let rec validate_once ~cont_ok ~cont_failure =
match context#peek with
do_pcdata ctx;
match peek ctx with
| E_start_tag qname
when List.exists (Ns.QName.equal qname) particle.part_first ->
concat (validate_term context particle.part_term);
validate_term ctx particle.part_term;
cont_ok ()
| E_char_data utf8_data when context#mixed ->
concat (string_utf8 utf8_data);
context#junk;
validate_once ~cont_ok ~cont_failure
| ev -> cont_failure ev
in
let rec required = function
......@@ -363,7 +410,7 @@ and validate_particle context particle =
validate_once
~cont_ok:(fun () -> required (pred n))
~cont_failure:(fun event ->
validation_error ~context (sprintf "Unexpected content: %s"
error ~ctx (sprintf "Unexpected content: %s"
(string_of_event event)))
in
let rec optional = function
......@@ -377,47 +424,39 @@ and validate_particle context particle =
~cont_ok:(fun () -> optional (Some (pred n)))
~cont_failure:(fun _ -> ())
in
let rec trailing_cdata () =
match context#peek with
| E_char_data utf8_data ->
concat (string_utf8 utf8_data);
context#junk;
trailing_cdata ()
| _ -> ()
in
required particle.part_min;
optional
(match particle.part_max with
| None -> None
| Some v -> Some (v - particle.part_min));
if context#mixed then trailing_cdata ();
!content
do_pcdata ctx
and validate_term context term =
and validate_term ctx term =
match term with
| Elt elt_decl_ref ->
sequence [ validate_element context (Lazy.force elt_decl_ref) ]
| Model model_group ->
validate_model_group context model_group
| Elt elt_decl_ref -> append ctx
(validate_element ctx (Lazy.force elt_decl_ref))
| Model model_group -> validate_model_group ctx model_group
and validate_choice context particles =
and validate_choice ctx particles =
(* TODO: Handle case when one of the choices is nullable *)
let tbl = QTable.create 20 in
List.iter
(fun p ->
List.iter (fun tag -> QTable.add tbl tag p) (first_of_particle p))
particles;
let txt = next_pcdata context in
do_pcdata ctx;
try
let qname = next_tag context in
let particle = QTable.find tbl qname in
Value.concat txt (validate_particle context particle)
(match peek ctx with
| E_start_tag qname ->
let particle = QTable.find tbl qname in
validate_particle ctx particle
| _ -> raise Not_found)
with Not_found ->
validation_error ~context (sprintf "Cannot choose branch of choice group")
error (sprintf "Cannot choose branch of choice group")
and validate_all_group context particles =
and validate_all_group ctx particles =
let tbl = QTable.create 20 in
let slots : (bool * Value.t option ref) list =
let slots =
List.map
(fun p ->
let slot = ref None in
......@@ -428,133 +467,73 @@ and validate_all_group context particles =
let contents = ref Value.nil in
let rec aux () =
let qname = next_tag context in
let p,slot = QTable.find tbl qname in
match !slot with
| Some x -> ()
| None -> slot := Some (validate_particle context p); aux ()
match peek ctx with
| E_start_tag qname ->
let qname = next_tag ctx in
let p,slot = QTable.find tbl qname in
(match !slot with
| Some x -> ()
| None ->
let ctx = subctx ctx.ctx_mixed ctx in
validate_particle ctx p;
slot := Some (get ctx); aux ())
| _ -> ()
in
let txt = next_pcdata context in
(try aux () with Not_found -> ());
List.fold_left
(fun accu (nullable,slot) ->
do_pcdata ctx;
aux ();
List.iter
(fun (nullable,slot) ->
match !slot with
| Some x -> Value.concat accu x
| None when nullable -> accu
| None ->
validation_error ~context
"One particle of the all group is missing"
) txt slots
| Some x -> concat ctx x
| None when nullable -> ()
| None -> error "One particle of the all group is missing"
) slots
and validate_model_group context model_group =
and validate_model_group ctx model_group =
match model_group with
| All particles -> validate_all_group context particles
| Choice particles -> validate_choice context particles
| Sequence particles ->
flatten (sequence (List.map (validate_particle context) particles))
| All particles -> validate_all_group ctx particles
| Choice particles -> validate_choice ctx particles
| Sequence particles -> List.iter (validate_particle ctx) particles
(** {2 Context implementation} *)
class context ~stream ~schema =
object (self)
val mutable mixed = false
let ctx stream schema =
{ ctx_stream = stream;
ctx_schema = schema;
ctx_mixed = false;
ctx_current = Value.Absent }
method mixed = mixed
method set_mixed v = mixed <- v
let validate_element decl schema value =
let ctx = ctx (stream_of_value value) schema in
validate_element ctx decl
method private next =
try
Stream.next stream
with Stream.Failure ->
self#error "Unexpected end of stream";
(* just to cheat with the type checker, above function wont return *)
Stream.next stream
method peek =
match Stream.peek stream with
| None ->
self#error "Unexpected end of stream";
(* just to cheat with the type checker as above *)
Stream.next stream
| Some e -> e
method junk = Stream.junk stream