schema_validator.ml 17.7 KB
Newer Older
1
let debug = false
2

3
open Printf
4

5
open Value
6
open Encodings
7
open Schema_pcre
8
open Schema_common
9
open Schema_types
10

11
module QTable = Hashtbl.Make(Ns.Label)
12

13 14
let ppf = Format.std_formatter

15 16
  (** {2 Misc} *)

17 18 19
let empty_record = Value.vrecord []
let foo_atom = Value.Atom (Atoms.V.mk_ascii "foo")
let foo_event = E_char_data (Utf8.mk "")
20

21 22
let qtable_is_empty tbl =
  try QTable.iter (fun _ _ -> raise Exit) tbl; true
23
  with Exit -> false
24 25 26 27

let string_of_value value =
  let buf = Buffer.create 1024 in
  let fmt = Format.formatter_of_buffer buf in
28
  Format.fprintf fmt "%a@?" Value.print value;
29 30 31 32
  Buffer.contents buf

let foo_qname = Ns.empty, Utf8.mk ""

33 34 35 36 37 38 39 40
type context = {
  ctx_stream: event Stream.t;
  mutable ctx_mixed: bool;
  mutable ctx_current: Value.t;
}

let subctx mixed ctx = { ctx with ctx_current = Value.nil; ctx_mixed = mixed }

41 42 43
let push_str ctx s =
  { ctx with ctx_stream = Stream.icons (E_char_data s) ctx.ctx_stream }

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
let get ctx = ctx.ctx_current

let rec only_ws s i =
  (i = 0) ||
  (let i = pred i in match (String.unsafe_get s i) with
     | ' ' | '\t' | '\n' | '\r' -> only_ws s i
     | _ -> false)

let only_ws s =
  let s = Utf8.get_str s in
  only_ws s (String.length s)

let error s = raise (XSI_validation_error s)

let concat ctx v = ctx.ctx_current <- Value.concat ctx.ctx_current v
let append ctx v = ctx.ctx_current <- Value.append ctx.ctx_current v

let xml qname attrs content =
62
  Value.Xml (Value.Atom qname, attrs, content, Value.Identity)
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113


let peek ctx = 
  match Stream.peek ctx.ctx_stream with
    | None -> error "Unexpected end of stream"
    | Some e -> e

let next ctx =
  try Stream.next ctx.ctx_stream
  with Stream.Failure -> error "Unexpected end of stream"

let junk ctx =
  Stream.junk ctx.ctx_stream

let get_string ctx =
  let b = Buffer.create 67 in
  let rec aux () =
    match peek ctx with
      | E_char_data s ->
          junk ctx;
	  Buffer.add_string b (Utf8.get_str s);
	  aux ()
      | E_start_tag _ ->
	  error "XML element found in simple content"
      | _ -> ()
  in
  aux ();
  Utf8.mk (Buffer.contents b)

let rec copy_pcdata ctx =
  match peek ctx with
    | E_char_data s ->
        junk ctx;
	concat ctx (string_utf8 s);
	copy_pcdata ctx
    | _ -> ()

let rec ignore_ws ctx =
  match peek ctx with
    | E_char_data s when only_ws s -> 
	junk ctx;
	ignore_ws ctx
    | E_char_data _ -> 
        error "Unexpected char data in non-mixed content"
    | _ -> ()

let do_pcdata ctx =
  if ctx.ctx_mixed then copy_pcdata ctx else ignore_ws ctx

let expect_end_tag ctx =
  match next ctx with
114
    | E_end_tag -> ()
115 116
    | ev -> error (sprintf "Expected end tag, found %s" (string_of_event ev))

117 118
let is_end_tag ctx =
  match peek ctx with
119
    | E_end_tag -> true
120 121
    | ev -> false

122 123
let check_nil ctx =
  match peek ctx with
124
    | E_end_tag -> ()
125 126 127
    | ev -> error (sprintf "Non-empty content with xsi:nil set : %s" 
		     (string_of_event ev))

128 129
let expect_start_tag ctx tag =
  match next ctx with
130
    | E_start_tag t when Atoms.V.equal t tag -> ()
131
    | ev -> error (sprintf "Expected tag %s, found %s" 
132
		     (Atoms.V.to_string tag) (string_of_event ev))
133

134

135 136 137 138 139 140
let expect_any_start_tag ctx =
  match next ctx with
    | E_start_tag t -> t
    | ev -> error (sprintf "Expected start tag, found %s" 
		     (string_of_event ev))

141 142
type attrs = {
  xsi_nil: bool;
143
  attrs: (Ns.Label.t * Utf8.t) list
144 145 146 147 148 149
}

let get_bool v = 
  match Utf8.get_str v with
    | "true" | "1" -> true
    | "false" | "0" -> false
150
    | _ -> error "Invalid boolean value"
151

152
let get_attributes ctx =
153
  let rec aux attrs nil =
154
    match peek ctx with
155
      | E_attribute (qname,value) when Ns.Label.equal qname xsi_nil_label ->
156 157 158 159 160 161
	  junk ctx;
	  aux attrs (get_bool value)
      | E_attribute (qname, value) ->
          junk ctx;
          aux ((qname,value)::attrs) nil
      | _ -> { attrs = attrs; xsi_nil = nil }
162
  in
163
  aux [] false
164 165

let rec tries funs arg =
166 167 168
  match funs with
  | [] -> raise Not_found
  | f :: tl ->
169
      try f arg
170 171
      with XSI_validation_error _ ->
        tries tl arg
172 173 174 175 176 177

let space_RE = pcre_regexp " "
let split = pcre_split ~rex:space_RE

  (** {2 Facets validation} *)

178 179 180 181 182 183 184
module Schema_facets:
sig
  exception Facet_error of string
  val facets_valid: facets -> Value.t -> unit
end
=
struct
185 186 187 188 189 190 191 192 193 194 195 196

  open Big_int
  open Value

  exception Facet_error of string

    (* compute the length of a particular CDuce *)
    (* STRONG ASSUMPTION: v is a CDuce value built via "validate_simple_type"
     * function below, thus it contains no sequence of characters, but strings
     * and no Concat, but just Pair *)
  let length v =
    let rec aux acc = function
197
      | Pair (_, rest,sigma) -> aux (succ acc) rest
198
      | _ -> 0
199
    in
200
    aux 0 v
201 202

  let length_valid len value =
203 204
    if (length value != len) 
    then raise (Facet_error "length")
205
  let minLength_valid min_len value =
206 207
    if (length value < min_len)
    then raise (Facet_error "minLength")
208
  let maxLength_valid max_len value =
209 210
    if (length value > max_len) 
    then raise (Facet_error "maxLength")
211 212

  let enumeration_valid enum value =
213
    if not (List.exists (fun x -> Value.equal value x) enum) 
214
    then raise (Facet_error "enumeration")
215 216

  let maxInclusive_valid max_inc value =
217
    if value |>| (max_inc) then raise (Facet_error "maxInclusive")
218
  let maxExclusive_valid max_exc value =
219
    if value |>=| (max_exc) then raise (Facet_error "maxExclusive")
220
  let minInclusive_valid min_inc value =
221
    if value |<| (min_inc) then raise (Facet_error "minInclusive")
222
  let minExclusive_valid min_exc value =
223
    if value |<=| (min_exc) then raise (Facet_error "minInclusive")
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268

    (* check facets validaty rules other than pattern and whiteSpace. "value"
    parameter should already be white space normalized and pattern valid.
    Assumption: facets set contains only facets that are applicable to the type
    of value *)
  let facets_valid facets value =
    (* TODO efficiency *)
    (* all facets are always checked, but we know that in some cases only some
     * of them can be present; e.g. if variety is union only pattern and
     * enumeration are possibles ... *)
    (match facets.length with
    | None ->
        (match facets.minLength with
        | None -> ()
        | Some (len, _) -> minLength_valid len value);
        (match facets.maxLength with
        | None -> ()
        | Some (len, _) -> maxLength_valid len value);
    | Some (len, _) -> length_valid len value);
    (match facets.enumeration with
    | None -> ()
    | Some enum -> enumeration_valid enum value);
    (match facets.maxInclusive with
    | None -> ()
    | Some (lim, _) -> maxInclusive_valid lim value);
    (match facets.maxExclusive with
    | None -> ()
    | Some (lim, _) -> maxExclusive_valid lim value);
    (match facets.minInclusive with
    | None -> ()
    | Some (lim, _) -> minInclusive_valid lim value);
    (match facets.minExclusive with
    | None -> ()
    | Some (lim, _) -> minExclusive_valid lim value);
  (*
    (match facets.totalDigits with
    | None -> ()
    | Some (dig, _) -> totalDigits_valid dig value);
    (match facets.fractionDigits with
    | None -> ()
    | Some (dig, _) -> fractionDigits_valid dig value)
  *)

end

269
  (** {2 Simple type validation} *)
270 271
let rec validate_simple_type def s =
 match def with
272
  | { st_name = Some name } when Schema_builtin.is name ->
273 274 275 276 277 278 279 280
    let value = (match def with 
	{ st_variety = Atomic st; st_facets = facets } ->
	  normalize_white_space (fst facets.whiteSpace) s
      | _ -> s) in
    (try Schema_builtin.validate (Schema_builtin.get name) value
     with Schema_builtin.Error name ->
       error (sprintf "%s isn't a valid %s"
		(Utf8.to_string value) name))
281
  | { st_variety = Atomic st; st_facets = facets } ->
282
      let literal = normalize_white_space (fst facets.whiteSpace) s in
283
      let value = validate_simple_type st literal in
284 285
      Schema_facets.facets_valid facets value;
      value
286
  | { st_variety = List item; st_facets = facets } ->
287
      let literal = normalize_white_space (fst facets.whiteSpace) s in
288
      let items = List.map (validate_simple_type item) (split literal) in
289 290 291
      let value = Value.sequence items in
      Schema_facets.facets_valid facets value;
      value
292 293
  | { st_variety = Union members; st_facets = facets } ->
      let value = tries (List.map validate_simple_type members) s in
294 295 296
      Schema_facets.facets_valid facets value;
      value

297 298
  (** {2 Complex type validation} *)

299
let rec validate_any_type ctx =
300
  (* assumption: attribute events (if any) come first *)
301
  let attrs = get_attributes ctx in
302
  let attrs = List.map (fun (n,v) -> (n,Value.string_utf8 v)) attrs.attrs in
303 304 305 306 307

  let ctx = subctx true ctx in
  let rec aux attrs =
    copy_pcdata ctx;
    match peek ctx with
308
    | E_start_tag qname ->
309 310 311 312
        junk ctx;
        let (attrs, content) = validate_any_type ctx in
        expect_end_tag ctx;
	append ctx (xml qname attrs content);
313
        aux ()
314
    | E_end_tag -> ()
315
    | _ -> assert false
316
  in
317 318
  aux ();
  (Value.vrecord attrs, get ctx)
319

320 321
let validate_wildcard ctx w =
  let qname = expect_any_start_tag ctx in
322
  if not (Atoms.contains qname w.wild_first)
323
  then error (sprintf "Tag %s is not accepted by the wildcard" 
324
		(Atoms.V.to_string qname));
325 326 327 328
  let (attrs, content) = validate_any_type ctx in
  expect_end_tag ctx;
  xml qname attrs content

329
let check_fixed fixed value =
330
  if not (Value.equal fixed value) then
331
    error (sprintf "Expected fixed value: %s; found %s"
332 333
      (string_of_value fixed) (string_of_value value))

334

335 336 337

let next_tag ctx =
  match peek ctx with
338 339
    | E_start_tag qname -> qname
    | _ -> raise Not_found
340

341
let validate_attribute_uses attrs (attr_uses,anyattr) =
342
  let tbl = QTable.create 11 in
343
  List.iter
344
    (fun use -> QTable.add tbl use.attr_decl.attr_name use)
345
    attr_uses;
346 347 348
  let attribs = ref [] in
  List.iter
    (fun (qname, value) ->
349 350 351 352 353
       let value = 
	 try
	   let a = QTable.find tbl qname in
	   let value = validate_simple_type a.attr_decl.attr_typdef value in
	   (match a.attr_use_cstr with  (* check fixed constraint *)
354
              | Some (`Fixed (_,v)) -> check_fixed v value
355 356 357 358 359 360
              | _ -> ());
	   QTable.remove tbl qname;
	   value
	 with Not_found ->
	   if anyattr then Value.string_utf8 value
	   else error 
361
	     (sprintf "Unexpected attribute: %s" (Ns.Label.string_of_attr qname))
362 363 364 365
       in
       attribs := (qname, value) :: !attribs
    ) attrs.attrs;
  if attrs.xsi_nil then
366
    attribs := (xsi_nil_label, Value.vtrue) :: !attribs;
367
  QTable.iter
368
    (fun qname at ->
369 370
       if at.attr_required then  (* check for missing required attributes *)
         error (sprintf "Required attribute %s is missing"
371
		  (Ns.Label.string_of_attr qname))
372 373
       else  (* add default values *)
         match at.attr_use_cstr with
374
           | Some (`Default (_,v)) -> attribs := (qname, v) :: !attribs
375
           | _ -> ())
376
    tbl;
377
  Value.vrecord !attribs
378

379 380
let rec validate_element ctx elt =
  expect_start_tag ctx elt.elt_name;
381 382 383
  let attrs = get_attributes ctx in
  if (attrs.xsi_nil && not elt.elt_nillable) then
    error "xsi:nil attribute on non-nillable element";
384 385 386 387 388 389 390 391 392
  let is_empty = is_end_tag ctx in
  let ctx =
    match is_empty, elt.elt_cstr with
      | true, Some (`Default (v,_) | `Fixed (v,_)) -> push_str ctx v
      | _ -> ctx in
  let (attrs, content) = validate_type ctx attrs (Lazy.force elt.elt_typdef) in
  (match is_empty, elt.elt_cstr with
      | false, Some (`Fixed (_,v)) -> check_fixed v content
      | _ -> ());
393 394
  expect_end_tag ctx;
  xml elt.elt_name attrs content
395

396
and validate_type ctx attrs = function
397
  | AnyType -> validate_any_type ctx
398 399 400 401
  | Simple st_def ->
      if (List.length attrs.attrs > 0) then
	error "attribute on element with simple content";
      if attrs.xsi_nil then (check_nil ctx; 
402
			     Value.vrecord [xsi_nil_label,Value.vtrue],
403
			     Value.nil)
404
      else (empty_record, validate_simple_type st_def (get_string ctx))
405 406 407 408 409 410 411
  | Complex ct_def -> validate_complex_type ctx attrs ct_def

and validate_complex_type ctx attrs ct =
  let content =
    if attrs.xsi_nil then (check_nil ctx; Value.nil)
    else validate_content_type ctx ct.ct_content in
  let attrs = validate_attribute_uses attrs ct.ct_attrs in
412 413
  (attrs, content)

414
and validate_content_type ctx content_type =
415
  match content_type with
416 417 418
  | CT_empty -> 
      Value.nil
  | CT_simple st_def -> 
419
      validate_simple_type st_def (get_string ctx)
420
  | CT_model (particle, mixed) ->
421 422 423
      let ctx = subctx mixed ctx in
      validate_particle ctx particle;
      get ctx
424

425
and validate_particle ctx particle =
426
  let rec validate_once ~cont_ok ~cont_failure =
427 428
    do_pcdata ctx;
    match peek ctx with
429
    | E_start_tag qname 
430
	when Atoms.contains qname particle.part_first ->
431
	validate_term ctx particle.part_term;
432
        cont_ok ()
433 434
    | ev -> 
	cont_failure ev
435 436
  in
  let rec required = function
437
    | 0 -> ()    | n ->
438
        validate_once
439
          ~cont_ok:(fun () -> required (pred n))
440
          ~cont_failure:(fun event ->
441 442
			   if particle.part_nullable then ()
			   else
443 444
			     error (sprintf "Unexpected content: %s"
				      (string_of_event event)))
445 446 447 448 449 450
  in
  let rec optional = function
    | None ->
        validate_once
          ~cont_ok:(fun () -> optional None)
          ~cont_failure:(fun _ -> ())
451 452
    | Some 0 -> ()
    | Some n ->
453
        validate_once
454
          ~cont_ok:(fun () -> optional (Some (pred n)))
455 456
          ~cont_failure:(fun _ -> ())
  in
457
  required particle.part_min;
458
  optional
459 460 461
    (match particle.part_max with 
       | None -> None 
       | Some v -> Some (v - particle.part_min));
462
  do_pcdata ctx
463

464
and validate_term ctx term =
465
  match term with
466 467 468
  | Elt elt -> append ctx (validate_element ctx elt)
  | Model mg -> validate_model_group ctx mg
  | Wildcard w -> append ctx (validate_wildcard ctx w)
469

470
and validate_choice ctx particles =
471
(* TODO: Handle case when one of the choices is nullable *)
472
  let tbl = Atoms.mk_map
473
    (List.map (fun p -> p.part_first, p) particles) in
474
  do_pcdata ctx;
475
  try
476
    (match peek ctx with
477
       | E_start_tag q -> validate_particle ctx (Atoms.get_map q tbl)
478
       | _ -> raise Not_found)
479
  with Not_found ->
480
    error (sprintf "Cannot choose branch of choice group")
481

482
and validate_all_group ctx particles =
483 484
  let slots = List.map (fun p -> (p, ref None)) particles in
  let tbl = Atoms.mk_map
485
    (List.map (fun (p,slot) -> p.part_first, (p,slot)) slots) in
486 487
  
  let rec aux () =
488
    match peek ctx with
489 490
      | E_start_tag q ->
	  let p,slot = Atoms.get_map q tbl in
491 492 493 494 495 496 497
	  (match !slot with
	    | Some x -> ()
	    | None -> 
		let ctx = subctx ctx.ctx_mixed ctx in
		validate_particle ctx p;
		slot := Some (get ctx); aux ())
      | _ -> ()
498
  in
499 500 501
  do_pcdata ctx;
  aux ();
  List.iter
502
    (fun (p,slot) ->
503
       match !slot with
504
	 | Some x -> concat ctx x
505
	 | None when p.part_nullable -> ()
506 507
	 | None -> error "One particle of the all group is missing"
    ) slots
508

509

510
and validate_model_group ctx model_group =
511
  match model_group with
512 513 514
  | All particles -> validate_all_group ctx particles
  | Choice particles -> validate_choice ctx particles
  | Sequence particles -> List.iter (validate_particle ctx) particles
515 516


517
let ctx stream =
518 519 520
  { ctx_stream = stream;
    ctx_mixed = false;
    ctx_current = Value.Absent }
521

522 523
let validate_element decl value =
  let ctx = ctx (stream_of_value value) in
524
  validate_element ctx decl
525

526 527 528 529 530
let get_str v = 
  if not (is_str v) then
    error
      "Only string values could be validate against simple types";
  fst (get_string_utf8 v)
531

532
let validate_type def value =
533 534
  match def with
  | AnyType -> value  (* shortcut *)
535
  | Simple st_def -> validate_simple_type st_def (get_str value)
536
  | Complex ct_def ->
537
      let ctx = ctx (stream_of_value value) in
538
      let start_tag = expect_any_start_tag ctx in
539 540
      let attrs = get_attributes ctx in
      let (attrs, content) = validate_complex_type ctx attrs ct_def in
541
      expect_end_tag ctx;
542
      Value.Xml (Value.Atom start_tag, attrs, content,Value.Identity)
543

544
(*
545
let validate_attribute decl value =
546
  assert false; (* TODO see the .mli *)
547 548 549
  (match value with
  | Record _ -> ()
  | _ ->
550
      error
551 552 553 554 555 556 557 558
        "Only record values could be validated against attributes");
  let (name, st_def, constr) = decl in
  let qname = (schema.targetNamespace, name) in
  let fields = Value.get_fields value in
  let found = ref false in
  let rec aux = function
    | [] -> []
    | (qname', value) :: rest when qname' = qname ->
559
        found := true;
560 561 562 563 564 565 566 567 568 569
        (qname', validate_simple_type st_def value) :: aux rest
    | field :: rest -> field :: aux rest

  in
  let fields = aux (Value.get_fields value) in
  let fields =
    if not !found then
      match constr with
      | Some (`Default v) -> (qname, v) :: fields
      | _ ->
570
          error (sprintf
571 572 573 574 575 576
            "Attribute %s was not found and no default value was provided"
            (Ns.QName.to_string qname))
    else
      fields
  in
  Value.vrecord fields
577
*)
578

579
let validate_attribute_group { ag_def = attr_uses } value =
580 581 582 583
  let stream =
    match value with
    | Record _ ->
        Stream.of_list
584
          ((List.map
585 586
            (fun (qname, v) ->
              E_attribute (qname, fst (Value.get_string_utf8 v)))
587 588
            (Value.get_fields value)) @
            [ foo_event ])
589
    | _ ->
590
        error
591 592
          "Only record values could be validated against attribute groups"
  in
593
  let ctx = ctx stream in
594 595
  let attrs = get_attributes ctx in
  validate_attribute_uses attrs attr_uses
596

597

598
let validate_model_group { mg_def = mg } value =
599
  if not (Value.is_seq value) then
600
    error
601
      "Only sequence values could be validated against model groups";
602
  let stream = stream_of_value (Value.Xml (foo_atom, empty_record, value,Value.Identity)) in
603
  Stream.junk stream;
604
  let ctx = ctx stream in
605 606 607
  validate_model_group ctx mg;
  get ctx

608

609 610 611 612 613 614 615 616 617 618 619 620 621

type t =
  | VAttrGp of attribute_group_definition
  | VModelGp of model_group_definition
  | VType of type_definition
  | VElem of element_declaration

let run s v =
  match s with
    | VAttrGp x -> validate_attribute_group x v
    | VModelGp x -> validate_model_group x v
    | VType x -> validate_type x v
    | VElem x -> validate_element x v