schema_validator.ml 19.1 KB
Newer Older
1
let debug = false
2

3
open Printf
4

5
open Encodings
6
open Schema_pcre
7
open Schema_common
8
open Schema_types
9
open Value
10

11
12
module QTable = Hashtbl.Make(Ns.QName)

13
14
  (** {2 Misc} *)

15
let empty_string = string_utf8 (Utf8.mk "")
16
17
18
let empty_record = Value.vrecord []
let foo_atom = Value.Atom (Atoms.V.mk_ascii "foo")
let foo_event = E_char_data (Utf8.mk "")
19

20
21
let qtable_is_empty tbl =
  try QTable.iter (fun _ _ -> raise Exit) tbl; true
22
  with Exit -> false
23
24
25
26
27
28
29
30
31
32

let string_of_value value =
  let buf = Buffer.create 1024 in
  let fmt = Format.formatter_of_buffer buf in
  Value.print fmt value;
  Buffer.contents buf

let foo_qname = Ns.empty, Utf8.mk ""

let ptbl_of_particles particles =
33
  let tbl = QTable.create 20 in
34
35
  List.iter (* fill table *)
    (* ASSUMPTION: firsts are disjoing as per UPA Schema constraint *)
36
37
    (fun p -> 
       List.iter (fun tag -> QTable.add tbl tag p) (first_of_particle p))
38
39
40
41
42
43
44
    particles;
  tbl

  (** Validation context *)
class type validation_context =
  object
      (* if ns isn't given, targetNamespace of the schema is used *)
45
46
    method expect_start_tag: Ns.qname -> unit
    method expect_end_tag: Ns.qname -> unit
47
48
49
50
51
52
53
54
55
56
    method expect_any_start_tag: Ns.qname
    method expect_any_end_tag: Ns.qname
    method get_string: Utf8.t
    method junk: unit
    method peek: event

    method set_mixed: bool -> unit
    method mixed: bool
  end

57
58
let validation_error ?context s = raise (XSI_validation_error s)
let validation_error_exemplar = XSI_validation_error ""
59
60

let compare_exn e1 e2 =
61
62
    (* comparison function on exceptions; include all validation error
     * exceptions in an equivalence class *)
63
64
65
66
  match e1, e2 with
  | XSI_validation_error _, XSI_validation_error _ -> 0
  | e1, e2 -> Pervasives.compare e1 e2

67
68
69
70
71
72
73
74
75
76
77
78
79
80
let rec tries funs exn arg =
  match funs with
  | [] -> raise Not_found
  | f :: tl ->
      try
        f arg
      with e when compare_exn e exn = 0 ->
        tries tl exn arg

let space_RE = pcre_regexp " "
let split = pcre_split ~rex:space_RE

  (** {2 Facets validation} *)

81
82
83
84
85
86
87
module Schema_facets:
sig
  exception Facet_error of string
  val facets_valid: facets -> Value.t -> unit
end
=
struct
88
89
90
91
92
93
94
95
96
97
98
99

  open Big_int
  open Value

  exception Facet_error of string

    (* compute the length of a particular CDuce *)
    (* STRONG ASSUMPTION: v is a CDuce value built via "validate_simple_type"
     * function below, thus it contains no sequence of characters, but strings
     * and no Concat, but just Pair *)
  let length v =
    let rec aux acc = function
100
101
      | Pair (_, rest) -> aux (succ acc) rest
      | _ -> 0
102
    in
103
    aux 0 v
104
105

  let length_valid len value =
106
107
    if (length value != len) 
    then raise (Facet_error "length")
108
  let minLength_valid min_len value =
109
110
    if (length value < min_len)
    then raise (Facet_error "minLength")
111
  let maxLength_valid max_len value =
112
113
    if (length value > max_len) 
    then raise (Facet_error "maxLength")
114
115

  let enumeration_valid enum value =
116
117
    if not (List.exists (fun x -> Value.equal value (Lazy.force x)) enum) 
    then raise (Facet_error "enumeration")
118
119

  let maxInclusive_valid max_inc value =
120
    if value |>| (Lazy.force max_inc) then raise (Facet_error "maxInclusive")
121
  let maxExclusive_valid max_exc value =
122
    if value |>=| (Lazy.force max_exc) then raise (Facet_error "maxExclusive")
123
  let minInclusive_valid min_inc value =
124
    if value |<| (Lazy.force min_inc) then raise (Facet_error "minInclusive")
125
  let minExclusive_valid min_exc value =
126
    if value |<=| (Lazy.force min_exc) then raise (Facet_error "minInclusive")
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

    (* check facets validaty rules other than pattern and whiteSpace. "value"
    parameter should already be white space normalized and pattern valid.
    Assumption: facets set contains only facets that are applicable to the type
    of value *)
  let facets_valid facets value =
    (* TODO efficiency *)
    (* all facets are always checked, but we know that in some cases only some
     * of them can be present; e.g. if variety is union only pattern and
     * enumeration are possibles ... *)
    (match facets.length with
    | None ->
        (match facets.minLength with
        | None -> ()
        | Some (len, _) -> minLength_valid len value);
        (match facets.maxLength with
        | None -> ()
        | Some (len, _) -> maxLength_valid len value);
    | Some (len, _) -> length_valid len value);
    (match facets.enumeration with
    | None -> ()
    | Some enum -> enumeration_valid enum value);
    (match facets.maxInclusive with
    | None -> ()
    | Some (lim, _) -> maxInclusive_valid lim value);
    (match facets.maxExclusive with
    | None -> ()
    | Some (lim, _) -> maxExclusive_valid lim value);
    (match facets.minInclusive with
    | None -> ()
    | Some (lim, _) -> minInclusive_valid lim value);
    (match facets.minExclusive with
    | None -> ()
    | Some (lim, _) -> minExclusive_valid lim value);
  (*
    (match facets.totalDigits with
    | None -> ()
    | Some (dig, _) -> totalDigits_valid dig value);
    (match facets.fractionDigits with
    | None -> ()
    | Some (dig, _) -> fractionDigits_valid dig value)
  *)

end

172
  (** {2 Simple type validation} *)
173

174
let rec validate_simple_type def v =
175
176
  let s =
    match get_string_utf8 v with
177
178
    | utf8_string, rest when rest |=| nil -> utf8_string
    | _ -> validation_error "string expected"
179
  in
180
  match def with
181
  | Primitive name | Derived (Some name, _, _, _)
182
183
184
185
    when Schema_builtin.is_builtin name ->
      (try
        Schema_builtin.validate_builtin name s
      with Schema_builtin.Schema_builtin_error name ->
186
        validation_error (sprintf "%s isn't a valid %s"
187
          (Utf8.to_string s) name))
188
  | Primitive _ -> assert false
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  | Derived (_, Atomic primitive, facets, base) ->
      let literal = normalize_white_space (fst facets.whiteSpace) s in
      let value = validate_simple_type_ref base(*primitive*)(*???*) 
	(string_utf8 literal) in
      Schema_facets.facets_valid facets value;
      value
  | Derived (_, List item, facets, base) ->
      let literal = normalize_white_space (fst facets.whiteSpace) s in
      let items =
        List.map (validate_simple_type_ref item)
          (List.map string_utf8 (split literal))
      in
      let value = Value.sequence items in
      Schema_facets.facets_valid facets value;
      value
  | Derived (_, Union members, facets, base) ->
      let value = tries (List.map validate_simple_type_ref members)
        validation_error_exemplar
        (string_utf8 s) in
      Schema_facets.facets_valid facets value;
      value
  | Derived (_, Restrict, _,_) as st ->
      (* TODO: compute the restriction statically ... *)
      let st = normalize_simple_type st in
      validate_simple_type st v


and validate_simple_type_ref def v =
  validate_simple_type (get_simple_type def) v
218

219
220
  (* wrapper for validate_simple_type which works on contexts *)
let validate_simple_type_wrapper context st_def =
221
  validate_simple_type st_def (string_utf8 context#get_string)
222
223
224
225
226
227

  (** {2 Complex type validation} *)

let rec validate_any_type (context: validation_context) =
  (* assumption: attribute events (if any) come first *)
  let attrs = ref [] in
228
  let cont = ref Value.nil in
229
230
  let rec aux () =
    match context#peek with
231
    | E_start_tag qname ->
232
233
234
        context#junk;
        let (attrs, content) = validate_any_type context in
        let element =
235
          Value.Xml (Value.Atom (Atoms.V.of_qname qname), attrs, content)
236
        in
237
        context#expect_end_tag qname;
238
        cont := Value.append !cont element;
239
        aux ()
240
    | E_end_tag _ -> (Value.vrecord !attrs, !cont)
241
242
243
244
245
246
    | E_attribute (qname, value) ->
        context#junk;
        attrs := (qname, Value.string_utf8 value) :: !attrs;
        aux ()
    | E_char_data utf8_data ->
        context#junk;
247
        cont := Value.concat !cont (string_utf8 utf8_data);
248
249
250
251
252
253
254
255
256
        aux ()
  in
  aux ()

let check_fixed ~context fixed value =
  if not (Value.equal fixed value) then
    validation_error ~context (sprintf "Expected fixed value: %s; found %s"
      (string_of_value fixed) (string_of_value value))

257

258
let validate_attribute_uses context attr_uses =
259
  let tbl = QTable.create 11 in
260
  List.iter
261
    (fun use -> QTable.add tbl (name_of_attribute_use use) use)
262
263
264
265
266
    attr_uses;
  let attrs = ref [] in
  let rec aux () =  (* look for attribute events and fill "attrs" *)
    match context#peek with
    | E_attribute (qname, value) ->
267
268
        let { attr_decl =  { attr_typdef = st_def };
	      attr_use_cstr = constr } =
269
          try QTable.find tbl qname
270
271
272
273
          with Not_found ->
            validation_error ~context (sprintf "Unexpected attribute: %s"
              (Ns.QName.to_string qname))
        in
274
        let value = validate_simple_type_ref st_def (Value.string_utf8 value) in
275
        (match constr with  (* check fixed constraint *)
276
        | Some (`Fixed v) -> check_fixed ~context (Lazy.force v) value
277
278
        | _ -> ());
        attrs := (qname, value) :: !attrs;
279
        QTable.remove tbl qname;
280
281
282
283
284
        context#junk;
        aux ()
    | _ -> ()
  in
  aux ();
285
  QTable.iter
286
287
    (fun qname at ->
      if at.attr_required then  (* check for missing required attributes *)
288
289
290
        validation_error ~context (sprintf "Required attribute %s is missing"
          (Ns.QName.to_string qname))
      else  (* add default values *)
291
        match at.attr_use_cstr with
292
        | Some (`Default v) -> attrs := (qname, (Lazy.force v)) :: !attrs
293
294
295
296
        | _ -> ())
    tbl;
  Value.vrecord !attrs

297
298
let rec validate_element (context: validation_context) elt =
  context#expect_start_tag elt.elt_name;
299
  let (attrs, content) = validate_type_ref context elt.elt_typdef in
300
  let content = (* use default if needed and check fixed constraints *)
301
    match elt.elt_cstr with
302
    | Some (`Default v) when Value.equal content empty_string -> Lazy.force v
303
    | Some (`Fixed v) ->
304
        check_fixed ~context (Lazy.force v) content;
305
306
307
308
        content
    | _ -> content
  in
  let element =
309
    Value.Xml (Value.Atom (Atoms.V.of_qname elt.elt_name), attrs, content)
310
  in
311
  context#expect_end_tag elt.elt_name;
312
313
314
  element

and validate_type context = function
315
  | AnyType -> validate_any_type context
316
  | Simple st_def -> (empty_record, validate_simple_type_wrapper context st_def)
317
  | Complex ct_def -> validate_complex_type context ct_def
318

319
and validate_type_ref context x =
320
  validate_type context (Lazy.force x)
321

322
  (** @return Value.t * Value.t (* attrs, content *) *)
323
324
and validate_complex_type context ct =
  let attrs = validate_attribute_uses context ct.ct_attrs in
325
  let content = validate_content_type context ct.ct_content in
326
327
  (attrs, content)

328
and validate_content_type context content_type : Value.t =
329
  match content_type with
330
331
  | CT_empty -> Value.nil
  | CT_simple st_def -> Value.sequence [ validate_simple_type_wrapper context (get_simple_type st_def) ]
332
333
334
335
336
  | CT_model (particle, mixed) ->
      context#set_mixed mixed;
      validate_particle context particle

and validate_particle context particle =
337
338
  let content = ref Value.nil in
  let concat v = content := Value.concat !content v in
339
340
  let rec validate_once ~cont_ok ~cont_failure =
    match context#peek with
341
342
343
    | E_start_tag qname 
	when List.exists (Ns.QName.equal qname) particle.part_first ->
	concat (validate_term context particle.part_term);
344
        cont_ok ()
345
    | E_char_data utf8_data when context#mixed ->
346
        concat (string_utf8 utf8_data);
347
348
349
350
351
        context#junk;
        validate_once ~cont_ok ~cont_failure
    | ev -> cont_failure ev
  in
  let rec required = function
352
353
    | 0 -> ()
    | n ->
354
        validate_once
355
          ~cont_ok:(fun () -> required (pred n))
356
357
358
359
360
361
362
363
364
          ~cont_failure:(fun event ->
            validation_error ~context (sprintf "Unexpected content: %s"
              (string_of_event event)))
  in
  let rec optional = function
    | None ->
        validate_once
          ~cont_ok:(fun () -> optional None)
          ~cont_failure:(fun _ -> ())
365
366
    | Some 0 -> ()
    | Some n ->
367
        validate_once
368
          ~cont_ok:(fun () -> optional (Some (pred n)))
369
370
371
372
373
          ~cont_failure:(fun _ -> ())
  in
  let rec trailing_cdata () =
    match context#peek with
    | E_char_data utf8_data ->
374
        concat (string_utf8 utf8_data);
375
376
377
378
        context#junk;
        trailing_cdata ()
    | _ -> ()
  in
379
  required particle.part_min;
380
  optional
381
382
383
    (match particle.part_max with 
       | None -> None 
       | Some v -> Some (v - particle.part_min));
384
  if context#mixed then trailing_cdata ();
385
  !content
386
387
388

and validate_term context term =
  match term with
389
390
391
392
  | Elt elt_decl_ref -> 
      sequence [ validate_element context (Lazy.force elt_decl_ref) ]
  | Model model_group -> 
      validate_model_group context model_group
393

394
  (** @return (Value.t * Utf8.t)
395
396
397
   * 2nd value is the key for tbl that return the particle effectively used for
   * validation *)
and validate_choice context tbl =
398
399
  let backlog = ref Value.nil in
  let concat v = backlog := Value.concat !backlog v in
400
401
402
  let rec next_tag () =
    match context#peek with
    | E_char_data utf8_data when context#mixed ->
403
        concat (string_utf8 utf8_data);
404
405
406
407
408
409
410
411
412
413
414
        context#junk;
        next_tag ()
    | E_char_data utf8_data (* when not context#mixed *) ->
        validation_error ~context
          (sprintf "Unexpected char data in non-mixed content: %s"
            (Utf8.get_str utf8_data))
    | E_start_tag qname -> qname
    | ev ->
        validation_error ~context
          (sprintf "Unexpected content: %s" (string_of_event ev))
  in
415
416
417
  let qname = next_tag () in
  try
    let particle = QTable.find tbl qname in
418
419
(* BUG: should put the backlog back !!! *)
    Value.concat !backlog (validate_particle context particle), qname
420
421
422
  with Not_found ->
    validation_error ~context (sprintf "Unexpected element %s"
				 (Ns.QName.to_string qname))
423
424
425
426

and validate_model_group context model_group =
  match model_group with
  | All particles ->
427
(* BUG: reorder ! *)
428
      let tbl = ptbl_of_particles particles in
429
      let contents = ref Value.nil in
430
      let rec aux () =
431
        if qtable_is_empty tbl then !contents
432
433
        else begin
          let (content, key) = validate_choice context tbl in
434
          contents := Value.concat !contents content;
435
          QTable.remove tbl key;
436
437
438
439
440
441
442
          aux ()
        end
      in
      aux ()
  | Choice particles ->
      fst (validate_choice context (ptbl_of_particles particles))
  | Sequence particles ->
443
      flatten (sequence (List.map (validate_particle context) particles))
444
445
446
447

  (** {2 Context implementation} *)

class context ~stream ~schema =
448
  object (self)
449
450
451
452
    val mutable mixed = false

    method mixed = mixed
    method set_mixed v = mixed <- v
453
454
455
456
457

    method private next =
      try
        Stream.next stream
      with Stream.Failure ->
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
        self#error "Unexpected end of stream";
          (* just to cheat with the type checker, above function wont return *)
        Stream.next stream
    method peek =
      match Stream.peek stream with
      | None ->
          self#error "Unexpected end of stream";
            (* just to cheat with the type checker as above *)
          Stream.next stream
      | Some e -> e
    method junk = Stream.junk stream
    method get_string =
      let buf = Buffer.create 1024 in
      let rec aux () =
        match self#peek with
        | E_char_data data ->
            Buffer.add_string buf (Utf8.get_str data);
            self#junk;
            aux ()
        | _ -> Utf8.mk (Buffer.contents buf)
      in
      aux ()
480

481
482
    method private error s = ignore (validation_error ~context:self s)

483
    method expect_start_tag expected =
484
485
486
      match self#next with
      | E_start_tag found ->
          if not (Ns.QName.equal expected found) then
487
488
            self#error (sprintf "Start tag error: expected %s, found %s"
              (Ns.QName.to_string expected) (Ns.QName.to_string found))
489
      | ev ->
490
491
          self#error (sprintf "Expected start tag (%s), found %s"
            (Ns.QName.to_string expected) (string_of_event ev))
492
    method expect_end_tag expected =
493
494
495
      match self#next with
      | E_end_tag found ->
          if not (Ns.QName.equal expected found) then
496
497
            self#error (sprintf "Start tag error: expected %s, found %s"
              (Ns.QName.to_string expected) (Ns.QName.to_string found))
498
      | ev ->
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
          self#error (sprintf "Expected end tag (%s), found %s"
            (Ns.QName.to_string expected) (string_of_event ev))
    method expect_any_start_tag =
      match self#next with
      | E_start_tag tag -> tag
      | ev ->
          self#error (sprintf "Expected start tag, found %s"
            (string_of_event ev));
          foo_qname (* useless *)
    method expect_any_end_tag =
      match self#next with
      | E_end_tag tag -> tag
      | ev ->
          self#error (sprintf "Expected end tag, found %s"
            (string_of_event ev));
          foo_qname (* useless *)

516
  end
517

518
  (** {2 API} *)
519

520
let validate_element decl schema value =
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
  validate_element (new context ~stream:(stream_of_value value) ~schema) decl

let validate_type def schema value =
  match def with
  | AnyType -> value  (* shortcut *)
  | Simple st_def ->
      if not (is_str value) then
        validation_error
          "Only string values could be validate against simple types";
      validate_simple_type st_def value  (* shortcut *)
  | Complex ct_def ->
      let context = new context ~stream:(stream_of_value value) ~schema in
      let start_tag = context#expect_any_start_tag in
      let (attrs, content) = validate_complex_type context ct_def in
      let end_tag = context#expect_any_end_tag in
      assert (start_tag = end_tag);
      let (ns, name) = start_tag in
      Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)

let validate_attribute decl schema value =
541
  assert false; (* TODO see the .mli *)
542
543
544
545
546
547
548
549
550
551
552
553
  (match value with
  | Record _ -> ()
  | _ ->
      validation_error
        "Only record values could be validated against attributes");
  let (name, st_def, constr) = decl in
  let qname = (schema.targetNamespace, name) in
  let fields = Value.get_fields value in
  let found = ref false in
  let rec aux = function
    | [] -> []
    | (qname', value) :: rest when qname' = qname ->
554
        found := true;
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
        (qname', validate_simple_type st_def value) :: aux rest
    | field :: rest -> field :: aux rest

  in
  let fields = aux (Value.get_fields value) in
  let fields =
    if not !found then
      match constr with
      | Some (`Default v) -> (qname, v) :: fields
      | _ ->
          validation_error (sprintf
            "Attribute %s was not found and no default value was provided"
            (Ns.QName.to_string qname))
    else
      fields
  in
  Value.vrecord fields

573
let validate_attribute_group { ag_def = attr_uses } schema value =
574
575
576
577
  let stream =
    match value with
    | Record _ ->
        Stream.of_list
578
          ((List.map
579
580
            (fun (qname, v) ->
              E_attribute (qname, fst (Value.get_string_utf8 v)))
581
582
            (Value.get_fields value)) @
            [ foo_event ])
583
584
585
586
587
588
    | _ ->
        validation_error
          "Only record values could be validated against attribute groups"
  in
  validate_attribute_uses (new context ~stream ~schema) attr_uses

589
let validate_model_group { mg_def = mg } schema value =
590
591
592
  if not (Value.is_seq value) then
    validation_error
      "Only sequence values could be validated against model groups";
593
  let stream = stream_of_value (Value.Xml (foo_atom, empty_record, value)) in
594
  Stream.junk stream;
595
  validate_model_group (new context ~stream ~schema) mg
596