schema_validator.ml 17.7 KB
Newer Older
1
let debug = false
2

3
open Printf
4

5
open Encodings
6
open Schema_pcre
7
open Schema_common
8
open Schema_types
9
open Value
10

11
12
module QTable = Hashtbl.Make(Ns.QName)

13
14
let ppf = Format.std_formatter

15
16
  (** {2 Misc} *)

17
let empty_string = string_utf8 (Utf8.mk "")
18
19
20
let empty_record = Value.vrecord []
let foo_atom = Value.Atom (Atoms.V.mk_ascii "foo")
let foo_event = E_char_data (Utf8.mk "")
21

22
23
let qtable_is_empty tbl =
  try QTable.iter (fun _ _ -> raise Exit) tbl; true
24
  with Exit -> false
25
26
27
28

let string_of_value value =
  let buf = Buffer.create 1024 in
  let fmt = Format.formatter_of_buffer buf in
29
  Format.fprintf fmt "%a@?" Value.print value;
30
31
32
33
  Buffer.contents buf

let foo_qname = Ns.empty, Utf8.mk ""

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
type context = {
  ctx_stream: event Stream.t;
  ctx_schema: schema;

  mutable ctx_mixed: bool;
  mutable ctx_current: Value.t;
}

let subctx mixed ctx = { ctx with ctx_current = Value.nil; ctx_mixed = mixed }

let get ctx = ctx.ctx_current

let rec only_ws s i =
  (i = 0) ||
  (let i = pred i in match (String.unsafe_get s i) with
     | ' ' | '\t' | '\n' | '\r' -> only_ws s i
     | _ -> false)

let only_ws s =
  let s = Utf8.get_str s in
  only_ws s (String.length s)

let error s = raise (XSI_validation_error s)

let concat ctx v = ctx.ctx_current <- Value.concat ctx.ctx_current v
let append ctx v = ctx.ctx_current <- Value.append ctx.ctx_current v

let xml qname attrs content =
  Value.Xml (Value.Atom (Atoms.V.of_qname qname), attrs, content)


let peek ctx = 
  match Stream.peek ctx.ctx_stream with
    | None -> error "Unexpected end of stream"
    | Some e -> e

let next ctx =
  try Stream.next ctx.ctx_stream
  with Stream.Failure -> error "Unexpected end of stream"

let junk ctx =
  Stream.junk ctx.ctx_stream

let get_string ctx =
  let b = Buffer.create 67 in
  let rec aux () =
    match peek ctx with
      | E_char_data s ->
          junk ctx;
	  Buffer.add_string b (Utf8.get_str s);
	  aux ()
      | E_start_tag _ ->
	  error "XML element found in simple content"
      | _ -> ()
  in
  aux ();
  Utf8.mk (Buffer.contents b)

let rec copy_pcdata ctx =
  match peek ctx with
    | E_char_data s ->
        junk ctx;
	concat ctx (string_utf8 s);
	copy_pcdata ctx
    | _ -> ()

let rec ignore_ws ctx =
  match peek ctx with
    | E_char_data s when only_ws s -> 
	junk ctx;
	ignore_ws ctx
    | E_char_data _ -> 
        error "Unexpected char data in non-mixed content"
    | _ -> ()

let do_pcdata ctx =
  if ctx.ctx_mixed then copy_pcdata ctx else ignore_ws ctx

let expect_end_tag ctx =
  match next ctx with
    | E_end_tag _ -> ()
    | ev -> error (sprintf "Expected end tag, found %s" (string_of_event ev))

117
118
119
120
121
122
let check_nil ctx =
  match peek ctx with
    | E_end_tag _ -> ()
    | ev -> error (sprintf "Non-empty content with xsi:nil set : %s" 
		     (string_of_event ev))

123
124
125
126
127
128
let expect_start_tag ctx tag =
  match next ctx with
    | E_start_tag t when Ns.QName.equal t tag -> ()
    | ev -> error (sprintf "Expected tag %s, found %s" 
		     (Ns.QName.to_string tag) (string_of_event ev))

129

130
131
132
133
134
135
let expect_any_start_tag ctx =
  match next ctx with
    | E_start_tag t -> t
    | ev -> error (sprintf "Expected start tag, found %s" 
		     (string_of_event ev))

136
137
138
139
140
141
142
143
144
145
146
type attrs = {
  xsi_nil: bool;
  attrs: (Ns.qname * Utf8.t) list
}

let get_bool v = 
  match Utf8.get_str v with
    | "true" | "1" -> true
    | "false" | "0" -> false
    | _ -> failwith "Invalid boolean value"

147
let get_attributes ctx =
148
  let rec aux attrs nil =
149
    match peek ctx with
150
151
152
153
154
155
156
      | E_attribute (qname,value) when Ns.QName.equal qname xsi_nil_qname ->
	  junk ctx;
	  aux attrs (get_bool value)
      | E_attribute (qname, value) ->
          junk ctx;
          aux ((qname,value)::attrs) nil
      | _ -> { attrs = attrs; xsi_nil = nil }
157
  in
158
  aux [] false
159
160

let rec tries funs arg =
161
162
163
  match funs with
  | [] -> raise Not_found
  | f :: tl ->
164
      try f arg
165
166
      with XSI_validation_error _ ->
        tries tl arg
167
168
169
170
171
172

let space_RE = pcre_regexp " "
let split = pcre_split ~rex:space_RE

  (** {2 Facets validation} *)

173
174
175
176
177
178
179
module Schema_facets:
sig
  exception Facet_error of string
  val facets_valid: facets -> Value.t -> unit
end
=
struct
180
181
182
183
184
185
186
187
188
189
190
191

  open Big_int
  open Value

  exception Facet_error of string

    (* compute the length of a particular CDuce *)
    (* STRONG ASSUMPTION: v is a CDuce value built via "validate_simple_type"
     * function below, thus it contains no sequence of characters, but strings
     * and no Concat, but just Pair *)
  let length v =
    let rec aux acc = function
192
193
      | Pair (_, rest) -> aux (succ acc) rest
      | _ -> 0
194
    in
195
    aux 0 v
196
197

  let length_valid len value =
198
199
    if (length value != len) 
    then raise (Facet_error "length")
200
  let minLength_valid min_len value =
201
202
    if (length value < min_len)
    then raise (Facet_error "minLength")
203
  let maxLength_valid max_len value =
204
205
    if (length value > max_len) 
    then raise (Facet_error "maxLength")
206
207

  let enumeration_valid enum value =
208
    if not (List.exists (fun x -> Value.equal value x) enum) 
209
    then raise (Facet_error "enumeration")
210
211

  let maxInclusive_valid max_inc value =
212
    if value |>| (max_inc) then raise (Facet_error "maxInclusive")
213
  let maxExclusive_valid max_exc value =
214
    if value |>=| (max_exc) then raise (Facet_error "maxExclusive")
215
  let minInclusive_valid min_inc value =
216
    if value |<| (min_inc) then raise (Facet_error "minInclusive")
217
  let minExclusive_valid min_exc value =
218
    if value |<=| (min_exc) then raise (Facet_error "minInclusive")
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

    (* check facets validaty rules other than pattern and whiteSpace. "value"
    parameter should already be white space normalized and pattern valid.
    Assumption: facets set contains only facets that are applicable to the type
    of value *)
  let facets_valid facets value =
    (* TODO efficiency *)
    (* all facets are always checked, but we know that in some cases only some
     * of them can be present; e.g. if variety is union only pattern and
     * enumeration are possibles ... *)
    (match facets.length with
    | None ->
        (match facets.minLength with
        | None -> ()
        | Some (len, _) -> minLength_valid len value);
        (match facets.maxLength with
        | None -> ()
        | Some (len, _) -> maxLength_valid len value);
    | Some (len, _) -> length_valid len value);
    (match facets.enumeration with
    | None -> ()
    | Some enum -> enumeration_valid enum value);
    (match facets.maxInclusive with
    | None -> ()
    | Some (lim, _) -> maxInclusive_valid lim value);
    (match facets.maxExclusive with
    | None -> ()
    | Some (lim, _) -> maxExclusive_valid lim value);
    (match facets.minInclusive with
    | None -> ()
    | Some (lim, _) -> minInclusive_valid lim value);
    (match facets.minExclusive with
    | None -> ()
    | Some (lim, _) -> minExclusive_valid lim value);
  (*
    (match facets.totalDigits with
    | None -> ()
    | Some (dig, _) -> totalDigits_valid dig value);
    (match facets.fractionDigits with
    | None -> ()
    | Some (dig, _) -> fractionDigits_valid dig value)
  *)

end

264
  (** {2 Simple type validation} *)
265

266
267
268
269
270
let rec validate_simple_type def s = match def with
  | { st_name = Some name } when Schema_builtin.is name ->
      (try Schema_builtin.validate (Schema_builtin.get name) s
       with Schema_builtin.Error name ->
	 error (sprintf "%s isn't a valid %s"
271
		  (Utf8.to_string s) name))
272
  | { st_variety = Atomic st; st_facets = facets } ->
273
      let literal = normalize_white_space (fst facets.whiteSpace) s in
274
      let value = validate_simple_type st literal in
275
276
      Schema_facets.facets_valid facets value;
      value
277
  | { st_variety = List item; st_facets = facets } ->
278
      let literal = normalize_white_space (fst facets.whiteSpace) s in
279
      let items = List.map (validate_simple_type item) (split literal) in
280
281
282
      let value = Value.sequence items in
      Schema_facets.facets_valid facets value;
      value
283
284
  | { st_variety = Union members; st_facets = facets } ->
      let value = tries (List.map validate_simple_type members) s in
285
286
287
      Schema_facets.facets_valid facets value;
      value

288
289
let validate_simple_type_wrapper ctx st_def =
  validate_simple_type st_def (get_string ctx)
290
291
292

  (** {2 Complex type validation} *)

293
let rec validate_any_type ctx =
294
  (* assumption: attribute events (if any) come first *)
295
  let attrs = get_attributes ctx in
296
  let attrs = List.map (fun (n,v) -> (n,Value.string_utf8 v)) attrs.attrs in
297
298
299
300
301

  let ctx = subctx true ctx in
  let rec aux attrs =
    copy_pcdata ctx;
    match peek ctx with
302
    | E_start_tag qname ->
303
304
305
306
        junk ctx;
        let (attrs, content) = validate_any_type ctx in
        expect_end_tag ctx;
	append ctx (xml qname attrs content);
307
        aux ()
308
309
    | E_end_tag _ -> ()
    | _ -> assert false
310
  in
311
312
  aux ();
  (Value.vrecord attrs, get ctx)
313

314
315
let validate_wildcard ctx w =
  let qname = expect_any_start_tag ctx in
316
  if not (Atoms.contains (Atoms.V.of_qname qname) w.wild_first)
317
318
319
320
321
322
  then error (sprintf "Tag %s is not accepted by the wildcard" 
		(Ns.QName.to_string qname));
  let (attrs, content) = validate_any_type ctx in
  expect_end_tag ctx;
  xml qname attrs content

323
let check_fixed fixed value =
324
  if not (Value.equal fixed value) then
325
    error (sprintf "Expected fixed value: %s; found %s"
326
327
      (string_of_value fixed) (string_of_value value))

328

329
330
331

let next_tag ctx =
  match peek ctx with
332
333
    | E_start_tag qname -> qname
    | _ -> raise Not_found
334

335
let validate_attribute_uses attrs attr_uses =
336
  let tbl = QTable.create 11 in
337
  List.iter
338
    (fun use -> QTable.add tbl (name_of_attribute_use use) use)
339
    attr_uses;
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
  let attribs = ref [] in
  List.iter
    (fun (qname, value) ->
       let { attr_decl =  { attr_typdef = st_def };
	     attr_use_cstr = constr } =
         try QTable.find tbl qname
         with Not_found ->
           error (sprintf "Unexpected attribute: %s"
		    (Ns.QName.to_string qname))
       in
       let value = validate_simple_type st_def value in
       (match constr with  (* check fixed constraint *)
          | Some (`Fixed v) -> check_fixed v value
          | _ -> ());
       QTable.remove tbl qname;
       attribs := (qname, value) :: !attribs
    ) attrs.attrs;
  if attrs.xsi_nil then
    attribs := (xsi_nil_qname, Value.vtrue) :: !attribs;
359
  QTable.iter
360
    (fun qname at ->
361
362
363
364
365
       if at.attr_required then  (* check for missing required attributes *)
         error (sprintf "Required attribute %s is missing"
		  (Ns.QName.to_string qname))
       else  (* add default values *)
         match at.attr_use_cstr with
366
           | Some (`Default v) -> attribs := (qname, v) :: !attribs
367
           | _ -> ())
368
    tbl;
369
  Value.vrecord !attribs
370

371
372
let rec validate_element ctx elt =
  expect_start_tag ctx elt.elt_name;
373
374
375
376
  let attrs = get_attributes ctx in
  if (attrs.xsi_nil && not elt.elt_nillable) then
    error "xsi:nil attribute on non-nillable element";
  let (attrs, content) = validate_type_ref ctx attrs elt.elt_typdef in
377
  let content = (* use default if needed and check fixed constraints *)
378
    match elt.elt_cstr with
379
    | Some (`Default v) when Value.equal content empty_string -> v
380
    | Some (`Fixed v) -> check_fixed v content; content
381
382
    | _ -> content
  in
383
384
  expect_end_tag ctx;
  xml elt.elt_name attrs content
385

386
and validate_type ctx attrs = function
387
  | AnyType -> validate_any_type ctx
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
  | Simple st_def ->
      if (List.length attrs.attrs > 0) then
	error "attribute on element with simple content";
      if attrs.xsi_nil then (check_nil ctx; 
			     Value.vrecord [xsi_nil_qname,Value.vtrue],
			     Value.nil)
      else (empty_record, validate_simple_type_wrapper ctx st_def)
  | Complex ct_def -> validate_complex_type ctx attrs ct_def

and validate_type_ref ctx attrs x =
  validate_type ctx attrs (Lazy.force x)

and validate_complex_type ctx attrs ct =
  let content =
    if attrs.xsi_nil then (check_nil ctx; Value.nil)
    else validate_content_type ctx ct.ct_content in
  let attrs = validate_attribute_uses attrs ct.ct_attrs in
405
406
  (attrs, content)

407
and validate_content_type ctx content_type =
408
  match content_type with
409
410
411
  | CT_empty -> 
      Value.nil
  | CT_simple st_def -> 
412
      Value.sequence [ validate_simple_type_wrapper ctx st_def ]
413
  | CT_model (particle, mixed) ->
414
415
416
417
      let mixold = ctx.ctx_mixed in
      let ctx = subctx mixed ctx in
      validate_particle ctx particle;
      get ctx
418

419
and validate_particle ctx particle =
420
421
422
423
424
425
426
(*
  Format.fprintf ppf "Particle first";
  List.iter (fun n -> Format.fprintf ppf "%a;" Ns.QName.print n) 
    particle.part_first;
  Format.fprintf ppf "@.";
*)

427
  let rec validate_once ~cont_ok ~cont_failure =
428
429
    do_pcdata ctx;
    match peek ctx with
430
    | E_start_tag qname 
431
	when Atoms.contains (Atoms.V.of_qname qname) particle.part_first ->
432
	validate_term ctx particle.part_term;
433
        cont_ok ()
434
435
    | ev -> 
	cont_failure ev
436
437
  in
  let rec required = function
438
439
    | 0 -> ()
    | n ->
440
        validate_once
441
          ~cont_ok:(fun () -> required (pred n))
442
          ~cont_failure:(fun event ->
443
444
445
446
			   if particle.part_nullable then ()
			   else
			     error ~ctx (sprintf "Unexpected content: %s"
					   (string_of_event event)))
447
448
449
450
451
452
  in
  let rec optional = function
    | None ->
        validate_once
          ~cont_ok:(fun () -> optional None)
          ~cont_failure:(fun _ -> ())
453
454
    | Some 0 -> ()
    | Some n ->
455
        validate_once
456
          ~cont_ok:(fun () -> optional (Some (pred n)))
457
458
          ~cont_failure:(fun _ -> ())
  in
459
  required particle.part_min;
460
  optional
461
462
463
    (match particle.part_max with 
       | None -> None 
       | Some v -> Some (v - particle.part_min));
464
  do_pcdata ctx
465

466
and validate_term ctx term =
467
  match term with
468
469
470
  | Elt elt -> append ctx (validate_element ctx elt)
  | Model mg -> validate_model_group ctx mg
  | Wildcard w -> append ctx (validate_wildcard ctx w)
471

472
and validate_choice ctx particles =
473
(* TODO: Handle case when one of the choices is nullable *)
474
475
  let tbl = Atoms.mk_map
    (List.map (fun p -> first_of_particle p, p) particles) in
476
  do_pcdata ctx;
477
  try
478
479
    (match peek ctx with
       | E_start_tag qname -> 
480
	   let particle = Atoms.get_map (Atoms.V.of_qname qname) tbl in
481
482
	   validate_particle ctx particle
       | _ -> raise Not_found)
483
  with Not_found ->
484
    error (sprintf "Cannot choose branch of choice group")
485

486
and validate_all_group ctx particles =
487
  let tbl = QTable.create 20 in
488
489
490
  let slots = List.map (fun p -> (p, ref None)) particles in
  let tbl = Atoms.mk_map
    (List.map (fun (p,slot) -> first_of_particle p, (p,slot)) slots) in
491
492
493
  
  let contents = ref Value.nil in
  let rec aux () =
494
495
496
    match peek ctx with
      | E_start_tag qname -> 
	  let qname = next_tag ctx in
497
	  let p,slot = Atoms.get_map (Atoms.V.of_qname qname) tbl in
498
499
500
501
502
503
504
	  (match !slot with
	    | Some x -> ()
	    | None -> 
		let ctx = subctx ctx.ctx_mixed ctx in
		validate_particle ctx p;
		slot := Some (get ctx); aux ())
      | _ -> ()
505
  in
506
507
508
  do_pcdata ctx;
  aux ();
  List.iter
509
    (fun (p,slot) ->
510
       match !slot with
511
	 | Some x -> concat ctx x
512
	 | None when nullable p -> ()
513
514
	 | None -> error "One particle of the all group is missing"
    ) slots
515

516

517
and validate_model_group ctx model_group =
518
  match model_group with
519
520
521
  | All particles -> validate_all_group ctx particles
  | Choice particles -> validate_choice ctx particles
  | Sequence particles -> List.iter (validate_particle ctx) particles
522
523


524
525
526
527
528
let ctx stream schema =
  { ctx_stream = stream;
    ctx_schema = schema;
    ctx_mixed = false;
    ctx_current = Value.Absent }
529

530
531
532
let validate_element decl schema value =
  let ctx = ctx (stream_of_value value) schema in
  validate_element ctx decl
533

534
535
536
537
538
let get_str v = 
  if not (is_str v) then
    error
      "Only string values could be validate against simple types";
  fst (get_string_utf8 v)
539

540
541
542
let validate_type def schema value =
  match def with
  | AnyType -> value  (* shortcut *)
543
  | Simple st_def -> validate_simple_type st_def (get_str value)
544
  | Complex ct_def ->
545
546
      let ctx = ctx (stream_of_value value) schema in
      let start_tag = expect_any_start_tag ctx in
547
548
      let attrs = get_attributes ctx in
      let (attrs, content) = validate_complex_type ctx attrs ct_def in
549
550
      expect_end_tag ctx;
      Value.Xml (Value.Atom (Atoms.V.of_qname start_tag), attrs, content)
551

552
(*
553
let validate_attribute decl schema value =
554
  assert false; (* TODO see the .mli *)
555
556
557
  (match value with
  | Record _ -> ()
  | _ ->
558
      error
559
560
561
562
563
564
565
566
        "Only record values could be validated against attributes");
  let (name, st_def, constr) = decl in
  let qname = (schema.targetNamespace, name) in
  let fields = Value.get_fields value in
  let found = ref false in
  let rec aux = function
    | [] -> []
    | (qname', value) :: rest when qname' = qname ->
567
        found := true;
568
569
570
571
572
573
574
575
576
577
        (qname', validate_simple_type st_def value) :: aux rest
    | field :: rest -> field :: aux rest

  in
  let fields = aux (Value.get_fields value) in
  let fields =
    if not !found then
      match constr with
      | Some (`Default v) -> (qname, v) :: fields
      | _ ->
578
          error (sprintf
579
580
581
582
583
584
            "Attribute %s was not found and no default value was provided"
            (Ns.QName.to_string qname))
    else
      fields
  in
  Value.vrecord fields
585
*)
586

587
let validate_attribute_group { ag_def = attr_uses } schema value =
588
589
590
591
  let stream =
    match value with
    | Record _ ->
        Stream.of_list
592
          ((List.map
593
594
            (fun (qname, v) ->
              E_attribute (qname, fst (Value.get_string_utf8 v)))
595
596
            (Value.get_fields value)) @
            [ foo_event ])
597
    | _ ->
598
        error
599
600
          "Only record values could be validated against attribute groups"
  in
601
602
603
  let ctx = ctx stream schema in
  let attrs = get_attributes ctx in
  validate_attribute_uses attrs attr_uses
604

605

606
let validate_model_group { mg_def = mg } schema value =
607
  if not (Value.is_seq value) then
608
    error
609
      "Only sequence values could be validated against model groups";
610
  let stream = stream_of_value (Value.Xml (foo_atom, empty_record, value)) in
611
  Stream.junk stream;
612
613
614
615
  let ctx = ctx stream schema in
  validate_model_group ctx mg;
  get ctx

616