schema_validator.ml 16.6 KB
Newer Older
1
let debug = false
2

3
open Printf
4

5
open Encodings
6
open Schema_pcre
7
open Schema_common
8
open Schema_types
9
open Value
10

11
12
module QTable = Hashtbl.Make(Ns.QName)

13
14
let ppf = Format.std_formatter

15
16
  (** {2 Misc} *)

17
let empty_string = string_utf8 (Utf8.mk "")
18
19
20
let empty_record = Value.vrecord []
let foo_atom = Value.Atom (Atoms.V.mk_ascii "foo")
let foo_event = E_char_data (Utf8.mk "")
21

22
23
let qtable_is_empty tbl =
  try QTable.iter (fun _ _ -> raise Exit) tbl; true
24
  with Exit -> false
25
26
27
28

let string_of_value value =
  let buf = Buffer.create 1024 in
  let fmt = Format.formatter_of_buffer buf in
29
  Format.fprintf fmt "%a@?" Value.print value;
30
31
32
33
  Buffer.contents buf

let foo_qname = Ns.empty, Utf8.mk ""

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
type context = {
  ctx_stream: event Stream.t;
  ctx_schema: schema;

  mutable ctx_mixed: bool;
  mutable ctx_current: Value.t;
}

let subctx mixed ctx = { ctx with ctx_current = Value.nil; ctx_mixed = mixed }

let get ctx = ctx.ctx_current

let rec only_ws s i =
  (i = 0) ||
  (let i = pred i in match (String.unsafe_get s i) with
     | ' ' | '\t' | '\n' | '\r' -> only_ws s i
     | _ -> false)

let only_ws s =
  let s = Utf8.get_str s in
  only_ws s (String.length s)

let error s = raise (XSI_validation_error s)

let concat ctx v = ctx.ctx_current <- Value.concat ctx.ctx_current v
let append ctx v = ctx.ctx_current <- Value.append ctx.ctx_current v

let xml qname attrs content =
  Value.Xml (Value.Atom (Atoms.V.of_qname qname), attrs, content)


let peek ctx = 
  match Stream.peek ctx.ctx_stream with
    | None -> error "Unexpected end of stream"
    | Some e -> e

let next ctx =
  try Stream.next ctx.ctx_stream
  with Stream.Failure -> error "Unexpected end of stream"

let junk ctx =
  Stream.junk ctx.ctx_stream

let get_string ctx =
  let b = Buffer.create 67 in
  let rec aux () =
    match peek ctx with
      | E_char_data s ->
          junk ctx;
	  Buffer.add_string b (Utf8.get_str s);
	  aux ()
      | E_start_tag _ ->
	  error "XML element found in simple content"
      | _ -> ()
  in
  aux ();
  Utf8.mk (Buffer.contents b)

let rec copy_pcdata ctx =
  match peek ctx with
    | E_char_data s ->
        junk ctx;
	concat ctx (string_utf8 s);
	copy_pcdata ctx
    | _ -> ()

let rec ignore_ws ctx =
  match peek ctx with
    | E_char_data s when only_ws s -> 
	junk ctx;
	ignore_ws ctx
    | E_char_data _ -> 
        error "Unexpected char data in non-mixed content"
    | _ -> ()

let do_pcdata ctx =
  if ctx.ctx_mixed then copy_pcdata ctx else ignore_ws ctx

let expect_end_tag ctx =
  match next ctx with
    | E_end_tag _ -> ()
    | ev -> error (sprintf "Expected end tag, found %s" (string_of_event ev))

let expect_start_tag ctx tag =
  match next ctx with
    | E_start_tag t when Ns.QName.equal t tag -> ()
    | ev -> error (sprintf "Expected tag %s, found %s" 
		     (Ns.QName.to_string tag) (string_of_event ev))

123

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
let expect_any_start_tag ctx =
  match next ctx with
    | E_start_tag t -> t
    | ev -> error (sprintf "Expected start tag, found %s" 
		     (string_of_event ev))

let get_attributes ctx =
  let rec aux attrs =
    match peek ctx with
    | E_attribute (qname, value) ->
        junk ctx;
        aux ((qname,value)::attrs)
    | _ -> attrs
  in
  aux []

let rec tries funs arg =
141
142
143
  match funs with
  | [] -> raise Not_found
  | f :: tl ->
144
      try f arg
145
146
      with XSI_validation_error _ ->
        tries tl arg
147
148
149
150
151
152

let space_RE = pcre_regexp " "
let split = pcre_split ~rex:space_RE

  (** {2 Facets validation} *)

153
154
155
156
157
158
159
module Schema_facets:
sig
  exception Facet_error of string
  val facets_valid: facets -> Value.t -> unit
end
=
struct
160
161
162
163
164
165
166
167
168
169
170
171

  open Big_int
  open Value

  exception Facet_error of string

    (* compute the length of a particular CDuce *)
    (* STRONG ASSUMPTION: v is a CDuce value built via "validate_simple_type"
     * function below, thus it contains no sequence of characters, but strings
     * and no Concat, but just Pair *)
  let length v =
    let rec aux acc = function
172
173
      | Pair (_, rest) -> aux (succ acc) rest
      | _ -> 0
174
    in
175
    aux 0 v
176
177

  let length_valid len value =
178
179
    if (length value != len) 
    then raise (Facet_error "length")
180
  let minLength_valid min_len value =
181
182
    if (length value < min_len)
    then raise (Facet_error "minLength")
183
  let maxLength_valid max_len value =
184
185
    if (length value > max_len) 
    then raise (Facet_error "maxLength")
186
187

  let enumeration_valid enum value =
188
    if not (List.exists (fun x -> Value.equal value x) enum) 
189
    then raise (Facet_error "enumeration")
190
191

  let maxInclusive_valid max_inc value =
192
    if value |>| (max_inc) then raise (Facet_error "maxInclusive")
193
  let maxExclusive_valid max_exc value =
194
    if value |>=| (max_exc) then raise (Facet_error "maxExclusive")
195
  let minInclusive_valid min_inc value =
196
    if value |<| (min_inc) then raise (Facet_error "minInclusive")
197
  let minExclusive_valid min_exc value =
198
    if value |<=| (min_exc) then raise (Facet_error "minInclusive")
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

    (* check facets validaty rules other than pattern and whiteSpace. "value"
    parameter should already be white space normalized and pattern valid.
    Assumption: facets set contains only facets that are applicable to the type
    of value *)
  let facets_valid facets value =
    (* TODO efficiency *)
    (* all facets are always checked, but we know that in some cases only some
     * of them can be present; e.g. if variety is union only pattern and
     * enumeration are possibles ... *)
    (match facets.length with
    | None ->
        (match facets.minLength with
        | None -> ()
        | Some (len, _) -> minLength_valid len value);
        (match facets.maxLength with
        | None -> ()
        | Some (len, _) -> maxLength_valid len value);
    | Some (len, _) -> length_valid len value);
    (match facets.enumeration with
    | None -> ()
    | Some enum -> enumeration_valid enum value);
    (match facets.maxInclusive with
    | None -> ()
    | Some (lim, _) -> maxInclusive_valid lim value);
    (match facets.maxExclusive with
    | None -> ()
    | Some (lim, _) -> maxExclusive_valid lim value);
    (match facets.minInclusive with
    | None -> ()
    | Some (lim, _) -> minInclusive_valid lim value);
    (match facets.minExclusive with
    | None -> ()
    | Some (lim, _) -> minExclusive_valid lim value);
  (*
    (match facets.totalDigits with
    | None -> ()
    | Some (dig, _) -> totalDigits_valid dig value);
    (match facets.fractionDigits with
    | None -> ()
    | Some (dig, _) -> fractionDigits_valid dig value)
  *)

end

244
  (** {2 Simple type validation} *)
245

246
247
248
249
250
let rec validate_simple_type def s = match def with
  | { st_name = Some name } when Schema_builtin.is name ->
      (try Schema_builtin.validate (Schema_builtin.get name) s
       with Schema_builtin.Error name ->
	 error (sprintf "%s isn't a valid %s"
251
		  (Utf8.to_string s) name))
252
  | { st_variety = Atomic st; st_facets = facets } ->
253
      let literal = normalize_white_space (fst facets.whiteSpace) s in
254
      let value = validate_simple_type st literal in
255
256
      Schema_facets.facets_valid facets value;
      value
257
  | { st_variety = List item; st_facets = facets } ->
258
      let literal = normalize_white_space (fst facets.whiteSpace) s in
259
      let items = List.map (validate_simple_type item) (split literal) in
260
261
262
      let value = Value.sequence items in
      Schema_facets.facets_valid facets value;
      value
263
264
  | { st_variety = Union members; st_facets = facets } ->
      let value = tries (List.map validate_simple_type members) s in
265
266
267
      Schema_facets.facets_valid facets value;
      value

268
269
let validate_simple_type_wrapper ctx st_def =
  validate_simple_type st_def (get_string ctx)
270
271
272

  (** {2 Complex type validation} *)

273
let rec validate_any_type ctx =
274
  (* assumption: attribute events (if any) come first *)
275
276
277
278
279
280
281
  let attrs = get_attributes ctx in
  let attrs = List.map (fun (n,v) -> (n,Value.string_utf8 v)) attrs in

  let ctx = subctx true ctx in
  let rec aux attrs =
    copy_pcdata ctx;
    match peek ctx with
282
    | E_start_tag qname ->
283
284
285
286
        junk ctx;
        let (attrs, content) = validate_any_type ctx in
        expect_end_tag ctx;
	append ctx (xml qname attrs content);
287
        aux ()
288
289
    | E_end_tag _ -> ()
    | _ -> assert false
290
  in
291
292
  aux ();
  (Value.vrecord attrs, get ctx)
293

294
295
let validate_wildcard ctx w =
  let qname = expect_any_start_tag ctx in
296
  if not (Atoms.contains (Atoms.V.of_qname qname) w.wild_first)
297
298
299
300
301
302
  then error (sprintf "Tag %s is not accepted by the wildcard" 
		(Ns.QName.to_string qname));
  let (attrs, content) = validate_any_type ctx in
  expect_end_tag ctx;
  xml qname attrs content

303
let check_fixed ~ctx fixed value =
304
  if not (Value.equal fixed value) then
305
    error ~ctx (sprintf "Expected fixed value: %s; found %s"
306
307
      (string_of_value fixed) (string_of_value value))

308

309
310
311

let next_tag ctx =
  match peek ctx with
312
313
    | E_start_tag qname -> qname
    | _ -> raise Not_found
314

315
let validate_attribute_uses ctx attr_uses =
316
  let tbl = QTable.create 11 in
317
  List.iter
318
    (fun use -> QTable.add tbl (name_of_attribute_use use) use)
319
    attr_uses;
320
321
322
323
324
325
326
327
328
329
  let attrs =
    List.map
      (fun (qname, value) ->
	 let { attr_decl =  { attr_typdef = st_def };
	       attr_use_cstr = constr } =
           try QTable.find tbl qname
           with Not_found ->
             error (sprintf "Unexpected attribute: %s"
		      (Ns.QName.to_string qname))
	 in
330
	 let value = validate_simple_type st_def value in
331
	 (match constr with  (* check fixed constraint *)
332
            | Some (`Fixed v) -> check_fixed ~ctx v value
333
334
335
336
            | _ -> ());
	 QTable.remove tbl qname;
	 (qname, value)
      ) (get_attributes ctx);
337
  in
338
  let attrs = ref attrs in
339
  QTable.iter
340
    (fun qname at ->
341
342
343
344
345
       if at.attr_required then  (* check for missing required attributes *)
         error (sprintf "Required attribute %s is missing"
		  (Ns.QName.to_string qname))
       else  (* add default values *)
         match at.attr_use_cstr with
346
           | Some (`Default v) -> attrs := (qname, v) :: !attrs
347
           | _ -> ())
348
349
350
    tbl;
  Value.vrecord !attrs

351
352
353
let rec validate_element ctx elt =
  expect_start_tag ctx elt.elt_name;
  let (attrs, content) = validate_type_ref ctx elt.elt_typdef in
354
  let content = (* use default if needed and check fixed constraints *)
355
    match elt.elt_cstr with
356
    | Some (`Default v) when Value.equal content empty_string -> v
357
    | Some (`Fixed v) ->
358
        check_fixed ~ctx v content;
359
360
361
        content
    | _ -> content
  in
362
363
  expect_end_tag ctx;
  xml elt.elt_name attrs content
364

365
366
367
368
and validate_type ctx = function
  | AnyType -> validate_any_type ctx
  | Simple st_def -> (empty_record, validate_simple_type_wrapper ctx st_def)
  | Complex ct_def -> validate_complex_type ctx ct_def
369

370
371
and validate_type_ref ctx x =
  validate_type ctx (Lazy.force x)
372

373
374
375
and validate_complex_type ctx ct =
  let attrs = validate_attribute_uses ctx ct.ct_attrs in
  let content = validate_content_type ctx ct.ct_content in
376
377
  (attrs, content)

378
and validate_content_type ctx content_type =
379
  match content_type with
380
381
382
  | CT_empty -> 
      Value.nil
  | CT_simple st_def -> 
383
      Value.sequence [ validate_simple_type_wrapper ctx st_def ]
384
  | CT_model (particle, mixed) ->
385
386
387
388
      let mixold = ctx.ctx_mixed in
      let ctx = subctx mixed ctx in
      validate_particle ctx particle;
      get ctx
389

390
and validate_particle ctx particle =
391
392
393
394
395
396
397
(*
  Format.fprintf ppf "Particle first";
  List.iter (fun n -> Format.fprintf ppf "%a;" Ns.QName.print n) 
    particle.part_first;
  Format.fprintf ppf "@.";
*)

398
  let rec validate_once ~cont_ok ~cont_failure =
399
400
    do_pcdata ctx;
    match peek ctx with
401
    | E_start_tag qname 
402
	when Atoms.contains (Atoms.V.of_qname qname) particle.part_first ->
403
	validate_term ctx particle.part_term;
404
        cont_ok ()
405
406
    | ev -> 
	cont_failure ev
407
408
  in
  let rec required = function
409
410
    | 0 -> ()
    | n ->
411
        validate_once
412
          ~cont_ok:(fun () -> required (pred n))
413
          ~cont_failure:(fun event ->
414
415
416
417
			   if particle.part_nullable then ()
			   else
			     error ~ctx (sprintf "Unexpected content: %s"
					   (string_of_event event)))
418
419
420
421
422
423
  in
  let rec optional = function
    | None ->
        validate_once
          ~cont_ok:(fun () -> optional None)
          ~cont_failure:(fun _ -> ())
424
425
    | Some 0 -> ()
    | Some n ->
426
        validate_once
427
          ~cont_ok:(fun () -> optional (Some (pred n)))
428
429
          ~cont_failure:(fun _ -> ())
  in
430
  required particle.part_min;
431
  optional
432
433
434
    (match particle.part_max with 
       | None -> None 
       | Some v -> Some (v - particle.part_min));
435
  do_pcdata ctx
436

437
and validate_term ctx term =
438
  match term with
439
440
441
  | Elt elt -> append ctx (validate_element ctx elt)
  | Model mg -> validate_model_group ctx mg
  | Wildcard w -> append ctx (validate_wildcard ctx w)
442

443
and validate_choice ctx particles =
444
(* TODO: Handle case when one of the choices is nullable *)
445
446
  let tbl = Atoms.mk_map
    (List.map (fun p -> first_of_particle p, p) particles) in
447
  do_pcdata ctx;
448
  try
449
450
    (match peek ctx with
       | E_start_tag qname -> 
451
	   let particle = Atoms.get_map (Atoms.V.of_qname qname) tbl in
452
453
	   validate_particle ctx particle
       | _ -> raise Not_found)
454
  with Not_found ->
455
    error (sprintf "Cannot choose branch of choice group")
456

457
and validate_all_group ctx particles =
458
  let tbl = QTable.create 20 in
459
460
461
  let slots = List.map (fun p -> (p, ref None)) particles in
  let tbl = Atoms.mk_map
    (List.map (fun (p,slot) -> first_of_particle p, (p,slot)) slots) in
462
463
464
  
  let contents = ref Value.nil in
  let rec aux () =
465
466
467
    match peek ctx with
      | E_start_tag qname -> 
	  let qname = next_tag ctx in
468
	  let p,slot = Atoms.get_map (Atoms.V.of_qname qname) tbl in
469
470
471
472
473
474
475
	  (match !slot with
	    | Some x -> ()
	    | None -> 
		let ctx = subctx ctx.ctx_mixed ctx in
		validate_particle ctx p;
		slot := Some (get ctx); aux ())
      | _ -> ()
476
  in
477
478
479
  do_pcdata ctx;
  aux ();
  List.iter
480
    (fun (p,slot) ->
481
       match !slot with
482
	 | Some x -> concat ctx x
483
	 | None when nullable p -> ()
484
485
	 | None -> error "One particle of the all group is missing"
    ) slots
486

487

488
and validate_model_group ctx model_group =
489
  match model_group with
490
491
492
  | All particles -> validate_all_group ctx particles
  | Choice particles -> validate_choice ctx particles
  | Sequence particles -> List.iter (validate_particle ctx) particles
493
494


495
496
497
498
499
let ctx stream schema =
  { ctx_stream = stream;
    ctx_schema = schema;
    ctx_mixed = false;
    ctx_current = Value.Absent }
500

501
502
503
let validate_element decl schema value =
  let ctx = ctx (stream_of_value value) schema in
  validate_element ctx decl
504

505
506
507
508
509
let get_str v = 
  if not (is_str v) then
    error
      "Only string values could be validate against simple types";
  fst (get_string_utf8 v)
510

511
512
513
let validate_type def schema value =
  match def with
  | AnyType -> value  (* shortcut *)
514
  | Simple st_def -> validate_simple_type st_def (get_str value)
515
  | Complex ct_def ->
516
517
518
519
520
      let ctx = ctx (stream_of_value value) schema in
      let start_tag = expect_any_start_tag ctx in
      let (attrs, content) = validate_complex_type ctx ct_def in
      expect_end_tag ctx;
      Value.Xml (Value.Atom (Atoms.V.of_qname start_tag), attrs, content)
521

522
(*
523
let validate_attribute decl schema value =
524
  assert false; (* TODO see the .mli *)
525
526
527
  (match value with
  | Record _ -> ()
  | _ ->
528
      error
529
530
531
532
533
534
535
536
        "Only record values could be validated against attributes");
  let (name, st_def, constr) = decl in
  let qname = (schema.targetNamespace, name) in
  let fields = Value.get_fields value in
  let found = ref false in
  let rec aux = function
    | [] -> []
    | (qname', value) :: rest when qname' = qname ->
537
        found := true;
538
539
540
541
542
543
544
545
546
547
        (qname', validate_simple_type st_def value) :: aux rest
    | field :: rest -> field :: aux rest

  in
  let fields = aux (Value.get_fields value) in
  let fields =
    if not !found then
      match constr with
      | Some (`Default v) -> (qname, v) :: fields
      | _ ->
548
          error (sprintf
549
550
551
552
553
554
            "Attribute %s was not found and no default value was provided"
            (Ns.QName.to_string qname))
    else
      fields
  in
  Value.vrecord fields
555
*)
556

557
let validate_attribute_group { ag_def = attr_uses } schema value =
558
559
560
561
  let stream =
    match value with
    | Record _ ->
        Stream.of_list
562
          ((List.map
563
564
            (fun (qname, v) ->
              E_attribute (qname, fst (Value.get_string_utf8 v)))
565
566
            (Value.get_fields value)) @
            [ foo_event ])
567
    | _ ->
568
        error
569
570
          "Only record values could be validated against attribute groups"
  in
571
572
  validate_attribute_uses (ctx stream schema) attr_uses

573

574
let validate_model_group { mg_def = mg } schema value =
575
  if not (Value.is_seq value) then
576
    error
577
      "Only sequence values could be validated against model groups";
578
  let stream = stream_of_value (Value.Xml (foo_atom, empty_record, value)) in
579
  Stream.junk stream;
580
581
582
583
  let ctx = ctx stream schema in
  validate_model_group ctx mg;
  get ctx

584