schema_parser.ml 19.9 KB
Newer Older
1
open Printf
2

3
open Encodings
4
open Schema_pcre
5
open Schema_common
6
open Schema_types
7
8
open Schema_validator
open Schema_xml
9

10
11
module QTable = Hashtbl.Make(Ns.QName)

12
13
let validation_error s = raise (XSD_validation_error s)

14
15
16
17
18
19
20
21
22
23
24
25
26
27
let particle min max term first nullable =
  { part_min = min;
    part_max = max;
    part_term = term;
    part_first = first;
    part_nullable = nullable }

let particle_model min max mg =
  particle min max 
    (Model mg)
    (first_of_model_group mg)
    (nullable_of_model_group mg)


28
29
let xsd = Schema_xml.xsd

30
(*
31
32
33
let fake_type_def =
  Complex 
    { ct_uid = -1;
34
      ct_name = Some (xsd, Utf8.mk "   FAKE TYP   ");
35
36
37
38
39
40
      ct_typdef = AnyType;
      ct_deriv = `Restriction;
      ct_attrs = [];
      ct_content = CT_empty }
let fake_elt_decl = 
  { elt_uid = -2;
41
    elt_name = (xsd, Utf8.mk "   FAKE ELT   ");
42
    elt_typdef = ref fake_type_def;
43
44
45
    elt_cstr =  None }
let is_fake_type_def = (==) fake_type_def
let is_fake_elt_decl = (==) fake_elt_decl
46
47
*)

48
49
50
let (^^) x y = Utf8.concat x y

(* element and complex type constructors which take cares of unique id *)
51
52
let element, complex =
  let counter = ref 0 in
53
  let element name type_def constr =
54
    incr counter;
55
56
57
58
    { elt_uid = !counter;
      elt_name = name;
      elt_typdef = type_def;
      elt_cstr = constr }
59
60
61
  in
  let complex name (type_def: type_definition) deriv attrs ct =
    incr counter;
62
63
64
65
66
67
    { ct_uid = !counter;
      ct_name = name;
      ct_typdef = type_def;
      ct_deriv = deriv;
      ct_attrs = attrs;
      ct_content = ct }
68
69
  in
  (element, complex)
70

71
72
73
74
75
let space_RE = pcre_regexp " "
let split s = pcre_split ~rex:space_RE s

let unqualify s = snd (Ns.split_qname s)

76
let hashtbl_deref tbl =  QTable.fold (fun _ v acc -> (Lazy.force v) :: acc) tbl []
77
let hashtbl_values tbl = QTable.fold (fun _ v acc -> v :: acc) tbl []
78

79
let parse_facets base n =
80
81
  let validate_base_type v = 
    lazy (Schema_validator.validate_simple_type (get_simple_type base) v) in 
82
83
84
85
86
  let parse_nonneg n =
    let s = Utf8.get_str (_attr "value" n) in
    let i = int_of_string s in
    if (i < 0) then failwith "Unexpected negative integer";
    i
87
  in
88
  let aux facets n tag =
89
90
    let fixed = _is_attr "fixed" n "true" in
    match tag with
91
      | "xsd:length" ->
92
          let length = parse_nonneg n in
93
94
          { facets with length = Some (length, fixed) }
      | "xsd:minLength" ->
95
          let length = parse_nonneg n in
96
97
          { facets with minLength = Some (length, fixed) }
      | "xsd:maxLength" ->
98
          let length = parse_nonneg n in
99
100
101
102
103
          { facets with maxLength = Some (length, fixed) }
      | "xsd:enumeration" ->
          let value = Value.string_utf8 (_attr "value" n) in
          let value = validate_base_type value in
          let new_enumeration =
104
105
106
	    match facets.enumeration with
	      | None -> Some [ value ]
	      | Some entries -> Some (value :: entries)
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
          in
          { facets with enumeration = new_enumeration }
      | "xsd:whiteSpace" ->
          let value = Utf8.get_str (_attr "value" n) in
	  let k = match value with
		  | "collapse" -> `Collapse
		  | "preserve" -> `Preserve
		  | "replace" -> `Replace
		  | _ -> assert false in
          { facets with whiteSpace = (k,fixed) }
      | "xsd:maxInclusive" ->
          let value = Value.string_utf8 (_attr "value" n) in
          { facets with maxInclusive = Some (validate_base_type value, fixed) }
      | "xsd:maxExclusive" ->
          let value = Value.string_utf8 (_attr "value" n) in
          { facets with maxExclusive = Some (validate_base_type value, fixed) }
      | "xsd:minInclusive" ->
          let value = Value.string_utf8 (_attr "value" n) in
          { facets with minInclusive = Some (validate_base_type value, fixed) }
      | "xsd:minExclusive" ->
          let value = Value.string_utf8 (_attr "value" n) in
          { facets with minExclusive = Some (validate_base_type value, fixed) }
      | _ ->
	  facets
  in
  _fold_elems n no_facets aux

134
135
136
let merge_facets' base new_facets =
  merge_facets (facets_of_simple_type_definition base) new_facets

137
138
139
140
141
142
143
144
let default_fixed n f =
  match _may_attr "default" n with
    | Some v -> Some (`Default (f (Value.string_utf8 v)))
    | None ->
	match _may_attr "fixed" n with
	  | Some v -> Some (`Fixed (f (Value.string_utf8 v)))
	  | None -> None

145
let parse_att_value_constraint stype_def n =
146
147
  default_fixed n 
    (fun v -> lazy (validate_simple_type (get_simple_type stype_def) v))
148
149

let parse_min_max n =
150
  (match _may_attr "minOccurs" n with 
151
152
     | Some v -> int_of_string (Utf8.get_str v)
     | None -> 1),
153
  (match _may_attr "maxOccurs" n with 
154
155
156
157
158
     | Some v ->
	 (match Utf8.get_str v with 
	    | "unbounded" -> None 
	    | v -> Some (int_of_string v))
     | None -> Some 1)
159
160
161
162
163
164

let rec first n f = function
  | [] -> None
  | x::l -> match f x n with None -> first n f l | x -> x

let find_particles n =
165
  _filter_elems ["xsd:element"; "xsd:group"; "xsd:choice"; "xsd:sequence"] n
166

167
let find_particle n =
168
  first n _may_elem ["xsd:all"; "xsd:choice"; "xsd:group"; "xsd:sequence"]
169
    
170

171
172
173
174
175
let register_builtins typs =
  Schema_builtin.iter_builtin
    (fun st_def ->
       let type_def = Simple st_def in
       let name = name_of_type_definition type_def in
176
177
       QTable.replace typs name (lazy type_def));
  QTable.replace typs (xsd, Utf8.mk "anyType") (lazy AnyType)
178
179

(* Main parsing function *)
180
let schema_of_uri uri =
181

182
183
184
185
186
  let typs = QTable.create 17 in
  let elts = QTable.create 17 in
  let attrs= QTable.create 17 in
  let attr_groups = QTable.create 17 in
  let model_groups = QTable.create 17 in
187
  register_builtins typs;
188

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
  let attr_elems = QTable.create 17
  and attr_group_elems = QTable.create 17
  and model_group_elems = QTable.create 17 in

  let resolve k t1 t2 f qname =
    try QTable.find t1 qname
    with Not_found ->
      let node = 
	try QTable.find t2 qname
	with Not_found ->
	  validation_error ("Can't find declaration for " ^ k ^ " " ^
			      Ns.QName.to_string qname)
      in
      let decl = f node in
      QTable.replace t1 qname decl;
      decl
205
  in
206
  let todo = ref [] in
207

208

209
210
211
212
213
214
  let rec parse_uri uri =
    let root = node_of_uri uri in
    let targetNamespace =  
      match _may_attr "targetNamespace" root with
	| Some ns -> Ns.mk ns
	| None -> Ns.empty
215
    in
216
217
218
219
220
    let attributeFormDefault = 
      _is_attr "attributeFormDefault" root "qualified" in
    let elementFormDefault = 
      _is_attr "elementFormDefault" root "qualified" in

221
    let rec parse_root uri root =
222
223
224
225
226
227

  let may_name n =
    match _may_attr "name" n with
      | Some local -> Some (targetNamespace,local)
      | None -> None in
  let get_name n = (targetNamespace, _attr "name" n) in
228

229
  let rec resolve_typ qname : Schema_types.type_definition lazy_t =
230
    try QTable.find typs qname
231
    with Not_found ->   
232
      failwith ("Cannot find type " ^ (Ns.QName.to_string qname))
233

234
  and resolve_simple_typ qname : Schema_types.type_definition lazy_t =
235
    resolve_typ qname
236

237
238
  and resolve_elt qname =
    try QTable.find elts qname
239
    with Not_found -> 
240
      failwith ("Cannot find element " ^ (Ns.QName.to_string qname))
241
      
242
243
  and resolve_att qname =
    resolve "attribute" attrs attr_elems (parse_att_decl true) qname
244

245
246
247
  and resolve_att_group qname =
    resolve "attribute group" attr_groups attr_group_elems parse_att_group 
      qname
248

249
250
251
  and resolve_model_group qname =
    resolve "model group" model_groups model_group_elems parse_model_group_def
      qname
252

253
  (* parse an xsd:simpleType element *)
254
255
  and parse_simple_type_def n =
    let name = may_name n in
256
257
258
259
    match _may_elem "xsd:restriction" n with
      | Some restriction ->
	  let base = find_base_simple_type restriction in
	  let facets = parse_facets base restriction in
260
261
262
263
264
	  Simple (Derived (name, Restrict, facets, base)) 
      | None -> 
    match _may_elem "xsd:list" n with
      | Some list ->
	  let items = find_item_type list in
265
	  Simple (Derived (name, List items, no_facets, lazy (Simple anySimpleType)))
266
267
268
269
      | None -> 
    match _may_elem "xsd:union" n with
      | Some union ->
	  let members = find_member_types union in
270
	  Simple (Derived (name, Union members, no_facets, lazy (Simple anySimpleType)))
271
      | None ->
272
	  failwith ("Unknown variety for simpleType at line " ^ (string_of_int (_line n)) ^ "  uri = " ^ uri)
273
  and parse_simple_type n =
274
    lazy (parse_simple_type_def n)
275

276
277
  (* look for a simple type def: try attribute "base", try "simpleType" child,
   * fail *)
278
  and find_base_simple_type n : Schema_types.type_ref=
279
    match _may_qname_attr "base" n with
280
281
282
283
284
285
286
287
288
      | Some v -> resolve_simple_typ v
      | None ->
	  match _may_elem "xsd:simpleType" n with
	    | Some v -> parse_simple_type v
	    | None -> validation_error "no base simple type specified"
		
  (* look for a simple type def: try attribute "itemType", try "simpleType"
   * child, fail *)
  and find_item_type n =
289
    match _may_qname_attr "itemType" n with
290
291
292
293
294
      | Some v -> resolve_simple_typ v
      | None ->
	  match _may_elem "xsd:simpleType" n with
	    | Some v -> parse_simple_type v
	    | None -> validation_error "no itemType specified"
295

296
297
298
299
300
  (* look for a list of simple type defs: try attribute "memberTypes", try
   * "simpleType" children, fail *)
  and find_member_types n =
    let members1 =
      match _may_attr "memberTypes" n with
301
302
303
	| Some v -> 
	    List.map (fun x -> resolve_simple_typ (_resolve_qname n x)) 
	      (split v)
304
305
306
307
308
309
310
311
312
313
314
	| None -> []
    in
    let members2 =
      let nodes = _elems "xsd:simpleType" n in
      List.map parse_simple_type nodes
    in
    match members1 @ members2 with
      | [] -> validation_error "no member types specified"
      | members -> members


315
316
  and parse_elt_value_constraint (type_def: type_ref)  n =
    let validate_value v = 
317
318
      lazy (match Lazy.force type_def with
	      | Simple st_def ->
319
		  validate_simple_type st_def v
320
321
322
323
324
325
	      | Complex { ct_content = CT_simple c } ->
		  (match Lazy.force c with 
		     | Simple st_def -> 
			 validate_simple_type st_def v
		     | _ ->
			 validate_simple_type (Primitive (xsd, Utf8.mk "string")) v)
326
	      | _ -> 
327
		  validate_simple_type (Primitive (xsd, Utf8.mk "string")) v
328
	   )
329
330
    in
    default_fixed n validate_value
331

332
333
334
335
336
337
  (* look for a simple type def, try "simpleType" child, try "type" attribute,
   * return anySimpleType *)
  and find_simple_type n =
    match _may_elem "xsd:simpleType" n with
      | Some v -> parse_simple_type v
      | None ->
338
	  match _may_qname_attr "type" n with
339
	    | Some v -> resolve_simple_typ v
340
	    | None -> lazy (Simple anySimpleType)
341

342
343
344
345
346
347
348
349
350
  and parse_att_decl global n =
    let local = _attr "name" n in
    let ns = if global then targetNamespace
    else
      match _may_attr "form" n with
	| Some s when (Utf8.get_str s = "qualified") -> targetNamespace
	| None when attributeFormDefault -> targetNamespace
	| _ -> Ns.empty
    in
351
    let typdef = find_simple_type n in
352
    { attr_name = (ns,local);
353
354
355
356
357
358
      attr_typdef = typdef;
      attr_cstr = parse_att_value_constraint typdef n }
      
  and parse_attribute_use n =
    let required = _is_attr "use" n "required" in
    let att_decl =
359
      match _may_qname_attr "ref" n with
360
361
	| Some v -> resolve_att v
	| None ->
362
	    let a = parse_att_decl false n in
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
	    { a with attr_cstr = None }  
	      (* forget attribute value constraint *)
    in
    let value_constr = parse_att_value_constraint att_decl.attr_typdef n in
    { attr_required = required;
      attr_decl = att_decl;
      attr_use_cstr = value_constr }
      
  and parse_attribute_uses derivation_type base n =
    let uses1 = (* attribute uses from "attribute" children *)
      List.map parse_attribute_use (_elems "xsd:attribute" n)
    in
    let uses2 = (* attribute uses from "attributeGroup" children ref *)
      List.concat (List.map
		     (fun att_group ->
378
			match _may_qname_attr "ref" att_group with
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
			  | Some v -> (resolve_att_group v).ag_def
			  | None -> []
		     )
		     (_elems "xsd:attributeGroup" n))
    in
    let uses3 = (* attribute uses from base type *)
      match base with
	| Complex { ct_attrs = uses } ->
            (match derivation_type with
               | `Extension -> uses
               | `Restriction ->
		   let ( &= ) u1 u2 = 
		     (* by name equality over attribute uses *)
		     (name_of_attribute_use u1 = name_of_attribute_use u2)
		   in
		   let defined_uses = uses1 @ uses2 in
		   List.filter
		     (fun use -> 
			not (List.exists (fun u -> u &= use) defined_uses))
 (*                   && not (List.mem name prohibited_uses1) *) (* TODO prohibited attribute uses *)
		     uses)
	| _ -> []
    in
    uses1 @ uses2 @ uses3



  and get_derivation content =
    let (derivation,derivation_type) = 
      match _may_elem "xsd:restriction" content with
	| Some v -> (v, `Restriction)
	| None ->
	    match _may_elem "xsd:extension" content with
	      | Some v -> (v, `Extension)
	      | None -> assert false in
414
    let base = resolve_typ (_qname_attr "base" derivation) in
415
416
417
    let base = Lazy.force base in
    let uses = parse_attribute_uses derivation_type base derivation in
    (derivation,derivation_type,base,uses)
418
    
419
420
  and parse_complex_type_def n =
    let name = may_name n in
421
422
423
424
425
426
427
428
    let (base,derivation_type,uses,content_type) =
      match _may_elem "xsd:simpleContent" n with
	| Some c -> parse_simple_content n c
	| None ->
	    match _may_elem "xsd:complexContent" n with
	      | Some c -> parse_complex_content n c
	      | None -> parse_other_content n
    in
429
430
    Complex (complex name base derivation_type uses content_type)
  and parse_complex_type n =
431
    lazy (parse_complex_type_def n)
432
      
433
434
435
436
437
438
439
440
441
  and parse_simple_content n content =
    let derivation,derivation_type,base,uses = get_derivation content in
    let content_type =
      match derivation_type,base with
	| `Restriction, Complex { ct_content = CT_simple base } ->
	    let base =
	      match _may_elem "xsd:simpleType" derivation with
		| Some s -> parse_simple_type s
		| None -> base in
442
	    CT_simple (lazy (Simple (Derived (None, Restrict, parse_facets base n, base))))
443
444
	| `Extension, Complex { ct_content = CT_simple base } -> 
	    CT_simple base
445
	| `Extension, (Simple _ as st) -> CT_simple (lazy st)
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
	| _ -> assert false
    in
    base,derivation_type,uses,content_type

  and parse_complex_content n content =
    let derivation,derivation_type,base,uses = get_derivation content in
    let mixed = _is_attr "mixed" content "true" || _is_attr "mixed" n "true" in
    let particle_node = find_particle derivation in
    let content_type =
      match derivation_type, particle_node with
	| `Restriction, None -> CT_empty
	| `Restriction, Some p_node ->
	    let particle = parse_particle p_node in
	    CT_model (particle, mixed)
	| `Extension, None ->
	    content_type_of_type base
	      (* TODO BUG HERE if base =
		 AnyType *)
	| `Extension, Some p_node ->
            let base_ct = content_type_of_type base in
	    let particle = parse_particle p_node in
	    match base_ct with
	      | CT_empty ->
		  CT_model (particle, mixed)
	      | CT_model (p, _) ->
		  let model = Sequence (p::[particle]) in
472
		  CT_model (particle_model 1 (Some 1) model, mixed)
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
	      | CT_simple _ -> assert false
    in
    base,derivation_type,uses,content_type

  and parse_other_content n =
    let uses = parse_attribute_uses `Restriction AnyType n in
    let mixed = _is_attr "mixed" n "true" in
    let content_type =
      match find_particle n with
	| None -> CT_empty
	| Some pnode ->
            let particle = parse_particle pnode in
            CT_model (particle, mixed)
    in
    AnyType,`Restriction,uses,content_type
      
489

490
491
492
493
494
495
496
497
498
499
500
501
  and parse_elt_decl global n =
    let local = _attr "name" n in
    let ns = if global then targetNamespace
    else
      match _may_attr "form" n with
	| Some s when (Utf8.get_str s = "qualified") -> targetNamespace
	| None when elementFormDefault -> targetNamespace
	| _ -> Ns.empty
    in
    let type_def = find_element_type n in
    let value_constr = parse_elt_value_constraint type_def n in
    element (ns,local) type_def value_constr
502
503
504
505
506
	    
  (* look for a type definition, try "simpleType" child, try "complexType"
   * child, try "type" attribute, return anyType *)
  and find_element_type n =
    match _may_elem "xsd:simpleType" n with
507
      | Some n -> parse_simple_type n
508
509
      | None ->
	  match _may_elem "xsd:complexType" n with
510
	    | Some n -> parse_complex_type n
511
	    | None ->
512
		match _may_qname_attr "type" n with
513
		  | Some v -> resolve_typ v
514
		  | None -> lazy AnyType
515
516
517
		      
  and parse_particle n =
    let min, max = parse_min_max n in
518
519
    let model mg = particle_model min max mg in
    let elt e n = particle min max (Elt e) [ n ] false in
520
521
    match _tag n with
      | "xsd:element" ->
522
523
524
525
526
527
528
529
	  (match _may_qname_attr "ref" n with
	     | Some ref -> elt (resolve_elt ref) ref
	     | None ->
		 let decl = parse_elt_decl false n in
		 elt (lazy decl) (name_of_element_declaration decl))
      | "xsd:group" -> model (resolve_model_group (_qname_attr "ref" n)).mg_def
      | "xsd:all" | "xsd:sequence" | "xsd:choice" -> 
	  model (parse_model_group n)
530
531
532
533
534
535
536
537
538
539
540
541
542
      | _ -> assert false
	  
  and parse_model_group n =
    match _tag n with
      | "xsd:all" ->
	  All (List.map parse_particle (_elems "xsd:element" n))
      | "xsd:sequence" ->
	  Sequence (List.map parse_particle (find_particles n))
      | "xsd:choice" ->
	  Choice (List.map parse_particle (find_particles n))
      | _ -> assert false
	  
  and parse_att_group n =
543
    let name = get_name n in
544
545
546
547
548
549
550
    let uses1 =
      List.map parse_attribute_use (_elems "xsd:attribute" n)
    in
    let uses2 =
      List.concat 
	(List.map 
	   (fun name -> (resolve_att_group name).ag_def)
551
	   (List.map (_qname_attr "ref") (_elems "xsd:attributeGroup" n)))
552
553
554
555
    in
    { ag_name = name; ag_def = uses1 @ uses2 }
      
  and parse_model_group_def n =
556
    let name = get_name n in
557
558
559
560
561
562
563
564
565
    let model_group_node =
      match first n _may_elem ["xsd:all"; "xsd:choice"; "xsd:sequence"] with
	| Some m -> m
	| None -> assert false in
    let model_group = parse_model_group model_group_node in
    { mg_name = name; mg_def = model_group }

  in

566
567
568
(* First pass: allocate slots for global elements and types,
   perform inclusion *)
  let rec register n = function
569
    | "xsd:element" ->
570
571
572
	let name = get_name n in
	if (QTable.mem elts name) then
	  validation_error ("Redefinition of element " ^ Ns.QName.to_string name);
573
574
	let l = lazy (parse_elt_decl true n) in
	QTable.add elts name l
575
    | ("xsd:simpleType" | "xsd:complexType") as s ->
576
577
578
	 let name = get_name n in
	 if (QTable.mem typs name) then
	   validation_error ("Redefinition of type " ^ Ns.QName.to_string name);
579
580
581
	 let l = if s="xsd:simpleType" then lazy (parse_simple_type_def n)
	 else lazy (parse_complex_type_def n) in
	 QTable.add typs name l
582
    | "xsd:attribute" ->
583
584
585
        let name = get_name n in
	QTable.add attr_elems name n;
	todo := (fun () -> ignore (resolve_att name)):: !todo;
586
    | "xsd:attributeGroup" ->
587
588
589
        let name = get_name n in
	QTable.add attr_group_elems name n;
	todo := (fun () -> ignore (resolve_att_group name)):: !todo
590
    | "xsd:group" ->
591
592
593
        let name = get_name n in
	QTable.add model_group_elems name n;
	todo := (fun () -> ignore (resolve_model_group name)):: !todo
594
595
    | "xsd:include" ->
	let local = _attr "schemaLocation" n in
596
597
598
599
600
601
602
603
604
605
606
607
608
609
	let uri = Url.local uri (Utf8.get_str local) in
	print_endline ("Include " ^ uri); flush stdout;
	parse_root uri (node_of_uri uri);
    | "xsd:import" ->
	(match _may_attr "schemaLocation" n with
	   | None ->
	       print_endline "Import ignored, no schemaLocation"
	   | Some local ->
	       let new_uri = Url.local uri (Utf8.get_str local) in
	       print_endline ("Import " ^ new_uri); flush stdout;
	       ignore (parse_uri new_uri)
	)
	(* TODO: check namespace *)
    | "xsd:annotation" -> ()
610
611
612
613
614
615
616
    | s -> print_endline ("Ignore Schema element " ^ s);
  in
  _iter_elems root register;

(* end of parse_root *)

    in
617
    parse_root uri root;
618
619
620
621
    targetNamespace 

(* end of parse_uri *)

622
  in
623
  let ns = parse_uri uri in
624
625
626

  (* Second pass: compute the definitions *)
  List.iter (fun f -> f ()) !todo;
627
  {
628
    targetNamespace = ns;
629
630
631
632
633
    types = hashtbl_deref typs;
    attributes = hashtbl_values attrs;
    elements = hashtbl_deref elts;
    attribute_groups = hashtbl_values attr_groups;
    model_groups = hashtbl_values model_groups
634
635
  }

636