schema_parser.ml 31 KB
Newer Older
1

2
3
open Printf
open Pxp_document
4

5
6
open Encodings
open Encodings.Utf8.Pcre
7
open Schema_common
8
open Schema_types
9
10
11
open Schema_validator
open Schema_xml
open Schema_xml.Pxp_helpers
12

13
14
15
16
17
18
19
20
21
22
let debug = false
let debug_print ?(n: pxp_node option) s =
  if debug then
    (match n with
    | None -> prerr_endline s
    | Some n ->
        let line = match n#position with (_,l,_) -> l in
        prerr_endline (sprintf "[%d] %s" line s);
        flush stderr)

23
24
let space_RE = pcre_regexp " "
let split s = pcre_split ~rex:space_RE s
25
let unqualify s = snd (Ns.split_qname s)
26
27
28
29
30
31
let hashtbl_deref tbl =
  (* ASSUMPTION: no multiple bindings *)
  let tbl' = Hashtbl.create 1024 in
  Hashtbl.iter (fun key value -> Hashtbl.add tbl' key !value) tbl;
  tbl'
let hashtbl_values tbl = Hashtbl.fold (fun _ v acc -> v :: acc) tbl []
32

33
34
class type resolver =
  object
35
36
37
38
      (** add a node to the list of "seen" nodes.
      @raise Osv_validation_error if the same node is seen twice *)
    method see : pxp_node -> unit

39
    method resolve_att: ?fix_ns:bool -> Utf8.t -> attribute_declaration
40
    method resolve_elt:
41
      ?fix_ns:bool -> now:bool -> Utf8.t -> element_declaration ref
42
    method resolve_typ:
43
      ?fix_ns:bool -> now:bool -> Utf8.t -> type_definition ref
44
    method resolve_att_group:
45
46
47
      ?fix_ns:bool -> Utf8.t -> attribute_group_definition
    method resolve_model_group: ?fix_ns:bool -> Utf8.t -> model_group_definition
    method resolve_simple_typ: ?fix_ns:bool -> Utf8.t -> simple_type_definition
48
49
  end

50
51
52
53
54
55
module OrderedNode =
  struct
    type t = pxp_node
    let compare = Pxp_document.compare
  end
module NodeSet = Set.Make (OrderedNode)
56

57
58
59
60
61
62
63
64
65
66
67
68
  (* element and complex type constructors which take cares of unique id *)
let element, complex =
  let counter = ref 0 in
  let element name (type_def: type_definition ref) constr =
    incr counter;
    !counter, name, type_def, constr
  in
  let complex name (type_def: type_definition) deriv attrs ct =
    incr counter;
    !counter, name, type_def, deriv, attrs, ct
  in
  (element, complex)
69

70
71
72
let integer_of_value_t = function
  | Value.Integer i -> i
  | _ -> assert false
73

74
75
76
77
let parse_facets base n =
  debug_print ~n "Schema_parser.parse_facet"; 
  let validate_base_type = Schema_validator.validate_simple_type base in
  let validate_nonNegativeInteger =
78
79
    Schema_builtin.validate_builtin
      (Schema_xml.add_xsd_prefix (Utf8.mk "nonNegativeInteger"))
80
  in
81
82
  let facets = ref no_facets in
  n#iter_nodes (fun n ->
83
84
85
    let fixed =
      (_has_attribute "fixed" n) && (_attribute "fixed" n = Utf8.mk "true")
    in
86
87
88
89
90
91
92
93
94
95
96
97
98
99
    match n#node_type with
    | T_element "xsd:length" ->
        let value = _attribute "value" n in
        let length = integer_of_value_t (validate_nonNegativeInteger value) in
        facets := { !facets with length = Some (length, fixed) }
    | T_element "xsd:minLength" ->
        let value = _attribute "value" n in
        let length = integer_of_value_t (validate_nonNegativeInteger value) in
        facets := { !facets with minLength = Some (length, fixed) }
    | T_element "xsd:maxLength" ->
        let value = _attribute "value" n in
        let length = integer_of_value_t (validate_nonNegativeInteger value) in
        facets := { !facets with maxLength = Some (length, fixed) }
    | T_element "xsd:enumeration" ->
100
        let value = Value.string_utf8 (_attribute "value" n) in
101
102
103
104
105
106
107
108
        let value = validate_base_type value in
        let new_enumeration =
          (match !facets.enumeration with
          | None -> Some (Value.ValueSet.singleton value)
          | Some entries -> Some (Value.ValueSet.add value entries))
        in
        facets := { !facets with enumeration = new_enumeration }
    | T_element "xsd:whiteSpace" ->
109
        let value = Utf8.get_str (_attribute "value" n) in
110
111
112
113
114
115
116
117
        facets := { !facets with whiteSpace =
          ((match value with
            | "collapse" -> `Collapse
            | "preserve" -> `Preserve
            | "replace" -> `Replace
            | _ -> assert false),
          fixed) }
    | T_element "xsd:maxInclusive" ->
118
        let value = Value.string_utf8 (_attribute "value" n) in
119
120
121
        facets := { !facets with
          maxInclusive = Some (validate_base_type value, fixed) }
    | T_element "xsd:maxExclusive" ->
122
        let value = Value.string_utf8 (_attribute "value" n) in
123
124
125
        facets := { !facets with
          maxExclusive = Some (validate_base_type value, fixed) }
    | T_element "xsd:minInclusive" ->
126
        let value = Value.string_utf8 (_attribute "value" n) in
127
128
129
        facets := { !facets with
          minInclusive = Some (validate_base_type value, fixed) }
    | T_element "xsd:minExclusive" ->
130
        let value = Value.string_utf8 (_attribute "value" n) in
131
132
133
134
135
136
137
138
139
140
141
142
143
144
        facets := { !facets with
          minExclusive = Some (validate_base_type value, fixed) }
    | _ -> ());
  !facets
  
let merge_facets' base new_facets =
  merge_facets (facets_of_simple_type_definition base) new_facets

  (* parse an xsd:simpleType element *)
let rec parse_simple_type (resolver: resolver) n =
  debug_print ~n "Schema_parser.parse_simple_type";
  resolver#see n;
  let name =
    if _has_attribute "name" n then Some (_attribute "name" n) else None
145
  in
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
  if _has_element "xsd:restriction" n then begin    (* restriction *)
    let restriction = _element "xsd:restriction" n in
    let base = find_base_simple_type resolver restriction in
    let facets = parse_facets base restriction in
    restrict base facets name
  end else if _has_element "xsd:list" n then begin  (* list *)
    let list = _element "xsd:list" n in
    let items = find_item_type resolver list in
    Derived (name, List items, no_facets, anySimpleType)
  end else begin                                    (* union *)
    let union = _element "xsd:union" n in
    let members = find_member_types resolver union in
    Derived (name, Union members, no_facets, anySimpleType)
  end

  (* look for a simple type def: try attribute "base", try "simpleType" child,
   * fail *)
and find_base_simple_type (resolver: resolver) n =
  if _has_attribute "base" n then
    resolver#resolve_simple_typ (_attribute "base" n)
  else if _has_element "xsd:simpleType" n then
    parse_simple_type resolver (_element "xsd:simpleType" n)
  else
    raise (XSD_validation_error "no base simple type specified")

  (* look for a simple type def: try attribute "itemType", try "simpleType"
   * child, fail *)
and find_item_type (resolver: resolver) n =
  if _has_attribute "itemType" n then
    resolver#resolve_simple_typ (_attribute "itemType" n)
  else if _has_element "xsd:simpleType" n then
    parse_simple_type resolver (_element "xsd:simpleType" n)
  else
    raise (XSD_validation_error "no itemType specified")

  (* look for a list of simple type defs: try attribute "memberTypes", try
   * "simpleType" children, fail *)
and find_member_types (resolver: resolver) n =
  let members1 =
    if _has_attribute "memberTypes" n then
      let names = split (_attribute "memberTypes" n) in
      List.map resolver#resolve_simple_typ names
    else
      []
190
  in
191
192
193
194
195
196
197
198
199
200
201
202
  let members2 =
    let nodes = _elements "xsd:simpleType" n in
    List.map (parse_simple_type resolver) nodes
  in
  (match members1 @ members2 with
  | [] -> raise (XSD_validation_error "no member types specified")
  | members -> members)

  (* parse an attribute value constraint *)
let parse_att_value_constraint stype_def n =
  debug_print ~n "Schema_parser.parse_att_value_constraint";
  if _has_attribute "default" n then
203
    let value = Value.string_utf8 (_attribute "default" n) in
204
    let value = validate_simple_type stype_def value in
205
206
    Some (`Default value)
  else if _has_attribute "fixed" n then
207
    let value = Value.string_utf8 (_attribute "fixed" n) in
208
    let value = validate_simple_type stype_def value in
209
210
211
212
213
214
215
216
217
218
219
    Some (`Fixed value)
  else
    None

  (* parse an element value constraint *)
let parse_elt_value_constraint type_def n =
  debug_print ~n "Schema_parser.parse_elt_value_constraint";
  let validate_value =
    match type_def with
    | Simple st_def | Complex (_, _, _, _, _, CT_simple st_def) ->
        validate_simple_type st_def
220
    | _ -> validate_simple_type (Primitive (Utf8.mk "xsd:string"))
221
222
  in
  if _has_attribute "default" n then
223
    let value = Value.string_utf8 (_attribute "default" n) in
224
    let value = validate_value value in
225
226
    Some (`Default value)
  else if _has_attribute "fixed" n then
227
    let value = Value.string_utf8 (_attribute "fixed" n) in
228
    let value = validate_value value in
229
230
231
    Some (`Fixed value)
  else
    None
232

233
234
235
236
237
238
239
240
241
  (* look for a simple type def, try "simpleType" child, try "type" attribute,
   * return anySimpleType *)
let find_simple_type (resolver: resolver) n =
  if _has_element "xsd:simpleType" n then
    parse_simple_type resolver (_element "xsd:simpleType" n)
  else if _has_attribute "type" n then
    resolver#resolve_simple_typ (_attribute "type" n)
  else
    anySimpleType
242

243
let parse_att_decl (resolver: resolver) n =
244
245
246
247
248
249
  debug_print ~n "Schema_parser.parse_att_decl";
  resolver#see n;
  let name = _attribute "name" n in
  let type_def = find_simple_type resolver n in
  let value_constr = parse_att_value_constraint type_def n in
  name, type_def, value_constr
250

251
let parse_attribute_use (resolver: resolver) n =
252
253
  debug_print ~n "Schema_parser.parse_attribute_use";
  let required =
254
    (_has_attribute "use" n) && (_attribute "use" n = Utf8.mk "required")
255
  in
256
257
258
259
260
261
262
263
264
265
266
267
268
269
  let (name, type_def, value_constr) as att_decl =
    if _has_attribute "ref" n then
      resolver#resolve_att (_attribute "ref" n)
    else
      let (name, type_def, constr) = parse_att_decl resolver n in
      (name, type_def, None)  (* forget attribute value constraint *)
  in
  let value_constr = parse_att_value_constraint type_def n in
  required, att_decl, value_constr

let parse_attribute_uses (resolver: resolver) derivation_type base n =
  debug_print ~n "Schema_parser.parse_attribute_uses";
  let uses1 = (* attribute uses from "attribute" children *)
    List.map (parse_attribute_use resolver) (_elements "xsd:attribute" n)
270
  in
271
272
273
274
275
276
277
  let uses2 = (* attribute uses from "attributeGroup" children ref *)
    List.concat (List.map
      (fun att_group ->
        if _has_attribute "ref" att_group then
          snd (resolver#resolve_att_group (_attribute "ref" att_group))
        else [])
      (_elements "xsd:attributeGroup" n))
278
  in
279
  let uses3 = (* attribute uses from base type *)
280
    match base with
281
282
283
284
285
286
287
288
289
290
291
292
    | Complex (_, _, _, _, uses, _) ->
        (match derivation_type with
        | `Extension -> uses
        | `Restriction ->
            let ( &= ) u1 u2 = (* by name equality over attribute uses *)
              (name_of_attribute_use u1 = name_of_attribute_use u2)
            in
            let defined_uses = uses1 @ uses2 in
            List.filter
              (fun use -> not (List.exists (fun u -> u &= use) defined_uses))
(*                   && not (List.mem name prohibited_uses1) *) (* TODO prohibited attribute uses *)
              uses)
293
294
    | _ -> []
  in
295
296
297
298
  uses1 @ uses2 @ uses3

let parse_min_max n =
  ((if _has_attribute "minOccurs" n then
299
    Intervals.V.mk (Utf8.get_str (_attribute "minOccurs" n))
300
301
302
  else
    Intervals.V.one),
  (if _has_attribute "maxOccurs" n then
303
    match Utf8.get_str (_attribute "maxOccurs" n) with
304
305
306
307
    | "unbounded" -> None
    | s -> Some (Intervals.V.mk s)
  else
    Some Intervals.V.one))
308

309
310
let find_particles =
  _elements' ["xsd:element"; "xsd:group"; "xsd:choice"; "xsd:sequence"]
311

312
let rec parse_complex_type (resolver: resolver) n =
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
  let find_particle n =
    try
      Some (_element' ["xsd:all"; "xsd:choice"; "xsd:group"; "xsd:sequence"] n)
    with Not_found -> None
  in
  debug_print ~n "Schema_parser.parse_complex_type";
  resolver#see n;
  let name =
    if _has_attribute "name" n then Some (_attribute "name" n) else None
  in
  if _has_element "xsd:simpleContent" n then
    let content = _element "xsd:simpleContent" n in
    let derivation, derivation_type =
      if _has_element "xsd:restriction" content then
        (_element "xsd:restriction" content, `Restriction)
      else  (* _has_element "xsd:extension" *)
        (_element "xsd:extension" content, `Extension)
330
    in
331
332
    let base = resolver#resolve_typ ~now:true (_attribute "base" derivation) in
    let uses = parse_attribute_uses resolver derivation_type !base derivation in
333
    let content_type =
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
      match derivation_type with
      | `Restriction ->
          (match !base with
          | Complex (_, _, _, _, _, (CT_simple base)) ->
              let base =
                if _has_element "xsd:simpleType" derivation then
                  parse_simple_type resolver
                    (_element "xsd:simpleType" derivation)
                else
                  base
              in
              let new_facets = merge_facets' base (parse_facets base n) in
              let restricted_simple_type_def =
                (match base with
                | Primitive name ->
                    Derived (None, variety_of_simple_type_definition base,
                      new_facets, base)
                | Derived (_, variety, _, _) ->
                    Derived (None, variety, new_facets, base))
              in
              CT_simple restricted_simple_type_def
          | _ -> assert false)
      | `Extension ->
          (match !base with
          | Complex (_, _, _, _, _, (CT_simple base)) -> CT_simple base
          | Simple simple_type_def -> CT_simple simple_type_def
          | _ -> assert false)
361
    in
362
363
364
365
366
367
368
369
370
371
372
373
    complex name !base derivation_type uses content_type
  else if _has_element "xsd:complexContent" n then
    let content = _element "xsd:complexContent" n in
    let derivation, derivation_type =
      if _has_element "xsd:restriction" content then
        (_element "xsd:restriction" content, `Restriction)
      else  (* _has_element "xsd:extension" *)
        (_element "xsd:extension" content, `Extension)
    in
    let base = resolver#resolve_typ ~now:true (_attribute "base" derivation) in
    let uses = parse_attribute_uses resolver derivation_type !base derivation in
    let mixed =
374
375
376
      (_has_attribute "mixed" content &&
       (_attribute "mixed" content = Utf8.mk "true"))
      || (_has_attribute "mixed" n && (_attribute "mixed" n = Utf8.mk "true"))
377
378
379
380
381
382
383
384
385
386
387
    in
    let particle_node = find_particle derivation in
    let content_type =
      match derivation_type with
      | `Restriction ->
          (match particle_node with
          | None -> CT_empty
          | Some p_node ->
              let particle = parse_particle resolver p_node in
              CT_model (particle, mixed))
      | `Extension ->
388
389
          let base_ct = content_type_of_type !base in (* TODO BUG HERE if base =
                                                      AnyType *)
390
391
392
393
394
395
396
          (match particle_node with
          | None -> base_ct
          | Some pnode ->
              let particle = parse_particle resolver pnode in
              (match base_ct with
              | CT_empty -> CT_model (particle, mixed)
              | CT_model (p, _) ->
397
                  let model = Sequence (p::[particle]) in
398
                  CT_model
399
400
401
                    ((Intervals.V.one, Some (Intervals.V.one), Model model,
                      first_of_model_group model),
                     mixed)
402
403
404
405
406
407
              | CT_simple _ -> assert false))
    in
    complex name !base derivation_type uses content_type
  else  (* neither simpleContent nor complexContent *)
    let base = anyType in
    let uses = parse_attribute_uses resolver `Restriction base n in
408
409
410
    let mixed =
      _has_attribute "mixed" n && (_attribute "mixed" n = Utf8.mk "true")
    in
411
412
413
414
415
    let content_type =
      match find_particle n with
      | None -> CT_empty
      | Some pnode ->
          let particle = parse_particle resolver pnode in
416
          CT_model (particle, mixed)
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
    in
    complex name anyType `Restriction uses content_type

and parse_elt_decl (resolver: resolver) n: element_declaration =
  debug_print ~n "Schema_parser.parse_elt_decl";
  resolver#see n;
  if not (_has_attribute "name" n) then
    raise (XSD_validation_error "missing element name");
  let name = _attribute "name" n in
  let type_def = find_element_type resolver n in
  let value_constr = parse_elt_value_constraint type_def n in
  element name (ref type_def) value_constr

  (* look for a type definition, try "simpleType" child, try "complexType"
   * child, try "type" attribute, return anyType *)
and find_element_type (resolver: resolver) n =
  if _has_element "xsd:simpleType" n then
    Simple (parse_simple_type resolver (_element "xsd:simpleType" n))
  else if _has_element "xsd:complexType" n then
    Complex (parse_complex_type resolver (_element "xsd:complexType" n))
  else if _has_attribute "type" n then
    !(resolver#resolve_typ ~now:true (_attribute "type" n))
  else
    anyType
441

442
and parse_particle (resolver: resolver) n =
443
444
  debug_print ~n "Schema_parser.parse_particle";
  let min, max = parse_min_max n in
445
  match n#node_type with
446
  | T_element "xsd:element" ->
447
      let elt_decl, first =
448
        if _has_attribute "ref" n then
449
450
          let ref = _attribute "ref" n in
          (resolver#resolve_elt ~now:false ref, [ Some ref ])
451
        else  (* no "ref" attribute *)
452
453
          let decl = parse_elt_decl resolver n in
          (ref decl, [ Some (name_of_element_declaration decl) ])
454
      in
455
      (min, max, Elt elt_decl, first)
456
457
458
459
  | T_element "xsd:group" ->
      let model_group =
        snd (resolver#resolve_model_group (_attribute "ref" n))
      in
460
      (min, max, Model model_group, first_of_model_group model_group)
461
462
  | T_element "xsd:all" | T_element "xsd:sequence" | T_element "xsd:choice" ->
      let model_group = parse_model_group resolver n in
463
      (min, max, Model model_group, first_of_model_group model_group)
464
465
466
467
468
  | _ -> assert false

and parse_model_group (resolver: resolver) n =
  debug_print ~n "Schema_parser.parse_model_group";
  match n#node_type with
469
  | T_element "xsd:all" ->
470
      All (List.map (parse_particle resolver) (_elements "xsd:element" n))
471
  | T_element "xsd:sequence" ->
472
      Sequence (List.map (parse_particle resolver) (find_particles n))
473
  | T_element "xsd:choice" ->
474
      Choice (List.map (parse_particle resolver) (find_particles n))
475
476
  | _ -> assert false

477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
and parse_att_group (resolver: resolver) n =
  debug_print ~n "Schema_parser.parse_att_group";
  resolver#see n;
  let name = _attribute "name" n in
  let uses1 =
    List.map (parse_attribute_use resolver) (_elements "xsd:attribute" n)
  in
  let uses2 =
    List.concat (List.map (fun name -> snd (resolver#resolve_att_group name))
      (List.map (_attribute "ref") (_elements "xsd:attributeGroup" n)))
  in
  name, (uses1 @ uses2)

let parse_model_group_def (resolver: resolver) n =
  debug_print ~n "Schema_parser.parse_model_group_def";
  resolver#see n;
  let name = _attribute "name" n in
  let model_group_node =
    _element' ["xsd:all"; "xsd:choice"; "xsd:sequence"] n
  in
  let model_group = parse_model_group resolver model_group_node in
  name, model_group

  (** @param root schema document root node *)
class lazy_resolver =
  let fake_type_def =
503
504
    Complex (~-1, Some (Utf8.mk "   FAKE TYP   "), AnyType, `Restriction, [],
      CT_empty)
505
  in
506
  let fake_elt_decl = ~-2, Utf8.mk "   FAKE ELT   ", ref fake_type_def, None in
507
508
509
510
511
512
  let is_fake_type_def = (==) fake_type_def in
  let is_fake_elt_decl = (==) fake_elt_decl in
  let validation_error s = raise (XSD_validation_error s) in
  let get_ns_prefix n =
    match n#node_type with T_namespace p -> p | _ -> assert false
  in
513
  let (^^) x y = Utf8.concat x y in
514
  fun root ->
515
516
  object (self)

517
    val typs: (Utf8.t, type_definition ref) Hashtbl.t =
518
      Hashtbl.create 17
519
    val attrs: (Utf8.t, attribute_declaration) Hashtbl.t =
520
      Hashtbl.create 17
521
    val elts: (Utf8.t, element_declaration ref) Hashtbl.t =
522
      Hashtbl.create 17
523
    val attr_groups: (Utf8.t, attribute_group_definition) Hashtbl.t =
524
      Hashtbl.create 17
525
    val model_groups: (Utf8.t, model_group_definition) Hashtbl.t =
526
527
528
529
530
531
532
533
      Hashtbl.create 17

    val mutable seen_nodes = NodeSet.empty

    val mutable targetNamespace = None
    val mutable targetNamespace_prefix = "0TARGET0"
    val namespace_manager = new Pxp_dtd.namespace_manager
    val orig_ns_prefixes = Hashtbl.create 17
534

535
    initializer
536
537
538
539
540
      Schema_builtin.iter_builtin (* register built-in types *)
        (fun st_def ->
          let type_def = Simple st_def in
          let name = name_of_type_definition type_def in
          Hashtbl.replace typs name (ref type_def));
541
      Hashtbl.replace typs (Utf8.mk "xsd:anyType") (ref AnyType);
542
      List.iter (* fill namespace manager *)
543
544
        (fun (p, ns) ->
          namespace_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns))
545
546
547
548
549
550
551
552
553
554
555
556
557
558
        Schema_xml.schema_ns_prefixes;
      List.iter
        (fun n ->
          let prefix = get_ns_prefix n in
          let uri = n#data in
          if prefix <> "" then begin
            Hashtbl.add orig_ns_prefixes prefix uri;
            ignore (namespace_manager#lookup_or_add_namespace prefix uri)
          end)
        root#namespace_info#declaration;
      if _has_attribute "targetNamespace" root then begin
        let ns = _attribute "targetNamespace" root in
        targetNamespace <- Some ns;
        targetNamespace_prefix <-
559
560
          namespace_manager#lookup_or_add_namespace
            targetNamespace_prefix (Utf8.get_str ns)
561
562
563
564
      end;

      (** schemas namespaces handling *)

565
566
567
    method targetNamespace =
      match targetNamespace with
      | None -> Ns.empty
568
      | Some s -> Ns.mk s
569

570
571
      (* qualify names of entities before registering them with defined
       * targetNamespace, if any *)
572
    method private qualify_name name =
573
      match targetNamespace with
574
575
      | None -> name
      | Some _ -> (Utf8.mk (targetNamespace_prefix ^ ":")) ^^ name
576
577
578

      (* resolve user references using our namespace manager *)
    method private fix_namespace s =
579
      match Ns.split_qname s with
580
581
582
      | "", base ->
          (match targetNamespace with
          | None -> base
583
          | Some _ -> (Utf8.mk targetNamespace_prefix) ^^ (Utf8.mk ":") ^^ base)
584
585
586
587
      | prefix, base ->
          (try
            let orig_uri = Hashtbl.find orig_ns_prefixes prefix in
            let new_prefix = namespace_manager#get_normprefix orig_uri in
588
589
590
            (Utf8.mk new_prefix) ^^ (Utf8.mk ":") ^^ base
          with Not_found ->
            validation_error ("Can't resolve: " ^ Utf8.get_str s))
591

592
      (** seen nodes accounting *)
593

594
595
    method already_seen n = NodeSet.mem n seen_nodes
    method see (n: pxp_node) =
596
      debug_print "lazy_resolver.see";
597
598
599
600
601
      if NodeSet.mem n seen_nodes then
        validation_error "Types/Elements loop";
      seen_nodes <- NodeSet.add n seen_nodes

    method private find_global_component tag_pred name =
602
      let basename = snd (Ns.split_qname name) in
603
604
605
606
      find (fun n -> match n#node_type with
        | T_element tag when tag_pred tag ->
            (_has_attribute "name" n) && (_attribute "name" n = basename)
        | _ -> false) root
607
608
609
610
611

      (** registration of global entities *)

    method register_typ name def =
      debug_print "lazy_resolver.register_typ";
612
      let name = self#qualify_name name in
613
      if (Hashtbl.mem typs name) &&
614
         (not (is_fake_type_def !(Hashtbl.find typs name))) then
615
616
617
        validation_error ("Redefinition of type: " ^ Utf8.get_str name);
      debug_print (sprintf "Osv_parser: registering TYPE %s"
        (Utf8.get_str name));
618
      let type_def_ref = self#resolve_typ ~fix_ns:false ~now:false name in
619
620
621
622
      type_def_ref := def

    method register_elt name decl =
      debug_print "lazy_resolver.register_elt";
623
      let name = self#qualify_name name in
624
      if (Hashtbl.mem elts name) &&
625
         (not (is_fake_elt_decl !(Hashtbl.find elts name))) then
626
627
628
        validation_error ("Redefinition of element: " ^ Utf8.get_str name);
      debug_print (sprintf "Osv_parser: registering ELEMENT %s"
        (Utf8.get_str name));
629
      let elt_decl_ref = self#resolve_elt ~fix_ns:false ~now:false name in
630
      elt_decl_ref := decl
631

632
633
    method register_att name decl =
      debug_print "lazy_resolver.register_att";
634
      let name = self#qualify_name name in
635
      if Hashtbl.mem attrs name then
636
637
638
        validation_error ("Redefinition of attribute: " ^ Utf8.get_str name);
      debug_print (sprintf "Osv_parser: registering ATTRIBUTE %s"
        (Utf8.get_str name));
639
640
641
642
643
644
      Hashtbl.replace attrs name decl

    method register_att_group name def =
      debug_print "lazy_resolver.register_att_group";
      let name = self#qualify_name name in
      if Hashtbl.mem attr_groups name then
645
646
647
648
        validation_error ("Redefinition of attribute group: " ^
          Utf8.get_str name);
      debug_print (sprintf "Osv_parser: registering ATTRIBUTE GROUP %s"
        (Utf8.get_str name));
649
650
651
652
653
654
      Hashtbl.replace attr_groups name def

    method register_model_group name def =
      debug_print "lazy_resolver.register_model_group";
      let name = self#qualify_name name in
      if Hashtbl.mem model_groups name then
655
656
657
        validation_error ("Redefinition of model group: " ^ Utf8.get_str name);
      debug_print (sprintf "Osv_parser: registering MODEL GROUP %s"
        (Utf8.get_str name));
658
      Hashtbl.replace model_groups name def
659

660
      (** entities lookup *)
661

662
    method resolve_typ ?(fix_ns = true) ~now name =
663
      debug_print "lazy_resolver.resolve_typ";
664
      let name = if fix_ns then self#fix_namespace name else name in
665
      try
666
667
        Hashtbl.find typs name
      with Not_found ->
668
669
670
671
672
673
674
675
676
        let type_def =
          if now then (* resolve now: look for global type definitions *)
            let type_node =
              try
                self#find_global_component
                  (fun tag ->
                    (tag = "xsd:simpleType") || (tag = "xsd:complexType"))
                  name
              with Not_found ->
677
678
                validation_error ("Can't find definition of type: " ^
                  Utf8.get_str name)
679
            in
680
            if _tag_name type_node = Utf8.mk "xsd:simpleType" then
681
682
683
684
685
686
687
688
689
690
691
692
              Simple (parse_simple_type (self :> resolver) type_node)
            else  (* _tag_name type_node = "xsd:complexType" *)
              Complex (parse_complex_type (self :> resolver) type_node)
          else  (* resolve later: return a fake type ref *)
            fake_type_def
        in
        let type_def_ref = ref type_def in
        Hashtbl.replace typs name type_def_ref;
        type_def_ref

    method resolve_simple_typ ?(fix_ns = true) name =
      match !(self#resolve_typ ~fix_ns ~now:true name) with
693
      | AnyType -> Primitive (Utf8.mk "xsd:anySimpleType")
694
695
      | Simple st -> st
      | Complex _ -> assert false
696

697
    method resolve_elt ?(fix_ns = true) ~now name =
698
      debug_print "lazy_resolver.resolve_elt";
699
      let name = if fix_ns then self#fix_namespace name else name in
700
      try
701
702
        Hashtbl.find elts name
      with Not_found ->
703
704
705
706
707
708
        let elt_decl =
          if now then (* resolve now: look for global element declarations *)
            let elt_node =
              try
                self#find_global_component ((=) "xsd:element") name
              with Not_found ->
709
710
                validation_error ("Can't find declaration of element: " ^
                  Utf8.get_str name)
711
712
713
714
715
716
717
718
            in
            parse_elt_decl (self :> resolver) elt_node
          else  (* resolve later: return a fake element declaration *)
            fake_elt_decl
        in
        let elt_decl_ref = ref elt_decl in
        Hashtbl.replace elts name elt_decl_ref;
        elt_decl_ref
719

720
    method resolve_att ?(fix_ns = true) name =
721
      debug_print "lazy_resolver.resolve_att";
722
      let name = if fix_ns then self#fix_namespace name else name in
723
      try
724
725
        Hashtbl.find attrs name
      with Not_found ->
726
727
        let node =
          try
728
729
            self#find_global_component ((=) "xsd:attribute") name
          with Not_found ->
730
731
            validation_error ("Can't find declaration of attribute: " ^
              Utf8.get_str name)
732
733
        in
        let att_decl = parse_att_decl (self :> resolver) node in
734
        Hashtbl.replace attrs name att_decl;
735
736
        att_decl

737
738
739
740
741
742
743
744
745
746
747
    method resolve_att_group ?(fix_ns = true) name =
      debug_print "lazy_resolver.resolve_att_group";
      let name = if fix_ns then self#fix_namespace name else name in
      try
        Hashtbl.find attr_groups name
      with Not_found ->
        let node =
          try
            self#find_global_component ((=) "xsd:attributeGroup") name
          with Not_found ->
            validation_error
748
              ("Can't find definition of attribute group: " ^ Utf8.get_str name)
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
        in
        let att_group_decl = parse_att_group (self :> resolver) node in
        Hashtbl.replace attr_groups name att_group_decl;
        att_group_decl

    method resolve_model_group ?(fix_ns = true) name =
      debug_print "lazy_resolver.resolve_model_group";
      let name = if fix_ns then self#fix_namespace name else name in
      try
        Hashtbl.find model_groups name
      with Not_found ->
        let node =
          try
            self#find_global_component ((=) "xsd:group") name
          with Not_found ->
            validation_error
765
              ("Can't find definition of model group: " ^ Utf8.get_str name)
766
767
768
769
770
        in
        let model_group = parse_model_group_def (self :> resolver) node in
        Hashtbl.replace model_groups name model_group;
        model_group

771
772
      (** acces to registered global entities *)

773
774
    method elt_decls = hashtbl_values (hashtbl_deref elts)
    method type_defs = hashtbl_values (hashtbl_deref typs)
775
    method att_decls = hashtbl_values attrs
776
777
    method att_groups = hashtbl_values attr_groups
    method model_groups = hashtbl_values model_groups
778
779
780

  end

781
782
783
  (** {2 module's interface implementation} *)

let schema_of_node root =
784
  let resolver = new lazy_resolver root in
785
  let resolver' = (resolver :> resolver) in
786
787
  root#iter_nodes (fun n ->
    if not (resolver#already_seen n) then
788
      match n#node_type with
789
      | T_element "xsd:element" ->
790
791
          let name = _attribute "name" n in
          resolver#register_elt name (parse_elt_decl resolver' n)
792
      | T_element "xsd:simpleType" ->
793
794
          let name = _attribute "name" n in
          resolver#register_typ name (Simple (parse_simple_type resolver' n))
795
      | T_element "xsd:complexType" ->
796
797
          let name = _attribute "name" n in
          resolver#register_typ name (Complex (parse_complex_type resolver' n))
798
      | T_element "xsd:attribute" ->
799
800
801
802
803
804
805
806
807
          let name = _attribute "name" n in
          resolver#register_att name (parse_att_decl resolver' n)
      | T_element "xsd:attributeGroup" ->
          let name = _attribute "name" n in
          resolver#register_att_group name (parse_att_group resolver' n)
      | T_element "xsd:group" ->
          let name = _attribute "name" n in
          resolver#register_model_group name (parse_model_group_def resolver' n)
      | _ -> ());
808
  {
809
    targetNamespace = resolver#targetNamespace;
810
811
812
813
814
    types = resolver#type_defs;
    attributes = resolver#att_decls;
    elements = resolver#elt_decls;
    attribute_groups = resolver#att_groups;
    model_groups = resolver#model_groups
815
816
  }

817
818
819
820
821
822
823
824
825
826
let parse_schema source =
  let config =
    { new_xsd_config () with Pxp_types.enable_namespace_info = true }
  in
  let schema = schema_of_node (pxp_node_of ~config source) in
  debug_print "parse_schema completed successfully";
  schema

let schema_of_file fname = parse_schema (Pxp_types.from_file fname)

827
828
829
let schema_of_string s = parse_schema (Pxp_types.from_string s)