typer.ml 75.7 KB
Newer Older
1
(* TODO:
2
 - rewrite type-checking of operators to propagate constraint
3
4
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
5
6
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
13
14
15
16
(* Shadow the polymorphic comparison operators with versions restricted to
   [int].  From here on, using one of these operators on a value of another
   type is rejected by the type checker. *)
let ( = ) : int -> int -> bool = ( = )
let ( <= ) : int -> int -> bool = ( <= )
let ( < ) : int -> int -> bool = ( < )
let ( >= ) : int -> int -> bool = ( >= )
let ( > ) : int -> int -> bool = ( > )

17
18
let debug_schema = false

19
let warning loc msg =
20
  let v = Location.get_viewport () in
21
  let ppf = if Html.is_html v then Html.ppf v else Format.err_formatter in
22
23
24
  Format.fprintf ppf "Warning %a:@\n" Location.print_loc (loc,`Full);
  Location.html_hilight (loc,`Full);
  Format.fprintf ppf "%s@." msg
25

26
27
28
29
30
31
32
33
34
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string

35
36
37

exception Warning of string * Types.t

38
39
40
41
(* Raise [exn] wrapped in a [Location] exception for [loc], tagged [`Full]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Same as [raise_loc], except that the location is tagged [`Char ofs]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* Raise [Error msg] attached to the location [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn

42
43
type item =
  | Type of Types.t
44
  | Val of Types.t
45

46
47
module UEnv = Map.Make(U)

48
type t = {
49
  ids : item Env.t;
50
  ns: Ns.table;
51
  cu: Types.CompUnit.t UEnv.t;
52
  schemas: string UEnv.t
53
}
54

55
56
57
58
59
(* Unsupported generic operations: each one fails with a [Failure] carrying
   its own qualified name. *)
let hash _ = raise (Failure "Typer.hash")
let compare _ _ = raise (Failure "Typer.compare")
let dump _ppf _ = raise (Failure "Typer.dump")
let equal _ _ = raise (Failure "Typer.equal")
let check _ = raise (Failure "Typer.check")
60
61

(* TODO: filter out builtin defs ? *)
62
63
64
65
(* Write an environment item to [s]: a 1-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type. *)
let serialize_item s item =
  let tag, ty =
    match item with
    | Type ty -> 0, ty
    | Val ty -> 1, ty
  in
  Serialize.Put.bits 1 s tag;
  Types.serialize s ty

66
let serialize s env =
67
  Serialize.Put.env Id.serialize serialize_item Env.iter s env.ids;
68
  Ns.serialize_table s env.ns
69

70
71
72
73
74
(* Read back an item written by [serialize_item]: a 1-bit tag selects the
   constructor ([Type] for 0, [Val] for 1), then the type itself is read. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

75
let deserialize s =
76
  let ids = Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
77
  let ns = Ns.deserialize_table s in
78
  { ids = ids; ns = ns; cu = UEnv.empty; schemas = UEnv.empty }
79
80


81
82
let empty_env = {
  ids = Env.empty;
83
  ns = Ns.empty_table;
84
  cu = UEnv.empty;
85
  schemas = UEnv.empty
86
87
}

88
89
let from_comp_unit = ref (fun cu -> assert false)

90
let enter_cu x cu env =
91
  { env with cu = UEnv.add x cu env.cu }
92

93
94
95
let find_cu x env =
  try UEnv.find x env.cu
  with Not_found -> Types.CompUnit.mk x
96
97


98
99
100
101
102
103
(* Bind the schema name [x] to [uri] in the environment. *)
let enter_schema x uri env =
  let schemas = UEnv.add x uri env.schemas in
  { env with schemas = schemas }

(* Return the URI bound to the schema name [x].
   Raises [Error] when [x] is not a known schema. *)
let find_schema x env =
  try UEnv.find x env.schemas
  with Not_found ->
    let msg = Printf.sprintf "%s: no such schema" (U.get_str x) in
    raise (Error msg)

104
105
106
107
108
109
110
111
let enter_type id t env =
  { env with ids = Env.add id (Type t) env.ids }
let enter_types l env =
  { env with ids = 
      List.fold_left (fun accu (id,t) -> Env.add id (Type t) accu) env.ids l }
let find_type id env =
  match Env.find id env.ids with
    | Type t -> t
112
    | Val _ -> raise Not_found
113

114
let find_type_global loc cu id env =
115
  let cu = find_cu cu env in
116
117
118
  let env = !from_comp_unit cu in
  find_type id env

119
let enter_value id t env = 
120
  { env with ids = Env.add id (Val t) env.ids }
121
122
let enter_values l env =
  { env with ids = 
123
      List.fold_left (fun accu (id,t) -> Env.add id (Val t) accu) env.ids l }
124
125
126
let enter_values_dummy l env =
  { env with ids = 
      List.fold_left (fun accu id -> Env.add id (Val Types.empty) accu) env.ids l }
127
128
let find_value id env =
  match Env.find id env.ids with
129
    | Val t -> t
130
    | _ -> raise Not_found
131
132
133
let find_value_global cu id env =
  let env = !from_comp_unit cu in
  find_value id env
134
	
135
136
137
138
139
140
(* [value_name_ok id env] is [false] only when [id] is currently bound to a
   type in [env]; unbound identifiers and identifiers bound to values are
   accepted. *)
let value_name_ok id env =
  let bound_to_type =
    try (match Env.find id env.ids with Type _ -> true | Val _ -> false)
    with Not_found -> false
  in
  not bound_to_type

141
142
143
144
(* Apply [f id t] to every value binding of the environment; type bindings
   are skipped. *)
let iter_values env f =
  let visit id item =
    match item with
    | Val t -> f id t
    | Type _ -> ()
  in
  Env.iter visit env.ids
145

146

147
148
149
150
151
152
153
154
155
(* Register every type binding of [env] with the type printer, under the
   name "<compilation unit>:<identifier>".  Value bindings are ignored. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register id item =
    match item with
    | Type t ->
        let qualified = U.concat prefix (Id.value id) in
        Types.Print.register_global qualified t
    | Val _ -> ()
  in
  Env.iter register env.ids

156

157
(* Namespaces *)
158

159
let set_ns_table_for_printer env = 
160
  Ns.InternalPrinter.set_table env.ns
161

162
let get_ns_table tenv = tenv.ns
163

164
let enter_ns p ns env =
165
  { env with ns = Ns.add_prefix p ns env.ns }
166

167
168
169
170
171
(* Run [f x], turning an [Ns.UnknownPrefix] exception into an error located
   at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix prefix ->
    let msg = "Undefined namespace prefix " ^ U.to_string prefix in
    raise_loc_generic loc msg
172

173
174
175
let qname env loc t = 
  protect_error_ns loc (Ns.map_tag env.ns) t
    
176
let parse_atom env loc t =
177
  Atoms.V.of_qname (qname env loc t)
178
179
 
let parse_ns env loc ns =
180
  protect_error_ns loc (Ns.map_prefix env.ns) ns
181

182
let parse_label env loc t =
183
  let (ns,l) = protect_error_ns loc (Ns.map_attr env.ns) t in
184
  LabelPool.mk (ns,l)
185

186
187
188
189
190
191
192
193
194
195
196
197
198
(* Build a label map from the association list [r]: labels are resolved with
   [parse_label] and contents are transformed by [f].  A label occurring
   twice is reported as an error at [loc]. *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let parse_field (label, x) = (parse_label env loc label, f x) in
  LabelMap.from_list on_duplicate (List.map parse_field r)

let rec const env loc = function
  | LocatedExpr (loc,e) -> const env loc e
  | Pair (x,y) -> Types.Pair (const env loc x, const env loc y)
  | Xml (x,y) -> Types.Xml (const env loc x, const env loc y)
  | RecordLitt x -> Types.Record (parse_record env loc (const env loc) x)
  | String (i,j,s,c) -> Types.String (i,j,s,const env loc c)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
199
  | Const c -> c
200
201
202
203
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
204

205

206
(* Schema *)
207

208
209
210
let is_registered_schema env s = UEnv.mem s env.schemas

(* uri -> schema binding *)
211
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)
212
213
214

let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
215
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
216
217
218
219
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
220

221
222
223

let get_schema_fwd = ref (fun _ -> assert false)

224
(* [find_schema_descr_uri kind uri name] looks up the schema component
   [name] of the schema identified by [uri].

   [kind] restricts the lookup to one table (elements, types, attributes,
   attribute groups or model groups); with [None] the tables are tried in
   that order and the first hit wins.

   The schema is first requested through [get_schema_fwd]; its result is
   ignored here.  Raises [Error] when no component is found. *)
let find_schema_descr_uri kind uri (name : Ns.qname) =
  (* Delayed lookup of [(uri, name)] in one of the component tables. *)
  let lookup table () = Hashtbl.find !table (uri, name) in
  (* Result of the first finder that does not raise [Not_found]. *)
  let rec first_found = function
    | [] -> raise Not_found
    | find :: rest -> (try find () with Not_found -> first_found rest)
  in
  let finders =
    match kind with
      | Some `Element -> [ lookup schema_elements ]
      | Some `Type -> [ lookup schema_types ]
      | Some `Attribute -> [ lookup schema_attributes ]
      | Some `Attribute_group -> [ lookup schema_attribute_groups ]
      | Some `Model_group -> [ lookup schema_model_groups ]
      | None ->
          (* policy for unqualified schema component resolution. This order
           * should be consistent with Schema_component.get_component *)
          [ lookup schema_elements;
            lookup schema_types;
            lookup schema_attributes;
            lookup schema_attribute_groups;
            lookup schema_model_groups ]
  in
  try
    ignore (!get_schema_fwd uri);
    first_found finders
  with Not_found ->
    raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
                    (Schema_common.string_of_component_kind kind)
                    (Ns.QName.to_string name) uri))
249
250
251
252
253

(* Resolve the schema name [schema] to its URI in [env], then look up the
   component [name] of the requested [kind] in that schema. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

254

255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
module IType = struct
  (* A node of the intermediate representation shared by types and patterns.
     Next to its description, a node stores cached hash values and the
     memoized results of the translations defined later in this module. *)
  type node = {
    mutable desc: desc;
    mutable smallhash: int;  (* Local hash *)
    mutable rechash: int;    (* Global (recursive) hash *)
    mutable sid: int;        (* Sequential id used to compute rechash *)
    (* [sid] is also used as a "visited" mark by [fv]. *)
    mutable t: Types.t option;           (* cache filled by [typ] *)
    mutable tnode: Types.Node.t option;  (* cache filled by [typ_node] *)
    mutable p: Patterns.descr option;    (* cache filled by [pat] *)
    mutable pnode: Patterns.node option; (* cache filled by [pat_node] *)
    mutable fv: fv option                (* cache filled by [fv] *)
  } 
  and desc =
    | ILink of node  (* indirection to another node; skipped by [repr] *)
    | IType of Types.descr * int  (* a type together with a hash of it *)
    | IOr of node * node
    | IAnd of node * node
    | IDiff of node * node
    | ITimes of node * node
    | IXml of node * node
    | IArrow of node * node
    | IOptional of node
    (* The boolean is passed unchanged to [Types.record'].  Each label maps
       to a field node and an optional "or-else" node. *)
    | IRecord of bool * (node * node option) label_map
    | ICapture of id                 (* capture variable *)
    | IConstant of id * Types.const  (* identifier bound to a constant *)

  let rec node_temp = { 
    desc = ILink node_temp;
    smallhash = 0; rechash = 0; sid = 0;
    t = None; tnode = None; p = None; pnode = None;
    fv = None
  }
			

  (* Shallow hash of a node: links are followed, then only the head
     constructor is taken into account, together with the stored hash for
     [IType] and the identifier for [ICapture] / [IConstant]. *)
  let rec hash0 node =
    match node.desc with
    | ILink target -> hash0 target
    | IType (_, h) -> 1 + (17 * h)
    | IOr (_, _) -> 2
    | IAnd (_, _) -> 3
    | IDiff (_, _) -> 4
    | ITimes (_, _) -> 5
    | IXml (_, _) -> 6
    | IArrow (_, _) -> 7
    | IOptional _ -> 8
    | IRecord (_, _) -> 9
    | ICapture id -> 10 + (17 * Id.hash id)
    | IConstant (id, _) -> 11 + (17 * Id.hash id)

  (* Shallow hash of a record field: the field node plus, when present, its
     or-else node. *)
  let hash0_field (field, orelse) =
    match orelse with
    | Some e -> 1 + (17 * hash0 field) + (257 * hash0 e)
    | None -> 2 + (17 * hash0 field)

  (* One-level hash of a node: combines the head constructor with the
     shallow hashes ([hash0]) of its immediate sub-nodes.  Links are
     followed. *)
  let rec hash1 node =
    let binary tag a b = tag + (17 * hash0 a) + (257 * hash0 b) in
    match node.desc with
    | ILink target -> hash1 target
    | IType (_, h) -> 1 + (17 * h)
    | IOr (a, b) -> binary 2 a b
    | IAnd (a, b) -> binary 3 a b
    | IDiff (a, b) -> binary 4 a b
    | ITimes (a, b) -> binary 5 a b
    | IXml (a, b) -> binary 6 a b
    | IArrow (a, b) -> binary 7 a b
    | IOptional a -> 8 + (17 * hash0 a)
    | IRecord (o, r) ->
        9 + (if o then 17 else 0) + (257 * LabelMap.hash hash0_field r)
    | ICapture id -> 10 + (17 * Id.hash id)
    | IConstant (id, c) -> 11 + (17 * Id.hash id) + (257 * Types.Const.hash c)

  (* Memoized [hash1]: the value is cached in the [smallhash] field, where 0
     stands for "not computed yet". *)
  let smallhash n =
    match n.smallhash with
    | 0 ->
        let h = hash1 n in
        n.smallhash <- h;
        h
    | cached -> cached

  (* Follow [ILink] indirections until a node with a real description is
     reached. *)
  let rec repr node =
    match node.desc with
    | ILink target -> repr target
    | _ -> node

  (* Undo log: pairs (node, description it had before being linked).  It is
     filled by [link] and consumed by [may_unify]. *)
  let back = ref []

  (* [link x y] redirects one of the two nodes to the other with an [ILink].
     The redirected node must have no memoized type ([t = None]); the first
     argument is tried first, then the second.  The previous description of
     the redirected node is saved in [back].  Fails with an assertion when
     both nodes already carry a memoized type. *)
  let link x y = match x,y with
    | { t = None } as x, y 
    | y, ({ t = None } as x) -> back := (x,x.desc) :: !back; x.desc <- ILink y
    | _ -> assert false

  (* Raised by [unify] when two nodes cannot be identified. *)
  exception Unify

  (* [unify x y] checks that [x] and [y] have the same structure, merging
     nodes with [link] along the way.  The links are recorded in [back] so
     that [may_unify] can undo them when [Unify] is raised.

     For composite nodes the two nodes are linked before their components
     are compared (presumably so that the traversal terminates on cyclic
     structures -- to be confirmed). *)
  let rec unify x y =
    if x == y then ()
    else let x = repr x and y = repr y in if x == y then ()
(*    else if (smallhash x != smallhash y) then raise Unify *)
    else match x.desc,y.desc with
      | IType (tx,_), IType (ty,_) when Types.equal tx ty ->
	  link x y
      | IOr (x1,x2), IOr (y1,y2)
      | IAnd (x1,x2), IAnd (y1,y2)
      | IDiff (x1,x2), IDiff (y1,y2)
      | ITimes (x1,x2), ITimes (y1,y2)
      | IXml (x1,x2), IXml (y1,y2)
      | IArrow (x1,x2), IArrow (y1,y2) ->
	  link x y; unify x1 y1; unify x2 y2
      | IOptional x1, IOptional y1 ->
	  link x y; unify x1 y1
      | IRecord (xo,xr), IRecord (yo,yr) when xo == yo ->
	  link x y; LabelMap.may_collide unify_field Unify xr yr
      (* Equal captures / constants are accepted without linking the nodes. *)
      | ICapture xv, ICapture yv when Id.equal xv yv -> ()
      | IConstant (xv,xc), IConstant (yv,yc) when
	  Id.equal xv yv && Types.Const.equal xc yc -> ()
      | _ -> raise Unify
  (* Unify two record fields: both must agree on the presence of an or-else
     clause. *)
  and unify_field f1 f2 = match f1,f2 with
    | (p1, Some e1), (p2, Some e2) -> unify p1 p2; unify e1 e2
    | (p1, None), (p2, None) -> unify p1 p2
    | _ -> raise Unify

  (* Try to unify [x] and [y].  On success the links created by [unify] are
     kept and the result is [true]; on failure every link recorded in [back]
     is undone and the result is [false].  The undo log is emptied in both
     cases. *)
  let may_unify x y =
    let unified = (try unify x y; true with Unify -> false) in
    if not unified then
      List.iter (fun (node, saved) -> node.desc <- saved) !back;
    back := [];
    unified

  (* Hash tables keyed by nodes, hashed with [smallhash].
     Note that the equality test is [may_unify], which has a side effect:
     when it succeeds, the two nodes stay linked together. *)
  module SmallHash = Hashtbl.Make(
    struct 
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

  (* Apply [f] to the node of a record field, then to its or-else node if
     there is one. *)
  let iter_field f (field, orelse) =
    f field;
    match orelse with
    | Some e -> f e
    | None -> ()

  (* Apply [f] to the immediate sub-nodes of a description, from left to
     right.  Descriptions without sub-nodes (and links) are left alone. *)
  let iter f desc =
    match desc with
    | IOr (a, b) | IAnd (a, b) | IDiff (a, b)
    | ITimes (a, b) | IXml (a, b) | IArrow (a, b) ->
        f a;
        f b
    | IOptional a -> f a
    | IRecord (_, fields) -> LabelMap.iter (iter_field f) fields
    | ILink _ | IType _ | ICapture _ | IConstant _ -> ()

  (* [minimize (mem, add)] returns a traversal that visits a node graph: each
     node not yet known to [mem] is registered with [add], and its sub-nodes
     are visited only when the node has no memoized type. *)
  let minimize (mem, add) =
    let rec visit n =
      let n = repr n in
      if not (mem n) then begin
        add n ();
        match n.t with
        | None -> iter visit n.desc
        | Some _ -> ()
      end
    in
    visit

  (* Nodes whose [sid] field has been set by a traversal; [clear] resets
     that field on each of them. *)
  let to_clear = ref []
  (* Counter used to number nodes in the order they are first visited. *)
  let sid = ref 0
  (* Recursive hash of a node graph.  A node met for the first time receives
     the next sequential id and is hashed from its description and the
     recursive hashes of its sub-nodes; a node met again contributes
     [17 * sid] without being traversed, so the traversal stops on cycles.
     The [sid] marks are left in place: the caller has to run [clear]. *)
  let rec rechash n =
    let n = repr n in
    if (n.sid != 0) then 17 * n.sid
    else begin incr sid; n.sid <- !sid; to_clear := n :: !to_clear; 
    match n.desc with
    | ILink _ -> assert false
    | IType (t,h) -> 1 + 17 * h
    | IOr (p1,p2) -> 2 + 17 * rechash p1 + 257 * rechash p2
    | IAnd (p1,p2) -> 3 + 17 * rechash p1 + 257 * rechash p2
    | IDiff (p1,p2) -> 4 + 17 * rechash p1 + 257 * rechash p2
    | ITimes (p1,p2) -> 5 + 17 * rechash p1 + 257 * rechash p2
    | IXml (p1,p2) -> 6 + 17 * rechash p1 + 257 * rechash p2
    | IArrow (p1,p2) -> 7 + 17 * rechash p1 + 257 * rechash p2
    | IOptional p -> 8 + 17 * rechash p
    | IRecord(o,r)->9+(if o then 17 else 0)+257*(LabelMap.hash rechash_field r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257*(Types.Const.hash c)
    end
  (* Recursive hash of a record field and of its optional or-else node. *)
  and rechash_field = function
    | (p, Some e) -> 1 + 17 * rechash p + 257 * rechash e
    | (p, None) -> 2 + 17 * rechash p

  (* Reset the traversal state shared by [rechash] and [fv]: the sequential
     counter goes back to 0 and the [sid] mark of every node recorded in
     [to_clear] is erased.

     The list is emptied afterwards: all its nodes are unmarked at that
     point, so keeping them would only retain them in memory and make each
     later call walk every node visited so far. *)
  let clear () =
    sid := 0;
    List.iter (fun x -> x.sid <- 0) !to_clear;
    to_clear := []

  (* Memoized recursive hash: the value is cached in the [rechash] field of
     the representative node (0 stands for "not computed yet"), and the
     traversal marks are cleared once the hash has been computed. *)
  let rechash n =
    let n = repr n in
    match n.rechash with
    | 0 ->
        let h = rechash n in
        clear ();
        n.rechash <- h;
        h
    | cached -> cached

  (* Hash tables keyed by nodes, used for the global table [gtable].
     NOTE(review): despite its name this module hashes with [smallhash], not
     with [rechash] -- confirm that this is intended.  As in [SmallHash],
     the equality test [may_unify] links the nodes it identifies. *)
  module RecHash = Hashtbl.Make(
    struct
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

  let gtable = RecHash.create 17577

  (* Register the nodes reachable from [n], first in a fresh local table,
     then in the global table [gtable].  Since table lookups compare nodes
     with [may_unify], nodes found equal to a registered one end up linked
     to it. *)
  let internalize n =
    let local = SmallHash.create 67 in
    let register_locally =
      minimize (SmallHash.mem local, SmallHash.add local) in
    let register_globally =
      minimize (RecHash.mem gtable, RecHash.add gtable) in
    register_locally n;
    register_globally n

(* Compute free variables *)

  (* [fv n] is the set of identifiers found in the [ICapture] and [IConstant]
     nodes reachable from [n].  The result is memoized in the [fv] field of
     [n] itself (not of its representative).

     During the traversal [sid] is set to 1 on each visited node so that it
     is visited only once; the marks are erased by [clear] at the end.  A
     visited node that already has a memoized set contributes that set
     without being traversed. *)
  let fv n =
    let fv = ref IdSet.empty in
    let rec aux n =
      let n = repr n in
      if (n.sid = 0) then (
	n.sid <- 1;
	to_clear := n :: !to_clear;
	match n.fv, n.desc with
	  | Some x, _ -> fv := IdSet.cup !fv x
	  | None, (ICapture x | IConstant (x,_)) -> fv := IdSet.add x !fv
	  | None, d -> iter aux d
      )
    in
    match n.fv with
      | Some x -> x
      | None -> aux n; clear (); n.fv <- Some !fv; !fv

(* To the internal representation *)


  (* [typ n] translates the node [n] into an internal type description.  The
     result is memoized in the [t] field of the representative of [n].
     Raises [Patterns.Error] when a record field carries an or-else clause;
     capture and constant nodes are not expected here (assertion). *)
  let rec typ n =
    let n = repr n in
    match n.t with
      | Some t -> t
      | None -> let t = compute_typ n.desc in n.t <- Some t; t
  (* Translation of one description.  Boolean connectives work on
     descriptions ([typ]); products, XML elements, arrows and record fields
     refer to their components through type nodes ([typ_node]). *)
  and compute_typ = function
    | IType (t,_) -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | IRecord (o,r) ->  Types.record' (o, LabelMap.map compute_typ_field r)
    | ILink _ -> assert false
    | ICapture _ | IConstant (_,_) -> assert false
  and compute_typ_field = function
    | (s, None) -> typ_node s
    | (s, Some _) -> 
	raise (Patterns.Error "Or-else clauses are not allowed in types")

  (* [typ_node n] is the type node associated with [n], memoized in the
     [tnode] field.  The fresh node is stored before its definition is
     computed, so that a reference to [n] met while translating [n] itself
     gets the same node.  If the translation raises, the cache is rolled
     back (as [pat_node] does) instead of keeping a node that was never
     defined. *)
  and typ_node n =
    match n.tnode with
      | Some t -> t
      | None ->
	  let x = Types.make () in
	  n.tnode <- Some x;
	  (try Types.define x (typ n)
	   with exn -> n.tnode <- None; raise exn);
	  x
      
  (* [pat n] translates the node [n] into a pattern description.  A node
     without free variables is translated as a type and wrapped with
     [Patterns.constr]; otherwise the result of [compute_pat] is memoized in
     the [p] field of the representative of [n]. *)
  let rec pat n =
    let n = repr n in
    if IdSet.is_empty (fv n)
    then Patterns.constr (typ n)
    else match n.p with
      | Some p -> p
      | None -> let p = compute_pat n.desc in n.p <- Some p; p

  (* Translation of one description that contains variables.  Constructs
     that have no pattern counterpart raise [Patterns.Error]. *)
  and compute_pat = function
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    (* A difference is accepted only when its right-hand side has no
       variable: it then becomes an intersection with the negated type. *)
    | IDiff (s1,s2) when IdSet.is_empty (fv s2) ->
	let s2 = Types.neg (typ s2) in
	Patterns.cap (pat s1) (Patterns.constr s2)
    | IDiff _ ->
	raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ -> 
	raise (Patterns.Error "Optional fields are not allowed in record patterns")
    (* A record is split into a record type constraint (fields without
       variables keep their type, the other ones are relaxed) and one record
       pattern per field with variables, collected in [pats]; everything is
       intersected at the end. *)
    | IRecord (o,r) ->
	let pats = ref [] in
	let aux l = function
	  | (s,None) ->
	      if IdSet.is_empty (fv s) then typ_node s
	      else
		( pats := Patterns.record l (pat_node s) :: !pats;
		  Types.any_node )
	  | (s,Some e) ->
	      if IdSet.is_empty (fv s) then
		raise (Patterns.Error "Or-else clauses are not allowed in types")
	      else
		( pats := Patterns.cup 
		    (Patterns.record l (pat_node s))
		    (pat e) :: !pats;
		  Types.Record.any_or_absent_node )
	in
	let constr = Types.record' (o,LabelMap.mapi aux r) in
	List.fold_left Patterns.cap (Patterns.constr constr) !pats
	  (* TODO: can avoid constr when o=true, and all fields have fv *)
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IArrow _ ->
	raise (Patterns.Error "Arrows are not allowed in patterns")
    | IType _ | ILink _ -> assert false
      
  (* [pat_node n] is the pattern node associated with [n], memoized in the
     [pnode] field.  The fresh node is stored before its definition is
     computed, so that a reference to [n] met while translating [n] itself
     gets the same node.  If the translation raises, the cache is reset and
     the exception is re-raised. *)
  and pat_node n =
    match n.pnode with
      | Some p -> p
      | None ->
	  let x = Patterns.make (fv n) in
	  try
	    n.pnode <- Some x;
	    Patterns.define x (pat n);
	    x
	  with exn -> n.pnode <- None; raise exn

(* From AST to the intermediate representation *)

  type penv = {
    penv_tenv : t;
    penv_derec : node Env.t;
  }

  let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }

  (* Fresh node with description [d]; all other fields are copied from the
     template [node_temp]. *)
  let mk d = { node_temp with desc = d }

  (* Fresh placeholder node (a link to [node_temp]), whose description is
     meant to be assigned later. *)
  let mk_delayed () = mk (ILink node_temp)

  (* Node for the type [t], stored together with its hash. *)
  let itype t =
    let h = Types.hash t in
    mk (IType (t, h))

  (* Shared node for the empty type; [ior] and [iand] recognize it by
     physical equality. *)
  let iempty = itype Types.empty

  (* Union of two nodes, simplified when one of them is physically the
     shared empty node [iempty]. *)
  let ior p1 p2 =
    match p1 == iempty, p2 == iempty with
    | true, _ -> p2
    | false, true -> p1
    | false, false -> mk (IOr (p1, p2))

  (* Intersection of two nodes; it is [iempty] as soon as one of them is
     physically the shared empty node. *)
  let iand p1 p2 =
    let is_iempty p = p == iempty in
    if is_iempty p1 || is_iempty p2 then iempty else mk (IAnd (p1, p2))

  (* Regular expressions over nodes, as produced by [derecurs_regexp] and
     compiled away by [remove_regexp]. *)
  type regexp =
    | PEpsilon                  (* empty sequence *)
    | PElem of node             (* one element *)
    | PGuard of node            (* node intersected with the continuation *)
    | PSeq of regexp * regexp   (* concatenation *)
    | PAlt of regexp * regexp   (* alternative *)
    | PStar of regexp           (* repetition; loop placed first in the union *)
    | PWeakStar of regexp       (* repetition; continuation placed first *)

  (* [remove_regexp r q] compiles the regular expression [r], followed by the
     continuation node [q], into a node: an element becomes a product with
     the continuation, a guard is intersected with it, and repetitions are
     expressed with a delayed node that is tied back once its body has been
     compiled. *)
  let rec remove_regexp r q = match r with
    | PEpsilon ->
	q
    | PElem p ->
	mk (ITimes (p, q))
    | PGuard p ->
	iand p q
    | PSeq (r1,r2) ->
	remove_regexp r1 (remove_regexp r2 q)
    | PAlt (r1,r2) ->
	ior (remove_regexp r1 q) (remove_regexp r2 q)
    (* The two star cases differ only in the order of the operands of the
       union ([x | q] versus [q | x]). *)
    | PStar r ->
	let x = mk_delayed () in
	let res = ior x q in
	x.desc <- ILink (remove_regexp2 r res iempty);
	res
    | PWeakStar r ->
	let x = mk_delayed () in
	let res = ior q x in
	x.desc <- ILink (remove_regexp2 r res iempty);
	res
	  
  (* Variant with two continuations: [q_nonempty] is used after a branch
     that consumed an element, [q_empty] after a branch that consumed
     nothing.  Star bodies are compiled with [iempty] as [q_empty], which
     discards the iterations that consume nothing (presumably to avoid
     ill-formed recursion -- to be confirmed). *)
  and remove_regexp2 r q_nonempty q_empty =
    if q_nonempty == q_empty then remove_regexp r q_empty
    else match r with
      | PEpsilon ->
          q_empty
      | PElem p ->
          mk (ITimes (p, q_nonempty))
      | PGuard p ->
	  iand p q_empty
      | PSeq (r1,r2) ->
          remove_regexp2 r1
            (remove_regexp2 r2 q_nonempty q_nonempty)
            (remove_regexp2 r2 q_nonempty q_empty)
      | PAlt (r1,r2) ->
          ior
            (remove_regexp2 r1 q_nonempty q_empty)
            (remove_regexp2 r2 q_nonempty q_empty)
      | PStar r ->
 	  let x = mk_delayed () in
          x.desc <- ILink (remove_regexp2 r (ior x q_nonempty) iempty);
          ior x q_empty
      | PWeakStar r ->
 	  let x = mk_delayed () in
          x.desc <- ILink (remove_regexp2 r (ior q_nonempty x) iempty);
          ior q_empty x


  (* The constant built from [Sequence.nil_atom]. *)
  let cst_nil = Types.Atom Sequence.nil_atom

  (* Intersect [p] with a capture node for every variable of [vars]. *)
  let capture_all vars p =
    let add_capture acc x = iand acc (mk (ICapture x)) in
    IdSet.fold add_capture p vars

  (* Unless [b] holds, extend the regexp [p] with one guard per variable of
     [vars], each binding that variable to the constant [cst_nil]. *)
  let termin b vars p =
    let bind_nil acc x = PSeq (acc, PGuard (mk (IConstant (x, cst_nil)))) in
    if b then p else IdSet.fold bind_nil p vars

  (* Compile a regexp whose continuation is the type [Sequence.nil_type]. *)
  let rexp r =
    let tail = itype Sequence.nil_type in
    remove_regexp r tail

  (* [derecurs env p] translates the AST type/pattern [p] into a node of the
     intermediate representation.  Names are resolved through [env]:
     recursive definitions in [penv_derec], everything else in the typing
     environment [penv_tenv]. *)
  let rec derecurs env p = match p.descr with
    | PatVar v -> derecurs_var env p.loc v
    | SchemaVar (kind, schema_name, component_name) ->

	let name = qname env.penv_tenv  p.loc component_name in
	itype (find_schema_descr env.penv_tenv kind schema_name name)

    (* Local recursive definitions: [p] is translated in the environment
       extended with the bindings [b]. *)
    | Recurs (p,b) -> derecurs (derecurs_def env b) p
    | Internal t -> itype t
    | NsT ns -> 
	itype (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
    | Or (p1,p2) -> mk (IOr (derecurs env p1, derecurs env p2))
    | And (p1,p2) -> mk (IAnd (derecurs env p1, derecurs env p2))
    | Diff (p1,p2) -> mk (IDiff (derecurs env p1, derecurs env p2))
    | Prod (p1,p2) -> mk (ITimes (derecurs env p1, derecurs env p2))
    | XmlT (p1,p2) -> mk (IXml (derecurs env p1, derecurs env p2))
    | Arrow (p1,p2) -> mk (IArrow (derecurs env p1, derecurs env p2))
    | Optional p -> mk (IOptional (derecurs env p))
    | Record (o,r) -> 
	let aux = function
	  | (p,Some e) -> (derecurs env p, Some (derecurs env e))
	  | (p,None) -> derecurs env p, None in
	mk (IRecord (o, parse_record env.penv_tenv p.loc aux r))
    | Constant (x,c) -> mk (IConstant (x,const env.penv_tenv p.loc c))
    | Cst c -> itype (Types.constant (const env.penv_tenv p.loc c))
    (* A regexp is first translated to the [regexp] type (the returned set
       of variables is dropped here), then compiled by [rexp]. *)
    | Regexp r ->
	let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
	rexp r
	  
  and derecurs_regexp vars b rvars f env = function
      (* - vars: seq variables to be propagated top-down and added
	 to each captured element
	 - b: below a star ?
	 - rvars: seq variables that appear on the right of the regexp
	 - f: tail position
	 
	 returns the set of seq variable of the regexp minus rvars
	 (they have already been terminated if not below a star)
      *)
    | Epsilon -> 
	PEpsilon, IdSet.empty
    | Elem p -> 
	PElem (capture_all vars (derecurs env p)), IdSet.empty
    | Guard p ->
	PGuard (derecurs env p), IdSet.empty
    (* The right-hand side is translated first: its variables are added to
       [rvars] for the left-hand side, which is never in tail position. *)
    | Seq (p1,p2) -> 
	let (p2,v2) = derecurs_regexp vars b rvars f env p2 in
	let (p1,v1) = derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
	PSeq (p1,p2), IdSet.cup v1 v2
    (* Each branch is terminated (see [termin]) for the variables that only
       the other branch defines. *)
    | Alt (p1,p2) -> 
	let (p1,v1) = derecurs_regexp vars b rvars f env p1
	and (p2,v2) = derecurs_regexp vars b rvars f env p2 in
	PAlt (termin b (IdSet.diff v2 v1) p1, termin b (IdSet.diff v1 v2) p2),
	IdSet.cup v1 v2
    | Star p -> 
	let (p,v) = derecurs_regexp vars true rvars false env p in
	termin b v (PStar p), v
    | WeakStar p -> 
	let (p,v) = derecurs_regexp vars true rvars false env p in
	termin b v (PWeakStar p), v
    (* In tail position the variable is bound by a guard with a plain
       capture; otherwise it is added to [vars], to be attached to every
       element below. *)
    | SeqCapture (x,p) -> 
	let vars = if f then vars else IdSet.add x vars in
	let after = IdSet.mem rvars x in
	let rvars = IdSet.add x rvars in
	let (p,v) = derecurs_regexp vars b rvars false env p in
	(if f 
	 then PSeq (PGuard (mk (ICapture x)), p) 
	 else termin (after || b) (IdSet.singleton x) p), 
	(if after then v else IdSet.add x v)
	  
	  
  (* Resolution of a name.  An unqualified name is looked up among the
     recursive definitions, then among the types of the typing environment;
     if both fail it is taken as a capture variable.  A qualified name
     [cu:v] designates a type of the compilation unit [cu]; an error is
     raised at [loc] when it is unbound. *)
  and derecurs_var env loc v =
    match Ns.split_qname v with
      | "", v ->
	  let v = ident v in
	  (try Env.find v env.penv_derec
	   with Not_found -> 
	     try itype (find_type v env.penv_tenv)
	     with Not_found -> mk (ICapture v))
      | cu, v -> 
	  try 
	    let cu = U.mk cu in
	    itype (find_type_global loc cu (ident v) env.penv_tenv)
	  with Not_found ->
	    raise_loc_generic loc 
	      ("Unbound external type " ^ cu ^ ":" ^ (U.to_string v))
	      
  (* Extend [env] with the mutually recursive definitions [b]: a delayed
     node is first bound to every name, then each definition is translated
     in the extended environment and its delayed node is linked to the
     result. *)
  and derecurs_def env b =
    let b = List.map (fun (v,p) -> (v,p,mk_delayed ())) b in
    let n = 
      List.fold_left (fun env (v,p,s) -> Env.add v s env) env.penv_derec b in
    let env = { env with penv_derec = n } in
    List.iter (fun (v,p,s) -> s.desc <- ILink (derecurs env p)) b;
    env

  (* Report an error at [loc] when the variable set [s] is not empty; the
     message names the variable returned by [IdSet.pick]. *)
  let check_no_capture loc s =
    match IdSet.pick s with
    | None -> ()
    | Some x ->
        let msg = "Capture variable not allowed: " ^ Ident.to_string x in
        raise_loc_generic loc msg

  (* Translate the AST type [t] into a type node.  Capture variables are
     rejected with an error located at [t]. *)
  let typ env t =
    let node = derecurs (penv env) t in
    let captured = fv node in
    check_no_capture t.loc captured;
    typ_node node

  (* Translate the AST pattern [t] into a pattern node. *)
  let pat env t =
    let node = derecurs (penv env) t in
    pat_node node


  module Ids = Set.Make(Id)
  (* [type_defs env b] processes a group of (possibly mutually recursive)
     type definitions [b], given as (identifier, AST type) pairs, and
     returns the list of (identifier, type description) pairs.

     - An identifier defined twice in [b] is an error.
     - Capture variables are rejected in each definition.
     - A warning is printed for a definition that yields an empty type,
       unless its location is [noloc].
     - Every resulting type is registered with the type printer under its
       identifier. *)
  let type_defs env b =
    ignore 
      (List.fold_left 
	 (fun seen (v,p) ->
	    if Ids.mem v seen then 
	      raise_loc_generic p.loc 
		("Multiple definitions for the type identifier " ^ 
		   (Ident.to_string v));
	    Ids.add v seen
	 ) Ids.empty b);
    
    (* All the names are bound before any body is translated. *)
    let penv = derecurs_def (penv env) b in
    let b = List.map (fun (v,p) -> (v,p,derecurs penv p)) b in
    let b = 
      List.map 
	(fun (v,p,s) -> 
	   check_no_capture p.loc (fv s);
	   let t = Types.descr (typ_node s) in
	   if (p.loc <> noloc) && (Types.is_empty t) then
	     warning p.loc 
	       ("This definition yields an empty type for " ^ (Ident.to_string v));
	   (v,t)) b in
    List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
    b

end

(*

781
782
(* Eliminate Recursion, propagate Sequence Capture Variables *)

783
784
785
786
787
788
789
790
791
792
793
794
795
796
(* We use two intermediate representation from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

797
798
799
800
type derecurs_slot = {
  ploc : Location.loc;
  pid  : int;
  mutable ploop : bool;
801
  mutable pdescr : derecurs;
802
} and derecurs =
803
  | PDummy
804
  | PAlias of derecurs_slot
805
  | PType of Types.descr * int
806
807
808
809
810
811
812
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
813
  | PRecord of bool * (derecurs * derecurs option) label_map
814
815
  | PCapture of id
  | PConstant of id * Types.const
816
  | PRegexp of derecurs_regexp
817
818
819
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
820
  | PGuard of derecurs
821
822
823
824
825
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850

let rec print_derecurs ppf = function
  | PDummy -> Format.fprintf ppf "Dummy"
  | PAlias a -> Format.fprintf ppf "Alias %i" a.pid
  | PType _ -> Format.fprintf ppf "Type"
  | POr (r1,r2) -> Format.fprintf ppf "Or(%a,%a)" 
      print_derecurs r1 print_derecurs r2
  | PAnd (r1,r2) -> Format.fprintf ppf "And(%a,%a)" 
      print_derecurs r1 print_derecurs r2
  | PDiff (r1,r2) -> Format.fprintf ppf "Diff(%a,%a)" 
      print_derecurs r1 print_derecurs r2
  | PTimes (r1,r2) -> Format.fprintf ppf "Times(%a,%a)" 
      print_derecurs r1 print_derecurs r2
  | PXml (r1,r2) -> Format.fprintf ppf "Xml(%a,%a)" 
      print_derecurs r1 print_derecurs r2
  | PRegexp r -> Format.fprintf ppf "Regexp(%a)" print_regexp r
  | _ -> Format.fprintf ppf "Other"
and print_regexp ppf = function
  | PEpsilon -> Format.fprintf ppf "e"
  | PElem r -> Format.fprintf ppf "(%a)" print_derecurs r
  | PGuard r -> Format.fprintf ppf "/(%a)" print_derecurs r
  | PSeq (r1,r2) -> Format.fprintf ppf "%a,%a" print_regexp r1 print_regexp r2
  | PAlt (r1,r2) -> Format.fprintf ppf "%a|%a" print_regexp r1 print_regexp r2
  | PStar r | PWeakStar r -> Format.fprintf ppf "%a*" print_regexp r

851
852
type descr = 
  | IDummy
853
  | IType of Types.descr * int
854
855
856
857
858
859
860
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
861
  | IRecord of bool * (slot * descr option) label_map
862
863
864
865
866
867
868
869
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;
  mutable hash : int option;
  mutable rank1: int; mutable rank2: int;
  mutable gen1 : int; mutable gen2: int;
  mutable d    : descr;
870
}
871
872
873
874
875
876
877
878
879
880
881
882
883
    

let counter = ref 0
let mk_derecurs_slot loc = 
  incr counter; 
  { ploop = false; ploc = loc; pid = !counter; pdescr = PDummy }
	  
let mk_slot () = 
  { d=IDummy; fv=None; hash=None; rank1=0; rank2=0; gen1=0; gen2=0 } 


(* This environment is used in phase (1) to eliminate recursion *)
type penv = {
884
  penv_tenv : t;
885
886
887
888
  penv_derec : derecurs_slot Env.t;
}

let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }
889

890
(* Structural hash on derecurs terms, compatible with [equal_derecurs].
   Each constructor contributes a distinct small tag; sub-hashes are mixed
   in with the multipliers 17 and 257.  Aliases are hashed by slot id, so
   the traversal never follows a recursive definition.
   Fails with an assertion on [PDummy]. *)
let rec hash_derecurs d =
  match d with
  | PDummy -> assert false
  | PAlias slot -> slot.pid
  | PType (_, h) -> 1 + 17 * h
  | POr (a, b) -> hash_derecurs_pair 2 a b
  | PAnd (a, b) -> hash_derecurs_pair 3 a b
  | PDiff (a, b) -> hash_derecurs_pair 4 a b
  | PTimes (a, b) -> hash_derecurs_pair 5 a b
  | PXml (a, b) -> hash_derecurs_pair 6 a b
  | PArrow (a, b) -> hash_derecurs_pair 7 a b
  | POptional a -> 8 + 17 * hash_derecurs a
  | PRecord (flag, fields) ->
      let tag = if flag then 9 else 10 in
      tag + 17 * LabelMap.hash hash_derecurs_field fields
  | PCapture x -> 11 + 17 * Id.hash x
  | PConstant (x, c) -> 12 + 17 * Id.hash x + 257 * Types.Const.hash c
  | PRegexp re -> 13 + 17 * hash_derecurs_regexp re

(* Hash of a binary node with constructor tag [tag]. *)
and hash_derecurs_pair tag a b =
  tag + 17 * hash_derecurs a + 257 * hash_derecurs b

(* Hash of a record field: the field pattern and its optional or-else part. *)
and hash_derecurs_field (p, orelse) =
  match orelse with
  | Some e -> 1 + 17 * hash_derecurs p + 257 * hash_derecurs e
  | None -> 2 + 17 * hash_derecurs p

(* Hash of a regular expression over derecurs terms. *)
and hash_derecurs_regexp re =
  match re with
  | PEpsilon -> 1
  | PElem d -> 2 + 17 * hash_derecurs d
  | PSeq (a, b) ->
      3 + 17 * hash_derecurs_regexp a + 257 * hash_derecurs_regexp b
  | PAlt (a, b) ->
      4 + 17 * hash_derecurs_regexp a + 257 * hash_derecurs_regexp b
  | PStar a -> 5 + 17 * hash_derecurs_regexp a
  | PWeakStar a -> 6 + 17 * hash_derecurs_regexp a
  | PGuard d -> 7 + 17 * hash_derecurs d
936
937

(* Structural equality on derecurs terms, with a physical-equality
   shortcut.  Aliases are equal only when they are the very same slot, so
   the comparison never unfolds a recursive definition. *)
let rec equal_derecurs p1 p2 =
  if p1 == p2 then true
  else
    match p1, p2 with
    | PAlias s1, PAlias s2 -> s1 == s2
    | PType (t1, h1), PType (t2, h2) -> h1 == h2 && Types.equal t1 t2
    | POr (a1, b1), POr (a2, b2)
    | PAnd (a1, b1), PAnd (a2, b2)
    | PDiff (a1, b1), PDiff (a2, b2)
    | PTimes (a1, b1), PTimes (a2, b2)
    | PXml (a1, b1), PXml (a2, b2)
    | PArrow (a1, b1), PArrow (a2, b2) ->
        equal_derecurs a1 a2 && equal_derecurs b1 b2
    | POptional a1, POptional a2 -> equal_derecurs a1 a2
    | PRecord (o1, r1), PRecord (o2, r2) ->
        o1 == o2 && LabelMap.equal equal_derecurs_field r1 r2
    | PCapture x1, PCapture x2 -> Id.equal x1 x2
    | PConstant (x1, c1), PConstant (x2, c2) ->
        Id.equal x1 x2 && Types.Const.equal c1 c2
    | PRegexp re1, PRegexp re2 -> equal_derecurs_regexp re1 re2
    | _ -> false

(* Two record fields are equal when their patterns are, and either both
   lack an or-else part or both carry equal ones. *)
and equal_derecurs_field (p1, o1) (p2, o2) =
  match o1, o2 with
  | None, None -> equal_derecurs p1 p2
  | Some e1, Some e2 -> equal_derecurs p1 p2 && equal_derecurs e1 e2
  | None, Some _ | Some _, None -> false

(* Structural equality on regular expressions over derecurs terms. *)
and equal_derecurs_regexp r1 r2 =
  match r1, r2 with
  | PEpsilon, PEpsilon -> true
  | PElem d1, PElem d2 -> equal_derecurs d1 d2
  | PGuard d1, PGuard d2 -> equal_derecurs d1 d2
  | PSeq (a1, b1), PSeq (a2, b2)
  | PAlt (a1, b1), PAlt (a2, b2) ->
      equal_derecurs_regexp a1 a2 && equal_derecurs_regexp b1 b2
  | PStar a1, PStar a2
  | PWeakStar a1, PWeakStar a2 -> equal_derecurs_regexp a1 a2
  | _ -> false
978

979
980
981
982
983
984
985
986
(* Hash tables keyed by derecurs terms, using the structural hash and
   equality defined above. *)
module DerecursTable = Hashtbl.Make (struct
  type t = derecurs
  let equal = equal_derecurs
  let hash = hash_derecurs
end)

987
988
989
990
(* Current traversal generation.  A slot whose [gen1] (or [gen2]) equals
   this value has already been visited by the traversal in progress; the
   counter is incremented by [SlotTable.hash], [SlotTable.equal] and
   [compute_fv] before they start a traversal. *)
let gen = ref 0
(* Number of slots visited so far in the current hash or equality
   traversal; reset to 0 by [SlotTable.hash] and [SlotTable.equal]. *)
let rank = ref 0
	     
(* Hash on compiled descriptions, following slots.

   [hash_slot] numbers each slot the first time the current traversal
   reaches it (see [gen] and [rank]) and hashes a slot that is met again by
   its visit number, which is how cycles are cut.  The visit numbers depend
   on the order in which sub-terms are traversed, so that order is spelled
   out with explicit bindings: second operand first, which is the order the
   plain arithmetic expression gives with the current OCaml compilers (the
   language itself leaves it unspecified).

   Fails with an assertion on [IDummy]. *)
let rec hash_descr d =
  match d with
  | IDummy -> assert false
  | IType (_, h) -> h
  | IOr (d1, d2) -> hash_descr_pair 1 d1 d2
  | IAnd (d1, d2) -> hash_descr_pair 2 d1 d2
  | IDiff (d1, d2) -> hash_descr_pair 3 d1 d2
  | IOptional d1 -> 4 + 17 * hash_descr d1
  | ITimes (s1, s2) -> hash_slot_pair 5 s1 s2
  | IXml (s1, s2) -> hash_slot_pair 6 s1 s2
  | IArrow (s1, s2) -> hash_slot_pair 7 s1 s2
  | IRecord (flag, fields) ->
      let tag = if flag then 8 else 9 in
      tag + 17 * LabelMap.hash hash_descr_field fields
  | ICapture x -> 10 + 17 * Id.hash x
  | IConstant (x, c) -> 11 + 17 * Id.hash x + 257 * Types.Const.hash c

(* Binary node over two descriptions; the second one is traversed first. *)
and hash_descr_pair tag d1 d2 =
  let h2 = hash_descr d2 in
  let h1 = hash_descr d1 in
  tag + 17 * h1 + 257 * h2

(* Binary node over two slots; the second one is traversed first. *)
and hash_slot_pair tag s1 s2 =
  let h2 = hash_slot s2 in
  let h1 = hash_slot s1 in
  tag + 17 * h1 + 257 * h2

(* Record field: the or-else part, when present, is traversed before the
   slot of the field. *)
and hash_descr_field (s, orelse) =
  match orelse with
  | Some e ->
      let he = hash_descr e in
      let hs = hash_slot s in
      1 + 17 * hs + 257 * he
  | None -> 2 + 17 * hash_slot s

(* A slot already marked in this generation hashes to a multiple of its
   visit number; otherwise it is numbered, marked, and its contents are
   hashed. *)
and hash_slot s =
  if s.gen1 <> !gen then begin
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end
  else 13 * s.rank1
    
(* Equality on compiled descriptions, following slots.  Two [IDummy]
   values are never considered equal (they fall in the default case). *)
let rec equal_descr d1 d2 =
  match d1, d2 with
  | IType (t1, h1), IType (t2, h2) -> h1 == h2 && Types.equal t1 t2
  | IOr (a1, b1), IOr (a2, b2)
  | IAnd (a1, b1), IAnd (a2, b2)
  | IDiff (a1, b1), IDiff (a2, b2) ->
      equal_descr a1 a2 && equal_descr b1 b2
  | IOptional a1, IOptional a2 -> equal_descr a1 a2
  | ITimes (l1, r1), ITimes (l2, r2)
  | IXml (l1, r1), IXml (l2, r2)
  | IArrow (l1, r1), IArrow (l2, r2) ->
      equal_slot l1 l2 && equal_slot r1 r2
  | IRecord (o1, f1), IRecord (o2, f2) ->
      o1 == o2 && LabelMap.equal equal_descr_field f1 f2
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1, c1), IConstant (x2, c2) ->
      Id.equal x1 x2 && Types.Const.equal c1 c2
  | _ -> false

(* Fields are equal when their slots are, and either both lack an or-else
   part or both carry equal ones. *)
and equal_descr_field (s1, o1) (s2, o2) =
  match o1, o2 with
  | None, None -> equal_slot s1 s2
  | Some e1, Some e2 -> equal_slot s1 s2 && equal_descr e1 e2
  | None, Some _ | Some _, None -> false

(* Simultaneous traversal of two slots.  The left slot is marked through
   [gen1]/[rank1], the right one through [gen2]/[rank2]:
   - both already visited: equal iff they were reached at the same rank;
   - neither visited: both receive the next rank, then their contents are
     compared;
   - only one visited: different. *)
and equal_slot s1 s2 =
  match s1.gen1 = !gen, s2.gen2 = !gen with
  | true, true -> s1.rank1 = s2.rank2
  | false, false ->
      incr rank;
      s1.rank1 <- !rank;
      s1.gen1 <- !gen;
      s2.rank2 <- !rank;
      s2.gen2 <- !gen;
      equal_descr s1.d s2.d
  | true, false | false, true -> false
  
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
(* Hash tables keyed by slots, compared up to the recursive structure
   reachable from them.  Each hash computation or comparison starts a new
   traversal generation and restarts the rank counter. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    (* Physically equal slots are equal; otherwise run [equal_slot] in a
       fresh generation. *)
    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        incr gen;
        rank := 0;
        (* debugging aid kept from the original code:
           if the result is true, Printf.eprintf "Recursive hash-consing: Equal\n" *)
        equal_slot s1 s2
      end

    (* The hash is computed once per slot and cached in its [hash] field. *)
    let hash s =
      match s.hash with
      | Some cached -> cached
      | None ->
          incr gen;
          rank := 0;
          let computed = hash_slot s in
          s.hash <- Some computed;
          computed
  end)

1065
(* Wraps a type as a derecurs term, storing its hash next to it. *)
let ptype t =
  let h = Types.hash t in
  PType (t, h)

(* The derecurs term for the empty type.  [por] and [pand] recognise this
   exact value by physical equality. *)
let pempty = ptype Types.empty
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124

(* Union of two derecurs terms that drops an operand when it is
   (physically) [pempty]. *)
let por p1 p2 =
  match p1 == pempty, p2 == pempty with
  | true, _ -> p2
  | false, true -> p1
  | false, false -> POr (p1, p2)

(* Intersection of two derecurs terms; yields [pempty] as soon as one
   operand is (physically) [pempty]. *)
let pand p1 p2 =
  match p1 == pempty || p2 == pempty with
  | true -> pempty
  | false -> PAnd (p1, p2)

(* [remove_regexp r q] translates the regular expression [r], followed by
   the continuation [q], into a plain derecurs term: an element becomes a
   product with the continuation, a guard is intersected with it, and
   alternation becomes a union.  Stars are turned into recursive
   definitions through fresh slots made with [mk_derecurs_slot]. *)
let rec remove_regexp r q = match r with
  | PEpsilon ->
      q
  | PElem p ->
      PTimes (p, q)
  | PGuard p ->
      pand p q
  | PSeq (r1,r2) ->
      remove_regexp r1 (remove_regexp r2 q)
  | PAlt (r1,r2) ->
      por (remove_regexp r1 q) (remove_regexp r2 q)
  | PStar r ->
      (* [res] is either one more iteration (the alias) or the
         continuation.  The body is translated with [res] as its
         non-empty continuation and [pempty] as its empty one. *)
      let x = mk_derecurs_slot noloc in
      let res = POr (PAlias x, q) in
      x.pdescr <- remove_regexp2 r res pempty;
      res
  | PWeakStar r ->
      (* Same as [PStar], with the operands of the union swapped:
         the continuation comes first. *)
      let x = mk_derecurs_slot noloc in
      let res = POr (q, PAlias x) in
      x.pdescr <- remove_regexp2 r res pempty;
      res

(* [remove_regexp2 r q_nonempty q_empty] is the variant with two
   continuations: [PEpsilon] continues with [q_empty] and [PElem] with
   [q_nonempty].  When both continuations are physically the same value it
   falls back to [remove_regexp]. *)
and remove_regexp2 r q_nonempty q_empty =
  if q_nonempty == q_empty then remove_regexp r q_empty
  else match r with
    | PEpsilon ->
        q_empty
    | PElem p ->
        PTimes (p, q_nonempty)
    | PGuard p ->
	pand p q_empty
    | PSeq (r1,r2) ->
        (* [r2] is translated twice: once as the non-empty continuation of
           [r1] (both continuations of [r2] are then [q_nonempty]) and
           once as its empty continuation. *)
        remove_regexp2 r1
        (remove_regexp2 r2 q_nonempty q_nonempty)
        (remove_regexp2 r2 q_nonempty q_empty)
    | PAlt (r1,r2) ->
        por
        (remove_regexp2 r1 q_nonempty q_empty)
        (remove_regexp2 r2 q_nonempty q_empty)
    | PStar r ->
        (* The slot [x] stands for one or more iterations of the body. *)
        let x = mk_derecurs_slot noloc in
        x.pdescr <- remove_regexp2 r (POr (PAlias x, q_nonempty)) pempty;
        por (PAlias x) q_empty
    | PWeakStar r ->
        (* Same as [PStar] with the operands of both unions swapped. *)
        let x = mk_derecurs_slot noloc in
        x.pdescr <- remove_regexp2 r (POr (q_nonempty, PAlias x)) pempty;
        por q_empty (PAlias x)

1125
1126
1127
1128
1129
1130
(* The constant built from the nil atom of sequences. *)
let cst_nil = Types.Atom Sequence.nil_atom

(* [capture_all vars p] intersects [p] with a capture of every variable
   in [vars]. *)
let capture_all vars p =
  let add_capture acc x = PAnd (acc, PCapture x) in
  IdSet.fold add_capture p vars
(* [termin b vars p] returns [p] unchanged when [b] is true.  Otherwise it
   appends to the regular expression [p], for each variable of [vars], a
   guard that binds this variable to the constant [cst_nil]. *)
let termin b vars p =
  match b with
  | true -> p
  | false ->
      let close acc x = PSeq (acc, PGuard (PConstant (x, cst_nil))) in
      IdSet.fold close p vars

1131
(* Phase (1): translates a parsed type or pattern [p] into a derecurs
   term, in the environment [env].  Names are resolved by [derecurs_var],
   local recursive definitions are bound by [derecurs_def], and regular
   expressions are kept as [PRegexp] nodes. *)
let rec derecurs env p =
  let tenv = env.penv_tenv and loc = p.loc in
  let sub = derecurs env in
  match p.descr with
  | PatVar v -> derecurs_var env loc v
  | SchemaVar (kind, schema_name, component_name) ->
      let name = qname tenv loc component_name in
      ptype (find_schema_descr tenv kind schema_name name)
  | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> ptype t
  | NsT ns -> ptype (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
  | Or (p1, p2) -> POr (sub p1, sub p2)
  | And (p1, p2) -> PAnd (sub p1, sub p2)
  | Diff (p1, p2) -> PDiff (sub p1, sub p2)
  | Prod (p1, p2) -> PTimes (sub p1, sub p2)
  | XmlT (p1, p2) -> PXml (sub p1, sub p2)
  | Arrow (p1, p2) -> PArrow (sub p1, sub p2)
  | Optional q -> POptional (sub q)
  | Record (o, r) ->
      let field = function
        | (q, Some e) -> (sub q, Some (sub e))
        | (q, None) -> (sub q, None)
      in
      PRecord (o, parse_record tenv loc field r)
  | Constant (x, c) -> PConstant (x, const tenv loc c)
  | Cst c -> ptype (Types.constant (const tenv loc c))
  | Regexp r ->
      (* Note: computing remove_regexp here is slower (because
         of caching ?) *)
      let (re, _) = derecurs_regexp IdSet.empty false IdSet.empty true env r in
      PRegexp re

1159
(* Translation of a parsed regular expression.
   - vars: seq variables to be propagated top-down and added
     to each captured element
   - b: below a star ?
   - rvars: seq variables that appear on the right of the regexp
   - f: tail position

   Returns the translated regexp together with the set of seq variables
   of the regexp minus rvars (they have already been terminated if not
   below a star). *)
and derecurs_regexp vars b rvars f env re =
  match re with
  | Epsilon -> (PEpsilon, IdSet.empty)
  | Elem p -> (PElem (capture_all vars (derecurs env p)), IdSet.empty)
  | Guard p -> (PGuard (derecurs env p), IdSet.empty)
  | Seq (left, right) ->
      (* The right operand goes first: its variables are added to [rvars]
         when translating the left operand, which is never in tail
         position. *)
      let (right_re, right_vars) = derecurs_regexp vars b rvars f env right in
      let (left_re, left_vars) =
        derecurs_regexp vars b (IdSet.cup rvars right_vars) false env left in
      (PSeq (left_re, right_re), IdSet.cup left_vars right_vars)
  | Alt (left, right) ->
      let (left_re, left_vars) = derecurs_regexp vars b rvars f env left
      and (right_re, right_vars) = derecurs_regexp vars b rvars f env right in
      (* Each branch terminates the variables that only occur in the other
         one. *)
      let left_re = termin b (IdSet.diff right_vars left_vars) left_re in
      let right_re = termin b (IdSet.diff left_vars right_vars) right_re in
      (PAlt (left_re, right_re), IdSet.cup left_vars right_vars)
  | Star body ->
      let (body_re, body_vars) =
        derecurs_regexp vars true rvars false env body in
      (termin b body_vars (PStar body_re), body_vars)
  | WeakStar body ->
      let (body_re, body_vars) =
        derecurs_regexp vars true rvars false env body in
      (termin b body_vars (PWeakStar body_re), body_vars)
  | SeqCapture (x, body) ->
      (* In tail position the variable is captured by a single guard put
         in front of the body; otherwise it is added to [vars] so that
         every element of the body captures it. *)
      let inner_vars = if f then vars else IdSet.add x vars in
      let after = IdSet.mem rvars x in
      let inner_rvars = IdSet.add x rvars in
      let (body_re, body_vars) =
        derecurs_regexp inner_vars b inner_rvars false env body in
      let result_re =
        if f then PSeq (PGuard (PCapture x), body_re)
        else termin (after || b) (IdSet.singleton x) body_re in
      let result_vars = if after then body_vars else IdSet.add x body_vars in
      (result_re, result_vars)

1200

1201
1202
1203
1204
1205
1206
(* Resolution of a name occurring in a type or pattern.
   An unqualified name is looked up among the recursive definitions in
   scope, then among the types of the typing environment, and is treated
   as a capture variable when both lookups fail.  A qualified name must
   denote a type of the named compilation unit; otherwise an error is
   raised at [loc]. *)
and derecurs_var env loc v =
  match Ns.split_qname v with
  | "", local ->
      let id = ident local in
      begin
        try PAlias (Env.find id env.penv_derec)
        with Not_found ->
          begin
            try ptype (find_type id env.penv_tenv)
            with Not_found -> PCapture id
          end
      end
  | unit_name, local ->
      begin
        try
          let cu = U.mk unit_name in
          ptype (find_type_global loc cu (ident local) env.penv_tenv)
        with Not_found ->
          let msg =
            "Unbound external type " ^ unit_name ^ ":" ^ (U.to_string local) in
          raise_loc_generic loc msg
      end

1217
1218
1219
1220
1221
1222
1223
1224
(* Binds a group of mutually recursive definitions [b].  A fresh slot is
   first made for every definition and bound to its name, and only then
   are the bodies translated, in the extended environment, and stored in
   the slots.  Returns the extended environment. *)
and derecurs_def env b =
  let slots =
    List.map (fun (name, body) -> (name, body, mk_derecurs_slot body.loc)) b in
  let derec =
    List.fold_left
      (fun acc (name, _, slot) -> Env.add name slot acc)
      env.penv_derec slots in
  let extended = { env with penv_derec = derec } in
  List.iter
    (fun (_, body, slot) -> slot.pdescr <- derecurs extended body)
    slots;
  extended

1225

1226
1227
1228
1229
1230
(* Free (capture) variables reachable from a slot.  A cached set is
   returned as is.  Otherwise the slot is marked with the current
   generation and its contents are explored; a slot that is met again
   during the same traversal contributes nothing, which cuts cycles. *)
let rec fv_slot s =
  match s.fv with
  | Some cached -> cached
  | None when s.gen1 = !gen -> IdSet.empty
  | None ->
      s.gen1 <- !gen;
      fv_descr s.d

(* Free variables of a description.  Fails with an assertion on [IDummy]. *)
and fv_descr d =
  match d with
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | IOr (d1, d2)
  | IAnd (d1, d2)
  | IDiff (d1, d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d1 -> fv_descr d1
  | ITimes (s1, s2)
  | IXml (s1, s2)
  | IArrow (s1, s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_, fields) ->
      List.fold_left IdSet.cup IdSet.empty
        (LabelMap.map_to_list fv_field fields)
  | ICapture x -> IdSet.singleton x
  | IConstant (x, _) -> IdSet.singleton x

(* Free variables of a record field, including its or-else part. *)
and fv_field (s, orelse) =
  match orelse with
  | Some e -> IdSet.cup (fv_slot s) (fv_descr e)
  | None -> fv_slot s

1249

1250
1251
1252
1253
1254
1255
1256
1257
(* Computes and caches the free variables of slot [s], unless they are
   already cached.  The computation runs in a fresh traversal generation. *)
let compute_fv s =
  match s.fv with
  | Some _ -> ()
  | None ->
      incr gen;
      s.fv <- Some (fv_slot s)
	  
1258
1259
1260
(* Raises an error located at [loc] when the variable set [s] is not
   empty; the message names one variable picked from the set. *)
let check_no_capture loc s =
  match IdSet.pick s with
  | None -> ()
  | Some x ->
      let name = Ident.to_string x in
      raise_loc_generic loc ("Capture variable not allowed: " ^ name)
1263
    
1264
1265
(* Memo table of [compile_slot]: maps a derecurs term to the slot already
   allocated for it. *)
let compile_slot_hash = DerecursTable.create 15067
(* Table meant to memoise [compile]; in the code visible here it is only
   referenced from the commented-out body of [compile]. *)
let compile_hash = DerecursTable.create 15067
1266

1267
1268
(* Work list filled by [compile_slot] with (slot, term) pairs whose slot
   contents still have to be compiled; emptied by [flush_defs]. *)
let todo_defs = ref []
(* Slots created by [compile_slot] whose free variables still have to be
   computed; processed and emptied by [flush_defs]. *)
let todo_fv = ref []
1269
1270

(* [compile p] translates the derecurs term [p] into a [descr].  It simply
   forwards to [real_compile]; the memoising variant kept in the comment
   below is disabled. *)
let rec compile p =
  real_compile p
(*
  print_char '*'; flush stdout;
  try Stats.InOut.wrap "lookup" (DerecursTable.find compile_hash) p; 
  with Not_found ->
    Stats.InOut.enter "compile";
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    Stats.InOut.leave "compile";
    c
*)

(* Actual translation.
   - Aliases are unfolded in place.  The [ploop] flag of a slot is set
     while its definition is being unfolded; meeting the same slot again
     before reaching a constructor that goes through [compile_slot] means
     that the recursion is unguarded, which is reported at the location of
     the slot.
   - Products, XML elements, arrows and record fields are compiled through
     [compile_slot], i.e. lazily via the work list.
   - A regular expression is first expanded by [remove_regexp], with the
     nil sequence type as continuation.
   Fails with an assertion on [PDummy]. *)
and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r =
        (* Clear the flag on failure as well: a flag left set after an
           error would make any later compilation going through this slot
           report a spurious unguarded recursion. *)
        try compile v.pdescr
        with exn -> v.ploop <- false; raise exn
      in
      v.ploop <- false;
      r
  | PType (t,h) -> IType (t,h)
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) ->  IRecord (o, LabelMap.map compile_field r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp r -> compile (remove_regexp r (ptype Sequence.nil_type))
1303

1304
1305
1306
1307
(* Compiles a record field: the field itself goes through a slot, the
   or-else part (when present) is compiled directly. *)
and compile_field (p, orelse) =
  match orelse with
  | Some e -> (compile_slot p, Some (compile e))
  | None -> (compile_slot p, None)

1308
1309
(* Returns the slot associated with the derecurs term [p].  A term seen
   for the first time gets a blank slot, which is queued both for
   compilation of its contents and for the free-variable computation (see
   [flush_defs]) and recorded in the memo table before being returned. *)
and compile_slot p =
  let allocate () =
    let fresh = mk_slot () in
    todo_defs := (fresh, p) :: !todo_defs;
    todo_fv := fresh :: !todo_fv;
    DerecursTable.add compile_slot_hash p fresh;
    fresh
  in
  try DerecursTable.find compile_slot_hash p
  with Not_found -> allocate ()
1316

1317
      
1318
(* Timer around the free-variable computation done by [flush_defs]. *)
let timer_fv = Stats.Timer.create "Typer.fv"

(* Empties the work lists filled by [compile_slot].  Pending slots are
   compiled one at a time until none is left (compiling one may queue
   more); then the free variables of every slot in [todo_fv] are computed,
   under [timer_fv], and that list is cleared. *)
let flush_defs () =
  let rec drain () =
    match !todo_defs with
    | [] -> ()
    | (s, p) :: rest ->
        (* debugging aid kept from the original code:
           Format.fprintf Format.std_formatter "flush slot:%a@."
             print_derecurs p; *)
        todo_defs := rest;
        s.d <- compile p;
        drain ()
  in
  drain ();
  Stats.Timer.start timer_fv;
  List.iter compute_fv !todo_fv;
  todo_fv := [];
  Stats.Timer.stop timer_fv ()
1332
1333
1334
1335
1336
	
(* Type nodes already built for slots; used by [typ_node]. *)
let typ_nodes = SlotTable.create 67
(* Slot-indexed table for patterns; its use is not visible in this part of
   the file (presumably the pattern counterpart of [typ_nodes]). *)
let pat_nodes = SlotTable.create 67
		  
(* Builds the type denoted by a compiled description.  Captures, constant
   bindings and [IDummy] are not expected here and fail with an
   assertion. *)
let rec typ d =
  match d with
  | IDummy | ICapture _ | IConstant (_, _) -> assert false
  | IType (t, _) -> t
  | IOr (a, b) -> Types.cup (typ a) (typ b)
  | IAnd (a, b) -> Types.cap (typ a) (typ b)
  | IDiff (a, b) -> Types.diff (typ a) (typ b)
  | ITimes (l, r) -> Types.times (typ_node l) (typ_node r)
  | IXml (l, r) -> Types.xml (typ_node l) (typ_node r)
  | IArrow (l, r) -> Types.arrow (typ_node l) (typ_node r)
  | IOptional a -> Types.Record.or_absent (typ a)
  | IRecord (o, fields) -> Types.record' (o, LabelMap.map typ_field fields)

(* Type node of a record field; a field carrying an or-else part is
   rejected. *)
and typ_field (s, orelse) =
  match orelse with
  | None -> typ_node s
  | Some _ ->
      raise (Patterns.Error "Or-else clauses are not allowed in types")

(* Type node associated with a slot.  On first use a fresh node is
   registered in [typ_nodes] before its definition is computed, so that a
   recursive reference to the same slot finds the node instead of
   looping. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* Builds the pattern denoted by a compiled description.  A description
   without free variables is turned into a type constraint; anything else
   is handled by [pat_aux]. *)
let rec pat d : Patterns.descr =
  match IdSet.is_empty (fv_descr d) with
  | true -> Patterns.constr (typ d)
  | false -> pat_aux d
    
and pat_aux = function
1367
  | IDummy -> assert false
1368
1369
1370
1371
1372
1373
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
1374
      raise (Patterns.Error "Differences are not allowed in patterns")