typer.ml 56.4 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraints
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
let debug_schema = false

13
(* [warning loc msg] prints a warning for source location [loc] on the
   formatter of the current viewport, highlights the location, then
   prints [msg] and flushes. *)
let warning loc msg =
  let ppf = Html.ppf (Location.get_viewport ()) in
  let where = (loc,`Full) in
  Format.fprintf ppf "Warning %a:@\n" Location.print_loc where;
  Location.html_hilight where;
  Format.fprintf ppf "%s@." msg
19

20
21
22
23
24
25
26
27
28
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string

29
30
31

exception Warning of string * Types.t

32
33
34
35
(* Helpers raising an exception wrapped in [Location] so that it carries
   a source position.
   - [raise_loc] tags the whole location ([`Full]);
   - [raise_loc_str] tags a character offset [ofs] ([`Char ofs]);
   - [error] wraps a plain message in [Error] and raises it via
     [raise_loc]. *)
let raise_loc loc exn =
  let located = Location (loc,`Full,exn) in
  raise located
let raise_loc_str loc ofs exn =
  let located = Location (loc,`Char ofs,exn) in
  raise located
let error loc msg = raise_loc loc (Error msg)

36
37
(* An entry of the identifier environment.  Type names and value names
   live in the same map and are told apart by the constructor. *)
type item =
  | Type of Types.t  (* entered by [enter_type] / [enter_types] *)
  | Val of Types.t   (* entered by [enter_value] / [enter_values] *)
39

40
41
module UEnv = Map.Make(U)

42
(* The typing environment. *)
type t = {
  ids : item Env.t;             (* identifiers bound to types or values *)
  ns : Ns.table;                (* namespace prefix table *)
  cu : Types.CompUnit.t UEnv.t; (* names bound to compilation units *)
  schemas : string UEnv.t       (* schema names bound to their URI *)
}
48

49
50
51
52
53
(* Placeholder operations: each one ignores its arguments and raises
   [Failure] carrying its own qualified name.
   NOTE(review): presumably these exist only so that this module matches
   some expected signature -- confirm against the users of this module. *)
let hash _ = failwith "Typer.hash"
let compare _ _ = failwith "Typer.compare"
let dump ppf _ = failwith "Typer.dump"
let equal _ _ = failwith "Typer.equal"
let check _ = failwith "Typer.check"
54
55

(* TODO: filter out builtin defs ? *)
56
57
58
59
(* Writes one environment item on [s]: a one-bit tag (0 for [Type],
   1 for [Val]) followed by the serialized type. *)
let serialize_item s item =
  let (tag, t) = match item with
    | Type t -> (0, t)
    | Val t -> (1, t) in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

60
(* Serializes an environment on [s]: first the identifier map, then the
   namespace table.  The [cu] and [schemas] fields are not written. *)
let serialize s env =
  let ids = env.ids and ns = env.ns in
  Serialize.Put.env Id.serialize serialize_item Env.iter s ids;
  Ns.serialize_table s ns
63

64
65
66
67
68
(* Reads back one item written by [serialize_item]: a one-bit tag
   selects the constructor, then the type follows.  A one-bit read can
   only produce 0 or 1, hence the final [assert false]. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

69
(* Rebuilds an environment from [s]: the identifier map first, then the
   namespace table (same order as [serialize]).  The compilation unit
   and schema maps start out empty. *)
let deserialize s =
  let ids =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let ns = Ns.deserialize_table s in
  { cu = UEnv.empty; schemas = UEnv.empty; ids = ids; ns = ns }
73
74


75
76
(* The environment with no identifier, the empty namespace table, no
   compilation unit and no schema. *)
let empty_env =
  { ids = Env.empty;
    ns = Ns.empty_table;
    cu = UEnv.empty;
    schemas = UEnv.empty }

82
83
let from_comp_unit = ref (fun cu -> assert false)

84
(* [enter_cu x cu env] returns [env] extended with the binding of the
   name [x] to the compilation unit [cu]. *)
let enter_cu x cu env =
  let units = UEnv.add x cu env.cu in
  { env with cu = units }
86

87
88
89
(* [find_cu x env] returns the compilation unit bound to [x] in [env];
   when there is no such binding, it falls back to
   [Types.CompUnit.mk x]. *)
let find_cu x env =
  let units = env.cu in
  try UEnv.find x units
  with Not_found -> Types.CompUnit.mk x
90
91


92
93
94
95
96
97
(* [enter_schema x uri env] binds the schema name [x] to [uri]. *)
let enter_schema x uri env =
  let bound = UEnv.add x uri env.schemas in
  { env with schemas = bound }

(* [find_schema x env] returns the URI bound to the schema name [x].
   Raises [Error] (not located) when [x] is unknown. *)
let find_schema x env =
  try UEnv.find x env.schemas
  with Not_found ->
    let msg = (U.get_str x) ^ ": no such schema" in
    raise (Error msg)

98
99
100
101
102
103
104
105
(* [enter_type id t env] binds [id] to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }

(* [enter_types l env] binds every [(id,t)] of [l] as a type, from left
   to right (a later pair overrides an earlier one with the same id). *)
let enter_types l env =
  let add accu (id,t) = Env.add id (Type t) accu in
  { env with ids = List.fold_left add env.ids l }

(* [find_type id env] returns the type bound to [id].  Raises
   [Not_found] when [id] is unbound or bound to a value. *)
let find_type id env =
  let entry = Env.find id env.ids in
  match entry with
    | Type t -> t
    | Val _ -> raise Not_found
107

108
(* [find_type_global loc cu id env] resolves the compilation unit named
   [cu] through [find_cu], obtains its environment through the
   [from_comp_unit] hook, and looks the type [id] up there.  [loc] is
   currently unused.  Raises [Not_found] as [find_type] does. *)
let find_type_global loc cu id env =
  let cu_env = !from_comp_unit (find_cu cu env) in
  find_type id cu_env

113
(* [enter_value id t env] binds [id] as a value carrying [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }

(* [enter_values l env] binds every [(id,t)] of [l] as a value, from
   left to right. *)
let enter_values l env =
  let add accu (id,t) = Env.add id (Val t) accu in
  { env with ids = List.fold_left add env.ids l }

(* [enter_values_dummy l env] binds every identifier of [l] as a value
   carrying the placeholder [Types.empty]. *)
let enter_values_dummy l env =
  let add accu id = Env.add id (Val Types.empty) accu in
  { env with ids = List.fold_left add env.ids l }
121
122
(* [find_value id env] returns what is attached to the value [id].
   Raises [Not_found] when [id] is unbound or bound to a type. *)
let find_value id env =
  let entry = Env.find id env.ids in
  match entry with
    | Val t -> t
    | Type _ -> raise Not_found

(* [find_value_global cu id env] looks the value [id] up in the
   environment that the [from_comp_unit] hook returns for [cu].  The
   [env] argument itself is not consulted. *)
let find_value_global cu id env =
  let cu_env = !from_comp_unit cu in
  find_value id cu_env
128
	
129
130
131
132
133
134
(* [value_name_ok id env] is [false] exactly when [id] is currently
   bound to a type; unbound identifiers and identifiers bound to values
   are accepted. *)
let value_name_ok id env =
  try
    (match Env.find id env.ids with
       | Val _ -> true
       | Type _ -> false)
  with Not_found -> true

135
136
137
138
(* [iter_values env f] applies [f id t] to every value binding of
   [env]; type bindings are skipped. *)
let iter_values env f =
  let visit x = function
    | Val t -> f x t
    | Type _ -> () in
  Env.iter visit env.ids
139

140

141
142
143
144
145
146
147
148
149
(* [register_types cu env] registers every type binding of [env] with
   the type printer, under the name obtained by concatenating the value
   of [cu], a colon and the identifier.  Value bindings are skipped. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x = function
    | Type t ->
        let n = U.concat prefix (Id.value x) in
        Types.Print.register_global n t
    | Val _ -> () in
  Env.iter register env.ids

150

151
(* Namespaces *)
152

153
(* Installs the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table

(* Returns the namespace table of the environment. *)
let get_ns_table tenv = tenv.ns

(* [enter_ns p ns env] returns [env] with the prefix [p] bound to the
   namespace [ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
160

161
162
163
164
165
(* [protect_error_ns loc f x] computes [f x], turning an
   [Ns.UnknownPrefix] exception into an error located at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
166

167
(* [parse_atom env loc t] resolves the tag [t] against the namespace
   table of [env] and builds the corresponding atom.  An unknown prefix
   is reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.ns in
  let (ns,l) = protect_error_ns loc resolve t in
  Atoms.V.mk ns l
 
(* [parse_ns env loc ns] maps the prefix [ns] through the namespace
   table of [env]; an unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
173

174
(* [parse_label env loc t] resolves the attribute name [t] against the
   namespace table of [env] and interns it in [LabelPool].  An unknown
   prefix is reported at [loc]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let qname = protect_error_ns loc resolve t in
  LabelPool.mk qname
177

178
179
180
181
182
183
184
185
186
187
188
189
190
(* [parse_record env loc f r] turns the association list [r] into a
   label map: labels are resolved with [parse_label] and contents are
   transformed with [f].  A label occurring twice is reported at
   [loc]. *)
let parse_record env loc f r =
  let field (l,x) = (parse_label env loc l, f x) in
  let duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  LabelMap.from_list duplicate (List.map field r)

(* [const env loc e] converts a constant expression of the AST into a
   [Types] constant.  [loc] is the location used for error reports; it
   is replaced by the inner one when a [LocatedExpr] is crossed.  Any
   expression form not listed here is rejected with a located error. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
  | LocatedExpr (loc,e) -> const env loc e
  | Pair (x,y) -> Types.Pair (sub x, sub y)
  | Xml (x,y) -> Types.Xml (sub x, sub y)
  | RecordLitt x -> Types.Record (parse_record env loc sub x)
  | String (i,j,s,c) -> Types.String (i,j,s,sub c)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
196

197

198
(* Schema *)
199

200
201
202
let is_registered_schema env s = UEnv.mem s env.schemas

(* uri -> schema binding *)
203
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)
204
205
206

let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
207
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
208
209
210
211
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
212

213
214


215
216
  (* raise Not_found *)

217
218
219
220

let get_schema_fwd = ref (fun _ -> assert false)

(* [find_schema_descr_uri kind uri name] returns the component [name]
   recorded for the schema [uri] in the schema tables.

   [kind] selects the table to search.  With [None] the tables are
   tried in a fixed order and the first hit wins.

   The [get_schema_fwd] hook is called first and its result is
   discarded (presumably it loads the schema so that the tables are
   populated -- confirm with the code installing the hook).

   Raises [Error] (not located) when nothing is found; a [Not_found]
   escaping from the hook is reported the same way. *)
let find_schema_descr_uri kind uri name =
  try
    ignore (!get_schema_fwd uri);
    let elt () = Hashtbl.find !schema_elements (uri, name) in
    let typ () = Hashtbl.find !schema_types (uri, name) in
    let att () = Hashtbl.find !schema_attributes (uri, name) in
    let att_group () = Hashtbl.find !schema_attribute_groups (uri, name) in
    let mod_group () = Hashtbl.find !schema_model_groups (uri, name) in
    (* Result of the first lookup that does not raise [Not_found] *)
    let rec first_found = function
      | [] -> raise Not_found
      | f :: rem -> (try f () with Not_found -> first_found rem)
    in
    match kind with
      | Some `Element -> elt ()
      | Some `Type -> typ ()
      | Some `Attribute -> att ()
      | Some `Attribute_group -> att_group ()
      | Some `Model_group -> mod_group ()
      | None ->
          (* policy for unqualified schema component resolution. This order should
           * be consistent with Schema_component.get_component *)
          first_found [ elt; typ; att; att_group; mod_group ]
  with Not_found ->
    raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
		    (Schema_common.string_of_component_kind kind) (U.get_str name) uri))

(* [find_schema_descr env kind schema name] resolves the schema name
   [schema] to its URI in [env], then looks the component up with
   [find_schema_descr_uri].  Raises [Error] when either step fails. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

250

251
252
(* Eliminate Recursion, propagate Sequence Capture Variables *)

253
254
255
256
257
258
259
260
261
262
263
264
265
266
(* We use two intermediate representations from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

267
268
269
270
(* First intermediate representation.  Recursive definitions are tied
   through mutable slots; regular expressions are still present. *)
type derecurs_slot = {
  ploc : Location.loc;       (* location given when the slot was made *)
  pid  : int;                (* unique number; used as hash value *)
  mutable ploop : bool;      (* set while the slot is being compiled *)
  mutable pdescr : derecurs; (* [PDummy] until the slot is defined *)
}
and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * (derecurs * derecurs option) label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PGuard of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

296
297
298
299
300
301
302
303
304
305
(* Second intermediate representation.  Products, XML elements, arrows
   and record fields point to mutable slots; regular expressions are
   gone. *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * (slot * descr option) label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;     (* cached set of capture variables *)
  mutable hash : int option;  (* cached hash value *)
  (* Marks used by the recursive hashing / equality / free variable
     traversals; a mark is meaningful only when the matching [gen]
     field equals the current generation counter. *)
  mutable rank1 : int; mutable rank2 : int;
  mutable gen1 : int; mutable gen2 : int;
  mutable d : descr;          (* [IDummy] until the slot is defined *)
}
316
317
318
319
320
321
322
323
324
325
326
327
328
    

let counter = ref 0
(* [mk_derecurs_slot loc] creates an undefined slot of the first
   representation, numbered with the next value of [counter]. *)
let mk_derecurs_slot loc =
  incr counter;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }

(* [mk_slot ()] creates an undefined slot of the second representation,
   with no cached data and all traversal marks set to 0. *)
let mk_slot () =
  { d = IDummy;
    fv = None; hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0 }


(* This environment is used in phase (1) to eliminate recursion *)
(* This environment is used in phase (1) to eliminate recursion *)
type penv = {
  penv_tenv : t;                    (* the typing environment *)
  penv_derec : derecurs_slot Env.t; (* recursive names in scope *)
}

(* [penv tenv] wraps [tenv] with no recursive name in scope. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
334

335
(* Structural hash on the first representation, meant to be used with
   [equal_derecurs].  Every constructor gets its own small tag; the
   hashes of the components are mixed in with the multipliers 17 and
   257.  An alias hashes to the number of its slot, so the function
   does not follow cycles.  [PDummy] must never be hashed. *)
let rec hash_derecurs p =
  match p with
  | PDummy -> assert false
  | PAlias s -> s.pid
  | PType t -> 1 + 17 * (Types.hash t)
  | POr (p1,p2) -> hash_derecurs_pair 2 p1 p2
  | PAnd (p1,p2) -> hash_derecurs_pair 3 p1 p2
  | PDiff (p1,p2) -> hash_derecurs_pair 4 p1 p2
  | PTimes (p1,p2) -> hash_derecurs_pair 5 p1 p2
  | PXml (p1,p2) -> hash_derecurs_pair 6 p1 p2
  | PArrow (p1,p2) -> hash_derecurs_pair 7 p1 p2
  | POptional q -> 8 + 17 * (hash_derecurs q)
  | PRecord (o,r) ->
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs_field r)
  | PCapture x -> 11 + 17 * (Id.hash x)
  | PConstant (x,c) -> 12 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
  | PRegexp r -> 13 + 17 * (hash_derecurs_regexp r)

(* [tag] mixed with the hashes of two sub-terms *)
and hash_derecurs_pair tag p1 p2 =
  tag + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)

(* Hash of a record field: content plus optional or-else part *)
and hash_derecurs_field = function
  | (p, Some e) -> hash_derecurs_pair 1 p e
  | (p, None) -> 2 + 17 * hash_derecurs p

and hash_derecurs_regexp r =
  match r with
  | PEpsilon -> 1
  | PElem p -> 2 + 17 * (hash_derecurs p)
  | PSeq (r1,r2) -> hash_derecurs_regexp_pair 3 r1 r2
  | PAlt (r1,r2) -> hash_derecurs_regexp_pair 4 r1 r2
  | PStar r1 -> 5 + 17 * (hash_derecurs_regexp r1)
  | PWeakStar r1 -> 6 + 17 * (hash_derecurs_regexp r1)
  | PGuard p -> 7 + 17 * (hash_derecurs p)

(* [tag] mixed with the hashes of two sub-expressions *)
and hash_derecurs_regexp_pair tag r1 r2 =
  tag + 17 * (hash_derecurs_regexp r1) + 257 * (hash_derecurs_regexp r2)
381
382

(* Structural equality on the first representation.  Physically equal
   terms are equal; aliases are equal only when they point to the very
   same slot, so the comparison does not follow cycles. *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 -> s1 == s2
  | PType t1, PType t2 -> Types.equal t1 t2
  | POr (a1,b1), POr (a2,b2)
  | PAnd (a1,b1), PAnd (a2,b2)
  | PDiff (a1,b1), PDiff (a2,b2)
  | PTimes (a1,b1), PTimes (a2,b2)
  | PXml (a1,b1), PXml (a2,b2)
  | PArrow (a1,b1), PArrow (a2,b2) ->
      (equal_derecurs a1 a2) && (equal_derecurs b1 b2)
  | POptional a1, POptional a2 -> equal_derecurs a1 a2
  | PRecord (o1,r1), PRecord (o2,r2) ->
      (o1 = o2) && (LabelMap.equal equal_derecurs_field r1 r2)
  | PCapture x1, PCapture x2 -> Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) ->
      (Id.equal x1 x2) && (Types.Const.equal c1 c2)
  | PRegexp r1, PRegexp r2 -> equal_derecurs_regexp r1 r2
  | _ -> false

(* Two fields are equal when their contents are, and when they either
   both lack an or-else part or have equal ones. *)
and equal_derecurs_field f1 f2 = match (f1,f2) with
  | (a1,None),(a2,None) -> equal_derecurs a1 a2
  | (a1, Some e1), (a2, Some e2) ->
      equal_derecurs a1 a2 && equal_derecurs e1 e2
  | _ -> false

and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon -> true
  | PElem a1, PElem a2
  | PGuard a1, PGuard a2 -> equal_derecurs a1 a2
  | PSeq (a1,b1), PSeq (a2,b2)
  | PAlt (a1,b1), PAlt (a2,b2) ->
      (equal_derecurs_regexp a1 a2) && (equal_derecurs_regexp b1 b2)
  | PStar a1, PStar a2
  | PWeakStar a1, PWeakStar a2 -> equal_derecurs_regexp a1 a2
  | _ -> false
423

424
425
426
427
428
429
430
431
(* Hash tables keyed by terms of the first representation, using the
   structural hash and equality defined above (aliases are compared by
   slot identity). *)
module DerecursTable = Hashtbl.Make(
  struct 
    type t = derecurs 
    let hash = hash_derecurs
    let equal = equal_derecurs
  end
)

432
433
434
435
let gen = ref 0
let rank = ref 0
	     
(* Hash on the second representation, following slots.

   The traversal is stateful: a slot met for the first time in the
   current generation ([gen]) receives the next [rank] and is hashed
   through its contents; a slot met again hashes to a function of its
   rank only, which is what stops the recursion on cyclic structures.
   The arithmetic expressions below are deliberately left untouched:
   since ranks are handed out in visiting order, the result depends on
   the order in which the operands are evaluated. *)
let rec hash_descr d =
  match d with
  | IDummy -> assert false
  | IType x -> Types.hash x
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) ->
      (if o then 8 else 9) + 17 * (LabelMap.hash hash_descr_field r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)

and hash_descr_field = function
  | (d, Some e) -> 1 + 17 * hash_slot d + 257 * hash_descr e
  | (d, None) -> 2 + 17 * hash_slot d

and hash_slot s =
  if s.gen1 <> !gen then begin
    (* first visit in this generation: number the slot, then descend *)
    incr rank;
    s.rank1 <- !rank; s.gen1 <- !gen;
    hash_descr s.d
  end
  else 13 * s.rank1
    
(* Equality on the second representation, following slots.

   Like [hash_descr] this is stateful.  The left-hand slots are marked
   through [gen1]/[rank1] and the right-hand ones through
   [gen2]/[rank2].  Two slots that are both unmarked in the current
   generation receive the same fresh rank and their contents are
   compared; two slots that are both marked are equal exactly when
   their ranks agree; a marked slot never equals an unmarked one. *)
let rec equal_descr d1 d2 =
  match (d1,d2) with
  | IType x1, IType x2 -> Types.equal x1 x2
  | IOr (x1,y1), IOr (x2,y2)
  | IAnd (x1,y1), IAnd (x2,y2)
  | IDiff (x1,y1), IDiff (x2,y2) -> (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2)
  | IXml (x1,y1), IXml (x2,y2)
  | IArrow (x1,y1), IArrow (x2,y2) -> (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) ->
      (o1 = o2) && (LabelMap.equal equal_descr_field r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) ->
      (Id.equal x1 x2) && (Types.Const.equal y1 y2)
  | _ -> false

and equal_descr_field f1 f2 = match (f1,f2) with
  | (s1,None),(s2,None) -> equal_slot s1 s2
  | (s1, Some e1), (s2, Some e2) -> equal_slot s1 s2 && equal_descr e1 e2
  | _ -> false

and equal_slot s1 s2 =
  let seen1 = (s1.gen1 = !gen) and seen2 = (s2.gen2 = !gen) in
  if seen1 && seen2 then s1.rank1 = s2.rank2
  else if (not seen1) && (not seen2) then begin
    incr rank;
    s1.rank1 <- !rank; s1.gen1 <- !gen;
    s2.rank2 <- !rank; s2.gen2 <- !gen;
    equal_descr s1.d s2.d
  end
  else false
  
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
(* Hash tables keyed by slots of the second representation.

   [hash] caches its result in the [hash] field of the slot.  Both
   [hash] and [equal] open a new traversal generation (increment [gen],
   reset [rank]) before calling the recursive functions, so that marks
   left on slots by earlier traversals are ignored. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot
	
    let hash s =
      match s.hash with
	| Some h -> h
	| None ->
	    incr gen; rank := 0; 
	    let h = hash_slot s in
	    s.hash <- Some h;
	    h
	      
    let equal s1 s2 = 
      (s1 == s2) || 
      (incr gen; rank := 0; 
       let e = equal_slot s1 s2 in
       (*     if e then Printf.eprintf "Recursive hash-consing: Equal\n";  *)
       e)
  end)


511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
let pempty = PType Types.empty

(* Smart constructors recognising [pempty].  The test is physical
   equality with the [pempty] value itself: another term denoting the
   empty type is not simplified. *)

(* Union; [pempty] is neutral. *)
let por p1 p2 =
  match (p1 == pempty), (p2 == pempty) with
    | true, _ -> p2
    | false, true -> p1
    | false, false -> POr (p1,p2)

(* Intersection; [pempty] is absorbing. *)
let pand p1 p2 =
  if p1 == pempty then pempty
  else if p2 == pempty then pempty
  else PAnd (p1,p2)

(* [remove_regexp r q] translates the regular expression [r], followed
   by the continuation [q], into a regexp-free term:
   - an element becomes a product with the continuation;
   - a guard is intersected with the continuation ([pand]);
   - a sequence threads the translation of its right part as the
     continuation of its left part;
   - an alternative is the union ([por]) of both translations;
   - a star allocates a fresh slot [x] and returns the union of
     [PAlias x] and [q]; the slot is defined as one iteration of the
     body followed by that same union.  [PStar] puts the alias first in
     the union and [PWeakStar] puts it last.

   The body of a star is translated with [remove_regexp2] and [pempty]
   as second continuation.
   NOTE(review): the names suggest that [q_nonempty] is the
   continuation used once [r] has consumed at least one element and
   [q_empty] the one used when it has consumed none, so that an
   iteration consuming nothing contributes [pempty] instead of an
   unguarded loop on the slot -- confirm. *)
let rec remove_regexp r q = match r with
  | PEpsilon ->
      q
  | PElem p ->
      PTimes (p, q)
  | PGuard p ->
      pand p q
  | PSeq (r1,r2) ->
      remove_regexp r1 (remove_regexp r2 q)
  | PAlt (r1,r2) ->
      por (remove_regexp r1 q) (remove_regexp r2 q)
  | PStar r ->
      let x = mk_derecurs_slot noloc in
      let res = POr (PAlias x, q) in
      x.pdescr <- remove_regexp2 r res pempty;
      res
  | PWeakStar r ->
      let x = mk_derecurs_slot noloc in
      let res = POr (q, PAlias x) in
      x.pdescr <- remove_regexp2 r res pempty;
      res

(* Same translation with two continuations.  When both are physically
   the same term, this is just [remove_regexp]. *)
and remove_regexp2 r q_nonempty q_empty =
  if q_nonempty == q_empty then remove_regexp r q_empty
  else match r with
    | PEpsilon ->
        q_empty
    | PElem p ->
        PTimes (p, q_nonempty)
    | PGuard p ->
	pand p q_empty
    | PSeq (r1,r2) ->
        (* the right part is translated twice: once for each possible
           continuation of the left part *)
        remove_regexp2 r1
        (remove_regexp2 r2 q_nonempty q_nonempty)
        (remove_regexp2 r2 q_nonempty q_empty)
    | PAlt (r1,r2) ->
        por
        (remove_regexp2 r1 q_nonempty q_empty)
        (remove_regexp2 r2 q_nonempty q_empty)
    | PStar r ->
        let x = mk_derecurs_slot noloc in
        x.pdescr <- remove_regexp2 r (POr (PAlias x, q_nonempty)) pempty;
        por (PAlias x) q_empty
    | PWeakStar r ->
        let x = mk_derecurs_slot noloc in
        x.pdescr <- remove_regexp2 r (POr (q_nonempty, PAlias x)) pempty;
        por q_empty (PAlias x)

569
570
571
572
573
574
let cst_nil = Types.Atom Sequence.nil_atom
(* [capture_all vars p] intersects [p] with a capture of every variable
   of [vars]. *)
let capture_all vars p =
  let add_capture accu x = PAnd (accu, PCapture x) in
  IdSet.fold add_capture p vars

(* [termin b vars p] returns [p] unchanged when [b] holds.  Otherwise
   it appends to [p], for every variable [x] of [vars], a guard binding
   [x] to the constant [cst_nil]. *)
let termin b vars p =
  if b then p
  else
    let close accu x = PSeq (accu, PGuard (PConstant (x,cst_nil))) in
    IdSet.fold close p vars

575
(* Phase (1): [derecurs env p] translates the AST type/pattern [p] into
   the first representation.  Names are resolved ([derecurs_var]),
   schema components are looked up, local recursive definitions are
   bound to slots ([derecurs_def]) and regular expressions are kept as
   [PRegexp] after propagation of their sequence capture variables. *)
let rec derecurs env p =
  let tenv = env.penv_tenv in
  let sub = derecurs env in
  match p.descr with
  | PatVar v -> derecurs_var env p.loc v
  | SchemaVar (kind, schema_name, component_name) ->
      PType (find_schema_descr tenv kind schema_name component_name)
  | Recurs (body,defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv p.loc ns)))
  | Or (p1,p2) -> POr (sub p1, sub p2)
  | And (p1,p2) -> PAnd (sub p1, sub p2)
  | Diff (p1,p2) -> PDiff (sub p1, sub p2)
  | Prod (p1,p2) -> PTimes (sub p1, sub p2)
  | XmlT (p1,p2) -> PXml (sub p1, sub p2)
  | Arrow (p1,p2) -> PArrow (sub p1, sub p2)
  | Optional q -> POptional (sub q)
  | Record (o,r) ->
      let field = function
        | (q,Some e) -> (sub q, Some (sub e))
        | (q,None) -> sub q, None in
      PRecord (o, parse_record tenv p.loc field r)
  | Constant (x,c) -> PConstant (x,const tenv p.loc c)
  | Cst c -> PType (Types.constant (const tenv p.loc c))
  | Regexp r ->
      let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
      PRegexp r
	(* Note: computing remove_regexp here is slower (because
	   of caching ?) *)

602
(* - vars: seq variables to be propagated top-down and added
     to each captured element
   - b: below a star ?
   - rvars: seq variables that appear on the right of the regexp
   - f: tail position

  returns the set of seq variable of the regexp minus rvars
  (they have already been terminated if not below a star)

  The bindings below are kept in their original form on purpose: the
  sub-translations have side effects (slot creation, error reports), so
  their relative order must not change.
*)
and derecurs_regexp vars b rvars f env = function
  | Epsilon ->
      PEpsilon, IdSet.empty
  | Elem p ->
      PElem (capture_all vars (derecurs env p)), IdSet.empty
  | Guard p ->
      PGuard (derecurs env p), IdSet.empty
  | Seq (p1,p2) ->
      (* right part first: its variables are on the right of [p1] *)
      let (p2,v2) = derecurs_regexp vars b rvars f env p2 in
      let (p1,v1) = derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
      PSeq (p1,p2), IdSet.cup v1 v2
  | Alt (p1,p2) ->
      let (p1,v1) = derecurs_regexp vars b rvars f env p1
      and (p2,v2) = derecurs_regexp vars b rvars f env p2 in
      (* each branch terminates the variables found only in the other *)
      PAlt (termin b (IdSet.diff v2 v1) p1, termin b (IdSet.diff v1 v2) p2),
      IdSet.cup v1 v2
  | Star p ->
      let (p,v) = derecurs_regexp vars true rvars false env p in
      termin b v (PStar p), v
  | WeakStar p ->
      let (p,v) = derecurs_regexp vars true rvars false env p in
      termin b v (PWeakStar p), v
  | SeqCapture (x,p) ->
      let vars = if f then vars else IdSet.add x vars in
      let after = IdSet.mem rvars x in
      let rvars = IdSet.add x rvars in
      let (p,v) = derecurs_regexp vars b rvars false env p in
      (if f
       then PSeq (PGuard (PCapture x), p)
       else termin (after || b) (IdSet.singleton x) p),
      (if after then v else IdSet.add x v)

643

644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
(* [derecurs_var env loc v] resolves the name [v].
   Without a qualifier the name is, in this order, a recursive name in
   scope, a type of the typing environment, or else a capture variable.
   With a qualifier it must be a type of that compilation unit;
   otherwise an error is reported at [loc]. *)
and derecurs_var env loc v =
  match Ns.split_qname v with
    | "", name ->
        let id = ident name in
        (try PAlias (Env.find id env.penv_derec)
         with Not_found ->
           try PType (find_type id env.penv_tenv)
           with Not_found -> PCapture id)
    | cu_name, name ->
        try
          let cu = U.mk cu_name in
          PType (find_type_global loc cu (ident name) env.penv_tenv)
        with Not_found ->
          let msg =
            "Unbound external type " ^ cu_name ^ ":" ^ (U.to_string name) in
          raise_loc_generic loc msg

660
661
662
663
664
665
666
667
(* [derecurs_def env b] handles a group of mutually recursive
   definitions.  A fresh slot is allocated for every name and all names
   are added to the environment before any body is translated, so that
   the bodies can refer to one another.  Returns the extended
   environment. *)
and derecurs_def env b =
  let slots = List.map (fun (v,p) -> (v,p,mk_derecurs_slot p.loc)) b in
  let bind derec (v,_,s) = Env.add v s derec in
  let env' =
    { env with penv_derec = List.fold_left bind env.penv_derec slots } in
  List.iter (fun (_,p,s) -> s.pdescr <- derecurs env' p) slots;
  env'

668

669
670
671
672
673
(* Capture variables of slots and descriptions.

   [fv_slot] answers from the cache of the slot when there is one.
   Otherwise a slot already marked in the current generation ([gen1])
   contributes nothing, which stops the traversal on cycles, and an
   unmarked slot is marked and its contents are explored. *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None ->
        if s.gen1 = !gen then IdSet.empty
        else begin
          s.gen1 <- !gen;
          fv_descr s.d
        end

and fv_descr = function
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | IOr (d1,d2)
  | IAnd (d1,d2)
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d -> fv_descr d
  | ITimes (s1,s2)
  | IXml (s1,s2)
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_,r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_field r)
  | ICapture x | IConstant (x,_) -> IdSet.singleton x

and fv_field = function
  | (d,Some e) -> IdSet.cup (fv_slot d) (fv_descr e)
  | (d,None) -> fv_slot d

692

693
694
695
696
697
698
699
700
(* [compute_fv s] fills the capture variable cache of [s] if it is
   still empty, using a fresh traversal generation. *)
let compute_fv s =
  match s.fv with
    | Some _ -> ()
    | None ->
        incr gen;
        let vars = fv_slot s in
        s.fv <- Some vars
	  
701
702
703
(* [check_no_capture loc s] succeeds when the variable set [s] is
   empty; otherwise it reports, at [loc], one variable of [s]. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | None -> ()
    | Some x ->
        let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
        raise_loc_generic loc msg
706
    
707
708
709
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

710
711
let todo_defs = ref []
let todo_fv = ref []
712
713
714
715
716
717
718
719

(* Phase (2): [compile p] translates a term of the first representation
   into the second one.  Results are memoized in [compile_hash]; the
   actual work is done by [real_compile]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let compiled = real_compile p in
    DerecursTable.replace compile_hash p compiled;
    compiled
(* Uncached translation.
   - An alias is replaced by the translation of the contents of its
     slot.  The [ploop] flag is raised during that translation: meeting
     the same alias again before crossing a constructor that introduces
     a slot is reported as unguarded recursion.
   - Products, XML elements, arrows and record fields go through
     [compile_slot], which postpones the translation of the components.
   - A regular expression is first eliminated with [remove_regexp],
     using the nil sequence type as final continuation. *)
and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r = compile v.pdescr in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) -> IRecord (o, LabelMap.map compile_field r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp r -> compile (remove_regexp r (PType Sequence.nil_type))
740

741
742
743
744
(* A record field: the content goes to a slot; the optional or-else
   part is translated directly. *)
and compile_field (p, orelse) =
  match orelse with
    | Some e -> (compile_slot p, Some (compile e))
    | None -> (compile_slot p, None)

745
746
(* [compile_slot p] returns the slot standing for [p], creating it on
   first request.  A new slot is left undefined: it is queued in
   [todo_defs] (definition) and [todo_fv] (capture variables), both
   processed by [flush_defs], and recorded in [compile_slot_hash]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let fresh = mk_slot () in
    todo_defs := (fresh,p) :: !todo_defs;
    todo_fv := fresh :: !todo_fv;
    DerecursTable.add compile_slot_hash p fresh;
    fresh
753

754
      
755
let timer_fv = Stats.Timer.create "Typer.fv"
756
(* [flush_defs ()] defines every pending slot.  Compiling the contents
   of a slot may queue further slots, so the queue is drained until it
   is empty.  The capture variables of all slots queued in [todo_fv]
   are then computed, under the [timer_fv] timer. *)
let flush_defs () =
  let rec drain () =
    match !todo_defs with
      | [] -> ()
      | (s,p) :: rest ->
          todo_defs := rest;
          s.d <- compile p;
          drain ()
  in
  drain ();
  Stats.Timer.start timer_fv;
  List.iter compute_fv !todo_fv;
  todo_fv := [];
  Stats.Timer.stop timer_fv ()
767
768
769
770
771
772
773
774
775
776
777
778
779
	
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: [typ d] builds the [Types] descriptor denoted
   by [d].  Components held in slots become type nodes ([typ_node]).
   Capture variables and constants have no meaning in a type: callers
   are expected to rule them out first (see [check_no_capture]). *)
let rec typ d =
  match d with
  | IType t -> t
  | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
  | IAnd (s1,s2) -> Types.cap (typ s1) (typ s2)
  | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
  | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IOptional s -> Types.Record.or_absent (typ s)
  | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_field r)
  | IDummy | ICapture _ | IConstant (_,_) -> assert false
782
      
783
784
785
786
787
(* A record field of a type: the node of its content.  An or-else part
   is rejected with [Patterns.Error]. *)
and typ_field (s, orelse) =
  match orelse with
    | None -> typ_node s
    | Some _ ->
        raise (Patterns.Error "Or-else clauses are not allowed in types")

788
(* [typ_node s] returns the type node standing for the slot [s],
   creating and defining it on first request.

   The node is recorded in [typ_nodes] before its definition is
   computed, so that recursive references to [s] find it.

   If the definition fails, the node is removed from the table again
   before the exception is re-raised.  Without this, a later request
   for the same slot (for instance after an error at the toplevel)
   would silently get a node that was never defined.  [pat_node] uses
   the same recovery scheme. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let x = Types.make () in
    try
      SlotTable.add typ_nodes s x;
      Types.define x (typ s.d);
      x
    with exn -> SlotTable.remove typ_nodes s; raise exn
      
(* Phase (3) for patterns: [pat d] builds the pattern denoted by [d].
   A description without capture variable is handled as a type and
   wrapped with [Patterns.constr]; the others go through [pat_aux]. *)
let rec pat d : Patterns.descr =
  let vars = fv_descr d in
  if IdSet.is_empty vars
  then Patterns.constr (typ d)
  else pat_aux d
    
(* Translation of a description known to contain capture variables.
   Differences are accepted only when the right-hand side has no
   capture variable: it is then negated as a type and intersected.
   Arrows, optional fields and other differences are rejected with
   [Patterns.Error].  [IType] cannot occur here since it has no capture
   variable. *)
and pat_aux = function
  | IDummy -> assert false
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ ->
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IRecord (o,r) ->
      (* Fields without capture variable are gathered in one record
         type constraint.  Every other field yields a record pattern
         collected in [pats], its place in the constraint being taken
         by a catch-all node. *)
      let pats = ref [] in
      let aux l = function
        | (s,None) ->
            if IdSet.is_empty (fv_slot s) then typ_node s
            else
              ( pats := Patterns.record l (pat_node s) :: !pats;
                Types.any_node )
        | (s,Some e) ->
            if IdSet.is_empty (fv_slot s) then
              raise (Patterns.Error "Or-else clauses are not allowed in types")
            else
              ( pats := Patterns.cup
                  (Patterns.record l (pat_node s))
                  (pat e) :: !pats;
                Types.Record.any_or_absent_node )
      in
      let constr = Types.record' (o,LabelMap.mapi aux r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IType _ -> assert false
      
(* [pat_node s] returns the pattern node standing for the slot [s],
   creating and defining it on first request.  The node is recorded in
   [pat_nodes] before its definition is computed, so that recursive
   references find it.  If the definition fails, the record is removed
   again and the exception is re-raised. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s node;
      Patterns.define node (pat s.d);
      node
    with exn -> SlotTable.remove pat_nodes s; raise exn
      (* For the toplevel ... *)
850

851

852
module Ids = Set.Make(Id)
853
(* [type_defs env b] processes a group of (possibly mutually recursive)
   type definitions [b], given as pairs of an identifier and an AST
   type.

   Steps:
   - reject a group defining the same identifier twice;
   - translate all bodies with every name of the group in scope;
   - define the pending slots;
   - for each definition, reject capture variables, build the type and
     warn when a located definition denotes the empty type;
   - register each resulting type with the type printer.

   Returns the list of identifiers paired with their types, in the
   order of [b].  Errors are raised as located exceptions. *)
let type_defs env b =
  ignore 
    (List.fold_left 
       (fun seen (v,p) ->
	  if Ids.mem v seen then 
	    raise_loc_generic p.loc 
	      ("Multiple definitions for the type identifier " ^ 
	       (Ident.to_string v));
	  Ids.add v seen
       ) Ids.empty b);

  let penv = derecurs_def (penv env) b in
  let b = List.map (fun (v,p) -> (v,p,compile (derecurs penv p))) b in
  flush_defs ();
  let b = 
    List.map 
      (fun (v,p,s) -> 
	 check_no_capture p.loc (fv_descr s);
	 let t = typ s in
	 if (p.loc <> noloc) && (Types.is_empty t) then
	   warning p.loc 
	     ("This definition yields an empty type for " ^ (Ident.to_string v));
	 (v,t)) b in
  List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
  b
878
879


880
881
882
883
884
(* [dump_types ppf env] prints on [ppf] the names of all identifiers
   bound to a type in [env], each one preceded by a space. *)
let dump_types ppf env =
  let show v = function
    | Type _ -> Format.fprintf ppf " %a" Ident.print v
    | Val _ -> () in
  Env.iter show env.ids
885
886
(* [dump_type ppf env name] prints on [ppf] the type bound to [name] in
   [env].  Raises [Error] when [name] is unbound or bound to a
   value. *)
let dump_type ppf env name =
  try
    (match Env.find (Ident.ident name) env.ids with
       | Type t -> Types.Print.print ppf t
       | Val _ -> raise Not_found)
  with Not_found ->
    let msg = "Type " ^ (U.get_str name) ^ " not found" in
    raise (Error msg)
892

893
894
895
(* [dump_schema_type ppf env (k, s, n)] prints on [ppf] the component
   [n], of kind [k], of the schema named [s] in [env].  Raises [Error]
   when the schema or the component is unknown. *)
let dump_schema_type ppf env (k, s, n) =
  let uri = find_schema s env in
  Types.Print.print ppf (find_schema_descr_uri k uri n)
897

898
(* [dump_ns ppf env] dumps the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table
900

901

902
903
(* [do_typ loc r] turns the term [r] of the first representation into a
   type node.  Capture variables are rejected with an error located at
   [loc]. *)
let do_typ loc r =
  let slot = compile_slot r in
  flush_defs ();
  let vars = fv_slot slot in
  check_no_capture loc vars;
  typ_node slot
907
   
908
909
(* [typ env p] translates the AST type [p] into a type node, in the
   typing environment [env].  From here on this definition shadows the
   internal [typ] working on descriptions. *)
let typ env p =
  let d = derecurs (penv env) p in
  do_typ p.loc d
910
    
911
912
(* [pat env p] translates the AST pattern [p] into a pattern node, in
   the typing environment [env].  A [Patterns.Error] is turned into an
   error located at [p]; an exception located at [noloc] is relocated
   at [p].  From here on this definition shadows the internal [pat]
   working on descriptions. *)
let pat env p =
  let d = derecurs (penv env) p in
  let s = compile_slot d in
  flush_defs ();
  try pat_node s
  with
    | Patterns.Error msg -> raise_loc_generic p.loc msg
    | Location (loc,_,exn) when loc = noloc ->
        raise (Location (p.loc, `Full, exn))
917
918


919
920
(* II. Build skeleton *)

921

922
type type_fun = Types.t -> bool -> Types.t
923

924
module Fv = IdSet
925

926
927
928
type branch = Branch of Typed.branch * branch list

let cur_branch : branch list ref = ref []
929

930
(* [exp loc fv e] builds a typed expression node located at [loc] with
   description [e], its type field being initialised to [Types.empty],
   and pairs it with the variable set [fv]. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e;
    } in
  (fv, node)
936

937
(* Table of operators: name -> (arity, typing function). *)
let ops = Hashtbl.create 13

(* [register_op op arity f] records the operator [op]. *)
let register_op op arity f =
  Hashtbl.add ops op (arity,f)

(* [typ_op op] returns the typing function recorded for [op].  Raises
   [Not_found] when [op] was never registered. *)
let typ_op op =
  let (_, f) = Hashtbl.find ops op in
  f
940

941
942
943
944
945
(* [is_op env s] returns [Some (name, arity)] when [s] names a
   registered operator that is not hidden by an identifier bound in
   [env], and [None] otherwise. *)
let is_op env s =
  let shadowed = Env.mem (ident s) env.ids in
  if shadowed then None
  else
    try
      let name = U.get_str s in
      let (arity, _) = Hashtbl.find ops name in
      Some (name, arity)
    with Not_found -> None
946

947
948
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
949
  | Forget (e,t) ->
950
      let (fv,e) = expr env loc e and t = typ env t in
951
      exp loc fv (Typed.Forget (e,t))
952
953
  | Check (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
954
      exp loc fv (Typed.Check (ref Types.empty,e,t))
955
  | Var s -> var env loc s
956
  | Apply (e1,e2) -> 
957
958
959
960
961
962
963
964
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      let fv = Fv.cup fv1 fv2 in
      (match e1.Typed.exp_descr with
	 | Typed.Op (op,arity,args) when arity > 0 -> 
	     exp loc fv (Typed.Op (op,arity - 1,args @ [e2]))
	 | _ ->
	     exp loc fv (Typed.Apply (e1,e2)))
  | Abstraction a -> abstraction env loc a
965
  | (Integer _ | Char _ | Atom _ | Const _) as c -> 
966
      exp loc Fv.empty (Typed.Cst (const env loc c))
967
  | Pair (e1,e2) ->
968
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
969
970
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
971
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
972
973
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
974
975
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
976
  | RemoveField (e,l) ->
977
978
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
979
980
  | RecordLitt r -> 
      let fv = ref Fv.empty in
981
      let r = parse_record env loc