typer.ml 41.8 KB
Newer Older
1
(* TODO:
2
 - rewrite type-checking of operators to propagate constraint
3
4
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
5
6
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
(* Emit a warning for source location [loc] on the formatter currently
   held in [Location.warning_ppf]: first the location, then whatever
   [Location.html_hilight] prints for it, then the message [msg]. *)
let warning loc msg =
  let where = (loc, `Full) in
  let ppf = !Location.warning_ppf in
  Format.fprintf ppf "Warning %a:@\n%a%s@\n"
    Location.print_loc where
    Location.html_hilight where
    msg

17
18
(* What an identifier is bound to in the typing environment: either a
   type definition ([Type]) or a value of the given type ([Val]). *)
type item = Type of Types.t | Val of Types.t
20

21
(* The typing environment: identifier bindings together with the table
   of namespace prefixes. *)
type t = { ids : item Env.t; tenv_nspref : Ns.table }
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
include Custom.Dummy

(* TODO: filter out builtin defs ? *)
(* TODO: serialize nspref *)
(* Write [env] to the serialization stream [s].  Each binding is
   written as a 2-bit tag (0b01 for a type, 0b10 for a value), then
   the identifier, then the type.  The tag 0b00 closes the list of
   bindings and is followed by the namespace table. *)
let serialize s env =
  let put tag id t =
    Serialize.Put.bits 2 s tag;
    Id.serialize s id;
    Types.serialize s t
  in
  let put_item id = function
    | Type t -> put 0b01 id t
    | Val t -> put 0b10 id t
  in
  Env.iter put_item env.ids;
  Serialize.Put.bits 2 s 0b00;
  Ns.serialize_table s env.tenv_nspref

(* Read back an environment written by [serialize]: bindings are read
   until the 0b00 tag is met, then the namespace table is read.
   The tag 0b11 is never written and is treated as impossible. *)
let deserialize s =
  let read_binding wrap =
    let id = Id.deserialize s in
    let t = Types.deserialize s in
    (id, wrap t)
  in
  let rec loop acc =
    match Serialize.Get.bits 2 s with
    | 0b00 -> acc
    | 0b01 ->
        let (id, item) = read_binding (fun t -> Type t) in
        loop (Env.add id item acc)
    | 0b10 ->
        let (id, item) = read_binding (fun t -> Val t) in
        loop (Env.add id item acc)
    | _ -> assert false
  in
  let bindings = loop Env.empty in
  let prefixes = Ns.deserialize_table s in
  { ids = bindings; tenv_nspref = prefixes }


65
66
67
68
69
70
71
72
73
74
75
76
77
(* Environment with no identifier binding and the empty prefix table. *)
let empty_env = { tenv_nspref = Ns.empty_table; ids = Env.empty }

(* Bind [id] to the type [t] in [env]. *)
let enter_type id t env =
  let bindings = Env.add id (Type t) env.ids in
  { env with ids = bindings }
(* Bind every [(id, t)] pair of [l] as a type, in list order. *)
let enter_types l env =
  let add_type bindings (id, t) = Env.add id (Type t) bindings in
  { env with ids = List.fold_left add_type env.ids l }
(* Type bound to [id].  Raises [Not_found] when [id] is unbound (from
   [Env.find]) or is bound to a value. *)
let find_type id env =
  match Env.find id env.ids with
  | Val _ -> raise Not_found
  | Type t -> t
79
80

(* Bind [id] to a value of type [t] in [env]. *)
let enter_value id t env =
  let bindings = Env.add id (Val t) env.ids in
  { env with ids = bindings }
82
83
(* Bind every [(id, t)] pair of [l] as a value, in list order. *)
let enter_values l env =
  let add_value bindings (id, t) = Env.add id (Val t) bindings in
  { env with ids = List.fold_left add_value env.ids l }
85
86
(* Type of the value bound to [id].  Raises [Not_found] when [id] is
   unbound (from [Env.find]) or is bound to a type. *)
let find_value id env =
  match Env.find id env.ids with
  | Type _ -> raise Not_found
  | Val t -> t
	

91
(* Namespaces *)
92

93
94
95
(* Hand the prefix table of [env] to [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.tenv_nspref in
  Ns.InternalPrinter.set_table table

96
(* Namespace prefix table stored in the environment [tenv]. *)
let get_ns_table tenv =
  let table = tenv.tenv_nspref in
  table
97

98
99
100
(* Environment [env] extended with the prefix [p] bound to the
   namespace [ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.tenv_nspref in
  { env with tenv_nspref = table }

101
102
103
104
105
(* Apply [f] to [x]; an [Ns.UnknownPrefix] escaping from the call is
   turned into a generic error located at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix prefix ->
    let msg = "Undefined namespace prefix " ^ U.to_string prefix in
    raise_loc_generic loc msg
106

107
108
109
110
111
112
(* Resolve the tag [t] against the prefix table of [env] and build the
   corresponding atom.  An unknown prefix is reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.tenv_nspref in
  let (ns, local) = protect_error_ns loc resolve t in
  Atoms.V.mk ns local
 
(* Resolve the prefix [ns] against the prefix table of [env].
   An unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.tenv_nspref in
  protect_error_ns loc resolve ns
113

114
115
116
(* Resolve the attribute name [t] against the prefix table of [env]
   and intern it in [LabelPool].  An unknown prefix is reported at
   [loc]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.tenv_nspref in
  let (ns, local) = protect_error_ns loc resolve t in
  LabelPool.mk (ns, local)
117

118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
(* Build a label map from the association list [r]: labels are
   resolved with [parse_label], contents are mapped through [f].
   A label occurring twice is reported as an error at [loc]. *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let field (l, x) = (parse_label env loc l, f x) in
  LabelMap.from_list on_duplicate (List.map field r)

(* Translate a constant expression of the AST into a [Types] constant.
   [loc] is the closest enclosing location; it is refined each time a
   [LocatedExpr] is crossed.  Any expression that is not a constant is
   rejected with an error at [loc]. *)
let rec const env loc e =
  match e with
  | LocatedExpr (loc, e) -> const env loc e
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Pair (x, y) -> Types.Pair (const env loc x, const env loc y)
  | Xml (x, y) -> Types.Xml (const env loc x, const env loc y)
  | RecordLitt x -> Types.Record (parse_record env loc (const env loc) x)
  | String (i, j, s, c) -> Types.String (i, j, s, const env loc c)
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
135

136
(* Typing errors.  In this file they are raised through [raise_loc] /
   [raise_loc_str], i.e. wrapped in a [Location] exception. *)

(* Carries the part of the input type not accepted by the branches. *)
exception NonExhaustive of Types.descr

(* [Constraint (t, s)]: [t] was required to be a subtype of [s]. *)
exception Constraint of Types.descr * Types.descr

(* An expected type together with an explanation. *)
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr

(* [WrongLabel (t, l)]: projection on label [l] failed for type [t]. *)
exception WrongLabel of Types.descr * label

(* The boolean tells whether the identifier is bound at all in the
   environment (i.e. bound, but not to a value). *)
exception UnboundId of id * bool

exception Error of string
143

144
145
(* Raise [exn] attached to the whole location [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Raise [exn] attached to the character offset [ofs] inside [loc]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* Raise [Error msg] attached to the whole location [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn
147

148
149
150
  (* Schema datastructures *)

module StringSet = Set.Make(String)

(* Only used to remember which schemas have been imported. *)
let schemas =
  State.ref "Typer.schemas" StringSet.empty

(* Tables of schema components.  In this file they are looked up with
   (schema name, item name) pairs as keys. *)
let schema_types =
  State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements =
  State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes =
  State.ref "Typer.schema_attributes" (Hashtbl.create 51)
158

159
160
161
162
163
164
165
166
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* Add to [accu] every variable bound by a [SeqCapture] node of the
   regular expression.  Patterns under [Elem] are not inspected. *)
let rec seq_vars accu re =
  match re with
  | SeqCapture (v, r) -> seq_vars (IdSet.add v accu) r
  | Star r | WeakStar r -> seq_vars accu r
  | Seq (r1, r2) | Alt (r1, r2) ->
      let accu = seq_vars accu r1 in
      seq_vars accu r2
  | Epsilon | Elem _ -> accu

167
168
169
170
171
172
173
174
175
176
177
178
179
180
(* We use two intermediate representations between AST types/patterns
   and the internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

181
182
183
184
(* First intermediate representation (result of phase (1)).

   A [derecurs_slot] stands for one recursively defined name:
   - [ploc]   location used when an error is reported on the slot;
   - [pid]    number taken from [counter]; it is the hash of the
              [PAlias] node pointing to the slot;
   - [ploop]  set while the slot is being compiled, to detect
              unguarded recursion;
   - [pdescr] body of the definition, [PDummy] until it is filled. *)
type derecurs_slot = {
  ploc : Location.loc;
  pid : int;
  mutable ploop : bool;
  mutable pdescr : derecurs;
}

and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs

(* Regular expressions are kept as such until phase (2). *)
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

(* Second intermediate representation (result of phase (2)).  Products,
   XML elements, arrows and record fields point to [slot]s; the other
   connectives nest descriptions directly. *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const

(* [fv] and [hash] cache the free variables and the hash value of the
   slot once computed.  The [rank*] / [gen*] fields are visit marks
   used by the traversals of this file (compared against the global
   [gen] and [rank] counters).  [d] is [IDummy] until it is filled. *)
and slot = {
  mutable fv : fv option;
  mutable hash : int option;
  mutable rank1 : int;
  mutable rank2 : int;
  mutable gen1 : int;
  mutable gen2 : int;
  mutable d : descr;
}
230
231
232
233
234
235
236
237
238
239
240
241
242
    

(* Source of the [pid] numbers. *)
let counter = ref 0

(* Fresh, not yet defined slot located at [loc]; it receives the next
   number of [counter]. *)
let mk_derecurs_slot loc =
  incr counter;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* Fresh slot: no description yet, no cached value, all marks at 0. *)
let mk_slot () =
  {
    fv = None;
    hash = None;
    rank1 = 0;
    rank2 = 0;
    gen1 = 0;
    gen2 = 0;
    d = IDummy;
  }


(* This environment is used in phase (1) to eliminate recursion *)
(* Environment of phase (1): the typing environment, plus the slots of
   the recursive definitions currently in scope. *)
type penv = { penv_tenv : t; penv_derec : derecurs_slot Env.t }

(* Phase (1) environment over [tenv] with no recursive definition. *)
let penv tenv = { penv_derec = Env.empty; penv_tenv = tenv }
248

249
(* Hash function on [derecurs] terms, compatible with
   [equal_derecurs].  Each constructor has its own small tag; the
   hashes of the components are weighted by 17 and 257.  Aliases are
   hashed by the number of their slot, so recursion stops there.
   [PDummy] must never be hashed. *)
let rec hash_derecurs d =
  let mix1 tag h = tag + 17 * h in
  let mix2 tag h1 h2 = tag + 17 * h1 + 257 * h2 in
  let pair tag p q = mix2 tag (hash_derecurs p) (hash_derecurs q) in
  match d with
  | PDummy -> assert false
  | PAlias s -> s.pid
  | PType t -> mix1 1 (Types.hash t)
  | POr (p1, p2) -> pair 2 p1 p2
  | PAnd (p1, p2) -> pair 3 p1 p2
  | PDiff (p1, p2) -> pair 4 p1 p2
  | PTimes (p1, p2) -> pair 5 p1 p2
  | PXml (p1, p2) -> pair 6 p1 p2
  | PArrow (p1, p2) -> pair 7 p1 p2
  | POptional p -> mix1 8 (hash_derecurs p)
  | PRecord (o, r) ->
      let tag = if o then 9 else 10 in
      mix1 tag (LabelMap.hash hash_derecurs r)
  | PCapture x -> mix1 11 (Id.hash x)
  | PConstant (x, c) -> mix2 12 (Id.hash x) (Types.Const.hash c)
  | PRegexp (p, q) -> mix2 13 (hash_derecurs_regexp p) (hash_derecurs q)
277
(* Same hashing scheme, for the regular expression part. *)
and hash_derecurs_regexp re =
  let mix1 tag h = tag + 17 * h in
  let mix2 tag h1 h2 = tag + 17 * h1 + 257 * h2 in
  let pair tag r1 r2 =
    mix2 tag (hash_derecurs_regexp r1) (hash_derecurs_regexp r2) in
  match re with
  | PEpsilon -> 1
  | PElem p -> mix1 2 (hash_derecurs p)
  | PSeq (p1, p2) -> pair 3 p1 p2
  | PAlt (p1, p2) -> pair 4 p1 p2
  | PStar p -> mix1 5 (hash_derecurs_regexp p)
  | PWeakStar p -> mix1 6 (hash_derecurs_regexp p)
290
291

(* Equality on [derecurs] terms, used together with [hash_derecurs].
   Physically equal terms are equal; otherwise terms are compared
   structurally, except aliases, which are equal only when they point
   to the very same slot (so the comparison never follows a
   recursion). *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 ->
      (* identity of the mutable slot is what matters here *)
      s1 == s2
  | PType t1, PType t2 ->
      Types.equal t1 t2
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) ->
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 ->
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) ->
      (* the open/closed flags are plain booleans: compare them
         structurally, as equal_descr does for IRecord *)
      (o1 = o2) && (LabelMap.equal equal_derecurs r1 r2)
  | PCapture x1, PCapture x2 ->
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) ->
      (Id.equal x1 x2) && (Types.Const.equal c1 c2)
  | PRegexp (p1,q1), PRegexp (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  | _ -> false
(* Structural equality on the regular expression part. *)
and equal_derecurs_regexp r1 r2 =
  match (r1, r2) with
  | PEpsilon, PEpsilon -> true
  | PElem a, PElem b -> equal_derecurs a b
  | PSeq (a1, a2), PSeq (b1, b2)
  | PAlt (a1, a2), PAlt (b1, b2) ->
      equal_derecurs_regexp a1 b1 && equal_derecurs_regexp a2 b2
  | PStar a, PStar b
  | PWeakStar a, PWeakStar b ->
      equal_derecurs_regexp a b
  | _ -> false
326

327
328
329
330
331
332
333
334
335
336
337
(* Hash tables indexed by [derecurs] terms. *)
module DerecursTable = Hashtbl.Make (struct
  type t = derecurs
  let equal = equal_derecurs
  let hash = hash_derecurs
end)

(* Hash tables indexed by (regular expression, continuation) pairs. *)
module RE = Hashtbl.Make (struct
  type t = derecurs_regexp * derecurs

  let equal (r1, k1) (r2, k2) =
    (equal_derecurs_regexp r1 r2) && (equal_derecurs k1 k2)

  let hash (r, k) =
    (hash_derecurs_regexp r) + 17 * (hash_derecurs k)
end)
344

345
346
347
348
(* Global counters behind the visit marks stored in slots: [gen] is
   the number of the current traversal, [rank] numbers the slots met
   during one traversal. *)
let gen : int ref = ref 0
let rank : int ref = ref 0
	     
(* Hash of a description.  Slots are hashed through [hash_slot], which
   marks them, so that cyclic descriptions can be hashed.  [IDummy]
   must never be hashed. *)
let rec hash_descr d =
  match d with
  | IDummy -> assert false
  | IType x -> Types.hash x
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
361
362
363
364
365
(* Hash of a slot within the current generation.  A slot met for the
   first time receives the next rank and is hashed through its
   description; a slot already met is hashed from its rank only. *)
and hash_slot s =
  if s.gen1 <> !gen then begin
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end
  else 13 * s.rank1
    
(* Structural equality of two descriptions; slots are compared with
   [equal_slot]. *)
let rec equal_descr d1 d2 =
  match (d1, d2) with
  | IType a, IType b -> Types.equal a b
  | ICapture a, ICapture b -> Id.equal a b
  | IConstant (a, c), IConstant (b, d) ->
      (Id.equal a b) && (Types.Const.equal c d)
  | IOptional a, IOptional b -> equal_descr a b
  | IOr (a1, a2), IOr (b1, b2)
  | IAnd (a1, a2), IAnd (b1, b2)
  | IDiff (a1, a2), IDiff (b1, b2) ->
      (equal_descr a1 b1) && (equal_descr a2 b2)
  | ITimes (a1, a2), ITimes (b1, b2)
  | IXml (a1, a2), IXml (b1, b2)
  | IArrow (a1, a2), IArrow (b1, b2) ->
      (equal_slot a1 b1) && (equal_slot a2 b2)
  | IRecord (o1, r1), IRecord (o2, r2) ->
      (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
  | _ -> false
(* Equality of two slots within the current generation.  The left slot
   is marked through [gen1]/[rank1], the right one through
   [gen2]/[rank2]:
   - both already met: equal iff they received the same rank;
   - none met yet: give both the next rank, then compare descriptions;
   - only one met: different. *)
and equal_slot s1 s2 =
  let seen1 = (s1.gen1 = !gen) in
  if seen1 then
    (s2.gen2 = !gen) && (s1.rank1 = s2.rank2)
  else if s2.gen2 <> !gen then begin
    incr rank;
    s1.rank1 <- !rank; s1.gen1 <- !gen;
    s2.rank2 <- !rank; s2.gen2 <- !gen;
    equal_descr s1.d s2.d
  end
  else false
  
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
(* Hash tables indexed by slots.  Each hash computation and each
   comparison of two distinct slots opens a new generation and
   restarts the ranks.  The hash of a slot is cached in the slot. *)
module SlotTable = Hashtbl.Make (struct
  type t = slot

  let hash s =
    match s.hash with
    | None ->
        incr gen;
        rank := 0;
        let h = hash_slot s in
        s.hash <- Some h;
        h
    | Some h -> h

  let equal s1 s2 =
    if s1 == s2 then true
    else begin
      incr gen;
      rank := 0;
      equal_slot s1 s2
    end
end)


(* Phase (1): translate the AST type/pattern [p] into a [derecurs]
   term.  A name is resolved first as a recursive definition in
   scope, then as a type of the typing environment, and otherwise
   becomes a capture variable.  For a regular expression, the
   continuation is intersected with one nil-valued [PConstant] per
   sequence capture variable of the expression. *)
let rec derecurs env p =
  let tenv = env.penv_tenv in
  let loc = p.loc in
  let sub = derecurs env in
  match p.descr with
  | PatVar v ->
      (try PAlias (Env.find v env.penv_derec)
       with Not_found ->
         try PType (find_type v tenv)
         with Not_found -> PCapture v)
  | SchemaVar (kind, schema, item) ->
      PType (derecurs_schema env kind schema item)
  | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
  | Or (p1, p2) -> POr (sub p1, sub p2)
  | And (p1, p2) -> PAnd (sub p1, sub p2)
  | Diff (p1, p2) -> PDiff (sub p1, sub p2)
  | Prod (p1, p2) -> PTimes (sub p1, sub p2)
  | XmlT (p1, p2) -> PXml (sub p1, sub p2)
  | Arrow (p1, p2) -> PArrow (sub p1, sub p2)
  | Optional q -> POptional (sub q)
  | Record (o, r) -> PRecord (o, parse_record tenv loc sub r)
  | Constant (x, c) -> PConstant (x, const tenv loc c)
  | Cst c -> PType (Types.constant (const tenv loc c))
  | Regexp (r, q) ->
      let with_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let captured = seq_vars IdSet.empty r in
      let tail = IdSet.fold with_nil (sub q) captured in
      let re = derecurs_regexp (fun x -> x) env r in
      PRegexp (re, tail)
(* Translate a regular expression.  [vars] is applied to every element
   pattern; crossing a [SeqCapture] on [x] extends it so that the
   elements below are also intersected with [PCapture x]. *)
and derecurs_regexp vars env re =
  let sub = derecurs_regexp vars env in
  match re with
  | Epsilon -> PEpsilon
  | Elem p -> PElem (vars (derecurs env p))
  | Seq (r1, r2) -> PSeq (sub r1, sub r2)
  | Alt (r1, r2) -> PAlt (sub r1, sub r2)
  | Star r -> PStar (sub r)
  | WeakStar r -> PWeakStar (sub r)
  | SeqCapture (x, r) ->
      let vars' elem = PAnd (vars elem, PCapture x) in
      derecurs_regexp vars' env r


(* Enter the mutually recursive definitions [b]: a fresh slot is
   created for every name, all slots are put in scope, and only then
   is each body translated and stored in its slot.  Returns the
   extended environment. *)
and derecurs_def env b =
  let slots = List.map (fun (v, p) -> (v, p, mk_derecurs_slot p.loc)) b in
  let derec =
    List.fold_left
      (fun acc (v, _, s) -> Env.add v s acc)
      env.penv_derec slots in
  let env' = { env with penv_derec = derec } in
  List.iter (fun (_, p, s) -> s.pdescr <- derecurs env' p) slots;
  env'

(* Look up the component [item] of the schema [schema] in the schema
   tables.  [kind] selects the table: `Element, `Type or `Attribute
   look in a single table, `Any tries elements, then types, then
   attributes.
   Raises [Failure] with an explanatory message when nothing is found.
   [env] is not used. *)
and derecurs_schema env kind schema item =
  let elt () = fst (Hashtbl.find !schema_elements (schema, item)) in
  let typ () = Hashtbl.find !schema_types (schema, item) in
  let att () = Hashtbl.find !schema_attributes (schema, item) in
  (* Try the lookups in order; [n] names the kind in the message. *)
  let rec do_try n = function
    | [] ->
	let s = Printf.sprintf
		  "No %s named '%s' found in schema '%s'" n item schema in
	failwith s
    | f :: rem -> (try f () with Not_found -> do_try n rem)  in
  match kind with
    | `Element -> do_try "element" [ elt ]
    | `Type -> do_try "type" [ typ ]
    | `Attribute -> do_try "attribute" [ att ]
    | `Any -> do_try "item" [ elt; typ; att ]

    
487
488
489
490
491
(* Free variables of a slot.  The cached value is used when present.
   Otherwise the slot is marked with the current generation before its
   description is visited; a slot already marked contributes nothing,
   which makes the traversal terminate on cycles. *)
let rec fv_slot s =
  match s.fv with
  | Some vars -> vars
  | None ->
      if s.gen1 = !gen then IdSet.empty
      else begin
        s.gen1 <- !gen;
        fv_descr s.d
      end
493
(* Free variables of a description: the identifiers of its captures
   and constants.  [IDummy] must never be met. *)
and fv_descr d =
  match d with
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | ICapture x | IConstant (x, _) -> IdSet.singleton x
  | IOptional d -> fv_descr d
  | IOr (d1, d2)
  | IAnd (d1, d2)
  | IDiff (d1, d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | ITimes (s1, s2)
  | IXml (s1, s2)
  | IArrow (s1, s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_, r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
506

507
508
509
510
511
512
513
514
(* Compute and cache the free variables of [s], unless already cached.
   The computation runs in a new generation. *)
let compute_fv s =
  match s.fv with
  | None ->
      incr gen;
      let vars = fv_slot s in
      s.fv <- Some vars
  | Some _ -> ()
	  
515
516
517
(* Check that the set of variables [s] is empty.  Otherwise one of its
   elements is reported at [loc] as an unbound type name. *)
let check_no_capture loc s =
  match IdSet.pick s with
  | None -> ()
  | Some x ->
      let msg = "Unbound type name " ^ (Ident.to_string x) in
      raise_loc_generic loc msg
520
    
521
522
523
(* Memoization tables of phase (2): the slot, resp. the description,
   already built for a [derecurs] term. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

(* Work lists emptied by [flush_defs]: slots whose description remains
   to be compiled, and slots whose free variables remain to be
   computed. *)
let todo_defs = ref []
let todo_fv = ref []
526
527
528
529
530
531
532
533

(* Phase (2): description of the term [p], memoized in
   [compile_hash]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let compiled = real_compile p in
    DerecursTable.replace compile_hash p compiled;
    compiled
(* Actual compilation.  Boolean connectives and optional are compiled
   in place; the components of products, XML elements, arrows and
   records are compiled to slots.  An alias is replaced by the
   compilation of its definition; [ploop] is set meanwhile, so that
   coming back to the same alias without going through a slot is
   reported as unguarded recursion. *)
and real_compile d =
  match d with
  | PDummy -> assert false
  | PType t -> IType t
  | PCapture x -> ICapture x
  | PConstant (x, v) -> IConstant (x, v)
  | POptional t -> IOptional (compile t)
  | POr (t1, t2) -> IOr (compile t1, compile t2)
  | PAnd (t1, t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1, t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1, t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1, t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1, t2) -> IArrow (compile_slot t1, compile_slot t2)
  | PRecord (o, r) -> IRecord (o, LabelMap.map compile_slot r)
  | PRegexp (r, q) -> compile_regexp r q
  | PAlias slot ->
      if slot.ploop then
        raise_loc_generic slot.ploc "Unguarded recursion on type/pattern";
      slot.ploop <- true;
      let body = compile slot.pdescr in
      slot.ploop <- false;
      body
(* Compile the regular expression [r] followed by the continuation [q]
   into the union of the collected alternatives.  [visited] records
   the (expression, continuation) pairs already expanded, so that the
   expansion of stars terminates. *)
and compile_regexp r q =
  let visited = RE.create 17 in
  let rec expand alts re k =
    if RE.mem visited (re, k) then alts
    else begin
      RE.add visited (re, k) ();
      match re with
      | PEpsilon ->
          (match k with
           | PRegexp (re', k') -> expand alts re' k'
           | _ -> (compile k) :: alts)
      | PElem p -> ITimes (compile_slot p, compile_slot k) :: alts
      | PSeq (r1, r2) -> expand alts r1 (PRegexp (r2, k))
      | PAlt (r1, r2) -> expand (expand alts r1 k) r2 k
      | PStar r1 -> expand (expand alts r1 (PRegexp (re, k))) PEpsilon k
      | PWeakStar r1 -> expand (expand alts PEpsilon k) r1 (PRegexp (re, k))
    end
  in
  match expand [] r q with
  | [] -> assert false
  | first :: rest -> List.fold_left (fun acc alt -> IOr (alt, acc)) first rest
(* Slot standing for the term [p], memoized in [compile_slot_hash].
   A new slot is registered at once, which allows cyclic terms; its
   description and free variables are left to [flush_defs]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let slot = mk_slot () in
    todo_defs := (slot, p) :: !todo_defs;
    todo_fv := slot :: !todo_fv;
    DerecursTable.add compile_slot_hash p slot;
    slot
584

585
      
586
let timer_fv = Stats.Timer.create "Typer.fv"

(* Compile the description of every pending slot (compiling one may
   add new pending slots), then compute, under [timer_fv], the free
   variables of all slots created meanwhile. *)
let rec flush_defs () =
  match !todo_defs with
  | (slot, p) :: rest ->
      todo_defs := rest;
      slot.d <- compile p;
      flush_defs ()
  | [] ->
      Stats.Timer.start timer_fv;
      List.iter compute_fv !todo_fv;
      todo_fv := [];
      Stats.Timer.stop timer_fv ()
598
599
600
601
602
603
604
605
606
607
608
609
610
611
	
(* Type node, resp. pattern node, already built for a slot. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3), types: build the [Types] description of [d].  Captures
   and constants are not expected here. *)
let rec typ d =
  match d with
  | IDummy | ICapture _ | IConstant (_, _) -> assert false
  | IType t -> t
  | IOptional s -> Types.Record.or_absent (typ s)
  | IOr (s1, s2) -> Types.cup (typ s1) (typ s2)
  | IAnd (s1, s2) -> Types.cap (typ s1) (typ s2)
  | IDiff (s1, s2) -> Types.diff (typ s1) (typ s2)
  | ITimes (s1, s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1, s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1, s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IRecord (o, r) -> Types.record' (o, LabelMap.map typ_node r)
613
      
614
(* Type node of the slot [s], memoized in [typ_nodes].  The node is
   registered before being defined, so that recursive types find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* Phase (3), patterns: a description without free variable is turned
   into a type constraint; the others go through [pat_aux]. *)
let rec pat d : Patterns.descr =
  let closed = IdSet.is_empty (fv_descr d) in
  if closed then Patterns.constr (typ d) else pat_aux d
    
(* Pattern for a description with free variables.  A difference is
   accepted only when its right-hand side has no free variable (it is
   then the intersection with the negated type); optional fields and
   arrows are rejected with [Patterns.Error].  [IType] cannot occur
   here since it has no free variable. *)
and pat_aux d =
  match d with
  | IDummy -> assert false
  | IType _ -> assert false
  | ICapture x -> Patterns.capture x
  | IConstant (x, c) -> Patterns.constant x c
  | IOr (s1, s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1, s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1, s2) when IdSet.is_empty (fv_descr s2) ->
      let negated = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr negated)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1, s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1, s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ ->
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IRecord (o, r) ->
      (* Fields without free variable stay in the record type
         constraint; the others are replaced there by [Types.any_node]
         and yield a separate record pattern each. *)
      let field_pats = ref [] in
      let field l s =
        if IdSet.is_empty (fv_slot s) then typ_node s
        else begin
          field_pats := Patterns.record l (pat_node s) :: !field_pats;
          Types.any_node
        end
      in
      let constr = Types.record' (o, LabelMap.mapi field r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !field_pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
      
(* Pattern node of the slot [s], memoized in [pat_nodes].  The node is
   registered before being defined, so that recursive patterns find
   it; when the definition fails, the registration is undone and the
   exception is raised again. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s node;
      Patterns.define node (pat s.d);
      node
    with exn ->
      SlotTable.remove pat_nodes s;
      raise exn
      (* For the toplevel ... *)
667

668

669
(* Process the mutually recursive type definitions [b].
   Every name must be unbound in [env]; the definitions must not
   contain capture variables (they denote unbound type names).  A
   warning is printed for a definition with a real location whose
   type is empty.  The resulting types are registered for printing
   and returned as a list of (name, type) pairs. *)
let type_defs env b =
  let check_fresh (v, p) =
    if Env.mem v env.ids
    then raise_loc_generic p.loc ("Identifier " ^ (Ident.to_string v) ^ " is already bound")
  in
  List.iter check_fresh b;
  let derec_env = derecurs_def (penv env) b in
  let compiled =
    List.map (fun (v, p) -> (v, p, compile (derecurs derec_env p))) b in
  flush_defs ();
  let finish (v, p, d) =
    check_no_capture p.loc (fv_descr d);
    let t = typ d in
    if (p.loc <> noloc) && (Types.is_empty t) then
      warning p.loc
        ("This definition yields an empty type for " ^ (Ident.to_string v));
    (v, t)
  in
  let defs = List.map finish compiled in
  List.iter (fun (v, t) -> Types.Print.register_global (Id.value v) t) defs;
  defs
689
690


691
692
693
694
695
(* Print on [ppf] the names bound to types in [env], each preceded by
   a space. *)
let dump_types ppf env =
  let print_type v = function
    | Type _ -> Format.fprintf ppf " %a" Ident.print v
    | _ -> ()
  in
  Env.iter print_type env.ids
696

697
698
(* Print the namespace prefix table of [env] on [ppf]. *)
let dump_ns ppf env =
  let table = env.tenv_nspref in
  Ns.dump_table ppf table
699

700

701
702
(* Type node for the term [r].  Capture variables are rejected at
   [loc] as unbound type names. *)
let do_typ loc r =
  let slot = compile_slot r in
  flush_defs ();
  check_no_capture loc (fv_slot slot);
  typ_node slot
706
   
707
708
(* Translate the AST type [p] into a type node, in environment
   [env]. *)
let typ env p =
  let term = derecurs (penv env) p in
  do_typ p.loc term
709
    
710
711
(* Translate the AST pattern [p] into a pattern node, in environment
   [env].  A [Patterns.Error] is reported at the location of [p]; a
   located exception carrying no location is relocated there too. *)
let pat env p =
  let slot = compile_slot (derecurs (penv env) p) in
  flush_defs ();
  try pat_node slot
  with
  | Patterns.Error msg -> raise_loc_generic p.loc msg
  | Location (loc, _, exn) when loc = noloc ->
      raise (Location (p.loc, `Full, exn))
716
717


718
719
(* II. Build skeleton *)

720

721
722
723
724
725
type type_fun = Types.t -> bool -> Types.t

(* Hooks for the operators.  They fail until something is stored in
   them; nothing in the visible part of this file does so
   (presumably the module defining the operators -- to be confirmed). *)
let mk_unary_op =
  ref (fun _ _ -> assert false)
let typ_unary_op =
  ref (fun _ _ _ -> assert false)
let mk_binary_op =
  ref (fun _ _ -> assert false)
let typ_binary_op =
  ref (fun _ _ _ _ -> assert false)
726
727


728
(* Sets of free variables of expressions. *)
module Fv = IdSet

(* A branch together with the branches built while translating its
   body. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected so far at the current nesting level. *)
let cur_branch : branch list ref = ref []
733

734
(* Pair the free variables [fv] with a fresh expression node for [e]
   located at [loc]; the type of the node starts as the empty type. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e } in
  (fv, node)
740
741


742
743
(* Translate an AST expression into a [Typed] expression, returned
   together with the set of its free variables.  [loc] is the closest
   enclosing location; it is refined each time a [LocatedExpr] is
   crossed.  Types, patterns, labels and constants met on the way are
   translated in [env]; a [NamespaceIn] extends [env] for its body. *)
let rec expr env loc e =
  (* node with two sub-expressions *)
  let two mk e1 e2 =
    let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
    exp loc (Fv.cup fv1 fv2) (mk e1 e2)
  in
  (* node with a sub-expression and branches *)
  let with_branches mk e b =
    let (fv1,e) = expr env loc e
    and (fv2,b) = branches env b in
    exp loc (Fv.cup fv1 fv2) (mk e b)
  in
  (* node with a sub-expression and a type *)
  let with_type mk e t =
    let (fv,e) = expr env loc e and t = typ env t in
    exp loc fv (mk e t)
  in
  match e with
  | LocatedExpr (loc,e) -> expr env loc e
  | Forget (e,t) -> with_type (fun e t -> Typed.Forget (e,t)) e t
  | Ref (e,t) -> with_type (fun e t -> Typed.Ref (e,t)) e t
  | Var s ->
      exp loc (Fv.singleton s) (Typed.Var s)
  | Apply (e1,e2) -> two (fun e1 e2 -> Typed.Apply (e1,e2)) e1 e2
  | Pair (e1,e2) -> two (fun e1 e2 -> Typed.Pair (e1,e2)) e1 e2
  | Xml (e1,e2) -> two (fun e1 e2 -> Typed.Xml (e1,e2)) e1 e2
  | Abstraction a ->
      let iface =
        List.map (fun (t1,t2) -> (typ env t1, typ env t2)) a.fun_iface in
      (* the type of the function is the intersection of its arrows *)
      let t =
        List.fold_left
          (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
          Types.any iface in
      let iface =
        List.map (fun (t1,t2) -> (Types.descr t1, Types.descr t2)) iface in
      let (fv0,body) = branches env a.fun_body in
      (* the name of the function is bound in its own body *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | (Integer _ | Char _ | Atom _) as c ->
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Dot (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
  | RemoveField (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
  | RecordLitt r ->
      let fv = ref Fv.empty in
      let field e =
        let (fv2,e) = expr env loc e in
        fv := Fv.cup !fv fv2;
        e
      in
      let r = parse_record env loc field r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr env loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (try
         (match ltes with
            | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
            | [e1;e2] -> exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)
  | Match (e,b) -> with_branches (fun e b -> Typed.Match (e, b)) e b
  | Map (e,b) -> with_branches (fun e b -> Typed.Map (e, b)) e b
  | Transform (e,b) -> with_branches (fun e b -> Typed.Transform (e, b)) e b
  | Xtrans (e,b) -> with_branches (fun e b -> Typed.Xtrans (e, b)) e b
  | Try (e,b) -> with_branches (fun e b -> Typed.Try (e, b)) e b
  | Validate (e,schema,elt) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Validate (e, schema, elt))
  | NamespaceIn (pr,ns,e) ->
      let env = enter_ns pr ns env in
      expr env loc e
837
	      
838
  (* Translate a list of (pattern, body) branches.  Returns the free
     variables of the bodies not bound by their own pattern, and the
     branches with the union of the types accepted by the patterns.
     A warning is printed when a capture variable of a pattern is not
     used in the corresponding body.  The branches built while
     translating a body are collected through [cur_branch] and
     attached to the enclosing branch. *)
  and branches env b =
    let free = ref Fv.empty in
    let accepted = ref Types.empty in
    let one (p,e) =
      let saved = !cur_branch in
      cur_branch := [];
      let (body_fv,e) = expr env noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat env p in
      let bound = Patterns.fv p in
      (match Fv.pick (Fv.diff bound body_fv) with
         | None -> ()
         | Some x ->
             let x = U.to_string (Id.value x) in
             warning br_loc
               ("The capture variable " ^ x ^
                " is declared in the pattern but not used in the body of this branch. It might be a misspelled type or name (if not use _ instead)."));
      let body_fv = Fv.diff body_fv (Patterns.fv p) in
      free := Fv.cup !free body_fv;
      accepted := Types.cup !accepted (Types.descr (Patterns.accept p));
      let br =
        { Typed.br_loc = br_loc;
          Typed.br_used = (br_loc = noloc);
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: saved;
      br
    in
    let brs = List.map one b in
    let result =
      { Typed.br_typ = Types.empty;
        Typed.br_branches = brs;
        Typed.br_accept = !accepted;
        Typed.br_compiled = None } in
    (!free, result)
874

875
(* Entry point: translate an expression, starting without location. *)
let expr env e = expr env noloc e
876

877
878
879
(* Translate the declaration binding the pattern [p] to the
   expression [e].  The free variables of [e] are dropped. *)
let let_decl env p e =
  let (_, body) = expr env e in
  { Typed.let_pat = pat env p;
    Typed.let_body = body;
    Typed.let_compiled = None }

883
884
885

(* Hide global "typing/parsing" environment *)

886

887
888
(* III. Type-checks *)

889
890
open Typed

891
892
(* Check that [t] is a subtype of [s]; otherwise raise [Constraint] at
   [loc]. *)
let require loc t s =
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
893

894
(* Same check as [require]; returns [t] when it succeeds. *)
let verify loc t s =
  let () = require loc t s in
  t

897
898
899
900
901
(* Check that [t] is a subtype of [s] and return [t]; otherwise raise
   [Constraint] at the character offset [ofs] inside [loc]. *)
let check_str loc ofs t s =
  let ok = Types.subtype t s in
  if not ok then raise_loc_str loc ofs (Constraint (t, s));
  t

(* Raise [ShouldHave] at [loc], with the expected type [constr] and
   the explanation [s]. *)
let should_have loc constr s =
  let exn = ShouldHave (constr, s) in
  raise_loc loc exn

904
905
906
(* Same as [should_have], at the character offset [ofs] inside
   [loc]. *)
let should_have_str loc ofs constr s =
  let exn = ShouldHave (constr, s) in
  raise_loc_str loc ofs exn

907
908
909
910
911
912
913
914
915
916
917
(* Type a flattening operation.  [arg] types the argument against a
   constraint, given a precision flag.  The argument is constrained by
   [sconstr'], derived from [constr] through [Sequence.approx] and
   [Sequence.star].  When that derivation is exact, i.e. [constr']
   is a subtype of [constr], the argument is typed with the requested
   precision and [constr] itself is the answer in the imprecise case;
   otherwise the argument is always typed precisely and its type is
   flattened.  [loc] is not used. *)
let flatten loc arg constr precise =
  let constr' =
    Sequence.star (Sequence.approx (Types.cap Sequence.any constr)) in
  let sconstr' = Sequence.star constr' in
  let exact = Types.subtype constr' constr in
  if not exact then Sequence.flatten (arg sconstr' true)
  else begin
    let t = arg sconstr' precise in
    if precise then Sequence.flatten t else constr
  end
918

919
920
(* Type-check the expression [e] against the constraint [constr].
   The answer is the computed type when [precise] is set, and
   [constr] itself otherwise; in both cases it is accumulated (by
   union) into the type stored in the expression node. *)
let rec type_check env e constr precise =
  let computed = type_check' e.exp_loc env e.exp_descr constr precise in
  let result = if precise then computed else constr in
  e.exp_typ <- Types.cup e.exp_typ result;
  result

925
and type_check' loc env e constr precise = match e with
926
927
928
  | Forget (e,t) ->
      let t = Types.descr t in
      ignore (type_check env e t false);
929
      verify loc t constr
930

931
  | Abstraction a ->
932
933
934
      let t =
	try Types.Arrow.check_strenghten a.fun_typ constr 
	with Not_found -> 
935
936
	  should_have loc constr
	    "but the interface of the abstraction is not compatible"
937
      in
938
939
      let env = match a.fun_name with
	| None -> env
940
	| Some f -> enter_value f a.fun_typ env in
941
942
      List.iter 
	(fun (t1,t2) ->
943
944
945
	   let acc = a.fun_body.br_accept in 
	   if not (Types.subtype t1 acc) then
	     raise_loc loc (NonExhaustive (Types.diff t1 acc));
946
	   ignore (type_check_branches loc env t1 a.fun_body t2 false)
947
948
	) a.fun_iface;
      t
949

950
951
  | Match (e,b) ->
      let t = type_check env e b.br_accept true in
952
      type_check_branches loc env t b constr precise
953
954
955

  | Try (e,b) ->
      let te = type_check env e constr precise in
956
      let tb = type_check_branches loc env Types.any b constr precise in
957
      Types.cup te tb
958

959
960
  | Pair (e1,e2) ->
      type_check_pair loc env e1 e2 constr precise
961

962
963
  | Xml (e1,e2) ->
      type_check_pair ~kind:`XML loc env e1 e2 constr precise
964

965
  | RecordLitt r ->
966
967
968
969
970
971
972
973
      type_record loc env r constr precise

  | Map (e,b) ->
      type_map loc env false e b constr precise

  | Transform (e,b) ->
      flatten loc (type_map loc env true e b) constr precise

974
975
976
977
  | Apply (e1,e2) ->
      let t1 = type_check env e1 Types.Arrow.any true in
      let t1 = Types.Arrow.get t1 in
      let dom = Types.Arrow.domain t1 in
978
979
980
981
982
983
984
      let res =
	if Types.Arrow.need_arg t1 then
	  let t2 = type_check env e2 dom true in
	  Types.Arrow.apply t1 t2
	else
	  (ignore (type_check env e2 dom false); Types.Arrow.apply_noarg t1)
      in
985
      verify loc res constr
986
987

  | UnaryOp (o,e) ->
988
989
      let t = !typ_unary_op o loc (type_check env e) constr precise in
      verify loc t constr
990
991

  | BinaryOp (o,e1,e2) ->
992
993
994
      let t = !typ_binary_op o loc 
		(type_check env e1) (type_check env e2) constr precise in
      verify loc t constr
995
996
997

  | Var s -> 
      let t = 
998
	try find_value s env
999
	with Not_found -> 
1000
	  raise_loc loc (UnboundId (s, Env.mem s env.ids) ) in
1001
      verify loc t constr
1002
1003
      
  | Cst c -> 
1004
      verify loc (Types.constant c) constr
1005

1006
1007
1008
  | String (i,j,s,e) ->
      type_check_string loc env 0 s i j e constr precise

1009
1010
1011
1012
1013
1014
  | Dot (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t = 
        try (Types.Record.project t l) 
        with Not_found -> raise_loc loc (WrongLabel(t,l))
      in
1015
      verify loc t constr
1016
1017
1018
1019

  | RemoveField (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t = Types.Record.remove_field t l in
1020
      verify loc t constr
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030

  | Xtrans (e,b) ->
      let t = type_check env e Sequence.any true in
      let t = 
	Sequence.map_tree 
	  (fun t ->
	     let resid = Types.diff t b.br_accept in
	     let res = type_check_branches loc env t b Sequence.any true in
	     (res,resid)
	  ) t in
1031
      verify loc t constr
1032

1033
1034
1035
  | Validate (e, schema_name, elt_name) ->
      ignore (type_check env e Types.any false);
      let t = fst (Hashtbl.find !schema_elements (schema_name, elt_name)) in
1036
      verify loc t constr