(* typer.ml *)
(* TODO:
 - rewrite type-checking of operators to propagate constraints
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
(* [warning loc msg] prints a warning on [!Location.warning_ppf]: the header
   "Warning <loc>:" (location printed by [Location.print_loc]), a newline,
   the output of [Location.html_hilight] for [loc], then [msg]; the
   formatter is flushed at the end. *)
let warning loc msg =
  Format.fprintf !Location.warning_ppf "Warning %a:@\n%a%s@."
    Location.print_loc (loc,`Full)
    Location.html_hilight (loc,`Full)
    msg
(* An entry of the identifier environment: an identifier is bound either as
   a type name ([Type]) or as a value ([Val]); both carry a [Types.t]. *)
type item =
  | Type of Types.t
  | Val of Types.t
(* The typing environment:
   - [ids] maps identifiers to their [item] (type or value binding);
   - [tenv_nspref] is the namespace prefix table used to resolve qualified
     names. *)
type t = {
  ids : item Env.t;
  tenv_nspref: Ns.table;
}
(* Placeholder operations on environments.  None of them is implemented:
   each one raises [Failure "Typer.<name>"] as soon as it receives all of
   its arguments. *)
let hash _ = failwith "Typer.hash"
let compare _ _ = failwith "Typer.compare"
let dump _ppf _ = failwith "Typer.dump"
let equal _ _ = failwith "Typer.equal"
let check _ = failwith "Typer.check"

(* TODO: filter out builtin defs ? *)
(* [serialize s env] writes [env] to [s]: every binding of [env.ids] is
   emitted as a 2-bit tag (0b01 for a type, 0b10 for a value) followed by
   the identifier and its type; the 2-bit tag 0b00 closes the list and the
   namespace prefix table comes last. *)
let serialize s env =
  let put_binding id item =
    let (tag, t) =
      match item with
        | Type t -> (0b01, t)
        | Val t -> (0b10, t)
    in
    Serialize.Put.bits 2 s tag;
    Id.serialize s id;
    Types.serialize s t
  in
  Env.iter put_binding env.ids;
  Serialize.Put.bits 2 s 0b00;
  Ns.serialize_table s env.tenv_nspref

(* [deserialize s] rebuilds an environment from the format written by
   [serialize]: tagged bindings are read until the 0b00 end marker, then the
   namespace prefix table is read.  Any other tag value triggers
   [assert false]. *)
let deserialize s =
  let rec read_bindings ids =
    match Serialize.Get.bits 2 s with
      | 0b00 -> ids
      | 0b01 ->
          let id = Id.deserialize s in
          let t = Types.deserialize s in
          read_bindings (Env.add id (Type t) ids)
      | 0b10 ->
          let id = Id.deserialize s in
          let t = Types.deserialize s in
          read_bindings (Env.add id (Val t) ids)
      | _ -> assert false
  in
  (* The bindings must be consumed before the namespace table. *)
  let ids = read_bindings Env.empty in
  let ns = Ns.deserialize_table s in
  { ids = ids; tenv_nspref = ns }
(* The initial environment: no identifier bound and an empty namespace
   prefix table. *)
let empty_env =
  { ids = Env.empty; tenv_nspref = Ns.empty_table }

(* [enter_type id t env] returns [env] extended with [id] bound to the
   type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* [enter_types l env] binds, from left to right, every [(id,t)] of [l] as
   a type in [env]. *)
let enter_types l env =
  let add_type ids (id,t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left add_type env.ids l }
(* [find_type id env] returns the type bound to [id].
   Raises [Not_found] when [id] is bound to a value; a lookup failure of
   [Env.find] is propagated unchanged. *)
let find_type id env =
  match Env.find id env.ids with
    | Type t -> t
    | Val _ -> raise Not_found

(* [enter_value id t env] returns [env] extended with [id] bound as a value
   carrying the type [t]. *)
let enter_value id t env =
  { env with ids = Env.add id (Val t) env.ids }
(* [enter_values l env] binds, from left to right, every [(id,t)] of [l] as
   a value in [env]. *)
let enter_values l env =
  { env with ids =
      List.fold_left (fun accu (id,t) -> Env.add id (Val t) accu) env.ids l }
(* [find_value id env] returns the type carried by the value binding of
   [id].  Raises [Not_found] when [id] is bound to a type; a lookup failure
   of [Env.find] is propagated unchanged. *)
let find_value id env =
  match Env.find id env.ids with
    | Val t -> t
    | Type _ -> raise Not_found
(* [value_name_ok id env] is [true] when [id] is unbound in [env] or
   already bound to a value, and [false] when it is bound to a type. *)
let value_name_ok id env =
  try match Env.find id env.ids with
    | Val _ -> true
    | Type _ -> false
  with Not_found -> true
(* [iter_values env f] calls [f id t] for every value binding of [env];
   type bindings are skipped. *)
let iter_values env f =
  Env.iter
    (fun x item ->
       match item with
         | Val t -> f x t
         | Type _ -> ())
    env.ids
(* Namespaces *)

(* Installs the namespace table of [env] as the table used by
   [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  Ns.InternalPrinter.set_table env.tenv_nspref
(* Returns the namespace prefix table stored in the environment. *)
let get_ns_table tenv = tenv.tenv_nspref
(* [enter_ns p ns env] returns [env] whose namespace table additionally
   maps the prefix [p] to the namespace [ns]. *)
let enter_ns p ns env =
  { env with tenv_nspref = Ns.add_prefix p ns env.tenv_nspref }
(* [protect_error_ns loc f x] evaluates [f x]; an [Ns.UnknownPrefix]
   exception is turned into a generic error located at [loc] that names the
   undefined prefix. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    raise_loc_generic loc
      ("Undefined namespace prefix " ^ (U.to_string ns))
(* [parse_atom env loc t] resolves the qualified tag [t] through the
   namespace table of [env] and builds the corresponding atom; an unknown
   prefix is reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.tenv_nspref in
  let (ns,l) = protect_error_ns loc resolve t in
  Atoms.V.mk ns l
 
(* [parse_ns env loc ns] resolves the prefix [ns] through the namespace
   table of [env]; an unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  protect_error_ns loc (Ns.map_prefix env.tenv_nspref) ns
(* [parse_label env loc t] resolves the qualified attribute name [t]
   through the namespace table of [env] and interns it in [LabelPool]; an
   unknown prefix is reported at [loc]. *)
let parse_label env loc t =
  let (ns,l) = protect_error_ns loc (Ns.map_attr env.tenv_nspref) t in
  LabelPool.mk (ns,l)
(* [parse_record env loc f r] turns the association list [r] into a label
   map: each label is resolved with [parse_label] and each field content is
   transformed by [f].  A label occurring twice is reported at [loc] as
   "Duplicated record field". *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let fields = List.map (fun (l,x) -> (parse_label env loc l, f x)) r in
  LabelMap.from_list on_duplicate fields

(* [const env loc e] converts the expression [e] into a [Types] constant.
   [loc] is the innermost enclosing location, updated when a [LocatedExpr]
   is crossed.  Any expression that is not an integer, character, atom,
   string, pair, XML element or record literal is rejected at [loc]. *)
let rec const env loc e =
  match e with
    | LocatedExpr (loc,e) -> const env loc e
    | Pair (x,y) -> Types.Pair (const env loc x, const env loc y)
    | Xml (x,y) -> Types.Xml (const env loc x, const env loc y)
    | RecordLitt x -> Types.Record (parse_record env loc (const env loc) x)
    | String (i,j,s,c) -> Types.String (i,j,s,const env loc c)
    | Atom t -> Types.Atom (parse_atom env loc t)
    | Integer i -> Types.Integer i
    | Char c -> Types.Char c
    | _ ->
        raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)

(* Errors raised by this module (they are wrapped with a location by
   [raise_loc] / [raise_loc_str]). *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception Error of string
(* [raise_loc loc exn] raises [exn] wrapped in a [Location] exception
   covering the whole of [loc]. *)
let raise_loc loc exn =
  let located = Location (loc,`Full,exn) in
  raise located
(* [raise_loc_str loc ofs exn] raises [exn] wrapped in a [Location]
   exception that designates the character offset [ofs] within [loc]. *)
let raise_loc_str loc ofs exn = raise (Location (loc,`Char ofs,exn))
(* [error loc msg] raises [Error msg] located at [loc]. *)
let error loc msg = raise_loc loc (Error msg)
  (* Schema datastructures *)

module StringSet = Set.Make (String)

  (* just to remember imported schemas *)
let schemas = State.ref "Typer.schemas" StringSet.empty

(* Registered state tables; [derecurs_schema] looks them up with
   [(schema, item)] keys. *)
let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* [seq_vars accu r] adds to [accu] every variable bound by a [SeqCapture]
   node of the regular expression [r]. *)
let rec seq_vars accu = function
  | Epsilon | Elem _ -> accu
  | Seq (r1,r2) | Alt (r1,r2) -> seq_vars (seq_vars accu r1) r2
  | Star r | WeakStar r -> seq_vars accu r
  | SeqCapture (v,r) -> seq_vars (IdSet.add v accu) r
(* We use two intermediate representations to go from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

194
195
196
197
(* First intermediate representation.
   A [derecurs_slot] stands for one recursively defined name:
   - [ploc]   location of the definition;
   - [pid]    unique number (used as hash by [hash_derecurs]);
   - [ploop]  set while the slot is being compiled, to detect unguarded
              recursion;
   - [pdescr] body of the definition ([PDummy] until it is filled in).
   [derecurs_regexp] is the regular-expression part, kept uncompiled. *)
type derecurs_slot = {
  ploc : Location.loc;
  pid  : int;
  mutable ploop : bool;
  mutable pdescr : derecurs;
} and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

(* Second intermediate representation.  Constructors under which recursion
   is allowed (products, XML, arrows, records) refer to [slot]s.
   Fields of a [slot]:
   - [fv]    cached set of capture variables, once computed by [compute_fv];
   - [hash]  cached hash value, once computed by [SlotTable];
   - [rank1], [rank2], [gen1], [gen2]  marks used by the traversals
     ([hash_slot], [equal_slot], [fv_slot]) to cut cycles;
   - [d]     the description ([IDummy] until it is filled in). *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;
  mutable hash : int option;
  mutable rank1: int; mutable rank2: int;
  mutable gen1 : int; mutable gen2: int;
  mutable d    : descr;
}
    

(* Number of [derecurs_slot]s created so far; supplies their [pid]. *)
let counter = ref 0

(* [mk_derecurs_slot loc] creates a fresh slot located at [loc], with a new
   [pid] and a [PDummy] body to be filled in later. *)
let mk_derecurs_slot loc =
  incr counter;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* [mk_slot ()] creates a fresh slot: [IDummy] description, no cached free
   variables or hash, all traversal marks at 0. *)
let mk_slot () =
  { fv = None; hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0;
    d = IDummy }


(* This environment is used in phase (1) to eliminate recursion:
   [penv_tenv] is the typing environment, [penv_derec] maps the recursively
   defined names in scope to their slot. *)
type penv = {
  penv_tenv : t;
  penv_derec : derecurs_slot Env.t;
}

(* [penv tenv] is the phase (1) environment built on [tenv], with no
   recursive definition in scope. *)
let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }
(* Structural hash on [derecurs] terms, compatible with [equal_derecurs].
   A [PAlias] is hashed by the [pid] of its slot, so the traversal never
   enters a recursive definition.  [PDummy] must not occur. *)
let rec hash_derecurs = function
  | PDummy -> assert false
  | PAlias s ->
      s.pid
  | PType t ->
      1 + 17 * (Types.hash t)
  | POr (p1,p2) ->
      2 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PAnd (p1,p2) ->
      3 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PDiff (p1,p2) ->
      4 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PTimes (p1,p2) ->
      5 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PXml (p1,p2) ->
      6 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PArrow (p1,p2) ->
      7 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | POptional p ->
      8 + 17 * (hash_derecurs p)
  | PRecord (o,r) ->
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x ->
      11 + 17 * (Id.hash x)
  | PConstant (x,c) ->
      12 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
  | PRegexp (p,q) ->
      13 + 17 * (hash_derecurs_regexp p) + 257 * (hash_derecurs q)
(* Structural hash on the regular-expression part. *)
and hash_derecurs_regexp = function
  | PEpsilon ->
      1
  | PElem p ->
      2 + 17 * (hash_derecurs p)
  | PSeq (p1,p2) ->
      3 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PAlt (p1,p2) ->
      4 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PStar p ->
      5 + 17 * (hash_derecurs_regexp p)
  | PWeakStar p ->
      6 + 17 * (hash_derecurs_regexp p)

(* Structural equality on [derecurs] terms.  Physically equal terms are
   equal; two [PAlias] are equal only when they point to the same slot, so
   the comparison never enters a recursive definition. *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 ->
      s1 == s2
  | PType t1, PType t2 ->
      Types.equal t1 t2
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) ->
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 ->
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) ->
      (* Booleans are compared structurally, as in [equal_descr]. *)
      (o1 = o2) && (LabelMap.equal equal_derecurs r1 r2)
  | PCapture x1, PCapture x2 ->
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) ->
      (Id.equal x1 x2) && (Types.Const.equal c1 c2)
  | PRegexp (p1,q1), PRegexp (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  | _ -> false
(* Structural equality on the regular-expression part. *)
and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon ->
      true
  | PElem p1, PElem p2 ->
      equal_derecurs p1 p2
  | PSeq (p1,q1), PSeq (p2,q2)
  | PAlt (p1,q1), PAlt (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs_regexp q1 q2)
  | PStar p1, PStar p2
  | PWeakStar p1, PWeakStar p2 ->
      equal_derecurs_regexp p1 p2
  | _ -> false
(* Hash tables keyed by [derecurs] terms, using the structural hash and
   equality defined above. *)
module DerecursTable = Hashtbl.Make(
  struct
    type t = derecurs
    let equal = equal_derecurs
    let hash = hash_derecurs
  end
)

(* Hash tables keyed by a pair (regular expression, continuation); used as
   the memo table of [compile_regexp]. *)
module RE = Hashtbl.Make(
  struct
    type t = derecurs_regexp * derecurs
    let hash (p,q) =
      (hash_derecurs_regexp p) + 17 * (hash_derecurs q)
    let equal (p1,q1) (p2,q2) =
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  end
)
(* Traversal state shared by the slot traversals below: [gen] is the number
   of the current traversal, [rank] numbers the slots visited during it. *)
let gen = ref 0
and rank = ref 0
	     
(* Hash of a description.  Slots are hashed by [hash_slot], which cuts
   cycles.  [IDummy] must not occur. *)
let rec hash_descr = function
  | IDummy -> assert false
  | IType x -> Types.hash x
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)
(* Hash of a slot within the current traversal: a slot already marked with
   the current [!gen] contributes a value derived from its rank; otherwise
   it receives the next rank, is marked, and its description is hashed. *)
and hash_slot s =
  if s.gen1 = !gen then 13 * s.rank1
  else (
    incr rank;
    s.rank1 <- !rank; s.gen1 <- !gen;
    hash_descr s.d
  )
    
(* Equality of two descriptions; slots are compared by [equal_slot]. *)
let rec equal_descr d1 d2 =
  match (d1,d2) with
  | IType x1, IType x2 -> Types.equal x1 x2
  | IOr (x1,y1), IOr (x2,y2)
  | IAnd (x1,y1), IAnd (x2,y2)
  | IDiff (x1,y1), IDiff (x2,y2) -> (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2)
  | IXml (x1,y1), IXml (x2,y2)
  | IArrow (x1,y1), IArrow (x2,y2) -> (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) ->
      (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) ->
      (Id.equal x1 x2) && (Types.Const.equal y1 y2)
  | _ -> false
(* Equality of two slots within the current traversal.  The left slot is
   marked through [gen1]/[rank1], the right one through [gen2]/[rank2]:
   - both already visited: equal iff they received the same rank;
   - neither visited: both get the next rank, then their descriptions are
     compared;
   - only one visited: not equal. *)
and equal_slot s1 s2 =
  ((s1.gen1 = !gen) && (s2.gen2 = !gen) && (s1.rank1 = s2.rank2))
  ||
  ((s1.gen1 <> !gen) && (s2.gen2 <> !gen) && (
     incr rank;
     s1.rank1 <- !rank; s1.gen1 <- !gen;
     s2.rank2 <- !rank; s2.gen2 <- !gen;
     equal_descr s1.d s2.d
   ))
(* Hash tables keyed by slots.  Each hash computation and each non-trivial
   comparison starts a new traversal ([incr gen], [rank := 0]); the hash of
   a slot is cached in its [hash] field. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    let hash s =
      match s.hash with
        | Some cached -> cached
        | None ->
            incr gen; rank := 0;
            let computed = hash_slot s in
            s.hash <- Some computed;
            computed

    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        incr gen; rank := 0;
        (* Debugging aid, disabled:
           if e then Printf.eprintf "Recursive hash-consing: Equal\n" *)
        equal_slot s1 s2
      end
  end)


(* Phase (1): [derecurs env p] translates the AST type/pattern [p] into a
   [derecurs] term.  An identifier is resolved, in this order, as a
   recursive definition in scope, as a type of the typing environment, or
   else as a capture variable.  For a regular expression, every sequence
   capture variable of [r] is additionally bound to the constant
   [Sequence.nil_atom] in the continuation [q]. *)
let rec derecurs env p = match p.descr with
  | PatVar v ->
      (try PAlias (Env.find v env.penv_derec)
       with Not_found ->
         try PType (find_type v env.penv_tenv)
         with Not_found -> PCapture v)
  | SchemaVar (kind, schema, item) ->
      PType (derecurs_schema env kind schema item)
  | Recurs (p,b) -> derecurs (derecurs_def env b) p
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
  | Or (p1,p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1,p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1,p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1,p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1,p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1,p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional p -> POptional (derecurs env p)
  | Record (o,r) -> PRecord (o, parse_record env.penv_tenv p.loc (derecurs env) r)
  | Constant (x,c) -> PConstant (x,const env.penv_tenv p.loc c)
  | Cst c -> PType (Types.constant (const env.penv_tenv p.loc c))
  | Regexp (r,q) ->
      let constant_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let vars = seq_vars IdSet.empty r in
      let q = IdSet.fold constant_nil (derecurs env q) vars in
      let r = derecurs_regexp (fun p -> p) env r in
      PRegexp (r, q)
(* [derecurs_regexp vars env re] translates the regular expression [re].
   [vars] wraps every element pattern; crossing a [SeqCapture (x, _)]
   extends it so that the elements below are also intersected with
   [PCapture x]. *)
and derecurs_regexp vars env re =
  match re with
    | Epsilon -> PEpsilon
    | Elem p -> PElem (vars (derecurs env p))
    | Seq (p1,p2) ->
        PSeq (derecurs_regexp vars env p1, derecurs_regexp vars env p2)
    | Alt (p1,p2) ->
        PAlt (derecurs_regexp vars env p1, derecurs_regexp vars env p2)
    | Star p -> PStar (derecurs_regexp vars env p)
    | WeakStar p -> PWeakStar (derecurs_regexp vars env p)
    | SeqCapture (x,p) ->
        let vars_with_x elem = PAnd (vars elem, PCapture x) in
        derecurs_regexp vars_with_x env p


(* [derecurs_def env b] handles a group of mutually recursive definitions
   [b]: a fresh slot is created for every name, all the names are put in
   scope, then every body is translated in that extended environment and
   stored in its slot.  Returns the extended environment. *)
and derecurs_def env b =
  let slots = List.map (fun (v,p) -> (v,p,mk_derecurs_slot p.loc)) b in
  let derec =
    List.fold_left (fun acc (v,_,s) -> Env.add v s acc) env.penv_derec slots in
  let env = { env with penv_derec = derec } in
  List.iter (fun (_,p,s) -> s.pdescr <- derecurs env p) slots;
  env

(* [derecurs_schema env kind schema item] looks up the component [item] of
   the schema [schema] in the table selected by [kind] ([`Element], [`Type],
   [`Attribute]); with [`Any] the element, type and attribute tables are
   tried in that order.  Raises [Failure] with an explanatory message when
   no table contains the component. *)
and derecurs_schema env kind schema item =
  let elt () = fst (Hashtbl.find !schema_elements (schema, item)) in
  let typ () = Hashtbl.find !schema_types (schema, item) in
  let att () = Hashtbl.find !schema_attributes (schema, item) in
  (* Try the lookups in order; [n] is the kind name used in the message. *)
  let rec do_try n = function
    | [] ->
        let s = Printf.sprintf
                  "No %s named '%s' found in schema '%s'" n item schema in
        failwith s
    | f :: rem -> (try f () with Not_found -> do_try n rem)  in
  match kind with
    | `Element -> do_try "element" [ elt ]
    | `Type -> do_try "type" [ typ ]
    | `Attribute -> do_try "attribute" [ att ]
    | `Any -> do_try "item" [ elt; typ; att ]
(* [fv_slot s] returns the capture variables of the slot [s]: the cached
   set if there is one; otherwise the slot is marked with the current
   [!gen] and its description is traversed.  A slot already marked in the
   current traversal contributes the empty set, which cuts cycles. *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None ->
        if s.gen1 = !gen then IdSet.empty
        else (s.gen1 <- !gen; fv_descr s.d)
(* Capture variables of a description.  [IDummy] must not occur. *)
and fv_descr = function
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | IOr (d1,d2)
  | IAnd (d1,d2)
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d -> fv_descr d
  | ITimes (s1,s2)
  | IXml (s1,s2)
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_,r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
  | ICapture x | IConstant (x,_) -> IdSet.singleton x
(* [compute_fv s] fills the [fv] cache of [s] if it is still empty, using a
   fresh traversal generation. *)
let compute_fv s =
  match s.fv with
    | Some _ -> ()
    | None ->
        incr gen;
        let x = fv_slot s in
        s.fv <- Some x
(* [check_no_capture loc s] reports an error at [loc], naming one offending
   variable, when the set of capture variables [s] is not empty. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | Some x ->
        raise_loc_generic loc ("Capture variable not allowed: " ^ (Ident.to_string x))
    | None -> ()
(* Memo tables of phase (2): [compile_slot_hash] maps a [derecurs] term to
   its slot, [compile_hash] to its compiled description. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

(* Work lists filled by [compile_slot] and emptied by [flush_defs]: slots
   whose description is still to be compiled, and slots whose capture
   variables are still to be computed. *)
let todo_defs = ref []
let todo_fv = ref []

(* Phase (2): [compile p] returns the description of [p], memoized in
   [compile_hash]. *)
let rec compile p =
  let compute () =
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    c
  in
  try DerecursTable.find compile_hash p
  with Not_found -> compute ()
(* Actual compilation of a [derecurs] term.  An alias is expanded in place;
   its [ploop] flag is set during the expansion, so that reaching the same
   alias again without crossing a slot-creating constructor is reported as
   unguarded recursion.  Components of products, XML elements, arrows and
   records are compiled lazily through [compile_slot]. *)
and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r = compile v.pdescr in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) ->  IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q
(* [compile_regexp r q] compiles the regular expression [r] followed by the
   continuation [q] into a union of descriptions.  [memo] records the
   (regexp, continuation) pairs already expanded during this call, which
   makes the expansion of stars terminate.  [accu] is the union built so
   far ([None] when still empty). *)
and compile_regexp r q =
  let memo = RE.create 17 in
  let add accu i =
    match accu with None -> Some i | Some j -> Some (IOr (j,i)) in
  let get = function Some x -> x | None -> assert false in
  (* [queue accu k] continues with the continuation [k]: a nested regexp is
     expanded further, anything else ends the expansion.
     NOTE(review): in the fallback branch [q] is the [q] argument of
     [compile_regexp], not the value being matched - confirm that this is
     intended. *)
  let rec queue accu = function
    | PRegexp (r,q) -> aux accu r q
    | _ -> add accu (compile q)
  and aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon -> queue accu q
        | PElem p ->
(* Be careful not to create pairs with same second component *)
            let rec extract = function
              | PConstant (x,v) -> `Const (x,v)
              | POr (x,y) ->
                  (match extract x, extract y with
                    | `Pat x, `Pat y -> `Pat (POr (x,y))
                    | x, y -> `Or (x,y))
              | p -> `Pat p
            in
            let rec mk accu = function
              | `Const (x,v) ->
                  (* A constant binding consumes no element: it is
                     intersected with the compiled continuation. *)
                  (match queue None q with
                    | Some q -> add accu (IAnd (IConstant (x,v), q))
                    | None -> accu)
              | `Or (x,y) -> mk (mk accu x) y
              | `Pat p ->
                  add accu (ITimes (compile_slot p, compile_slot q))
            in
            mk accu (extract p)
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        (* The two stars differ only in the order of their alternatives. *)
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  get (aux None r q)
(* [compile_slot p] returns the slot associated with [p], memoized in
   [compile_slot_hash].  A new slot is created empty: its description and
   its capture variables are computed later by [flush_defs]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let s = mk_slot () in
    todo_defs := (s,p) :: !todo_defs;
    todo_fv := s :: !todo_fv;
    DerecursTable.add compile_slot_hash p s;
    s
(* Timer started and stopped by [flush_defs] around the computation of
   capture variables. *)
let timer_fv = Stats.Timer.create "Typer.fv"
(* [flush_defs ()] compiles the description of every pending slot (the
   compilation may itself queue new slots, hence the loop), then computes
   the capture variables of all the slots queued in [todo_fv]. *)
let rec flush_defs () =
  match !todo_defs with
    | [] ->
        Stats.Timer.start timer_fv;
        List.iter compute_fv !todo_fv;
        todo_fv := [];
        Stats.Timer.stop timer_fv ()
    | (s,p)::t ->
        (* Pop the entry before compiling it. *)
        todo_defs := t;
        s.d <- compile p;
        flush_defs ()
	
(* Phase (3) memo tables: the type node ([typ_nodes]) and the pattern node
   ([pat_nodes]) already built for a slot. *)
let typ_nodes = SlotTable.create 67
and pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: [typ d] builds the [Types] description denoted by
   [d].  Capture variables and constant bindings are not expected here
   ([assert false]). *)
let rec typ = function
  | IType t -> t
  | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
  | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
  | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
  | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IOptional s -> Types.Record.or_absent (typ s)
  | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_node r)
  | IDummy | ICapture _ | IConstant (_,_) -> assert false
(* [typ_node s] returns the type node of the slot [s], memoized in
   [typ_nodes].  The node is registered before its description is computed,
   so that recursive references to [s] find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let x = Types.make () in
    SlotTable.add typ_nodes s x;
    Types.define x (typ s.d);
    x
      
(* Phase (3) for patterns: a description without capture variables is
   translated as a type constraint, any other one by [pat_aux]. *)
let rec pat d : Patterns.descr =
  match IdSet.is_empty (fv_descr d) with
    | true -> Patterns.constr (typ d)
    | false -> pat_aux d
    
(* Translation of a description that has capture variables.  A difference
   is accepted only when its right-hand side has no capture variable;
   optional fields and arrows are rejected with [Patterns.Error].  [IType]
   cannot occur here since it has no capture variable. *)
and pat_aux = function
  | IDummy -> assert false
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ ->
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IRecord (o,r) ->
      (* Fields without capture variables go into a single record type
         constraint; each remaining field is replaced by [Types.any_node]
         in that constraint and contributes its own record pattern. *)
      let pats = ref [] in
      let aux l s =
        if IdSet.is_empty (fv_slot s) then typ_node s
        else
          ( pats := Patterns.record l (pat_node s) :: !pats;
            Types.any_node )
      in
      let constr = Types.record' (o,LabelMap.mapi aux r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IType _ -> assert false
      
(* [pat_node s] returns the pattern node of the slot [s], memoized in
   [pat_nodes].  The node is registered before its description is computed;
   if that computation fails, the entry is removed again before the
   exception is re-raised. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let x = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s x;
      Patterns.define x (pat s.d);
      x
    with exn -> SlotTable.remove pat_nodes s; raise exn
      (* For the toplevel ... *)
(* [type_defs env b] processes a group of (possibly mutually recursive)
   type definitions [b] and returns the list of [(name, type)] pairs.
   - Fails if a name is already bound in [env].
   - Fails if a definition contains a capture variable.
   - Warns when a definition with a real location yields an empty type.
   Every resulting type is registered with [Types.Print.register_global]. *)
let type_defs env b =
  List.iter
    (fun (v,p) ->
       if Env.mem v env.ids
       then raise_loc_generic p.loc ("Identifier " ^ (Ident.to_string v) ^ " is already bound")
    ) b;
  let penv = derecurs_def (penv env) b in
  let b = List.map (fun (v,p) -> (v,p,compile (derecurs penv p))) b in
  flush_defs ();
  let b =
    List.map
      (fun (v,p,s) ->
         check_no_capture p.loc (fv_descr s);
         let t = typ s in
         if (p.loc <> noloc) && (Types.is_empty t) then
           warning p.loc
             ("This definition yields an empty type for " ^ (Ident.to_string v));
         (v,t)) b in
  List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
  b
(* [dump_types ppf env] prints on [ppf] the name of every type bound in
   [env], each preceded by a space. *)
let dump_types ppf env =
  Env.iter (fun v ->
              function
                  (Type _) -> Format.fprintf ppf " %a" Ident.print v
                | Val _ -> ()) env.ids
(* [dump_ns ppf env] prints the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  Ns.dump_table ppf env.tenv_nspref
(* [do_typ loc r] compiles the [derecurs] term [r] into a type node; fails
   at [loc] if it contains a capture variable. *)
let do_typ loc r =
  let s = compile_slot r in
  flush_defs ();
  check_no_capture loc (fv_slot s);
  typ_node s
(* [typ env p] translates the AST type [p] into a type node, in the typing
   environment [env]. *)
let typ env p =
  do_typ p.loc (derecurs (penv env) p)
(* [pat env p] translates the AST pattern [p] into a pattern node, in the
   typing environment [env].  A [Patterns.Error] is reported at the
   location of [p], and a located error without a real location is
   relocated to [p]. *)
let pat env p =
  let s = compile_slot (derecurs (penv env) p) in
  flush_defs ();
  try pat_node s
  with Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,_,exn) when loc = noloc -> raise (Location (p.loc, `Full, exn))
(* II. Build skeleton *)

type type_fun = Types.t -> bool -> Types.t

(* Hooks for operators.  They are initialised with functions that fail
   ([assert false]); presumably another module installs the real
   implementations - confirm where. *)
let mk_unary_op = ref (fun _ _ -> assert false)
let typ_unary_op = ref (fun _ _ _ -> assert false)
let mk_binary_op = ref (fun _ _ -> assert false)
let typ_binary_op = ref (fun _ _ _ _ -> assert false)
(* Sets of free variables. *)
module Fv = IdSet

(* A branch together with the branches that were created while its body
   was being translated. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected at the current nesting level (see [branches]). *)
let cur_branch : branch list ref = ref []
(* [exp loc fv e] pairs the free variables [fv] with a typed expression
   node of descriptor [e] located at [loc]; its type starts as
   [Types.empty]. *)
let exp loc fv e =
  fv,
  { Typed.exp_loc = loc;
    Typed.exp_typ = Types.empty;
    Typed.exp_descr = e;
  }
(* [expr env loc e] translates the AST expression [e] into a typed
   expression skeleton and returns it with its set of free variables.
   [loc] is the innermost enclosing location, updated when a [LocatedExpr]
   is crossed.  Types and patterns occurring in [e] are translated in
   [env]; [NamespaceIn] extends the namespace table for its body. *)
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
  | Forget (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Forget (e,t))
  | Var s ->
      exp loc (Fv.singleton s) (Typed.Var s)
  | Apply (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Apply (e1,e2))
  | Abstraction a ->
      let iface = List.map (fun (t1,t2) -> (typ env t1, typ env t2))
                    a.fun_iface in
      (* The type of the function is the intersection of its arrows. *)
      let t = List.fold_left
                (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
                Types.any iface in
      let iface = List.map
                    (fun (t1,t2) -> (Types.descr t1, Types.descr t2))
                    iface in
      let (fv0,body) = branches env a.fun_body in
      (* The function name, if any, is bound inside the body. *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | (Integer _ | Char _ | Atom _) as c ->
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Pair (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
  | RemoveField (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
  | RecordLitt r ->
      (* Free variables of the fields are accumulated on the side. *)
      let fv = ref Fv.empty in
      let r = parse_record env loc
                (fun e ->
                   let (fv2,e) = expr env loc e
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr env loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (* Only unary and binary operators are expected here. *)
      (try
         (match ltes with
            | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
            | [e1;e2] -> exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)

  | Match (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
  | Map (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
  | Xtrans (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
  | Validate (e,schema,elt) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Validate (e, schema, elt))
  | Try (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
  | NamespaceIn (pr,ns,e) ->
      let env = enter_ns pr ns env in
      expr env loc e
  | Ref (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Ref (e,t))
  (* [branches env b] translates a list of (pattern, body) branches.
     Returns the free variables of the bodies, minus the variables bound by
     their own pattern, and the typed branches with the union of the types
     accepted by the patterns.  A warning is emitted for a pattern variable
     that is not used in its body.  The translated branch is also pushed on
     [cur_branch], together with the branches created while translating its
     body. *)
  and branches env b =
    let fv = ref Fv.empty in
    let accept = ref Types.empty in
    let branch (p,e) =
      (* Collect the branches nested in the body in a fresh list. *)
      let cur_br = !cur_branch in
      cur_branch := [];
      let (fv2,e) = expr env noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat env p in
      (match Fv.pick (Fv.diff (Patterns.fv p) fv2) with
        | None -> ()
        | Some x ->
            let x = U.to_string (Id.value x) in
            warning br_loc
              ("The capture variable " ^ x ^
               " is declared in the pattern but not used in the body of this branch. It might be a misspelled type or name (if not use _ instead)."));
      let fv2 = Fv.diff fv2 (Patterns.fv p) in
      fv := Fv.cup !fv fv2;
      accept := Types.cup !accept (Types.descr (Patterns.accept p));
      let br =
        {
          Typed.br_loc = br_loc;
          (* A branch without a real location starts out marked as used. *)
          Typed.br_used = (br_loc = noloc);
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: cur_br;
      br in
    let b = List.map branch b in
    (!fv,
     {
       Typed.br_typ = Types.empty;
       Typed.br_branches = b;
       Typed.br_accept = !accept;
       Typed.br_compiled = None;
     }
    )
(* [expr env e] translates the AST expression [e], starting without a
   location, and drops the set of free variables. *)
let expr env e = snd (expr env noloc e)
(* [let_decl env p e] builds the typed skeleton of a let declaration that
   binds the pattern [p] to the expression [e]; nothing is compiled yet. *)
let let_decl env p e =
  { Typed.let_pat = pat env p;
    Typed.let_body = expr env e;
    Typed.let_compiled = None }

(* Hide global "typing/parsing" environment *)

(* III. Type-checks *)

open Typed

922
923
(* [require loc t s] fails at [loc] with [Constraint (t, s)] unless [t] is
   a subtype of [s]. *)
let require loc t s =
  if not (Types.subtype t s) then raise_loc loc (Constraint (t, s))
(* [verify loc t s] checks that [t] is a subtype of [s] (see [require]) and
   returns [t]. *)
let verify loc t s =
  require loc t s; t
(* [check_str loc ofs t s] returns [t] when it is a subtype of [s];
   otherwise it fails with [Constraint (t, s)] located at the character
   offset [ofs] of [loc]. *)
let check_str loc ofs t s =
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

(* [should_have loc constr s] fails at [loc] with [ShouldHave (constr, s)]. *)
let should_have loc constr s =
  raise_loc loc (ShouldHave (constr,s))
(* [should_have_str loc ofs constr s] fails with [ShouldHave (constr, s)]
   located at the character offset [ofs] of [loc]. *)
let should_have_str loc ofs constr s =
  raise_loc_str loc ofs (ShouldHave (constr,s))
(* [flatten loc arg constr precise] types a flattening operation.  [arg] is
   the type-checker of the argument: it takes a constraint and a precision
   flag.  The argument is checked against [sconstr'], a sequence type
   derived from [constr].  When the derived element type [constr'] is a
   subtype of [constr] ([exact]), the argument is typed with the requested
   precision and, if no precise result is wanted, [constr] itself is
   returned; otherwise the argument is typed precisely and its flattened
   type is returned.  [loc] is not used. *)
let flatten loc arg constr precise =
  let constr' = Sequence.star
                  (Sequence.approx (Types.cap Sequence.any constr)) in
  let sconstr' = Sequence.star constr' in
  let exact = Types.subtype constr' constr in
  if exact then
    let t = arg sconstr' precise in
    if precise then Sequence.flatten t else constr
  else
    let t = arg sconstr' true in
    Sequence.flatten t
(* [type_check env e constr precise] type-checks the expression [e]
   against the constraint [constr].  The result is the computed type when
   [precise] is set and [constr] otherwise; it is also accumulated (by
   union) into the [exp_typ] field of [e]. *)
let rec type_check env e constr precise =
  let d = type_check' e.exp_loc env e.exp_descr constr precise in
  let d = if precise then d else constr in
  e.exp_typ <- Types.cup e.exp_typ d;
  d
and type_check' loc env e constr precise = match e with
957
958
959
  | Forget (e,t) ->
      let t = Types.descr t in
      ignore (type_check env e t false);
960
      verify loc t constr
961

962
  | Abstraction a ->
963
964
965
      let t =
	try Types.Arrow.check_strenghten a.fun_typ constr 
	with Not_found -> 
966
967
	  should_have loc constr
	    "but the interface of the abstraction is not compatible"
968
      in
969
970
      let env = match a.fun_name with
	| None -> env
971
	| Some f -> enter_value f a.fun_typ env in
972
973
      List.iter 
	(fun (t1,t2) ->
974
975
976
	   let acc = a.fun_body.br_accept in 
	   if not (Types.subtype t1 acc) then
	     raise_loc loc (NonExhaustive (Types.diff t1 acc));
977
	   ignore (type_check_branches loc env t1 a.fun_body t2 false)
978
979
	) a.fun_iface;
      t
980

981
982
  | Match (e,b) ->
      let t = type_check env e b.br_accept true in
983
      type_check_branches loc env t b constr precise
984
985
986

  | Try (e,b) ->
      let te = type_check env e constr precise in
987
      let tb = type_check_branches loc env Types.any b constr precise in
988
      Types.cup te tb
989

990
991
  | Pair (e1,e2) ->
      type_check_pair loc env e1 e2 constr precise
992

993
994
  | Xml (e1,e2) ->
      type_check_pair ~kind:`XML loc env e1 e2 constr precise
995

996
  | RecordLitt r ->
997
998
999
1000
1001
1002
1003
1004
      type_record loc env r constr precise

  | Map (e,b) ->
      type_map loc env false e b constr precise

  | Transform (e,b) ->
      flatten loc (type_map loc env true e b) constr precise

1005
1006
1007
1008
  | Apply (e1,e2) ->
      let t1 = type_check env e1 Types.Arrow.any true in
      let t1 = Types.Arrow.get t1 in
      let dom = Types.Arrow.domain t1 in
1009
1010
1011
1012
1013
1014
1015
      let res =
	if Types.Arrow.need_arg t1 then
	  let t2 = type_check env e2 dom true in
	  Types.Arrow.apply t1 t2
	else
	  (ignore (type_check env e2 dom false); Types.Arrow.apply_noarg t1)
      in
1016
      verify loc res constr
1017
1018

  | UnaryOp (o,e) ->
1019
1020
      let t = !typ_unary_op o loc (type_check env e) constr precise in
      verify loc t constr
1021
1022

  | BinaryOp (o,e1,e2) ->
1023
1024
1025
      let t = !typ_binary_op o loc 
		(type_check env e1) (type_check env e2) constr precise in
      verify loc t constr
1026
1027
1028

  | Var s -> 
      let t = 
1029
	try find_value s env
1030
	with Not_found -> 
1031
	  raise_loc loc