typer.ml 44.1 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraint
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
(* Print a warning for source location [loc] on [!Location.warning_ppf]:
   the location, the highlighted source excerpt, then the message [msg]. *)
let warning loc msg =
  let where = (loc, `Full) in
  Format.fprintf !Location.warning_ppf "Warning %a:@\n%a%s@."
    Location.print_loc where
    Location.html_hilight where
    msg

17
18
(* What an identifier is bound to in the environment.  Both cases carry a
   [Types.t]: [Type] is entered by [enter_type], [Val] by [enter_value]. *)
type item =
  | Type of Types.t
  | Val of Types.t
20

21
(* The typing environment. *)
type t = {
  ids : item Env.t;              (* identifiers bound to types or values *)
  ns : Ns.table;                 (* namespace prefix table *)
  cu : Types.CompUnit.t Env.t;   (* compilation units, by identifier *)
}
26

27
28
29
30
31
(* Placeholder operations on environments: none of them is implemented, and
   each one fails with a [Failure] carrying its own qualified name. *)
let hash = fun _ -> failwith "Typer.hash"
let compare = fun _ _ -> failwith "Typer.compare"
let dump = fun _ _ -> failwith "Typer.dump"
let equal = fun _ _ -> failwith "Typer.equal"
let check = fun _ -> failwith "Typer.check"
32
33

(* TODO: filter out builtin defs ? *)
34
35
36
37
(* Write one binding to [s]: a one-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type. *)
let serialize_item s item =
  let (tag, t) =
    match item with
      | Type t -> (0, t)
      | Val t -> (1, t) in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

38
(* Write the identifier bindings of [env], then its namespace table, to [s].
   The [cu] field is not written. *)
let serialize s env =
  let { ids = bindings; ns = table; cu = _ } = env in
  Serialize.Put.env Id.serialize serialize_item Env.iter s bindings;
  Ns.serialize_table s table
41

42
43
44
45
46
(* Read back one binding written by [serialize_item]: a one-bit tag selects
   [Type] (0) or [Val] (1), then the type itself is read. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

47
(* Rebuild an environment from [s]: first the identifier bindings, then the
   namespace table (same order as [serialize]).  The [cu] field starts
   empty. *)
let deserialize s =
  let bindings =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let table = Ns.deserialize_table s in
  { ids = bindings; ns = table; cu = Env.empty }
52
53


54
55
(* The environment with no identifier, no compilation unit and the empty
   namespace table. *)
let empty_env =
  { cu = Env.empty;
    ns = Ns.empty_table;
    ids = Env.empty }

60
61
(* Hook mapping a compilation unit to its typing environment (this is how
   [find_type_global] and [find_value_global] use it).  The initial function
   always fails with [assert false]; presumably another module installs the
   real implementation -- TODO confirm where. *)
let from_comp_unit = ref (fun cu -> assert false)

62
63
64
65
66
67
68
69
(* [enter_type id t env] binds [id] to the type [t] in [env]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }

(* [enter_types l env] binds every [(id, t)] of [l], from left to right. *)
let enter_types l env =
  let add ids (id, t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left add env.ids l }
(* [find_type id env] returns the type bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a value. *)
let find_type id env =
  let as_type = function
    | Type t -> t
    | Val _ -> raise Not_found in
  as_type (Env.find id env.ids)
71

72
73
74
75
76
(* [find_type_global cu id env] looks the compilation unit named [cu] up in
   [env.cu], obtains its environment through [!from_comp_unit], and returns
   the type bound to [id] there.  Raises [Not_found] if either lookup
   fails. *)
let find_type_global cu id env =
  let unit_env = !from_comp_unit (Env.find cu env.cu) in
  find_type id unit_env

77
(* [enter_value id t env] binds [id] to a value of type [t] in [env]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }

(* [enter_values l env] binds every [(id, t)] of [l], from left to right. *)
let enter_values l env =
  let add ids (id, t) = Env.add id (Val t) ids in
  { env with ids = List.fold_left add env.ids l }
82
83
(* [find_value id env] returns the type of the value bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a type. *)
let find_value id env =
  let as_value = function
    | Val t -> t
    | _ -> raise Not_found in
  as_value (Env.find id env.ids)
86
87
88
(* [find_value_global cu id env] returns the type of value [id] in the
   environment that [!from_comp_unit] gives for [cu].  Here [cu] is passed to
   the hook directly (no lookup in [env.cu]); the [env] argument is not
   used. *)
let find_value_global cu id env =
  find_value id (!from_comp_unit cu)
89
	
90
91
92
93
94
95
(* [value_name_ok id env] is [true] when [id] is unbound or already bound to
   a value, and [false] when it is bound to a type. *)
let value_name_ok id env =
  match (try Some (Env.find id env.ids) with Not_found -> None) with
    | None -> true
    | Some (Val _) -> true
    | Some _ -> false

96
97
98
99
(* [iter_values env f] calls [f id t] for every value binding of [env];
   type bindings are skipped. *)
let iter_values env f =
  let visit x item =
    match item with
      | Val t -> f x t
      | _ -> () in
  Env.iter visit env.ids
100

101
102
103
104

(* [enter_cu x cu env] registers the compilation unit [cu] under the
   identifier built from [x]. *)
let enter_cu x cu env =
  let units = Env.add (ident x) cu env.cu in
  { env with cu = units }

105
(* Namespaces *)
106

107
(* Install the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table

(* The namespace table of an environment. *)
let get_ns_table tenv =
  let table = tenv.ns in
  table

(* [enter_ns p ns env] adds the prefix [p], bound to [ns], to the namespace
   table of [env]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
114

115
116
117
118
119
(* [protect_error_ns loc f x] computes [f x]; an [Ns.UnknownPrefix] escaping
   from it is turned into a generic error located at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix prefix ->
    let msg = "Undefined namespace prefix " ^ (U.to_string prefix) in
    raise_loc_generic loc msg
120

121
(* Resolve the qualified tag [t] against the namespace table of [env] and
   build the corresponding atom.  An unknown prefix is reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.ns in
  let (ns, name) = protect_error_ns loc resolve t in
  Atoms.V.mk ns name
 
(* Resolve the prefix [ns] against the namespace table of [env].
   An unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
127

128
(* Resolve the qualified attribute name [t] against the namespace table of
   [env] and intern it in [LabelPool].  An unknown prefix is reported at
   [loc]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let qname = protect_error_ns loc resolve t in
  LabelPool.mk qname
131

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
(* [parse_record env loc f r] turns the association list [r] into a label
   map: labels are resolved with [parse_label] and field contents mapped
   through [f].  A label occurring twice is reported at [loc]. *)
let parse_record env loc f r =
  let field (l, x) = (parse_label env loc l, f x) in
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  LabelMap.from_list on_duplicate (List.map field r)

(* [const env loc e] converts the constant expression [e] into a
   [Types.const].  Pairs, XML elements, record literals and strings are
   converted recursively; atoms go through [parse_atom].  Any other
   expression is rejected with an error at the innermost enclosing
   location. *)
let rec const env loc e =
  match e with
    | LocatedExpr (loc, e) -> const env loc e
    | Integer i -> Types.Integer i
    | Char c -> Types.Char c
    | Atom t -> Types.Atom (parse_atom env loc t)
    | Pair (x, y) -> Types.Pair (const env loc x, const env loc y)
    | Xml (x, y) -> Types.Xml (const env loc x, const env loc y)
    | RecordLitt x -> Types.Record (parse_record env loc (const env loc) x)
    | String (i, j, s, c) -> Types.String (i, j, s, const env loc c)
    | _ ->
        raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
149

150
(* Errors reported by the typer.  They are raised wrapped in a [Location]
   exception by [raise_loc] / [raise_loc_str]. *)

(* A pattern matching does not cover the given (non-empty) remainder. *)
exception NonExhaustive of Types.descr
(* [Constraint (t, s)]: [t] was required to be a subtype of [s]. *)
exception Constraint of Types.descr * Types.descr
(* An expected type together with an explanatory message. *)
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
(* Free-form error message. *)
exception Error of string
157

158
159
(* [raise_loc loc exn] raises [exn] attached to the whole location [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* [raise_loc_str loc ofs exn] raises [exn] attached to character offset
   [ofs] inside [loc]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* [error loc msg] raises the typer error [Error msg] at [loc]. *)
let error loc msg =
  raise_loc loc (Error msg)
161

162
163
164
  (* Schema datastructures *)

module StringSet = Set.Make (String)

(* Just to remember imported schemas. *)
let schemas = State.ref "Typer.schemas" StringSet.empty

(* Tables of schema components.  [derecurs_schema] looks them up with the
   key [(schema, item)]. *)
let schema_types =
  State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements =
  State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes =
  State.ref "Typer.schema_attributes" (Hashtbl.create 51)
172

173
174
175
176
177
178
179
180
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* [seq_vars accu r] adds to [accu] every variable bound by a [SeqCapture]
   anywhere inside the regular expression [r]. *)
let rec seq_vars accu re =
  match re with
    | Epsilon | Elem _ -> accu
    | Star r | WeakStar r -> seq_vars accu r
    | SeqCapture (v, r) -> seq_vars (IdSet.add v accu) r
    | Seq (left, right) | Alt (left, right) ->
        let accu = seq_vars accu left in
        seq_vars accu right

181
182
183
184
185
186
187
188
189
190
191
192
193
194
(* We use two intermediate representations from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

195
196
197
198
(* First intermediate form (output of phase 1). *)

(* A named recursive definition. *)
type derecurs_slot = {
  ploc : Location.loc;        (* location of the definition *)
  pid  : int;                 (* unique number, used as the hash value *)
  mutable ploop : bool;       (* set while the slot is being compiled, to
                                 detect unguarded recursion *)
  mutable pdescr : derecurs;  (* body; [PDummy] until filled in *)
}
and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
      (* a regular expression and what follows it *)
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

(* Second intermediate form (output of phase 2).  Constructors that go
   through a [slot] are the ones under which recursion is allowed. *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;      (* cached free variables, see [compute_fv] *)
  mutable hash : int option;   (* cached hash, see [SlotTable] *)
  (* Marks used by the traversals ([hash_slot], [equal_slot], [fv_slot]):
     a mark is valid only when its generation equals the global [gen]. *)
  mutable rank1 : int; mutable rank2 : int;
  mutable gen1 : int; mutable gen2 : int;
  mutable d : descr;           (* content; [IDummy] until defined *)
}
244
245
246
247
248
249
250
251
252
253
254
255
256
    

(* Source of the unique [pid] numbers. *)
let counter = ref 0

(* Allocate an empty recursion slot for a definition located at [loc]; its
   [pid] is the freshly incremented value of [counter]. *)
let mk_derecurs_slot loc =
  incr counter;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* Allocate a fresh, still undefined slot: dummy content, no cached free
   variables or hash, all traversal marks at zero. *)
let mk_slot () =
  { d = IDummy;
    fv = None;
    hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0 }


(* This environment is used in phase (1) to eliminate recursion *)
(* Environment used in phase (1) to eliminate recursion. *)
type penv = {
  penv_tenv : t;                     (* enclosing typing environment *)
  penv_derec : derecurs_slot Env.t;  (* recursive definitions in scope *)
}

(* Phase (1) environment over [tenv] with no recursive definition. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
262

263
(* Structural hash on the first intermediate form, compatible with
   [equal_derecurs].  Recursion stops at [PAlias], whose hash is the slot
   number, so the traversal terminates on cyclic definitions.  [PDummy] must
   never be reached. *)
let rec hash_derecurs = function
  | PDummy -> assert false
  | PAlias s -> s.pid
  | PType t -> 1 + 17 * (Types.hash t)
  | POr (p1, p2) -> hash_derecurs_pair 2 p1 p2
  | PAnd (p1, p2) -> hash_derecurs_pair 3 p1 p2
  | PDiff (p1, p2) -> hash_derecurs_pair 4 p1 p2
  | PTimes (p1, p2) -> hash_derecurs_pair 5 p1 p2
  | PXml (p1, p2) -> hash_derecurs_pair 6 p1 p2
  | PArrow (p1, p2) -> hash_derecurs_pair 7 p1 p2
  | POptional p -> 8 + 17 * (hash_derecurs p)
  | PRecord (o, r) ->
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x -> 11 + 17 * (Id.hash x)
  | PConstant (x, c) ->
      12 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
  | PRegexp (p, q) ->
      13 + 17 * (hash_derecurs_regexp p) + 257 * (hash_derecurs q)

(* Hash of a binary node with constructor tag [tag]. *)
and hash_derecurs_pair tag p1 p2 =
  tag + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)

and hash_derecurs_regexp = function
  | PEpsilon -> 1
  | PElem p -> 2 + 17 * (hash_derecurs p)
  | PSeq (r1, r2) -> hash_derecurs_regexp_pair 3 r1 r2
  | PAlt (r1, r2) -> hash_derecurs_regexp_pair 4 r1 r2
  | PStar r -> 5 + 17 * (hash_derecurs_regexp r)
  | PWeakStar r -> 6 + 17 * (hash_derecurs_regexp r)

(* Hash of a binary regexp node with constructor tag [tag]. *)
and hash_derecurs_regexp_pair tag r1 r2 =
  tag + 17 * (hash_derecurs_regexp r1) + 257 * (hash_derecurs_regexp r2)
304
305

(* Structural equality on the first intermediate form.  Physically equal
   terms are equal; aliases are equal only when they point to the very same
   slot, so the comparison never follows a recursive definition. *)
let rec equal_derecurs p1 p2 =
  if p1 == p2 then true
  else match p1, p2 with
    | PAlias s1, PAlias s2 -> s1 == s2
    | PType t1, PType t2 -> Types.equal t1 t2
    | POr (a1, b1), POr (a2, b2)
    | PAnd (a1, b1), PAnd (a2, b2)
    | PDiff (a1, b1), PDiff (a2, b2)
    | PTimes (a1, b1), PTimes (a2, b2)
    | PXml (a1, b1), PXml (a2, b2)
    | PArrow (a1, b1), PArrow (a2, b2) ->
        (equal_derecurs a1 a2) && (equal_derecurs b1 b2)
    | POptional a1, POptional a2 -> equal_derecurs a1 a2
    | PRecord (o1, r1), PRecord (o2, r2) ->
        (o1 == o2) && (LabelMap.equal equal_derecurs r1 r2)
    | PCapture x1, PCapture x2 -> Id.equal x1 x2
    | PConstant (x1, c1), PConstant (x2, c2) ->
        (Id.equal x1 x2) && (Types.Const.equal c1 c2)
    | PRegexp (r1, q1), PRegexp (r2, q2) ->
        (equal_derecurs_regexp r1 r2) && (equal_derecurs q1 q2)
    | _ -> false

(* Structural equality on regular expressions. *)
and equal_derecurs_regexp r1 r2 =
  match r1, r2 with
    | PEpsilon, PEpsilon -> true
    | PElem p1, PElem p2 -> equal_derecurs p1 p2
    | PSeq (a1, b1), PSeq (a2, b2)
    | PAlt (a1, b1), PAlt (a2, b2) ->
        (equal_derecurs_regexp a1 a2) && (equal_derecurs_regexp b1 b2)
    | PStar a1, PStar a2
    | PWeakStar a1, PWeakStar a2 -> equal_derecurs_regexp a1 a2
    | _ -> false
340

341
342
343
344
345
346
347
348
349
350
351
(* Hash tables keyed by first-intermediate-form terms, compared
   structurally. *)
module DerecursKey = struct
  type t = derecurs
  let equal = equal_derecurs
  let hash = hash_derecurs
end

module DerecursTable = Hashtbl.Make (DerecursKey)

(* Hash tables keyed by a regular expression together with the term that
   follows it. *)
module REKey = struct
  type t = derecurs_regexp * derecurs

  let equal (r1, q1) (r2, q2) =
    (equal_derecurs_regexp r1 r2) && (equal_derecurs q1 q2)

  let hash (r, q) =
    (hash_derecurs_regexp r) + 17 * (hash_derecurs q)
end

module RE = Hashtbl.Make (REKey)
358

359
360
361
362
(* Global state of the slot traversals.  [gen] is the current generation: a
   mark stored in a slot ([gen1]/[gen2]) is valid only when it equals [!gen],
   so incrementing [gen] invalidates all marks at once.  [rank] numbers the
   slots in the order the current traversal reaches them. *)
let gen = ref 0
let rank = ref 0
	     
(* Hash of a second-intermediate-form term, following slots.  The caller
   must start a new traversal first ([incr gen; rank := 0], as done in
   [SlotTable.hash]).  The operand expressions are deliberately left in
   their original shape: [hash_slot] has side effects, so the order in which
   sub-terms are visited matters. *)
let rec hash_descr d =
  match d with
    | IDummy -> assert false
    | IType x -> Types.hash x
    | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
    | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
    | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
    | IOptional d -> 4 + 17 * (hash_descr d)
    | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
    | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
    | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
    | IRecord (o,r) ->
        (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x,y) ->
        11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)

(* A slot already reached in this traversal hashes to a function of its
   rank, which cuts cycles; otherwise it receives the next rank and its
   content is hashed. *)
and hash_slot s =
  if s.gen1 = !gen then 13 * s.rank1
  else begin
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end
    
(* Equality of second-intermediate-form terms, following slots.  The caller
   must start a new traversal first ([incr gen; rank := 0], as done in
   [SlotTable.equal]). *)
let rec equal_descr d1 d2 =
  match (d1, d2) with
    | IType x1, IType x2 -> Types.equal x1 x2
    | IOr (x1,y1), IOr (x2,y2)
    | IAnd (x1,y1), IAnd (x2,y2)
    | IDiff (x1,y1), IDiff (x2,y2) ->
        (equal_descr x1 x2) && (equal_descr y1 y2)
    | IOptional x1, IOptional x2 -> equal_descr x1 x2
    | ITimes (x1,y1), ITimes (x2,y2)
    | IXml (x1,y1), IXml (x2,y2)
    | IArrow (x1,y1), IArrow (x2,y2) ->
        (equal_slot x1 x2) && (equal_slot y1 y2)
    | IRecord (o1,r1), IRecord (o2,r2) ->
        (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
    | ICapture x1, ICapture x2 -> Id.equal x1 x2
    | IConstant (x1,y1), IConstant (x2,y2) ->
        (Id.equal x1 x2) && (Types.Const.equal y1 y2)
    | _ -> false

(* Two slots both reached in this traversal are equal iff they were reached
   at the same rank.  Two slots both unreached are marked with a common new
   rank (the left one in [rank1]/[gen1], the right one in [rank2]/[gen2])
   and their contents compared.  A reached slot never equals an unreached
   one. *)
and equal_slot s1 s2 =
  if (s1.gen1 = !gen) && (s2.gen2 = !gen) && (s1.rank1 = s2.rank2) then true
  else if (s1.gen1 <> !gen) && (s2.gen2 <> !gen) then begin
    incr rank;
    s1.rank1 <- !rank; s1.gen1 <- !gen;
    s2.rank2 <- !rank; s2.gen2 <- !gen;
    equal_descr s1.d s2.d
  end
  else false
  
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
(* Hash tables keyed by slots, compared up to the recursive equality
   [equal_slot]. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    (* Start a fresh traversal: invalidate all marks and restart ranks. *)
    let new_traversal () =
      incr gen;
      rank := 0

    (* The hash is computed once per slot and cached in its [hash] field. *)
    let hash s =
      match s.hash with
        | Some cached -> cached
        | None ->
            new_traversal ();
            let h = hash_slot s in
            s.hash <- Some h;
            h

    (* Physical equality is tried first; otherwise a fresh traversal
       compares the two slots. *)
    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        new_traversal ();
        equal_slot s1 s2
      end
  end)


(* Phase (1): translate the AST type/pattern [p] into the first
   intermediate form, resolving names in [env]. *)
let rec derecurs env p = match p.descr with
  | PatVar v ->
      (match Ns.split_qname v with
         | "", name ->
             (* Unqualified name: a recursive definition in scope, else a
                type of the environment, else a capture variable. *)
             let id = ident name in
             (try PAlias (Env.find id env.penv_derec)
              with Not_found ->
                try PType (find_type id env.penv_tenv)
                with Not_found -> PCapture id)
         | cu, name ->
             (* Qualified name: a type exported by another unit. *)
             try
               let unit_id = ident (U.mk cu) in
               PType (find_type_global unit_id (ident name) env.penv_tenv)
             with Not_found ->
               failwith
                 ("Unbound external type " ^ cu ^ ":" ^ (U.to_string name)))
  | SchemaVar (kind, schema, item) ->
      PType (derecurs_schema env kind schema item)
  | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> PType t
  | NsT ns ->
      PType (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
  | Or (p1, p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1, p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1, p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1, p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1, p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1, p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional q -> POptional (derecurs env q)
  | Record (o, r) ->
      PRecord (o, parse_record env.penv_tenv p.loc (derecurs env) r)
  | Constant (x, c) -> PConstant (x, const env.penv_tenv p.loc c)
  | Cst c -> PType (Types.constant (const env.penv_tenv p.loc c))
  | Regexp (r, q) ->
      (* Every sequence capture variable of [r] is additionally bound to
         the nil atom in the tail. *)
      let with_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let captured = seq_vars IdSet.empty r in
      let tail = IdSet.fold with_nil (derecurs env q) captured in
      let body = derecurs_regexp (fun x -> x) env r in
      PRegexp (body, tail)
(* Translate an AST regular expression.  [vars] wraps every element with the
   sequence captures currently enclosing it: entering [SeqCapture (x, _)]
   extends it so that each element below is also intersected with
   [PCapture x]. *)
and derecurs_regexp vars env re =
  match re with
    | Epsilon -> PEpsilon
    | Elem p -> PElem (vars (derecurs env p))
    | Seq (r1, r2) ->
        PSeq (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
    | Alt (r1, r2) ->
        PAlt (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
    | Star r -> PStar (derecurs_regexp vars env r)
    | WeakStar r -> PWeakStar (derecurs_regexp vars env r)
    | SeqCapture (x, r) ->
        derecurs_regexp (fun p -> PAnd (vars p, PCapture x)) env r


(* [derecurs_def env b] handles a group of mutually recursive definitions:
   a fresh slot is allocated for every name, all names are entered in the
   environment, and only then are the bodies translated, so that they may
   refer to one another.  Returns the extended environment. *)
and derecurs_def env b =
  let slots = List.map (fun (v,p) -> (v,p,mk_derecurs_slot p.loc)) b in
  let bind derec (v,_,s) = Env.add v s derec in
  let extended =
    { env with penv_derec = List.fold_left bind env.penv_derec slots } in
  List.iter (fun (_,p,s) -> s.pdescr <- derecurs extended p) slots;
  extended

(* [derecurs_schema env kind schema item] returns the component named [item]
   of the schema [schema].  [kind] selects the table to search: elements,
   types, attributes, or (for [`Any]) all three in that order.
   Raises [Failure] with an explanatory message when nothing is found.
   The [env] argument is not used.

   Fix: the message for a missing attribute read "atttribute". *)
and derecurs_schema env kind schema item =
  let elt () = fst (Hashtbl.find !schema_elements (schema, item)) in
  let typ () = Hashtbl.find !schema_types (schema, item) in
  let att () = Hashtbl.find !schema_attributes (schema, item) in
  (* Try each lookup in turn; [n] names what was searched for. *)
  let rec do_try n = function
    | [] ->
        let s = Printf.sprintf
                  "No %s named '%s' found in schema '%s'" n item schema in
        failwith s
    | f :: rem -> (try f () with Not_found -> do_try n rem) in
  match kind with
    | `Element -> do_try "element" [ elt ]
    | `Type -> do_try "type" [ typ ]
    | `Attribute -> do_try "attribute" [ att ]
    | `Any -> do_try "item" [ elt; typ; att ]

    
510
511
512
513
514
(* Free (capture) variables of a slot.  A cached result is used when
   present.  Otherwise the slot is marked with the current generation and
   its content traversed; a slot already marked in this generation
   contributes nothing, which cuts cycles. *)
let rec fv_slot s =
  match s.fv with
    | Some cached -> cached
    | None ->
        if s.gen1 = !gen then IdSet.empty
        else begin
          s.gen1 <- !gen;
          fv_descr s.d
        end

(* Free (capture) variables of a term: the variables of its [ICapture] and
   [IConstant] nodes. *)
and fv_descr d =
  match d with
    | IDummy -> assert false
    | IType _ -> IdSet.empty
    | ICapture x | IConstant (x,_) -> IdSet.singleton x
    | IOptional d -> fv_descr d
    | IOr (d1,d2)
    | IAnd (d1,d2)
    | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
    | ITimes (s1,s2)
    | IXml (s1,s2)
    | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
    | IRecord (o,r) ->
        List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
529

530
531
532
533
534
535
536
537
(* Compute and cache the free variables of [s], unless already cached.  A
   new generation is started so that marks left by earlier traversals are
   ignored. *)
let compute_fv s =
  match s.fv with
    | None ->
        incr gen;
        let vars = fv_slot s in
        s.fv <- Some vars
    | Some _ -> ()
	  
538
539
540
(* [check_no_capture loc s] reports an error at [loc], naming one of the
   variables, unless the variable set [s] is empty. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | None -> ()
    | Some x ->
        let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
        raise_loc_generic loc msg
543
    
544
545
546
(* Memo tables of phase (2): slots already allocated by [compile_slot], and
   terms already translated by [compile]. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

(* Work queued by [compile_slot] and consumed by [flush_defs]: slots whose
   content is still to be compiled, and slots whose free variables are still
   to be computed. *)
let todo_defs = ref []
let todo_fv = ref []
549
550
551
552
553
554
555
556

(* Phase (2): translate a first-intermediate-form term into the second one.
   Results are memoized in [compile_hash]. *)
let rec compile p =
  match (try Some (DerecursTable.find compile_hash p)
         with Not_found -> None) with
    | Some c -> c
    | None ->
        let c = real_compile p in
        DerecursTable.replace compile_hash p c;
        c

(* The translation proper.  Sub-terms under a product, XML, arrow or record
   constructor go through [compile_slot] and are compiled later; the other
   sub-terms are compiled at once. *)
and real_compile p =
  match p with
    | PDummy -> assert false
    | PAlias v ->
        (* [ploop] is set while the definition is being expanded: meeting
           the alias again before crossing a slot means the recursion is
           unguarded. *)
        if v.ploop then
          raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
        v.ploop <- true;
        let r = compile v.pdescr in
        v.ploop <- false;
        r
    | PType t -> IType t
    | PCapture x -> ICapture x
    | PConstant (x,v) -> IConstant (x,v)
    | POptional t -> IOptional (compile t)
    | POr (t1,t2) -> IOr (compile t1, compile t2)
    | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
    | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
    | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
    | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
    | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
    | PRecord (o,r) -> IRecord (o, LabelMap.map compile_slot r)
    | PRegexp (r,q) -> compile_regexp r q
(* [compile_regexp r q] compiles the regular expression [r] followed by the
   tail [q] into a union of alternatives.  [memo] records the pairs
   (regexp, tail) already expanded, so that starred expressions do not loop.

   Fix: the fallback case of [queue] used to ignore its own argument and
   compile the outermost tail [q] instead.  The two coincide unless the
   outermost tail is itself a [PRegexp]; in that case the inner tail was
   replaced by the whole outer one.  The case now compiles the term it was
   given. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  (* Alternatives are accumulated in an option: [None] means none yet. *)
  let add accu i =
    match accu with None -> Some i | Some j -> Some (IOr (j,i)) in
  let get = function Some x -> x | None -> assert false in
  (* Continue with a tail: expand it if it is a regexp, otherwise compile
     it as an ordinary term. *)
  let rec queue accu = function
    | PRegexp (re,tail) -> aux accu re tail
    | tail -> add accu (compile tail)
  and aux accu re tail =
    if RE.mem memo (re,tail) then accu
    else (
      RE.add memo (re,tail) ();
      match re with
        | PEpsilon -> queue accu tail
        | PElem p ->
            (* Be careful not to create pairs with same second component *)
            let rec extract = function
              | PConstant (x,v) -> `Const (x,v)
              | POr (x,y) ->
                  (match extract x, extract y with
                    | `Pat x, `Pat y -> `Pat (POr (x,y))
                    | x, y -> `Or (x,y))
              | p -> `Pat p
            in
            let rec mk accu = function
              | `Const (x,v) ->
                  (* A constant binding consumes no element: it is
                     intersected with the compiled tail. *)
                  (match queue None tail with
                    | Some t -> add accu (IAnd (IConstant (x,v), t))
                    | None -> accu)
              | `Or (x,y) -> mk (mk accu x) y
              | `Pat p ->
                  add accu (ITimes (compile_slot p, compile_slot tail))
            in
            mk accu (extract p)
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,tail))
        | PAlt (r1,r2) -> aux (aux accu r1 tail) r2 tail
        | PStar r1 ->
            aux (aux accu r1 (PRegexp (re,tail))) PEpsilon tail
        | PWeakStar r1 ->
            aux (aux accu PEpsilon tail) r1 (PRegexp (re,tail))
    )
  in
  get (aux None r q)
618
619
(* [compile_slot p] returns the slot standing for [p].  On first request a
   fresh, still undefined slot is allocated, queued in [todo_defs] (content)
   and [todo_fv] (free variables), and memoized.  The content is filled in
   later by [flush_defs], which is what allows recursive terms. *)
and compile_slot p =
  match (try Some (DerecursTable.find compile_slot_hash p)
         with Not_found -> None) with
    | Some s -> s
    | None ->
        let s = mk_slot () in
        todo_defs := (s,p) :: !todo_defs;
        todo_fv := s :: !todo_fv;
        DerecursTable.add compile_slot_hash p s;
        s
626

627
      
628
let timer_fv = Stats.Timer.create "Typer.fv"

(* Define every pending slot.  Compiling a content may queue further slots,
   so the queue is drained until it is empty.  Afterwards the free variables
   of all slots queued in [todo_fv] are computed, timed by [timer_fv]. *)
let flush_defs () =
  let rec drain () =
    match !todo_defs with
      | (s,p) :: rest ->
          todo_defs := rest;
          s.d <- compile p;
          drain ()
      | [] -> () in
  drain ();
  Stats.Timer.start timer_fv;
  List.iter compute_fv !todo_fv;
  todo_fv := [];
  Stats.Timer.stop timer_fv ()
640
641
642
643
644
645
646
647
648
649
650
651
652
653
	
(* Memo tables of phase (3): the type node ([typ_node]) and the pattern node
   ([pat_node]) already built for each slot. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: build the internal type denoted by a term.  The term
   must contain no capture or constant binding. *)
let rec typ d =
  match d with
    | IType t -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) -> Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_node r)
    | IDummy | ICapture _ | IConstant (_,_) -> assert false

(* Type node of a slot, memoized in [typ_nodes].  The node is registered
   before its content is defined, so that recursive references find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* Phase (3) for patterns: build the internal pattern denoted by a term.  A
   term without capture variable is a plain type constraint. *)
let rec pat d : Patterns.descr =
  if not (IdSet.is_empty (fv_descr d)) then pat_aux d
  else Patterns.constr (typ d)

(* Terms with at least one capture variable.  Constructs that make no sense
   in a pattern raise [Patterns.Error]. *)
and pat_aux d =
  match d with
    | IDummy -> assert false
    | IType _ -> assert false
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
        (* Removing a plain type is intersecting with its negation. *)
        let negated = Types.neg (typ s2) in
        Patterns.cap (pat s1) (Patterns.constr negated)
    | IDiff _ ->
        raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ ->
        raise
          (Patterns.Error "Optional fields are not allowed in record patterns")
    | IArrow _ ->
        raise (Patterns.Error "Arrows are not allowed in patterns")
    | IRecord (o,r) ->
        (* Fields without capture variable go into one record type
           constraint; each other field yields a separate record pattern and
           is left unconstrained ([Types.any_node]) in the constraint. *)
        let field_pats = ref [] in
        let field l s =
          if IdSet.is_empty (fv_slot s) then typ_node s
          else begin
            field_pats := Patterns.record l (pat_node s) :: !field_pats;
            Types.any_node
          end
        in
        let constr = Types.record' (o, LabelMap.mapi field r) in
        List.fold_left Patterns.cap (Patterns.constr constr) !field_pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)

(* Pattern node of a slot, memoized in [pat_nodes].  The node is registered
   before its content is defined, so that recursive references find it; it
   is removed again if the definition fails. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s node;
      Patterns.define node (pat s.d);
      node
    with exn -> SlotTable.remove pat_nodes s; raise exn
      (* For the toplevel ... *)
709

710

711
(* [type_defs env b] processes the group [b] of (possibly mutually
   recursive) type definitions and returns the list of [(name, type)].
   - a name already bound in [env] is an error;
   - a definition containing a capture variable is an error;
   - a definition with a real location that yields the empty type triggers
     a warning;
   - every resulting type is registered with the type printer. *)
let type_defs env b =
  let check_fresh (v,p) =
    if Env.mem v env.ids
    then raise_loc_generic p.loc
      ("Identifier " ^ (Ident.to_string v) ^ " is already bound") in
  List.iter check_fresh b;
  let penv = derecurs_def (penv env) b in
  let compiled = List.map (fun (v,p) -> (v,p,compile (derecurs penv p))) b in
  flush_defs ();
  let finish (v,p,s) =
    check_no_capture p.loc (fv_descr s);
    let t = typ s in
    if (p.loc <> noloc) && (Types.is_empty t) then
      warning p.loc
        ("This definition yields an empty type for " ^ (Ident.to_string v));
    (v,t) in
  let defs = List.map finish compiled in
  List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) defs;
  defs
731
732


733
734
735
736
737
(* Print on [ppf] the names of all type bindings of [env], each preceded by
   a space. *)
let dump_types ppf env =
  let print_if_type v item =
    match item with
      | Type _ -> Format.fprintf ppf " %a" Ident.print v
      | _ -> () in
  Env.iter print_if_type env.ids
738

739
(* Print the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table
741

742

743
744
(* [do_typ loc r] compiles the first-intermediate-form term [r] into a type
   node.  A capture variable in [r] is an error reported at [loc]. *)
let do_typ loc r =
  let slot = compile_slot r in
  flush_defs ();
  let vars = fv_slot slot in
  check_no_capture loc vars;
  typ_node slot
748
   
749
750
(* [typ env p] translates the AST type [p] into a type node, resolving names
   in [env]. *)
let typ env p =
  let term = derecurs (penv env) p in
  do_typ p.loc term
751
    
752
753
(* [pat env p] translates the AST pattern [p] into a pattern node, resolving
   names in [env].  A [Patterns.Error], and any located error carrying no
   real location, is re-raised at the location of [p]. *)
let pat env p =
  let term = derecurs (penv env) p in
  let slot = compile_slot term in
  flush_defs ();
  try pat_node slot
  with
    | Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,_,exn) when loc = noloc ->
        raise (Location (p.loc, `Full, exn))
758
759


760
761
(* II. Build skeleton *)

762

763
764
765
766
767
(* Hooks for operators.  Each one initially fails with [assert false];
   presumably another module installs the real functions -- TODO confirm.
   [mk_unary_op] and [mk_binary_op] are called as [!mk_*_op op env] when
   building the skeleton of an [Op] expression.  The [typ_*_op] hooks are not
   used in the part of the file visible here. *)
type type_fun = Types.t -> bool -> Types.t
let mk_unary_op = ref (fun _ _ -> assert false)
let typ_unary_op = ref (fun _ _ _ -> assert false)
let mk_binary_op = ref (fun _ _ -> assert false)
let typ_binary_op = ref (fun _ _ _ _ -> assert false)
768
769


770
(* Sets of free variables. *)
module Fv = IdSet

(* A pattern-matching branch together with the branches nested in its
   body. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected so far at the current nesting level; maintained by
   [branches]. *)
let cur_branch : branch list ref = ref []
775

776
(* [exp loc fv e] pairs the free variables [fv] with a typed-expression node
   for [e] located at [loc]; the node starts with the empty type. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e } in
  (fv, node)
782
783


784
785
(* [expr env loc e] builds the typed skeleton of the AST expression [e] and
   returns it together with its set of free variables.  [loc] is the
   innermost enclosing location; it is updated when a [LocatedExpr] is
   crossed.

   Fix: a qualified variable naming an unknown compilation unit used to let
   a bare [Not_found] escape from [Env.find]; it is now reported as an error
   located at the variable. *)
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
  | Forget (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Forget (e,t))
  | Var s ->
      (match Ns.split_qname s with
        | "", id ->
            (* Plain variable: it is its own single free variable. *)
            let id = ident id in
            exp loc (Fv.singleton id) (Typed.Var id)
        | cu_name, id ->
            (* Variable exported by another compilation unit. *)
            let cu =
              try Env.find (ident (U.mk cu_name)) env.cu
              with Not_found ->
                raise_loc_generic loc
                  ("Unbound compilation unit " ^ cu_name) in
            exp loc Fv.empty (Typed.ExtVar (cu, ident id)))
  | Apply (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Apply (e1,e2))
  | Abstraction a ->
      let iface = List.map (fun (t1,t2) -> (typ env t1, typ env t2))
                    a.fun_iface in
      (* The type of the function is the intersection of all the arrows of
         its interface. *)
      let t = List.fold_left
                (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
                Types.any iface in
      let iface = List.map
                    (fun (t1,t2) -> (Types.descr t1, Types.descr t2))
                    iface in
      let (fv0,body) = branches env a.fun_body in
      (* The name of a recursive function is not free in it. *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | (Integer _ | Char _ | Atom _) as c ->
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Pair (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
  | RemoveField (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
  | RecordLitt r ->
      (* Free variables of the fields are accumulated on the side while the
         record is converted. *)
      let fv = ref Fv.empty in
      let r = parse_record env loc
                (fun e ->
                   let (fv2,e) = expr env loc e
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr env loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (try
         (match ltes with
            | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
            | [e1;e2] ->
                exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)
  | Match (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
  | Map (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
  | Xtrans (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
  | Validate (e,schema,elt) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Validate (e, schema, elt))
  | Try (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
  | NamespaceIn (pr,ns,e) ->
      (* The prefix is visible in the sub-expression only. *)
      let env = enter_ns pr ns env in
      expr env loc e
  | Ref (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Ref (e,t))
884
	      
885
  (* [branches env b] builds the typed skeleton of the list of branches [b].
     Returns the free variables of the whole construct (those of the bodies
     minus the ones bound by each pattern) and the branches record, whose
     accepted type is the union of the types accepted by the patterns.  A
     warning is printed for a branch whose pattern binds a variable that its
     body does not use.  [cur_branch] is updated so that each branch records
     the branches nested in its body. *)
  and branches env b =
    let free = ref Fv.empty in
    let accepted = ref Types.empty in
    let branch (p,e) =
      (* Collect the branches nested in the body separately. *)
      let saved = !cur_branch in
      cur_branch := [];
      let (body_fv,e) = expr env noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat env p in
      (match Fv.pick (Fv.diff (Patterns.fv p) body_fv) with
        | None -> ()
        | Some x ->
            let x = U.to_string (Id.value x) in
            warning br_loc
              ("The capture variable " ^ x ^
               " is declared in the pattern but not used in the body of this branch. It might be a misspelled type or name (if not use _ instead)."));
      let outer_fv = Fv.diff body_fv (Patterns.fv p) in
      free := Fv.cup !free outer_fv;
      accepted := Types.cup !accepted (Types.descr (Patterns.accept p));
      let br =
        {
          Typed.br_loc = br_loc;
          Typed.br_used = br_loc = noloc;
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: saved;
      br in
    let typed = List.map branch b in
    (!free,
     {
       Typed.br_typ = Types.empty;
       Typed.br_branches = typed;
       Typed.br_accept = !accepted;
       Typed.br_compiled = None;
     }
    )
921

922
(* Public entry point: typed skeleton of [e], starting without location and
   dropping the set of free variables. *)
let expr env e =
  let (_, typed) = expr env noloc e in
  typed
923

924
925
(* [let_decl env p e] builds the typed skeleton of a let declaration binding
   the pattern [p] to the expression [e]; nothing is compiled yet. *)
let let_decl env p e =
  { Typed.let_pat = pat env p;
    Typed.let_body = expr env e;
    Typed.let_compiled = None;
  }

929
930
931

(* Hide global "typing/parsing" environment *)

932

933
934
(* III. Type-checks *)

935
936
open Typed

937
938
(* [require loc t s] checks that [t] is a subtype of [s]; otherwise it
   raises [Constraint (t, s)] at [loc]. *)
let require loc t s =
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))

(* Same check as [require], returning [t] on success. *)
let verify loc t s =
  require loc t s;
  t

943
944
945
946
947
(* Same check as [verify], but a failure is reported at character offset
   [ofs] inside [loc]. *)
let check_str loc ofs t s =
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

(* [should_have loc constr s] raises [ShouldHave (constr, s)] at [loc]. *)
let should_have loc constr s =
  let exn = ShouldHave (constr,s) in
  raise_loc loc exn

(* Same as [should_have], at character offset [ofs] inside [loc]. *)
let should_have_str loc ofs constr s =
  let exn = ShouldHave (constr,s) in
  raise_loc_str loc ofs exn

953
954
955
956
957
958
959
960
961
962
963
(* [flatten loc arg constr precise] types the flattening of a sequence of
   sequences.  [arg] type-checks the argument against a constraint and a
   precision flag.  The argument is checked against sequences of [inner],
   where [inner] is a sequence type approximated from [constr].  When
   [inner] is a subtype of [constr] the caller's precision is kept and, if
   not precise, [constr] itself is returned; otherwise the argument is typed
   precisely and its flattened type returned.  [loc] is not used. *)
let flatten loc arg constr precise =
  let inner =
    Sequence.star (Sequence.approx (Types.cap Sequence.any constr)) in
  let outer = Sequence.star inner in
  if not (Types.subtype inner constr) then
    Sequence.flatten (arg outer true)
  else begin
    let t = arg outer precise in
    if precise then Sequence.flatten t else constr
  end
964

965
966
(* [type_check env e constr precise] type-checks the expression [e] against
   the constraint [constr].  The result is the computed type when [precise]
   is set and [constr] otherwise; it is also accumulated (by union) into the
   [exp_typ] field of [e]. *)
let rec type_check env e constr precise =
  let computed = type_check' e.exp_loc env e.exp_descr constr precise in
  let result = if precise then computed else constr in
  e.exp_typ <- Types.cup e.exp_typ result;
  result

971
and type_check' loc env e constr precise = match e with
972
973
974
  | Forget (e,t) ->
      let t = Types.descr t in
      ignore (type_check env e t false);
975
      verify loc t constr
976

977
  | Abstraction a ->
978
979
980
      let t =
	try Types.Arrow.check_strenghten a.fun_typ constr 
	with Not_found -> 
981
982
	  should_have loc constr
	    "but the interface of the abstraction is not compatible"
983
      in
984
985
      let env = match a.fun_name with
	| None -> env
986
	| Some f -> enter_value f a.fun_typ env in
987
988
      List.iter 
	(fun (t1,t2) ->
989
990
991
	   let acc = a.fun_body.br_accept in 
	   if not (Types.subtype t1 acc) then
	     raise_loc loc (NonExhaustive (Types.diff t1 acc));
992
	   ignore (type_check_branches loc env t1 a.fun_body t2 false)