typer.ml 40.9 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraints
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
(* [warning loc msg] prints a warning on [!Location.warning_ppf]:
   the location [loc] (via [Location.print_loc]), then the output of
   [Location.html_hilight] for the same location, then the text [msg]. *)
let warning loc msg =
  Format.fprintf !Location.warning_ppf "Warning %a:@\n%a%s@\n" 
    Location.print_loc (loc,`Full)
    Location.html_hilight (loc,`Full)
    msg

17
18
19
20
(* Typing environment threaded through this module: the named types
   currently defined, plus the namespace prefix table. *)
type tenv = {
  tenv_names : Types.t Env.t;   (* named types, indexed by identifier *)
  tenv_nspref: Ns.table;        (* namespace prefix table *)
}
21

22
(* Namespaces *)
23

24
(* Namespace prefix table stored in a typing environment. *)
let get_ns_table { tenv_nspref = table; tenv_names = _ } = table
25

26
27
28
29
30
(* [protect_error_ns loc f x] computes [f x]; an [Ns.UnknownPrefix]
   escaping from it is turned into a located error at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix prefix ->
    let msg = "Undefined namespace prefix " ^ (U.to_string prefix) in
    raise_loc_generic loc msg
31

32
33
34
35
36
37
(* Resolve the qualified tag [t] with [Ns.map_tag] against the prefix
   table of [env] and build the corresponding atom.  An unknown prefix
   is reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.tenv_nspref in
  let (ns, local) = protect_error_ns loc resolve t in
  Atoms.V.mk ns local
 
(* Resolve the namespace prefix [ns] with [Ns.map_prefix] against the
   prefix table of [env].  An unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.tenv_nspref in
  protect_error_ns loc resolve ns
38

39
40
41
(* Resolve the qualified attribute name [t] with [Ns.map_attr] against
   the prefix table of [env] and intern it as a record label.  An
   unknown prefix is reported at [loc]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.tenv_nspref in
  let qualified = protect_error_ns loc resolve t in
  LabelPool.mk qualified
42

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
(* [parse_record env loc f r] turns the association list [r] into a
   label map: every label is resolved with [parse_label] and every
   value is transformed by [f].  A label occurring twice is reported
   at [loc]. *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let field (l, x) = (parse_label env loc l, f x) in
  LabelMap.from_list on_duplicate (List.map field r)

(* [const env loc e] translates the expression [e] into a constant
   ([Types.const]).  [loc] is the location used for error reporting;
   it is replaced by the inner location whenever a [LocatedExpr] is
   crossed.  Any expression that is not built only from pairs, XML
   elements, record literals, strings, atoms, integers and characters
   is rejected with a located error. *)
let rec const env loc e = match e with
  | LocatedExpr (inner_loc, inner) -> const env inner_loc inner
  | Pair (a, b) -> Types.Pair (const env loc a, const env loc b)
  | Xml (a, b) -> Types.Xml (const env loc a, const env loc b)
  | RecordLitt fields ->
      Types.Record (parse_record env loc (const env loc) fields)
  | String (i, j, s, rest) -> Types.String (i, j, s, const env loc rest)
  | Atom a -> Types.Atom (parse_atom env loc a)
  | Integer n -> Types.Integer n
  | Char ch -> Types.Char ch
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
60

61
(* Typing errors.  They are normally raised wrapped in a [Location]
   exception (see [raise_loc]). *)

(* Pattern matching is not exhaustive; the argument is the part of the
   input type that is not accepted. *)
exception NonExhaustive of Types.descr
(* [Constraint (t,s)]: type [t] was required to be a subtype of [s]
   but is not. *)
exception Constraint of Types.descr * Types.descr
(* [ShouldHave (t,msg)]: an expression should have type [t]; [msg]
   explains why it cannot. *)
exception ShouldHave of Types.descr * string
(* NOTE(review): presumably like [ShouldHave] with an extra type to
   display -- not raised in the visible part of the file. *)
exception ShouldHave2 of Types.descr * string * Types.descr
(* NOTE(review): presumably a record type lacking the given label --
   not raised in the visible part of the file. *)
exception WrongLabel of Types.descr * label
(* NOTE(review): unbound identifier; the meaning of the boolean is not
   visible here. *)
exception UnboundId of id * bool
(* Generic error carrying only a message (see [error]). *)
exception Error of string
68

69
70
(* Raise [exn] wrapped in a [Location] exception covering the whole
   of [loc]. *)
let raise_loc loc exn = raise (Location (loc,`Full,exn))
(* Same as [raise_loc], but tagged with [`Char ofs] instead of
   [`Full]. *)
let raise_loc_str loc ofs exn = raise (Location (loc,`Char ofs,exn))
(* Raise the generic [Error msg] located at [loc]. *)
let error loc msg = raise_loc loc (Error msg)
72

73
74
75
  (* Schema datastructures *)

(* Sets of strings; used for the [schemas] registry. *)
module StringSet = Set.Make (String)
76
77
78

  (* Registry of imported schemas; it only serves to remember which
     schemas have been imported.  Held in a [State.ref] registered
     under the name "Typer.schemas". *)
let schemas = State.ref "Typer.schemas" StringSet.empty
79
80
81

(* Tables of schema components, each held in a [State.ref].
   [derecurs_schema] looks them up with [(schema, item)] keys; for
   [schema_elements] the stored value is a pair, of which
   [derecurs_schema] uses the first component. *)
let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
83

84
85
86
87
88
89
90
91
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* [seq_vars accu r] adds to [accu] every variable bound by a
   [SeqCapture] anywhere inside the regular expression [r]. *)
let rec seq_vars accu r = match r with
  | Epsilon | Elem _ -> accu
  | Seq (left, right) | Alt (left, right) ->
      let accu = seq_vars accu left in
      seq_vars accu right
  | Star inner | WeakStar inner -> seq_vars accu inner
  | SeqCapture (v, inner) -> seq_vars (IdSet.add v accu) inner

92
93
94
95
96
97
98
99
100
101
102
103
104
105
(* We use two intermediate representations to go from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

106
107
108
109
(* Output of phase (1).  A [derecurs_slot] stands for one recursive
   definition; occurrences of the defined name become [PAlias] nodes
   pointing to the slot. *)
type derecurs_slot = {
  ploc : Location.loc;         (* location given to [mk_derecurs_slot] *)
  pid  : int;                  (* unique id; also the hash of [PAlias] *)
  mutable ploop : bool;        (* true while [real_compile] unfolds the slot *)
  mutable pdescr : derecurs;   (* body; [PDummy] until [derecurs_def] sets it *)
} and derecurs =
  | PDummy                     (* placeholder, never hashed nor compiled *)
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
      (* [PRegexp (r,q)]: regular expression [r] followed by [q] *)
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

(* Output of phase (2).  Constructors (products, XML, arrows, record
   fields) hold [slot]s, so that recursion only goes through mutable
   slots; boolean connectives hold [descr]s directly. *)
type descr = 
  | IDummy                     (* content of a slot not yet defined *)
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;      (* cached free variables, see [compute_fv] *)
  mutable hash : int option;   (* cached hash, see [SlotTable] *)
  (* Marks used by the traversals [hash_slot], [equal_slot] and
     [fv_slot]: a mark is valid only when its [gen] field equals the
     current value of the global generation counter. *)
  mutable rank1: int; mutable rank2: int;
  mutable gen1 : int; mutable gen2: int;
  mutable d    : descr;        (* definition, set by [flush_defs] *)
}
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
    

(* Source of the unique ids given to [derecurs_slot]s. *)
let counter = ref 0

(* Allocate a fresh, still undefined ([PDummy]) slot for a definition
   located at [loc]. *)
let mk_derecurs_slot loc = 
  counter := !counter + 1;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* Allocate a fresh slot: no definition yet ([IDummy]), no cached
   free variables or hash, all traversal marks at zero. *)
let mk_slot () = 
  { fv = None; hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0;
    d = IDummy }


(* This environment is used in phase (1) to eliminate recursion *)
type penv = {
  penv_tenv : tenv;                  (* enclosing typing environment *)
  penv_derec : derecurs_slot Env.t;  (* slots of the recursive names in scope *)
}

(* Phase (1) environment over [tenv] with no recursive name in scope. *)
let penv tenv = { penv_derec = Env.empty; penv_tenv = tenv }
173

174
(* Structural hash on [derecurs] terms, compatible with
   [equal_derecurs].  The traversal stops at [PAlias] nodes, which
   hash to the id of their slot, so it terminates on recursive
   definitions.  [PDummy] must never be hashed. *)
let rec hash_derecurs = function
  | PDummy -> assert false
  | PAlias s -> 
      s.pid
  | PType t -> 
      1 + 17 * (Types.hash t)
  | POr (p1,p2) -> 
      2 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PAnd (p1,p2) -> 
      3 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PDiff (p1,p2) -> 
      4 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PTimes (p1,p2) -> 
      5 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PXml (p1,p2) -> 
      6 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PArrow (p1,p2) -> 
      7 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | POptional p -> 
      8 + 17 * (hash_derecurs p)
  | PRecord (o,r) -> 
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x -> 
      11 + 17 * (Id.hash x)
  | PConstant (x,c) -> 
      12 + 17 * (Id.hash x) + 257 * (Types.hash_const c)
  | PRegexp (p,q) -> 
      13 + 17 * (hash_derecurs_regexp p) + 257 * (hash_derecurs q)
(* Same for the regular expression part of a [PRegexp]. *)
and hash_derecurs_regexp = function
  | PEpsilon -> 
      1
  | PElem p -> 
      2 + 17 * (hash_derecurs p)
  | PSeq (p1,p2) -> 
      3 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PAlt (p1,p2) -> 
      4 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PStar p -> 
      5 + 17 * (hash_derecurs_regexp p)
  | PWeakStar p -> 
      6 + 17 * (hash_derecurs_regexp p)
215
216

(* Structural equality on [derecurs] terms, with a physical-equality
   shortcut.  Two [PAlias] nodes are equal only when they point to the
   very same slot, so the comparison never unfolds a recursive
   definition. *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 -> 
      s1 == s2
  | PType t1, PType t2 -> 
      Types.equal t1 t2
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) -> 
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 -> 
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) -> 
      (* the flags are booleans: compare them by value, as
         [equal_descr] does *)
      (o1 = o2) && (LabelMap.equal equal_derecurs r1 r2)
  | PCapture x1, PCapture x2 -> 
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) -> 
      (Id.equal x1 x2) && (Types.equal_const c1 c2)
  | PRegexp (p1,q1), PRegexp (p2,q2) -> 
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  | _ -> false
(* Same for the regular expression part of a [PRegexp]. *)
and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon -> 
      true
  | PElem p1, PElem p2 -> 
      equal_derecurs p1 p2
  | PSeq (p1,q1), PSeq (p2,q2) 
  | PAlt (p1,q1), PAlt (p2,q2) -> 
      (equal_derecurs_regexp p1 p2) && (equal_derecurs_regexp q1 q2)
  | PStar p1, PStar p2
  | PWeakStar p1, PWeakStar p2 -> 
      equal_derecurs_regexp p1 p2
  | _ -> false
251

252
253
254
255
256
257
258
259
260
261
262
(* Hash tables keyed by [derecurs] terms, compared and hashed with
   [equal_derecurs] and [hash_derecurs]. *)
module DerecursTable = Hashtbl.Make(
  struct 
    type t = derecurs 
    let hash = hash_derecurs
    let equal = equal_derecurs
  end
)

(* Hash tables keyed by a pair (regular expression, continuation);
   used by [compile_regexp] to remember the pairs already explored. *)
module RE = Hashtbl.Make(
  struct 
    type t = derecurs_regexp * derecurs 
    let hash (p,q) = 
      (hash_derecurs_regexp p) + 17 * (hash_derecurs q)
    let equal (p1,q1) (p2,q2) = 
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  end
)
269

270
271
272
273
(* Global stamps for the traversals over slots ([hash_slot],
   [equal_slot], [fv_slot]).  [gen] identifies the current traversal:
   a mark stored in a slot is valid only if the slot's generation
   field equals [!gen].  [rank] numbers the slots in the order the
   current traversal meets them. *)
let gen = ref 0
let rank = ref 0
	     
(* Hash of a [descr], going through slots with [hash_slot].
   [IDummy] must never be hashed. *)
let rec hash_descr = function
  | IDummy -> assert false
  | IType x -> Types.hash x
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.hash_const y)
(* Hash of a slot within the current generation [!gen].  A slot
   already met in this generation contributes only its rank, which
   makes the traversal terminate on cyclic structures; otherwise the
   slot receives the next rank and its definition is hashed. *)
and hash_slot s =
  if s.gen1 = !gen then 13 * s.rank1
  else (
    incr rank;
    s.rank1 <- !rank; s.gen1 <- !gen;
    hash_descr s.d
  )
    
(* Equality of two [descr]s, going through slots with [equal_slot]. *)
let rec equal_descr d1 d2 = 
  match (d1,d2) with
  | IType x1, IType x2 -> Types.equal x1 x2
  | IOr (x1,y1), IOr (x2,y2) 
  | IAnd (x1,y1), IAnd (x2,y2) 
  | IDiff (x1,y1), IDiff (x2,y2) -> (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2) 
  | IXml (x1,y1), IXml (x2,y2) 
  | IArrow (x1,y1), IArrow (x2,y2) -> (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) -> 
      (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) -> 
      (Id.equal x1 x2) && (Types.equal_const y1 y2)
  | _ -> false
(* Equality of two slots within the current generation [!gen].
   The left-hand slot is marked through [gen1]/[rank1], the right-hand
   one through [gen2]/[rank2].  If both were already met in this
   generation, they are equal exactly when they were given the same
   rank; if neither was, both receive the next rank and their
   definitions are compared; if only one was, they differ. *)
and equal_slot s1 s2 =
  ((s1.gen1 = !gen) && (s2.gen2 = !gen) && (s1.rank1 = s2.rank2))
  ||
  ((s1.gen1 <> !gen) && (s2.gen2 <> !gen) && (
     incr rank;
     s1.rank1 <- !rank; s1.gen1 <- !gen;
     s2.rank2 <- !rank; s2.gen2 <- !gen;
     equal_descr s1.d s2.d
   ))
  
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
(* Hash tables keyed by slots.  The hash of a slot is computed once,
   in a fresh generation, and cached in the slot.  Equality is
   physical equality or, failing that, [equal_slot] run in a fresh
   generation. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    let hash s =
      match s.hash with
      | Some cached -> cached
      | None ->
          (* start a new traversal *)
          incr gen; rank := 0;
          let computed = hash_slot s in
          s.hash <- Some computed;
          computed

    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        (* start a new traversal *)
        incr gen; rank := 0;
        equal_slot s1 s2
      end
  end)


(* Phase (1): translate the AST type/pattern [p] into a [derecurs]
   term.  An identifier is looked up first among the recursive
   definitions in scope, then among the global type names; if it is in
   neither, it is taken to be a capture variable.  In a regular
   expression, every sequence capture variable is additionally bound
   to the constant nil in the part that follows the expression. *)
let rec derecurs env p =
  let tenv = env.penv_tenv in
  match p.descr with
  | PatVar name ->
      (try PAlias (Env.find name env.penv_derec)
       with Not_found ->
         try PType (Env.find name tenv.tenv_names)
         with Not_found -> PCapture name)
  | SchemaVar (kind, schema, item) ->
      PType (derecurs_schema env kind schema item)
  | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv p.loc ns)))
  | Or (a, b) -> POr (derecurs env a, derecurs env b)
  | And (a, b) -> PAnd (derecurs env a, derecurs env b)
  | Diff (a, b) -> PDiff (derecurs env a, derecurs env b)
  | Prod (a, b) -> PTimes (derecurs env a, derecurs env b)
  | XmlT (a, b) -> PXml (derecurs env a, derecurs env b)
  | Arrow (a, b) -> PArrow (derecurs env a, derecurs env b)
  | Optional a -> POptional (derecurs env a)
  | Record (o, fields) ->
      PRecord (o, parse_record tenv p.loc (derecurs env) fields)
  | Constant (x, c) -> PConstant (x, const tenv p.loc c)
  | Cst c -> PType (Types.constant (const tenv p.loc c))
  | Regexp (re, rest) ->
      let add_nil acc v =
        PAnd (acc, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let captured = seq_vars IdSet.empty re in
      let rest = IdSet.fold add_nil (derecurs env rest) captured in
      PRegexp (derecurs_regexp (fun d -> d) env re, rest)
(* Translate an AST regular expression.  [vars] wraps each element
   with the captures of the enclosing [SeqCapture]s: going under
   [SeqCapture (x,_)] extends it so that every element below is also
   intersected with [PCapture x]. *)
and derecurs_regexp vars env re = match re with
  | Epsilon -> PEpsilon
  | Elem p -> PElem (vars (derecurs env p))
  | Seq (a, b) ->
      PSeq (derecurs_regexp vars env a, derecurs_regexp vars env b)
  | Alt (a, b) ->
      PAlt (derecurs_regexp vars env a, derecurs_regexp vars env b)
  | Star a -> PStar (derecurs_regexp vars env a)
  | WeakStar a -> PWeakStar (derecurs_regexp vars env a)
  | SeqCapture (x, a) ->
      let with_capture d = PAnd (vars d, PCapture x) in
      derecurs_regexp with_capture env a


(* Process a group of mutually recursive definitions [b]: allocate
   one slot per definition, bring all the names into scope, then
   translate each body in the extended environment and store it in
   its slot.  Returns the extended environment. *)
and derecurs_def env b =
  let defs = List.map (fun (v, p) -> (v, p, mk_derecurs_slot p.loc)) b in
  let bind acc (v, _, slot) = Env.add v slot acc in
  let env =
    { env with penv_derec = List.fold_left bind env.penv_derec defs } in
  List.iter (fun (_, p, slot) -> slot.pdescr <- derecurs env p) defs;
  env

(* Look up the component [item] of [schema] in the schema tables.
   [kind] selects the table: elements, types or attributes; [`Any]
   tries the three of them in that order.
   Raises [Failure] with an explanatory message when no table
   consulted contains the component. *)
and derecurs_schema env kind schema item =
  let elt () = fst (Hashtbl.find !schema_elements (schema, item)) in
  let typ () = Hashtbl.find !schema_types (schema, item) in
  let att () = Hashtbl.find !schema_attributes (schema, item) in
  (* [do_try n fs]: result of the first lookup in [fs] that succeeds;
     [n] names the kind of component in the error message. *)
  let rec do_try n = function
    | [] -> 
        let s = Printf.sprintf 
                  "No %s named '%s' found in schema '%s'" n item schema in
        failwith s
    | f :: rem -> (try f () with Not_found -> do_try n rem)  in
  match kind with
    | `Element -> do_try "element" [ elt ]
    | `Type -> do_try "type" [ typ ]
    | `Attribute -> do_try "attribute" [ att ]
    | `Any -> do_try "item" [ elt; typ; att ]

    
412
413
414
415
416
(* Free (capture) variables of a slot.  The cached value is used when
   there is one.  Otherwise the definition is traversed; [gen1] marks
   the slots already visited in the current generation, so a slot
   reached a second time contributes nothing and the traversal
   terminates on cyclic structures. *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None ->
        if s.gen1 = !gen then IdSet.empty 
        else (s.gen1 <- !gen; fv_descr s.d)
(* Free (capture) variables of a [descr]: the variables of its
   [ICapture] and [IConstant] nodes. *)
and fv_descr = function
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | IOr (d1,d2)
  | IAnd (d1,d2)  
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d -> fv_descr d
  | ITimes (s1,s2)  
  | IXml (s1,s2)  
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_,r) -> 
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
  | ICapture x | IConstant (x,_) -> IdSet.singleton x
431

432
433
434
435
436
437
438
439
(* Compute and cache the free variables of slot [s], in a fresh
   generation, unless they are already cached. *)
let compute_fv s =
  match s.fv with
  | Some _ -> ()
  | None ->
      incr gen;
      s.fv <- Some (fv_slot s)
	  
440
441
442
(* Check that the set of free variables [s] is empty.  Where a type
   is expected, a remaining capture variable can only be a name that
   is not bound to any type: it is reported at [loc]. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | Some x ->  
        raise_loc_generic loc ("Unbound type name " ^ (Ident.to_string x))
    | None -> ()
445
    
446
447
448
(* Memoization tables of phase (2): the slot allocated by
   [compile_slot], resp. the [descr] computed by [compile], for each
   [derecurs] term already handled. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

449
450
(* Work lists filled by [compile_slot] and emptied by [flush_defs]:
   slots whose definition is still to be compiled (with the term to
   compile), and slots whose free variables are still to be computed. *)
let todo_defs = ref []
let todo_fv = ref []
451
452
453
454
455
456
457
458

(* Phase (2): compile a [derecurs] term into a [descr].  Results are
   memoized in [compile_hash]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let compiled = real_compile p in
    DerecursTable.replace compile_hash p compiled;
    compiled
(* Compilation proper, without memoization.  Sub-terms under a
   constructor (product, XML, arrow, record field) are not compiled
   here but delayed through [compile_slot]; boolean connectives are
   compiled on the spot.  Reaching an alias again while its own body
   is being unfolded therefore means that the recursion does not
   cross any constructor: it is reported as unguarded. *)
and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r = compile v.pdescr in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) ->  IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q
(* Compile the regular expression [r] followed by [q] into the union
   of its possible first steps: each alternative is either a product
   (element, rest of the sequence) or, when [r] can be empty, what
   [q] compiles to.  [memo] records the (regexp, continuation) pairs
   already explored, so that exploring a star terminates. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  (* [aux accu r q] adds to [accu] the alternatives of [r] followed
     by [q]. *)
  let rec aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon ->
            (match q with 
               | PRegexp (r,q) -> aux accu r q 
               | _ -> (compile q) :: accu)
        | PElem p -> ITimes (compile_slot p, compile_slot q) :: accu
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        (* in the two star cases, [r] is the whole starred expression:
           one iteration of [r1] is followed by [r] again *)
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  let accu = aux [] r q in
  match accu with
    | [] -> assert false
    | p::l -> List.fold_left (fun acc p -> IOr (p,acc)) p l
(* Slot standing for the compilation of [p].  The slot is allocated
   and memoized immediately; compiling [p] into it and computing its
   free variables are only scheduled (see [flush_defs]). *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let s = mk_slot () in
    todo_defs := (s,p) :: !todo_defs;
    todo_fv := s :: !todo_fv;
    DerecursTable.add compile_slot_hash p s;
    s
509

510
      
511
(* Timer around the computation of free variables in [flush_defs]. *)
let timer_fv = Stats.Timer.create "Typer.fv"
512
(* Process the pending work: compile the definition of every
   scheduled slot (which may schedule further slots), and once none
   is left, compute the free variables of all the slots created in
   the meantime. *)
let rec flush_defs () = 
  match !todo_defs with
    | [] -> 
        Stats.Timer.start timer_fv;
        List.iter compute_fv !todo_fv;
        Stats.Timer.stop timer_fv;
        todo_fv := []
    | (s,p)::t -> 
        (* pop before compiling: [compile] may push new entries *)
        todo_defs := t; 
        s.d <- compile p; 
        flush_defs ()
523
524
525
526
527
528
529
530
531
532
533
534
535
536
	
(* Phase (3) memoization: the type node, resp. pattern node, already
   built for a slot (see [typ_node] and [pat_node]). *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: build the internal type of a [descr].  The
   description must not contain capture variables or constant
   bindings (callers check this with [check_no_capture]). *)
let rec typ = function
  | IType t -> t
  | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
  | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
  | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
  | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IOptional s -> Types.Record.or_absent (typ s)
  | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_node r)
  | IDummy | ICapture _ | IConstant (_,_) -> assert false
538
      
539
(* Type node of a slot, memoized in [typ_nodes]. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let x = Types.make () in
    (* register the node before defining it, so that a recursive
       reference to [s] finds it *)
    SlotTable.add typ_nodes s x;
    Types.define x (typ s.d);
    x
      
(* Phase (3) for patterns: build the internal pattern of a [descr].
   A description without capture variables is a plain type
   constraint; anything else goes through [pat_aux]. *)
let rec pat d : Patterns.descr =
  match IdSet.is_empty (fv_descr d) with
  | true -> Patterns.constr (typ d)
  | false -> pat_aux d
    
(* Pattern of a description that has capture variables (hence the
   impossible [IType] case).  Raises [Patterns.Error] on the
   constructions that are not allowed in patterns. *)
and pat_aux = function
  | IDummy -> assert false
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      (* removing a plain type: intersect with its negation *)
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ -> 
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IRecord (o,r) ->
      (* Fields without capture variables are kept in a single record
         type constraint; each of the other fields is replaced there
         by [Types.any_node] and gives rise to a separate record
         pattern, intersected with the constraint. *)
      let pats = ref [] in
      let aux l s = 
        if IdSet.is_empty (fv_slot s) then typ_node s
        else
          ( pats := Patterns.record l (pat_node s) :: !pats;
            Types.any_node )
      in
      let constr = Types.record' (o,LabelMap.mapi aux r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IType _ -> assert false
      
(* Pattern node of a slot, memoized in [pat_nodes]. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let x = Patterns.make (fv_slot s) in
    try
      (* register the node before defining it, so that a recursive
         reference to [s] finds it *)
      SlotTable.add pat_nodes s x;
      Patterns.define x (pat s.d);
      x
    with exn -> SlotTable.remove pat_nodes s; raise exn
      (* For the toplevel: do not leave a node without definition in
         the table when the pattern is rejected *)
592

593

594
(* [register_types glb b] processes the group of (mutually recursive)
   type definitions [b] and returns [glb] extended with them.
   A name already defined in [glb] or a definition mentioning an
   unbound type name is reported as a located error; a definition
   yielding an empty type only triggers a warning (unless it carries
   no location).  The new types are also registered with the type
   printer. *)
let register_types glb b =
  List.iter 
    (fun (v,p) ->
       if Env.mem v glb.tenv_names
       then raise_loc_generic p.loc ("Multiple definition for type " ^ (Ident.to_string v))
    ) b;
  let env = derecurs_def (penv glb) b in
  let b = List.map (fun (v,p) -> (v,p,compile (derecurs env p))) b in
  flush_defs ();
  let b = 
    List.map 
      (fun (v,p,s) -> 
         check_no_capture p.loc (fv_descr s);
         let t = typ s in
         if (p.loc <> noloc) && (Types.is_empty t) then
           warning p.loc 
             ("This definition yields an empty type for " ^ (Ident.to_string v));
         (v,t)) b in
  List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
  let n = List.fold_left (fun accu (v,t) -> Env.add v t accu) glb.tenv_names b in
  { glb with tenv_names = n }
615

616
(* [register_ns glb p ns]: [glb] where the prefix [p] is bound to the
   namespace [ns]. *)
let register_ns glb p ns =
  { glb with tenv_nspref = Ns.add_prefix p ns glb.tenv_nspref }
618

619
(* Print on [ppf] the names of all the types defined in [glb], each
   preceded by a space. *)
let dump_types ppf glb =
  Env.iter (fun v _ -> Format.fprintf ppf " %a" Ident.print v) glb.tenv_names
621

622
623
624
(* Print on [ppf] the namespace prefix table of the environment. *)
let dump_ns ppf { tenv_nspref = table; tenv_names = _ } =
  Ns.dump_table ppf table

625

626
627
(* Run phases (2) and (3) on the [derecurs] term [r], which must
   denote a type: a remaining capture variable is reported at [loc]
   as an unbound type name. *)
let do_typ loc r = 
  let s = compile_slot r in
  flush_defs ();
  check_no_capture loc (fv_slot s);
  typ_node s
631
   
632
(* [typ glb p]: type node for the AST type [p] in environment [glb].
   From here on this definition shadows the phase (3) function [typ]
   on descriptions. *)
let typ glb p =
  do_typ p.loc (derecurs (penv glb) p)
634
    
635
(* [pat glb p]: pattern node for the AST pattern [p] in environment
   [glb] (shadows the phase (3) function [pat] on descriptions).
   A [Patterns.Error] is turned into a located error at [p.loc], and
   an error that carries no location is relocated at [p.loc]. *)
let pat glb p = 
  let s = compile_slot (derecurs (penv glb) p) in
  flush_defs ();
  try pat_node s
  with Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,_,exn) when loc = noloc -> raise (Location (p.loc, `Full, exn))
641
642


643
644
(* II. Build skeleton *)

645
646
647
648
649
650
651
652

(* Table of operators, indexed by name.  An operator is registered as
   a function from the typing environment to its typed unary or
   binary form. *)
type op = [ `Unary of tenv -> Typed.unary_op | `Binary of tenv -> Typed.binary_op ]
let op_table : (string,op) Hashtbl.t = Hashtbl.create 31
let register_unary_op s f = Hashtbl.add op_table s (`Unary f)
let register_binary_op s f = Hashtbl.add op_table s (`Binary f)
(* Raises [Not_found] if no operator is registered under the name [s]. *)
let find_op s = Hashtbl.find op_table s


653
(* Sets of free variables of expressions. *)
module Fv = IdSet
654

655
656
657
(* Tree of the branches met while building the skeleton: a branch
   together with the branches nested in its body. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected so far at the current nesting level (updated by
   [branches]). *)
let cur_branch : branch list ref = ref []
658

659
(* [exp loc fv e] pairs the free variables [fv] with a typed
   expression node of description [e] located at [loc]; the type of
   the node starts as the empty type. *)
let exp loc fv e =
  fv,
  { Typed.exp_loc = loc;
    Typed.exp_typ = Types.empty;
    Typed.exp_descr = e;
  }
665
666


667
668
(* [expr glb loc e] translates the AST expression [e] into a typed
   expression skeleton and returns it together with its set of free
   variables.  [loc] is the location attached to the node built; it
   is updated whenever a [LocatedExpr] is crossed.  Types, patterns,
   labels and constants met on the way are resolved in [glb]. *)
let rec expr glb loc = function
  | LocatedExpr (loc,e) -> expr glb loc e
  | Forget (e,t) ->
      let (fv,e) = expr glb loc e and t = typ glb t in
      exp loc fv (Typed.Forget (e,t))
  | Var s -> 
      exp loc (Fv.singleton s) (Typed.Var s)
  | Apply (e1,e2) -> 
      let (fv1,e1) = expr glb loc e1 and (fv2,e2) = expr glb loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Apply (e1,e2))
  | Abstraction a ->
      let iface = List.map (fun (t1,t2) -> (typ glb t1, typ glb t2)) 
                    a.fun_iface in
      (* type of the function: intersection of the arrows of its
         interface *)
      let t = List.fold_left 
                (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2)) 
                Types.any iface in
      let iface = List.map 
                    (fun (t1,t2) -> (Types.descr t1, Types.descr t2)) 
                    iface in
      let (fv0,body) = branches glb a.fun_body in
      (* the name of the function, if any, is bound in its own body *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction 
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | (Integer _ | Char _ | Atom _) as c -> 
      exp loc Fv.empty (Typed.Cst (const glb loc c))
  | Pair (e1,e2) ->
      let (fv1,e1) = expr glb loc e1 and (fv2,e2) = expr glb loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
      let (fv1,e1) = expr glb loc e1 and (fv2,e2) = expr glb loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.Dot (e,parse_label glb loc l))
  | RemoveField (e,l) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.RemoveField (e,parse_label glb loc l))
  | RecordLitt r -> 
      (* the free variables of the fields are accumulated in [fv] *)
      let fv = ref Fv.empty in
      let r = parse_record glb loc
                (fun e -> 
                   let (fv2,e) = expr glb loc e 
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr glb loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (* an unregistered operator or a wrong number of arguments is
         treated as an internal error *)
      (try
         (match (ltes,find_op op) with
            | [e], `Unary op -> exp loc fv (Typed.UnaryOp (op glb, e))
            | [e1;e2], `Binary op -> exp loc fv (Typed.BinaryOp (op glb, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)

  | Match (e,b) -> 
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
  | Map (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
  | Xtrans (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
  | Validate (e,schema,elt) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.Validate (e, schema, elt))
  | Try (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
  | NamespaceIn (pr,ns,e) ->
      (* the prefix is only visible inside [e] *)
      let glb = register_ns glb pr ns in
      expr glb loc e
  | Ref (e,t) ->
      let (fv,e) = expr glb loc e and t = typ glb t in
      exp loc fv (Typed.Ref (e,t))
762
	      
763
  (* [branches glb b] translates a list of (pattern, body) branches.
     Returns the free variables of the bodies not bound by their
     patterns, together with the typed branches and the union of the
     types accepted by the patterns.  A capture variable of a pattern
     that is not used in the corresponding body triggers a warning.
     The tree of branches in [cur_branch] is updated on the way. *)
  and branches glb b = 
    let fv = ref Fv.empty in
    let accept = ref Types.empty in
    let branch (p,e) = 
      (* collect the branches nested in the body separately, then
         restore the outer list with this branch added *)
      let cur_br = !cur_branch in
      cur_branch := [];
      let (fv2,e) = expr glb noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat glb p in
      (match Fv.pick (Fv.diff (Patterns.fv p) fv2) with
        | None -> ()
        | Some x ->
            let x = U.to_string (Id.value x) in
            warning br_loc 
              ("The capture variable " ^ x ^ 
               " is declared in the pattern but not used in the body of this branch. It might be a misspelled type or name (if not use _ instead)."));
      let fv2 = Fv.diff fv2 (Patterns.fv p) in
      fv := Fv.cup !fv fv2;
      accept := Types.cup !accept (Types.descr (Patterns.accept p));
      let br = 
        { 
          Typed.br_loc = br_loc;
          (* a branch without location starts out flagged as used *)
          Typed.br_used = br_loc = noloc;
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: cur_br;
      br in
    let b = List.map branch b in
    (!fv, 
     { 
       Typed.br_typ = Types.empty; 
       Typed.br_branches = b; 
       Typed.br_accept = !accept;
       Typed.br_compiled = None;
     } 
    )
799

800
(* Shadows [expr]: the translation of a whole expression starts with
   no source location ([noloc]). *)
let expr glb = expr glb noloc
801

802
803
804
(* [let_decl glb p e]: typed skeleton of a toplevel declaration
   binding the pattern [p] to the expression [e].  The free variables
   of [e] are not needed here and are dropped. *)
let let_decl glb p e =
  let (_,e) = expr glb e in
  { Typed.let_pat = pat glb p;
    Typed.let_body = e;
    Typed.let_compiled = None }

808
809
810
811

(* Hide global "typing/parsing" environment *)

(* The global typing environment, initially without any type name or
   namespace prefix.  Held in a [State.ref] registered under the name
   "Typer.glb_env". *)
let glb = State.ref "Typer.glb_env" 
            { tenv_names = Env.empty;
              tenv_nspref = Ns.empty_table }
814
815
816
817
818
819

(* From here on, [pat], [typ], [expr] and [let_decl] shadow the
   environment-passing versions and always work in the current global
   environment [!glb]. *)
let pat p = pat !glb p
let typ t = typ !glb t
let expr e = expr !glb e
let let_decl p e = let_decl !glb p e

820
821
(* Add the type definitions [l] to the global environment. *)
let register_global_types l = glb := register_types !glb l
(* Print the names of the globally defined types on [ppf]. *)
let dump_global_types ppf = dump_types ppf !glb
822

823
824
(* Bind the prefix [p] to the namespace [ns] in the global
   environment. *)
let register_global_ns p ns = glb := register_ns !glb p ns
(* Print the global namespace prefix table on [ppf]. *)
let dump_global_ns ppf = dump_ns ppf !glb
825

826
827
(* Hand the current global namespace prefix table over to
   [Ns.InternalPrinter]. *)
let set_ns_table_for_printer () = Ns.InternalPrinter.set_table !glb.tenv_nspref

828
829
830
(* III. Type-checks *)

(* Type-checking environment: the type of each identifier in scope. *)
type env = Types.descr Env.t
831
832
833

open Typed

834
835
(* [require loc t s] does nothing when [t] is a subtype of [s], and
   otherwise raises [Constraint (t,s)] located at [loc]. *)
let require loc t s = 
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
836

837
838
839
(* Like [require], but returns [t] when the check succeeds. *)
let check loc t s = 
  let () = require loc t s in
  t

840
841
842
843
844
(* Like [check], but a failure is raised through [raise_loc_str] with
   the offset [ofs]. *)
let check_str loc ofs t s = 
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

(* Raise [ShouldHave (constr,s)] located at [loc]: the expression
   should have type [constr], and [s] explains why it cannot. *)
let should_have loc constr s = 
  raise_loc loc (ShouldHave (constr,s))

847
848
849
(* Like [should_have], but raised through [raise_loc_str] with the
   offset [ofs]. *)
let should_have_str loc ofs constr s = 
  let failure = ShouldHave (constr, s) in
  raise_loc_str loc ofs failure

850
851
852
853
854
855
856
857
858
859
860
(* Type-check a flattening operation whose result must have type
   [constr].  [arg c p] type-checks the argument against the
   constraint [c], [p] telling whether a precise type is requested.
   The constraint passed to the argument is a sequence of sequences
   built from an approximation of [constr].  When that approximation
   loses nothing and no precise result is requested, [constr] itself
   is returned; in all other cases the result is the flattening of
   the type computed for the argument, which is then always computed
   precisely unless the approximation was exact. *)
let flatten loc arg constr precise =
  let inner =
    Sequence.star (Sequence.approx (Types.cap Sequence.any constr)) in
  let outer = Sequence.star inner in
  if Types.subtype inner constr then begin
    let t = arg outer precise in
    if precise then Sequence.flatten t else constr
  end else
    Sequence.flatten (arg outer true)
861

862
863
(* Type-check expression [e] in [env] against the constraint [constr].

   The work is delegated to [type_check'] on the expression's descriptor
   and location. When [precise] is false the computed type is discarded
   and [constr] itself is returned. In both cases the returned type is
   accumulated (by union) into the mutable annotation [e.exp_typ]. *)
let rec type_check env e constr precise =
  let d = type_check' e.exp_loc env e.exp_descr constr precise in
  let d = if precise then d else constr in
  e.exp_typ <- Types.cup e.exp_typ d;
  d

868
(* Typing rules, one per expression constructor.

   [loc] is the location used for error reporting, [e] the expression
   descriptor, [constr] the type the expression must have. Arms either
   delegate the constraint to a helper or end with [check loc t constr],
   which raises a [Constraint] error when the computed type [t] is not a
   subtype of [constr]. *)
and type_check' loc env e constr precise = match e with
  | Forget (e,t) ->
      (* The sub-expression only has to fit the annotation [t]; the
         result type is the annotation itself. *)
      let t = Types.descr t in
      ignore (type_check env e t false);
      check loc t constr

  | Abstraction a ->
      let t =
        try Types.Arrow.check_strenghten a.fun_typ constr
        with Not_found ->
          should_have loc constr
            "but the interface of the abstraction is not compatible"
      in
      (* A named function is visible in its own body. *)
      let env = match a.fun_name with
        | None -> env
        | Some f -> Env.add f a.fun_typ env in
      (* Check the body once per (input, output) pair of the interface;
         the input must be covered by the body's patterns. *)
      List.iter
        (fun (t1,t2) ->
           let acc = a.fun_body.br_accept in
           if not (Types.subtype t1 acc) then
             raise_loc loc (NonExhaustive (Types.diff t1 acc));
           ignore (type_check_branches loc env t1 a.fun_body t2 false)
        ) a.fun_iface;
      t

  | Match (e,b) ->
      (* The matched expression must fit what the branches accept. *)
      let t = type_check env e b.br_accept true in
      type_check_branches loc env t b constr precise

  | Try (e,b) ->
      (* The handler branches are typed against any input. *)
      let te = type_check env e constr precise in
      let tb = type_check_branches loc env Types.any b constr precise in
      Types.cup te tb

  | Pair (e1,e2) ->
      type_check_pair loc env e1 e2 constr precise

  | Xml (e1,e2) ->
      type_check_pair ~kind:`XML loc env e1 e2 constr precise

  | RecordLitt r ->
      type_record loc env r constr precise

  | Map (e,b) ->
      type_map loc env false e b constr precise

  | Transform (e,b) ->
      flatten loc (type_map loc env true e b) constr precise

  | Apply (e1,e2) ->
      let t1 = type_check env e1 Types.Arrow.any true in
      let t1 = Types.Arrow.get t1 in
      let dom = Types.Arrow.domain t1 in
      let res =
        if Types.Arrow.need_arg t1 then
          (* The result depends on the argument's type: type it
             precisely. *)
          let t2 = type_check env e2 dom true in
          Types.Arrow.apply t1 t2
        else
          (* Checking the argument against the domain is enough. *)
          (ignore (type_check env e2 dom false); Types.Arrow.apply_noarg t1)
      in
      check loc res constr

  | UnaryOp (o,e) ->
      let t = o.un_op_typer loc
                (type_check env e) constr precise in
      check loc t constr

  | BinaryOp (o,e1,e2) ->
      let t = o.bin_op_typer loc
                (type_check env e1)
                (type_check env e2) constr precise in
      check loc t constr

  | Var s ->
      let t =
        try Env.find s env
        with Not_found ->
          (* The boolean tells whether [s] is bound as a name in the
             global typing environment. *)
          raise_loc loc
            (UnboundId (s, Env.mem s !glb.tenv_names) ) in
      check loc t constr

  | Cst c ->
      check loc (Types.constant c) constr

  | String (i,j,s,e) ->
      type_check_string loc env 0 s i j e constr precise

  | Dot (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t =
        try (Types.Record.project t l)
        with Not_found -> raise_loc loc (WrongLabel(t,l))
      in
      check loc t constr

  | RemoveField (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t = Types.Record.remove_field t l in
      check loc t constr

  | Xtrans (e,b) ->
      let t = type_check env e Sequence.any true in
      let t =
        Sequence.map_tree
          (fun t ->
             (* [resid] is the part of [t] not accepted by the branches;
                [res] is the result type of the branches on [t]. *)
             let resid = Types.diff t b.br_accept in
             let res = type_check_branches loc env t b Sequence.any true in
             (res,resid)
          ) t in
      check loc t constr

  | Validate (e, schema_name, elt_name) ->
      ignore (type_check env e Types.any false);
      (* NOTE(review): [Hashtbl.find] raises [Not_found] when the pair is
         not registered; nothing here turns that into a located error --
         confirm that an earlier phase guarantees the binding exists. *)
      let t = fst (Hashtbl.find !schema_elements (schema_name, elt_name)) in
      check loc t constr

  | Ref (e,t) ->
      ignore (type_check env e (Types.descr t) false);
      check loc (Builtin_defs.ref_type t) constr

988
(* Type-check a pair (or, with [~kind:`XML], an XML element) made of
   [e1] and [e2] against [constr].

   [constr] is first normalized into product form. If that is empty, a
   [ShouldHave] error is raised. The first component is typed against
   the first projection; it is typed precisely when the caller asks for
   precision or when [Types.Product.need_second] says so. The constraint
   on the second component is then derived from the type found for the
   first one; if it is empty a [ShouldHave2] error is raised.

   Returns the product (or XML) type of the two components when
   [precise] holds, and [constr] otherwise. *)
and type_check_pair ?(kind=`Normal) loc env e1 e2 constr precise =
  let rects = Types.Product.normal ~kind constr in
  if Types.Product.is_empty rects then
    (match kind with
      | `Normal -> should_have loc constr "but it is a pair"
      | `XML -> should_have loc constr "but it is an XML element");
  let need_s = Types.Product.need_second rects in
  let t1 = type_check env e1 (Types.Product.pi1 rects) (precise || need_s) in
  let c2 = Types.Product.constraint_on_2 rects t1 in
  if Types.is_empty c2 then
    raise_loc loc (ShouldHave2 (constr,"but the first component has type",t1));
  let t2 = type_check env e2 c2 precise in
  if precise then
    (match kind with
      | `Normal -> Types.times (Types.cons t1) (Types.cons t2)
      | `XML -> Types.xml (Types.cons t1) (Types.cons t2))
  else
    constr

1008
1009
1010
1011
1012
1013
1014
1015
1016
(* Type-check the string literal stored in [s] between indexes [i] and
   [j], followed by the expression [e], against [constr].

   The literal is processed one character at a time, as nested pairs.
   [ofs] counts the characters already consumed and is passed to the
   error-reporting functions. When [i] reaches [j] the literal is
   exhausted and the continuation [e] is checked against what is left of
   the constraint.

   Returns the precise product type when [precise] holds, and [constr]
   otherwise. *)
and type_check_string loc env ofs s i j e constr precise =
  if U.equal_index i j then type_check env e constr precise
  else
    let rects = Types.Product.normal constr in
    if Types.Product.is_empty rects
    then should_have_str loc ofs constr "but it is a string"
    else
      let (ch,i') = U.next s i in
      let ch = Chars.V.mk_int ch in
      let tch = Types.constant (Types.Char ch) in
      (* The current character must fit the first projection... *)
      let t1 = check_str loc ofs tch (Types.Product.pi1 rects) in
      (* ...and the rest of the string the induced constraint. *)
      let c2 = Types.Product.constraint_on_2 rects t1 in
      let t2 = type_check_string loc env (ofs + 1) s i' j e c2 precise in
      if precise then Types.times (Types.cons t1) (Types.cons t2)
      else constr

1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
(* Type-check the record literal [r] against [constr].

   Fields are processed in the order given by [LabelMap.get r]. For each
   field [l] the constraint on its value is the projection of the
   remaining record constraint on [l]; an empty projection means the
   field is not allowed. The remaining constraint is then refined with
   the type found for the field. Once all fields are processed, the
   remaining constraint must admit the empty record, otherwise required
   fields are missing.

   The resulting record type is built with [Types.record'] (first
   component [false]) and checked against [constr]. *)
and type_record loc env r constr precise =
(* TODO: try to get rid of precise = true for values of fields *)
(* TODO: also use an equivalent of need_second to optimize... *)
  if not (Types.Record.has_record constr) then
    should_have loc constr "but it is a record";
  let (rconstr,res) =
    List.fold_left
      (fun (rconstr,res) (l,e) ->
         (* could compute (split l e) once... *)
         let pi = Types.Record.project_opt rconstr l in
         if Types.is_empty pi then
           (let l = Label.to_string (LabelPool.value l) in
            should_have loc constr
              (Printf.sprintf "Field %s is not allowed here." l));
         let t = type_check env e pi true in
         let rconstr = Types.Record.condition rconstr l t in
         let res = (l,Types.cons t) :: res in
         (rconstr,res)
      ) (constr, []) (LabelMap.get r)
  in
  if not (Types.Record.has_empty_record rconstr) then
    should_have loc constr "More fields should be present";
  let t =
    (* Labels come from a [LabelMap], so duplicates are not expected. *)
    Types.record' (false, LabelMap.from_list (fun _ _ -> assert false) res)
  in
  check loc t constr
1051

1052

1053
(* Type-check the branches [brs] applied to an input of type [targ],
   against the result constraint [constr].

   An empty input type yields the empty result without looking at the
   branches. Otherwise [targ] is accumulated (by union) into the mutable
   field [brs.br_typ] and the work is delegated to [branches_aux]. Its
   first type argument is [Types.empty] when a precise result is
   requested and [constr] otherwise. *)
and type_check_branches loc env targ brs constr precise =
  if Types.is_empty targ then Types.empty
  else (
    brs.br_typ <- Types.cup brs.br_typ targ;
    branches_aux loc env targ
      (if precise then Types.empty else constr)
      constr precise brs.br_branches
  )
1061