typer.ml 48 KB
Newer Older
1
(* TODO:
2
 - rewrite type-checking of operators to propagate constraint
3
4
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
5
6
*)

7
8
9
open Location
open Ast
open Ident
10

11
(* Emit a warning on [!Location.warning_ppf]: the location [loc], the
   highlighted source excerpt for it, then the text [msg]. *)
let warning loc msg =
  let where = (loc, `Full) in
  Format.fprintf !Location.warning_ppf "Warning %a:@\n%a%s@."
    Location.print_loc where
    Location.html_hilight where
    msg

17
18
(* What an identifier can be bound to in the environment: a type
   definition or a value (both carry a [Types.t]). *)
type item =
  | Type of Types.t
  | Val of Types.t
20

21
(* Typing environment.
   - [ids]: identifiers bound to types or values;
   - [ns]: namespace table used to resolve prefixes;
   - [cu]: compilation units, looked up by identifier. *)
type t = {
  ids : item Env.t;
  ns : Ns.table;
  cu : Types.CompUnit.t Env.t;
}
26

27
28
29
30
31
(* Placeholder operations on environments: each one ignores its arguments
   and fails with [Failure] carrying the name of the function.
   NOTE(review): presumably they only exist to satisfy a generic signature
   expected elsewhere -- confirm. *)
let hash _ = failwith "Typer.hash"
let compare _ _ = failwith "Typer.compare"
let dump ppf _ = failwith "Typer.dump"
let equal _ _ = failwith "Typer.equal"
let check _ = failwith "Typer.check"
32
33

(* TODO: filter out builtin defs ? *)
34
35
36
37
(* Write an item on [s]: a one-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type. *)
let serialize_item s item =
  let (tag, t) =
    match item with
      | Type t -> (0, t)
      | Val t -> (1, t) in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

38
(* Write the identifier bindings of [env], then its namespace table.
   The [cu] component is not written. *)
let serialize s env =
  let ids = env.ids and ns = env.ns in
  Serialize.Put.env Id.serialize serialize_item Env.iter s ids;
  Ns.serialize_table s ns
41

42
43
44
45
46
(* Read back an item written by [serialize_item]: the one-bit tag selects
   the constructor, then the type follows. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

47
(* Read an environment: first the identifier bindings, then the namespace
   table.  The compilation-unit component starts out empty. *)
let deserialize s =
  let bindings =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let table = Ns.deserialize_table s in
  { ids = bindings; ns = table; cu = Env.empty }
52
53


54
55
(* The environment with no identifier, no compilation unit and the empty
   namespace table. *)
let empty_env =
  { ids = Env.empty; ns = Ns.empty_table; cu = Env.empty }

60
61
(* Hook mapping a compilation unit to its typing environment.  The initial
   function always fails.
   NOTE(review): presumably assigned by another module at start-up --
   confirm. *)
let from_comp_unit = ref (fun _ -> assert false)

62
63
64
65
66
67
68
69
70
71
(* Bind the compilation unit [cu] under the identifier built from [x]. *)
let enter_cu x cu env =
  let units = Env.add (ident x) cu env.cu in
  { env with cu = units }

(* Look up the compilation unit bound to [x]; an unknown prefix is
   reported as a located error at [loc]. *)
let find_cu loc x env =
  try Env.find x env.cu
  with Not_found ->
    let msg = "Unbound compunit prefix " ^ (Ident.to_string x) in
    raise_loc_generic loc msg


72
73
74
75
76
77
78
79
(* Bind [id] to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* Bind every [(id, t)] of [l], in list order, as a type. *)
let enter_types l env =
  let bind ids (id, t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left bind env.ids l }
(* Type bound to [id].  Raises [Not_found] when [id] is unbound or bound
   to a value. *)
let find_type id env =
  match Env.find id env.ids with
    | Val _ -> raise Not_found
    | Type t -> t
81

82
83
(* Resolve the compilation unit named [cu] in [env], then look [id] up as
   a type in the environment of that unit. *)
let find_type_global loc cu id env =
  let comp_unit = find_cu loc cu env in
  find_type id (!from_comp_unit comp_unit)

87
(* Bind [id] to a value of type [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }
89
90
(* Bind every [(id, t)] of [l], in list order, as a value. *)
let enter_values l env =
  let bind ids (id, t) = Env.add id (Val t) ids in
  { env with ids = List.fold_left bind env.ids l }
92
93
(* Type of the value bound to [id].  Raises [Not_found] when [id] is
   unbound or bound to a type. *)
let find_value id env =
  match Env.find id env.ids with
    | Type _ -> raise Not_found
    | Val t -> t
96
97
98
(* Look [id] up as a value in the environment of the compilation unit
   [cu].  The [env] argument is not consulted. *)
let find_value_global cu id env =
  find_value id (!from_comp_unit cu)
99
	
100
101
102
103
104
105
(* [true] when [id] is unbound or already bound to a value; [false] when
   it is bound to a type. *)
let value_name_ok id env =
  try
    (match Env.find id env.ids with
       | Type _ -> false
       | Val _ -> true)
  with Not_found -> true

106
107
108
109
(* Apply [f] to each identifier bound to a value, with its type; type
   bindings are skipped. *)
let iter_values env f =
  let visit x item =
    match item with
      | Val t -> f x t
      | Type _ -> () in
  Env.iter visit env.ids
110

111

112

113
(* Namespaces *)
114

115
(* Hand the namespace table of [env] to [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table
117

118
(* Namespace table stored in the environment [tenv]. *)
let get_ns_table tenv = tenv.ns
119

120
(* Register the prefix [p] for the namespace [ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
122

123
124
125
126
127
(* Run [f x], turning [Ns.UnknownPrefix] into a located error at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
128

129
(* Resolve the qualified tag [t] against the namespace table and build
   the corresponding atom. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.ns in
  let (ns, local) = protect_error_ns loc resolve t in
  Atoms.V.mk ns local
 
(* Resolve the namespace prefix [ns]; unknown prefixes are reported at
   [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
135

136
(* Resolve the qualified attribute name [t] and intern it as a label. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let qname = protect_error_ns loc resolve t in
  LabelPool.mk qname
139

140
141
142
143
144
145
146
147
148
149
150
151
152
(* Build a label map from the association list [r]: labels are resolved
   with [parse_label], contents translated with [f].  A label occurring
   twice is a located error. *)
let parse_record env loc f r =
  let field (l, x) = (parse_label env loc l, f x) in
  let duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let fields = List.map field r in
  LabelMap.from_list duplicate fields

(* Translate a constant expression of the AST into a [Types] constant.
   Any expression outside the listed shapes is a located error. *)
let rec const env loc e =
  match e with
    | LocatedExpr (loc, e) -> const env loc e
    | Pair (x, y) -> Types.Pair (const env loc x, const env loc y)
    | Xml (x, y) -> Types.Xml (const env loc x, const env loc y)
    | RecordLitt fields ->
        Types.Record (parse_record env loc (const env loc) fields)
    | String (i, j, s, tail) -> Types.String (i, j, s, const env loc tail)
    | Atom t -> Types.Atom (parse_atom env loc t)
    | Integer i -> Types.Integer i
    | Char c -> Types.Char c
    | Const c -> c
    | _ ->
        raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
158

159
(* Typing errors.  Within this part of the file, [Constraint (t, s)] is
   raised by [require]/[check_str] when [t] is not a subtype of [s],
   [ShouldHave] by [should_have]/[should_have_str], and [Error] by
   [error]; they travel wrapped in [Location]. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception Error of string
166

167
168
(* Raise [exn] attached to the whole location [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Raise [exn] attached to the character offset [ofs] inside [loc]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* Raise the typing error [Error msg] at [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn
170

171
172
173
  (* Schema datastructures *)

(* Sets of strings (used below for the set of imported schemas). *)
module StringSet = Set.Make (String)
174
175
176

  (* Set of the schemas imported so far; kept only to remember them.
     Registered under the name "Typer.schemas" with [State.ref]. *)
let schemas = State.ref "Typer.schemas" StringSet.empty
177
178
179

(* Tables of schema components, one per kind of component.  In this file
   they are read with [(schema, item)] pairs as keys (see
   [derecurs_schema]). *)
let schema_types =
  State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements =
  State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes =
  State.ref "Typer.schema_attributes" (Hashtbl.create 51)
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
185

186
187
188
189
190
191
192
193
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* Add to [accu] every sequence capture variable occurring in a regular
   expression. *)
let rec seq_vars accu re =
  match re with
    | Epsilon | Elem _ -> accu
    | Seq (r1, r2) | Alt (r1, r2) ->
        let accu = seq_vars accu r1 in
        seq_vars accu r2
    | Star r | WeakStar r -> seq_vars accu r
    | SeqCapture (v, r) -> seq_vars (IdSet.add v accu) r

194
195
196
197
198
199
200
201
202
203
204
205
206
207
(* We use two intermediate representations from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

208
209
210
211
(* First intermediate representation: recursion goes through mutable
   slots and regular expressions are kept as such. *)
type derecurs_slot = {
  ploc : Location.loc;        (* location given when the slot is created *)
  pid : int;                  (* unique number; hash of an alias to the slot *)
  mutable ploop : bool;       (* set while the slot is being compiled *)
  mutable pdescr : derecurs;  (* [PDummy] until the definition is filled in *)
}
and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255

(* Second intermediate representation: regular expressions are compiled
   away, and constructors that build pairs, XML elements, arrows and
   records refer to mutable slots. *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;     (* free variables, once computed *)
  mutable hash : int option;  (* hash value, once computed *)
  (* Marks used by the traversals below: a [genN] equal to the current
     generation means the slot was already visited, [rankN] being its
     visit number. *)
  mutable rank1 : int;
  mutable rank2 : int;
  mutable gen1 : int;
  mutable gen2 : int;
  mutable d : descr;          (* [IDummy] until the slot is defined *)
}
257
258
259
260
261
262
263
264
265
266
267
268
269
    

(* Source of the unique numbers given to [derecurs_slot]s. *)
let counter = ref 0

(* Fresh, still undefined slot located at [loc]. *)
let mk_derecurs_slot loc =
  let id = !counter + 1 in
  counter := id;
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* Fresh slot: undefined description, nothing memoized, marks at zero. *)
let mk_slot () =
  { fv = None; hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0;
    d = IDummy }


(* This environment is used in phase (1) to eliminate recursion *)
(* Environment of phase (1): the typing environment plus the slots of
   the recursive definitions currently in scope. *)
type penv = {
  penv_tenv : t;
  penv_derec : derecurs_slot Env.t;
}

(* Phase (1) environment over [tenv] with no recursive definition. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
275

276
(* Structural hash of [derecurs] terms, compatible with
   [equal_derecurs].  An alias hashes to the number of its slot, so the
   traversal never follows a slot.  [PDummy] must not occur. *)
let rec hash_derecurs p =
  let one tag h = tag + 17 * h in
  let two tag h1 h2 = tag + 17 * h1 + 257 * h2 in
  match p with
    | PDummy -> assert false
    | PAlias s -> s.pid
    | PType t -> one 1 (Types.hash t)
    | POr (p1, p2) -> two 2 (hash_derecurs p1) (hash_derecurs p2)
    | PAnd (p1, p2) -> two 3 (hash_derecurs p1) (hash_derecurs p2)
    | PDiff (p1, p2) -> two 4 (hash_derecurs p1) (hash_derecurs p2)
    | PTimes (p1, p2) -> two 5 (hash_derecurs p1) (hash_derecurs p2)
    | PXml (p1, p2) -> two 6 (hash_derecurs p1) (hash_derecurs p2)
    | PArrow (p1, p2) -> two 7 (hash_derecurs p1) (hash_derecurs p2)
    | POptional p -> one 8 (hash_derecurs p)
    | PRecord (o, r) ->
        one (if o then 9 else 10) (LabelMap.hash hash_derecurs r)
    | PCapture x -> one 11 (Id.hash x)
    | PConstant (x, c) -> two 12 (Id.hash x) (Types.Const.hash c)
    | PRegexp (r, q) -> two 13 (hash_derecurs_regexp r) (hash_derecurs q)
and hash_derecurs_regexp r =
  let one tag h = tag + 17 * h in
  let two tag h1 h2 = tag + 17 * h1 + 257 * h2 in
  match r with
    | PEpsilon -> 1
    | PElem p -> one 2 (hash_derecurs p)
    | PSeq (r1, r2) ->
        two 3 (hash_derecurs_regexp r1) (hash_derecurs_regexp r2)
    | PAlt (r1, r2) ->
        two 4 (hash_derecurs_regexp r1) (hash_derecurs_regexp r2)
    | PStar r -> one 5 (hash_derecurs_regexp r)
    | PWeakStar r -> one 6 (hash_derecurs_regexp r)
317
318

(* Structural equality on [derecurs] terms.  Physically equal terms are
   equal; aliases are equal only when they point to the same slot. *)
let rec equal_derecurs p1 p2 =
  if p1 == p2 then true
  else
    match (p1, p2) with
      | PAlias a, PAlias b -> a == b
      | PType t, PType u -> Types.equal t u
      | POr (a1, b1), POr (a2, b2)
      | PAnd (a1, b1), PAnd (a2, b2)
      | PDiff (a1, b1), PDiff (a2, b2)
      | PTimes (a1, b1), PTimes (a2, b2)
      | PXml (a1, b1), PXml (a2, b2)
      | PArrow (a1, b1), PArrow (a2, b2) ->
          equal_derecurs a1 a2 && equal_derecurs b1 b2
      | POptional a, POptional b -> equal_derecurs a b
      | PRecord (o1, r1), PRecord (o2, r2) ->
          (o1 == o2) && LabelMap.equal equal_derecurs r1 r2
      | PCapture x, PCapture y -> Id.equal x y
      | PConstant (x1, c1), PConstant (x2, c2) ->
          Id.equal x1 x2 && Types.Const.equal c1 c2
      | PRegexp (r1, q1), PRegexp (r2, q2) ->
          equal_derecurs_regexp r1 r2 && equal_derecurs q1 q2
      | _ -> false
and equal_derecurs_regexp r1 r2 =
  match (r1, r2) with
    | PEpsilon, PEpsilon -> true
    | PElem a, PElem b -> equal_derecurs a b
    | PSeq (a1, b1), PSeq (a2, b2)
    | PAlt (a1, b1), PAlt (a2, b2) ->
        equal_derecurs_regexp a1 a2 && equal_derecurs_regexp b1 b2
    | PStar a, PStar b
    | PWeakStar a, PWeakStar b -> equal_derecurs_regexp a b
    | _ -> false
353

354
355
356
357
358
359
360
361
362
363
364
(* Hash tables keyed by [derecurs] terms, using the structural hash and
   equality defined above. *)
module DerecursTable = Hashtbl.Make(
  struct 
    type t = derecurs 
    let hash = hash_derecurs
    let equal = equal_derecurs
  end
)

(* Hash tables keyed by a regular expression paired with the term that
   follows it. *)
module RE = Hashtbl.Make(
  struct
    type t = derecurs_regexp * derecurs
    let hash (re, tail) =
      (hash_derecurs_regexp re) + 17 * (hash_derecurs tail)
    let equal (re1, tail1) (re2, tail2) =
      (equal_derecurs_regexp re1 re2) && (equal_derecurs tail1 tail2)
  end
)
371

372
373
374
375
(* [gen]: current traversal generation; a slot whose [gen1]/[gen2] equals
   it has already been visited by the traversal in progress.
   [rank]: number of slots visited so far in that traversal. *)
let gen = ref 0
let rank = ref 0
	     
(* Hash of a possibly cyclic description.  Slots are numbered in the
   order the traversal reaches them; a slot met again contributes its
   number instead of being traversed a second time.  The caller must
   start a new generation first (see [SlotTable.hash]).  The arithmetic
   is kept as written because the order in which sub-terms are visited
   determines the numbering. *)
let rec hash_descr d =
  match d with
    | IDummy -> assert false
    | IType t -> Types.hash t
    | IOr (a, b) -> 1 + 17 * (hash_descr a) + 257 * (hash_descr b)
    | IAnd (a, b) -> 2 + 17 * (hash_descr a) + 257 * (hash_descr b)
    | IDiff (a, b) -> 3 + 17 * (hash_descr a) + 257 * (hash_descr b)
    | IOptional a -> 4 + 17 * (hash_descr a)
    | ITimes (s, t) -> 5 + 17 * (hash_slot s) + 257 * (hash_slot t)
    | IXml (s, t) -> 6 + 17 * (hash_slot s) + 257 * (hash_slot t)
    | IArrow (s, t) -> 7 + 17 * (hash_slot s) + 257 * (hash_slot t)
    | IRecord (o, r) ->
        (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x, c) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
and hash_slot s =
  if s.gen1 <> !gen then begin
    (* First visit in this generation: number the slot, then descend. *)
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end
  else 13 * s.rank1
    
(* Equality of possibly cyclic descriptions.  Both sides are traversed in
   parallel: slots reached together for the first time get the same
   number, and slots met again are equal exactly when they carry the same
   number.  The caller must start a new generation first (see
   [SlotTable.equal]). *)
let rec equal_descr d1 d2 =
  match (d1, d2) with
    | IType t, IType u -> Types.equal t u
    | IOr (a1, b1), IOr (a2, b2)
    | IAnd (a1, b1), IAnd (a2, b2)
    | IDiff (a1, b1), IDiff (a2, b2) ->
        equal_descr a1 a2 && equal_descr b1 b2
    | IOptional a, IOptional b -> equal_descr a b
    | ITimes (s1, t1), ITimes (s2, t2)
    | IXml (s1, t1), IXml (s2, t2)
    | IArrow (s1, t1), IArrow (s2, t2) ->
        equal_slot s1 s2 && equal_slot t1 t2
    | IRecord (o1, r1), IRecord (o2, r2) ->
        (o1 = o2) && LabelMap.equal equal_slot r1 r2
    | ICapture x, ICapture y -> Id.equal x y
    | IConstant (x1, c1), IConstant (x2, c2) ->
        Id.equal x1 x2 && Types.Const.equal c1 c2
    | _ -> false
and equal_slot s1 s2 =
  let g = !gen in
  let seen1 = (s1.gen1 = g) and seen2 = (s2.gen2 = g) in
  if seen1 && seen2 then s1.rank1 = s2.rank2
  else if seen1 || seen2 then false
  else begin
    (* Neither side visited yet: give both the same number and descend. *)
    incr rank;
    s1.rank1 <- !rank; s1.gen1 <- g;
    s2.rank2 <- !rank; s2.gen2 <- g;
    equal_descr s1.d s2.d
  end
  
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
(* Hash tables keyed by slots, compared up to the recursive equality
   above.  Each hash or equality computation starts a new traversal
   generation; the hash of a slot is computed once and stored in it. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    let hash s =
      match s.hash with
        | None ->
            incr gen;
            rank := 0;
            let h = hash_slot s in
            s.hash <- Some h;
            h
        | Some h -> h

    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        incr gen;
        rank := 0;
        equal_slot s1 s2
      end
  end)


(* Phase (1): translate an AST type/pattern into a [derecurs] term.
   An unqualified name is, in this order, a recursive definition in
   scope, a type of the environment, or else a capture variable.  A
   qualified name must denote a type of another compilation unit. *)
let rec derecurs env p =
  let loc = p.loc and tenv = env.penv_tenv in
  match p.descr with
    | PatVar name ->
        (match Ns.split_qname name with
           | "", local ->
               let id = ident local in
               (try PAlias (Env.find id env.penv_derec)
                with Not_found ->
                  (try PType (find_type id tenv)
                   with Not_found -> PCapture id))
           | prefix, local ->
               (try
                  let cu = ident (U.mk prefix) in
                  PType (find_type_global loc cu (ident local) tenv)
                with Not_found ->
                  raise_loc_generic loc
                    ("Unbound external type " ^ prefix ^ ":"
                     ^ (U.to_string local))))
    | SchemaVar (kind, schema, item) ->
        PType (derecurs_schema env kind schema item)
    | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
    | Internal t -> PType t
    | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
    | Or (p1, p2) -> POr (derecurs env p1, derecurs env p2)
    | And (p1, p2) -> PAnd (derecurs env p1, derecurs env p2)
    | Diff (p1, p2) -> PDiff (derecurs env p1, derecurs env p2)
    | Prod (p1, p2) -> PTimes (derecurs env p1, derecurs env p2)
    | XmlT (p1, p2) -> PXml (derecurs env p1, derecurs env p2)
    | Arrow (p1, p2) -> PArrow (derecurs env p1, derecurs env p2)
    | Optional q -> POptional (derecurs env q)
    | Record (o, r) -> PRecord (o, parse_record tenv loc (derecurs env) r)
    | Constant (x, c) -> PConstant (x, const tenv loc c)
    | Cst c -> PType (Types.constant (const tenv loc c))
    | Regexp (r, q) ->
        (* Each sequence capture variable of [r] is also bound to the
           empty sequence in the tail. *)
        let bind_nil t v =
          PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
        let captured = seq_vars IdSet.empty r in
        let tail = IdSet.fold bind_nil (derecurs env q) captured in
        let re = derecurs_regexp (fun x -> x) env r in
        PRegexp (re, tail)
(* Translate a regular expression.  [vars] is applied to every element;
   entering a sequence capture [x] extends it so that each element below
   is also intersected with the capture of [x]. *)
and derecurs_regexp vars env re =
  match re with
    | Epsilon -> PEpsilon
    | Elem p -> PElem (vars (derecurs env p))
    | Seq (r1, r2) ->
        PSeq (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
    | Alt (r1, r2) ->
        PAlt (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
    | Star r -> PStar (derecurs_regexp vars env r)
    | WeakStar r -> PWeakStar (derecurs_regexp vars env r)
    | SeqCapture (x, r) ->
        let vars' elt = PAnd (vars elt, PCapture x) in
        derecurs_regexp vars' env r


(* Enter a group of mutually recursive definitions: create one slot per
   name, put all slots in scope, then translate each body in the
   extended environment, which is returned. *)
and derecurs_def env b =
  let with_slot (v, p) = (v, p, mk_derecurs_slot p.loc) in
  let defs = List.map with_slot b in
  let bind slots (v, _, s) = Env.add v s slots in
  let slots = List.fold_left bind env.penv_derec defs in
  let env' = { env with penv_derec = slots } in
  List.iter (fun (_, p, s) -> s.pdescr <- derecurs env' p) defs;
  env'

(* Look up the component [item] of the schema [schema].  When [kind] is
   given only the table of that kind is searched; otherwise elements,
   types, attributes, attribute groups and model groups are tried in
   that order.  Raises [Failure] when nothing is found. *)
and derecurs_schema env kind schema item =
  let elt () = Hashtbl.find !schema_elements (schema, item) in
  let typ () = Hashtbl.find !schema_types (schema, item) in
  let att () = Hashtbl.find !schema_attributes (schema, item) in
  let att_group () = Hashtbl.find !schema_attribute_groups (schema, item) in
  let mod_group () = Hashtbl.find !schema_model_groups (schema, item) in
  (* Try each lookup in turn; [n] names the kind in the error message. *)
  let rec do_try n = function
    | [] ->
        let s = Printf.sprintf
                  "No %s named '%s' found in schema '%s'" n item schema in
        failwith s
    | f :: rem -> (try f () with Not_found -> do_try n rem) in
  match kind with
    | Some `Element -> do_try "element" [ elt ]
    | Some `Type -> do_try "type" [ typ ]
    (* The message used to read "atttribute". *)
    | Some `Attribute -> do_try "attribute" [ att ]
    | Some `Attribute_group -> do_try "attribute group" [ att_group ]
    | Some `Model_group -> do_try "model group" [ mod_group ]
    | None -> do_try "item" [ elt; typ; att; att_group; mod_group ]
526
527

    
528
529
530
531
532
(* Capture variables of a slot or description.  A memoized result is
   used when present; otherwise a slot already marked in the current
   generation contributes nothing, which ends the traversal of cycles. *)
let rec fv_slot s =
  match s.fv with
    | Some vars -> vars
    | None ->
        if s.gen1 = !gen then IdSet.empty
        else begin
          s.gen1 <- !gen;
          fv_descr s.d
        end
and fv_descr d =
  match d with
    | IDummy -> assert false
    | IType _ -> IdSet.empty
    | IOr (a, b)
    | IAnd (a, b)
    | IDiff (a, b) -> IdSet.cup (fv_descr a) (fv_descr b)
    | IOptional a -> fv_descr a
    | ITimes (s, t)
    | IXml (s, t)
    | IArrow (s, t) -> IdSet.cup (fv_slot s) (fv_slot t)
    | IRecord (_, r) ->
        List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
    | ICapture x | IConstant (x, _) -> IdSet.singleton x
547

548
549
550
551
552
553
554
555
(* Compute and store the capture variables of [s] unless already known;
   the computation runs in a fresh generation. *)
let compute_fv s =
  match s.fv with
    | None ->
        incr gen;
        s.fv <- Some (fv_slot s)
    | Some _ -> ()
	  
556
557
558
(* Fail with a located error if the set [s] contains a capture variable;
   the variable reported is the one returned by [IdSet.pick]. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | None -> ()
    | Some x ->
        let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
        raise_loc_generic loc msg
561
    
562
563
564
(* Memo tables of phase (2): [compile_slot_hash] maps a term to the slot
   allocated for it by [compile_slot], [compile_hash] maps a term to the
   description computed for it by [compile]. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

565
566
(* Work lists filled by [compile_slot] and emptied by [flush_defs]:
   [todo_defs] holds the slots whose description is still to be compiled
   (with the term to compile), [todo_fv] the slots whose capture
   variables are still to be computed. *)
let todo_defs = ref []
let todo_fv = ref []
567
568
569
570
571
572
573
574

(* Phase (2): compile a [derecurs] term into a [descr].  Results are
   memoized in [compile_hash]; a term is handed to [real_compile] only
   the first time it is met. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    c
(* Compile one constructor.  Components of pairs, XML elements, arrows
   and records are compiled lazily through slots ([compile_slot]); other
   sub-terms are compiled right away.  An alias met again while its own
   body is being compiled, without going through a slot, is an unguarded
   recursion and is reported at the location of its slot. *)
and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      (* Clear the flag even when compilation fails: otherwise a later
         compilation going through this slot would wrongly be reported
         as an unguarded recursion. *)
      let r =
        try compile v.pdescr
        with exn -> v.ploop <- false; raise exn in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) -> IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q
(* Compile the regular expression [r] followed by the tail [q] into a
   union of descriptions.  [memo] records the (regexp, continuation)
   pairs already expanded, which is what makes the expansion of stars
   terminate. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  (* Add one alternative to the union built so far. *)
  let add accu i =
    match accu with None -> Some i | Some j -> Some (IOr (j,i)) in
  let get = function Some x -> x | None -> assert false in
  (* Continue with a continuation: either a regexp still to expand, or a
     final term to compile. *)
  let rec queue accu = function
    | PRegexp (r,q) -> aux accu r q
    (* Compile the continuation actually received.  The original code
       compiled the outer [q] captured from [compile_regexp]; the two
       are the same term unless that outer [q] is itself a [PRegexp], in
       which case the wrong continuation was compiled. *)
    | tail -> add accu (compile tail)
  and aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon -> queue accu q
        | PElem p ->
            (* Be careful not to create pairs with same second component *)
            let rec extract = function
              | PConstant (x,v) -> `Const (x,v)
              | POr (x,y) ->
                  (match extract x, extract y with
                     | `Pat x, `Pat y -> `Pat (POr (x,y))
                     | x, y -> `Or (x,y))
              | p -> `Pat p
            in
            let rec mk accu = function
              | `Const (x,v) ->
                  (* A constant binding consumes no element: it is
                     intersected with what follows. *)
                  (match queue None q with
                     | Some q -> add accu (IAnd (IConstant (x,v), q))
                     | None -> accu)
              | `Or (x,y) -> mk (mk accu x) y
              | `Pat p ->
                  add accu (ITimes (compile_slot p, compile_slot q))
            in
            mk accu (extract p)
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        (* The two stars differ only in which alternative comes first. *)
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  get (aux None r q)
636
637
(* Slot standing for the term [p].  The first request allocates the
   slot, records it in the memo table and queues both its definition and
   the computation of its capture variables; nothing is compiled until
   [flush_defs] runs. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let fresh = mk_slot () in
    todo_defs := (fresh, p) :: !todo_defs;
    todo_fv := fresh :: !todo_fv;
    DerecursTable.add compile_slot_hash p fresh;
    fresh
644

645
      
646
(* Timer named "Typer.fv"; started and stopped around the computation of
   capture variables in [flush_defs]. *)
let timer_fv = Stats.Timer.create "Typer.fv"
647
(* Define every pending slot (compiling one may queue further slots),
   then compute the capture variables of all slots queued for it. *)
let rec flush_defs () =
  match !todo_defs with
    | (s, p) :: rest ->
        todo_defs := rest;
        s.d <- compile p;
        flush_defs ()
    | [] ->
        Stats.Timer.start timer_fv;
        List.iter compute_fv !todo_fv;
        todo_fv := [];
        Stats.Timer.stop timer_fv ()
658
659
660
661
662
663
664
665
666
667
668
669
670
671
	
(* Phase (3) memo tables: the type node ([typ_nodes]) and the pattern
   node ([pat_nodes]) already built for a slot. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: turn a description into a [Types] descriptor.
   Captures and constant bindings must not occur here. *)
let rec typ d =
  match d with
    | IType t -> t
    | IOr (a, b) -> Types.cup (typ a) (typ b)
    | IAnd (a, b) -> Types.cap (typ a) (typ b)
    | IDiff (a, b) -> Types.diff (typ a) (typ b)
    | ITimes (s, t) -> Types.times (typ_node s) (typ_node t)
    | IXml (s, t) -> Types.xml (typ_node s) (typ_node t)
    | IArrow (s, t) -> Types.arrow (typ_node s) (typ_node t)
    | IOptional a -> Types.Record.or_absent (typ a)
    | IRecord (o, r) -> Types.record' (o, LabelMap.map typ_node r)
    | IDummy | ICapture _ | IConstant (_, _) -> assert false
673
      
674
(* Type node of a slot.  The node is registered in [typ_nodes] before its
   definition is computed, so that recursive occurrences find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* Phase (3) for patterns: a description without capture variable is
   just a type constraint; otherwise it is translated structurally. *)
let rec pat d : Patterns.descr =
  let captures = fv_descr d in
  if IdSet.is_empty captures then Patterns.constr (typ d)
  else pat_aux d
    
(* Translate a description that binds at least one variable.  A
   difference is accepted only when its right-hand side binds nothing;
   optional fields and arrows are rejected with [Patterns.Error]. *)
and pat_aux d =
  match d with
    | IDummy -> assert false
    | IOr (a, b) -> Patterns.cup (pat a) (pat b)
    | IAnd (a, b) -> Patterns.cap (pat a) (pat b)
    | IDiff (a, b) when IdSet.is_empty (fv_descr b) ->
        let excluded = Types.neg (typ b) in
        Patterns.cap (pat a) (Patterns.constr excluded)
    | IDiff _ ->
        raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s, t) -> Patterns.times (pat_node s) (pat_node t)
    | IXml (s, t) -> Patterns.xml (pat_node s) (pat_node t)
    | IOptional _ ->
        raise
          (Patterns.Error "Optional fields are not allowed in record patterns")
    | IRecord (o, r) ->
        (* Fields without capture go into a record type constraint;
           each of the others yields a separate record pattern and is
           left unconstrained in that type. *)
        let field_pats = ref [] in
        let field l s =
          if IdSet.is_empty (fv_slot s) then typ_node s
          else begin
            field_pats := Patterns.record l (pat_node s) :: !field_pats;
            Types.any_node
          end
        in
        let constr = Types.record' (o, LabelMap.mapi field r) in
        List.fold_left Patterns.cap (Patterns.constr constr) !field_pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
    | ICapture x -> Patterns.capture x
    | IConstant (x, c) -> Patterns.constant x c
    | IArrow _ ->
        raise (Patterns.Error "Arrows are not allowed in patterns")
    | IType _ -> assert false
      
(* Pattern node of a slot.  The node is registered before its definition
   is computed; if the definition fails, the registration is undone
   before the exception is passed on (for the toplevel ...). *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    (try
       SlotTable.add pat_nodes s node;
       Patterns.define node (pat s.d);
       node
     with exn ->
       SlotTable.remove pat_nodes s;
       raise exn)
727

728

729
(* Process a group of mutually recursive type definitions and return the
   list of (name, type).  A name already bound in [env] is an error;
   capture variables are rejected; a definition yielding the empty type
   only triggers a warning (when it has a location).  Each type is also
   registered with the type printer. *)
let type_defs env b =
  let check_fresh (v, p) =
    if Env.mem v env.ids then
      raise_loc_generic p.loc
        ("Identifier " ^ (Ident.to_string v) ^ " is already bound") in
  List.iter check_fresh b;
  let penv = derecurs_def (penv env) b in
  let compile_def (v, p) = (v, p, compile (derecurs penv p)) in
  let compiled = List.map compile_def b in
  flush_defs ();
  let finish (v, p, s) =
    check_no_capture p.loc (fv_descr s);
    let t = typ s in
    if (p.loc <> noloc) && (Types.is_empty t) then
      warning p.loc
        ("This definition yields an empty type for " ^ (Ident.to_string v));
    (v, t) in
  let defs = List.map finish compiled in
  List.iter (fun (v, t) -> Types.Print.register_global (Id.value v) t) defs;
  defs
749
750


751
752
753
754
755
(* Print on [ppf] the name of every identifier bound to a type, each one
   preceded by a space. *)
let dump_types ppf env =
  let show v item =
    match item with
      | Type _ -> Format.fprintf ppf " %a" Ident.print v
      | Val _ -> () in
  Env.iter show env.ids
756

757
(* Print the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table
759

760

761
762
(* Turn the phase (1) term [r] into a type node, rejecting capture
   variables with an error located at [loc]. *)
let do_typ loc r =
  let slot = compile_slot r in
  flush_defs ();
  let captures = fv_slot slot in
  check_no_capture loc captures;
  typ_node slot
766
   
767
768
(* Translate the AST type [p] into a type node, in the environment
   [env]. *)
let typ env p =
  let derec = derecurs (penv env) p in
  do_typ p.loc derec
769
    
770
771
(* Translate the AST pattern [p] into a pattern node.  [Patterns.Error]
   becomes an error located at [p]; a located exception carrying no
   location is re-raised with the location of [p]. *)
let pat env p =
  let derec = derecurs (penv env) p in
  let slot = compile_slot derec in
  flush_defs ();
  try pat_node slot
  with
    | Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc, _, exn) when loc = noloc ->
        raise (Location (p.loc, `Full, exn))
776
777


778
779
(* II. Build skeleton *)

780

781
782
783
784
785
(* Hooks for operators.  The four references initially hold functions
   that always fail; [expr] below calls [!mk_unary_op op env] and
   [!mk_binary_op op env] to build operator nodes.
   NOTE(review): presumably these are assigned by the module defining the
   builtin operators, and [type_fun] is the shape of an operator typing
   function (constraint, precision flag, result) -- confirm. *)
type type_fun = Types.t -> bool -> Types.t
let mk_unary_op = ref (fun _ _ -> assert false)
let typ_unary_op = ref (fun _ _ _ -> assert false)
let mk_binary_op = ref (fun _ _ -> assert false)
let typ_binary_op = ref (fun _ _ _ _ -> assert false)
786
787


788
(* Sets of free variables of expressions are sets of identifiers. *)
module Fv = IdSet
789

790
791
792
(* Tree of the branches met while building the skeleton: a branch
   together with the branches found inside its body. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected so far at the current nesting level; saved, reset
   and rebuilt by [branches] around each branch body. *)
let cur_branch : branch list ref = ref []
793

794
(* Pair the free variables [fv] with a typed expression node located at
   [loc], whose type field starts as the empty type. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e } in
  (fv, node)
800
801


802
803
(* Build the skeleton of an expression: returns its free variables and
   the typed node, whose types are still to be computed.  [loc] is the
   innermost location seen so far. *)
let rec expr env loc ast =
  (* Two sub-expressions. *)
  let two mk a b =
    let (fv1,a) = expr env loc a and (fv2,b) = expr env loc b in
    exp loc (Fv.cup fv1 fv2) (mk a b) in
  (* A sub-expression followed by a list of branches. *)
  let with_branches mk e b =
    let (fv1,e) = expr env loc e
    and (fv2,b) = branches env b in
    exp loc (Fv.cup fv1 fv2) (mk e b) in
  (* A sub-expression annotated with a type. *)
  let with_type mk e t =
    let (fv,e) = expr env loc e and t = typ env t in
    exp loc fv (mk e t) in
  (* A sub-expression followed by a field name. *)
  let with_label mk e l =
    let (fv,e) = expr env loc e in
    exp loc fv (mk e (parse_label env loc l)) in
  match ast with
    | LocatedExpr (loc,e) -> expr env loc e
    | Forget (e,t) -> with_type (fun e t -> Typed.Forget (e,t)) e t
    | Var s ->
        (match Ns.split_qname s with
           | "", id ->
               let id = ident id in
               exp loc (Fv.singleton id) (Typed.Var id)
           | cu, id ->
               (* Qualified name: a value of another compilation unit. *)
               let cu = find_cu loc (ident (U.mk cu)) env in
               exp loc Fv.empty (Typed.ExtVar (cu, ident id)))
    | Apply (e1,e2) -> two (fun f x -> Typed.Apply (f,x)) e1 e2
    | Abstraction a ->
        let iface =
          List.map (fun (t1,t2) -> (typ env t1, typ env t2)) a.fun_iface in
        (* The type of the function is the intersection of its arrows. *)
        let t =
          List.fold_left
            (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
            Types.any iface in
        let iface =
          List.map (fun (t1,t2) -> (Types.descr t1, Types.descr t2)) iface in
        let (fv0,body) = branches env a.fun_body in
        (* The name of the function is bound inside its own body. *)
        let fv =
          match a.fun_name with
            | Some f -> Fv.remove f fv0
            | None -> fv0 in
        let e =
          Typed.Abstraction
            { Typed.fun_name = a.fun_name;
              Typed.fun_iface = iface;
              Typed.fun_body = body;
              Typed.fun_typ = t;
              Typed.fun_fv = fv
            } in
        exp loc fv e
    | (Integer _ | Char _ | Atom _ | Const _) as c ->
        exp loc Fv.empty (Typed.Cst (const env loc c))
    | Pair (e1,e2) -> two (fun x y -> Typed.Pair (x,y)) e1 e2
    | Xml (e1,e2) -> two (fun x y -> Typed.Xml (x,y)) e1 e2
    | Dot (e,l) -> with_label (fun e l -> Typed.Dot (e,l)) e l
    | RemoveField (e,l) ->
        with_label (fun e l -> Typed.RemoveField (e,l)) e l
    | RecordLitt r ->
        let fv = ref Fv.empty in
        let field e =
          let (fv2,e) = expr env loc e in
          fv := Fv.cup !fv fv2;
          e in
        let r = parse_record env loc field r in
        exp loc !fv (Typed.RecordLitt r)
    | String (i,j,s,e) ->
        let (fv,e) = expr env loc e in
        exp loc fv (Typed.String (i,j,s,e))
    | Op (op,le) ->
        let (fvs,ltes) = List.split (List.map (expr env loc) le) in
        let fv = List.fold_left Fv.cup Fv.empty fvs in
        (try
           (match ltes with
              | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
              | [e1;e2] ->
                  exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
              | _ -> assert false)
         with Not_found -> assert false)
    | Match (e,b) -> with_branches (fun e b -> Typed.Match (e, b)) e b
    | Map (e,b) -> with_branches (fun e b -> Typed.Map (e, b)) e b
    | Transform (e,b) ->
        with_branches (fun e b -> Typed.Transform (e, b)) e b
    | Xtrans (e,b) -> with_branches (fun e b -> Typed.Xtrans (e, b)) e b
    | Validate (e,schema,elt) ->
        let (fv,e) = expr env loc e in
        exp loc fv (Typed.Validate (e, schema, elt))
    | Try (e,b) -> with_branches (fun e b -> Typed.Try (e, b)) e b
    | NamespaceIn (pr,ns,e) ->
        let env = enter_ns pr ns env in
        expr env loc e
    | Ref (e,t) -> with_type (fun e t -> Typed.Ref (e,t)) e t
902
	      
903
  (* Build the skeleton of a list of branches.  Returns the variables
     free in the bodies and not bound by the corresponding pattern,
     together with the branches and the union of the types accepted by
     their patterns.  A variable captured by a pattern but unused in the
     body triggers a warning. *)
  and branches env b =
    let free = ref Fv.empty in
    let accepted = ref Types.empty in
    let one_branch (p,e) =
      (* Branches found in the body are collected separately and then
         attached to this branch. *)
      let saved = !cur_branch in
      cur_branch := [];
      let (fv_body,e) = expr env noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat env p in
      let bound = Patterns.fv p in
      (match Fv.pick (Fv.diff bound fv_body) with
         | Some x ->
             let x = U.to_string (Id.value x) in
             warning br_loc
               ("The capture variable " ^ x ^
                " is declared in the pattern but not used in the body of this branch. It might be a misspelled type or name (if not use _ instead).")
         | None -> ());
      let fv_branch = Fv.diff fv_body (Patterns.fv p) in
      free := Fv.cup !free fv_branch;
      accepted := Types.cup !accepted (Types.descr (Patterns.accept p));
      let br =
        {
          Typed.br_loc = br_loc;
          Typed.br_used = br_loc = noloc;
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: saved;
      br in
    let brs = List.map one_branch b in
    (!free,
     {
       Typed.br_typ = Types.empty;
       Typed.br_branches = brs;
       Typed.br_accept = !accepted;
       Typed.br_compiled = None;
     }
    )
939

940
(* Entry point: skeleton of a whole expression, starting with no location
   and dropping the set of free variables. *)
let expr env e = snd (expr env noloc e)
941

942
943
(* Skeleton of a declaration binding the pattern [p] to the expression
   [e]; nothing is compiled yet. *)
let let_decl env p e =
  { Typed.let_pat = pat env p;
    Typed.let_body = expr env e;
    Typed.let_compiled = None }

947
948
949

(* Hide global "typing/parsing" environment *)

950

951
952
(* III. Type-checks *)

953
954
open Typed

955
956
(* Check that [t] is a subtype of [s]; otherwise raise [Constraint] at
   [loc]. *)
let require loc t s =
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
957

958
(* Same check as [require], returning [t] when it succeeds. *)
let verify loc t s =
  let () = require loc t s in
  t

961
962
963
964
965
(* Same as [verify], but a failure is attached to the character offset
   [ofs] inside [loc]. *)
let check_str loc ofs t s =
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

(* Raise [ShouldHave (constr, s)] at [loc]. *)
let should_have loc constr s =
  let exn = ShouldHave (constr, s) in
  raise_loc loc exn

968
969
970
(* Raise [ShouldHave (constr, s)] at the character offset [ofs] inside
   [loc]. *)
let should_have_str loc ofs constr s =
  let exn = ShouldHave (constr, s) in
  raise_loc_str loc ofs exn

971
972
973
974
975
976
977
978
979
980
981
(* Typing of flattening.  [arg] types the argument under a constraint
   and a precision flag.  The argument is typed against a sequence of
   sequences derived from [constr].  When that derived constraint is no
   larger than [constr] and no precise result is requested, [constr]
   itself is returned; in every other case the type found for the
   argument is flattened.  [loc] is not used. *)
let flatten loc arg constr precise =
  let constr' =
    Sequence.star (Sequence.approx (Types.cap Sequence.any constr)) in
  let sconstr' = Sequence.star constr' in
  let exact = Types.subtype constr' constr in
  if not exact then Sequence.flatten (arg sconstr' true)
  else
    let t = arg sconstr' precise in
    if precise then Sequence.flatten t else constr
982

983
984
let rec type_check env e constr precise = 
  let d = type_check' e.exp_loc env e.exp_descr constr