typer.ml 51.8 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraints
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
(* Boolean switch, presumably enabling debug output for schema handling.
   NOTE(review): it is not read anywhere in the part of the file shown here;
   confirm that it is still used. *)
let debug_schema = false

13
(* [warning loc msg] writes a warning on the formatter currently stored in
   [Location.warning_ppf]: the printed location, then its highlighted form,
   then [msg], followed by a flush. *)
let warning loc msg =
  let ppf = !Location.warning_ppf in
  let where = (loc, `Full) in
  Format.fprintf ppf "Warning %a:@\n%a%s@."
    Location.print_loc where
    Location.html_hilight where
    msg

19
20
(* What an identifier can be bound to in the environment: either a type
   definition or the type of a value. *)
type item =
  | Type of Types.t  (* the identifier names this type *)
  | Val of Types.t   (* the identifier is a value having this type *)
22

23
24
(* Finite maps keyed by [U.t]; used below for the [cu] field of the typing
   environment. *)
module UEnv = Map.Make(U)

25
(* Typing environment. *)
type t = {
  ids : item Env.t;
    (* identifiers, each bound to a type definition or to a value type *)
  ns : Ns.table;
    (* namespace table used to resolve tags, attributes and prefixes *)
  cu : Types.CompUnit.t UEnv.t;
    (* compilation units, indexed by the [U.t] they were entered under *)
}
30

31
32
33
34
35
(* Placeholder operations on environments: each of them raises
   [Failure "Typer.<name>"] as soon as it is fully applied.
   NOTE(review): presumably present only so that this module matches a
   signature expected elsewhere -- confirm. *)
let hash _ = failwith "Typer.hash"
let compare _ _ = failwith "Typer.compare"
let dump ppf _ = failwith "Typer.dump"
let equal _ _ = failwith "Typer.equal"
let check _ = failwith "Typer.check"
36
37

(* TODO: filter out builtin defs ? *)
38
39
40
41
(* [serialize_item s item] writes a one-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type carried by [item]. *)
let serialize_item s item =
  let tag, t =
    match item with
    | Type t -> (0, t)
    | Val t -> (1, t)
  in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

42
(* [serialize s env] writes the identifier bindings of [env], then its
   namespace table.  The [cu] field is not written. *)
let serialize s env =
  let put_ids = Serialize.Put.env Id.serialize serialize_item Env.iter in
  put_ids s env.ids;
  Ns.serialize_table s env.ns
45

46
47
48
49
50
(* [deserialize_item s] reads back an item written by [serialize_item]:
   a one-bit tag, then the type.  Any other tag value is impossible for a
   one-bit field, hence the [assert false]. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

51
(* [deserialize s] rebuilds an environment from the data written by
   [serialize]: identifier bindings first, then the namespace table.  The
   [cu] map is not part of the stream and starts out empty. *)
let deserialize s =
  let read_ids =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty in
  let ids = read_ids s in
  let ns = Ns.deserialize_table s in
  { ids = ids; ns = ns; cu = UEnv.empty }
56
57


58
59
(* The environment with no identifier, the empty namespace table and no
   compilation unit. *)
let empty_env =
  { cu = UEnv.empty; ns = Ns.empty_table; ids = Env.empty }

64
65
(* Hook giving the typing environment of a compilation unit (it is applied
   that way by [find_type_global] and [find_value_global]).  The initial
   closure fails on any call; the real implementation has to be stored in
   this reference from elsewhere, which is not visible in this section. *)
let from_comp_unit = ref (fun cu -> assert false)

66
(* [enter_cu x cu env] is [env] where the compilation unit [cu] is
   registered under [x]. *)
let enter_cu x cu env =
  let units = UEnv.add x cu env.cu in
  { env with cu = units }
68

69
70
71
(* [find_cu x env] is the compilation unit registered under [x] in [env],
   or [Types.CompUnit.mk x] when [x] has not been registered. *)
let find_cu x env =
  if UEnv.mem x env.cu then UEnv.find x env.cu
  else Types.CompUnit.mk x
72
73


74
75
76
77
78
79
80
81
(* [enter_type id t env] binds [id] to the type definition [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* [enter_types l env] binds every [(id, t)] of [l], from left to right,
   as a type definition. *)
let enter_types l env =
  let add_type ids (id, t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left add_type env.ids l }
(* [find_type id env] is the type definition bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a value. *)
let find_type id env =
  match Env.find id env.ids with
  | Val _ -> raise Not_found
  | Type t -> t
83

84
(* [find_type_global loc cu id env] resolves the unit name [cu] with
   [find_cu], obtains that unit's environment through [!from_comp_unit] and
   looks up the type [id] in it.  [loc] is accepted but not used.
   Raises [Not_found] as [find_type] does. *)
let find_type_global loc cu id env =
  let unit_env = !from_comp_unit (find_cu cu env) in
  find_type id unit_env

89
(* [enter_value id t env] binds [id] to a value of type [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }
91
92
(* [enter_values l env] binds every [(id, t)] of [l], from left to right,
   as a value of type [t]. *)
let enter_values l env =
  let add_value ids (id, t) = Env.add id (Val t) ids in
  { env with ids = List.fold_left add_value env.ids l }
94
95
(* [find_value id env] is the type of the value bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a type definition. *)
let find_value id env =
  match Env.find id env.ids with
  | Type _ -> raise Not_found
  | Val t -> t
98
99
100
(* [find_value_global cu id env] looks up the value [id] in the environment
   that [!from_comp_unit] returns for [cu].  The [env] argument itself is
   not consulted.  Raises [Not_found] as [find_value] does. *)
let find_value_global cu id env =
  find_value id (!from_comp_unit cu)
101
	
102
103
104
105
106
107
(* [value_name_ok id env] is [false] exactly when [id] is currently bound
   to a type definition; unbound identifiers and identifiers already bound
   to values are accepted. *)
let value_name_ok id env =
  let bound_to_type =
    try
      (match Env.find id env.ids with
       | Type _ -> true
       | Val _ -> false)
    with Not_found -> false
  in
  not bound_to_type

108
109
110
111
(* [iter_values env f] calls [f id t] for every identifier of [env] bound
   to a value of type [t]; type definitions are skipped. *)
let iter_values env f =
  let visit x = function
    | Val t -> f x t
    | Type _ -> ()
  in
  Env.iter visit env.ids
112

113

114
115
116
117
118
119
120
121
122
(* [register_types cu env] registers every type definition of [env] with
   the type printer, under the name made of the value of [cu], a colon and
   the identifier. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x = function
    | Type t ->
        let qualified = U.concat prefix (Id.value x) in
        Types.Print.register_global qualified t
    | Val _ -> ()
  in
  Env.iter register env.ids

123

124
(* Namespaces *)
125

126
(* Hands the namespace table of [env] over to [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table
128

129
(* [get_ns_table tenv] is the namespace table stored in [tenv]. *)
let get_ns_table tenv = tenv.ns
130

131
(* [enter_ns p ns env] is [env] whose namespace table has been extended by
   [Ns.add_prefix p ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
133

134
135
136
137
138
(* [protect_error_ns loc f x] is [f x], except that an [Ns.UnknownPrefix]
   escaping from the call is turned into a located generic error at
   [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix prefix ->
    let msg = "Undefined namespace prefix " ^ (U.to_string prefix) in
    raise_loc_generic loc msg
139

140
(* [parse_atom env loc t] resolves the tag [t] against the namespace table
   of [env] and builds the corresponding atom.  An unknown prefix is
   reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.ns in
  let (ns, local) = protect_error_ns loc resolve t in
  Atoms.V.mk ns local
 
(* [parse_ns env loc ns] resolves the prefix [ns] against the namespace
   table of [env].  An unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
146

147
(* [parse_label env loc t] resolves the attribute name [t] against the
   namespace table of [env] and interns the result in [LabelPool].  An
   unknown prefix is reported at [loc]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let qualified = protect_error_ns loc resolve t in
  LabelPool.mk qualified
150

151
152
153
154
155
156
157
158
159
160
161
162
163
(* [parse_record env loc f r] turns the association list [r] into a label
   map: labels are resolved with [parse_label], contents are transformed by
   [f].  Two fields resolving to the same label are reported at [loc]. *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  (* The pair is built by a single tuple expression, as before, so that the
     relative evaluation order of [f x] and [parse_label] is untouched. *)
  let field (l, x) = (parse_label env loc l, f x) in
  LabelMap.from_list on_duplicate (List.map field r)

(* [const env loc e] converts the expression [e] into a constant
   ([Types.const]).  Scalars, pairs, XML elements, record literals and
   strings of constants are accepted; any other expression is rejected with
   a located error.  [loc] is the location used for errors; it is replaced
   whenever a [LocatedExpr] node is crossed. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
  | LocatedExpr (inner_loc, inner) -> const env inner_loc inner
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Const c -> c
  | String (i, j, s, rest) -> Types.String (i, j, s, sub rest)
  | Pair (x, y) -> Types.Pair (sub x, sub y)
  | Xml (x, y) -> Types.Xml (sub x, sub y)
  | RecordLitt fields -> Types.Record (parse_record env loc sub fields)
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
169

170
(* Errors reported by the typer.  In the part of the file shown here only
   [Error] is actually raised; the others are presumably raised by the
   type-checking code further down -- confirm before relying on that. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string
178

179
180
(* [raise_loc loc exn] raises [exn] attached to the whole location
   [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* [raise_loc_str loc ofs exn] raises [exn] attached to the character
   offset [ofs] of [loc]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* [error loc msg] raises [Error msg] attached to [loc]. *)
let error loc msg =
  raise_loc loc (Error msg)
182

183
  (* just to remember imported schemas *)
184
(* Table whose keys are the schemas imported so far.  It is only queried in
   this part of the file; bindings are added elsewhere. *)
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)

(* [is_registered_schema s] tells whether [s] is a key of the table
   currently held by [schemas].

   The reference is dereferenced at each call, like the other schema tables
   of this file.  The previous point-free form, [Hashtbl.mem !schemas],
   captured the table once at module initialisation and would have kept
   querying that first table if the reference were ever assigned a new one
   (NOTE(review): [State] presumably does so when a saved state is restored
   -- confirm). *)
let is_registered_schema s = Hashtbl.mem !schemas s
186
187
188

(* Registries of imported schema components, one per kind of component.
   [find_schema_descr] queries each of them with a (schema, name) pair.
   Every table is created with the same initial size and registered with
   [State] under its own name. *)
let schema_component_table state_name =
  State.ref state_name (Hashtbl.create 51)

let schema_types = schema_component_table "Typer.schema_types"
let schema_elements = schema_component_table "Typer.schema_elements"
let schema_attributes = schema_component_table "Typer.schema_attributes"
let schema_attribute_groups =
  schema_component_table "Typer.schema_attribute_groups"
let schema_model_groups =
  schema_component_table "Typer.schema_model_groups"
194

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
  (* raise Not_found *)
(* [find_schema_descr kind schema name] is the description registered for
   the component [name] of [schema].

   When [kind] is [Some k], only the table of that kind is searched.  When
   it is [None], the tables are tried in the order element, type,
   attribute, attribute group, model group, and the first hit wins.

   Raises [Not_found] when no searched table has a binding for
   [(schema, name)]. *)
let find_schema_descr kind schema name =
  let key = (schema, name) in
  (* Each lookup is delayed so that the table references are read at call
     time and so that untried tables are never queried. *)
  let elt () = Hashtbl.find !schema_elements key in
  let typ () = Hashtbl.find !schema_types key in
  let att () = Hashtbl.find !schema_attributes key in
  let att_group () = Hashtbl.find !schema_attribute_groups key in
  let mod_group () = Hashtbl.find !schema_model_groups key in
  let rec first_found = function
    | [] -> raise Not_found
    | lookup :: rest -> (try lookup () with Not_found -> first_found rest)
  in
  match kind with
  | Some `Element -> elt ()
  | Some `Type -> typ ()
  | Some `Attribute -> att ()
  | Some `Attribute_group -> att_group ()
  | Some `Model_group -> mod_group ()
  | None ->
      (* policy for unqualified schema component resolution. This order
         should be consistent with Schema_component.get_component *)
      first_found [ elt; typ; att; att_group; mod_group ]

  (* as above, but raise Error *)
(* Same lookup as [find_schema_descr], but a miss is reported as an [Error]
   whose message distinguishes a component missing from a known schema from
   a schema that has not been registered at all. *)
let find_schema_descr' k s n =
  try find_schema_descr k s n
  with Not_found ->
    let msg =
      if is_registered_schema s then
        Printf.sprintf "No %s named '%s' found in schema '%s'"
          (Schema_common.string_of_component_kind k)
          (U.get_str n) (U.get_str s)
      else
        Printf.sprintf "%s: no such schema" (U.get_str s)
    in
    raise (Error msg)
227

228
229
230
231
232
233
234
235
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* [seq_vars accu re] adds to [accu] every variable bound by a
   [SeqCapture] node of the regular expression [re].  Patterns sitting
   inside [Elem] nodes are not inspected. *)
let rec seq_vars accu re =
  match re with
  | SeqCapture (v, body) -> seq_vars (IdSet.add v accu) body
  | Star body | WeakStar body -> seq_vars accu body
  | Seq (left, right) | Alt (left, right) ->
      let accu = seq_vars accu left in
      seq_vars accu right
  | Epsilon | Elem _ -> accu

236
237
238
239
240
241
242
243
244
245
246
247
248
249
(* We use two intermediate representations between AST types/patterns
   and the internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

250
251
252
253
(* First intermediate representation: recursion has been replaced by
   explicit slots, regular expressions are still present. *)
type derecurs_slot = {
  ploc : Location.loc;
    (* location given when the slot was created; used to report unguarded
       recursion *)
  pid : int;
    (* number drawn from [counter] at creation; it is the hash of the
       slot *)
  mutable ploop : bool;
    (* set while [real_compile] is expanding this slot *)
  mutable pdescr : derecurs;
    (* definition of the slot; [PDummy] until [derecurs_def] fills it in *)
}

and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
      (* a regular expression together with what follows it *)

and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297

(* Second intermediate representation: regular expressions have been
   compiled away; the components of products, XML elements, arrows and
   records are slots, which may be shared and may form cycles. *)
type descr =
  | IDummy  (* content of a slot that [flush_defs] has not filled yet *)
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const

and slot = {
  mutable fv : fv option;
    (* free variables, once [compute_fv] has run on the slot *)
  mutable hash : int option;
    (* hash value, once [SlotTable] has computed it *)
  mutable rank1 : int;
  mutable rank2 : int;
    (* visit numbers of the current traversal: [rank1] is written by
       [hash_slot] and for the left operand of [equal_slot], [rank2] for
       its right operand *)
  mutable gen1 : int;
  mutable gen2 : int;
    (* value of [gen] for which [rank1] / [rank2] are meaningful; [gen1]
       also serves as the visited mark of [fv_slot] *)
  mutable d : descr;
}
299
300
301
302
303
304
305
306
307
308
309
310
311
    

(* Source of the [pid] numbers. *)
let counter = ref 0

(* [mk_derecurs_slot loc] is a new slot located at [loc], carrying the next
   number of [counter] and no definition yet. *)
let mk_derecurs_slot loc =
  let id = (incr counter; !counter) in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }

(* [mk_slot ()] is a new slot with no description and no cached data. *)
let mk_slot () =
  { d = IDummy;
    fv = None;
    hash = None;
    rank1 = 0; gen1 = 0;
    rank2 = 0; gen2 = 0 }


(* This environment is used in phase (1) to eliminate recursion *)
type penv = {
  penv_tenv : t;
    (* the typing environment in which names are resolved *)
  penv_derec : derecurs_slot Env.t;
    (* slots of the recursive definitions currently in scope *)
}

(* [penv tenv] is the initial phase-(1) environment over [tenv], with no
   recursive definition in scope. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
317

318
(* Structural hash on [derecurs] terms, compatible with [equal_derecurs].
   An alias hashes to the number of its slot, so the traversal never
   follows slot definitions.  [PDummy] must not occur. *)
let rec hash_derecurs d =
  let unary tag p = tag + 17 * (hash_derecurs p) in
  let binary tag p q =
    tag + 17 * (hash_derecurs p) + 257 * (hash_derecurs q) in
  match d with
  | PDummy -> assert false
  | PAlias slot -> slot.pid
  | PType t -> 1 + 17 * (Types.hash t)
  | POr (p, q) -> binary 2 p q
  | PAnd (p, q) -> binary 3 p q
  | PDiff (p, q) -> binary 4 p q
  | PTimes (p, q) -> binary 5 p q
  | PXml (p, q) -> binary 6 p q
  | PArrow (p, q) -> binary 7 p q
  | POptional p -> unary 8 p
  | PRecord (o, r) ->
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x -> 11 + 17 * (Id.hash x)
  | PConstant (x, c) -> 12 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
  | PRegexp (r, q) ->
      13 + 17 * (hash_derecurs_regexp r) + 257 * (hash_derecurs q)

(* Structural hash on regular expressions, compatible with
   [equal_derecurs_regexp]. *)
and hash_derecurs_regexp re =
  let unary tag r = tag + 17 * (hash_derecurs_regexp r) in
  let binary tag r s =
    tag + 17 * (hash_derecurs_regexp r) + 257 * (hash_derecurs_regexp s) in
  match re with
  | PEpsilon -> 1
  | PElem p -> 2 + 17 * (hash_derecurs p)
  | PSeq (r, s) -> binary 3 r s
  | PAlt (r, s) -> binary 4 r s
  | PStar r -> unary 5 r
  | PWeakStar r -> unary 6 r
359
360

(* Structural equality on [derecurs] terms.  Physically equal terms are
   equal; two aliases are equal only when they designate the very same
   slot, so slot definitions are never compared. *)
let rec equal_derecurs p1 p2 =
  p1 == p2 ||
  (match p1, p2 with
   | PAlias s1, PAlias s2 -> s1 == s2
   | PType t1, PType t2 -> Types.equal t1 t2
   | POr (a1, b1), POr (a2, b2)
   | PAnd (a1, b1), PAnd (a2, b2)
   | PDiff (a1, b1), PDiff (a2, b2)
   | PTimes (a1, b1), PTimes (a2, b2)
   | PXml (a1, b1), PXml (a2, b2)
   | PArrow (a1, b1), PArrow (a2, b2) ->
       equal_derecurs a1 a2 && equal_derecurs b1 b2
   | POptional a1, POptional a2 -> equal_derecurs a1 a2
   | PRecord (o1, r1), PRecord (o2, r2) ->
       (* [o1] and [o2] are booleans *)
       o1 = o2 && LabelMap.equal equal_derecurs r1 r2
   | PCapture x1, PCapture x2 -> Id.equal x1 x2
   | PConstant (x1, c1), PConstant (x2, c2) ->
       Id.equal x1 x2 && Types.Const.equal c1 c2
   | PRegexp (r1, q1), PRegexp (r2, q2) ->
       equal_derecurs_regexp r1 r2 && equal_derecurs q1 q2
   | _ -> false)

(* Structural equality on regular expressions. *)
and equal_derecurs_regexp r1 r2 =
  match r1, r2 with
  | PEpsilon, PEpsilon -> true
  | PElem p1, PElem p2 -> equal_derecurs p1 p2
  | PSeq (a1, b1), PSeq (a2, b2)
  | PAlt (a1, b1), PAlt (a2, b2) ->
      equal_derecurs_regexp a1 a2 && equal_derecurs_regexp b1 b2
  | PStar a1, PStar a2
  | PWeakStar a1, PWeakStar a2 -> equal_derecurs_regexp a1 a2
  | _ -> false
395

396
397
398
399
400
401
402
403
404
405
406
(* Hash tables keyed by [derecurs] terms. *)
module DerecursTable = Hashtbl.Make (struct
  type t = derecurs
  let equal = equal_derecurs
  let hash = hash_derecurs
end)

(* Hash tables keyed by a regular expression paired with its
   continuation. *)
module RE = Hashtbl.Make (struct
  type t = derecurs_regexp * derecurs
  let equal (r1, q1) (r2, q2) =
    equal_derecurs_regexp r1 r2 && equal_derecurs q1 q2
  let hash (r, q) =
    (hash_derecurs_regexp r) + 17 * (hash_derecurs q)
end)
413

414
415
416
417
(* [gen] identifies the traversal in progress; [rank] numbers the slots in
   the order the traversal first meets them. *)
let gen = ref 0
let rank = ref 0

(* Hash of a description.  Slots are entered through [hash_slot], which is
   what makes the traversal terminate on cyclic structures.
   The arithmetic expressions are written exactly as they always were: the
   calls to [hash_slot] have side effects, so their relative order must not
   be disturbed. *)
let rec hash_descr = function
  | IDummy -> assert false
  | IType t -> Types.hash t
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)

(* Hash of a slot within the current traversal.  A slot met for the first
   time receives the next rank and hashes as its content; a slot already
   met during this traversal hashes as a function of its rank only. *)
and hash_slot s =
  if s.gen1 <> !gen then begin
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end
  else 13 * s.rank1
    
(* Equality of descriptions; slots are compared with [equal_slot]. *)
let rec equal_descr d1 d2 =
  match d1, d2 with
  | IType t1, IType t2 -> Types.equal t1 t2
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1, c1), IConstant (x2, c2) ->
      Id.equal x1 x2 && Types.Const.equal c1 c2
  | IOptional a1, IOptional a2 -> equal_descr a1 a2
  | IOr (a1, b1), IOr (a2, b2)
  | IAnd (a1, b1), IAnd (a2, b2)
  | IDiff (a1, b1), IDiff (a2, b2) ->
      equal_descr a1 a2 && equal_descr b1 b2
  | ITimes (a1, b1), ITimes (a2, b2)
  | IXml (a1, b1), IXml (a2, b2)
  | IArrow (a1, b1), IArrow (a2, b2) ->
      equal_slot a1 a2 && equal_slot b1 b2
  | IRecord (o1, r1), IRecord (o2, r2) ->
      o1 = o2 && LabelMap.equal equal_slot r1 r2
  | _ -> false

(* Equality of slots within the current traversal ([s1] on the left side,
   [s2] on the right side):
   - both already met: equal iff they were met at the same rank;
   - neither met yet: both receive the next rank, then their contents are
     compared;
   - only one of them met: different. *)
and equal_slot s1 s2 =
  match s1.gen1 = !gen, s2.gen2 = !gen with
  | true, true -> s1.rank1 = s2.rank2
  | false, false ->
      incr rank;
      s1.rank1 <- !rank; s1.gen1 <- !gen;
      s2.rank2 <- !rank; s2.gen2 <- !gen;
      equal_descr s1.d s2.d
  | true, false | false, true -> false
  
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
(* Hash tables keyed by slots.  Each hash computation and each equality
   test that is not settled by physical equality starts a new traversal
   (new [gen], [rank] reset).  The hash is cached in the slot. *)
module SlotTable = Hashtbl.Make (struct
  type t = slot

  let hash s =
    match s.hash with
    | Some h -> h
    | None ->
        incr gen;
        rank := 0;
        let h = hash_slot s in
        s.hash <- Some h;
        h

  let equal s1 s2 =
    s1 == s2 ||
    (incr gen;
     rank := 0;
     equal_slot s1 s2)
end)


(* Phase (1): [derecurs env p] translates the AST type/pattern [p] into a
   [derecurs] term.  Names are resolved in [env], recursive definitions
   become slots, sequence capture variables are pushed down onto the
   elements of regular expressions. *)
let rec derecurs env p =
  let tenv = env.penv_tenv in
  let loc = p.loc in
  match p.descr with
  | PatVar v -> derecurs_var env loc v
  | SchemaVar (kind, schema_name, component_name) ->
      PType (derecurs_schema env kind schema_name component_name)
  | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
  | Or (p1, p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1, p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1, p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1, p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1, p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1, p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional p1 -> POptional (derecurs env p1)
  | Record (o, r) -> PRecord (o, parse_record tenv loc (derecurs env) r)
  | Constant (x, c) -> PConstant (x, const tenv loc c)
  | Cst c -> PType (Types.constant (const tenv loc c))
  | Regexp (r, q) ->
      (* Every variable captured somewhere in [r] is given the empty
         sequence as a constant binding on the tail. *)
      let constant_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let vars = seq_vars IdSet.empty r in
      let tail = IdSet.fold constant_nil (derecurs env q) vars in
      let re = derecurs_regexp (fun p -> p) env r in
      PRegexp (re, tail)

(* Resolution of a name occurring in a type/pattern.  An unqualified name
   is, in this order, a recursive definition in scope, a type of the
   environment, or else a capture variable.  A qualified name must be a
   type of the designated compilation unit. *)
and derecurs_var env loc v =
  match Ns.split_qname v with
  | "", local ->
      let id = ident local in
      (try PAlias (Env.find id env.penv_derec)
       with Not_found ->
         try PType (find_type id env.penv_tenv)
         with Not_found -> PCapture id)
  | cu, local ->
      (try
         let unit_name = U.mk cu in
         PType (find_type_global loc unit_name (ident local) env.penv_tenv)
       with Not_found ->
         raise_loc_generic loc
           ("Unbound external type " ^ cu ^ ":" ^ (U.to_string local)))

(* [derecurs_regexp vars env re] translates a regular expression.  [vars]
   is applied to each translated element; it conjoins the element with the
   capture of every sequence variable enclosing it. *)
and derecurs_regexp vars env = function
  | Epsilon -> PEpsilon
  | Elem p -> PElem (vars (derecurs env p))
  | Seq (r1, r2) ->
      PSeq (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
  | Alt (r1, r2) ->
      PAlt (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
  | Star r -> PStar (derecurs_regexp vars env r)
  | WeakStar r -> PWeakStar (derecurs_regexp vars env r)
  | SeqCapture (x, r) ->
      derecurs_regexp (fun p -> PAnd (vars p, PCapture x)) env r

(* [derecurs_def env b] creates one slot per definition of [b], extends
   [env] with all of them, then translates each definition in the extended
   environment and stores the result in its slot.  The extended environment
   is returned. *)
and derecurs_def env b =
  let with_slots = List.map (fun (v, p) -> (v, p, mk_derecurs_slot p.loc)) b in
  let derec =
    List.fold_left
      (fun acc (v, _, s) -> Env.add v s acc)
      env.penv_derec with_slots in
  let env = { env with penv_derec = derec } in
  List.iter (fun (_, p, s) -> s.pdescr <- derecurs env p) with_slots;
  env

(* Schema components are looked up with [find_schema_descr]; the
   environment is not needed. *)
and derecurs_schema _env kind schema_name component_name =
  find_schema_descr kind schema_name component_name
550
    
551
552
553
554
555
(* [fv_slot s] is the cached set of free variables of [s] when there is
   one.  Otherwise the content of [s] is traversed, unless [s] has already
   been entered during the current generation, in which case it contributes
   nothing more.  The result of such a traversal is not cached here; see
   [compute_fv]. *)
let rec fv_slot s =
  match s.fv with
  | Some cached -> cached
  | None when s.gen1 = !gen -> IdSet.empty
  | None ->
      s.gen1 <- !gen;
      fv_descr s.d

(* Capture and constant-binding variables occurring in a description. *)
and fv_descr = function
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | ICapture x | IConstant (x, _) -> IdSet.singleton x
  | IOptional d -> fv_descr d
  | IOr (d1,d2)
  | IAnd (d1,d2)
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | ITimes (s1,s2)
  | IXml (s1,s2)
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_, r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
570

571
572
573
574
575
576
577
578
(* [compute_fv s] fills the free-variable cache of [s] if it is still
   empty, using a new generation for the traversal. *)
let compute_fv s =
  match s.fv with
  | Some _ -> ()
  | None ->
      incr gen;
      s.fv <- Some (fv_slot s)
	  
579
580
581
(* [check_no_capture loc s] does nothing when the variable set [s] is
   empty; otherwise one of its elements is reported in a located error. *)
let check_no_capture loc s =
  match IdSet.pick s with
  | None -> ()
  | Some x ->
      raise_loc_generic loc
        ("Capture variable not allowed: " ^ (Ident.to_string x))
584
    
585
586
587
(* Memo tables of phase (2): [compile_slot_hash] maps a [derecurs] term to
   the slot created for it by [compile_slot], [compile_hash] maps it to the
   description produced by [compile]. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

588
589
(* Work lists filled by [compile_slot] and emptied by [flush_defs]:
   [todo_defs] holds the slots whose content is still to be compiled,
   together with the term to compile; [todo_fv] holds the slots whose free
   variables are still to be computed. *)
let todo_defs = ref []
let todo_fv = ref []
590
591
592
593
594
595
596
597

(* Phase (2): translation of [derecurs] terms into descriptions.

   [compile p] is the description of [p]; results are memoised in
   [compile_hash].  The components of products, XML elements, arrows and
   records are not compiled on the spot: [compile_slot] allocates a slot
   for them and queues the work for [flush_defs]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    c

and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      (* [ploop] is set for the duration of the expansion of [v]: meeting
         [v] again during that time means that the recursion does not go
         through a slot. *)
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r = compile v.pdescr in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) -> IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q

(* [compile_regexp r q] is the description of the sequences matching [r]
   followed by [q].  It is built as the union of the alternatives found
   while exploring the states (regexp, continuation) reachable from
   [(r,q)] without consuming an element; [memo] records the states already
   explored. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  (* [accu] is [None] as long as no alternative has been found. *)
  let add accu i =
    match accu with None -> Some i | Some j -> Some (IOr (j,i)) in
  let get = function Some x -> x | None -> assert false in
  (* [queue accu tail]: the regular expression has been matched entirely,
     what remains is [tail]. *)
  let rec queue accu = function
    | PRegexp (r,q) -> aux accu r q
    | tail ->
        (* Compile the tail that was actually reached.  The previous code
           compiled the outermost [q] here, which is a different term
           whenever that [q] is itself a [PRegexp]. *)
        add accu (compile tail)
  and aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon -> queue accu q
        | PElem p ->
            (* Be careful not to create pairs with same second component *)
            let rec extract = function
              | PConstant (x,v) -> `Const (x,v)
              | POr (x,y) ->
                  (match extract x, extract y with
                    | `Pat x, `Pat y -> `Pat (POr (x,y))
                    | x, y -> `Or (x,y))
              | p -> `Pat p
            in
            let rec mk accu = function
              | `Const (x,v) ->
                  (* a constant binding consumes no element: it is
                     conjoined with what follows *)
                  (match queue None q with
                    | Some q -> add accu (IAnd (IConstant (x,v), q))
                    | None -> accu)
              | `Or (x,y) -> mk (mk accu x) y
              | `Pat p ->
                  add accu (ITimes (compile_slot p, compile_slot q))
            in
            mk accu (extract p)
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        (* [r] below is the starred expression itself: one more iteration
           is followed by [r] again.  The two stars only differ in which
           alternative is tried first. *)
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  get (aux None r q)

(* [compile_slot p] is the slot standing for [p].  A new slot is created
   empty, registered in [compile_slot_hash] and queued in [todo_defs] and
   [todo_fv]; it only receives its content when [flush_defs] runs. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let s = mk_slot () in
    todo_defs := (s,p) :: !todo_defs;
    todo_fv := s :: !todo_fv;
    DerecursTable.add compile_slot_hash p s;
    s
667

668
      
669
let timer_fv = Stats.Timer.create "Typer.fv"

(* [flush_defs ()] compiles the content of every queued slot, including the
   slots queued by those compilations, and then computes the free variables
   of all the slots of [todo_fv] under the timer [timer_fv]. *)
let rec flush_defs () =
  match !todo_defs with
  | (s, p) :: rest ->
      (* the entry is removed before compiling: [compile] may queue more *)
      todo_defs := rest;
      s.d <- compile p;
      flush_defs ()
  | [] ->
      Stats.Timer.start timer_fv;
      let pending = !todo_fv in
      List.iter compute_fv pending;
      todo_fv := [];
      Stats.Timer.stop timer_fv ()
681
682
683
684
685
686
687
688
689
690
691
692
693
694
	
(* Phase (3) memo tables: the type node ([typ_node]) and the pattern node
   ([pat_node]) already built for a slot. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* [typ d] is the type denoted by the description [d].  Captures and
   constant bindings must not occur in [d]. *)
let rec typ = function
  | IDummy | ICapture _ | IConstant (_,_) -> assert false
  | IType t -> t
  | IOptional d -> Types.Record.or_absent (typ d)
  | IOr (d1,d2) -> Types.cup (typ d1) (typ d2)
  | IAnd (d1,d2) -> Types.cap (typ d1) (typ d2)
  | IDiff (d1,d2) -> Types.diff (typ d1) (typ d2)
  | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_node r)

(* [typ_node s] is the type node of the slot [s], built on first demand.
   The node is recorded in [typ_nodes] before its content is computed, so
   that a cycle through [s] finds it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* [pat d] is the pattern denoted by the description [d].  A description
   without variables is a plain type constraint. *)
let rec pat d : Patterns.descr =
  if not (IdSet.is_empty (fv_descr d)) then pat_aux d
  else Patterns.constr (typ d)

(* Translation of a description known to contain variables. *)
and pat_aux = function
  | IDummy -> assert false
  | IType _ -> assert false
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IOr (d1,d2) -> Patterns.cup (pat d1) (pat d2)
  | IAnd (d1,d2) -> Patterns.cap (pat d1) (pat d2)
  | IDiff (d1,d2) when IdSet.is_empty (fv_descr d2) ->
      (* a difference with a variable-free right-hand side is an
         intersection with the complement of that type *)
      let neg = Types.neg (typ d2) in
      Patterns.cap (pat d1) (Patterns.constr neg)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ ->
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IRecord (o,r) ->
      (* Fields without variables stay in the record type constraint; each
         field with variables is replaced there by [Types.any_node] and
         contributes a record pattern of its own. *)
      let pats = ref [] in
      let field l s =
        if IdSet.is_empty (fv_slot s) then typ_node s
        else begin
          pats := Patterns.record l (pat_node s) :: !pats;
          Types.any_node
        end
      in
      let constr = Types.record' (o, LabelMap.mapi field r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)

(* [pat_node s] is the pattern node of the slot [s], built on first demand
   and recorded in [pat_nodes] before its content is computed.  If the
   computation fails, the entry is removed before the exception is
   re-raised. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s node;
      Patterns.define node (pat s.d);
      node
    with exn -> SlotTable.remove pat_nodes s; raise exn
      (* For the toplevel ... *)
750

751

752
module Ids = Set.Make(Id)

(* [type_defs env b] processes the group of mutually recursive type
   definitions [b] and returns the list of (identifier, type) pairs, in the
   order of [b].

   An identifier defined twice in [b] and a capture variable occurring in a
   definition are errors.  A definition denoting the empty type only gives
   a warning, and only if it has a location.  Each resulting type is
   registered with the type printer under its identifier. *)
let type_defs env b =
  let check_fresh seen (v, p) =
    if Ids.mem v seen then
      raise_loc_generic p.loc
        ("Multiple definitions for the type identifer " ^
         (Ident.to_string v));
    Ids.add v seen
  in
  ignore (List.fold_left check_fresh Ids.empty b);

  let penv = derecurs_def (penv env) b in
  let compiled =
    List.map (fun (v, p) -> (v, p, compile (derecurs penv p))) b in
  flush_defs ();
  let finish (v, p, d) =
    check_no_capture p.loc (fv_descr d);
    let t = typ d in
    if (p.loc <> noloc) && (Types.is_empty t) then
      warning p.loc
        ("This definition yields an empty type for " ^ (Ident.to_string v));
    (v, t)
  in
  let defs = List.map finish compiled in
  List.iter (fun (v, t) -> Types.Print.register_global (Id.value v) t) defs;
  defs
778
779


780
781
782
783
784
(* [dump_types ppf env] prints on [ppf] the name of every type definition
   of [env], each preceded by a space. *)
let dump_types ppf env =
  let print_type_name v = function
    | Type _ -> Format.fprintf ppf " %a" Ident.print v
    | Val _ -> ()
  in
  Env.iter print_type_name env.ids
785
786
(* [dump_type ppf env name] prints on [ppf] the type definition bound to
   [name] in [env].  Raises [Error] when [name] is unbound or bound to a
   value. *)
let dump_type ppf env name =
  try
    (match Env.find (Ident.ident name) env.ids with
     | Val _ -> raise Not_found
     | Type t -> Types.Print.print ppf t)
  with Not_found ->
    let msg = Printf.sprintf "Type %s not found" (U.get_str name) in
    raise (Error msg)
792
793
794
795

(* [dump_schema_type ppf (k, s, n)] prints on [ppf] the description of the
   schema component designated by kind [k], schema [s] and name [n].
   Raises [Error] as [find_schema_descr'] does. *)
let dump_schema_type ppf (k, s, n) =
  Types.Print.print ppf (find_schema_descr' k s n)
796

797
(* [dump_ns ppf env] prints the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table
799

800

801
802
(* [do_typ loc r] compiles the [derecurs] term [r] down to a type node.
   A capture variable occurring in [r] is an error reported at [loc]. *)
let do_typ loc r =
  let slot = compile_slot r in
  let () = flush_defs () in
  let vars = fv_slot slot in
  check_no_capture loc vars;
  typ_node slot
806
   
807
808
(* [typ env p] is the type node denoted by the AST type [p] in [env].
   From here on this definition hides the translation function of the same
   name defined earlier. *)
let typ env p =
  let d = derecurs (penv env) p in
  do_typ p.loc d
809
    
810
811
(* [pat env p] is the pattern node denoted by the AST pattern [p] in
   [env].  A [Patterns.Error] is reported at the location of [p]; a located
   exception carrying no location is given the location of [p]. *)
let pat env p =
  let slot = compile_slot (derecurs (penv env) p) in
  flush_defs ();
  try pat_node slot
  with
  | Patterns.Error msg -> raise_loc_generic p.loc msg
  | Location (loc, _, exn) when loc = noloc ->
      raise (Location (p.loc, `Full, exn))
816
817


818
819
(* II. Build skeleton *)

820

821
822
823
824
825
type type_fun = Types.t -> bool -> Types.t

(* Hooks for operators.  Each initial closure fails on any call; the real
   implementations have to be stored in these references from elsewhere,
   which is not visible in this section.  Below, [!mk_unary_op] and
   [!mk_binary_op] are applied to an operator and the environment when an
   [Op] expression is translated; the two [typ_*] hooks are not used in the
   part of the file shown here. *)
let mk_unary_op = ref (fun _ _ -> assert false)
let typ_unary_op = ref (fun _ _ _ -> assert false)
let mk_binary_op = ref (fun _ _ -> assert false)
let typ_binary_op = ref (fun _ _ _ _ -> assert false)
826
827


828
(* Sets of free variables of expressions: an alias for [IdSet]. *)
module Fv = IdSet
829

830
831
832
(* A branch together with the branches nested in its body. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected so far at the current nesting level; maintained by
   the translation of branches below. *)
let cur_branch : branch list ref = ref []
833

834
(* [exp loc fv e] pairs the free variables [fv] with a typed expression
   node located at [loc], whose descriptor is [e] and whose type is
   initially [Types.empty]. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e } in
  (fv, node)
840
841


842
843
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
844
  | Forget (e,t) ->
845
      let (fv,e) = expr env loc e and t = typ env t in
846
847
      exp loc fv (Typed.Forget (e,t))
  | Var s -> 
848
849
850
851
      (match Ns.split_qname s with
	| "", id -> let id = ident id in
	  exp loc (Fv.singleton id) (Typed.Var id)
	| cu, id -> 
852
	    let cu = find_cu (U.mk cu) env in
853
	    exp loc Fv.empty (Typed.ExtVar (cu, ident id)))
854
  | Apply (e1,e2) -> 
855
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
856
857
      exp loc (Fv.cup fv1 fv2) (Typed.Apply (e1,e2))
  | Abstraction a ->
858
      let iface = List.map (fun (t1,t2) -> (typ env t1, typ env t2)) 
859
860
861
862
863
864
865
		    a.fun_iface in
      let t = List.fold_left 
		(fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2)) 
		Types.any iface in
      let iface = List.map 
		    (fun (t1,t2) -> (Types.descr t1, Types.descr t2)) 
		    iface in
866
      let (fv0,body) = branches env a.fun_body in
867
868
869
870
871
872
873
874
875
876
877
      let fv = match a.fun_name with
	| None -> fv0
	| Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction 
		{ Typed.fun_name = a.fun_name;
		  Typed.fun_iface = iface;
		  Typed.fun_body = body;
		  Typed.fun_typ = t;
		  Typed.fun_fv = fv
		} in
      exp loc fv e
878
  | (Integer _ | Char _ | Atom _ | Const _) as c -> 
879
      exp loc Fv.empty (Typed.Cst (const env loc c))
880
  | Pair (e1,e2) ->
881
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
882
883
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
884
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
885
886
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
887
888
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
889
  | RemoveField (e,l) ->
890
891
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
892
893
  | RecordLitt r -> 
      let fv = ref Fv.empty in
894
      let r = parse_record env loc
895
		(fun e -> 
896
		   let (fv2,e) = expr env loc e 
897
898
899
		   in fv := Fv.cup !fv fv2; e)
		r in
      exp loc !fv (Typed.RecordLitt r)
900
  | String (i,j,s,e) ->
901
      let (fv,e) = expr env loc e in
902
      exp loc fv (Typed.String (i,j,s,e))
903
  | Op (op,le) ->
904
      let (fvs,ltes) = List.split (List.map (expr env loc) le) in
905
      let fv = List.fold_left Fv.cup Fv.empty fvs in
906
      (try
907
908
909
	 (match ltes with
	    | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
	    | [e1;e2] -> exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
910
911
912
	    | _ -> assert false)
       with Not_found -> assert false)

913
  | Match (e,b) -> 
914
915
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
916
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
917
  | Map (e,b) ->
918
919
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
920
921
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
922
923
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
924
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
925
  | Xtrans (e,b) ->
926
927
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
928
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
929
  | Validate (e,kind,schema,elt) ->
930
      let (fv,e) = expr env loc e in
931
      exp loc fv (Typed.Validate (e, kind, schema, elt))
932
  | Try (e,b) ->
933
934
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
935
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
936
  | NamespaceIn (pr,ns,e) ->
937
938
      let env = enter_ns pr ns env in
      expr env loc e
939
  | Ref (e,t) ->
940
      let (fv,e) = expr env loc e and t = typ env t in
941
      exp loc fv (Typed.Ref (e,t))
942
  | External (s,args) ->
943
      let args = List.map (typ env) args in
944
      let (i,t) = Externals.resolve s args in
945
      exp loc Fv.empty (Typed.External (t,i))
946
	      
947
  and branches env b = 
948
    let fv = ref Fv.empty in
949
    let accept = ref Types.empty in
950
    let branch (p,e) = 
951
952
      let cur_br = !cur_branch in
      cur_branch := [];
953
      let (fv2,e) = expr env noloc e in
954
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
955
      let p = pat env p in
956
957
958
959
960
961
      (match Fv.pick (Fv.diff (Patterns.fv p) fv2) with
	| None -> ()
	| Some x ->
	    let x = U.to_string (Id.value x) in
	    warning br_loc 
	      ("The capture variable " ^ x ^ 
962
	       " is declared in the pattern but not used in the body of this branch. It might be a misspelled or undeclared type or name (if it isn't, use _ instead)."));
963
964
965
966
967
968
969
970
971
      let fv2 = Fv.diff fv2 (Patterns.fv p) in
      fv := Fv.cup !fv fv2;
      accept := Types.cup !accept (Types.descr (Patterns.accept p));
      let br = 
	{ 
	  Typed.br_loc = br_loc;
	  Typed.br_used = br_loc = noloc;
	  Typed.br_pat = p;
	  Typed.br_body = e } in
972
      cur_branch := Branch (br, !cur_branch) :: cur_br;
973
974
      br in
    let b = List.map branch b in
975
976
977
978
    (!fv, 
     { 
       Typed.br_typ = Types.empty; 
       Typed.br_branches = b; 
Pietro Abate's avatar