typer.ml 54 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraint
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
(* Debug switch, off by default.  NOTE(review): presumably gates schema
   debugging output; it is not referenced in the visible part of the file. *)
let debug_schema = false

13
(* Print the warning [msg] on [!Location.warning_ppf], preceded by the
   printed location [loc] and followed by the highlighted source. *)
let warning loc msg =
  let where = (loc, `Full) in
  let ppf = !Location.warning_ppf in
  Format.fprintf ppf "Warning %a:@\n%a%s@."
    Location.print_loc where
    Location.html_hilight where
    msg

19
20
21
22
23
24
25
26
27
(* Exceptions declared by the typer.  NOTE(review): most raise sites are
   outside the visible part of the file, so only the declared payloads are
   described here. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
(* Raised by [var] below; the boolean is [Env.mem id env.ids]. *)
exception UnboundId of id * bool
(* Raised by [var] below for an unknown identifier of another unit. *)
exception UnboundExtId of Types.CompUnit.t * id
(* Generic error carrying a message; see [error] below. *)
exception Error of string

28
29
30

(* Carries a message and a type.  NOTE(review): no raise site is visible in
   this part of the file. *)
exception Warning of string * Types.t

31
32
33
34
(* Raise [exn] wrapped in a [Location] exception tagged [`Full] for [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Same as [raise_loc], but the location is tagged [`Char ofs]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* Raise [Error msg] located at [loc]. *)
let error loc msg =
  raise (Location (loc, `Full, Error msg))

35
36
(* What an identifier is bound to in the environment: either a named type
   or the type of a value. *)
type item =
  | Type of Types.t
  | Val of Types.t
38

39
40
module UEnv = Map.Make(U)

41
(* The typing environment. *)
type t = {
  ids : item Env.t;              (* identifiers: types and values *)
  ns : Ns.table;                 (* namespace prefix table *)
  cu : Types.CompUnit.t UEnv.t;  (* compilation units, by name *)
  schemas : string UEnv.t        (* schema name -> uri *)
}
47

48
49
50
51
52
(* Placeholder operations on environments: none of them is implemented and
   each one raises [Failure] with the name of the operation. *)
let hash _ = raise (Failure "Typer.hash")
let compare _ _ = raise (Failure "Typer.compare")
let dump ppf _ = raise (Failure "Typer.dump")
let equal _ _ = raise (Failure "Typer.equal")
let check _ = raise (Failure "Typer.check")
53
54

(* TODO: filter out builtin defs ? *)
55
56
57
58
(* Write one environment item on [s]: a one-bit tag (0 for [Type],
   1 for [Val]) followed by the serialized type. *)
let serialize_item s item =
  let (tag, t) =
    match item with
    | Type t -> (0, t)
    | Val t -> (1, t)
  in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

59
(* Write the identifier bindings and then the namespace table of [env].
   The [cu] and [schemas] components are not written. *)
let serialize s env =
  let ids = env.ids and ns = env.ns in
  Serialize.Put.env Id.serialize serialize_item Env.iter s ids;
  Ns.serialize_table s ns
62

63
64
65
66
67
(* Read back one item written by [serialize_item]: a one-bit tag, then
   the type. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

68
(* Rebuild an environment from [s]: identifier bindings first, then the
   namespace table.  Compilation units and schemas start out empty. *)
let deserialize s =
  let bindings =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let table = Ns.deserialize_table s in
  { ids = bindings; ns = table; cu = UEnv.empty; schemas = UEnv.empty }
72
73


74
75
(* The environment with no binding in any of its components. *)
let empty_env =
  { ids = Env.empty;
    ns = Ns.empty_table;
    cu = UEnv.empty;
    schemas = UEnv.empty }

81
82
(* Forward reference used by [find_type_global] and [find_value_global] to
   obtain the environment of a compilation unit.  The initial function
   fails with [assert false], so it has to be assigned before use. *)
let from_comp_unit = ref (fun cu -> assert false)

83
(* Return [env] extended with the compilation unit [cu] bound to [x]. *)
let enter_cu x cu env =
  let units = UEnv.add x cu env.cu in
  { env with cu = units }
85

86
87
88
(* Compilation unit bound to [x] in [env]; when [x] is not bound, a unit is
   built with [Types.CompUnit.mk x]. *)
let find_cu x env =
  if UEnv.mem x env.cu then UEnv.find x env.cu
  else Types.CompUnit.mk x
89
90


91
92
93
94
95
96
(* Return [env] extended with the schema name [x] bound to [uri]. *)
let enter_schema x uri env =
  let schemas = UEnv.add x uri env.schemas in
  { env with schemas = schemas }

(* Uri bound to the schema name [x]; raises [Error] when [x] is unknown. *)
let find_schema x env =
  try UEnv.find x env.schemas
  with Not_found ->
    let msg = Printf.sprintf "%s: no such schema" (U.get_str x) in
    raise (Error msg)

97
98
99
100
101
102
103
104
(* Bind [id] to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }

(* Bind every (identifier, type) pair of [l], from left to right. *)
let enter_types l env =
  let add_one accu (id, t) = Env.add id (Type t) accu in
  { env with ids = List.fold_left add_one env.ids l }

(* Type bound to [id]; raises [Not_found] when [id] is unbound or is bound
   to a value. *)
let find_type id env =
  match Env.find id env.ids with
  | Val _ -> raise Not_found
  | Type t -> t
106

107
(* Look up the type [id] in the environment of the compilation unit named
   [cu].  The [loc] argument is not used. *)
let find_type_global loc cu id env =
  let unit_env = !from_comp_unit (find_cu cu env) in
  find_type id unit_env

112
(* Bind the value [id] with type [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }

(* Bind every (identifier, type) pair of [l] as a value. *)
let enter_values l env =
  let add_one accu (id, t) = Env.add id (Val t) accu in
  { env with ids = List.fold_left add_one env.ids l }

(* Bind every identifier of [l] as a value of type [Types.empty]. *)
let enter_values_dummy l env =
  let add_one accu id = Env.add id (Val Types.empty) accu in
  { env with ids = List.fold_left add_one env.ids l }
120
121
(* Type of the value [id]; raises [Not_found] when [id] is unbound or is
   bound to a type. *)
let find_value id env =
  match Env.find id env.ids with
  | Type _ -> raise Not_found
  | Val t -> t

(* Type of the value [id] in the environment of compilation unit [cu].
   The [env] argument is not used: the unit's own environment is. *)
let find_value_global cu id env =
  find_value id (!from_comp_unit cu)
127
	
128
129
130
131
132
133
(* [true] when [id] is unbound or bound to a value, [false] when it is
   bound to a type. *)
let value_name_ok id env =
  try
    (match Env.find id env.ids with
     | Val _ -> true
     | Type _ -> false)
  with Not_found -> true

134
135
136
137
(* Apply [f] to every value binding (identifier and type) of [env];
   type bindings are skipped. *)
let iter_values env f =
  let visit x = function
    | Val t -> f x t
    | Type _ -> ()
  in
  Env.iter visit env.ids
138

139

140
141
142
143
144
145
146
147
148
(* Register every type of [env] with the type printer, under the name made
   of the value of [cu], a colon, and the type identifier. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x = function
    | Type t -> Types.Print.register_global (U.concat prefix (Id.value x)) t
    | Val _ -> ()
  in
  Env.iter register env.ids

149

150
(* Namespaces *)
151

152
(* Install the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table

(* Namespace table of the environment. *)
let get_ns_table tenv =
  let table = tenv.ns in
  table

(* Return [env] where prefix [p] is bound to namespace [ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
159

160
161
162
163
164
(* Apply [f] to [x], turning [Ns.UnknownPrefix] into an error located at
   [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
165

166
(* Resolve the qualified tag [t] with the namespace table of [env] and
   build the corresponding atom. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.ns in
  let (ns, l) = protect_error_ns loc resolve t in
  Atoms.V.mk ns l

(* Resolve the namespace prefix [ns]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns

(* Resolve the qualified attribute name [t] and intern it as a label. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let (ns, l) = protect_error_ns loc resolve t in
  LabelPool.mk (ns, l)
176

177
178
179
180
181
182
183
184
185
186
187
188
189
(* Turn the association list [r] into a label map: labels are resolved
   with [parse_label] and contents are transformed with [f].  A label
   appearing twice is an error located at [loc]. *)
let parse_record env loc f r =
  let field (l, x) = (parse_label env loc l, f x) in
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  LabelMap.from_list on_duplicate (List.map field r)

(* Translate a constant expression of the AST into a [Types] constant.
   [loc] is the innermost enclosing location, used for error reports;
   any other expression form is rejected. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
  | LocatedExpr (loc, e) -> const env loc e
  | Pair (x, y) -> Types.Pair (sub x, sub y)
  | Xml (x, y) -> Types.Xml (sub x, sub y)
  | RecordLitt x -> Types.Record (parse_record env loc sub x)
  | String (i, j, s, c) -> Types.String (i, j, s, sub c)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
195

196

197
(* Schema *)
198

199
200
201
(* [true] when the schema name [s] is bound in [env]. *)
let is_registered_schema env s =
  let known = env.schemas in
  UEnv.mem s known

(* uri -> schema binding *)
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)

(* One table per kind of schema component; find_schema_descr_uri looks
   them up with (uri, name) keys. *)
let schema_types =
  State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements =
  State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes =
  State.ref "Typer.schema_attributes" (Hashtbl.create 51)
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
211

212
213


214
215
  (* raise Not_found *)

216
217
218
219

(* Forward reference called with a schema uri by find_schema_descr_uri,
   which ignores its result.  The initial function fails with
   [assert false], so it has to be assigned before use. *)
let get_schema_fwd = ref (fun _ -> assert false)

(* [find_schema_descr_uri kind uri name] returns the entry registered for
   the component [name] of the schema [uri].

   [!get_schema_fwd uri] is called first and its result ignored.  When
   [kind] is [Some k] only the table of that kind is searched; when it is
   [None] the tables are tried in a fixed order and the first hit wins.

   Raises [Error] when nothing is found; any [Not_found] escaping from
   [!get_schema_fwd] is reported the same way. *)
let find_schema_descr_uri kind uri name =
  try
    ignore (!get_schema_fwd uri);
    let key = (uri, name) in
    let elt () = Hashtbl.find !schema_elements key in
    let typ () = Hashtbl.find !schema_types key in
    let att () = Hashtbl.find !schema_attributes key in
    let att_group () = Hashtbl.find !schema_attribute_groups key in
    let mod_group () = Hashtbl.find !schema_model_groups key in
    (* Result of the first lookup that does not raise [Not_found]. *)
    let rec first_found = function
      | [] -> raise Not_found
      | f :: rem -> (try f () with Not_found -> first_found rem)
    in
    match kind with
      | Some `Element -> first_found [ elt ]
      | Some `Type -> first_found [ typ ]
      | Some `Attribute -> first_found [ att ]
      | Some `Attribute_group -> first_found [ att_group ]
      | Some `Model_group -> first_found [ mod_group ]
      | None ->
          (* policy for unqualified schema component resolution. This order
           * should be consistent with Schema_component.get_component *)
          first_found [ elt; typ; att; att_group; mod_group ]
  with Not_found ->
    raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
                    (Schema_common.string_of_component_kind kind)
                    (U.get_str name) uri))

(* Same as find_schema_descr_uri, the schema being given by the name it
   is bound to in [env]. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

249

250
251
252
253
254
255
256
257
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* Add to [accu] every variable bound by a sequence capture inside the
   regular expression. *)
let rec seq_vars accu re =
  match re with
  | Epsilon | Elem _ -> accu
  | Star r | WeakStar r -> seq_vars accu r
  | SeqCapture (v, r) -> seq_vars (IdSet.add v accu) r
  | Seq (r1, r2) | Alt (r1, r2) ->
      let accu = seq_vars accu r1 in
      seq_vars accu r2

258
259
260
261
262
263
264
265
266
267
268
269
270
271
(* We use two intermediate representations to go from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion and schema references,
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)

272
273
274
275
(* First intermediate form: recursion is expressed through mutable slots,
   regular expressions are kept as such. *)
type derecurs_slot = {
  ploc : Location.loc;          (* location of the definition *)
  pid  : int;                   (* number taken from [counter] *)
  mutable ploop : bool;         (* set while [real_compile] unfolds the slot *)
  mutable pdescr : derecurs     (* body, [PDummy] until it is filled in *)
}
and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319

(* Second intermediate form: constructors that build products, XML
   elements, arrows and records go through slots. *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;       (* cached free variables *)
  mutable hash : int option;    (* cached hash value *)
  mutable rank1 : int;          (* rank1/gen1 and rank2/gen2: marks used *)
  mutable rank2 : int;          (*   by the traversals that hash, compare *)
  mutable gen1 : int;           (*   and collect free variables *)
  mutable gen2 : int;
  mutable d : descr             (* body, [IDummy] until it is filled in *)
}
321
322
323
324
325
326
327
328
329
330
331
332
333
    

(* Source of the numbers stored in the [pid] field of slots. *)
let counter = ref 0

(* Fresh slot for a definition located at [loc], with a [PDummy] body. *)
let mk_derecurs_slot loc =
  incr counter;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }

(* Fresh slot with an [IDummy] body, no cached data and cleared marks. *)
let mk_slot () =
  { fv = None; hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0;
    d = IDummy }


(* This environment is used in phase (1) to eliminate recursion *)
(* Environment of phase (1): the typing environment plus the slots of the
   recursive definitions currently in scope. *)
type penv = {
  penv_tenv : t;
  penv_derec : derecurs_slot Env.t
}

(* Phase (1) environment over [tenv] with no recursive definition. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
339

340
(* Hash function on the first intermediate form, compatible with
   [equal_derecurs].  An alias hashes to the number of its slot, so the
   traversal does not go through recursive definitions. *)
let rec hash_derecurs p =
  let combine tag h1 h2 = tag + 17 * h1 + 257 * h2 in
  match p with
  | PDummy -> assert false
  | PAlias s -> s.pid
  | PType t -> 1 + 17 * (Types.hash t)
  | POr (p1,p2) -> combine 2 (hash_derecurs p1) (hash_derecurs p2)
  | PAnd (p1,p2) -> combine 3 (hash_derecurs p1) (hash_derecurs p2)
  | PDiff (p1,p2) -> combine 4 (hash_derecurs p1) (hash_derecurs p2)
  | PTimes (p1,p2) -> combine 5 (hash_derecurs p1) (hash_derecurs p2)
  | PXml (p1,p2) -> combine 6 (hash_derecurs p1) (hash_derecurs p2)
  | PArrow (p1,p2) -> combine 7 (hash_derecurs p1) (hash_derecurs p2)
  | POptional p -> 8 + 17 * (hash_derecurs p)
  | PRecord (o,r) ->
      let tag = if o then 9 else 10 in
      tag + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x -> 11 + 17 * (Id.hash x)
  | PConstant (x,c) -> combine 12 (Id.hash x) (Types.Const.hash c)
  | PRegexp (p,q) -> combine 13 (hash_derecurs_regexp p) (hash_derecurs q)

(* Hash function on regular expressions of the first intermediate form. *)
and hash_derecurs_regexp re =
  let combine tag h1 h2 = tag + 17 * h1 + 257 * h2 in
  match re with
  | PEpsilon -> 1
  | PElem p -> 2 + 17 * (hash_derecurs p)
  | PSeq (p1,p2) ->
      combine 3 (hash_derecurs_regexp p1) (hash_derecurs_regexp p2)
  | PAlt (p1,p2) ->
      combine 4 (hash_derecurs_regexp p1) (hash_derecurs_regexp p2)
  | PStar p -> 5 + 17 * (hash_derecurs_regexp p)
  | PWeakStar p -> 6 + 17 * (hash_derecurs_regexp p)
381
382

(* Equality on the first intermediate form.  Physically equal terms are
   equal; otherwise terms are compared structurally, except aliases,
   which are equal only when they point to the same slot (so the
   comparison does not go through recursive definitions). *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 ->
      (* slots are mutable: identity is the intended comparison *)
      s1 == s2
  | PType t1, PType t2 ->
      Types.equal t1 t2
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) ->
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 ->
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) ->
      (* the flags are booleans: compare them by value, as equal_descr
         does for the second intermediate form *)
      (o1 = o2) && (LabelMap.equal equal_derecurs r1 r2)
  | PCapture x1, PCapture x2 ->
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) ->
      (Id.equal x1 x2) && (Types.Const.equal c1 c2)
  | PRegexp (p1,q1), PRegexp (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  | _ -> false

(* Structural equality on regular expressions of the first intermediate
   form. *)
and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon ->
      true
  | PElem p1, PElem p2 ->
      equal_derecurs p1 p2
  | PSeq (p1,q1), PSeq (p2,q2)
  | PAlt (p1,q1), PAlt (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs_regexp q1 q2)
  | PStar p1, PStar p2
  | PWeakStar p1, PWeakStar p2 ->
      equal_derecurs_regexp p1 p2
  | _ -> false
417

418
419
420
421
422
423
424
425
426
427
428
(* Hash tables indexed by terms of the first intermediate form. *)
module DerecursTable = Hashtbl.Make(
  struct
    type t = derecurs
    let hash p = hash_derecurs p
    let equal p q = equal_derecurs p q
  end
)

(* Hash tables indexed by a regular expression and its continuation. *)
module RE = Hashtbl.Make(
  struct
    type t = derecurs_regexp * derecurs
    let hash (re, tail) =
      (hash_derecurs_regexp re) + 17 * (hash_derecurs tail)
    let equal (re1, tail1) (re2, tail2) =
      (equal_derecurs_regexp re1 re2) && (equal_derecurs tail1 tail2)
  end
)
435

436
437
438
439
(* Current generation stamp: a slot is marked by storing this value in
   its gen1 / gen2 field (see hash_slot, equal_slot and fv_slot). *)
let gen = ref 0
(* Counter giving the ranks stored in rank1 / rank2 when a slot is marked;
   SlotTable resets it to 0 before each traversal. *)
let rank = ref 0
	     
(* Hash function on the second intermediate form.  The arithmetic is kept
   exactly as written because [hash_slot] has side effects. *)
let rec hash_descr d =
  match d with
  | IDummy -> assert false
  | IType x -> Types.hash x
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)

(* Hash of a slot.  A slot already marked in the current generation
   contributes only its rank; otherwise it is marked with the next rank
   and its body is hashed. *)
and hash_slot s =
  if s.gen1 <> !gen then begin
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end
  else 13 * s.rank1
    
(* Equality on the second intermediate form; slots are compared with
   [equal_slot]. *)
let rec equal_descr d1 d2 =
  match (d1,d2) with
  | IType x1, IType x2 -> Types.equal x1 x2
  | IOr (x1,y1), IOr (x2,y2)
  | IAnd (x1,y1), IAnd (x2,y2)
  | IDiff (x1,y1), IDiff (x2,y2) ->
      (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2)
  | IXml (x1,y1), IXml (x2,y2)
  | IArrow (x1,y1), IArrow (x2,y2) ->
      (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) ->
      (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) ->
      (Id.equal x1 x2) && (Types.Const.equal y1 y2)
  | _ -> false

(* Compare two slots.  The left one is marked through gen1 / rank1 and the
   right one through gen2 / rank2:
   - both already marked: equal when they received the same rank;
   - neither marked: give both the next rank, then compare the bodies;
   - only one marked: different. *)
and equal_slot s1 s2 =
  match (s1.gen1 = !gen), (s2.gen2 = !gen) with
  | true, true -> s1.rank1 = s2.rank2
  | false, false ->
      incr rank;
      s1.rank1 <- !rank; s1.gen1 <- !gen;
      s2.rank2 <- !rank; s2.gen2 <- !gen;
      equal_descr s1.d s2.d
  | true, false | false, true -> false
  
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
(* Hash tables indexed by slots.  Each hash or comparison starts a new
   generation and resets the rank counter before traversing the slots. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    (* The hash value is computed once and cached in the slot. *)
    let hash s =
      match s.hash with
      | Some h -> h
      | None ->
          incr gen;
          rank := 0;
          let h = hash_slot s in
          s.hash <- Some h;
          h

    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        incr gen;
        rank := 0;
        equal_slot s1 s2
      end
  end)


(* Phase (1): translate an AST type or pattern into the first intermediate
   form, resolving names, schema components and constants. *)
let rec derecurs env p =
  let tenv = env.penv_tenv in
  let sub = derecurs env in
  match p.descr with
  | PatVar v -> derecurs_var env p.loc v
  | SchemaVar (kind, schema_name, component_name) ->
      PType (find_schema_descr tenv kind schema_name component_name)
  | Recurs (p,b) -> derecurs (derecurs_def env b) p
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv p.loc ns)))
  | Or (p1,p2) -> POr (sub p1, sub p2)
  | And (p1,p2) -> PAnd (sub p1, sub p2)
  | Diff (p1,p2) -> PDiff (sub p1, sub p2)
  | Prod (p1,p2) -> PTimes (sub p1, sub p2)
  | XmlT (p1,p2) -> PXml (sub p1, sub p2)
  | Arrow (p1,p2) -> PArrow (sub p1, sub p2)
  | Optional p -> POptional (sub p)
  | Record (o,r) -> PRecord (o, parse_record tenv p.loc sub r)
  | Constant (x,c) -> PConstant (x, const tenv p.loc c)
  | Cst c -> PType (Types.constant (const tenv p.loc c))
  | Regexp (r,q) ->
      (* Every sequence capture variable of [r] is bound to the nil atom
         in the continuation. *)
      let constant_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let vars = seq_vars IdSet.empty r in
      let q = IdSet.fold constant_nil (derecurs env q) vars in
      let r = derecurs_regexp (fun p -> p) env r in
      PRegexp (r, q)
(* Translate a regular expression.  [vars] is applied to every translated
   element; under a sequence capture of [x] it additionally intersects
   the element with the capture of [x]. *)
and derecurs_regexp vars env re =
  let sub = derecurs_regexp vars env in
  match re with
  | Epsilon -> PEpsilon
  | Elem p -> PElem (vars (derecurs env p))
  | Seq (p1,p2) -> PSeq (sub p1, sub p2)
  | Alt (p1,p2) -> PAlt (sub p1, sub p2)
  | Star p -> PStar (sub p)
  | WeakStar p -> PWeakStar (sub p)
  | SeqCapture (x,p) ->
      derecurs_regexp (fun p -> PAnd (vars p, PCapture x)) env p

548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
(* Resolve a name.  An unqualified name is, in this order, a recursive
   definition in scope, a type of the environment, or else a capture
   variable.  A qualified name must be a type of the named compilation
   unit. *)
and derecurs_var env loc v =
  match Ns.split_qname v with
  | "", local ->
      let id = ident local in
      (try PAlias (Env.find id env.penv_derec)
       with Not_found ->
         try PType (find_type id env.penv_tenv)
         with Not_found -> PCapture id)
  | unit_name, local ->
      try
        let cu = U.mk unit_name in
        PType (find_type_global loc cu (ident local) env.penv_tenv)
      with Not_found ->
        raise_loc_generic loc
          ("Unbound external type " ^ unit_name ^ ":" ^ (U.to_string local))


565
566
567
568
569
570
571
572
573

(* Enter a group of mutually recursive definitions: allocate one slot per
   definition, bind all the names, then translate every body in the
   extended environment, which is returned. *)
and derecurs_def env b =
  let defs = List.map (fun (v,p) -> (v,p,mk_derecurs_slot p.loc)) b in
  let bind slots (v,_,s) = Env.add v s slots in
  let extended =
    { env with penv_derec = List.fold_left bind env.penv_derec defs } in
  List.iter (fun (_,p,s) -> s.pdescr <- derecurs extended p) defs;
  extended

574
575
576
577
578
(* Free variables of a slot.  The cached set is used when present;
   otherwise a slot already marked in the current generation contributes
   nothing, and an unmarked one is marked before its body is visited. *)
let rec fv_slot s =
  match s.fv with
  | Some x -> x
  | None ->
      if s.gen1 = !gen then IdSet.empty
      else begin
        s.gen1 <- !gen;
        fv_descr s.d
      end

(* Free variables of a term: the identifiers of its captures and
   constants. *)
and fv_descr d =
  match d with
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | ICapture x | IConstant (x,_) -> IdSet.singleton x
  | IOptional d -> fv_descr d
  | IOr (d1,d2)
  | IAnd (d1,d2)
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | ITimes (s1,s2)
  | IXml (s1,s2)
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (o,r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
593

594
595
596
597
598
599
600
601
(* Compute and cache the free variables of [s] if not done yet; the
   traversal runs in a fresh generation. *)
let compute_fv s =
  match s.fv with
  | None ->
      incr gen;
      let vars = fv_slot s in
      s.fv <- Some vars
  | Some _ -> ()
	  
602
603
604
(* Fail with an error located at [loc] when the set [s] is not empty,
   naming the variable returned by [IdSet.pick]. *)
let check_no_capture loc s =
  match IdSet.pick s with
  | None -> ()
  | Some x ->
      let name = Ident.to_string x in
      raise_loc_generic loc ("Capture variable not allowed: " ^ name)
607
    
608
609
610
(* Memo tables of phase (2): slot allocated for a term (compile_slot) and
   compiled form of a term (compile). *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

(* Work lists filled by compile_slot and emptied by flush_defs: slots
   whose body is still to be compiled, and slots whose free variables are
   still to be computed. *)
let todo_defs = ref []
let todo_fv = ref []
613
614
615
616
617
618
619
620

(* Phase (2): compile a term of the first intermediate form, memoizing
   the result in [compile_hash]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let compiled = real_compile p in
    DerecursTable.replace compile_hash p compiled;
    compiled
(* Compile without looking at the memo table.  Products, XML elements,
   arrows and record fields are delayed through slots; an alias is
   unfolded in place, its [ploop] flag being set during the unfolding so
   that reaching it again without crossing a slot is reported as
   unguarded recursion. *)
and real_compile d =
  match d with
  | PDummy -> assert false
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let body = compile v.pdescr in
      v.ploop <- false;
      body
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) -> IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q
(* Compile the regular expression [r] followed by the continuation [q]
   into a union of alternatives.  [memo] records the (regexp,
   continuation) pairs already expanded, so that each pair contributes
   once. *)
and compile_regexp r q =
  (* NOTE(review): the fallback case of [queue] compiles this outermost
     continuation, not the value it was applied to; the binding below
     only makes that existing behaviour explicit.  Confirm it is
     intended. *)
  let tail = q in
  let memo = RE.create 17 in
  (* Accumulate alternatives; [None] stands for no alternative yet. *)
  let add accu i =
    match accu with
    | None -> Some i
    | Some j -> Some (IOr (j,i)) in
  let get = function
    | Some x -> x
    | None -> assert false in
  (* Continue with what follows: a nested regexp is expanded in place. *)
  let rec queue accu = function
    | PRegexp (r,q) -> aux accu r q
    | _ -> add accu (compile tail)
  and aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
      | PEpsilon -> queue accu q
      | PElem p ->
          (* Be careful not to create pairs with same second component *)
          let rec extract = function
            | PConstant (x,v) -> `Const (x,v)
            | POr (x,y) ->
                (match extract x, extract y with
                 | `Pat x, `Pat y -> `Pat (POr (x,y))
                 | x, y -> `Or (x,y))
            | p -> `Pat p
          in
          let rec mk accu = function
            | `Const (x,v) ->
                (* a constant binds a variable and matches no element *)
                (match queue None q with
                 | Some q -> add accu (IAnd (IConstant (x,v), q))
                 | None -> accu)
            | `Or (x,y) -> mk (mk accu x) y
            | `Pat p ->
                add accu (ITimes (compile_slot p, compile_slot q))
          in
          mk accu (extract p)
      | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
      | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
      | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
      | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  get (aux None r q)
682
683
(* Slot standing for the term [p].  A new slot is only allocated and
   queued: its body is compiled, and its free variables computed, by
   flush_defs. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let fresh = mk_slot () in
    todo_defs := (fresh,p) :: !todo_defs;
    todo_fv := fresh :: !todo_fv;
    DerecursTable.add compile_slot_hash p fresh;
    fresh
690

691
      
692
let timer_fv = Stats.Timer.create "Typer.fv"
693
(* Compile the bodies of all queued slots (compiling one may queue more),
   then compute the free variables of every slot queued for that and
   empty the queue.  The second step is timed with [timer_fv]. *)
let rec flush_defs () =
  match !todo_defs with
  | (s,p) :: rest ->
      todo_defs := rest;
      s.d <- compile p;
      flush_defs ()
  | [] ->
      Stats.Timer.start timer_fv;
      List.iter compute_fv !todo_fv;
      todo_fv := [];
      Stats.Timer.stop timer_fv ()
704
705
706
707
708
709
710
711
712
713
714
715
716
717
	
(* Memo tables of phase (3): type node (typ_node) and pattern node
   (pat_node) already built for a slot. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: build the internal type described by a term.
   Captures and constants are not types and must have been rejected
   before this point. *)
let rec typ d =
  match d with
  | IType t -> t
  | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
  | IAnd (s1,s2) -> Types.cap (typ s1) (typ s2)
  | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
  | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IOptional s -> Types.Record.or_absent (typ s)
  | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_node r)
  | IDummy | ICapture _ | IConstant (_,_) -> assert false

(* Type node of a slot.  The node is registered in [typ_nodes] before its
   definition is computed, so recursive references find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* Phase (3) for patterns: a term without free variables is a type
   constraint, anything else is translated by [pat_aux]. *)
let rec pat d : Patterns.descr =
  match IdSet.is_empty (fv_descr d) with
  | true -> Patterns.constr (typ d)
  | false -> pat_aux d
    
(* Translate a term that has free variables into a pattern.  Forms that
   are not allowed in patterns raise [Patterns.Error]. *)
and pat_aux d =
  match d with
  | IDummy -> assert false
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      (* removing a type is intersecting with its negation *)
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ ->
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IRecord (o,r) ->
      (* Fields without free variables go into one record type constraint;
         each other field gets any_node in that constraint and its own
         record pattern, all of them being intersected. *)
      let pats = ref [] in
      let aux l s =
        if IdSet.is_empty (fv_slot s) then typ_node s
        else begin
          pats := Patterns.record l (pat_node s) :: !pats;
          Types.any_node
        end
      in
      let constr = Types.record' (o,LabelMap.mapi aux r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IType _ -> assert false
      
(* Pattern node of a slot.  The node is registered in [pat_nodes] before
   its definition is computed; if the computation fails, the entry is
   removed again and the exception re-raised. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s node;
      Patterns.define node (pat s.d);
      node
    with exn ->
      SlotTable.remove pat_nodes s;
      raise exn
      (* For the toplevel ... *)
773

774

775
module Ids = Set.Make(Id)
776
(* [type_defs env b] processes a group of mutually recursive type
   definitions [b], a list of (identifier, AST type) pairs, and returns
   the list of (identifier, type) pairs.

   - An identifier defined twice in [b] is an error, located at the
     second definition.
   - A definition with free capture variables is an error.
   - A definition yielding an empty type triggers a warning, unless its
     location is [noloc].
   - Every resulting type is registered with the type printer under its
     identifier. *)
let type_defs env b =
  let check_unique seen (v,p) =
    if Ids.mem v seen then
      raise_loc_generic p.loc
        ("Multiple definitions for the type identifier " ^
         (Ident.to_string v));
    Ids.add v seen
  in
  ignore (List.fold_left check_unique Ids.empty b);

  let penv = derecurs_def (penv env) b in
  let b = List.map (fun (v,p) -> (v,p,compile (derecurs penv p))) b in
  (* All slots must be filled in before the types can be built. *)
  flush_defs ();
  let b =
    List.map
      (fun (v,p,s) ->
         check_no_capture p.loc (fv_descr s);
         let t = typ s in
         if (p.loc <> noloc) && (Types.is_empty t) then
           warning p.loc
             ("This definition yields an empty type for " ^ (Ident.to_string v));
         (v,t)) b in
  List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
  b
801
802


803
804
805
806
807
(* Print on [ppf] the name of every type of [env], each one preceded by a
   space. *)
let dump_types ppf env =
  let show v = function
    | Type _ -> Format.fprintf ppf " %a" Ident.print v
    | Val _ -> ()
  in
  Env.iter show env.ids

(* Print on [ppf] the type called [name]; raises [Error] when [name] is
   not bound to a type. *)
let dump_type ppf env name =
  try
    (match Env.find (Ident.ident name) env.ids with
     | Type t -> Types.Print.print ppf t
     | Val _ -> raise Not_found)
  with Not_found ->
    let msg = Printf.sprintf "Type %s not found" (U.get_str name) in
    raise (Error msg)
815

816
817
818
(* Print on [ppf] the component [n] of kind [k] of the schema bound to
   [s] in [env]. *)
let dump_schema_type ppf env (k, s, n) =
  let uri = find_schema s env in
  Types.Print.print ppf (find_schema_descr_uri k uri n)

(* Print on [ppf] the namespace table of [env]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table
823

824

825
826
(* Type node of the term [r]; a free capture variable is an error located
   at [loc]. *)
let do_typ loc r =
  let slot = compile_slot r in
  flush_defs ();
  check_no_capture loc (fv_slot slot);
  typ_node slot

(* Translate the AST type [p] into a type node. *)
let typ env p =
  let term = derecurs (penv env) p in
  do_typ p.loc term
833
    
834
835
(* Translate the AST pattern [p] into a pattern node.  [Patterns.Error]
   is turned into an error located at [p], and a located exception whose
   location is [noloc] is relocated to [p]. *)
let pat env p =
  let term = derecurs (penv env) p in
  let slot = compile_slot term in
  flush_defs ();
  try pat_node slot
  with
  | Patterns.Error e -> raise_loc_generic p.loc e
  | Location (loc,_,exn) when loc = noloc ->
      raise (Location (p.loc, `Full, exn))
840
841


842
843
(* II. Build skeleton *)

844

845
type type_fun = Types.t -> bool -> Types.t

(* Forward references.  Every initial function fails with [assert false],
   so they have to be assigned before use; in the visible part of the
   file only [mk_unary_op] and [mk_binary_op] are called (by [expr]). *)
let typ_cst = ref (fun _ -> assert false)
let mk_unary_op = ref (fun _ _ -> assert false)
let typ_unary_op = ref (fun _ _ _ -> assert false)
let mk_binary_op = ref (fun _ _ -> assert false)
let typ_binary_op = ref (fun _ _ _ _ -> assert false)
851
852


853
module Fv = IdSet
854

855
856
857
(* A typed branch together with a list of branches.  NOTE(review): the
   code filling [cur_branch] lies beyond the visible part of the file. *)
type branch = Branch of Typed.branch * branch list

let cur_branch : branch list ref = ref []
858

859
(* Pair the free variables [fv] with a typed expression node for [e]
   located at [loc]; the type field starts as [Types.empty]. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e } in
  (fv, node)
865

866
(* Operators known by name: each entry holds an arity and a function. *)
let ops = Hashtbl.create 13

(* Add the operator [op] with its arity and its function [f]. *)
let register_op op arity f =
  let entry = (arity, f) in
  Hashtbl.add ops op entry

(* Function registered for [op]; raises [Not_found] when [op] is unknown. *)
let typ_op op =
  let (_, f) = Hashtbl.find ops op in
  f
869

870
871
872
873
874
(* [Some (name, arity)] when [s] is a registered operator that is not
   hidden by a binding of the same identifier in [env], [None]
   otherwise. *)
let is_op env s =
  let shadowed = Env.mem (ident s) env.ids in
  if shadowed then None
  else
    try
      let name = U.get_str s in
      let (arity, _) = Hashtbl.find ops name in
      Some (name, arity)
    with Not_found -> None
875

876
877
(* Build the skeleton of an expression.  The result pairs the free
   variables of the expression with its typed form; [loc] is the
   innermost enclosing location. *)
let rec expr env loc e =
  (* Two sub-expressions combined by [mk]. *)
  let binary a b mk =
    let (fv1,a) = expr env loc a and (fv2,b) = expr env loc b in
    exp loc (Fv.cup fv1 fv2) (mk a b)
  in
  (* A sub-expression and a list of branches combined by [mk]. *)
  let matching e b mk =
    let (fv1,e) = expr env loc e
    and (fv2,b) = branches env b in
    exp loc (Fv.cup fv1 fv2) (mk e b)
  in
  match e with
  | LocatedExpr (loc,e) -> expr env loc e
  | Forget (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Forget (e,t))
  | Var s -> var env loc s
  | Apply (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      let fv = Fv.cup fv1 fv2 in
      (match e1.Typed.exp_descr with
       | Typed.Op (op,arity,args) when arity > 0 ->
           (* an operator still waiting for arguments takes this one *)
           exp loc fv (Typed.Op (op,arity - 1,args @ [e2]))
       | _ ->
           exp loc fv (Typed.Apply (e1,e2)))
  | Abstraction a -> abstraction env loc a
  | (Integer _ | Char _ | Atom _ | Const _) as c ->
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Pair (e1,e2) -> binary e1 e2 (fun a b -> Typed.Pair (a,b))
  | Xml (e1,e2) -> binary e1 e2 (fun a b -> Typed.Xml (a,b))
  | Dot (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
  | RemoveField (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
  | RecordLitt r ->
      (* free variables are collected while the fields are translated *)
      let fv = ref Fv.empty in
      let r = parse_record env loc
                (fun e ->
                   let (fv2,e) = expr env loc e
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr env loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (try
         (match ltes with
          | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
          | [e1;e2] -> exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
          | _ -> assert false)
       with Not_found -> assert false)
  | Match (e,b) -> matching e b (fun e b -> Typed.Match (e, b))
  | Map (e,b) -> matching e b (fun e b -> Typed.Map (e, b))
  | Transform (e,b) -> matching e b (fun e b -> Typed.Transform (e, b))
  | Xtrans (e,b) -> matching e b (fun e b -> Typed.Xtrans (e, b))
  | Validate (e,kind,schema,elt) ->
      let (fv,e) = expr env loc e in
      let uri = find_schema schema env in
      exp loc fv (Typed.Validate (e, kind, uri, elt))
  | Try (e,b) -> matching e b (fun e b -> Typed.Try (e, b))
  | NamespaceIn (pr,ns,e) ->
      let env = enter_ns pr ns env in
      expr env loc e
  | Ref (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Ref (e,t))
  | External (s,args) ->
      extern loc env s args
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
	
(* Reference to the external [s] applied to the type arguments [args].
   Any exception raised by the resolution is re-raised located at
   [loc]. *)
and extern loc env s args =
  let targs = List.map (typ env) args in
  try
    let (i,t) = Externals.resolve s targs in
    exp loc Fv.empty (Typed.External (t,i))
  with exn -> raise_loc loc exn
    
(* Resolve an identifier used as an expression: an operator, an external
   (unqualified name containing a dot), a local value, or a value of
   another compilation unit (qualified name). *)
and var env loc s =
  match is_op env s with
  | Some (s,arity) -> exp loc Fv.empty (Typed.Op (s, arity, []))
  | None ->
      match Ns.split_qname s with
      | "", id ->
          let s = U.get_str id in
          if String.contains s '.' then
            extern loc env s []
          else begin
            let id = ident id in
            (* only checks that the value exists; its type is not used *)
            (try ignore (find_value id env)
             with Not_found ->
               raise_loc loc (UnboundId (id, Env.mem id env.ids)));
            exp loc (Fv.singleton id) (Typed.Var id)
          end
      | cu, id ->
          let cu = find_cu (U.mk cu) env in
          let id = ident id in
          let t =
            try find_value_global cu id env
            with Not_found ->
              raise_loc loc (UnboundExtId (cu,id) ) in
          exp loc Fv.empty (Typed.ExtVar (cu, id, t))

(* Build the skeleton of a function.  Its type is the intersection of the
   arrows of its interface.  The body is processed in an environment
   where the function name, if any, is bound as a dummy value, and that
   name is removed from the free variables of the result. *)
and abstraction env loc a =
  let node_iface =
    List.map (fun (t1,t2) -> (typ env t1, typ env t2)) a.fun_iface in
  let fun_type =
    List.fold_left
      (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
      Types.any node_iface in
  let descr_iface =
    List.map (fun (t1,t2) -> (Types.descr t1, Types.descr t2)) node_iface in
  let body_env =
    match a.fun_name with
    | Some f -> enter_values_dummy [ f ] env
    | None -> env
  in
  let (fv0,body) = branches body_env a.fun_body in
  let fv =
    match a.fun_name with
    | Some f -> Fv.remove f fv0
    | None -> fv0
  in
  let e = Typed.Abstraction
            { Typed.fun_name = a.fun_name;
              Typed.fun_iface = descr_iface;
              Typed.fun_body = body;
              Typed.fun_typ = fun_type;
              Typed.fun_fv = fv
            } in
  exp loc fv e
    
and branches env b = 
  let fv = ref Fv.empty in
  let accept = ref Types.empty in
  let branch (p,e) = 
    let cur_br = !cur