typer.ml 53 KB
Newer Older
1
(* TODO:
2
 - rewrite type-checking of operators to propagate constraint
3
4
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
5
6
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
let debug_schema = false

13
(* Print a warning on [!Location.warning_ppf]: a "Warning <loc>:" header,
   then the output of [Location.html_hilight] for [loc], then [msg]. *)
let warning loc msg =
  let where = (loc, `Full) in
  Format.fprintf !Location.warning_ppf "Warning %a:@\n%a%s@."
    Location.print_loc where
    Location.html_hilight where
    msg

19
20
21
22
23
24
25
26
27
28
29
30
31
(* Exceptions declared by the type-checker.
   NOTE(review): in the part of the file visible here only [Error] is
   actually raised (with a human-readable message); the intended meaning of
   the other payloads should be confirmed at their raising sites. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string

(* Raise [exn] wrapped in a [Location] exception tagged with [`Full]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Same as [raise_loc], but tagged with [`Char ofs] instead of [`Full]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* Raise [Error msg] located at [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn

32
33
(* What an identifier can be bound to in the environment. *)
type item =
  | Type of Types.t   (* a type definition *)
  | Val of Types.t    (* a value, carrying its type *)
35

36
37
module UEnv = Map.Make(U)

38
(* Typing environment. *)
type t = {
  ids : item Env.t;               (* identifiers bound to types or values *)
  ns : Ns.table;                  (* namespace table *)
  cu : Types.CompUnit.t UEnv.t;   (* bindings added by [enter_cu] *)
  schemas : string UEnv.t         (* schema name -> uri, see [enter_schema] *)
}
44

45
46
47
48
49
(* Placeholder operations: each one fails with its own name whenever it is
   applied to all of its arguments. *)
let hash = fun _ -> failwith "Typer.hash"
let compare = fun _ _ -> failwith "Typer.compare"
let dump = fun ppf _ -> failwith "Typer.dump"
let equal = fun _ _ -> failwith "Typer.equal"
let check = fun _ -> failwith "Typer.check"
50
51

(* TODO: filter out builtin defs ? *)
52
53
54
55
(* Write one environment item: a 1-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type. *)
let serialize_item s item =
  let (tag, t) =
    match item with
      | Type t -> (0, t)
      | Val t -> (1, t)
  in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

56
(* Serialize an environment: the identifier bindings first, then the
   namespace table.  The [cu] and [schemas] fields are not written. *)
let serialize s env =
  let ids = env.ids and ns = env.ns in
  Serialize.Put.env Id.serialize serialize_item Env.iter s ids;
  Ns.serialize_table s ns
59

60
61
62
63
64
(* Read back an item written by [serialize_item]: a 1-bit tag selects
   [Type] (0) or [Val] (1), then the type follows. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

65
(* Rebuild an environment from a stream: identifier bindings are read
   first, then the namespace table; [cu] and [schemas] start empty. *)
let deserialize s =
  let bindings =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let table = Ns.deserialize_table s in
  { ids = bindings; ns = table; cu = UEnv.empty; schemas = UEnv.empty }
69
70


71
72
(* The environment with no identifiers, the empty namespace table, and no
   compilation units or schemas. *)
let empty_env =
  { ids = Env.empty;
    ns = Ns.empty_table;
    cu = UEnv.empty;
    schemas = UEnv.empty }

78
79
(* Forward reference, used below to obtain the environment of a compilation
   unit; fails with an assertion until another module assigns it. *)
let from_comp_unit = ref (fun _ -> assert false)

80
(* Return [env] extended with the binding [x -> cu] in its [cu] table. *)
let enter_cu x cu env =
  let units = UEnv.add x cu env.cu in
  { env with cu = units }
82

83
84
85
(* Look [x] up in the [cu] table of [env]; when it is not bound there, fall
   back to [Types.CompUnit.mk x]. *)
let find_cu x env =
  if UEnv.mem x env.cu then UEnv.find x env.cu
  else Types.CompUnit.mk x
86
87


88
89
90
91
92
93
(* Return [env] with schema name [x] bound to [uri]. *)
let enter_schema x uri env =
  let bound = UEnv.add x uri env.schemas in
  { env with schemas = bound }
(* Return the uri bound to schema name [x].
   Raises [Error] (without location) when [x] is not a known schema. *)
let find_schema x env =
  try UEnv.find x env.schemas
  with Not_found ->
    let msg = Printf.sprintf "%s: no such schema" (U.get_str x) in
    raise (Error msg)

94
95
96
97
98
99
100
101
(* Return [env] with [id] bound to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* Bind every [(id, t)] of [l] as a type, from left to right. *)
let enter_types l env =
  let bind ids (id, t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left bind env.ids l }
(* Return the type bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a value. *)
let find_type id env =
  let item = Env.find id env.ids in
  match item with
    | Val _ -> raise Not_found
    | Type t -> t
103

104
(* Look up type [id] in the environment that [!from_comp_unit] returns for
   the compilation unit named [cu].  [loc] is currently unused.
   Raises [Not_found] like [find_type]. *)
let find_type_global loc cu id env =
  let unit_env = !from_comp_unit (find_cu cu env) in
  find_type id unit_env

109
(* Return [env] with [id] bound to a value of type [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }
111
112
(* Bind every [(id, t)] of [l] as a value, from left to right. *)
let enter_values l env =
  let bind ids (id, t) = Env.add id (Val t) ids in
  { env with ids = List.fold_left bind env.ids l }
114
115
(* Return the type of the value bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a type. *)
let find_value id env =
  let item = Env.find id env.ids in
  match item with
    | Type _ -> raise Not_found
    | Val t -> t
118
119
120
(* Look up value [id] in the environment that [!from_comp_unit] returns for
   [cu].  The [env] argument is ignored: only the unit's own environment is
   consulted. *)
let find_value_global cu id env =
  find_value id (!from_comp_unit cu)
121
	
122
123
124
125
126
127
(* [true] when [id] is unbound or already bound to a value; [false] when it
   is bound to a type. *)
let value_name_ok id env =
  let binding = try Some (Env.find id env.ids) with Not_found -> None in
  match binding with
    | None | Some (Val _) -> true
    | Some (Type _) -> false

128
129
130
131
(* Apply [f id t] to every value binding of [env]; type bindings are
   skipped. *)
let iter_values env f =
  let visit x item =
    match item with
      | Val t -> f x t
      | Type _ -> ()
  in
  Env.iter visit env.ids
132

133

134
135
136
137
138
139
140
141
142
(* Register every type of [env] with [Types.Print.register_global] under the
   name "<unit>:<id>", where <unit> is [Types.CompUnit.value cu]. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x item =
    match item with
      | Type t ->
          let qualified = U.concat prefix (Id.value x) in
          Types.Print.register_global qualified t
      | Val _ -> ()
  in
  Env.iter register env.ids

143

144
(* Namespaces *)
145

146
(* Hand the namespace table of [env] to [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table
148

149
(* Accessor for the namespace table of an environment. *)
let get_ns_table tenv =
  tenv.ns
150

151
(* Return [env] whose namespace table additionally maps prefix [p] to
   [ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
153

154
155
156
157
158
(* Run [f x]; an [Ns.UnknownPrefix] escaping from it is turned into a
   located "Undefined namespace prefix" error at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
159

160
(* Resolve the qualified tag [t] with [Ns.map_tag] and build the atom;
   unknown prefixes are reported at [loc]. *)
let parse_atom env loc t =
  let resolve = Ns.map_tag env.ns in
  let (ns, local) = protect_error_ns loc resolve t in
  Atoms.V.mk ns local
 
(* Resolve the prefix [ns] with [Ns.map_prefix]; unknown prefixes are
   reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
166

167
(* Resolve the qualified attribute name [t] with [Ns.map_attr] and intern
   it in [LabelPool]; unknown prefixes are reported at [loc]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let qname = protect_error_ns loc resolve t in
  let (ns, local) = qname in
  LabelPool.mk (ns, local)
170

171
172
173
174
175
176
177
178
179
180
181
182
183
(* Turn an association list of (label, x) into a label map, resolving each
   label with [parse_label] and transforming each x with [f].  A label that
   occurs twice is reported as "Duplicated record field" at [loc]. *)
let parse_record env loc f r =
  let field (l, x) = (parse_label env loc l, f x) in
  let duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  LabelMap.from_list duplicate (List.map field r)

(* Convert a constant expression of the AST into a [Types] constant.
   [loc] is the innermost enclosing location, used for error reporting; any
   expression that is not a scalar or structured constant is rejected. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
    | LocatedExpr (loc, e) -> const env loc e
    | Pair (x, y) -> Types.Pair (const env loc x, const env loc y)
    | Xml (x, y) -> Types.Xml (const env loc x, const env loc y)
    | RecordLitt x -> Types.Record (parse_record env loc sub x)
    | String (i, j, s, c) -> Types.String (i, j, s, const env loc c)
    | Atom t -> Types.Atom (parse_atom env loc t)
    | Integer i -> Types.Integer i
    | Char c -> Types.Char c
    | Const c -> c
    | _ ->
        raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
189

190

191
(* Schema *)
192

193
194
195
(* [true] when the schema name [s] is bound in [env]. *)
let is_registered_schema env s =
  UEnv.mem s env.schemas

(* uri -> schema binding *)
196
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)
197
198
199

let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
200
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
201
202
203
204
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
205

206
207


208
209
  (* raise Not_found *)

210
211
212
213

let get_schema_fwd = ref (fun _ -> assert false)

(* Find the component called [name] in the schema identified by [uri].
   [kind] restricts the search to one component table; with [None] the
   tables are tried in a fixed order.  [!get_schema_fwd uri] is called first
   and its result discarded.
   Raises [Error] when nothing is found (or when any step raises
   [Not_found]). *)
let find_schema_descr_uri kind uri name =
  try
    ignore (!get_schema_fwd uri);
    let key = (uri, name) in
    let elt () = Hashtbl.find !schema_elements key in
    let typ () = Hashtbl.find !schema_types key in
    let att () = Hashtbl.find !schema_attributes key in
    let att_group () = Hashtbl.find !schema_attribute_groups key in
    let mod_group () = Hashtbl.find !schema_model_groups key in
    (* Result of the first lookup that does not raise [Not_found]. *)
    let rec first_found = function
      | [] -> raise Not_found
      | f :: rem -> (try f () with Not_found -> first_found rem)
    in
    match kind with
      | Some `Element -> first_found [ elt ]
      | Some `Type -> first_found [ typ ]
      | Some `Attribute -> first_found [ att ]
      | Some `Attribute_group -> first_found [ att_group ]
      | Some `Model_group -> first_found [ mod_group ]
      | None ->
          (* policy for unqualified schema component resolution. This order
           * should be consistent with Schema_component.get_component *)
          first_found [ elt; typ; att; att_group; mod_group ]
  with Not_found ->
    raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
                    (Schema_common.string_of_component_kind kind)
                    (U.get_str name) uri))

(* Same as [find_schema_descr_uri], with the schema given by its name in
   [env] rather than by uri. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

243

244
245
246
247
248
249
250
251
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* Add to [accu] every variable bound by a [SeqCapture] inside the regular
   expression. *)
let rec seq_vars accu re =
  match re with
    | SeqCapture (v, r) -> seq_vars (IdSet.add v accu) r
    | Star r | WeakStar r -> seq_vars accu r
    | Seq (r1, r2) | Alt (r1, r2) ->
        let after_left = seq_vars accu r1 in
        seq_vars after_left r2
    | Epsilon | Elem _ -> accu

252
253
254
255
256
257
258
259
260
261
262
263
264
265
(* We use two intermediate representations from AST types/patterns
   to internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion, schema, 
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

266
267
268
269
(* Phase (1) representation: recursion is expressed through slots, and
   regular expressions are still present. *)
type derecurs_slot = {
  ploc : Location.loc;        (* location given to [mk_derecurs_slot] *)
  pid  : int;                 (* unique number, used as hash of [PAlias] *)
  mutable ploop : bool;       (* set while the slot is being compiled *)
  mutable pdescr : derecurs;  (* [PDummy] until the definition is filled in *)
}
and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313

(* Phase (2) representation: regular expressions are compiled away and the
   components of products, XML, arrows and records are slots. *)
type descr =
  | IDummy
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;      (* cached free variables, see [compute_fv] *)
  mutable hash : int option;   (* cached hash, see [SlotTable] *)
  (* Marks used by the traversals ([hash_slot], [equal_slot], [fv_slot]):
     a mark is valid only when its [gen] field equals the global [!gen]. *)
  mutable rank1 : int; mutable rank2 : int;
  mutable gen1 : int; mutable gen2 : int;
  mutable d : descr;           (* [IDummy] until defined by [flush_defs] *)
}
315
316
317
318
319
320
321
322
323
324
325
326
327
    

(* Source of the [pid] numbers; the first slot created gets 1. *)
let counter = ref 0

(* Create a fresh, still undefined ([PDummy]) slot located at [loc]. *)
let mk_derecurs_slot loc =
  let id = !counter + 1 in
  counter := id;
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* Create a fresh slot: undefined description, empty caches, all marks at
   zero. *)
let mk_slot () =
  { fv = None; hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0;
    d = IDummy }


(* This environment is used in phase (1) to eliminate recursion *)
type penv = {
  penv_tenv : t;                     (* the typing environment *)
  penv_derec : derecurs_slot Env.t;  (* recursive names currently in scope *)
}

(* Initial phase-(1) environment over [tenv]: no recursive name in scope. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
333

334
(* Hash function on [derecurs] terms, compatible with [equal_derecurs]:
   aliases hash to the [pid] of their slot, every other node combines a
   per-constructor tag with the hashes of its components.
   Must not be applied to [PDummy]. *)
let rec hash_derecurs p =
  (* Combination shared by all nodes with two [derecurs] children. *)
  let binary tag p1 p2 =
    tag + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2) in
  match p with
    | PDummy -> assert false
    | PAlias s -> s.pid
    | PType t -> 1 + 17 * (Types.hash t)
    | POr (p1, p2) -> binary 2 p1 p2
    | PAnd (p1, p2) -> binary 3 p1 p2
    | PDiff (p1, p2) -> binary 4 p1 p2
    | PTimes (p1, p2) -> binary 5 p1 p2
    | PXml (p1, p2) -> binary 6 p1 p2
    | PArrow (p1, p2) -> binary 7 p1 p2
    | POptional p -> 8 + 17 * (hash_derecurs p)
    | PRecord (o, r) ->
        (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
    | PCapture x -> 11 + 17 * (Id.hash x)
    | PConstant (x, c) ->
        12 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
    | PRegexp (p, q) ->
        13 + 17 * (hash_derecurs_regexp p) + 257 * (hash_derecurs q)

(* Hash function on regular expressions, built the same way. *)
and hash_derecurs_regexp re =
  match re with
    | PEpsilon -> 1
    | PElem p -> 2 + 17 * (hash_derecurs p)
    | PSeq (r1, r2) ->
        3 + 17 * (hash_derecurs_regexp r1) + 257 * (hash_derecurs_regexp r2)
    | PAlt (r1, r2) ->
        4 + 17 * (hash_derecurs_regexp r1) + 257 * (hash_derecurs_regexp r2)
    | PStar r -> 5 + 17 * (hash_derecurs_regexp r)
    | PWeakStar r -> 6 + 17 * (hash_derecurs_regexp r)
375
376

(* Equality on [derecurs] terms: physically equal terms are equal, aliases
   are compared by slot identity, and everything else structurally. *)
let rec equal_derecurs p1 p2 =
  if p1 == p2 then true
  else match p1, p2 with
    | PAlias s1, PAlias s2 -> s1 == s2
    | PType t1, PType t2 -> Types.equal t1 t2
    | POr (a1, b1), POr (a2, b2)
    | PAnd (a1, b1), PAnd (a2, b2)
    | PDiff (a1, b1), PDiff (a2, b2)
    | PTimes (a1, b1), PTimes (a2, b2)
    | PXml (a1, b1), PXml (a2, b2)
    | PArrow (a1, b1), PArrow (a2, b2) ->
        (equal_derecurs a1 a2) && (equal_derecurs b1 b2)
    | POptional a1, POptional a2 -> equal_derecurs a1 a2
    | PRecord (o1, r1), PRecord (o2, r2) ->
        (o1 == o2) && (LabelMap.equal equal_derecurs r1 r2)
    | PCapture x1, PCapture x2 -> Id.equal x1 x2
    | PConstant (x1, c1), PConstant (x2, c2) ->
        (Id.equal x1 x2) && (Types.Const.equal c1 c2)
    | PRegexp (r1, q1), PRegexp (r2, q2) ->
        (equal_derecurs_regexp r1 r2) && (equal_derecurs q1 q2)
    | _ -> false

(* Structural equality on regular expressions. *)
and equal_derecurs_regexp r1 r2 =
  match r1, r2 with
    | PEpsilon, PEpsilon -> true
    | PElem a1, PElem a2 -> equal_derecurs a1 a2
    | PSeq (a1, b1), PSeq (a2, b2)
    | PAlt (a1, b1), PAlt (a2, b2) ->
        (equal_derecurs_regexp a1 a2) && (equal_derecurs_regexp b1 b2)
    | PStar a1, PStar a2
    | PWeakStar a1, PWeakStar a2 -> equal_derecurs_regexp a1 a2
    | _ -> false
411

412
413
414
415
416
417
418
419
420
421
422
(* Hash tables keyed by [derecurs] terms. *)
module DerecursTable =
  Hashtbl.Make(
    struct
      type t = derecurs
      let equal = equal_derecurs
      let hash = hash_derecurs
    end)

(* Hash tables keyed by a (regular expression, continuation) pair. *)
module RE =
  Hashtbl.Make(
    struct
      type t = derecurs_regexp * derecurs

      let equal (r1, q1) (r2, q2) =
        (equal_derecurs_regexp r1 r2) && (equal_derecurs q1 q2)

      let hash (r, q) =
        (hash_derecurs_regexp r) + 17 * (hash_derecurs q)
    end)
429

430
431
432
433
let gen = ref 0
let rank = ref 0
	     
(* Hash of a description, following slots.  Must not reach [IDummy].
   The arithmetic below is deliberately left untouched: the ranks given to
   slots depend on the order in which sub-terms are visited. *)
let rec hash_descr d =
  match d with
    | IDummy -> assert false
    | IType x -> Types.hash x
    | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
    | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
    | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
    | IOptional d -> 4 + 17 * (hash_descr d)
    | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
    | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
    | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
    | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)

(* Hash of a slot.  A slot not yet marked in the current generation gets the
   next rank and is hashed through its description; a slot already marked
   contributes only its rank, which cuts cycles. *)
and hash_slot s =
  if s.gen1 <> !gen then begin
    incr rank;
    s.rank1 <- !rank;
    s.gen1 <- !gen;
    hash_descr s.d
  end else
    13 * s.rank1
    
(* Equality of descriptions, following slots through [equal_slot]. *)
let rec equal_descr d1 d2 =
  match (d1, d2) with
    | IType x1, IType x2 -> Types.equal x1 x2
    | IOr (x1,y1), IOr (x2,y2)
    | IAnd (x1,y1), IAnd (x2,y2)
    | IDiff (x1,y1), IDiff (x2,y2) ->
        (equal_descr x1 x2) && (equal_descr y1 y2)
    | IOptional x1, IOptional x2 -> equal_descr x1 x2
    | ITimes (x1,y1), ITimes (x2,y2)
    | IXml (x1,y1), IXml (x2,y2)
    | IArrow (x1,y1), IArrow (x2,y2) ->
        (equal_slot x1 x2) && (equal_slot y1 y2)
    | IRecord (o1,r1), IRecord (o2,r2) ->
        (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
    | ICapture x1, ICapture x2 -> Id.equal x1 x2
    | IConstant (x1,y1), IConstant (x2,y2) ->
        (Id.equal x1 x2) && (Types.Const.equal y1 y2)
    | _ -> false

(* Equality of slots.  [s1] is marked through its (rank1, gen1) fields and
   [s2] through (rank2, gen2).  Two slots already marked in the current
   generation are equal iff they carry the same rank; if exactly one is
   marked they differ; otherwise both receive the next rank and their
   descriptions are compared. *)
and equal_slot s1 s2 =
  let seen1 = (s1.gen1 = !gen) and seen2 = (s2.gen2 = !gen) in
  if seen1 && seen2 then s1.rank1 = s2.rank2
  else if seen1 || seen2 then false
  else begin
    incr rank;
    s1.rank1 <- !rank; s1.gen1 <- !gen;
    s2.rank2 <- !rank; s2.gen2 <- !gen;
    equal_descr s1.d s2.d
  end
  
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
(* Hash tables keyed by slots.  Both operations open a new generation
   ([incr gen]) and restart the rank counter before traversing. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot

    (* The hash is computed once and then cached in the slot. *)
    let hash s =
      match s.hash with
        | None ->
            incr gen; rank := 0;
            let h = hash_slot s in
            s.hash <- Some h;
            h
        | Some h -> h

    let equal s1 s2 =
      if s1 == s2 then true
      else begin
        incr gen; rank := 0;
        let e = equal_slot s1 s2 in
        (*     if e then Printf.eprintf "Recursive hash-consing: Equal\n";  *)
        e
      end
  end)


(* Phase (1): translate an AST type/pattern into a [derecurs] term.
   Names are resolved, schema components and constants are replaced by
   their types, and recursive definitions become slots. *)
let rec derecurs env p =
  let tenv = env.penv_tenv and loc = p.loc in
  match p.descr with
  | PatVar v -> derecurs_var env loc v
  | SchemaVar (kind, schema_name, component_name) ->
      PType (find_schema_descr tenv kind schema_name component_name)
  | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
  | Internal t -> PType t
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
  | Or (p1,p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1,p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1,p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1,p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1,p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1,p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional q -> POptional (derecurs env q)
  | Record (o,r) -> PRecord (o, parse_record tenv loc (derecurs env) r)
  | Constant (x,c) -> PConstant (x, const tenv loc c)
  | Cst c -> PType (Types.constant (const tenv loc c))
  | Regexp (r,q) ->
      (* Every sequence capture variable of [r] is bound to the nil atom in
         the continuation. *)
      let with_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let captured = seq_vars IdSet.empty r in
      let tail = IdSet.fold with_nil (derecurs env q) captured in
      let re = derecurs_regexp (fun x -> x) env r in
      PRegexp (re, tail)
(* Translate a regular expression.  [vars] is applied to each element: it
   adds a [PCapture x] conjunct for every enclosing [SeqCapture (x, _)]. *)
and derecurs_regexp vars env re =
  match re with
    | Epsilon -> PEpsilon
    | Elem p -> PElem (vars (derecurs env p))
    | Seq (r1,r2) ->
        PSeq (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
    | Alt (r1,r2) ->
        PAlt (derecurs_regexp vars env r1, derecurs_regexp vars env r2)
    | Star r -> PStar (derecurs_regexp vars env r)
    | WeakStar r -> PWeakStar (derecurs_regexp vars env r)
    | SeqCapture (x,r) ->
        let capturing p = PAnd (vars p, PCapture x) in
        derecurs_regexp capturing env r

542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
(* Resolve a name occurring in a type/pattern.
   Unqualified: a recursive name in scope, else a type of the environment,
   else a capture variable.  Qualified ("cu:name"): a type of that
   compilation unit, or an error at [loc] when it does not exist. *)
and derecurs_var env loc v =
  match Ns.split_qname v with
    | "", name ->
        let id = ident name in
        (try PAlias (Env.find id env.penv_derec)
         with Not_found ->
           try PType (find_type id env.penv_tenv)
           with Not_found -> PCapture id)
    | cu, name ->
        try
          let unit_name = U.mk cu in
          PType (find_type_global loc unit_name (ident name) env.penv_tenv)
        with Not_found ->
          let msg = "Unbound external type " ^ cu ^ ":" ^ (U.to_string name) in
          raise_loc_generic loc msg


559
560
561
562
563
564
565
566
567

(* Enter a group of mutually recursive definitions: a fresh slot is created
   for every name, all names are put in scope, and only then is each
   definition translated and stored in its slot.  Returns the extended
   environment. *)
and derecurs_def env b =
  let with_slot (v, p) = (v, p, mk_derecurs_slot p.loc) in
  let defs = List.map with_slot b in
  let bind scope (v, _, s) = Env.add v s scope in
  let scope = List.fold_left bind env.penv_derec defs in
  let env = { env with penv_derec = scope } in
  let define (_, p, s) = s.pdescr <- derecurs env p in
  List.iter define defs;
  env

568
569
570
571
572
(* Free (capture / constant) variables of a slot.  The cached set is used
   when present; otherwise the slot is marked with the current generation
   and traversed, a slot already marked contributing nothing. *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None when s.gen1 = !gen -> IdSet.empty
    | None ->
        s.gen1 <- !gen;
        fv_descr s.d

(* Free variables of a description.  Must not reach [IDummy]. *)
and fv_descr d =
  match d with
    | IDummy -> assert false
    | IType _ -> IdSet.empty
    | ICapture x | IConstant (x,_) -> IdSet.singleton x
    | IOptional d -> fv_descr d
    | IOr (d1,d2)
    | IAnd (d1,d2)
    | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
    | ITimes (s1,s2)
    | IXml (s1,s2)
    | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
    | IRecord (o,r) ->
        List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
587

588
589
590
591
592
593
594
595
(* Compute and cache the free variables of [s], unless already cached.
   A new generation is opened first so that marks left by earlier
   traversals are ignored. *)
let compute_fv s =
  match s.fv with
    | None ->
        incr gen;
        s.fv <- Some (fv_slot s)
    | Some _ -> ()
	  
596
597
598
(* Fail at [loc] if the variable set [s] is not empty, naming one of its
   elements in the message. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | None -> ()
    | Some x ->
        let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
        raise_loc_generic loc msg
601
    
602
603
604
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

605
606
let todo_defs = ref []
let todo_fv = ref []
607
608
609
610
611
612
613
614

(* Phase (2): compile a [derecurs] term into a description.  Results are
   memoized in [compile_hash]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let compiled = real_compile p in
    DerecursTable.replace compile_hash p compiled;
    compiled
(* The actual compilation, without memoization.  Must not be applied to
   [PDummy]. *)
and real_compile p =
  match p with
    | PDummy -> assert false
    | PAlias v ->
        (* [ploop] is set while the definition of [v] is being compiled;
           meeting [v] again before a slot boundary is an unguarded
           recursion. *)
        if v.ploop then
          raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
        v.ploop <- true;
        let r = compile v.pdescr in
        v.ploop <- false;
        r
    | PType t -> IType t
    | PCapture x -> ICapture x
    | PConstant (x,v) -> IConstant (x,v)
    | POptional t -> IOptional (compile t)
    | POr (t1,t2) -> IOr (compile t1, compile t2)
    | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
    | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
    | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
    | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
    | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
    | PRecord (o,r) -> IRecord (o, LabelMap.map compile_slot r)
    | PRegexp (r,q) -> compile_regexp r q
(* Compile the regular expression [r] followed by the continuation [q] into
   a union ([IOr]) of alternatives.

   - [add accu i] appends alternative [i] to the optional accumulator.
   - [queue accu t] handles what remains once the current regexp is
     exhausted: a continuation that is itself a [PRegexp] is explored in
     place, any other continuation [t] is compiled and added.
   - [aux accu r q] explores [r] followed by [q]; [memo] records the
     (r, q) pairs already explored so that starred expressions terminate.

   Fix: the fallback arm of [queue] used to be [| _ -> add accu (compile q)],
   where [q] was the argument of [compile_regexp] rather than the
   continuation being matched.  The two differ when the outer continuation
   is itself a [PRegexp] that [queue] has unwrapped; the arm now binds the
   matched continuation.

   Fails with an assertion if no alternative at all is produced. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  let add accu i =
    match accu with None -> Some i | Some j -> Some (IOr (j,i)) in
  let get = function Some x -> x | None -> assert false in
  let rec queue accu = function
    | PRegexp (r,q) -> aux accu r q
    | q -> add accu (compile q)
  and aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon -> queue accu q
        | PElem p ->
            (* Be careful not to create pairs with same second component *)
            (* Split [p] along its top-level [POr]s, isolating the
               [PConstant] alternatives from ordinary patterns. *)
            let rec extract = function
              | PConstant (x,v) -> `Const (x,v)
              | POr (x,y) ->
                  (match extract x, extract y with
                    | `Pat x, `Pat y -> `Pat (POr (x,y))
                    | x, y -> `Or (x,y))
              | p -> `Pat p
            in
            let rec mk accu = function
              | `Const (x,v) ->
                  (* A constant consumes no element: it is intersected with
                     whatever the continuation alone yields. *)
                  (match queue None q with
                    | Some q -> add accu (IAnd (IConstant (x,v), q))
                    | None -> accu)
              | `Or (x,y) -> mk (mk accu x) y
              | `Pat p ->
                  add accu (ITimes (compile_slot p, compile_slot q))
            in
            mk accu (extract p)
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  get (aux None r q)
676
677
(* Return the slot standing for [p], creating it on first use.  A new slot
   is returned undefined: its definition and its free variables are queued
   in [todo_defs] / [todo_fv] and filled in by [flush_defs]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let slot = mk_slot () in
    todo_defs := (slot, p) :: !todo_defs;
    todo_fv := slot :: !todo_fv;
    DerecursTable.add compile_slot_hash p slot;
    slot
684

685
      
686
let timer_fv = Stats.Timer.create "Typer.fv"
687
(* Define every pending slot.  Compiling a definition may queue further
   slots, so [todo_defs] is drained until empty; afterwards the free
   variables of all slots in [todo_fv] are computed, under [timer_fv]. *)
let flush_defs () =
  let rec drain () =
    match !todo_defs with
      | (s, p) :: rest ->
          todo_defs := rest;
          s.d <- compile p;
          drain ()
      | [] -> ()
  in
  drain ();
  Stats.Timer.start timer_fv;
  List.iter compute_fv !todo_fv;
  todo_fv := [];
  Stats.Timer.stop timer_fv ()
698
699
700
701
702
703
704
705
706
707
708
709
710
711
	
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: build the [Types] value of a description.
   Captures and constants are not types and must have been rejected
   beforehand. *)
let rec typ d =
  match d with
    | IType t -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) -> Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | IRecord (o,r) -> Types.record' (o, LabelMap.map typ_node r)
    | IDummy | ICapture _ | IConstant (_,_) -> assert false

(* Type node of a slot, memoized in [typ_nodes].  The node is registered
   before its definition is computed, so recursive references find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ s.d);
    node
      
(* Phase (3) for patterns: a description without free variables is a plain
   type constraint; anything else goes through [pat_aux]. *)
let rec pat d : Patterns.descr =
  if not (IdSet.is_empty (fv_descr d)) then pat_aux d
  else Patterns.constr (typ d)

(* Pattern of a description known to have free variables.  Raises
   [Patterns.Error] on constructions that are not allowed in patterns. *)
and pat_aux d =
  match d with
    | IDummy -> assert false
    | IType _ -> assert false
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
        (* Removing a plain type is intersecting with its negation. *)
        let s2 = Types.neg (typ s2) in
        Patterns.cap (pat s1) (Patterns.constr s2)
    | IDiff _ ->
        raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ ->
        raise
          (Patterns.Error "Optional fields are not allowed in record patterns")
    | IArrow _ ->
        raise (Patterns.Error "Arrows are not allowed in patterns")
    | IRecord (o,r) ->
        (* Fields without free variables stay in the record type constraint;
           each other field becomes a separate record pattern and is
           replaced by [Types.any_node] in the constraint. *)
        let pats = ref [] in
        let aux l s =
          if IdSet.is_empty (fv_slot s) then typ_node s
          else
            ( pats := Patterns.record l (pat_node s) :: !pats;
              Types.any_node )
        in
        let constr = Types.record' (o,LabelMap.mapi aux r) in
        List.fold_left Patterns.cap (Patterns.constr constr) !pats
          (* TODO: can avoid constr when o=true, and all fields have fv *)

(* Pattern node of a slot, memoized in [pat_nodes].  The node is registered
   before being defined; if the definition fails the entry is removed again
   and the exception re-raised. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s node;
      Patterns.define node (pat s.d);
      node
    with exn ->
      (* For the toplevel ... *)
      SlotTable.remove pat_nodes s;
      raise exn
767

768

769
module Ids = Set.Make(Id)
770
(* Process a group of (possibly mutually recursive) type definitions [b],
   given as (identifier, AST type) pairs, and return the list of
   (identifier, type) pairs.

   - A name defined twice in the group is an error, reported at the second
     definition.
   - A definition containing a capture variable is an error.
   - A definition with a real location that yields the empty type triggers
     a warning.
   - Every resulting type is registered with [Types.Print.register_global].

   Fix: the duplicate-definition message spelled "identifier" as
   "identifer". *)
let type_defs env b =
  let check_unique seen (v, p) =
    if Ids.mem v seen then
      raise_loc_generic p.loc
        ("Multiple definitions for the type identifier " ^
         (Ident.to_string v));
    Ids.add v seen
  in
  ignore (List.fold_left check_unique Ids.empty b);

  let denv = derecurs_def (penv env) b in
  let compiled =
    List.map (fun (v, p) -> (v, p, compile (derecurs denv p))) b in
  flush_defs ();
  let finish (v, p, s) =
    check_no_capture p.loc (fv_descr s);
    let t = typ s in
    if (p.loc <> noloc) && (Types.is_empty t) then
      warning p.loc
        ("This definition yields an empty type for " ^ (Ident.to_string v));
    (v, t)
  in
  let result = List.map finish compiled in
  List.iter
    (fun (v, t) -> Types.Print.register_global (Id.value v) t)
    result;
  result
795
796


797
798
799
800
801
(* Print the names of all types bound in [env], each preceded by a space. *)
let dump_types ppf env =
  let show v item =
    match item with
      | Type _ -> Format.fprintf ppf " %a" Ident.print v
      | Val _ -> ()
  in
  Env.iter show env.ids
802
803
(* Print the type bound to [name].
   Raises [Error] when [name] is unbound or bound to a value. *)
let dump_type ppf env name =
  let missing () =
    raise (Error (Printf.sprintf "Type %s not found" (U.get_str name))) in
  try
    match Env.find (Ident.ident name) env.ids with
      | Type t -> Types.Print.print ppf t
      | Val _ -> raise Not_found
  with Not_found -> missing ()
809

810
811
812
(* Print the schema component designated by (kind, schema name, component
   name).  Raises [Error] when the schema or the component is unknown. *)
let dump_schema_type ppf env (k, s, n) =
  let descr = find_schema_descr_uri k (find_schema s env) n in
  Types.Print.print ppf descr
814

815
(* Print the namespace table of [env] with [Ns.dump_table]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table
817

818

819
820
(* Turn a [derecurs] term into a type node: compile it to a slot, define
   all pending slots, reject capture variables (error at [loc]), then build
   the node. *)
let do_typ loc r =
  let slot = compile_slot r in
  flush_defs ();
  let captured = fv_slot slot in
  check_no_capture loc captured;
  typ_node slot
824
   
825
826
(* Translate the AST type [p] into a type node, in environment [env]. *)
let typ env p =
  let d = derecurs (penv env) p in
  do_typ p.loc d
827
    
828
829
(* Translate the AST pattern [p] into a pattern node, in environment [env].
   A [Patterns.Error], or a located exception whose location is [noloc], is
   re-raised at the location of [p]. *)
let pat env p =
  let d = derecurs (penv env) p in
  let slot = compile_slot d in
  flush_defs ();
  try pat_node slot
  with
    | Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,_,exn) when loc = noloc ->
        raise (Location (p.loc, `Full, exn))
834
835


836
837
(* II. Build skeleton *)

838

839
type type_fun = Types.t -> bool -> Types.t
840
let typ_cst = ref (fun _ -> assert false)
841
842
843
844
let mk_unary_op = ref (fun _ _ -> assert false)
let typ_unary_op = ref (fun _ _ _ -> assert false)
let mk_binary_op = ref (fun _ _ -> assert false)
let typ_binary_op = ref (fun _ _ _ _ -> assert false)
845
846


847
module Fv = IdSet
848

849
850
851
type branch = Branch of Typed.branch * branch list

let cur_branch : branch list ref = ref []
852

853
(* Pair the free-variable set [fv] with a typed-expression node for [e] at
   [loc]; the node's type is initialised to [Types.empty]. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e;
    } in
  (fv, node)
859

860
861
862
863
864
865
866
867
868
869
870
(* Table of registered operators, keyed by name. *)
let ops = Hashtbl.create 13

(* [true] when [name] has been registered. *)
let is_op name = Hashtbl.mem ops name

(* Register operator [name] with its associated data. *)
let register_op name data = Hashtbl.add ops name data

(* Data registered for [name]; raises [Not_found] when there is none. *)
let typ_op name = Hashtbl.find ops name

(* Recognise an application spine whose head is a registered operator.
   Walking down the function side of nested [Apply] nodes (location
   wrappers are transparent), arguments are pushed in front of [args], so
   they come out in source order.  Returns (operator name, arguments).
   Raises [Not_found] when the head is not a registered operator. *)
let rec apply_op args e =
  match e with
    | LocatedExpr (_, inner) -> apply_op args inner
    | Apply (f, arg) -> apply_op (arg :: args) f
    | Var s ->
        let name = U.get_str s in
        if is_op name then (name, args) else raise Not_found
    | _ -> raise Not_found

871

872
873
(* Translate an AST expression into a typed-expression skeleton.  Returns
   the set of free variables together with the node; [loc] is the location
   of the innermost enclosing [LocatedExpr]. *)
let rec expr env loc e =
  (* Node built from two sub-expressions. *)
  let pair_node mk e1 e2 =
    let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
    exp loc (Fv.cup fv1 fv2) (mk e1 e2) in
  (* Node built from a sub-expression and a list of branches. *)
  let branch_node mk e b =
    let (fv1,e) = expr env loc e
    and (fv2,b) = branches env b in
    exp loc (Fv.cup fv1 fv2) (mk e b) in
  (* Node built from a sub-expression and a record label. *)
  let field_node mk e l =
    let (fv,e) = expr env loc e in
    exp loc fv (mk e (parse_label env loc l)) in
  match e with
  | LocatedExpr (loc,e) -> expr env loc e
  | Forget (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Forget (e,t))
  | Var s -> var env loc s
  | Apply (e1,e2) ->
      (* An application whose head is a registered operator becomes an
         [Op] node; any [Not_found] falls back to a plain application. *)
      (try
         let (op,args) = apply_op [e2] e1 in
         let (fvs,args) = List.split (List.map (expr env loc) args) in
         let fv = List.fold_left Fv.cup Fv.empty fvs in
         exp loc fv (Typed.Op (op,args))
       with Not_found ->
         pair_node (fun f x -> Typed.Apply (f,x)) e1 e2
      )
  | Abstraction a ->
      let iface = List.map (fun (t1,t2) -> (typ env t1, typ env t2))
                    a.fun_iface in
      (* The function type is the intersection of all interface arrows. *)
      let t = List.fold_left
                (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
                Types.any iface in
      let iface = List.map
                    (fun (t1,t2) -> (Types.descr t1, Types.descr t2))
                    iface in
      let (fv0,body) = branches env a.fun_body in
      (* The function's own name is not free in it. *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | (Integer _ | Char _ | Atom _ | Const _) as c ->
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Pair (e1,e2) -> pair_node (fun x y -> Typed.Pair (x,y)) e1 e2
  | Xml (e1,e2) -> pair_node (fun x y -> Typed.Xml (x,y)) e1 e2
  | Dot (e,l) -> field_node (fun x lab -> Typed.Dot (x,lab)) e l
  | RemoveField (e,l) ->
      field_node (fun x lab -> Typed.RemoveField (x,lab)) e l
  | RecordLitt r ->
      (* Free variables of the fields are accumulated on the side while the
         record is being parsed. *)
      let fv = ref Fv.empty in
      let r = parse_record env loc
                (fun e ->
                   let (fv2,e) = expr env loc e
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr env loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (try
         (match ltes with
            | [e] -> exp loc fv (Typed.UnaryOp (!mk_unary_op op env, e))
            | [e1;e2] ->
                exp loc fv (Typed.BinaryOp (!mk_binary_op op env, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)
  | Match (e,b) -> branch_node (fun x br -> Typed.Match (x, br)) e b
  | Map (e,b) -> branch_node (fun x br -> Typed.Map (x, br)) e b
  | Transform (e,b) -> branch_node (fun x br -> Typed.Transform (x, br)) e b
  | Xtrans (e,b) -> branch_node (fun x br -> Typed.Xtrans (x, br)) e b
  | Try (e,b) -> branch_node (fun x br -> Typed.Try (x, br)) e b
  | Validate (e,kind,schema,elt) ->
      let (fv,e) = expr env loc e in
      let uri = find_schema schema env in
      exp loc fv (Typed.Validate (e, kind, uri, elt))
  | NamespaceIn (pr,ns,e) ->
      let env = enter_ns pr ns env in
      expr env loc e
  | Ref (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Ref (e,t))
  | External (s,args) ->
      extern loc env s args

  (* Reference to the external [s] instantiated with the types [args]: the
     types are translated, then [Externals.resolve] is called; any exception
     it raises is re-raised located at [loc]. *)
  and extern loc env s args =
    let targs = List.map (typ env) args in
    try
      let (index, t) = Externals.resolve s targs in
      exp loc Fv.empty (Typed.External (t, index))
    with exn -> raise_loc loc exn
983
984
985
986
987
988
989
990
991
992
993
994
995

  (* Translate a variable occurrence.
     Unqualified name containing a '.': treated as an external without type
     arguments.  Other unqualified name: an ordinary variable, free in the
     result.  Qualified name ("cu:id"): a reference into compilation unit
     [cu], with no free variable. *)
  and var env loc s =
  match Ns.split_qname s with
    | "", name ->
        let str = U.get_str name in
        if String.contains str '.' then
          extern loc env str []
        else begin
          let id = ident name in
          exp loc (Fv.singleton id) (Typed.Var id)
        end
    | cu, name ->
        let cu = find_cu (U.mk cu) env in
        exp loc Fv.empty (Typed.ExtVar (cu, ident name))
996
	      
997
  and branches env b = 
998
    let fv = ref Fv.empty in
999
    let accept = ref Types.empty in
1000
    let branch (p,e) = 
1001
1002
      let cur_br = !cur_branch in
      cur_branch := [];