typer.ml 35.5 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraint
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7

8
9
10
11
12
13
(* Emit a warning for location [loc] on the formatter currently stored in
   [Location.warning_ppf]: the location, its highlighted rendering, then
   the message [msg]. *)
let warning loc msg =
  let ppf = !Location.warning_ppf in
  Format.fprintf ppf "Warning %a:@\n%a%s@\n"
    Location.print_loc loc
    Location.html_hilight loc
    msg

14
15
(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
16
17
18

open Location
open Ast
19
open Ident
20

21
(* Ordered type of strings, used as the key of [TypeEnv].  Uses the
   string-specific comparison rather than the polymorphic [compare]. *)
module S = struct type t = string let compare = String.compare end
22
module TypeEnv = Map.Make(S)
23

24
exception NonExhaustive of Types.descr
25
exception Constraint of Types.descr * Types.descr
26
exception ShouldHave of Types.descr * string
27
exception ShouldHave2 of Types.descr * string * Types.descr
28
exception WrongLabel of Types.descr * label
29
exception UnboundId of id
30
exception Error of string
31
32

(* Raise [exn] wrapped, together with [loc], in the [Location] exception. *)
let raise_loc loc exn = raise (Location (loc,exn))
33
(* Raise this module's [Error msg] exception, located at [loc]. *)
let error loc msg = raise_loc loc (Error msg)
34

35
36
37
38
39
40
41
42
43
44
  (* Schema datastructures *)

(* Sets of strings; used for the set of imported schemas below. *)
module StringSet = Set.Make (String)
let schemas = State.ref "Typer.schemas" StringSet.empty (* just to remember imported schemas *)

(* Tables of schema components.  Each is a [State.ref] holding a hash
   table; in this file they are looked up with a pair of strings as key
   (see the [SchemaVar] case of [derecurs]).  The values stored in
   [schema_elements] are pairs whose first component is the one used
   here. *)
let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes : (string * string, Types.descr) Hashtbl.t ref =
  State.ref "Typer.schema_attributes" (Hashtbl.create 51)

45

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* [seq_vars accu re] adds to [accu] every variable bound by a
   [SeqCapture] node occurring in the regular expression [re]. *)
let rec seq_vars accu re =
  match re with
  | Epsilon | Elem _ -> accu
  | Star sub | WeakStar sub -> seq_vars accu sub
  | SeqCapture (x,sub) -> seq_vars (IdSet.add x accu) sub
  | Seq (left,right) | Alt (left,right) ->
      let accu = seq_vars accu left in
      seq_vars accu right

(* A slot stands for one named (possibly recursive) type/pattern
   definition. *)
type derecurs_slot = {
  ploc : Location.loc;               (* location of the definition *)
  pid  : int;                        (* unique id, taken from [counter] by [mk_slot] *)
  mutable ploop : bool;              (* set while the slot is being compiled;
                                        used to detect unguarded recursion *)
  mutable pdescr : derecurs option   (* body of the definition; [None] until
                                        [derecurs_def] fills it in *)
} and derecurs =
  (* Intermediate form of types and patterns in which named references
     have been replaced by pointers to slots. *)
  | PAlias of derecurs_slot
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp * derecurs
and derecurs_regexp =
  (* Regular expressions over [derecurs] elements. *)
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

(* Structural hash on [derecurs] terms, compatible with [equal_derecurs].
   An alias hashes to the [pid] of its slot, so hashing never follows a
   (possibly recursive) definition.  Each constructor mixes a distinct
   small tag with the hashes of its components. *)
let rec hash_derecurs = function
  | PAlias s -> 
      s.pid
  | PType t -> 
      1 + 17 * (Types.hash_descr t)
  | POr (p1,p2) -> 
      2 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PAnd (p1,p2) -> 
      3 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PDiff (p1,p2) -> 
      4 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PTimes (p1,p2) -> 
      5 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PXml (p1,p2) -> 
      6 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PArrow (p1,p2) -> 
      7 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | POptional p -> 
      8 + 17 * (hash_derecurs p)
  | PRecord (o,r) -> 
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x -> 
      11 + 17 * (Id.hash x)
  | PConstant (x,c) -> 
      12 + 17 * (Id.hash x) + 257 * (Types.hash_const c)
  | PRegexp (p,q) -> 
      13 + 17 * (hash_derecurs_regexp p) + 257 * (hash_derecurs q)

(* Same as [hash_derecurs], for regular expressions. *)
and hash_derecurs_regexp = function
  | PEpsilon -> 
      1
  | PElem p -> 
      2 + 17 * (hash_derecurs p)
  | PSeq (p1,p2) -> 
      3 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PAlt (p1,p2) -> 
      4 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PStar p -> 
      5 + 17 * (hash_derecurs_regexp p)
  | PWeakStar p -> 
      6 + 17 * (hash_derecurs_regexp p)
121
122

(* Structural equality on [derecurs] terms.  Physically equal terms are
   equal; aliases are equal only when they point to the same slot, so
   the comparison never unfolds a definition. *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 -> 
      s1 == s2
  | PType t1, PType t2 -> 
      Types.equal_descr t1 t2
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) -> 
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 -> 
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) -> 
      (* booleans are compared structurally, as in [equal_descr] *)
      (o1 = o2) && (LabelMap.equal equal_derecurs r1 r2)
  | PCapture x1, PCapture x2 -> 
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) -> 
      (Id.equal x1 x2) && (Types.equal_const c1 c2)
  | PRegexp (p1,q1), PRegexp (p2,q2) -> 
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  | _ -> false

(* Structural equality on regular expressions. *)
and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon -> 
      true
  | PElem p1, PElem p2 -> 
      equal_derecurs p1 p2
  | PSeq (p1,q1), PSeq (p2,q2) 
  | PAlt (p1,q1), PAlt (p2,q2) -> 
      (equal_derecurs_regexp p1 p2) && (equal_derecurs_regexp q1 q2)
  | PStar p1, PStar p2
  | PWeakStar p1, PWeakStar p2 -> 
      equal_derecurs_regexp p1 p2
  | _ -> false
157

158
159
160
161
162
163
164
165
166
167
168
(* Hash tables keyed by [derecurs] terms, using the structural hash and
   equality defined above. *)
module DerecursTable = Hashtbl.Make(
  struct 
    type t = derecurs 
    let hash = hash_derecurs
    let equal = equal_derecurs
  end
)

(* Hash tables keyed by a pair (regular expression, continuation); used
   as the memoization table of [compile_regexp]. *)
module RE = Hashtbl.Make(
  struct 
    type t = derecurs_regexp * derecurs 
    let hash (p,q) = 
      (hash_derecurs_regexp p) + 17 * (hash_derecurs q)
    let equal (p1,q1) (p2,q2) = 
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  end
)
175

176
177
178
179
180
181
182
183
184
  
let counter = State.ref "Typer.counter - derecurs" 0
(* Allocate a fresh, still undefined slot for a definition located at
   [loc].  The slot id is the value of [counter] after incrementing it. *)
let mk_slot loc =
  incr counter;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = None }
  
(* [derecurs env p] translates the abstract syntax [p] of a type or
   pattern into the [derecurs] form.  Names are resolved in [env], which
   maps them to slots; schema references are resolved in the schema
   tables.

   Raises a located error (through [raise_loc_generic]) for an unbound
   name, and [Failure] when a schema component cannot be found. *)
let rec derecurs env p = match p.descr with
  | PatVar v ->
      (try PAlias (TypeEnv.find v env)
       with Not_found -> 
         raise_loc_generic p.loc ("Undefined type/pattern " ^ v))
  | SchemaVar (kind, schema, item) ->
      let try_elt () = fst (Hashtbl.find !schema_elements (schema, item)) in
      let try_typ () = Hashtbl.find !schema_types (schema, item) in
      let try_att () = Hashtbl.find !schema_attributes (schema, item) in
      (* Run the lookup [find]; report a missing component of kind [what]. *)
      let lookup what find =
        try PType (find ())
        with Not_found ->
          failwith (Printf.sprintf
            "No %s named '%s' found in schema '%s'" what item schema) in
      (match kind with
      | `Element -> lookup "element" try_elt
      | `Type -> lookup "type" try_typ
      | `Attribute -> lookup "attribute" try_att
      | `Any ->
          (* try elements first, then types, then attributes *)
          lookup "item"
            (fun () ->
               try try_elt () with Not_found ->
                 (try try_typ () with Not_found -> try_att ())))
  | Recurs (p,b) -> derecurs (derecurs_def env b) p
  | Internal t -> PType t
  | Or (p1,p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1,p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1,p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1,p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1,p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1,p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional p -> POptional (derecurs env p)
  | Record (o,r) -> PRecord (o, LabelMap.map (derecurs env) r)
  | Capture x -> PCapture x
  | Constant (x,c) -> PConstant (x,c)
  | Regexp (r,q) -> 
      (* Every sequence capture variable of [r] is bound to the nil atom
         in the tail [q]. *)
      let constant_nil t v = 
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let vars = seq_vars IdSet.empty r in
      let q = IdSet.fold constant_nil (derecurs env q) vars in
      let r = derecurs_regexp (fun p -> p) env r in
      PRegexp (r, q)

(* [derecurs_regexp vars env r] translates the regular expression [r].
   [vars] is applied to every translated element; it accumulates the
   captures of the enclosing [SeqCapture] nodes. *)
and derecurs_regexp vars env = function
  | Epsilon -> 
      PEpsilon
  | Elem p -> 
      PElem (vars (derecurs env p))
  | Seq (p1,p2) -> 
      PSeq (derecurs_regexp vars env p1, derecurs_regexp vars env p2)
  | Alt (p1,p2) -> 
      PAlt (derecurs_regexp vars env p1, derecurs_regexp vars env p2)
  | Star p -> 
      PStar (derecurs_regexp vars env p)
  | WeakStar p -> 
      PWeakStar (derecurs_regexp vars env p)
  | SeqCapture (x,p) -> 
      derecurs_regexp (fun p -> PAnd (vars p, PCapture x)) env p

(* [derecurs_def env b] processes the mutually recursive definitions [b]:
   a slot is first allocated and bound in the environment for every
   name, then each body is translated in the extended environment and
   stored in its slot.  Returns the extended environment. *)
and derecurs_def env b =
  let b = List.map (fun (v,p) -> (v,p,mk_slot p.loc)) b in
  let env = List.fold_left (fun env (v,p,s) -> TypeEnv.add v s env) env b in
  List.iter (fun (v,p,s) -> s.pdescr <- Some (derecurs env p)) b;
  env
258

259
(* Stratification and recursive hash-consing *)
260
261
262
263
264
265
266
267
268

(* Stratified form of types and patterns: boolean combinations are
   inlined in [descr], while the components of products, XML elements,
   arrows and record fields are references to [slot]s. *)
type descr = 
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;      (* cached free variables, see [compute_fv] *)
  mutable hash : int option;   (* cached hash value, see [Arg.hash] *)
  (* rank/generation stamps used by [hash_slot], [equal_slot] and
     [fv_slot] to mark the slots visited during the current traversal *)
  mutable rank1: int; mutable rank2: int;
  mutable gen1 : int; mutable gen2: int;
  mutable d    : descr option  (* definition; [None] until [flush_defs] runs *)
}
    
(* Definition stored in slot [s].  The slot must already be defined;
   an undefined slot triggers an assertion failure. *)
let descr s = 
  match s.d with
    | Some d -> d
    | None -> assert false
	
(* Global stamps shared by the slot traversals below: [gen] identifies
   the current traversal, [rank] numbers the slots visited during it. *)
let gen = ref 0
let rank = ref 0
	     
(* Hash of a [descr].  Slots are hashed through [hash_slot], which copes
   with cyclic structures. *)
let rec hash_descr = function
  | IType x -> Types.hash_descr x
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.hash_const y)
(* Hash of a slot.  A slot already stamped with the current generation
   has been visited during this traversal: it contributes a value
   derived from its rank instead of being unfolded again.  Otherwise the
   slot gets the next rank, is stamped, and its definition is hashed. *)
and hash_slot s =
  if s.gen1 = !gen then 13 * s.rank1
  else (
    incr rank;
    s.rank1 <- !rank; s.gen1 <- !gen;
    hash_descr (descr s)
  )
    
(* Equality of two [descr]s; slots are compared through [equal_slot]. *)
let rec equal_descr d1 d2 = 
  match (d1,d2) with
  | IType x1, IType x2 -> Types.equal_descr x1 x2
  | IOr (x1,y1), IOr (x2,y2) 
  | IAnd (x1,y1), IAnd (x2,y2) 
  | IDiff (x1,y1), IDiff (x2,y2) -> (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2) 
  | IXml (x1,y1), IXml (x2,y2) 
  | IArrow (x1,y1), IArrow (x2,y2) -> (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) -> 
      (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) -> 
      (Id.equal x1 x2) && (Types.equal_const y1 y2)
  | _ -> false

(* Equality of two slots, unfolding their definitions.  The left slot is
   stamped through [gen1]/[rank1], the right one through [gen2]/[rank2].
   Two slots already visited in the current generation are equal iff
   they received the same rank; two unvisited slots get the same fresh
   rank and their definitions are compared; any other combination is
   unequal. *)
and equal_slot s1 s2 =
  ((s1.gen1 = !gen) && (s2.gen2 = !gen) && (s1.rank1 = s2.rank2))
  ||
  ((s1.gen1 <> !gen) && (s2.gen2 <> !gen) && (
     incr rank;
     s1.rank1 <- !rank; s1.gen1 <- !gen;
     s2.rank2 <- !rank; s2.gen2 <- !gen;
     equal_descr (descr s1) (descr s2)
   ))
  
(* Hashed type of slots, argument of [Hashtbl.Make] for [SlotTable]. *)
module Arg = struct
  type t = slot
      
  (* Hash of a slot, cached in its [hash] field.  A new generation is
     started and ranks are reset before each computation. *)
  let hash s =
    match s.hash with
      | Some h -> h
      | None ->
          incr gen; rank := 0; 
          let h = hash_slot s in
          s.hash <- Some h;
          h
            
  (* Physically equal slots are equal; otherwise the two slots are
     compared with [equal_slot] in a fresh generation. *)
  let equal s1 s2 = 
    (s1 == s2) || 
    (incr gen; rank := 0; 
     let e = equal_slot s1 s2 in
(*     if e then Printf.eprintf "Recursive hash-consig: Equal\n";  *)
     e)
end
354
355
356
357
358
359
360
361
362
module SlotTable = Hashtbl.Make(Arg)
  
(* Free (capture) variables of a slot.  The cached value is returned
   when available.  Otherwise a slot already stamped with the current
   generation contributes nothing (it is being visited), and an
   unvisited slot is stamped and its definition traversed. *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None ->
        if s.gen1 = !gen then IdSet.empty 
        else (s.gen1 <- !gen; fv_descr (descr s))
(* Free (capture) variables of a [descr]. *)
and fv_descr = function
  | IType _ -> IdSet.empty
  | IOr (d1,d2)
  | IAnd (d1,d2)  
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d -> fv_descr d
  | ITimes (s1,s2)  
  | IXml (s1,s2)  
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (_,r) -> 
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
  | ICapture x | IConstant (x,_) -> IdSet.singleton x
374

375
376
377
378
379
380
381
382
383
      
(* Fill the [fv] cache of slot [s] if it is still empty.  The
   computation runs in a fresh generation. *)
let compute_fv s =
  match s.fv with
    | None ->
        incr gen;
        let vars = fv_slot s in
        s.fv <- Some vars
    | Some _ -> ()
	  
384
385

let todo_fv = ref []
386
387
388
389
390
391
392
393
	  
(* Allocate a fresh undefined slot and register it in [todo_fv], so that
   its free variables get computed by the next [flush_fv]. *)
let mk () =   
  let s = 
    { d = None;
      fv = None;
      hash = None;
      rank1 = 0; rank2 = 0;
      gen1 = 0; gen2 = 0 } in
  todo_fv := s :: !todo_fv;
  s
396
397
398
399

(* Compute the free variables of every slot recorded in [todo_fv], then
   empty that list. *)
let flush_fv () =
  List.iter compute_fv !todo_fv;
  todo_fv := []
400
    
401
402
403
(* Memoization tables: [compile_slot_hash] maps a [derecurs] term to the
   slot allocated for it by [compile_slot]; [compile_hash] maps it to
   the [descr] computed by [compile]. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

404
let defs = ref []
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438

(* [compile p] translates a [derecurs] term into a [descr], memoizing
   the result in [compile_hash]. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    c

(* Actual translation.  An alias is unfolded in place; the [ploop] flag
   of its slot is set while its body is compiled, so that reaching the
   same alias again without crossing a slot boundary is reported as an
   unguarded recursion. *)
and real_compile = function
  | PAlias v ->
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r =
        try (match v.pdescr with Some x -> compile x | _ -> assert false)
        with exn ->
          (* do not leave the slot marked as "in progress" on failure *)
          v.ploop <- false;
          raise exn
      in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) ->  IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q

(* [compile_regexp r q] compiles the regular expression [r] followed by
   the continuation [q] into a union of [descr]s.  [memo] records the
   (regexp, continuation) pairs already visited, which makes the
   unfolding of stars terminate. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  let rec aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon -> 
            (match q with 
               | PRegexp (r,q) -> aux accu r q 
               | _ -> (compile q) :: accu)
        | PElem p -> ITimes (compile_slot p, compile_slot q) :: accu
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  let accu = aux [] r q in
  match accu with
    | [] -> assert false
    | p::l -> List.fold_left (fun acc p -> IOr (p,acc)) p l

(* [compile_slot p] returns the slot associated with [p], allocating it
   if needed.  A new slot is left undefined and queued in [defs]; it is
   filled in by [flush_defs]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let s = mk () in
    defs := (s,p) :: !defs;
    DerecursTable.add compile_slot_hash p s;
    s
461

462
463
464
465
      
(* Define every slot queued in [defs].  Compiling a definition may queue
   further slots, hence the loop until the queue is empty. *)
let rec flush_defs () = 
  match !defs with
    | [] -> ()
    | (s,p)::t -> defs := t; s.d <- Some (compile p); flush_defs ()
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
	
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* [typ d] interprets the description [d] as a type.  Captures and
   constants are not allowed here (assertion failure). *)
let rec typ d =
  match d with
  | IType t -> t
  | IOr (a,b) -> Types.cup (typ a) (typ b)
  | IAnd (a,b) ->  Types.cap (typ a) (typ b)
  | IDiff (a,b) -> Types.diff (typ a) (typ b)
  | ITimes (a,b) -> Types.times (typ_node a) (typ_node b)
  | IXml (a,b) -> Types.xml (typ_node a) (typ_node b)
  | IArrow (a,b) -> Types.arrow (typ_node a) (typ_node b)
  | IOptional a -> Types.Record.or_absent (typ a)
  | IRecord (opened,fields) ->
      Types.record' (opened, LabelMap.map typ_node fields)
  | ICapture _ | IConstant _ -> assert false

(* Type node associated with slot [s], memoized in [typ_nodes].  The
   node is registered before its definition is computed, so recursive
   references to [s] find it. *)
and typ_node s : Types.node =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    Types.define node (typ (descr s));
    node
      
(* [pat d] interprets the description [d] as a pattern.  A description
   without capture variables is turned into a type constraint; the
   other cases are handled by [pat_aux]. *)
let rec pat d : Patterns.descr =
  if IdSet.is_empty (fv_descr d)
  then Patterns.constr (typ d)
  else pat_aux d
    
    
(* Translation of descriptions that contain capture variables.  Raises
   [Patterns.Error] on constructions that are not allowed in patterns. *)
and pat_aux = function
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      (* p \ t  is encoded as  p & (not t)  when t has no capture *)
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
      raise (Patterns.Error "Difference not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ -> 
      raise (Patterns.Error "Optional field not allowed in record patterns")
  | IRecord (o,r) ->
      (* Fields without captures stay in the record type constraint;
         fields with captures are replaced by [Types.any_node] in the
         constraint and give rise to a separate record pattern. *)
      let pats = ref [] in
      let aux l s = 
	if IdSet.is_empty (fv_slot s) then typ_node s
	else
	  ( pats := Patterns.record l (pat_node s) :: !pats;
	    Types.any_node )
      in
      let constr = Types.record' (o,LabelMap.mapi aux r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
	(* TODO: can avoid constr when o=true, and all fields have fv *)
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IArrow _ ->
      raise (Patterns.Error "Arrow not allowed in patterns")
  | IType _ -> assert false  (* no capture variable: handled by [pat] *)
      
(* Pattern node associated with slot [s], memoized in [pat_nodes].  The
   node is registered before its definition is computed. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let x = Patterns.make (fv_slot s) in
    SlotTable.add pat_nodes s x;
    Patterns.define x (pat (descr s));
    x
      
let glb = State.ref "Typer.glb_env" TypeEnv.empty
535

536
(* [register_global_types b] adds the (mutually recursive) type
   definitions [b] to the global environment [glb] and registers the
   resulting types with the type printer.

   Raises a located error if a name is already defined globally or if a
   definition contains capture variables; prints a warning for a
   definition that yields an empty type.  On any exception the global
   environment is restored to its previous value. *)
let register_global_types b =
  List.iter 
    (fun (v,p) ->
       if TypeEnv.mem v !glb
       then raise_loc_generic p.loc ("Multiple definition for type " ^ v)
    ) b;
  let old_glb = !glb in
  try
    glb := derecurs_def !glb b;
    let b = List.map (fun (v,p) -> (v,p,compile (derecurs !glb p))) b in
    flush_defs ();
    flush_fv ();
    let b = 
      List.map 
        (fun (v,p,s) -> 
           if not (IdSet.is_empty (fv_descr s)) then
             raise_loc_generic p.loc 
               "Capture variables are not allowed in types";
           let t = typ s in
           if (p.loc <> noloc) && (Types.is_empty t) then
             warning p.loc 
               ("This definition yields an empty type for " ^ v);
           (v,t)) b in
    List.iter (fun (v,t) -> Types.Print.register_global v t) b
  with e ->
    glb := old_glb;
    raise e
563
564
565

(* Print on [ppf] the name of every globally defined type, each one
   preceded by a space. *)
let dump_global_types ppf =
  let print_name name _ = Format.fprintf ppf " %s" name in
  TypeEnv.iter print_name !glb
566
567
568

(* [do_typ loc r] compiles the [derecurs] term [r] and returns the
   corresponding type node.  Raises a located error at [loc] if [r]
   contains capture variables. *)
let do_typ loc r = 
  let s = compile_slot r in
  flush_defs ();
  flush_fv ();
  if IdSet.is_empty (fv_slot s) then typ_node s
  else raise_loc_generic loc "Capture variables are not allowed in types"
   
(* Translate the abstract syntax [p] of a type into a type node,
   resolving names in the global environment. *)
let typ p =
  do_typ p.loc (derecurs !glb p)
576
577
    
(* Translate the abstract syntax [p] of a pattern into a pattern node,
   resolving names in the global environment.  Errors raised by the
   [Patterns] module, and located errors carrying no location, are
   re-raised with the location of [p]. *)
let pat p = 
  let s = compile_slot (derecurs !glb p) in
  flush_defs ();
  flush_fv ();
  try pat_node s
  with Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,exn) when loc = noloc -> raise (Location (p.loc, exn))
584
585


586
587
(* II. Build skeleton *)

588
module Fv = IdSet
589

590
591
592
type branch = Branch of Typed.branch * branch list

let cur_branch : branch list ref = ref []
593

594
(* Build a typed expression node for [e] at location [loc], with an
   initially empty type, paired with its set of free variables [fv]. *)
let exp loc fv e =
  fv,
  { Typed.exp_loc = loc;
    Typed.exp_typ = Types.empty;
    Typed.exp_descr = e;
  }
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657


(* [expr loc e] translates the parsed expression [e] into a typed
   expression skeleton and returns it together with its free variables.
   [loc] is the location of the innermost enclosing [LocatedExpr]. *)
let rec expr loc = function
  | LocatedExpr (loc,e) -> expr loc e
  | Forget (e,t) ->
      let (fv,e) = expr loc e and t = typ t in
      exp loc fv (Typed.Forget (e,t))
  | Var s -> 
      exp loc (Fv.singleton s) (Typed.Var s)
  | Apply (e1,e2) -> 
      let (fv1,e1) = expr loc e1 and (fv2,e2) = expr loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Apply (e1,e2))
  | Abstraction a ->
      let iface = List.map (fun (t1,t2) -> (typ t1, typ t2)) 
                    a.fun_iface in
      (* the type of the function is the intersection of its arrows *)
      let t = List.fold_left 
                (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2)) 
                Types.any iface in
      let iface = List.map 
                    (fun (t1,t2) -> (Types.descr t1, Types.descr t2)) 
                    iface in
      let (fv0,body) = branches a.fun_body in
      (* the function's own name is bound inside its body *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction 
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | Cst c -> 
      exp loc Fv.empty (Typed.Cst c)
  | Pair (e1,e2) ->
      let (fv1,e1) = expr loc e1 and (fv2,e2) = expr loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
      let (fv1,e1) = expr loc e1 and (fv2,e2) = expr loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
      let (fv,e) = expr loc e in
      exp loc fv (Typed.Dot (e,l))
  | RemoveField (e,l) ->
      let (fv,e) = expr loc e in
      exp loc fv (Typed.RemoveField (e,l))
  | RecordLitt r -> 
      let fv = ref Fv.empty in
      let r = LabelMap.map 
                (fun e -> 
                   let (fv2,e) = expr loc e 
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (try
         (match (ltes,Typed.find_op op) with
            | [e], `Unary op -> exp loc fv (Typed.UnaryOp (op, e))
            | [e1;e2], `Binary op -> exp loc fv (Typed.BinaryOp (op, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)

  | Match (e,b) -> 
      let (fv1,e) = expr loc e
      and (fv2,b) = branches b in
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
  | Map (e,b) ->
      let (fv1,e) = expr loc e
      and (fv2,b) = branches b in
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
      let (fv1,e) = expr loc e
      and (fv2,b) = branches b in
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
  | Xtrans (e,b) ->
      let (fv1,e) = expr loc e
      and (fv2,b) = branches b in
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
  | Validate (e,schema,elt) ->
      let (fv,e) = expr loc e in
      exp loc fv (Typed.Validate (e, schema, elt))
  | Try (e,b) ->
      let (fv1,e) = expr loc e
      and (fv2,b) = branches b in
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))

689
	      
690
  (* [branches b] translates a list of (pattern, body) branches.  Returns
     the free variables of the branches (those of each body minus the
     variables bound by its pattern) and the typed branches record, whose
     accepted type is the union of the types accepted by the patterns.
     Each branch is also recorded in [cur_branch], nested under the
     branch that encloses it, for the unused-branch report. *)
  and branches b = 
    let fv = ref Fv.empty in
    let accept = ref Types.empty in
    let branch (p,e) = 
      (* collect the branches met inside the body in a fresh list *)
      let cur_br = !cur_branch in
      cur_branch := [];
      let (fv2,e) = expr noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat p in
      let fv2 = Fv.diff fv2 (Patterns.fv p) in
      fv := Fv.cup !fv fv2;
      accept := Types.cup !accept (Types.descr (Patterns.accept p));
      let br = 
        { 
          Typed.br_loc = br_loc;
          (* a branch without location is marked used from the start *)
          Typed.br_used = br_loc = noloc;
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: cur_br;
      br in
    let b = List.map branch b in
    (!fv, 
     { 
       Typed.br_typ = Types.empty; 
       Typed.br_branches = b; 
       Typed.br_accept = !accept;
       Typed.br_compiled = None;
     } 
    )
719

720
721
let expr = expr noloc

722
723
724
(* Build the typed skeleton of the declaration [let p = e].  The body is
   translated before the pattern. *)
let let_decl p e =
  let (_,e) = expr e in
  { Typed.let_pat = pat p;
    Typed.let_body = e;
    Typed.let_compiled = None }

(* III. Type-checks *)

type env = Types.descr Env.t
731
732
733

open Typed

734
735
(* [require loc t s] checks that [t] is a subtype of [s]; otherwise it
   raises the located exception [Constraint (t, s)]. *)
let require loc t s = 
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
736

737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
(* Like [require], but returns [t] when the check succeeds. *)
let check loc t s = 
  require loc t s; t

(* Raise the located exception [ShouldHave (constr, s)]; [s] is the
   explanation attached to the expected type [constr]. *)
let should_have loc constr s =
  raise_loc loc (ShouldHave (constr,s))

(* [flatten loc arg constr precise] types an expression whose value is a
   sequence of sequences that gets flattened.  [arg] is the type-checking
   function of the inner expression: it is given the constraint for the
   unflattened value and a precision flag.

   The constraint [constr] is approximated by a star type [constr'].
   When this approximation is exact (a subtype of [constr]), checking
   the inner expression against a star of [constr'] is enough.
   Otherwise the precise type of the inner expression is computed, and
   the flattened result is checked against [constr] at [loc], in the
   same way as [type_map] checks its result in the inexact case. *)
let flatten loc arg constr precise =
  let constr' = Sequence.star 
                  (Sequence.approx (Types.cap Sequence.any constr)) in
  let sconstr' = Sequence.star constr' in
  let exact = Types.subtype constr' constr in
  if exact then
    let t = arg sconstr' precise in
    if precise then Sequence.flatten t else constr
  else
    let t = arg sconstr' true in
    check loc (Sequence.flatten t) constr
754

755
756
(* [type_check env e constr precise] checks that expression [e] has type
   [constr] in environment [env].  When [precise] is set the result is
   the type computed for [e]; otherwise it is [constr] itself.  The
   result is also accumulated into the [exp_typ] field of [e]. *)
let rec type_check env e constr precise = 
  let d = type_check' e.exp_loc env e.exp_descr constr precise in
  let d = if precise then d else constr in
  e.exp_typ <- Types.cup e.exp_typ d;
  d

761
(* Type-check the expression descriptor [e], located at [loc], against
   the constraint [constr].  Typing errors are raised as located
   exceptions. *)
and type_check' loc env e constr precise = match e with
  | Forget (e,t) ->
      let t = Types.descr t in
      ignore (type_check env e t false);
      check loc t constr

  | Abstraction a ->
      let t =
        try Types.Arrow.check_strenghten a.fun_typ constr 
        with Not_found -> 
          should_have loc constr
            "but the interface of the abstraction is not compatible"
      in
      (* a named function is visible in its own body *)
      let env = match a.fun_name with
        | None -> env
        | Some f -> Env.add f a.fun_typ env in
      (* the body is checked once for every arrow of the interface *)
      List.iter 
        (fun (t1,t2) ->
           let acc = a.fun_body.br_accept in 
           if not (Types.subtype t1 acc) then
             raise_loc loc (NonExhaustive (Types.diff t1 acc));
           ignore (type_check_branches loc env t1 a.fun_body t2 false)
        ) a.fun_iface;
      t

  | Match (e,b) ->
      let t = type_check env e b.br_accept true in
      type_check_branches loc env t b constr precise

  | Try (e,b) ->
      let te = type_check env e constr precise in
      let tb = type_check_branches loc env Types.any b constr precise in
      Types.cup te tb

  | Pair (e1,e2) ->
      type_check_pair loc env e1 e2 constr precise

  | Xml (e1,e2) ->
      type_check_pair ~kind:`XML loc env e1 e2 constr precise

  | RecordLitt r ->
      type_record loc env r constr precise

  | Map (e,b) ->
      type_map loc env false e b constr precise

  | Transform (e,b) ->
      flatten loc (type_map loc env true e b) constr precise

  | Apply (e1,e2) ->
      let t1 = type_check env e1 Types.Arrow.any true in
      let t1 = Types.Arrow.get t1 in
      let dom = Types.Arrow.domain t1 in
      let res =
        if Types.Arrow.need_arg t1 then
          let t2 = type_check env e2 dom true in
          Types.Arrow.apply t1 t2
        else
          (ignore (type_check env e2 dom false); Types.Arrow.apply_noarg t1)
      in
      check loc res constr

  | UnaryOp (o,e) ->
      let t = o.un_op_typer loc 
                (type_check env e) constr precise in
      check loc t constr

  | BinaryOp (o,e1,e2) ->
      let t = o.bin_op_typer loc 
                (type_check env e1) 
                (type_check env e2) constr precise in
      check loc t constr

  | Var s -> 
      let t = 
        try Env.find s env
        with Not_found -> raise_loc loc (UnboundId s) in
      check loc t constr
      
  | Cst c -> 
      check loc (Types.constant c) constr

  | Dot (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t = 
        try (Types.Record.project t l) 
        with Not_found -> raise_loc loc (WrongLabel(t,l))
      in
      check loc t constr

  | RemoveField (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t = Types.Record.remove_field t l in
      check loc t constr

  | Xtrans (e,b) ->
      let t = type_check env e Sequence.any true in
      let t = 
        Sequence.map_tree 
          (fun t ->
             let resid = Types.diff t b.br_accept in
             let res = type_check_branches loc env t b Sequence.any true in
             (res,resid)
          ) t in
      check loc t constr

  | Validate (e, schema_name, elt_name) ->
      ignore (type_check env e Types.any false);
      let t =
        (* an unknown element is reported as a located error instead of
           letting a bare [Not_found] escape *)
        try fst (Hashtbl.find !schema_elements (schema_name, elt_name))
        with Not_found ->
          raise_loc_generic loc
            (Printf.sprintf "No element named '%s' found in schema '%s'"
               elt_name schema_name)
      in
      check loc t constr
871

872
(* Type-check the pair (or, with [~kind:`XML], the XML element) built
   from [e1] and [e2] against [constr].  The first component is checked
   against the first projection of [constr]; the constraint on the
   second component is then derived from the type found for the
   first. *)
and type_check_pair ?(kind=`Normal) loc env e1 e2 constr precise =
  let rects = Types.Product.normal ~kind constr in
  if Types.Product.is_empty rects then 
    (match kind with
      | `Normal -> should_have loc constr "but it is a pair"
      | `XML -> should_have loc constr "but it is an XML element");
  let need_s = Types.Product.need_second rects in
  let t1 = type_check env e1 (Types.Product.pi1 rects) (precise || need_s) in
  let c2 = Types.Product.constraint_on_2 rects t1 in
  if Types.is_empty c2 then 
    raise_loc loc (ShouldHave2 (constr,"but the first component has type",t1));
  let t2 = type_check env e2 c2 precise in

  if precise then 
    match kind with
      | `Normal -> Types.times (Types.cons t1) (Types.cons t2)
      | `XML -> Types.xml (Types.cons t1) (Types.cons t2)
  else
    constr

892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
(* Type-check the record literal [r] against [constr].  Fields are
   processed one by one: each field is checked against the projection of
   the remaining constraint on its label, and the constraint is then
   refined with the type found.  The result is the closed record type
   built from the field types, checked against [constr]. *)
and type_record loc env r constr precise =
(* try to get rid of precise = true for values of fields *)
(* also: the use equivalent of need_second to optimize... *)
  if not (Types.Record.has_record constr) then
    should_have loc constr "but it is a record";
  let (rconstr,res) = 
    List.fold_left
      (fun (rconstr,res) (l,e) ->
	 (* could compute (split l e) once... *)
	 let pi = Types.Record.project_opt rconstr l in
	 if Types.is_empty pi then 
	   (let l = U.to_string (LabelPool.value l) in
	    should_have loc constr
	      (Printf.sprintf "Field %s is not allowed here." l));
	 let t = type_check env e pi true in
	 let rconstr = Types.Record.condition rconstr l t in
	 let res = (l,Types.cons t) :: res in
	 (rconstr,res)
      ) (constr, []) (LabelMap.get r)
  in
  (* what remains of the constraint must accept the absence of further fields *)
  if not (Types.Record.has_empty_record rconstr) then
    should_have loc constr "More fields should be present";
  let t = 
    Types.record' (false, LabelMap.from_list (fun _ _ -> assert false) res)
  in
  check loc t constr
918

919

920
(* Type-check the branches [brs] applied to an argument of type [targ].
   An empty argument type yields the empty type without looking at the
   branches.  Otherwise [targ] is accumulated into [brs.br_typ] and the
   branches are checked in order by [branches_aux]. *)
and type_check_branches loc env targ brs constr precise =
  if Types.is_empty targ then Types.empty
  else (
    brs.br_typ <- Types.cup brs.br_typ targ;
    branches_aux loc env targ 
      (if precise then Types.empty else constr) 
      constr precise brs.br_branches
  )
928
    
929
(* [branches_aux loc env targ tres constr precise brs] checks the
   branches [brs] in order.  [targ] is the part of the argument type
   not captured by the previous branches and [tres] the result type
   accumulated so far.  A branch whose pattern cannot match [targ] is
   skipped; otherwise it is marked as used, its body is checked in the
   environment extended with the pattern's bindings, and checking goes
   on with what the pattern does not accept, stopping as soon as nothing
   remains. *)
and branches_aux loc env targ tres constr precise = function
  | [] -> tres
  | b :: rem ->
      let p = b.br_pat in
      let acc = Types.descr (Patterns.accept p) in

      let targ' = Types.cap targ acc in
      if Types.is_empty targ' 
      then branches_aux loc env targ tres constr precise rem
      else 
        ( b.br_used <- true;
          let res = Patterns.filter targ' p in
          let env' = List.fold_left 
                       (fun env (x,t) -> Env.add x (Types.descr t) env) 
                       env res in
          let t = type_check env' b.br_body constr precise in
          let tres = if precise then Types.cup t tres else tres in
          let targ'' = Types.diff targ acc in
          if (Types.non_empty targ'') then 
            branches_aux loc env targ'' tres constr precise rem 
          else
            tres
        )
952

953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
(* Type-check the application of branches [b] to every element of the
   sequence [e].  When [def] is set, any sequence is accepted as
   argument and an element that may fail to match the branches
   contributes [Sequence.nil_type] to the result; otherwise the argument
   must be a sequence of values accepted by the branches.

   The constraint on the elements of the result is an approximation
   [constr'] of [constr]; when a star of it is not a subtype of
   [constr], the branches are typed precisely and the final result is
   checked against [constr]. *)
and type_map loc env def e b constr precise = 
  let acc = if def then Sequence.any else Sequence.star b.br_accept in
  let t = type_check env e acc true in

  let constr' = Sequence.approx (Types.cap Sequence.any constr) in
  let exact = Types.subtype (Sequence.star constr') constr in
  (* Note: 
     - could be more precise by integrating the decomposition
     of constr inside Sequence.map.
  *)
  let res = 
    Sequence.map 
      (fun t ->
	 let res = 
	   type_check_branches loc env t b constr' (precise || (not exact)) in
	 if def && not (Types.subtype t b.br_accept) 
	 then Types.cup res Sequence.nil_type
	 else res)
      t in
  if exact then res else check loc res constr

974
975
976
977
978
979
980
981
982
(* Type-check the declaration [l]: its body must have a type accepted by
   its pattern.  Returns the variables bound by the pattern together
   with their types. *)
and type_let_decl env l =
  let accepted = Types.descr (Patterns.accept l.let_pat) in
  let body_typ = type_check env l.let_body accepted true in
  let bindings = Patterns.filter body_typ l.let_pat in
  List.map (fun (x,node) -> (x, Types.descr node)) bindings

(* Type-check the mutually recursive functions [l].  Every element must
   be a named abstraction (assertion failure otherwise).  All the
   functions are first bound to their declared types, then each one is
   checked in the extended environment.  Returns the (name, type)
   bindings. *)
and type_rec_funs env l =
  let types = 
    List.fold_left
      (fun accu -> function  
         | { exp_descr=Abstraction { fun_typ = t; fun_name = Some f } } ->
             (f,t) :: accu
         | _ -> assert false
      ) [] l
  in
  let env' = List.fold_left (fun env (x,t) -> Env.add x t env) env types in
  List.iter (fun e -> ignore (type_check env' e Types.any false)) l;
  types

993
994

(* [unused_branches b] walks a list of [Branch (br, sub)] nodes and emits a
   warning, located at [br.br_loc], for every branch whose [br_used] flag
   is false.  Sub-branches are only visited when the enclosing branch was
   used, so an unused branch is reported once and its children are not. *)
let rec unused_branches b =
  List.iter
    (fun (Branch (br,s)) ->
       if not br.br_used
       then warning br.br_loc "This branch is not used"
       else unused_branches s
    )
    b

(* Emit the unused-branch warnings for everything recorded in [cur_branch],
   then reset [cur_branch] to the empty list. *)
let report_unused_branches () =
  let recorded = !cur_branch in
  unused_branches recorded;
  cur_branch := []
1006

1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
  (* Schema stuff from now on ... *)

(* Hard-coded debugging switch: when true, register_schema prints the CDuce
   type computed for each schema element on standard output. *)
let debug = true ;;

  (** conversion from XML Schema types (including global elements and
  attributes) to CDuce Types.descr *)
module Schema_converter =
  struct

    open Printf ;;
    open Schema_types ;;

    (* auxiliary functions *)

      (* wrap a pattern/type AST node into a one-element regexp (PElem);
         it is applied below to the results of cd_type_of_simple_type and
         cd_type_of_elt_decl *)
    let mk_re_elt descr = PElem descr

    (* conversion functions *)

      (* AST node for a simple type; only built-in simple types are
         supported, user-defined ones hit the assertion *)
    let cd_type_of_simple_type = function
      | SBuilt_in name -> PType (Schema_builtin.cd_type_of_builtin name)
      | SUser_defined (_, _, _, _) -> assert false (* TODO *)
    ;;

      (* memo table from complex type ids to the slot holding their
         translation; it lets recursive complex types refer to themselves
         through a PAlias *)
    let complex_memo = Hashtbl.create 213

      (* regexp for a model group term: a Choice becomes a left-nested
         alternation and a Sequence a left-nested concatenation of its
         particles; empty groups become PEpsilon; All is not supported *)
    let rec regexp_of_term = function
      | All _ -> assert false
      | Choice [] -> PEpsilon
      | Choice (hd :: tl) ->
          List.fold_left
            (fun acc particle -> PAlt (acc, regexp_of_particle particle))
            (regexp_of_particle hd) tl
      | Sequence [] -> PEpsilon
      | Sequence (hd :: tl) ->
          List.fold_left
            (fun acc particle -> PSeq (acc, regexp_of_particle particle))
            (regexp_of_particle hd) tl
      | Elt decl -> mk_re_elt (cd_type_of_elt_decl !decl)

      (* regexp for the content of a complex type; mixed content is not
         supported and trips the assertion *)
    and regexp_of_content_type = function
      | CT_empty -> PEpsilon
      | CT_simple st -> mk_re_elt (cd_type_of_simple_type st)
      | CT_model (particle, mixed) ->
          assert (not mixed); (* TODO mixed support *)
          regexp_of_particle particle

      (* regexp for a particle (min, max, term): [min] mandatory copies of
         the term followed by either (max - min) nested optional copies,
         or a star when there is no upper bound *)
    and regexp_of_particle =
        (* given a regexp re and a (non negative) integer n create a regexp
        matching exactly n times re *)
      let rec repeat_regexp re = function
        | 0 -> PEpsilon
        | n when n > 0 -> PSeq (re, repeat_regexp re (n - 1))
        | _ -> assert false
      in
      fun (min, max, term) ->
        let term_regexp = regexp_of_term term in
        let min_regexp = repeat_regexp term_regexp min in
        match max with
        | Some max ->
            assert (max >= min);
              (* each step wraps the accumulator into
                 (epsilon | term_regexp acc), so n steps accept between 0
                 and n copies of the term *)
            let rec aux acc = function
              | 0 -> acc
              | n ->
                  aux
                    (PAlt (PEpsilon, (PSeq (term_regexp, acc))))
                    (n - 1)
            in
            PSeq (min_regexp, aux PEpsilon (max - min))
        | None -> PSeq (min_regexp, PStar term_regexp)

      (** @return a pair composed by a type for the attributes (a record) and a
      type for the content model (a sequence) *)
    and cd_type_of_complex_type' = function
      | CBuilt_in name -> assert false
      | CUser_defined (id, name, _, _, attr_uses, content) ->
          try PAlias (Hashtbl.find complex_memo id)
          with Not_found ->
              (* register the slot before translating the content, so that
                 a recursive occurrence of the same type finds it in the
                 memo table instead of looping *)
            let slot = mk_slot noloc in
            Hashtbl.add complex_memo id slot;
            let content_re = regexp_of_content_type content in
            let content_ast_node = PRegexp (content_re, PType Sequence.nil_type) in
            slot.pdescr <- Some
              (PTimes (cd_type_of_attr_uses attr_uses, content_ast_node));
            PAlias slot

      (** @return a closed record *)
    and cd_type_of_attr_uses attr_uses =
      let fields =
        List.map
          (fun (required, (name, st, _), _) ->
             let r = cd_type_of_simple_type !st in
             let r = if required then r else POptional r in
             (LabelPool.mk (U.mk name), r)
          ) attr_uses in
      PRecord (false, LabelMap.from_list_disj fields)

      (* AST node for an element declaration: an XML element whose tag is
         the atom built from [name]; a simple-typed element gets the empty
         closed record as attributes *)
    and cd_type_of_elt_decl (name, typ, _) =
      let atom_type = PType (Types.atom (Atoms.atom (Atoms.mk (U.mk name)))) in
      let content = match !typ with
        | S st -> PTimes (PType Types.empty_closed_record, cd_type_of_simple_type st)
        | C ct -> cd_type_of_complex_type' ct
      in
      PXml (atom_type, content)

      (* turn an AST node into a Types.descr *)
    let typ r = Types.descr (do_typ noloc r)

      (* Types.descr of a complex type: built-in ones are looked up
         directly, user-defined ones become an XML element with any tag *)
    let cd_type_of_complex_type = function
      | CBuilt_in name -> Schema_builtin.cd_type_of_builtin name
      | ct -> typ (PXml (PType Types.any, cd_type_of_complex_type' ct))

      (* Types.descr of a (simple or complex) type definition *)
    let cd_type_of_type_def = function
      | S st -> typ (cd_type_of_simple_type st)
      | C ct -> cd_type_of_complex_type ct
    ;;

      (* Types.descr of an element declaration; shadows the AST-level
         function of the same name defined above *)
    let cd_type_of_elt_decl x =
      typ (cd_type_of_elt_decl x)

  end
;;

(* Validator registered for element [elt_name] of schema [schema_name].
   Raises [Not_found] (from [Hashtbl.find]) when no such element has been
   registered. *)
let get_schema_validator (schema_name, elt_name) =
  let (_, validator) =
    Hashtbl.find !schema_elements (schema_name, elt_name) in
  validator
;;

(* [register_schema schema_name schema] records [schema] under
   [schema_name]:
   - every type definition is converted to a CDuce type and stored in
     [schema_types] under (schema name, type name);
   - every element declaration is converted to a CDuce type, paired with
     its validator and stored in [schema_elements] under
     (schema name, element name).
   Raises [Failure] if a schema with the same name is already registered.
   When [debug] is set, each element type is printed on standard output. *)
let register_schema schema_name schema =
  if StringSet.mem schema_name !schemas then
    failwith ("Redefinition of schema " ^ schema_name)
  else begin
    schemas := StringSet.add schema_name !schemas;
    List.iter (* Schema types -> CDuce types *)
      (fun type_def ->
        let cd_type = Schema_converter.cd_type_of_type_def type_def in
        Hashtbl.add !schema_types
          (schema_name, Schema_types.name_of_type_def type_def)
          cd_type)
      schema.Schema_types.type_defs;
              (* Schema attributes -> CDuce types TODO *)
    List.iter (* Schema elements -> CDuce types * validators *)
      (fun elt_decl ->
        let cd_type = Schema_converter.cd_type_of_elt_decl elt_decl in
        (* "@." emits the newline through the pretty-printer and flushes,
           instead of a raw newline character followed by a manual flush *)
        if debug then
          Format.fprintf Format.std_formatter "%a@."
            Types.Print.print cd_type;
        let validator = Schema_validator.validator_of_elt_decl elt_decl in
        Hashtbl.add !schema_elements
          (schema_name, Schema_types.name_of_elt_decl elt_decl)
          (cd_type, validator))
      schema.Schema_types.elt_decls
  end
;;

(* DEBUGGING ONLY *)

(* CDuce type registered for the schema element keyed by [x]; raises
   [Not_found] (from [Hashtbl.find]) when there is no such entry. *)
let get_schema_type x =
  let (cd_type, _) = Hashtbl.find !schema_elements x in
  cd_type
;;