typer.ml 39.2 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraint
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7

8
9
(* [warning loc msg] prints a warning on [!Location.warning_ppf]:
   the location [loc], then the highlighted source produced by
   [Location.html_hilight], then the message [msg].
   The stray line-number lines that had been interleaved with the
   arguments of [Format.fprintf] have been removed. *)
let warning loc msg =
  Format.fprintf !Location.warning_ppf "Warning %a:@\n%a%s@\n"
    Location.print_loc (loc,`Full)
    Location.html_hilight (loc,`Full)
    msg

14
15
16
17




18
19
(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
20
21
22

open Location
open Ast
23
open Ident
24

25
module TypeEnv = Map.Make(U)
26

27
(* Exceptions reported by the typer.  In this file they are wrapped
   with a source location by [raise_loc] / [raise_loc_str]. *)

(* Carries the part of the input type that no branch accepts. *)
exception NonExhaustive of Types.descr
(* [Constraint (t,s)]: [t] was required to be a subtype of [s] but is not. *)
exception Constraint of Types.descr * Types.descr
(* [ShouldHave (constr,msg)]: the expected type and an explanation. *)
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id
(* Generic error carrying only a message; see [error]. *)
exception Error of string
34

35
36
(* [raise_loc loc exn] raises [exn] wrapped in [Location], attached to
   the whole extent ([`Full]) of [loc]. *)
let raise_loc loc exn = raise (Location (loc,`Full,exn))

(* Same as [raise_loc], but the error is attached to the character at
   offset [ofs] ([`Char ofs]) of [loc]. *)
let raise_loc_str loc ofs exn = raise (Location (loc,`Char ofs,exn))

(* [error loc msg] raises [Error msg] located at [loc]. *)
let error loc msg = raise_loc loc (Error msg)
38

39
40
41
  (* Schema datastructures *)

module StringSet = Set.Make (String)

  (* just to remember imported schemas *)
let schemas = State.ref "Typer.schemas" StringSet.empty

(* Tables of schema components.  [derecurs] looks them up with the key
   [(schema, item)]; for elements only the first component of the
   stored value is used there. *)
let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
(* Eliminate Recursion, propagate Sequence Capture Variables *)

(* [seq_vars accu r] adds to [accu] every variable bound by a
   [SeqCapture] node occurring in the regular expression [r]. *)
let rec seq_vars accu regexp =
  match regexp with
  | SeqCapture (var, inner) -> seq_vars (IdSet.add var accu) inner
  | Star inner | WeakStar inner -> seq_vars accu inner
  | Seq (left, right) | Alt (left, right) ->
      let accu_left = seq_vars accu left in
      seq_vars accu_left right
  | Epsilon | Elem _ -> accu

(* Intermediate representation of types and patterns in which named
   (possibly recursive) definitions have been replaced by slots. *)
type derecurs_slot = {
  (* source location of the definition; used when reporting
     unguarded recursion *)
  ploc : Location.loc;
  (* identifier allocated by [mk_slot]; also used as the hash of an alias *)
  pid  : int;
  (* set while the definition of this slot is being compiled, so that
     re-entering it can be detected *)
  mutable ploop : bool;
  (* body of the definition; [None] until [derecurs_def] fills it in *)
  mutable pdescr : derecurs option
} and derecurs =
  (* reference to a named definition *)
  | PAlias of derecurs_slot
  (* an already built type *)
  | PType of Types.descr
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * derecurs label_map
  | PCapture of id
  | PConstant of id * Types.const
  (* regular expression followed by a tail *)
  | PRegexp of derecurs_regexp * derecurs
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

85
86
(* Environment used while translating types and patterns. *)
type tenv = {
  (* named types/patterns in scope, each bound to its slot *)
  tenv_names : derecurs_slot TypeEnv.t;
  (* namespace prefix table, used to resolve tags and prefixes *)
  tenv_nspref: Ns.table;
  tenv_loc   : Location.loc
}

(* Returns the namespace prefix table of [tenv]. *)
let get_ns_table tenv = tenv.tenv_nspref
91

92
(* Structural hash on [derecurs] terms, compatible with
   [equal_derecurs].  An alias is hashed by the identifier of its slot
   only, so the hash never follows a recursive definition.  Each
   constructor contributes a distinct small constant.
   The stray line-number lines that separated the function headers
   from their cases have been removed. *)
let rec hash_derecurs = function
  | PAlias s ->
      s.pid
  | PType t ->
      1 + 17 * (Types.hash_descr t)
  | POr (p1,p2) ->
      2 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PAnd (p1,p2) ->
      3 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PDiff (p1,p2) ->
      4 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PTimes (p1,p2) ->
      5 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PXml (p1,p2) ->
      6 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PArrow (p1,p2) ->
      7 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | POptional p ->
      8 + 17 * (hash_derecurs p)
  | PRecord (o,r) ->
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs r)
  | PCapture x ->
      11 + 17 * (Id.hash x)
  | PConstant (x,c) ->
      12 + 17 * (Id.hash x) + 257 * (Types.hash_const c)
  | PRegexp (p,q) ->
      13 + 17 * (hash_derecurs_regexp p) + 257 * (hash_derecurs q)
(* Same, for the regular expression part of a [PRegexp]. *)
and hash_derecurs_regexp = function
  | PEpsilon ->
      1
  | PElem p ->
      2 + 17 * (hash_derecurs p)
  | PSeq (p1,p2) ->
      3 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PAlt (p1,p2) ->
      4 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PStar p ->
      5 + 17 * (hash_derecurs_regexp p)
  | PWeakStar p ->
      6 + 17 * (hash_derecurs_regexp p)
132
133

(* Structural equality on [derecurs] terms.  Physically equal terms
   are equal; two aliases are equal only when they are the very same
   slot, so the comparison never unfolds a definition.
   The stray line-number lines interleaved with the cases have been
   removed, and the open/closed flags of records are now compared with
   [=] as in [equal_descr] (same result on booleans). *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 ->
      s1 == s2
  | PType t1, PType t2 ->
      Types.equal_descr t1 t2
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) ->
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 ->
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) ->
      (o1 = o2) && (LabelMap.equal equal_derecurs r1 r2)
  | PCapture x1, PCapture x2 ->
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) ->
      (Id.equal x1 x2) && (Types.equal_const c1 c2)
  | PRegexp (p1,q1), PRegexp (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  | _ -> false
(* Same, for the regular expression part of a [PRegexp]. *)
and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon ->
      true
  | PElem p1, PElem p2 ->
      equal_derecurs p1 p2
  | PSeq (p1,q1), PSeq (p2,q2)
  | PAlt (p1,q1), PAlt (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs_regexp q1 q2)
  | PStar p1, PStar p2
  | PWeakStar p1, PWeakStar p2 ->
      equal_derecurs_regexp p1 p2
  | _ -> false
168

169
170
171
172
173
174
175
176
177
178
179
(* Hash tables keyed by [derecurs] terms, using the structural hash
   and equality defined for them. *)
module DerecursTable = Hashtbl.Make (struct
  type t = derecurs
  let equal = equal_derecurs
  let hash = hash_derecurs
end)

(* Hash tables keyed by a pair (regular expression, tail).
   The stray line-number lines inside the structure have been removed. *)
module RE = Hashtbl.Make(
  struct
    type t = derecurs_regexp * derecurs
    let hash (p,q) =
      (hash_derecurs_regexp p) + 17 * (hash_derecurs q)
    let equal (p1,q1) (p2,q2) =
      (equal_derecurs_regexp p1 p2) && (equal_derecurs q1 q2)
  end
)
186

187
188
189
190
191
  
(* Source of the identifiers given to slots. *)
let counter = State.ref "Typer.counter - derecurs" 0

(* [mk_slot loc] allocates a slot located at [loc], with a fresh
   identifier, no definition yet, and not currently being compiled. *)
let mk_slot loc =
  let fresh_id = !counter + 1 in
  counter := fresh_id;
  { ploc = loc; pid = fresh_id; ploop = false; pdescr = None }
192

193
(*
194
195
196
197
198
let ns_from_prefix env loc ns =
  try TypeEnv.find ns env.tenv_nspref
  with Not_found -> 
    raise_loc_generic loc 
      ("Undefined namespace prefix " ^ (U.to_string ns))
199
*)
200
  
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
(* [parse_atom env loc t] resolves the tag [t] with the namespace
   table of [env] and builds the corresponding atom.  An unknown
   prefix is reported as a generic error located at [loc]. *)
let parse_atom env loc t =
  let undefined_prefix prefix =
    raise_loc_generic loc
      ("Undefined namespace prefix " ^ (U.to_string prefix)) in
  try
    let (ns, local) = Ns.map_tag env.tenv_nspref t in
    Atoms.mk ns local
  with Ns.UnknownPrefix prefix -> undefined_prefix prefix

(* [parse_ns env loc ns] resolves the prefix [ns] with the namespace
   table of [env].  An unknown prefix is reported as a generic error
   located at [loc]. *)
let parse_ns env loc ns =
  let undefined_prefix prefix =
    raise_loc_generic loc
      ("Undefined namespace prefix " ^ (U.to_string prefix)) in
  try Ns.map_prefix env.tenv_nspref ns
  with Ns.UnknownPrefix prefix -> undefined_prefix prefix
 

216
217
(* [const env loc c] translates a syntactic constant: an internal
   constant is returned unchanged, an atom has its tag resolved by
   [parse_atom] (errors are located at [loc]).
   The stray line-number line between the two cases has been removed. *)
let const env loc = function
  | Const_internal c -> c
  | Const_atom t -> Types.Atom (parse_atom env loc t)
219

220
221
(* [derecurs env p] translates the syntactic type/pattern [p] into a
   [derecurs] term.  Names are resolved in [env.tenv_names]; an
   unknown name is reported as a generic located error.  Schema
   components are looked up in the schema tables and a missing one
   raises [Failure].
   The stray line-number lines interleaved with the cases have been
   removed, and the four identical failure messages are now built by
   one local helper (the resulting strings are unchanged). *)
let rec derecurs env p = match p.descr with
  | PatVar v ->
      (try PAlias (TypeEnv.find v env.tenv_names)
       with Not_found ->
         raise_loc_generic p.loc ("Undefined type/pattern " ^ (U.to_string v)))
  | SchemaVar (kind, schema, item) ->
      let try_elt () = fst (Hashtbl.find !schema_elements (schema, item)) in
      let try_typ () = Hashtbl.find !schema_types (schema, item) in
      let try_att () = Hashtbl.find !schema_attributes (schema, item) in
      (* [what] is the kind of component named in the message *)
      let not_found what =
        failwith (Printf.sprintf
          "No %s named '%s' found in schema '%s'" what item schema) in
      (match kind with
      | `Element ->
          (try PType (try_elt ())
           with Not_found -> not_found "element")
      | `Type ->
          (try PType (try_typ ())
           with Not_found -> not_found "type")
      | `Attribute ->
          (try PType (try_att ())
           with Not_found -> not_found "attribute")
      | `Any ->
          (* elements are tried first, then types, then attributes *)
          PType
            (try try_elt () with Not_found ->
              (try try_typ () with Not_found ->
                (try try_att () with Not_found ->
                  not_found "item"))))
  | Recurs (p,b) -> derecurs (derecurs_def env b) p
  | Internal t -> PType t
  | AtomT t -> PType (Types.atom (Atoms.atom (parse_atom env p.loc t)))
  | NsT ns -> PType (Types.atom (Atoms.any_in_ns (parse_ns env p.loc ns)))
  | Or (p1,p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1,p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1,p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1,p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1,p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1,p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional p -> POptional (derecurs env p)
  | Record (o,r) -> PRecord (o, LabelMap.map (derecurs env) r)
  | Capture x -> PCapture x
  | Constant (x,c) -> PConstant (x,const env p.loc c)
  | Regexp (r,q) ->
      (* in the tail, every sequence capture variable of [r] is bound
         to the nil constant *)
      let constant_nil t v =
        PAnd (t, PConstant (v, Types.Atom Sequence.nil_atom)) in
      let vars = seq_vars IdSet.empty r in
      let q = IdSet.fold constant_nil (derecurs env q) vars in
      let r = derecurs_regexp (fun p -> p) env r in
      PRegexp (r, q)
(* [derecurs_regexp vars env r] translates the regular expression [r].
   [vars] is applied to every translated element; it adds the capture
   variables of the enclosing [SeqCapture] nodes. *)
and derecurs_regexp vars env = function
  | Epsilon ->
      PEpsilon
  | Elem p ->
      PElem (vars (derecurs env p))
  | Seq (p1,p2) ->
      PSeq (derecurs_regexp vars env p1, derecurs_regexp vars env p2)
  | Alt (p1,p2) ->
      PAlt (derecurs_regexp vars env p1, derecurs_regexp vars env p2)
  | Star p ->
      PStar (derecurs_regexp vars env p)
  | WeakStar p ->
      PWeakStar (derecurs_regexp vars env p)
  | SeqCapture (x,p) ->
      derecurs_regexp (fun p -> PAnd (vars p, PCapture x)) env p

(* [derecurs_def env b] extends [env] with the definitions [b].  All
   slots are created and bound first, and only then are the bodies
   translated in the extended environment, so the definitions may
   refer to one another.  Returns the extended environment. *)
and derecurs_def env b =
  let b = List.map (fun (v,p) -> (v,p,mk_slot p.loc)) b in
  let n =
    List.fold_left (fun env (v,p,s) -> TypeEnv.add v s env) env.tenv_names b in
  let env = { env with tenv_names = n } in
  List.iter (fun (v,p,s) -> s.pdescr <- Some (derecurs env p)) b;
  env
300

301
(* Stratification and recursive hash-consing *)
302
303
304
305
306
307
308
309
310

(* Stratified representation: boolean combinations are stored
   directly, while the arguments of the product, XML, arrow and record
   constructors are slots, which are filled in later and may therefore
   form cycles.
   The stray line-number lines inside the definition have been removed. *)
type descr =
  | IType of Types.descr
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * slot label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  (* cached set of capture variables, filled by [compute_fv] *)
  mutable fv : fv option;
  (* cached hash, filled by [Arg.hash] *)
  mutable hash : int option;
  (* visit rank and generation stamp used by the traversals below;
     the second pair is used for the right-hand slot of a comparison *)
  mutable rank1: int; mutable rank2: int;
  mutable gen1 : int; mutable gen2: int;
  (* definition of the slot; [None] until [flush_defs] fills it in *)
  mutable d    : descr option
}
    
(* Definition of a slot.  The slot must already have been filled in;
   an empty slot is an internal error. *)
let descr slot =
  match slot.d with
  | None -> assert false
  | Some definition -> definition
	
(* Current generation: incremented before each traversal started by
   [Arg.hash], [Arg.equal] or [compute_fv]; a slot whose stamp equals
   it has already been visited during that traversal. *)
let gen = ref 0
(* Number of slots visited so far in the current hash or equality
   traversal; reset to 0 when such a traversal starts. *)
let rank = ref 0
	     
(* Hash of a possibly cyclic description.  The caller is expected to
   start a new generation and reset [rank] first (see [Arg.hash]). *)
let rec hash_descr = function
  | IType x -> Types.hash_descr x
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_slot r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.hash_const y)
(* A slot already visited in this generation is hashed by its visit
   rank, which stops the recursion on cycles.  Otherwise the slot
   receives the next rank and the current generation stamp, and its
   definition is hashed. *)
and hash_slot s =
  if s.gen1 = !gen then 13 * s.rank1
  else (
    incr rank;
    s.rank1 <- !rank; s.gen1 <- !gen;
    hash_descr (descr s)
  )
    
(* Equality of possibly cyclic descriptions.  The caller is expected
   to start a new generation and reset [rank] first (see [Arg.equal]).
   The stray line-number lines interleaved with the cases have been
   removed; the code is otherwise unchanged. *)
let rec equal_descr d1 d2 =
  match (d1,d2) with
  | IType x1, IType x2 -> Types.equal_descr x1 x2
  | IOr (x1,y1), IOr (x2,y2)
  | IAnd (x1,y1), IAnd (x2,y2)
  | IDiff (x1,y1), IDiff (x2,y2) -> (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2)
  | IXml (x1,y1), IXml (x2,y2)
  | IArrow (x1,y1), IArrow (x2,y2) -> (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) ->
      (o1 = o2) && (LabelMap.equal equal_slot r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) ->
      (Id.equal x1 x2) && (Types.equal_const y1 y2)
  | _ -> false
(* The left slot is stamped in its first rank/generation pair and the
   right slot in its second pair.  Two slots already visited in this
   generation are equal when they were given the same rank.  Two slots
   not yet visited receive the same fresh rank and their definitions
   are compared.  If only one of them has been visited, they differ. *)
and equal_slot s1 s2 =
  ((s1.gen1 = !gen) && (s2.gen2 = !gen) && (s1.rank1 = s2.rank2))
  ||
  ((s1.gen1 <> !gen) && (s2.gen2 <> !gen) && (
     incr rank;
     s1.rank1 <- !rank; s1.gen1 <- !gen;
     s2.rank2 <- !rank; s2.gen2 <- !gen;
     equal_descr (descr s1) (descr s2)
   ))
  
(* Hashed type of slots, used to build [SlotTable].
   The stray line-number lines inside the module have been removed. *)
module Arg = struct
  type t = slot

  (* The hash is computed once per slot and cached in the slot.  Each
     computation starts a new generation and resets the rank counter. *)
  let hash s =
    match s.hash with
      | Some h -> h
      | None ->
          incr gen; rank := 0;
          let h = hash_slot s in
          s.hash <- Some h;
          h

  (* Physically equal slots are equal; otherwise a new generation is
     started and the two slots are compared with [equal_slot]. *)
  let equal s1 s2 =
    (s1 == s2) ||
    (incr gen; rank := 0;
     let e = equal_slot s1 s2 in
(*     if e then Printf.eprintf "Recursive hash-consig: Equal\n";  *)
     e)
end

(* Hash tables keyed by slots. *)
module SlotTable = Hashtbl.Make(Arg)
  
(* Capture variables of a slot.  The cached set is returned when
   there is one.  Otherwise a slot already stamped with the current
   generation contributes nothing (this stops the recursion on
   cycles); else it is stamped and its definition is traversed.
   The stray line-number lines interleaved with the cases have been
   removed. *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None ->
        if s.gen1 = !gen then IdSet.empty
        else (s.gen1 <- !gen; fv_descr (descr s))
(* Capture variables of a description: the variables of its capture
   and constant nodes. *)
and fv_descr = function
  | IType _ -> IdSet.empty
  | IOr (d1,d2)
  | IAnd (d1,d2)
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d -> fv_descr d
  | ITimes (s1,s2)
  | IXml (s1,s2)
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (o,r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_slot r)
  | ICapture x | IConstant (x,_) -> IdSet.singleton x
416

417
418
419
420
421
422
423
424
425
      
(* Computes and caches the capture variables of [s], unless they are
   already cached.  The computation runs in a new generation. *)
let compute_fv s =
  match s.fv with
  | None ->
      incr gen;
      s.fv <- Some (fv_slot s)
  | Some _ -> ()
	  
426
427

(* Slots whose capture variables have not been computed yet; emptied
   by [flush_fv]. *)
let todo_fv = ref []

(* [mk ()] creates an empty slot (no definition, no cached data) and
   registers it in [todo_fv].
   The stray line-number lines inside the body have been removed. *)
let mk () =
  let s =
    { d = None;
      fv = None;
      hash = None;
      rank1 = 0; rank2 = 0;
      gen1 = 0; gen2 = 0 } in
  todo_fv := s :: !todo_fv;
  s
438
439
440
441

(* Computes the capture variables of every pending slot, then empties
   the pending list. *)
let flush_fv () =
  let pending = !todo_fv in
  List.iter compute_fv pending;
  todo_fv := []
442
    
443
444
445
(* Memoization tables: slot allocated for a term by [compile_slot],
   and description computed for a term by [compile]. *)
let compile_slot_hash = DerecursTable.create 67
let compile_hash = DerecursTable.create 67

(* Slots allocated by [compile_slot] together with the term that
   defines them, waiting to be filled in by [flush_defs]. *)
let defs = ref []
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480

(* [compile p] translates a [derecurs] term into a description; the
   result is memoized in [compile_hash].
   The stray line-number lines interleaved with the code have been
   removed. *)
let rec compile p =
  try DerecursTable.find compile_hash p
  with Not_found ->
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    c
and real_compile = function
  | PAlias v ->
      (* [ploop] is set while the definition of [v] is being compiled;
         reaching [v] again in the meantime is reported as unguarded
         recursion *)
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r = match v.pdescr with Some x -> compile x | _ -> assert false in
      v.ploop <- false;
      r
  | PType t -> IType t
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) ->  IRecord (o, LabelMap.map compile_slot r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp (r,q) -> compile_regexp r q
(* [compile_regexp r q] compiles the regular expression [r] followed
   by the tail [q] into the union of its alternatives.  [memo] records
   the pairs already expanded, so that a pair met again adds nothing. *)
and compile_regexp r q =
  let memo = RE.create 17 in
  let rec aux accu r q =
    if RE.mem memo (r,q) then accu
    else (
      RE.add memo (r,q) ();
      match r with
        | PEpsilon ->
            (match q with
               | PRegexp (r,q) -> aux accu r q
               | _ -> (compile q) :: accu)
        | PElem p -> ITimes (compile_slot p, compile_slot q) :: accu
        | PSeq (r1,r2) -> aux accu r1 (PRegexp (r2,q))
        | PAlt (r1,r2) -> aux (aux accu r1 q) r2 q
        | PStar r1 -> aux (aux accu r1 (PRegexp (r,q))) PEpsilon q
        | PWeakStar r1 -> aux (aux accu PEpsilon q) r1 (PRegexp (r,q))
    )
  in
  let accu = aux [] r q in
  match accu with
    | [] -> assert false
    | p::l -> List.fold_left (fun acc p -> IOr (p,acc)) p l
(* [compile_slot p] returns the slot associated with [p], allocating
   it on first use.  The slot is only registered in [defs] here; its
   definition is computed later by [flush_defs]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let s = mk () in
    defs := (s,p) :: !defs;
    DerecursTable.add compile_slot_hash p s;
    s
503

504
505
506
507
      
(* Fills in every pending slot with its compiled definition.
   Compiling a definition may register new pending slots; the loop
   runs until [defs] is empty.
   The stray line-number line between the two cases has been removed. *)
let rec flush_defs () =
  match !defs with
    | [] -> ()
    | (s,p)::t -> defs := t; s.d <- Some (compile p); flush_defs ()
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
	
(* Type node and pattern node already built for each slot. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67

(* [typ d] builds the type denoted by [d].  Capture and constant
   nodes have no meaning in a type and are an internal error here. *)
let rec typ d =
  match d with
  | ICapture _ | IConstant _ -> assert false
  | IType t -> t
  | IOptional inner -> Types.Record.or_absent (typ inner)
  | IRecord (is_open, fields) ->
      Types.record' (is_open, LabelMap.map typ_node fields)
  | IArrow (dom, codom) -> Types.arrow (typ_node dom) (typ_node codom)
  | IXml (tag, content) -> Types.xml (typ_node tag) (typ_node content)
  | ITimes (left, right) -> Types.times (typ_node left) (typ_node right)
  | IDiff (left, right) -> Types.diff (typ left) (typ right)
  | IAnd (left, right) -> Types.cap (typ left) (typ right)
  | IOr (left, right) -> Types.cup (typ left) (typ right)

(* [typ_node s] returns the type node of the slot [s].  On first use
   the node is created and recorded before its definition is built,
   so that a cyclic definition finds it again. *)
and typ_node s : Types.node =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let node = Types.make () in
    SlotTable.add typ_nodes s node;
    let definition = typ (descr s) in
    Types.define node definition;
    node
      
(* [pat d] builds the pattern denoted by [d].  A description without
   capture variables is turned into a type constraint. *)
let rec pat d : Patterns.descr =
  if not (IdSet.is_empty (fv_descr d))
  then pat_aux d
  else Patterns.constr (typ d)

(* Translation of a description that has capture variables. *)
and pat_aux d =
  match d with
  | IType _ -> assert false
  | ICapture var -> Patterns.capture var
  | IConstant (var, cst) -> Patterns.constant var cst
  | IOr (left, right) -> Patterns.cup (pat left) (pat right)
  | IAnd (left, right) -> Patterns.cap (pat left) (pat right)
  (* a difference is accepted only when its right-hand side is a type *)
  | IDiff (left, right) when IdSet.is_empty (fv_descr right) ->
      let negated = Types.neg (typ right) in
      Patterns.cap (pat left) (Patterns.constr negated)
  | IDiff _ ->
      raise (Patterns.Error "Difference not allowed in patterns")
  | ITimes (left, right) -> Patterns.times (pat_node left) (pat_node right)
  | IXml (tag, content) -> Patterns.xml (pat_node tag) (pat_node content)
  | IArrow _ ->
      raise (Patterns.Error "Arrow not allowed in patterns")
  | IOptional _ ->
      raise (Patterns.Error "Optional field not allowed in record patterns")
  | IRecord (is_open, fields) ->
      (* fields without capture variables stay in the record type
         constraint; each other field becomes a separate record
         pattern and is left unconstrained in the record type *)
      let field_pats = ref [] in
      let field_constr label slot =
        if IdSet.is_empty (fv_slot slot) then typ_node slot
        else
          ( field_pats := Patterns.record label (pat_node slot) :: !field_pats;
            Types.any_node )
      in
      let record_type = Types.record' (is_open,LabelMap.mapi field_constr fields) in
      List.fold_left Patterns.cap (Patterns.constr record_type) !field_pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)

(* [pat_node s] returns the pattern node of the slot [s].  On first
   use the node is created and recorded before its definition is
   built, so that a cyclic definition finds it again. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let node = Patterns.make (fv_slot s) in
    SlotTable.add pat_nodes s node;
    Patterns.define node (pat (descr s));
    node
575

576
(* [register_global_types glb b] adds the type definitions [b] to the
   environment [glb] and returns the extended environment.
   - A name already bound in [glb] is rejected.
   - A definition containing capture variables is rejected.
   - A definition with a real location that yields an empty type
     triggers a warning.
   Each resulting type is registered with the type printer.
   The stray line-number lines interleaved with the code have been
   removed. *)
let register_global_types glb b =
  List.iter
    (fun (v,p) ->
       if TypeEnv.mem v glb.tenv_names
       then raise_loc_generic p.loc ("Multiple definition for type " ^ (U.to_string v))
    ) b;
  let glb = derecurs_def glb b in
  let b = List.map (fun (v,p) -> (v,p,compile (derecurs glb p))) b in
  flush_defs ();
  flush_fv ();
  let b =
    List.map
      (fun (v,p,s) ->
         if not (IdSet.is_empty (fv_descr s)) then
           raise_loc_generic p.loc
             "Capture variables are not allowed in types";
         let t = typ s in
         if (p.loc <> noloc) && (Types.is_empty t) then
           warning p.loc
             ("This definition yields an empty type for " ^ (U.to_string v));
         (v,t)) b in
  List.iter (fun (v,t) -> Types.Print.register_global v t) b;
  glb

(* Returns [glb] with the prefix [p] bound to the namespace [ns].
   The stray line-number line inside the body has been removed. *)
let register_ns_prefix glb p ns =
  { glb with tenv_nspref = Ns.add_prefix p ns glb.tenv_nspref }
602
603
604

(* Prints on [ppf] the name of every type bound in [glb], each one
   preceded by a space. *)
let dump_global_types ppf glb =
  let print_name name _slot = Format.fprintf ppf " %a" U.print name in
  TypeEnv.iter print_name glb.tenv_names
605
606
607

(* [do_typ loc r] compiles the term [r] and returns its type node.
   A term containing capture variables is rejected with an error
   located at [loc].
   The stray line-number lines inside the body have been removed. *)
let do_typ loc r =
  let s = compile_slot r in
  flush_defs ();
  flush_fv ();
  if IdSet.is_empty (fv_slot s) then typ_node s
  else raise_loc_generic loc "Capture variables are not allowed in types"
   
613
614
(* [typ glb p] translates the syntactic type [p] in the environment
   [glb] into a type node; errors are located at [p.loc]. *)
let typ glb p =
  let derecursed = derecurs glb p in
  do_typ p.loc derecursed
615
    
616
617
(* [pat glb p] translates the syntactic pattern [p] in the
   environment [glb] into a pattern node.  A [Patterns.Error] is
   turned into a generic error located at [p.loc], and a located
   error without a real location is relocated to [p.loc].
   The stray line-number lines inside the body have been removed. *)
let pat glb p =
  let s = compile_slot (derecurs glb p) in
  flush_defs ();
  flush_fv ();
  try pat_node s
  with Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,_,exn) when loc = noloc -> raise (Location (p.loc, `Full, exn))
623
624


625
626
(* II. Build skeleton *)

627
628
629
630
631
632
633
634

(* A registered operator: given the typing environment, it yields the
   typed unary or binary operator. *)
type op = [ `Unary of tenv -> Typed.unary_op | `Binary of tenv -> Typed.binary_op ]

(* Registered operators, by name. *)
let op_table : (string,op) Hashtbl.t = Hashtbl.create 31

(* Register the unary operator [f] under the name [s]. *)
let register_unary_op s f =
  let entry : op = `Unary f in
  Hashtbl.add op_table s entry

(* Register the binary operator [f] under the name [s]. *)
let register_binary_op s f =
  let entry : op = `Binary f in
  Hashtbl.add op_table s entry

(* Most recently registered operator named [s]; raises [Not_found]
   when there is none. *)
let find_op s = Hashtbl.find op_table s


635
(* Sets of free variables of expressions. *)
module Fv = IdSet

(* A typed branch together with the branches found in its body. *)
type branch = Branch of Typed.branch * branch list

(* Branches collected so far at the current nesting level; maintained
   by [branches]. *)
let cur_branch : branch list ref = ref []
640

641
(* [exp loc fv e] pairs the free variables [fv] with a typed
   expression of descriptor [e] located at [loc]; its type starts as
   the empty type.
   The stray line-number lines inside the record have been removed. *)
let exp loc fv e =
  fv,
  { Typed.exp_loc = loc;
    Typed.exp_typ = Types.empty;
    Typed.exp_descr = e;
  }
647
648


649
650
(* [expr glb loc e] translates the syntactic expression [e] into a
   pair (free variables, typed expression).  [loc] is the location of
   the innermost enclosing [LocatedExpr]; [glb] is the typing
   environment, extended locally by [NamespaceIn].
   The stray line-number lines interleaved with the cases have been
   removed. *)
let rec expr glb loc = function
  | LocatedExpr (loc,e) -> expr glb loc e
  | Forget (e,t) ->
      let (fv,e) = expr glb loc e and t = typ glb t in
      exp loc fv (Typed.Forget (e,t))
  | Var s ->
      exp loc (Fv.singleton s) (Typed.Var s)
  | Apply (e1,e2) ->
      let (fv1,e1) = expr glb loc e1 and (fv2,e2) = expr glb loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Apply (e1,e2))
  | Abstraction a ->
      let iface = List.map (fun (t1,t2) -> (typ glb t1, typ glb t2))
                    a.fun_iface in
      (* the type of the function is the intersection of the arrows
         of its interface *)
      let t = List.fold_left
                (fun accu (t1,t2) -> Types.cap accu (Types.arrow t1 t2))
                Types.any iface in
      let iface = List.map
                    (fun (t1,t2) -> (Types.descr t1, Types.descr t2))
                    iface in
      let (fv0,body) = branches glb a.fun_body in
      (* the name of the function, if any, is bound in its own body *)
      let fv = match a.fun_name with
        | None -> fv0
        | Some f -> Fv.remove f fv0 in
      let e = Typed.Abstraction
                { Typed.fun_name = a.fun_name;
                  Typed.fun_iface = iface;
                  Typed.fun_body = body;
                  Typed.fun_typ = t;
                  Typed.fun_fv = fv
                } in
      exp loc fv e
  | Cst c ->
      exp loc Fv.empty (Typed.Cst (const glb loc c))
  | Pair (e1,e2) ->
      let (fv1,e1) = expr glb loc e1 and (fv2,e2) = expr glb loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
      let (fv1,e1) = expr glb loc e1 and (fv2,e2) = expr glb loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.Dot (e,l))
  | RemoveField (e,l) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.RemoveField (e,l))
  | RecordLitt r ->
      let fv = ref Fv.empty in
      let r = LabelMap.map
                (fun e ->
                   let (fv2,e) = expr glb loc e
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Op (op,le) ->
      let (fvs,ltes) = List.split (List.map (expr glb loc) le) in
      let fv = List.fold_left Fv.cup Fv.empty fvs in
      (* an unregistered operator, or a number of arguments that does
         not match its arity, is an internal error *)
      (try
         (match (ltes,find_op op) with
            | [e], `Unary op -> exp loc fv (Typed.UnaryOp (op glb, e))
            | [e1;e2], `Binary op -> exp loc fv (Typed.BinaryOp (op glb, e1,e2))
            | _ -> assert false)
       with Not_found -> assert false)

  | Match (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
  | Map (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
  | Xtrans (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
  | Validate (e,schema,elt) ->
      let (fv,e) = expr glb loc e in
      exp loc fv (Typed.Validate (e, schema, elt))
  | Try (e,b) ->
      let (fv1,e) = expr glb loc e
      and (fv2,b) = branches glb b in
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
  | NamespaceIn (pr,ns,e) ->
      let glb = register_ns_prefix glb pr ns in
      expr glb loc e

  (* [branches glb b] translates a list of (pattern, body) branches.
     It returns the free variables of the bodies that are not bound by
     the corresponding pattern, and the typed branches together with
     the union of the types accepted by their patterns. *)
  and branches glb b =
    let fv = ref Fv.empty in
    let accept = ref Types.empty in
    let branch (p,e) =
      (* the branches met inside the body are collected separately and
         attached to this branch *)
      let cur_br = !cur_branch in
      cur_branch := [];
      let (fv2,e) = expr glb noloc e in
      let br_loc = merge_loc p.loc e.Typed.exp_loc in
      let p = pat glb p in
      let fv2 = Fv.diff fv2 (Patterns.fv p) in
      fv := Fv.cup !fv fv2;
      accept := Types.cup !accept (Types.descr (Patterns.accept p));
      let br =
        {
          Typed.br_loc = br_loc;
          (* a branch without a real location starts as already used *)
          Typed.br_used = br_loc = noloc;
          Typed.br_pat = p;
          Typed.br_body = e } in
      cur_branch := Branch (br, !cur_branch) :: cur_br;
      br in
    let b = List.map branch b in
    (!fv,
     {
       Typed.br_typ = Types.empty;
       Typed.br_branches = b;
       Typed.br_accept = !accept;
       Typed.br_compiled = None;
     }
    )
772

773
(* Entry point of the translation of expressions: starts without a
   real location. *)
let expr glb = expr glb noloc

(* [let_decl glb p e] builds the typed declaration binding the
   pattern [p] to the expression [e].  The free variables of [e] are
   not used.
   The stray line-number lines inside the body have been removed. *)
let let_decl glb p e =
  let (_,e) = expr glb e in
  { Typed.let_pat = pat glb p;
    Typed.let_body = e;
    Typed.let_compiled = None }

781
782
783
784
785

(* Hide global "typing/parsing" environment *)

(* The global environment: initially no named type, an empty
   namespace table and no location.
   The stray line-number lines inside the record have been removed. *)
let glb = State.ref "Typer.glb_env"
            { tenv_names = TypeEnv.empty;
              tenv_nspref = Ns.empty_table;
              tenv_loc = noloc }

(* From here on, the translation functions are shadowed by versions
   that use the current global environment [!glb]. *)
let pat p = pat !glb p
let typ t = typ !glb t
let expr e = expr !glb e
let let_decl p e = let_decl !glb p e

(* Replaces the global environment by the one extended with the
   definitions [l]. *)
let register_global_types l = glb := register_global_types !glb l
let dump_global_types ppf = dump_global_types ppf !glb

(* Replaces the global environment by the one where the prefix [p] is
   bound to [ns]. *)
let register_ns_prefix p ns = glb := register_ns_prefix !glb p ns

799
800
801
(* III. Type-checks *)

type env = Types.descr Env.t
802
803
804

open Typed

805
806
(* [require loc t s] checks that [t] is a subtype of [s]; otherwise it
   raises [Constraint (t, s)] located at [loc]. *)
let require loc t s =
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
807

808
809
810
(* Same check as [require], but returns [t] when it succeeds. *)
let check loc t s =
  let () = require loc t s in
  t

811
812
813
814
815
(* [check_str loc ofs t s] returns [t] when it is a subtype of [s];
   otherwise it raises [Constraint (t, s)] located at the character
   offset [ofs] of [loc]. *)
let check_str loc ofs t s =
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

(* [should_have loc constr s] raises [ShouldHave (constr,s)] located
   at [loc].
   The stray line-number lines inside the body have been removed. *)
let should_have loc constr s =
  raise_loc loc (ShouldHave (constr,s))

(* Same, located at the character offset [ofs] of [loc]. *)
let should_have_str loc ofs constr s =
  raise_loc_str loc ofs (ShouldHave (constr,s))

821
822
823
824
825
826
827
828
829
830
831
(* [flatten loc arg constr precise] types an expression whose result
   is flattened.  [arg] type-checks the underlying expression against
   a given constraint and precision flag.  [constr'] is the sequence
   type built from the approximation of the sequence part of [constr].
   When [constr'] is a subtype of [constr], [arg] is run with the
   requested precision and [constr] itself is returned if no precise
   result was asked for.  Otherwise [arg] is always run in precise
   mode and its result is flattened.  [loc] is not used. *)
let flatten loc arg constr precise =
  let elem_approx = Sequence.approx (Types.cap Sequence.any constr) in
  let constr' = Sequence.star elem_approx in
  let sconstr' = Sequence.star constr' in
  if Types.subtype constr' constr then begin
    let t = arg sconstr' precise in
    if precise then Sequence.flatten t else constr
  end else
    Sequence.flatten (arg sconstr' true)
832

833
834
(* [type_check env e constr precise] type-checks the expression [e] in
   environment [env] against the constraint [constr] and returns a type.

   The work is delegated to [type_check'] on [e.exp_descr] at [e.exp_loc].
   When [precise] is false the computed type is discarded and [constr]
   itself is returned.  In both cases the returned type is also merged into
   [e.exp_typ] with [Types.cup]. *)
let rec type_check env e constr precise =
  let d = type_check' e.exp_loc env e.exp_descr constr precise in
  let d = if precise then d else constr in
  e.exp_typ <- Types.cup e.exp_typ d;
  d

839
(* [type_check' loc env e constr precise] dispatches on the shape of the
   expression description [e] and returns its type; [loc] is the location
   used for error reporting.  Most arms finish with [check loc t constr],
   which raises a located [Constraint] error unless the computed type is a
   subtype of [constr]. *)
and type_check' loc env e constr precise = match e with
  | Forget (e,t) ->
      (* The sub-expression is checked against [t]; the result is [t]. *)
      let t = Types.descr t in
      ignore (type_check env e t false);
      check loc t constr

  | Abstraction a ->
      let t =
        try Types.Arrow.check_strenghten a.fun_typ constr
        with Not_found ->
          should_have loc constr
            "but the interface of the abstraction is not compatible"
      in
      (* A named function is visible in its own body. *)
      let env = match a.fun_name with
        | None -> env
        | Some f -> Env.add f a.fun_typ env in
      (* For each (t1,t2) in the interface: the branches must accept all of
         [t1], and the body is checked against [t2]. *)
      List.iter
        (fun (t1,t2) ->
           let acc = a.fun_body.br_accept in
           if not (Types.subtype t1 acc) then
             raise_loc loc (NonExhaustive (Types.diff t1 acc));
           ignore (type_check_branches loc env t1 a.fun_body t2 false)
        ) a.fun_iface;
      t

  | Match (e,b) ->
      let t = type_check env e b.br_accept true in
      type_check_branches loc env t b constr precise

  | Try (e,b) ->
      (* The handler branches are typed against [Types.any]. *)
      let te = type_check env e constr precise in
      let tb = type_check_branches loc env Types.any b constr precise in
      Types.cup te tb

  | Pair (e1,e2) ->
      type_check_pair loc env e1 e2 constr precise

  | Xml (e1,e2) ->
      type_check_pair ~kind:`XML loc env e1 e2 constr precise

  | RecordLitt r ->
      type_record loc env r constr precise

  | Map (e,b) ->
      type_map loc env false e b constr precise

  | Transform (e,b) ->
      flatten loc (type_map loc env true e b) constr precise

  | Apply (e1,e2) ->
      let t1 = type_check env e1 Types.Arrow.any true in
      let t1 = Types.Arrow.get t1 in
      let dom = Types.Arrow.domain t1 in
      (* The argument is typed precisely only when [Types.Arrow.need_arg]
         says its type is needed to compute the result. *)
      let res =
        if Types.Arrow.need_arg t1 then
          let t2 = type_check env e2 dom true in
          Types.Arrow.apply t1 t2
        else
          (ignore (type_check env e2 dom false); Types.Arrow.apply_noarg t1)
      in
      check loc res constr

  | UnaryOp (o,e) ->
      (* The operator's own typer receives a checker for its operand. *)
      let t = o.un_op_typer loc
                (type_check env e) constr precise in
      check loc t constr

  | BinaryOp (o,e1,e2) ->
      let t = o.bin_op_typer loc
                (type_check env e1)
                (type_check env e2) constr precise in
      check loc t constr

  | Var s ->
      let t =
        try Env.find s env
        with Not_found -> raise_loc loc (UnboundId s) in
      check loc t constr

  | Cst c ->
      check loc (Types.constant c) constr

  | String (i,j,s,e) ->
      (* String literals are typed character by character, starting at
         offset 0. *)
      type_check_string loc env 0 s i j e constr precise

  | Dot (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t =
        try (Types.Record.project t l)
        with Not_found -> raise_loc loc (WrongLabel(t,l))
      in
      check loc t constr

  | RemoveField (e,l) ->
      let t = type_check env e Types.Record.any true in
      let t = Types.Record.remove_field t l in
      check loc t constr

  | Xtrans (e,b) ->
      let t = type_check env e Sequence.any true in
      (* For each type handed over by [Sequence.map_tree]: the result of the
         branches, paired with the part of the type they do not accept. *)
      let t =
        Sequence.map_tree
          (fun t ->
             let resid = Types.diff t b.br_accept in
             let res = type_check_branches loc env t b Sequence.any true in
             (res,resid)
          ) t in
      check loc t constr

  | Validate (e, schema_name, elt_name) ->
      ignore (type_check env e Types.any false);
      (* NOTE(review): [Hashtbl.find] raises a bare, unlocated [Not_found]
         if the element is not registered in [schema_elements]; presumably
         an earlier phase guarantees it is present -- confirm. *)
      let t = fst (Hashtbl.find !schema_elements (schema_name, elt_name)) in
      check loc t constr
952

953
(* [type_check_pair ?kind loc env e1 e2 constr precise] types the pair (or,
   with [~kind:`XML], the XML element) built from [e1] and [e2].

   [constr] is first decomposed with [Types.Product.normal]; an empty
   decomposition raises [ShouldHave].  [e1] is checked against the first
   projection, then [e2] against the constraint that the type of [e1]
   leaves for the second component.  If that constraint is empty,
   [ShouldHave2] is raised.  Returns the product (or XML) type of the two
   components when [precise] holds, and [constr] otherwise. *)
and type_check_pair ?(kind=`Normal) loc env e1 e2 constr precise =
  let rects = Types.Product.normal ~kind constr in
  if Types.Product.is_empty rects then
    (match kind with
      | `Normal -> should_have loc constr "but it is a pair"
      | `XML -> should_have loc constr "but it is an XML element");
  (* The first component is typed precisely when the caller wants a precise
     result or when [Types.Product.need_second] holds for [rects]. *)
  let need_s = Types.Product.need_second rects in
  let t1 = type_check env e1 (Types.Product.pi1 rects) (precise || need_s) in
  let c2 = Types.Product.constraint_on_2 rects t1 in
  if Types.is_empty c2 then
    raise_loc loc (ShouldHave2 (constr,"but the first component has type",t1));
  let t2 = type_check env e2 c2 precise in
  if precise then begin
    match kind with
      | `Normal -> Types.times (Types.cons t1) (Types.cons t2)
      | `XML -> Types.xml (Types.cons t1) (Types.cons t2)
  end else
    constr

973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
(* [type_check_string loc env ofs s i j e constr precise] types the string
   literal stored in [s] between indices [i] and [j], followed by the tail
   expression [e], as nested pairs of characters.

   When [i] equals [j] the literal is exhausted and [e] is checked against
   [constr].  Otherwise [constr] must decompose as a product (else
   [ShouldHave] is raised at character offset [ofs]); the singleton type of
   the next character is checked against the first projection, and the rest
   of the string is checked at offset [ofs + 1] against the remaining
   constraint.  Returns the product type when [precise] holds, and [constr]
   otherwise. *)
and type_check_string loc env ofs s i j e constr precise =
  if U.equal_index i j then type_check env e constr precise
  else
    let rects = Types.Product.normal constr in
    if Types.Product.is_empty rects then
      should_have_str loc ofs constr "but it is a string"
    else begin
      let (code, i') = U.next s i in
      let tch = Types.constant (Types.Char (Chars.mk_int code)) in
      let t1 = check_str loc ofs tch (Types.Product.pi1 rects) in
      let c2 = Types.Product.constraint_on_2 rects t1 in
      let t2 = type_check_string loc env (ofs + 1) s i' j e c2 precise in
      if precise then Types.times (Types.cons t1) (Types.cons t2)
      else constr
    end

990
991
992
993
994
995
996
997
998
999
1000
(* [type_record loc env r constr precise] types the record literal [r]
   against [constr].

   [ShouldHave] is raised when [constr] contains no record type, when a
   field of [r] has an empty projection in the running constraint, or when
   the constraint left after all fields does not admit the empty record.
   Each field is typed precisely and used to refine the running constraint
   with [Types.Record.condition].  The result is the record type built from
   the field types, checked against [constr].  [precise] is not consulted.

   TODO: try to get rid of precise = true for values of fields.
   TODO: use the equivalent of need_second to optimize. *)
and type_record loc env r constr precise =
  if not (Types.Record.has_record constr) then
    should_have loc constr "but it is a record";
  let (rconstr,res) =
    List.fold_left
      (fun (rconstr,res) (l,e) ->
         (* could compute (split l e) once... *)
         let pi = Types.Record.project_opt rconstr l in
         if Types.is_empty pi then
           (let l = Label.to_string (LabelPool.value l) in
            should_have loc constr
              (Printf.sprintf "Field %s is not allowed here." l));
         let t = type_check env e pi true in
         let rconstr = Types.Record.condition rconstr l t in
         let res = (l,Types.cons t) :: res in
         (rconstr,res)
      ) (constr, []) (LabelMap.get r)
  in
  if not (Types.Record.has_empty_record rconstr) then
    should_have loc constr "More fields should be present";
  let t =
    Types.record' (false, LabelMap.from_list (fun _ _ -> assert false) res)
  in
  check loc t constr
1016

1017

1018
(* [type_check_branches loc env targ brs constr precise] types the branches
   [brs] applied to an argument of type [targ].

   An empty [targ] yields [Types.empty] without visiting any branch.
   Otherwise [targ] is merged into [brs.br_typ] and the branches are
   processed by [branches_aux].  Its accumulator starts at [Types.empty]
   when [precise] holds and at [constr] otherwise. *)
and type_check_branches loc env targ brs constr precise =
  if Types.is_empty targ then Types.empty
  else (
    brs.br_typ <- Types.cup brs.br_typ targ;
    branches_aux loc env targ
      (if precise then Types.empty else constr)
      constr precise brs.br_branches
  )
1026
    
1027
(* [branches_aux loc env targ tres constr precise branches] walks the list
   of branches in order.  [targ] is the part of the argument type not yet
   covered by earlier branches, and [tres] accumulates the result type.

   A branch whose pattern cannot match [targ] is skipped.  Otherwise the
   branch is marked as used and its body is typed in [env] extended with
   the bindings that [Patterns.filter] produces for the matched part.  The
   type of the body is merged into [tres] only when [precise] holds.  The
   walk stops once nothing of [targ] remains. *)
and branches_aux loc env targ tres constr precise = function
  | [] -> tres
  | b :: rem ->
      let p = b.br_pat in
      let acc = Types.descr (Patterns.accept p) in

      let targ' = Types.cap targ acc in
      if Types.is_empty targ'
      then branches_aux loc env targ tres constr precise rem
      else
        ( b.br_used <- true;
          let res = Patterns.filter targ' p in
          let env' = List.fold_left
                       (fun env (x,t) -> Env.add x (Types.descr t) env)
                       env res in
          let t = type_check env' b.br_body constr precise in
          let tres = if precise then Types.cup t tres else tres in
          (* What is left of the argument type for the later branches. *)
          let targ'' = Types.diff targ acc in
          if (Types.non_empty targ'') then
            branches_aux loc env targ'' tres constr precise rem
          else
            tres
        )
1050

1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
(* [type_map loc env def e b constr precise] types the application of the
   branches [b] to every element of the sequence [e].

   With [def] false, [e] must have type [Sequence.star b.br_accept].  With
   [def] true, any sequence is allowed, and [Sequence.nil_type] is added to
   the result for element types not entirely accepted by [b].

   The element constraint is [Sequence.approx (Types.cap Sequence.any
   constr)].  If its star is a subtype of [constr], the result needs no
   further check; otherwise the branches are typed precisely and the
   resulting sequence type is checked against [constr].

   Note: could be more precise by integrating the decomposition of constr
   inside Sequence.map. *)
and type_map loc env def e b constr precise =
  let input_constr =
    match def with
    | true -> Sequence.any
    | false -> Sequence.star b.br_accept in
  let tseq = type_check env e input_constr true in

  let item_constr = Sequence.approx (Types.cap Sequence.any constr) in
  let exact = Types.subtype (Sequence.star item_constr) constr in
  let branch_precise = precise || not exact in
  (* Type of the result for one element type [ti] of the input sequence. *)
  let on_item ti =
    let r = type_check_branches loc env ti b item_constr branch_precise in
    if def && not (Types.subtype ti b.br_accept)
    then Types.cup r Sequence.nil_type
    else r
  in
  let res = Sequence.map on_item tseq in
  if exact then res else check loc res constr

1072
1073
1074
1075
1076
1077
1078
1079
1080
(* [type_let_decl env l] types the body of the let-declaration [l] precisely
   against the type accepted by its pattern, then filters the resulting
   type through the pattern.  Returns the list of (variable, type)
   bindings. *)
and type_let_decl env l =
  let accepted = Types.descr (Patterns.accept l.let_pat) in
  let tbody = type_check env l.let_body accepted true in
  let bindings = Patterns.filter tbody l.let_pat in
  let to_descr (x, node) = (x, Types.descr node) in
  List.map to_descr bindings

(* [type_rec_funs env l] types a group of mutually recursive functions.

   Every element of [l] must be a named [Abstraction]; anything else trips
   [assert false].  The declared types of all the functions are added to
   [env] before each body is checked (non-precisely, against [Types.any]).
   Returns the (name, declared type) pairs, in reverse order of [l]. *)
and type_rec_funs env l =
  let types =
    List.fold_left
      (fun accu -> function
         | { exp_descr=Abstraction { fun_typ = t; fun_name = Some f } } ->
             (f,t) :: accu
         | _ -> assert false
      ) [] l
  in
  let env' = List.fold_left (fun env (x,t) -> Env.add x t env) env types in
  List.iter (fun e -> ignore (type_check env' e Types.any false)) l;
  types

1091
1092

(* [unused_branches b] walks the list of [Branch (br, sub)] nodes in [b].
   A branch whose [br_used] flag is unset gets a warning at its location,
   and its sub-branches are not visited.  Sub-branches of a used branch are
   examined recursively. *)
let rec unused_branches b =
  List.iter
    (fun (Branch (br,s)) ->
       if not br.br_used
       then warning br.br_loc "This branch is not used"
       else unused_branches s
    )
    b

(* Emit the unused-branch warnings for everything recorded in [cur_branch],
   then reset [cur_branch] to the empty list. *)
let report_unused_branches () =
  let pending = !cur_branch in
  unused_branches pending;
  cur_branch := []
1104

1105
1106
  (* Schema stuff from now on ... *)

1107
(* Debug flag, hard-wired to [true].
   NOTE(review): its uses are outside this excerpt -- confirm whether it is
   meant to stay enabled. *)
let debug = true
1108
1109
1110
1111
1112
1113

  (** conversion from XML Schema types (including global elements and
  attributes) to CDuce Types.descr *)
module Schema_converter =
  struct

1114
1115
    open Printf
    open Schema_types
1116
1117
1118
1119

    (* auxiliary functions *)

      (* build a regexp Elem from a Types.descr *)
1120
    let mk_re_elt descr = PElem descr
1121
1122
1123
1124

    (* conversion functions *)

    let cd_type_of_simple_type = function
1125
      | SBuilt_in name -> PType (Schema_builtin.cd_type_of_builtin name)
1126
1127
      | SUser_defined (_, _, _, _) -> assert false (* TODO *)

1128
1129
    let complex_memo = Hashtbl.create 213

1130
    let rec regexp_of_term = function
1131
      | All [] | Choice [] | Sequence [] -> PEpsilon
1132
1133
      | Choice (hd :: tl) ->
          List.fold_left
1134
            (fun acc particle -> PAlt (acc, regexp_of_particle particle))
1135
            (regexp_of_particle hd) tl
1136
      | All (hd :: tl) | Sequence (hd :: tl) ->
1137
          List.fold_left
1138
            (fun acc particle -> PSeq (acc, regexp_of_particle particle))
Pietro Abate's avatar