typer.ml 52.5 KB
Newer Older
1
2
3
open Location
open Ast
open Ident
4

5
6
7
8
9
10
(* Shadow the polymorphic comparison operators with versions restricted to
   [int], so that comparing values of any other type with these operators
   is rejected by the type checker. *)
let ( = ) : int -> int -> bool = ( = )
let ( <= ) : int -> int -> bool = ( <= )
let ( < ) : int -> int -> bool = ( < )
let ( >= ) : int -> int -> bool = ( >= )
let ( > ) : int -> int -> bool = ( > )

11
(* [warning loc msg] prints the warning [msg] for the source location
   [loc].  Output goes to the HTML formatter of the current viewport when
   that viewport is an HTML one, and to [Format.err_formatter] otherwise. *)
let warning loc msg =
  let viewport = Location.get_viewport () in
  let out =
    if Html.is_html viewport then Html.ppf viewport
    else Format.err_formatter in
  Format.fprintf out "Warning %a:@\n" Location.print_loc (loc,`Full);
  Location.html_hilight (loc,`Full);
  Format.fprintf out "%s@." msg
17

18
19
20
21
22
23
24
25
(* Exceptions describing typing errors.
   NOTE(review): most raise sites are outside this part of the file, so
   the exact meaning of each payload should be confirmed there.  In the
   visible code, [UnboundExtId] is raised by [find_value_global] and
   [Error] by [error], [find_schema] and [find_schema_descr]. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string
26
27
(* Carries a message and a type.  NOTE(review): presumably a non-fatal
   diagnostic; no raise site is visible in this part of the file. *)
exception Warning of string * Types.t

28
29
30
31
(* [raise_loc loc exn] raises [exn] wrapped in a [Location] exception that
   designates the whole of [loc]. *)
let raise_loc loc exn =
  let located = Location (loc,`Full,exn) in
  raise located

(* Same as [raise_loc], but designates the character at offset [ofs]
   within [loc]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc,`Char ofs,exn) in
  raise located

(* [error loc msg] raises the generic [Error msg] located at [loc]. *)
let error loc msg = raise (Location (loc,`Full,Error msg))

32
33
(* What an identifier of the environment is bound to. *)
type item =
  | Type of Types.t   (* a type (entered with [enter_type]) *)
  | Val of Types.t    (* a value, with its type (entered with [enter_value]) *)
35

36
37
38
39
40
(* Kinds of external units that a name of the environment can denote. *)
type ext =
  | ECDuce of Types.CompUnit.t   (* CDuce unit *)
  | EOCaml of string             (* OCaml module, by name *)
  | ESchema of string            (* XML Schema, by URI *)

41
42
(* Maps keyed by [U.t]; used for the table of external units. *)
module UEnv = Map.Make(U)

43
(* Typing environment. *)
type t = {
  ids : item Env.t;   (* identifiers bound to types or values *)
  ns: Ns.table;       (* namespace table *)
  cu: ext UEnv.t;     (* external units, by name *)
}
48

49
50
51
52
53
(* Forward references to functions that are not available here.  Every
   default fails with [assert false] when called, except [has_ocaml_unit]
   whose default answers [false].
   NOTE(review): presumably assigned by other modules at start-up; the
   assignments are not visible in this part of the file. *)
let load_schema = ref (fun _ _ -> assert false)
let from_comp_unit = ref (fun _ -> assert false)
let has_comp_unit = ref (fun _ -> assert false)
let has_ocaml_unit = ref (fun _ -> false)
let has_static_external = ref (fun _ -> assert false)
54

55
(* Schemas loaded so far, keyed by URI (filled by [type_schema]). *)
let schemas = Hashtbl.create 13
56

57
(* [type_schema env x uri] binds the unit name [x] to the XML Schema
   located at [uri].  The schema is loaded through [!load_schema] and
   cached in [schemas] the first time its URI is seen. *)
let type_schema env x uri =
  let already_loaded = Hashtbl.mem schemas uri in
  if not already_loaded then begin
    let loaded = !load_schema x uri in
    Hashtbl.add schemas uri loaded
  end;
  { env with cu = UEnv.add x (ESchema uri) env.cu }
61

62
(* TODO: filter out builtin defs ? *)
63
64
65
66
(* Writes an item as a one-bit tag (0 for [Type], 1 for [Val]) followed by
   the serialized type. *)
let serialize_item s item =
  let tag, t = match item with
    | Type t -> 0, t
    | Val t -> 1, t in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

67
(* Writes an environment to [s]: first the identifier bindings, then the
   namespace table, then the list of (name, URI) pairs of the XML Schemas
   bound in [env.cu].  Other kinds of external units are not written. *)
let serialize s env =
  Serialize.Put.env Id.serialize serialize_item Env.iter s env.ids;
  Ns.serialize_table s env.ns;
  let collect name unit_ accu = match unit_ with
    | ESchema uri -> (name,uri) :: accu
    | ECDuce _ | EOCaml _ -> accu in
  let schema_uris = UEnv.fold collect env.cu [] in
  let put_schema = Serialize.Put.pair U.serialize Serialize.Put.string in
  Serialize.Put.list put_schema s schema_uris
78

79
80
81
82
83
(* Reads back an item written by [serialize_item]: a one-bit tag, then the
   type. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

84
(* Reads back an environment written by [serialize], in the same order:
   identifier bindings, namespace table, schema list.  Each schema is
   re-entered with [type_schema], which loads it if it is not cached yet. *)
let deserialize s =
  let ids =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let ns = Ns.deserialize_table s in
  let get_schema = Serialize.Get.pair U.deserialize Serialize.Get.string in
  let schs = Serialize.Get.list get_schema s in
  let add_schema env (name,uri) = type_schema env name uri in
  List.fold_left add_schema { ids = ids; ns = ns; cu = UEnv.empty } schs
94
95


96
97
(* The environment with no identifier, an empty namespace table and no
   external unit. *)
let empty_env =
  { ids = Env.empty; ns = Ns.empty_table; cu = UEnv.empty }

102
(* [enter_cu x cu env] binds the unit name [x] to the CDuce unit [cu]. *)
let enter_cu x cu env =
  let units = UEnv.add x (ECDuce cu) env.cu in
  { env with cu = units }
104

105
(* [find_cu loc x env] resolves the external unit named [x].  Units bound
   in [env] come first; otherwise [x] is tried as a CDuce unit, then as an
   OCaml unit.  When nothing matches, an error located at [loc] is raised. *)
let find_cu loc x env =
  let lookup_outside () =
    if !has_comp_unit x then ECDuce (Types.CompUnit.mk x)
    else if !has_ocaml_unit x then EOCaml (U.get_str x)
    else error loc ("Cannot find external unit " ^ (U.to_string x)) in
  try UEnv.find x env.cu
  with Not_found -> lookup_outside ()
111
112


113
let find_schema x env =
114
115
116
117
118
119
  try 
    (match UEnv.find x env.cu with
      | ESchema s -> s 
      | _ -> raise Not_found)
  with Not_found -> 
    raise (Error (Printf.sprintf "%s: no such schema" (U.to_string x)))
120

121
122
123
124
125
126
127
128
(* [enter_type id t env] binds [id] to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* [enter_types l env] binds every [(id,t)] of [l] as a type, from left to
   right. *)
let enter_types l env =
  let add ids (id,t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left add env.ids l }
(* [find_type id env] returns the type bound to [id].  Raises [Not_found]
   when [id] is unbound or bound to a value. *)
let find_type id env =
  match Env.find id env.ids with
    | Val _ -> raise Not_found
    | Type t -> t
130

131

132
(* [enter_value id t env] binds [id] to a value of type [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }
134
135
(* [enter_values l env] binds every [(id,t)] of [l] as a value, from left
   to right. *)
let enter_values l env =
  let add ids (id,t) = Env.add id (Val t) ids in
  { env with ids = List.fold_left add env.ids l }
137
138
139
(* [enter_values_dummy l env] binds every identifier of [l] as a value
   whose type is the placeholder [Types.empty]. *)
let enter_values_dummy l env =
  let add ids id = Env.add id (Val Types.empty) ids in
  { env with ids = List.fold_left add env.ids l }
140
141
(* [find_value id env] returns the type of the value bound to [id].
   Raises [Not_found] when [id] is unbound or bound to a type. *)
let find_value id env =
  match Env.find id env.ids with
    | Type _ -> raise Not_found
    | Val t -> t
144
145
146
(* [find_value_global loc cu id env] returns the type of the value [id]
   exported by the compilation unit [cu]; raises [UnboundExtId] located at
   [loc] when there is none.  [env] is not consulted. *)
let find_value_global loc cu id env =
  let unbound () = raise_loc loc (UnboundExtId (cu,id)) in
  try find_value id (!from_comp_unit cu)
  with Not_found -> unbound ()
147
	
148
149
150
151
152
153
(* [value_name_ok id env] is [false] exactly when [id] is currently bound
   to a type; unbound identifiers and identifiers bound to values are
   accepted. *)
let value_name_ok id env =
  let bound_to_type =
    try (match Env.find id env.ids with Type _ -> true | Val _ -> false)
    with Not_found -> false in
  not bound_to_type

154
155
156
157
(* [iter_values env f] calls [f id t] for every value binding of [env];
   type bindings are skipped. *)
let iter_values env f =
  let visit x item = match item with
    | Val t -> f x t
    | Type _ -> () in
  Env.iter visit env.ids
158

159

160
(* [register_types cu env] passes every type binding of [env] to
   [Types.Print.register_global], under the unit [cu] and the name of the
   identifier.  Value bindings are ignored. *)
let register_types cu env =
  let register x item = match item with
    | Type t -> Types.Print.register_global cu (Ident.value x) t
    | Val _ -> () in
  Env.iter register env.ids
164

165

166
(* Namespaces *)
167

168
(* Installs the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table
170

171
(* Returns the namespace table of the environment [tenv]. *)
let get_ns_table tenv = tenv.ns
172

173
(* [type_ns env p ns] adds the prefix [p], bound to the namespace [ns], to
   the namespace table of [env]. *)
let type_ns env p ns =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
175

176
177
178
179
180
(* [protect_error_ns loc f x] computes [f x], turning an unknown namespace
   prefix into an error located at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
181

182
183
184
(* Resolves the tag name [t] with [Ns.map_tag] against the namespace table
   of [env]; an unknown prefix is reported at [loc]. *)
let qname env loc t =
  let resolve = Ns.map_tag env.ns in
  protect_error_ns loc resolve t
    
185
186
187
188
189
190
191
192
193
194
(* Resolves the name [t] with [Ns.map_attr] and turns the result into an
   identifier; an unknown prefix is reported at [loc]. *)
let ident env loc t =
  Ident.ident (protect_error_ns loc (Ns.map_attr env.ns) t)

(* [has_value id env] tells whether the name [id], once resolved against
   the namespace table, is bound to a value.  An unknown prefix or an
   unbound identifier gives [false]. *)
let has_value id env =
  try
    let key = Ident.ident (Ns.map_attr env.ns id) in
    (match Env.find key env.ids with
      | Val _ -> true
      | Type _ -> false)
  with Not_found | Ns.UnknownPrefix _ -> false

195
(* Builds the atom named by the qualified tag name [t]. *)
let parse_atom env loc t =
  let q = qname env loc t in
  Atoms.V.of_qname q
197
198
 
(* Resolves the prefix [ns] with [Ns.map_prefix]; an unknown prefix is
   reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
200

201
(* Resolves the name [t] with [Ns.map_attr] and interns the resulting
   (namespace, local name) pair as a record label. *)
let parse_label env loc t =
  let qualified = protect_error_ns loc (Ns.map_attr env.ns) t in
  LabelPool.mk qualified
204

205
206
207
208
209
210
211
212
213
214
215
216
217
(* [parse_record env loc f r] builds a label map from the field list [r]:
   labels are resolved with [parse_label] and contents are mapped through
   [f].  A label that occurs twice is an error located at [loc]. *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let fields = List.map (fun (l,x) -> (parse_label env loc l, f x)) r in
  LabelMap.from_list on_duplicate fields

(* [const env loc e] translates the constant expression [e] into a
   [Types] constant.  [loc] is the location used for errors; it is
   replaced by the more precise one whenever a [LocatedExpr] is crossed.
   Any expression that is not a constant is rejected. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
  | LocatedExpr (loc,e) -> const env loc e
  | Pair (x,y) -> Types.Pair (sub x, sub y)
  | Xml (x,y) -> Types.Xml (sub x, sub y)
  | RecordLitt x -> Types.Record (parse_record env loc sub x)
  | String (i,j,s,c) -> Types.String (i,j,s,sub c)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
223

224

225
226
227
228
(* Names of the external units of [env] that are XML Schemas. *)
let get_schema_names env =
  let keep name unit_ names = match unit_ with
    | ESchema _ -> name :: names
    | ECDuce _ | EOCaml _ -> names in
  UEnv.fold keep env.cu []
    
229
230
(* [find_schema_component uri name] looks [name] up in the loaded schema
   [uri].  Raises [Not_found] when the schema is not in [schemas] or has
   no such component. *)
let find_schema_component uri name =
  let components = Hashtbl.find schemas uri in
  Env.find (Ident.ident name) components
231

232
233
234
(* Second half of the component found by [find_schema_component]; raises
   [Not_found] as that function does. *)
let get_schema_validator uri name =
  let (_, validator) = find_schema_component uri name in
  validator

235
(* First half of the component found by [find_schema_component].  A failed
   lookup is reported as an [Error] naming the component and the schema. *)
let find_schema_descr uri (name : Ns.qname) =
  try
    let (descr, _) = find_schema_component uri name in
    descr
  with Not_found ->
    let msg =
      Printf.sprintf "No component named '%s' found in schema '%s'"
        (Ns.QName.to_string name) uri in
    raise (Error msg)
240
241


242
243
244
245
246
247
(* [find_type_global loc cu id env] resolves the type [id] of the external
   unit named [cu]: a type exported by a CDuce unit, or a component of an
   XML Schema.  OCaml units are rejected with an error at [loc]. *)
let find_type_global loc cu id env =
  let resolved = find_cu loc cu env in
  match resolved with
    | ESchema s -> find_schema_descr s (Ident.value id)
    | EOCaml _ -> error loc "OCaml units don't export types" (* TODO *)
    | ECDuce cu -> find_type id (!from_comp_unit cu)
	
248

249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
module IType = struct
  (* A node of the intermediate representation of types and patterns.
     Every field is mutable: [desc] is overwritten when nodes are linked
     together, the other fields are caches filled on demand (0 or [None]
     meaning "not computed yet"). *)
  type node = {
    mutable desc: desc;                    (* What the node denotes *)
    mutable smallhash: int;                (* Local hash *)
    mutable rechash: int;                  (* Global (recursive) hash *)
    mutable sid: int;
      (* Sequential id used to compute rechash; the traversals below also
         use it as a "visited" mark *)
    mutable t: Types.t option;             (* Cache for [typ] *)
    mutable tnode: Types.Node.t option;    (* Cache for [typ_node] *)
    mutable p: Patterns.descr option;      (* Cache for [pat] *)
    mutable pnode: Patterns.node option;   (* Cache for [pat_node] *)
    mutable fv: fv option                  (* Cache for [fv] *)
  }
  and desc =
    | ILink of node                   (* Indirection, followed by [repr] *)
    | IType of Types.descr * int      (* A type and its [Types.hash] *)
    | IOr of node * node
    | IAnd of node * node
    | IDiff of node * node
    | ITimes of node * node
    | IXml of node * node
    | IArrow of node * node
    | IOptional of node
    | IRecord of bool * (node * node option) label_map
        (* The flag is handed to [Types.record']; the optional second node
           of a field is its or-else clause *)
    | ICapture of id
    | IConstant of id * Types.const
    | IConcat of node * node          (* Removed by [elim_concat] *)
    | IMerge of node * node           (* Removed by [elim_concat] *)
276
277
278
279
280
281
282
283

  (* Template from which every node is copied ([mk], [mk_delayed]): all
     caches are empty and [desc] is a link back to the template itself. *)
  let rec node_temp = { 
    desc = ILink node_temp;
    smallhash = 0; rechash = 0; sid = 0;
    t = None; tnode = None; p = None; pnode = None;
    fv = None
  }
			
284
(* Recursive hash-consing *)
285

286
287
288
289
290
291
  (* Hash of a record field, given the hash function [f] for nodes.  Fields
     with and without an or-else clause get different tags. *)
  let hash_field f (p, orelse) = match orelse with
    | Some e -> 1 + 17 * f p + 257 * f e
    | None -> 2 + 17 * f p

  (* [hash f n] hashes the top constructor of [n], using [f] to hash the
     sub-nodes.  Links are followed.  [IConcat] and [IMerge] nodes must
     have been eliminated before hashing. *)
  let rec hash f n =
    (* Shared arithmetic for the binary constructors; [tag] tells them
       apart.  The expression is kept in this exact shape so that [f] is
       applied to the two sub-nodes in the same order as before. *)
    let binary tag p1 p2 = tag + 17 * f p1 + 257 * f p2 in
    match n.desc with
    | ILink n -> hash f n
    | IType (_,h) -> 1 + 17 * h
    | IOr (p1,p2) -> binary 2 p1 p2
    | IAnd (p1,p2) -> binary 3 p1 p2
    | IDiff (p1,p2) -> binary 4 p1 p2
    | ITimes (p1,p2) -> binary 5 p1 p2
    | IXml (p1,p2) -> binary 6 p1 p2
    | IArrow (p1,p2) -> binary 7 p1 p2
    | IOptional p -> 8 + 17 * f p
    | IRecord (o,r) ->
        9 + (if o then 17 else 0) + 257 * (LabelMap.hash (hash_field f) r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
    | IConcat _ | IMerge _ -> assert false
305

306
307
308
309
310
  (* Bounded-depth hashes.  [hash0] hashes the top constructor only (every
     sub-node counts as 1); each following one uses the previous one for
     the sub-nodes, i.e. looks one level deeper. *)
  let hash0 = hash (fun n -> 1)
  let hash1 = hash hash0
  let hash2 = hash hash1
  let hash3 = hash hash2

311
312
  (* Local hash of [n] ([hash2]), cached in [n.smallhash].  A cached value
     of 0 means "not computed yet". *)
  let smallhash n =
    match n.smallhash with
    | 0 ->
        let h = hash2 n in
        n.smallhash <- h;
        h
    | cached -> cached
317
318

  (* [repr n] follows the chain of [ILink]s starting at [n] and returns the
     node at its end.  Every link crossed is redirected to that node (path
     compression). *)
  let rec repr m = match m.desc with
    | ILink n ->
        let z = repr n in
        m.desc <- ILink z;
        z
    | _ -> m

  (* Undo log of [link]: pairs (node, description it had before being
     linked).  Replayed or discarded by [may_unify]. *)
  let back = ref []

324
325
326
327
  (* Same result as [repr], but the link held by the argument itself is
     left as it is; only the nodes further down the chain are compressed. *)
  let prot_repr n = match n.desc with
    | ILink target -> repr target
    | _ -> n

328
329
330
331
332
333
334
335
336
  (* [link x y] turns one of the two nodes into a link to the other.  The
     node redirected is [x] if its type is not computed yet, else [y] under
     the same condition; its previous description is pushed on [back].  At
     least one of the two must have no computed type. *)
  let link x y =
    let src, dst = match x.t, y.t with
      | None, _ -> x, y
      | Some _, None -> y, x
      | Some _, Some _ -> assert false in
    back := (src,src.desc) :: !back;
    src.desc <- ILink dst

  (* Raised by [unify] when two nodes cannot be identified. *)
  exception Unify

  (* [unify x y] identifies the nodes [x] and [y] if they have the same
     structure, linking them (and, recursively, their sub-nodes) together;
     raises [Unify] otherwise.  Links are made before the sub-nodes are
     visited, so that cyclic structures terminate.  The links are recorded
     in [back]; undoing them after a failure is up to the caller. *)
  let rec unify x y =
    if x != y then begin
      let x = prot_repr x and y = prot_repr y in
      if x != y then begin
        if smallhash x != smallhash y then raise Unify;
        (* x and y have been internalized; if they were equivalent,
           they would be equal *)
        (match x.t, y.t with
          | Some _, Some _ -> raise Unify
          | _ -> ());
        match x.desc,y.desc with
          | IType (tx,_), IType (ty,_) when Types.equal tx ty -> link x y
          | IOr (x1,x2), IOr (y1,y2)
          | IAnd (x1,x2), IAnd (y1,y2)
          | IDiff (x1,x2), IDiff (y1,y2)
          | ITimes (x1,x2), ITimes (y1,y2)
          | IXml (x1,x2), IXml (y1,y2)
          | IArrow (x1,x2), IArrow (y1,y2) ->
              link x y; unify x1 y1; unify x2 y2
          | IOptional x1, IOptional y1 -> link x y; unify x1 y1
          | IRecord (xo,xr), IRecord (yo,yr) when xo == yo ->
              link x y; LabelMap.may_collide unify_field Unify xr yr
          | ICapture xv, ICapture yv when Id.equal xv yv -> ()
          | IConstant (xv,xc), IConstant (yv,yc) when
              Id.equal xv yv && Types.Const.equal xc yc -> ()
          | _ -> raise Unify
      end
    end
  (* Unifies two record fields: both must have an or-else clause, or
     neither. *)
  and unify_field (p1, e1) (p2, e2) =
    match e1, e2 with
    | Some e1, Some e2 -> unify p1 p2; unify e1 e2
    | None, None -> unify p1 p2
    | _ -> raise Unify

362

363
364
  (* [may_unify x y] tries to unify [x] and [y] and tells whether it
     succeeded.  On success the links made by [unify] are kept.  On failure
     every node recorded in [back] gets its previous description back, so a
     failed attempt leaves the graph as it was.

     The undo log is also replayed when [unify] escapes with an exception
     other than [Unify] (for instance the assertion failure raised when
     hashing an [IConcat] or [IMerge] node).  Otherwise the half-made links
     would stay in place, and the stale entries of [back] would be silently
     committed by the next successful call. *)
  let may_unify x y =
    let rollback () =
      List.iter (fun (n,d) -> n.desc <- d) !back;
      back := [] in
    try unify x y; back := []; true
    with
      | Unify -> rollback (); false
      | exn -> rollback (); raise exn

  (* Hash tables over nodes, hashed with [smallhash] and compared with
     [may_unify].  Note that the comparison has a side effect: two nodes
     found equal are linked together. *)
  module SmallHash = Hashtbl.Make(
    struct 
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

  (* Applies [f] to the node of a record field, then to its or-else clause
     if there is one. *)
  let iter_field f (x, orelse) =
    f x;
    match orelse with
      | Some y -> f y
      | None -> ()
  (* [iter f d] applies [f] to the direct sub-nodes of the description [d],
     from left to right.  Links, leaves, [IConcat] and [IMerge] are not
     entered. *)
  let iter f d = match d with
    | IOptional x -> f x
    | IRecord (_,r) -> LabelMap.iter (iter_field f) r
    | IOr (x,y) | IAnd (x,y) | IDiff (x,y)
    | ITimes (x,y) | IXml (x,y) | IArrow (x,y) -> f x; f y
    | ILink _ | IType _ | ICapture _ | IConstant _
    | IConcat _ | IMerge _ -> ()

  (* [minimize (mem,add)] returns a function that walks the nodes reachable
     from its argument.  [mem] and [add] are the membership test and the
     insertion function of a table of nodes.  A node for which [mem]
     answers [true] is left alone; any other node is inserted, and its
     sub-nodes are visited as long as its type is not computed yet. *)
  let minimize (mem,add) =
    let rec aux n =
      let n = repr n in
      if not (mem n) then begin
        let n = repr n in
        add n ();
        if n.t == None then iter aux n.desc
      end
    in aux

  (* [to_clear]: nodes whose [sid] field is currently set and must be reset
     by [clear].  [sid]: counter from which [rechash] numbers the nodes. *)
  let to_clear = ref []
  let sid = ref 0
  (* Recursive hash of [n].  The first visit of a node gives it the next
     sequential id and hashes its structure; a later visit of the same node
     (a cycle or a shared sub-node) contributes a value derived from that
     id.  The ids are left set; see [clear]. *)
  let rec rechash n =
    let n = repr n in
    match n.sid with
    | 0 ->
        incr sid;
        n.sid <- !sid;
        to_clear := n :: !to_clear;
        hash rechash n
    | id -> 17 * id
401
402

  (* Resets the id counter and the [sid] field of every node listed in
     [to_clear], then empties that list. *)
  let clear () =
    sid := 0;
    let marked = !to_clear in
    to_clear := [];
    List.iter (fun x -> x.sid <- 0) marked
405
406
407
408
409
410
411
412
413
414
415
416
417
418

  (* Cached front end of the recursive hash defined just above: the result
     is stored in [n.rechash] (0 meaning "not computed yet") and the
     temporary ids are cleared after each computation. *)
  let rechash n =
    let n = repr n in
    match n.rechash with
    | 0 ->
        let h = rechash n in
        clear ();
        n.rechash <- h;
        h
    | cached -> cached

  (* Hash tables over nodes, compared with [may_unify].
     NOTE(review): despite its name this table hashes with [smallhash], not
     [rechash]; its only visible use is in the commented-out two-phase
     code below.  Confirm whether [rechash] was intended. *)
  module RecHash = Hashtbl.Make(
    struct
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

419
420
421

(** Two-phase recursive hash-consing **)
(*
422
423
424
  let gtable = RecHash.create 17577

  let internalize n =
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
    let local = SmallHash.create 17 in
    minimize (SmallHash.mem local, SmallHash.add local) n; 
    minimize (RecHash.mem gtable, RecHash.add gtable) n;
    ()
*)

(** Single-phase hash-consing **)
  (* Global table of the nodes internalized so far. *)
  let gtable = SmallHash.create 17

  (* Hash-conses [n] and the nodes below it against the global table
     [gtable]. *)
  let internalize n =
    let mem = SmallHash.mem gtable and add = SmallHash.add gtable in
    minimize (mem, add) n



(*  let internalize n = () *)
440
441
442
443
444
445
446
447
448

(* Compute free variables *)

  (* [fv n] is the set of identifiers that occur in [ICapture] or
     [IConstant] nodes reachable from [n].  The result is cached in [n.fv].
     The traversal marks visited nodes through their [sid] field, so no
     other [sid]-based traversal may be in progress (checked by the
     assertion). *)
  let fv n =
    let acc = ref IdSet.empty in
    let rec visit n =
      let n = repr n in
      if (n.sid = 0) then begin
        n.sid <- 1;
        to_clear := n :: !to_clear;
        match n.fv, n.desc with
          | Some known, _ -> acc := IdSet.cup !acc known
          | None, (ICapture x | IConstant (x,_)) -> acc := IdSet.add x !acc
          | None, d -> iter visit d
      end
    in
    assert(!to_clear == []);
    match n.fv with
      | Some known -> known
      | None -> visit n; clear (); n.fv <- Some !acc; !acc

461
462
463
(* optimized version to check closedness *)

  (* Shared value stored in the [fv] cache of nodes known to be closed. *)
  let no_fv = Some IdSet.empty
464
465
466
  (* Raised by [peek_fv] with one free variable of the node. *)
  exception FoundFv of id
  (* [peek_fv n] returns normally when [n] has no free variable and raises
     [FoundFv x], for some free variable [x], otherwise.  Unlike [fv] it
     stops at the first variable found.  When the traversal succeeds, every
     node it visited is recorded as closed in its [fv] cache. *)
  let peek_fv n =
    let found x = raise (FoundFv x) in
    let check_known set =
      if IdSet.is_empty set then () else found (IdSet.choose set) in
    let rec visit n =
      let n = repr n in
      if (n.sid = 0) then begin
        n.sid <- 1;
        to_clear := n :: !to_clear;
        match n.fv, n.desc with
          | Some set, _ -> check_known set
          | None, (ICapture x | IConstant (x,_)) -> found x
          | None, d -> iter visit d
      end
    in
    assert(!to_clear == []);
    try
      (match n.fv with
        | Some set -> check_known set
        | None ->
            visit n;
            List.iter (fun m -> m.sid <- 0; m.fv <- no_fv) !to_clear;
            to_clear := [])
    with exn -> clear (); raise exn

489
490
491
492
493
494
495
496
497
498
499
  (* Fails with an error located at [loc] if [n] contains a capture
     variable. *)
  let check_no_fv loc n =
    try peek_fv n
    with FoundFv x ->
      let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
      raise_loc_generic loc msg

  (* [has_no_fv n] is [true] when [peek_fv] finds no free variable in [n]. *)
  let has_no_fv n =
    try peek_fv n; true
    with FoundFv _ -> false


500
(* From the intermediate representation to the internal one *)
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519


  (* [typ n] is the type denoted by [n], computed once and cached in
     [n.t]. *)
  let rec typ n =
    let n = repr n in
    match n.t with
      | None ->
          let computed = compute_typ n.desc in
          n.t <- Some computed;
          computed
      | Some cached -> cached
  (* Type denoted by a description.  Constructors are guarded by a type
     node ([typ_node]) where recursion may go through them.  Links,
     captures, constants and non-eliminated concatenations are not expected
     here. *)
  and compute_typ d = match d with
    | ILink _ -> assert false
    | ICapture _ | IConstant (_,_) -> assert false
    | IConcat _ | IMerge _ -> assert false
    | IType (t,_) -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | IRecord (o,r) ->  Types.record' (o, LabelMap.map compute_typ_field r)
521
522
523
524
525
526
  (* Type node of a record field; or-else clauses are only meaningful in
     patterns and are rejected here. *)
  and compute_typ_field (s, orelse) = match orelse with
    | None -> typ_node s
    | Some _ ->
        raise (Patterns.Error "Or-else clauses are not allowed in types")

  (* [typ_node n] is the type node associated with [n], cached in
     [n.tnode].  The fresh node is stored in the cache before its
     definition is computed, so that a recursive reference met while
     computing [typ n] finds it. *)
  and typ_node n =
    let n = repr n in
    match n.tnode with
      | Some cached -> cached
      | None ->
          let fresh = Types.make () in
          n.tnode <- Some fresh;
          Types.define fresh (typ n);
          fresh
      
  (* [pat n] is the pattern denoted by [n].  A node without capture
     variable is translated as the type constraint [typ n]; otherwise the
     pattern is computed once and cached in [n.p]. *)
  let rec pat n =
    let n = repr n in
    if IdSet.is_empty (fv n) then Patterns.constr (typ n)
    else match n.p with
      | None ->
          let computed = compute_pat n.desc in
          n.p <- Some computed;
          computed
      | Some cached -> cached

  (* Pattern denoted by a description that contains capture variables.
     Differences are accepted only when their right-hand side is closed.
     Arrows and optional fields are rejected.  A record pattern is the
     intersection of a record type constraint (built from its closed
     fields) with one record pattern per field that captures variables. *)
  and compute_pat d = match d with
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    | IDiff (s1,s2) when IdSet.is_empty (fv s2) ->
        let negated = Types.neg (typ s2) in
        Patterns.cap (pat s1) (Patterns.constr negated)
    | IDiff _ ->
        raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ ->
        raise (Patterns.Error "Optional fields are not allowed in record patterns")
    | IRecord (o,r) ->
        (* Patterns of the fields that capture variables *)
        let extra = ref [] in
        let field l (s, orelse) =
          let closed = IdSet.is_empty (fv s) in
          match orelse with
            | None when closed -> typ_node s
            | None ->
                extra := Patterns.record l (pat_node s) :: !extra;
                Types.any_node
            | Some _ when closed ->
                raise (Patterns.Error "Or-else clauses are not allowed in types")
            | Some e ->
                extra :=
                  Patterns.cup (Patterns.record l (pat_node s)) (pat e)
                  :: !extra;
                Types.Record.any_or_absent_node
        in
        let constr = Types.record' (o,LabelMap.mapi field r) in
        List.fold_left Patterns.cap (Patterns.constr constr) !extra
          (* TODO: can avoid constr when o=true, and all fields have fv *)
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IArrow _ ->
        raise (Patterns.Error "Arrows are not allowed in patterns")
    | IType _ | ILink _ | IConcat _ | IMerge _ -> assert false
581
582
      
  (* [pat_node n] is the pattern node associated with [n], cached in
     [n.pnode].  The fresh node is cached before its definition is
     computed, so that recursive references find it; the cache entry is
     removed again if the computation fails. *)
  and pat_node n =
    let n = repr n in
    match n.pnode with
      | Some cached -> cached
      | None ->
          let fresh = Patterns.make (fv n) in
          try
            n.pnode <- Some fresh;
            Patterns.define fresh (pat n);
            fresh
          with exn -> n.pnode <- None; raise exn

(* From AST to the intermediate representation *)

  (* Environment used while translating the AST of types and patterns. *)
  type penv = {
    penv_tenv : t;             (* enclosing typing environment *)
    penv_derec : node Env.t;   (* nodes of the recursive definitions in scope *)
  }

  (* Translation environment over [tenv], with no recursive definition in
     scope. *)
  let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }

603
604
  (* [IConcat] and [IMerge] nodes created by [derecurs], most recent
     first. *)
  let concats = ref []

605
606
607
608
609
610
  (* [mk d]: fresh node with description [d] and empty caches. *)
  let mk d = { node_temp with desc = d }
  (* Fresh placeholder node; its description is meant to be assigned
     later. *)
  let mk_delayed () = { node_temp with desc = ILink node_temp }
  (* Node for the type [t], stored together with its hash. *)
  let itype t = mk (IType (t, Types.hash t))
  (* Node for the empty type; [ior] and [iand] recognize it by the physical
     identity of its description. *)
  let iempty = itype Types.empty

  (* Union of two nodes; the empty node [iempty] is a neutral element. *)
  let ior p1 p2 =
    let is_empty p = p.desc == iempty.desc in
    if is_empty p1 then p2
    else if is_empty p2 then p1
    else mk (IOr (p1,p2))

  (* Intersection of two nodes; the result is [iempty] as soon as one of
     them is. *)
  let iand p1 p2 =
    let is_empty p = p.desc == iempty.desc in
    if is_empty p1 || is_empty p2 then iempty
    else mk (IAnd (p1,p2))

619
620
621
622
623
  (* Shorthands building a fresh node for the corresponding constructor. *)
  let times x y = mk (ITimes (x,y))
  let xml x y = mk (IXml (x,y))
  let record o m = mk (IRecord (o,m))
  let optional x = mk (IOptional x)

624
625
626
  (* Regular expressions over nodes, compiled away by [remove_regexp]. *)
  type regexp =
    | PElem of node          (* one element, followed by the rest *)
    | PGuard of node         (* consumes nothing; intersected with the rest *)
    | PSeq of regexp list    (* sequence; [PSeq []] is the empty word *)
    | PAlt of regexp list    (* alternative; [PAlt []] matches nothing *)
    | PStar of regexp        (* iteration; the loop branch is tried first *)
    | PWeakStar of regexp    (* iteration; the exit branch is tried first *)

632
633
634
635
636
637
638
639
  (* [nullable r] tells whether [r] can match without consuming any
     element. *)
  let rec nullable r = match r with
    | PStar _ | PWeakStar _ | PGuard _ -> true
    | PElem _ -> false
    | PAlt rl -> List.exists nullable rl
    | PSeq rl -> List.for_all nullable rl

  (* [eps]: the empty sequence.  [emp]: the empty alternative. *)
  let eps = PSeq []
  let emp = PAlt []
640
641
  (* Function forms of the [PStar] and [PElem] constructors. *)
  let star x = PStar x
  let elem x = PElem x
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657

  (* Sequence of two regexps.  Nested [PSeq] are flattened, and a sequence
     of exactly one item is that item. *)
  let seq r1 r2 =
    let items = function PSeq l -> l | x -> [ x ] in
    match items r1 @ items r2 with
      | [ x ] -> x
      | l -> PSeq l

  (* Alternative of two regexps.  Nested [PAlt] are flattened, and an
     alternative of exactly one branch is that branch. *)
  let alt r1 r2 =
    let branches = function PAlt l -> l | x -> [ x ] in
    match branches r1 @ branches r2 with
      | [ x ] -> x
      | l -> PAlt l

  (* Merges adjacent [PElem] branches of an alternative into a single
     element whose node is their union. *)
  let rec merge_alt l = match l with
    | [] -> []
    | PElem p :: PElem q :: rest -> merge_alt (PElem (ior p q) :: rest)
    | r :: rest -> r :: (merge_alt rest)
661
662
663
664
665
666
667
668
669

(* Works only for types, not patterns, because
   [ (x&Int|_) R' ] is possible *)
  (* Simplifies a regexp: adjacent element branches of alternatives are
     merged and weak stars become plain stars. *)
  let rec simplify_regexp r = match r with
    | PSeq l -> PSeq (List.map simplify_regexp l)
    | PAlt l -> PAlt (merge_alt (List.map simplify_regexp l))
    | PStar body | PWeakStar body -> PStar (simplify_regexp body)
    | PElem _ | PGuard _ -> r

670
671
672
673
674
675
676
677
678
  (* Debugging printer for the shape of a regexp; the nodes themselves are
     not printed. *)
  let rec print_regexp ppf r = match r with
    | PElem _ -> Format.pp_print_string ppf "Elem"
    | PGuard _ -> Format.pp_print_string ppf "Guard"
    | PSeq l -> Format.fprintf ppf "Seq(%a)" print_regexp_list l
    | PAlt l -> Format.fprintf ppf "Alt(%a)" print_regexp_list l
    | PStar body -> Format.fprintf ppf "Star(%a)" print_regexp body
    | PWeakStar body -> Format.fprintf ppf "WStar(%a)" print_regexp body
  (* Prints every regexp of the list followed by a semicolon. *)
  and print_regexp_list ppf l =
    let print_item x =
      print_regexp ppf x;
      Format.pp_print_string ppf ";" in
    List.iter print_item l
679

680
681
  (* [remove_regexp r q] compiles the regexp [r], followed by the
     continuation node [q], into a node.  Stars are compiled into cyclic
     nodes through a placeholder that is filled once the loop body is
     built.  The two kinds of star differ only in the order of the loop and
     exit branches of the union. *)
  let rec remove_regexp r q = match r with
    | PElem p -> mk (ITimes (p, q))
    | PGuard p -> iand p q
    | PSeq l -> List.fold_right remove_regexp l q
    | PAlt rl ->
        let add_branch acc branch = ior acc (remove_regexp branch q) in
        List.fold_left add_branch iempty rl
    | PStar body ->
        let loop = mk_delayed () in
        let res = ior loop q in
        loop.desc <- ILink (remove_regexp_nullable body res iempty);
        res
    | PWeakStar body ->
        let loop = mk_delayed () in
        let res = ior q loop in
        loop.desc <- ILink (remove_regexp_nullable body res iempty);
        res
700
701
702
703
704

  (* Compiles [r] with two continuations: [q_nonempty] is used once [r] has
     consumed at least one element, [q_empty] when it has consumed none.  A
     regexp that is not nullable only needs the first one. *)
  and remove_regexp_nullable r q_nonempty q_empty =
    if not (nullable r) then remove_regexp r q_nonempty
    else remove_regexp2 r q_nonempty q_empty

705
  (* Two-continuation compilation of a nullable regexp (see
     [remove_regexp_nullable]).  When both continuations are the same node
     this is plain [remove_regexp]. *)
  and remove_regexp2 r q_nonempty q_empty =
    (* Assume r is nullable *)
    if q_nonempty == q_empty then remove_regexp r q_nonempty
    else match r with
      | PElem _ ->
          (* An element is never nullable *)
          assert false
      | PGuard p -> iand p q_empty
      | PSeq [] -> q_empty
      | PSeq (first::rest) ->
          remove_regexp2 first
            (remove_regexp (PSeq rest) q_nonempty)
            (remove_regexp2 (PSeq rest) q_nonempty q_empty)
      | PAlt rl ->
          let add_branch acc branch =
            ior acc (remove_regexp_nullable branch q_nonempty q_empty) in
          List.fold_left add_branch iempty rl
      | PStar body ->
          let loop = mk_delayed () in
          loop.desc <-
            ILink (remove_regexp_nullable body (ior loop q_nonempty) iempty);
          ior loop q_empty
      | PWeakStar body ->
          let loop = mk_delayed () in
          loop.desc <-
            ILink (remove_regexp_nullable body (ior q_nonempty loop) iempty);
          ior q_empty loop


733
734
735
736
737
738
739
740
741
742
743
744
745
  (* Regexp for any number of characters. *)
  let pcdata = star (PElem (itype (Types.char Chars.any)))
  (* [mix regexp] allows character data ([pcdata]) before, after and
     between the items of [regexp].  Guards and weak stars are not
     supported. *)
  let mix regexp =
    let rec interleave r = match r with
      | PSeq [] -> eps
      | PSeq (first::rest) ->
          seq (interleave first) (seq pcdata (interleave (PSeq rest)))
      | PElem re -> PElem re
      | PAlt rl -> PAlt (List.map interleave rl)
      | PStar re -> star (seq pcdata (interleave re))
      | PGuard _ -> assert false
      | PWeakStar _ -> assert false
    in
    seq pcdata (seq (interleave regexp) pcdata)

746
747
748
749
750
751
  (* The constant for the nil atom of sequences. *)
  let cst_nil = Types.Atom Sequence.nil_atom
  (* [capture_all vars p] intersects [p] with a capture of every variable
     of [vars]. *)
  let capture_all vars p =
    let with_capture acc x = iand acc (mk (ICapture x)) in
    IdSet.fold with_capture p vars
  (* [termin b vars p] is [p] itself when [b] holds.  Otherwise it is [p]
     followed, for every variable of [vars], by a guard that binds that
     variable to the constant nil. *)
  let termin b vars p =
    if b then p
    else
      let close acc x = seq acc (PGuard (mk (IConstant (x,cst_nil)))) in
      IdSet.fold close p vars
753
754
755

  (* Compiles the regexp [r] with the nil sequence type as continuation. *)
  let rexp r = remove_regexp r (itype Sequence.nil_type)

756
757
  (* Placeholders created by [delayed], each with the location of its
     definition, waiting for [check_delayed]. *)
  let all_delayed = ref []

758
759
  (* Forgets the pending placeholders without checking them. *)
  let clean_on_err () = all_delayed := []

760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
  (* [delayed loc] creates a placeholder node and registers it, together
     with [loc], in [all_delayed]. *)
  let delayed loc =
    let s = mk_delayed () in
    all_delayed := (loc,s) :: !all_delayed;
    s

  (* [check_one_delayed (loc,p)] rejects a delayed node [p] that can reach
     itself by going only through links, unions, intersections and
     differences.  Such a definition is reported at [loc] as
     "Ill-formed recursion".

     Nodes already visited are remembered (by physical identity).  Without
     this, an unguarded cycle that does not go through [p] itself (for
     instance [s = s | s] reached from another definition) would make the
     traversal recurse without end.
     NOTE(review): such a cycle is presumably reported when the delayed
     node lying on it is checked in its turn; confirm that all of them are
     registered in [all_delayed]. *)
  let check_one_delayed (loc,p) =
    let visited = ref [] in
    let rec aux q =
      if p == q then raise Exit;
      if not (List.memq q !visited) then begin
        visited := q :: !visited;
        aux2 q.desc
      end
    and aux2 = function
      | IOr (q1,q2) | IAnd (q1,q2) | IDiff (q1,q2) -> aux q1; aux q2
      | ILink q -> aux q
      | _ -> ()
    in
    try aux2 p.desc
    with Exit -> error loc "Ill-formed recursion"
    
  (* Checks every pending placeholder with [check_one_delayed].  The
     pending list is emptied first, so it is empty even if a check fails. *)
  let check_delayed () =
    let pending = !all_delayed in
    all_delayed := [];
    List.iter check_one_delayed pending
779

780
    
781
  (* [derecurs env p] translates the AST [p] of a type or pattern into a
     node of the intermediate representation.  Concatenation and merge
     nodes are only recorded (in [concats]) at this stage. *)
  let rec derecurs env p =
    let tenv = env.penv_tenv and loc = p.loc in
    let sub = derecurs env in
    match p.descr with
    | PatVar (cu,v) -> derecurs_var env loc cu v
    | Recurs (p,b) -> derecurs (fst (derecurs_def env b)) p
    | Internal t -> itype t
    | NsT ns ->
        itype (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
    | Or (p1,p2) -> mk (IOr (sub p1, sub p2))
    | And (p1,p2) -> mk (IAnd (sub p1, sub p2))
    | Diff (p1,p2) -> mk (IDiff (sub p1, sub p2))
    | Prod (p1,p2) -> mk (ITimes (sub p1, sub p2))
    | XmlT (p1,p2) -> mk (IXml (sub p1, sub p2))
    | Arrow (p1,p2) -> mk (IArrow (sub p1, sub p2))
    | Optional p -> mk (IOptional (sub p))
    | Record (o,r) ->
        let field (q, orelse) = match orelse with
          | Some e -> (sub q, Some (sub e))
          | None -> sub q, None in
        mk (IRecord (o, parse_record tenv loc field r))
    | Constant (x,c) ->
        mk (IConstant (ident tenv loc x, const tenv loc c))
    | Cst c -> itype (Types.constant (const tenv loc c))
    | Regexp r ->
        let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
        rexp r
    | Concat (p1,p2) ->
        let n = mk (IConcat (sub p1, sub p2)) in
        concats := n :: !concats;
        n
    | Merge (p1,p2) ->
        let n = mk (IMerge (sub p1, sub p2)) in
        concats := n :: !concats;
        n
813
814
815
816
817
818
819
820
821
822
823
824
	  
  (* Translates the AST of a regexp.
     - vars: sequence variables to be propagated top-down and added to
       each captured element
     - b: are we below a star?
     - rvars: sequence variables that appear on the right of the regexp
     - f: are we in tail position?

     Returns the translated regexp together with the set of sequence
     variables of the regexp minus rvars (they have already been
     terminated if not below a star). *)
  and derecurs_regexp vars b rvars f env re = match re with
    | Epsilon ->
        PSeq [], IdSet.empty
    | Elem p ->
        PElem (capture_all vars (derecurs env p)), IdSet.empty
    | Guard p ->
        PGuard (derecurs env p), IdSet.empty
    | Seq (p1,p2) ->
        (* The right part goes first: its variables are "on the right" of
           the left part. *)
        let (r2,v2) = derecurs_regexp vars b rvars f env p2 in
        let (r1,v1) =
          derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
        seq r1 r2, IdSet.cup v1 v2
    | Alt (p1,p2) ->
        let (r1,v1) = derecurs_regexp vars b rvars f env p1
        and (r2,v2) = derecurs_regexp vars b rvars f env p2 in
        (* Each branch terminates the variables bound only by the other *)
        alt (termin b (IdSet.diff v2 v1) r1) (termin b (IdSet.diff v1 v2) r2),
        IdSet.cup v1 v2
    | Star p ->
        let (r,v) = derecurs_regexp vars true rvars false env p in
        termin b v (PStar r), v
    | WeakStar p ->
        let (r,v) = derecurs_regexp vars true rvars false env p in
        termin b v (PWeakStar r), v
    | SeqCapture (loc,x,p) ->
        let x = ident env.penv_tenv loc x in
        let vars = if f then vars else IdSet.add x vars in
        let after = IdSet.mem rvars x in
        let rvars = IdSet.add x rvars in
        let (r,v) = derecurs_regexp vars b rvars false env p in
        (if f
         then seq (PGuard (mk (ICapture x))) r
         else termin (after || b) (IdSet.singleton x) r),
        (if after then v else IdSet.add x v)
	  
	  
857
858
859
860
861
  (* Translates an identifier.  Without a unit qualifier it is, in order of
     preference: a recursive definition in scope, a type of the
     environment, or a capture variable.  With a qualifier it must be a
     type of that external unit. *)
  and derecurs_var env loc cu v =
    let v = ident env.penv_tenv loc v in
    match cu with
      | Some cu ->
          (try itype (find_type_global loc cu v env.penv_tenv)
           with Not_found ->
             let msg =
               "Unbound external type " ^ (U.get_str cu) ^ "." ^
                 (Ident.to_string v) in
             raise_loc_generic loc msg)
      | None ->
          (try Env.find v env.penv_derec
           with Not_found ->
             (try itype (find_type v env.penv_tenv)
              with Not_found -> mk (ICapture v)))
871
872
	      
  (* [derecurs_def env b] translates a group of mutually recursive
     definitions [b].  Every name first gets a placeholder node, the
     environment is extended with all of them, and only then are the bodies
     translated and plugged into the placeholders.  Returns the extended
     environment and the list of (identifier, body, node) triples.  A name
     defined twice is an error. *)
  and derecurs_def env b =
    let seen = ref IdSet.empty in
    let declare (loc,v,p) =
      let v = ident env.penv_tenv loc v in
      if IdSet.mem !seen v then
        raise_loc_generic loc
          ("Multiple definitions for the type identifer " ^
             (Ident.to_string v));
      seen := IdSet.add v !seen;
      (v,p,delayed loc) in
    let b = List.map declare b in
    let n =
      List.fold_left (fun acc (v,_,s) -> Env.add v s acc) env.penv_derec b in
    let env = { env with penv_derec = n } in
    List.iter (fun (_,p,s) -> s.desc <- ILink (derecurs env p)) b;
    (env, b)

  (* Hash tables keyed by types. *)
  module H = Hashtbl.Make(Types)

  (* [elim_concat n] replaces an [IConcat] or [IMerge] node by a link to
     its expansion; other nodes are left alone.  The node is marked through
     its [sid] field before being expanded, so that meeting it again is
     reported as an ill-formed loop. *)
  let rec elim_concat n =
    let enter () =
      if (n.sid > 0)
      then raise (Patterns.Error "Ill-formed concatenation loop");
      n.sid <- 1 in
    match n.desc with
      | IConcat (a,b) ->
          enter ();
          n.desc <- ILink (elim_conc a b)
      | IMerge (a,b) ->
          enter ();
          n.desc <- ILink (elim_merge a b)
      | _ -> ()
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
  (* [elim_merge a b] computes the node for the record concatenation of [a]
     and [b].  Unions are distributed over.  Two record nodes are merged
     field by field, the fields of [b] winning.  An already computed type
     must be a record type and is either merged directly (type against
     type) or first decomposed into record nodes (type against record
     node). *)
  and elim_merge a b =
    let check_record msg t =
      if not (Types.subtype t Types.Record.any) then
        raise (Patterns.Error msg) in
    let check_left =
      check_record
        "Left argument of record concatenation is not a record type"
    and check_right =
      check_record
        "Right argument of record concatenation is not a record type" in
    (* Decomposes a record type into (flag, fields) descriptions *)
    let get_rec t =
      let field (opt,x) =
        let x = itype x in
        (if opt then mk (IOptional x) else x), None in
      List.map (fun (l,o,_) -> o, LabelMap.map field l) (Types.Record.get t) in
    let merge (o1,l1) (o2,l2) =
      mk (IRecord (o1||o2, LabelMap.merge (fun _ x -> x) l1 l2)) in
    (* Problem: repr can loop with ill-formed recursion.
       type t = s + t where s = s | s;; *)
    match (repr a).desc, (repr b).desc with
      | IType (t1,_), IType (t2,_) ->
          check_left t1;
          check_right t2;
          itype (Types.Record.merge t1 t2)
      | IOr (a1,a2), _ -> ior (elim_merge a1 b) (elim_merge a2 b)
      | _, IOr (b1,b2) -> ior (elim_merge a b1) (elim_merge a b2)
      | IRecord (o1,l1), IRecord (o2,l2) -> merge (o1,l1) (o2,l2)
      | IType (t1,_), IRecord (o2,l2) ->
          check_left t1;
          let right = (o2,l2) in
          List.fold_left
            (fun accu left -> ior accu (merge left right))
            iempty (get_rec t1)
      | IRecord (o1,l1), IType (t2,_) ->
          check_right t2;
          let left = (o1,l1) in
          List.fold_left
            (fun accu right -> ior accu (merge left right))
            iempty (get_rec t2)
      | _ -> raise (Patterns.Error "Cannot compute record concatenation")
953
954
955
956
957
958
959
960
  (* [elim_conc n q] builds the node for the concatenation of [n] followed
     by [q].

     The structure of [n] is traversed and rebuilt with [q] substituted at
     the places computed by [elim_concat_type]:
     - links are followed;
     - unions are rebuilt from the translation of both branches;
     - in a product only the second component is translated;
     - a nested [IConcat] is first expanded in place with [elim_concat]
       (which turns it into a link), then translated;
     - an [IType] is handed to [elim_concat_type].

     [mem] memoises results per node, compared by physical equality.  The
     placeholder [r] is registered before the node is translated, so a node
     reached again during its own translation yields that placeholder
     instead of recursing forever.

     Raises [Patterns.Error] on any other node description. *)
  and elim_conc n q =
    let mem = ref [] in
    let rec aux n =
      try List.assq n !mem
      with Not_found ->
        let r = mk_delayed () in
        mem := (n,r) :: !mem;
        let rec aux2 n =
          match n.desc with
            | ILink n' -> aux2 n'
            | IOr (a,b) -> ior (aux a) (aux b)
            | ITimes (a,b) -> mk (ITimes (a, aux b))
            | IConcat _ -> elim_concat n; aux2 n
            | IType (t,_) -> elim_concat_type t q
            | _ -> raise (Patterns.Error "Cannot compute concatenation")
        in
        r.desc <- ILink (aux2 n);
        r
    in
    aux n
  (* [elim_concat_type t q] builds the node for the concatenation of the
     type [t] followed by the node [q].

     [t] is unfolded through [Types.Product.get]: each product (hd, tl)
     becomes the product node of [hd] and the translation of [tl], and [q]
     is added as an alternative wherever the type contains
     [Sequence.nil_atom].  Results are memoised per type in [cache]; the
     placeholder is registered before unfolding so that recursive types
     terminate.

     Raises [Patterns.Error] when [t] is not a subtype of [Sequence.any]. *)
  and elim_concat_type t q =
    let is_sequence = Types.subtype t Sequence.any in
    if not is_sequence then
      raise (Patterns.Error "Left argument of concatenation is not a sequence type");
    (* TODO: check t <= [ Any* ]
       NOTE(review): the test above may already be this check if
       [Sequence.any] denotes [ Any* ] -- confirm and drop the TODO. *)
    let cache = H.create 17 in
    let rec expand ty =
      try H.find cache ty
      with Not_found ->
        let placeholder = mk_delayed () in
        H.add cache ty placeholder;
        let products = Types.Product.get ty in
        let seed =
          if Types.Atom.has_atom ty Sequence.nil_atom then q else iempty in
        let extend acc (hd,tl) =
          ior acc (mk (ITimes (itype hd, expand tl))) in
        let body = List.fold_left extend seed products in
        (* The description is copied into the placeholder, not linked. *)
        placeholder.desc <- body.desc;
        placeholder
    in
    expand t
    
   
    
  (* Expand every node recorded in [concats] with [elim_concat].

     Whether or not the expansion succeeds, the [sid] mark of each recorded
     node is reset to 0 and [concats] is emptied; on failure this happens
     before the exception is re-raised. *)
  let elim_concats () =
    let reset () =
      List.iter (fun node -> node.sid <- 0) !concats;
      concats := []
    in
    (try List.iter elim_concat !concats
     with exn -> reset (); raise exn);
    reset ()
1004

1005
1006
  (* [derec penv p] translates [p] in the environment [penv] and returns the
     resulting node.

     Steps, in order: [derecurs] builds the node, [elim_concats] expands the
     pending concatenations, [check_delayed] validates the delayed nodes,
     and [internalize] is applied to the result.  Exceptions raised by any
     of these steps propagate to the caller. *)
  let derec penv p =
    let d = derecurs penv p in
    elim_concats ();
    check_delayed ();
    internalize d;
    d
1011
1012


1013
(* API *)
1014
1015
1016

  (* Sets of identifiers, ordered by the comparison of [Id]. *)
  module Ids = Set.Make(Id)
  (* [type_defs env b] processes a group of type definitions and returns
     [env] extended with them (via [enter_types]).

     [b] is a list of (location, name, definition) triples.  The whole group
     is translated at once by [derecurs_def]; pending concatenations are
     then expanded and delayed nodes checked.  Each definition is then
     internalized, checked with [check_no_fv], and converted with [typ].

     A warning is printed for a definition whose type is empty, unless its
     location is [noloc].  Every resulting type is registered with
     [Types.Print.register_global] for the current compilation unit.

     A [Patterns.Error] raised by [typ] is re-raised through
     [raise_loc_generic] with the location of the definition.  No cleanup
     is done here on failure; the wrapper defined after this function takes
     care of it. *)
  let type_defs env b =
    let _,b' = derecurs_def (penv env) b in
    elim_concats ();
    check_delayed ();
    let aux loc d =
      internalize d;
      check_no_fv loc d;
      try typ d
      with Patterns.Error s -> raise_loc_generic loc s
    in
    let b =
      List.map2
        (fun (loc,v,_) (v',_,d) ->
           let t = aux loc d in
           if (loc <> noloc) && (Types.is_empty t) then
             warning loc
               ("This definition yields an empty type for " ^ (U.to_string v));
           (* The result is not used.  The call is kept because resolving
              the name may raise -- NOTE(review): confirm, else drop it. *)
           let _ = ident env loc v in
           (v',t)) b b' in
    List.iter (fun (v,t) -> Types.Print.register_global
                 (Types.CompUnit.get_current ()) (Id.value v) t) b;
    enter_types b env
1038

1039
1040
1041
1042
  (* Exception-safe version of the [type_defs] defined just above (which it
     shadows): on any exception [clean_on_err] is run, then the same
     exception is re-raised. *)
  let type_defs env b =
    let abort exn = clean_on_err (); raise exn in
    try type_defs env b with exn -> abort exn

1043

1044
1045
  (* [get_type d] internalizes the node [d] and returns its type as computed
     by [typ].

     If [internalize] or [typ] raises, [clean_on_err] is run before the
     exception is re-raised.

     NOTE(review): [check_delayed] runs outside the [try], so a failure
     there does not trigger [clean_on_err], unlike in [typ], [pat] and
     [type_defs].  Confirm whether this is intentional. *)
  let get_type d =
    check_delayed ();
    try internalize d; typ d
    with exn -> clean_on_err (); raise exn
1048

1049
  (* [typ env t] translates the located type expression [t] in [env] and
     returns the result of [typ_node] on the translated node.

     The node is built with [derec] and checked with [check_no_fv] at the
     location of [t].  A [Patterns.Error] raised by [typ_node] is re-raised
     through [raise_loc_generic] with that location.  On any exception
     [clean_on_err] is run before the exception is re-raised. *)
  let typ env t =
    try
      let d = derec (penv env) t in
      check_no_fv t.loc d;
      (try typ_node d
       with Patterns.Error s -> raise_loc_generic t.loc s)
    with exn -> clean_on_err (); raise exn
1056
1057

  (* [pat env t] translates the located pattern expression [t] in [env] and
     returns the result of [pat_node] on the translated node.

     Unlike [typ], no [check_no_fv] is performed.  A [Patterns.Error] raised
     by [pat_node] is re-raised through [raise_loc_generic] with the
     location of [t].  On any exception [clean_on_err] is run before the
     exception is re-raised. *)
  let pat env t =
    try
      let d = derec (penv env) t in
      (try pat_node d
       with Patterns.Error s -> raise_loc_generic t.loc s)
    with exn -> clean_on_err (); raise exn
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072

  (* Calls the location-taking [delayed] defined earlier in this module with
     the location [noloc]; from here on the name refers to this version. *)
  let delayed () = delayed noloc
  (* [link a b] overwrites the description of [a] with a link to [b]. *)
  let link a b = a.desc <- ILink b

  (* [get_ct c] destructures a node expected to be a product whose first
     component is directly a record node, and returns the record flag, the
     record fields and the second component of the product.  Links are not
     followed.  Any other shape is treated as a programming error
     ([assert false]). *)
  let get_ct c =
    match c.desc with
      | ITimes (first,content) ->
          (match first.desc with
             | IRecord (o,fields) -> (o,fields,content)
             | _ -> assert false)
      | _ -> assert false


1073
end
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089

(* Top-level shortcuts for the corresponding [IType] functions. *)
let typ = IType.typ
let pat = IType.pat
let type_defs = IType.type_defs

(* Print on [ppf] the name of every identifier of [env.ids] that is bound to
   a [Type] item, each preceded by a space.  Other items are skipped. *)
let dump_types ppf env =
  let show v item =
    match item with
      | Type _ -> Format.fprintf ppf " %a" Ident.print v
      | _ -> ()
  in
  Env.iter show env.ids

(* Print the namespace table of [env] on [ppf], using [Ns.dump_table]. *)
let dump_ns ppf env =
  Ns.dump_table ppf env.ns



1090

1091
1092
(* II. Build skeleton *)

1093

1094
(* A function from a type and a boolean to a type.
   NOTE(review): presumably (expected type, precision flag) -> result type;
   confirm against the functions that use this signature. *)
type type_fun = Types.t -> bool -> Types.t
1095

1096
(* Alias for identifier sets; [Fv] presumably stands for free variables
   (see the [fv] component returned by [exp]). *)
module Fv = IdSet
1097

1098
1099
1100
(* A tree of typed branches: each node carries a [Typed.branch] and the list
   of its child nodes. *)
type branch = Branch of Typed.branch * branch list

(* Mutable list of branch trees, initially empty.
   NOTE(review): presumably filled while expressions are translated; the
   code that updates it is not in this part of the file. *)
let cur_branch : branch list ref = ref []
1101

1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
(* [exp' loc e] builds a typed expression with location [loc] and
   description [e]; its type field is initialised to [Types.empty]. *)
let exp' loc e = 
  { Typed.exp_loc = loc; Typed.exp_typ = Types.empty; Typed.exp_descr = e; }

(* Same as [exp'], paired with the set [fv] given by the caller. *)
let exp loc fv e = fv, exp' loc e

(* The constant expression [Sequence.nil_cst], located at [noloc]. *)
let exp_nil = exp' noloc (Typed.Cst Sequence.nil_cst)

(* Pattern node with no variables, defined as the type constraint
   [Builtin_defs.true_type]. *)
let pat_true =
  let node = Patterns.make Fv.empty in
  let descr = Patterns.constr Builtin_defs.true_type in
  Patterns.define node descr;
  node

(* Pattern node with no variables, defined as the type constraint
   [Builtin_defs.false_type]. *)
let pat_false =
  let node = Patterns.make Fv.empty in
  let descr = Patterns.constr Builtin_defs.false_type in
  Patterns.define node descr;
  node

1119

1120
(* Table of registered operators.  [register_op] stores an (arity, function)
   pair under each key; [is_op] looks keys up by operator name as a string. *)
let ops = Hashtbl.create 13
1121
1122
(* [register_op op arity f] records [(arity, f)] for [op] in [ops].  It uses
   [Hashtbl.add], so an earlier binding for [op] is hidden, not replaced. *)
let register_op op arity f = Hashtbl.add ops op (arity,f)
(* [typ_op op] returns the function registered for [op].
   Raises [Not_found] if [op] has not been registered. *)
let typ_op op = snd (Hashtbl.find ops op)
1123

1124
1125
1126
1127
1128
(* [fun_name env a] returns the optional name of the abstraction [a] as
   resolved by [ident] in [env], or [None] when [a] carries no name. *)
let fun_name env a =
  let resolve (loc,name) = ident env loc name in
  match a.fun_name with
    | Some located -> Some (resolve located)
    | None -> None

1129
(* [is_op env s] tells whether the identifier [s] denotes a registered
   operator.

   Returns [Some (name, arity)] when [s] is not bound in [env.ids], its
   namespace is [Ns.empty], and its local name is a key of [ops]; the arity
   is the one given to [register_op].  Returns [None] otherwise, so a
   binding in the environment always takes precedence over an operator of
   the same name. *)
let is_op env s =
  if (Env.mem s env.ids) then None
  else
    let (ns,s) = Id.value s in
    if Ns.equal ns Ns.empty then begin
      let s = U.get_str s in
      try
        let o = Hashtbl.find ops s in
        Some (s, fst o)
      with Not_found -> None
    end
    else None
1140

1141
1142
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
1143
  | Forget (e,t) ->
1144
      let (fv,e) = expr env loc e and t = typ env t in
1145
      exp loc fv (Typed.Forget (e,t))
1146
1147
  | Check (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
1148
      exp loc fv (Typed.Check (ref Types.empty,e,t))
1149
  | Var s -> var env loc s
1150
  | Apply (e1,e2) -> 
1151
1152
1153
1154
1155
1156
1157
1158
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      let fv = Fv.cup fv1 fv2 in
      (match e1.Typed.exp_descr with
	 | Typed.Op (op,arity,args) when arity > 0 -> 
	     exp loc fv (Typed.Op (op,arity - 1,args @ [e2]))
	 | _ ->
	     exp loc fv (Typed.Apply (e1,e2)))
  | Abstraction a -> abstraction env loc a
1159
  | (Integer _ | Char _ | Atom _ | Const _) as c -> 
1160
      exp loc Fv.empty (Typed.Cst (const env loc c))
1161
  | Pair (e1,e2) ->