typer.ml 55.9 KB
Newer Older
1
(* TODO:
2
 - rewrite type-checking of operators to propagate constraint
3
 - check whether it is worth using recursive hash-consing internally
4
5
*)

6
7
8
open Location
open Ast
open Ident
9

10
11
12
13
14
15
(* Shadow five polymorphic comparison operators with versions whose
   arguments are restricted to [int].  Each right-hand side still refers to
   the previously visible (polymorphic) operator of the same name. *)
let ( = ) : int -> int -> bool = ( = )
let ( <= ) : int -> int -> bool = ( <= )
let ( < ) : int -> int -> bool = ( < )
let ( >= ) : int -> int -> bool = ( >= )
let ( > ) : int -> int -> bool = ( > )

16
17
(* Debugging switch; it is not read anywhere in the visible part of this
   file.  NOTE(review): presumably guards schema-related traces -- confirm. *)
let debug_schema = false

18
(* [warning loc msg] prints a warning for location [loc].  The output goes
   to the HTML formatter of the current viewport when that viewport is an
   HTML one, and to [Format.err_formatter] otherwise.  The location is also
   passed to [Location.html_hilight]. *)
let warning loc msg =
  let viewport = Location.get_viewport () in
  let ppf =
    match Html.is_html viewport with
    | true -> Html.ppf viewport
    | false -> Format.err_formatter
  in
  let where = (loc, `Full) in
  Format.fprintf ppf "Warning %a:@\n" Location.print_loc where;
  Location.html_hilight where;
  Format.fprintf ppf "%s@." msg
24

25
26
27
28
29
30
31
32
33
(* Exceptions declared by the typer.  Only the payload types are visible
   here; most raise sites are outside this part of the file.  [Error] is the
   generic one: it carries a plain message and is raised by [error],
   [find_schema] and [find_schema_descr_uri] below. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string

34
35
36

(* Carries a message and a type.  NOTE(review): no raise site is visible in
   this part of the file -- confirm how the [Types.t] payload is used. *)
exception Warning of string * Types.t

37
38
39
40
(* [raise_loc loc exn] raises [exn] wrapped in [Location], attached to the
   whole of [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Same as [raise_loc], but the location is narrowed with [`Char ofs]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* [error loc msg] raises [Error msg] located at [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn

41

42
43
(* What an identifier can be bound to in the environment: either a type
   ([Type]) or a value ([Val]); both constructors carry a [Types.t]. *)
type item =
  | Type of Types.t
  | Val of Types.t
45

46
47
(* Finite maps keyed by [U.t]; used below for the [cu] and [schemas] fields
   of the environment. *)
module UEnv = Map.Make(U)

48
(* The typing environment. *)
type t = {
  ids : item Env.t;              (* identifiers bound to types or values *)
  ns : Ns.table;                 (* namespace prefix table *)
  cu : Types.CompUnit.t UEnv.t;  (* compilation units added by [enter_cu] *)
  schemas : string UEnv.t        (* schema name -> URI, see [enter_schema] *)
}
54

55
56
57
58
59
(* Placeholder operations on environments: every one of them fails with
   [Failure "Typer.<name>"] when called.  Note that [compare] shadows the
   polymorphic [compare] for the rest of the file. *)
let hash _ = raise (Failure "Typer.hash")
let compare _ _ = raise (Failure "Typer.compare")
let dump _ppf _ = raise (Failure "Typer.dump")
let equal _ _ = raise (Failure "Typer.equal")
let check _ = raise (Failure "Typer.check")
60

61
62
63

(* Forward reference to the schema loader.  As used by [enter_schema], the
   stored function takes a schema name and a URI and returns the schema
   paired with a registration thunk.  The initial function fails with
   [assert false]; presumably the real loader is assigned elsewhere. *)
let load_schema_fwd = ref (fun x uri -> assert false)

64
65
66
67
68
69
70
71
72
73
74
75
76
(* [enter_schema ?prefix x uri env] loads the schema named [x] from [uri]
   through [!load_schema_fwd], records the binding [x -> uri] in the
   environment and, when [prefix] is given, binds it to the schema's target
   namespace.  The namespace prefix is set before the registration thunk is
   run, for better pretty printing. *)
let enter_schema ?prefix x uri env =
  let sch, register = !load_schema_fwd x uri in
  let ns =
    match prefix with
    | None -> env.ns
    | Some p -> Ns.add_prefix p sch.Schema_types.targetNamespace env.ns
  in
  let env = { env with schemas = UEnv.add x uri env.schemas; ns = ns } in
  register ();
  env
77
78


79
(* TODO: filter out builtin defs ? *)
80
81
82
83
(* Writes one environment item: a one-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type. *)
let serialize_item s item =
  let tag, t =
    match item with
    | Type t -> 0, t
    | Val t -> 1, t
  in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

84
(* Writes an environment to [s]: the identifier bindings, then the namespace
   table, then the list of (schema name, URI) pairs.  The [cu] field is not
   written. *)
let serialize s env =
  Serialize.Put.env Id.serialize serialize_item Env.iter s env.ids;
  Ns.serialize_table s env.ns;
  let collect name uri accu = (name, uri) :: accu in
  let schs = UEnv.fold collect env.schemas [] in
  let put_schema = Serialize.Put.pair U.serialize Serialize.Put.string in
  Serialize.Put.list put_schema s schs
91

92
93
94
95
96
(* Reads back an item written by [serialize_item]: a one-bit tag selects the
   constructor, then the type follows. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

97
(* Reads an environment in the format produced by [serialize].  The schema
   table is not restored directly: each saved (name, URI) pair is re-entered
   with [enter_schema], which reloads the schema.  [cu] starts empty. *)
let deserialize s =
  let ids =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let ns = Ns.deserialize_table s in
  let get_schema = Serialize.Get.pair U.deserialize Serialize.Get.string in
  let schs = Serialize.Get.list get_schema s in
  let reload env (name, uri) = enter_schema name uri env in
  let initial =
    { ids = ids; ns = ns; cu = UEnv.empty; schemas = UEnv.empty } in
  List.fold_left reload initial schs
106
107


108
109
(* The environment with no identifiers, the empty namespace table, no
   compilation units and no schemas. *)
let empty_env =
  { schemas = UEnv.empty;
    cu = UEnv.empty;
    ns = Ns.empty_table;
    ids = Env.empty }

115
116
(* Forward reference to a function mapping a compilation unit to its typing
   environment (it is used that way by [find_type_global] and
   [find_value_global]).  The initial function fails with [assert false]. *)
let from_comp_unit = ref (fun cu -> assert false)

117
(* [enter_cu x cu env] binds the name [x] to the compilation unit [cu]. *)
let enter_cu x cu env =
  let units = UEnv.add x cu env.cu in
  { env with cu = units }
119

120
121
122
(* [find_cu x env] returns the compilation unit bound to [x] in [env], or
   [Types.CompUnit.mk x] when [x] is not bound. *)
let find_cu x env =
  if UEnv.mem x env.cu then UEnv.find x env.cu
  else Types.CompUnit.mk x
123
124


125
126
127
128
(* [find_schema x env] returns the URI recorded for the schema named [x].
   Raises [Error] with a "no such schema" message when [x] is unknown. *)
let find_schema x env =
  if UEnv.mem x env.schemas then UEnv.find x env.schemas
  else
    let msg = Printf.sprintf "%s: no such schema" (U.get_str x) in
    raise (Error msg)

129
130
131
132
133
134
135
136
(* [enter_type id t env] binds [id] to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }

(* [enter_types l env] binds every [(id, t)] of [l], in list order. *)
let enter_types l env =
  let bind ids (id, t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left bind env.ids l }
(* [find_type id env] returns the type bound to [id].  Raises [Not_found]
   when [id] is unbound or bound to a value. *)
let find_type id env =
  match Env.find id env.ids with
  | Val _ -> raise Not_found
  | Type t -> t
138

139
(* [find_type_global loc cu id env] resolves the unit name [cu] with
   [find_cu], obtains that unit's environment through [!from_comp_unit] and
   looks [id] up there with [find_type].  [loc] is currently unused. *)
let find_type_global loc cu id env =
  let unit_env = !from_comp_unit (find_cu cu env) in
  find_type id unit_env

144
(* [enter_value id t env] binds [id] to a value of type [t]. *)
let enter_value id t env =
  let ids = Env.add id (Val t) env.ids in
  { env with ids = ids }
146
147
(* [enter_values l env] binds every [(id, t)] of [l] as a value, in list
   order. *)
let enter_values l env =
  let bind ids (id, t) = Env.add id (Val t) ids in
  { env with ids = List.fold_left bind env.ids l }
149
150
151
(* [enter_values_dummy l env] binds every identifier of [l] as a value whose
   recorded type is [Types.empty]. *)
let enter_values_dummy l env =
  let bind ids id = Env.add id (Val Types.empty) ids in
  { env with ids = List.fold_left bind env.ids l }
152
153
(* [find_value id env] returns the type of the value bound to [id].  Raises
   [Not_found] when [id] is unbound or bound to a type. *)
let find_value id env =
  match Env.find id env.ids with
  | Type _ -> raise Not_found
  | Val t -> t
156
157
158
(* [find_value_global cu id env] looks [id] up as a value in the environment
   that [!from_comp_unit] returns for [cu].  The [env] argument is ignored;
   unlike [find_type_global], [cu] is not passed through [find_cu]. *)
let find_value_global cu id env =
  let unit_env = !from_comp_unit cu in
  find_value id unit_env
159
160
161
(* [is_cu id env] is [true] when [!from_comp_unit] succeeds on the unit
   designated by [id], and [false] when it raises any exception. *)
let is_cu id env =
  try
    let _ = !from_comp_unit (find_cu id env) in
    true
  with _ -> false
162
	
163
164
165
166
167
168
(* [value_name_ok id env] is [false] exactly when [id] is bound to a type;
   it is [true] when [id] is unbound or bound to a value. *)
let value_name_ok id env =
  try
    (match Env.find id env.ids with
     | Type _ -> false
     | Val _ -> true)
  with Not_found -> true

169
170
171
172
(* [iter_values env f] calls [f id t] on every value binding of [env];
   type bindings are skipped. *)
let iter_values env f =
  let visit x item =
    match item with
    | Val t -> f x t
    | Type _ -> ()
  in
  Env.iter visit env.ids
173

174

175
(* [register_types cu env] registers every type binding of [env] with the
   type printer, under the compilation unit [cu]; value bindings are
   skipped. *)
let register_types cu env =
  let register x = function
    | Type t -> Types.Print.register_global cu (Ident.value x) t
    | Val _ -> ()
  in
  Env.iter register env.ids
179

180

181
(* Namespaces *)
182

183
(* Installs the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table
185

186
(* Returns the namespace table of the environment. *)
let get_ns_table tenv = tenv.ns
187

188
(* [enter_ns p ns env] adds the prefix [p] for namespace [ns] to the
   namespace table of [env]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
190

191
192
193
194
195
(* [protect_error_ns loc f x] computes [f x]; if that raises
   [Ns.UnknownPrefix], the failure is reported at [loc] through
   [raise_loc_generic] as an undefined-prefix error. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
196

197
198
199
(* [qname env loc t] resolves [t] with [Ns.map_tag] against the namespace
   table of [env]; an unknown prefix is reported at [loc]. *)
let qname env loc t = 
  protect_error_ns loc (Ns.map_tag env.ns) t
    
200
201
202
203
204
205
206
207
208
209
(* [ident env loc t] resolves [t] with [Ns.map_attr] against the namespace
   table of [env] and turns the result into an identifier; an unknown prefix
   is reported at [loc]. *)
let ident env loc t =
  Ident.ident (protect_error_ns loc (Ns.map_attr env.ns) t)

(* [has_value id env] is [true] when [id], once resolved against the
   namespace table, is bound to a value.  An unbound identifier, a type
   binding or an unknown namespace prefix all give [false]. *)
let has_value id env =
  try
    let key = Ident.ident (Ns.map_attr env.ns id) in
    (match Env.find key env.ids with
     | Val _ -> true
     | Type _ -> false)
  with Not_found | Ns.UnknownPrefix _ -> false

210
(* [parse_atom env loc t] resolves [t] as a qualified name and builds the
   corresponding atom. *)
let parse_atom env loc t =
  let q = qname env loc t in
  Atoms.V.of_qname q
212
213
 
(* [parse_ns env loc ns] resolves the prefix [ns] with [Ns.map_prefix]; an
   unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
215

216
(* [parse_label env loc t] resolves [t] with [Ns.map_attr] and interns the
   resulting (namespace, local name) pair in [LabelPool]. *)
let parse_label env loc t =
  let resolved = protect_error_ns loc (Ns.map_attr env.ns) t in
  let (ns, local) = resolved in
  LabelPool.mk (ns, local)
219

220
221
222
223
224
225
226
227
228
229
230
231
232
(* [parse_record env loc f r] turns the association list [r] into a label
   map: each label is resolved with [parse_label] and each payload is mapped
   through [f].  A label occurring twice is reported at [loc]. *)
let parse_record env loc f r =
  let field (l, x) = (parse_label env loc l, f x) in
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let fields = List.map field r in
  LabelMap.from_list on_duplicate fields

(* [const env loc e] converts the expression [e] into a [Types] constant.
   Only located expressions, pairs, XML elements, record literals, strings,
   atoms, integers, characters and embedded constants are accepted; anything
   else is reported at the innermost enclosing location. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
  | LocatedExpr (inner_loc, inner) -> const env inner_loc inner
  | Pair (x, y) -> Types.Pair (sub x, sub y)
  | Xml (x, y) -> Types.Xml (sub x, sub y)
  | RecordLitt fields -> Types.Record (parse_record env loc sub fields)
  | String (i, j, s, rest) -> Types.String (i, j, s, sub rest)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
238

239

240
(* Schema *)
241

242
243
244
(* [is_registered_schema env s] tells whether the schema name [s] is bound
   in the [schemas] table of [env]. *)
let is_registered_schema env s = UEnv.mem s env.schemas

(* uri -> schema binding *)
245
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)
246
247
248

(* Global tables of schema components.  [find_schema_descr_uri] looks them
   up with keys of the form (schema URI, qualified name). *)
let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
249
(* Attribute declarations, keyed by (schema URI, qualified name). *)
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
250
251
252
253
(* Attribute groups and model groups, keyed by (schema URI, qualified
   name). *)
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
254

255

256
257
258
259
260
(*
let get_schema uri =
  try Hashtbl.find !schemas uri
  with Not_found -> assert false
*)
261

262
(* [find_schema_descr_uri kind uri name] returns the schema component called
   [name] in the schema identified by [uri].

   - [kind = Some k] restricts the lookup to the table of that component
     kind.
   - [kind = None] tries, in this order, elements, types, attributes,
     attribute groups and model groups, and returns the first hit.  This
     order should stay consistent with [Schema_component.get_component].

   Raises [Error] with a descriptive message when nothing is found.

   The former [do_try] helper took a label argument that was never used (and
   one of the labels was misspelled); it has been dropped, so the
   single-kind cases now call their lookup directly. *)
let find_schema_descr_uri kind uri (name : Ns.qname) =
  let key = (uri, name) in
  let elt () = Hashtbl.find !schema_elements key in
  let typ () = Hashtbl.find !schema_types key in
  let att () = Hashtbl.find !schema_attributes key in
  let att_group () = Hashtbl.find !schema_attribute_groups key in
  let mod_group () = Hashtbl.find !schema_model_groups key in
  (* Returns the result of the first lookup that does not raise
     [Not_found]. *)
  let rec first_found = function
    | [] -> raise Not_found
    | f :: rem -> (try f () with Not_found -> first_found rem)
  in
  try
    (match kind with
     | Some `Element -> elt ()
     | Some `Type -> typ ()
     | Some `Attribute -> att ()
     | Some `Attribute_group -> att_group ()
     | Some `Model_group -> mod_group ()
     | None ->
         (* policy for unqualified schema component resolution *)
         first_found [ elt; typ; att; att_group; mod_group ])
  with Not_found ->
    raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
                    (Schema_common.string_of_component_kind kind)
                    (Ns.QName.to_string name) uri))
286
287
288
289
290

(* [find_schema_descr env kind schema name] resolves the schema name
   [schema] to its URI with [find_schema], then looks the component up with
   [find_schema_descr_uri]. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

291

292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
module IType = struct
  (* Intermediate representation shared by types and patterns.  A [node] is
     a mutable cell holding a descriptor plus several caches that are filled
     lazily by the functions below. *)
  type node = {
    mutable desc: desc;
    mutable smallhash: int;  (* Local hash *)
    mutable rechash: int;    (* Global (recursive) hash *)
    mutable sid: int;        (* Sequential id used to compute rechash *)
    mutable t: Types.t option;            (* cache filled by [typ] *)
    mutable tnode: Types.Node.t option;   (* cache filled by [typ_node] *)
    mutable p: Patterns.descr option;     (* cache filled by [pat] *)
    mutable pnode: Patterns.node option;  (* cache filled by [pat_node] *)
    mutable fv: fv option                 (* cache filled by [fv]/[peek_fv] *)
  } 
  (* [ILink] is an indirection resolved by [repr].  In [IType (t, h)], [h] is
     [Types.hash t] (see [itype]).  In [IRecord], the boolean and the
     per-field pairs are passed on to [Types.record'] / the pattern
     translation; the optional second node of a field is the "or-else"
     alternative. *)
  and desc =
    | ILink of node
    | IType of Types.descr * int
    | IOr of node * node
    | IAnd of node * node
    | IDiff of node * node
    | ITimes of node * node
    | IXml of node * node
    | IArrow of node * node
    | IOptional of node
    | IRecord of bool * (node * node option) label_map
    | ICapture of id
    | IConstant of id * Types.const

  (* Template node with all hashes at 0 and all caches empty; its descriptor
     links to itself.  [mk] and [mk_delayed] copy it with
     [{ node_temp with desc = ... }]. *)
  let rec node_temp = { 
    desc = ILink node_temp;
    smallhash = 0; rechash = 0; sid = 0;
    t = None; tnode = None; p = None; pnode = None;
    fv = None
  }
			
325
(* Recursive hash-consing *)
326

327
328
329
330
331
332
  (* Hash of one record field, given the hash [f] for its components.  The
     leading constant distinguishes fields with and without an alternative. *)
  let hash_field f (p, alt) =
    match alt with
    | Some e -> 1 + 17 * f p + 257 * f e
    | None -> 2 + 17 * f p

  (* [hash f n] hashes the top constructor of [n] (after following links),
     using [f] to hash its immediate sub-nodes.  Each constructor
     contributes a distinct small constant. *)
  let rec hash f n =
    (* Shared shape of all binary constructors. *)
    let binary tag p1 p2 = tag + 17 * f p1 + 257 * f p2 in
    match n.desc with
    | ILink n -> hash f n
    | IType (_, h) -> 1 + 17 * h
    | IOr (p1, p2) -> binary 2 p1 p2
    | IAnd (p1, p2) -> binary 3 p1 p2
    | IDiff (p1, p2) -> binary 4 p1 p2
    | ITimes (p1, p2) -> binary 5 p1 p2
    | IXml (p1, p2) -> binary 6 p1 p2
    | IArrow (p1, p2) -> binary 7 p1 p2
    | IOptional p -> 8 + 17 * f p
    | IRecord (o, r) ->
        9 + (if o then 17 else 0) + 257 * (LabelMap.hash (hash_field f) r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x, c) ->
        11 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)

346
347
348
349
350
  (* Hashes of bounded depth: [hash0] ignores sub-nodes, and each following
     one hashes sub-nodes with the previous one. *)
  let hash0 n = hash (fun _ -> 1) n
  let hash1 n = hash hash0 n
  let hash2 n = hash hash1 n
  let hash3 n = hash hash2 n

351
352
  (* [smallhash n] returns [hash2 n], cached in [n.smallhash].  A cached
     value of 0 means "not computed yet". *)
  let smallhash n =
    match n.smallhash with
    | 0 ->
        let h = hash2 n in
        n.smallhash <- h;
        h
    | cached -> cached
357
358

  (* [repr n] follows [ILink] indirections from [n] to the node they end at,
     and rewrites every link on the way to point directly to it. *)
  let rec repr m =
    match m.desc with
    | ILink n ->
        let z = repr n in
        m.desc <- ILink z;
        z
    | _ -> m

  (* Undo log for [link]: pairs (node, descriptor it had before being
     redirected), most recent first.  [may_unify] replays it on failure. *)
  let back = ref []

364
365
366
367
  (* Like [repr], except that the descriptor of the argument itself is left
     untouched: only the links below it are shortened. *)
  let prot_repr m =
    match m.desc with
    | ILink n -> repr n
    | _ -> m

368
369
370
371
372
373
374
375
376
  (* [link x y] merges the two nodes by turning one of them into a link to
     the other.  The redirected node must have no cached type ([t = None]);
     [x] is preferred when both qualify.  The overwritten descriptor is
     pushed on [back] first.  Fails with [assert false] when both nodes have
     a cached type. *)
  let link x y =
    let redirect src dst =
      back := (src, src.desc) :: !back;
      src.desc <- ILink dst
    in
    if x.t == None then redirect x y
    else if y.t == None then redirect y x
    else assert false

  exception Unify

  (* [unify x y] tries to identify the two nodes structurally.  Nodes are
     linked before their children are visited.  Raises [Unify] when the
     nodes differ; the links made so far are recorded in [back] so that the
     caller can undo them. *)
  let rec unify x y =
    if x == y then ()
    else
      let x = prot_repr x and y = prot_repr y in
      if x == y then ()
      else if smallhash x != smallhash y then raise Unify
      else if (x.t != None) && (y.t != None) then
        (* x and y have been internalized; if they were equivalent,
           they would be equal *)
        raise Unify
      else
        match x.desc, y.desc with
        | IType (tx, _), IType (ty, _) when Types.equal tx ty -> link x y
        | IOr (x1, x2), IOr (y1, y2)
        | IAnd (x1, x2), IAnd (y1, y2)
        | IDiff (x1, x2), IDiff (y1, y2)
        | ITimes (x1, x2), ITimes (y1, y2)
        | IXml (x1, x2), IXml (y1, y2)
        | IArrow (x1, x2), IArrow (y1, y2) ->
            link x y;
            unify x1 y1;
            unify x2 y2
        | IOptional x1, IOptional y1 ->
            link x y;
            unify x1 y1
        | IRecord (xo, xr), IRecord (yo, yr) when xo == yo ->
            link x y;
            LabelMap.may_collide unify_field Unify xr yr
        | ICapture xv, ICapture yv when Id.equal xv yv -> ()
        | IConstant (xv, xc), IConstant (yv, yc)
          when Id.equal xv yv && Types.Const.equal xc yc -> ()
        | _ -> raise Unify

  (* Unifies two record fields; both must agree on whether an alternative is
     present. *)
  and unify_field f1 f2 =
    match f1, f2 with
    | (p1, Some e1), (p2, Some e2) ->
        unify p1 p2;
        unify e1 e2
    | (p1, None), (p2, None) -> unify p1 p2
    | _ -> raise Unify

402

403
404
  (* [may_unify x y] attempts [unify x y].  On success the links are kept
     and the result is [true]; on [Unify] every descriptor recorded in
     [back] is restored and the result is [false].  The undo log is emptied
     in both cases. *)
  let may_unify x y =
    let outcome =
      try
        unify x y;
        true
      with Unify ->
        List.iter (fun (n, saved) -> n.desc <- saved) !back;
        false
    in
    back := [];
    outcome

  (* Hash tables over nodes where key equality is [may_unify] (so a
     successful comparison merges the two nodes as a side effect) and the
     hash is [smallhash]. *)
  module SmallHash = Hashtbl.Make(
    struct 
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

  (* Applies [f] to the node of a record field, then to its alternative if
     there is one. *)
  let iter_field f (x, alt) =
    match alt with
    | Some y -> f x; f y
    | None -> f x
  (* [iter f d] applies [f] to the immediate sub-nodes of the descriptor
     [d], left to right.  Links, base types, captures and constants have
     none. *)
  let iter f d =
    match d with
    | IOr (x, y) | IAnd (x, y) | IDiff (x, y)
    | ITimes (x, y) | IXml (x, y) | IArrow (x, y) ->
        f x;
        f y
    | IOptional x -> f x
    | IRecord (_, r) -> LabelMap.iter (iter_field f) r
    | _ -> ()

  (* [minimize (mem, add)] returns a traversal that walks a node graph and
     [add]s every node for which [mem] is false.  The representative is
     recomputed after [mem], since [mem] may have linked the node to an
     existing one.  Sub-nodes are visited only for nodes without a cached
     type. *)
  let minimize (mem, add) =
    let rec visit n =
      let n = repr n in
      if not (mem n) then begin
        let n = repr n in
        add n ();
        if n.t == None then iter visit n.desc
      end
    in
    visit

  (* [to_clear] lists the nodes whose [sid] has been set and must be reset;
     [sid] is the counter handing out sequential ids. *)
  let to_clear = ref []
  let sid = ref 0

  (* Recursive hash: a node met for the first time receives the next
     sequential id and is hashed through its sub-nodes; a node met again
     contributes a value derived from its id only. *)
  let rec rechash n =
    let n = repr n in
    if n.sid != 0 then 17 * n.sid
    else begin
      incr sid;
      n.sid <- !sid;
      to_clear := n :: !to_clear;
      hash rechash n
    end
441
442

  (* Resets the id counter and the [sid] of every node listed in
     [to_clear], then empties that list. *)
  let clear () =
    sid := 0;
    let reset n = n.sid <- 0 in
    List.iter reset !to_clear;
    to_clear := []
445
446
447
448
449
450
451
452
453
454
455
456
457
458

  (* Memoised front end of the recursive hash defined above: the result is
     cached in [n.rechash] (0 meaning "not computed yet") and the temporary
     ids are cleared after each computation. *)
  let rechash n =
    let n = repr n in
    match n.rechash with
    | 0 ->
        let h = rechash n in
        clear ();
        n.rechash <- h;
        h
    | cached -> cached

  (* Hash tables over nodes with [may_unify] as equality.
     NOTE(review): despite its name this table hashes with [smallhash], not
     [rechash]; in the visible code it is only referenced from the
     commented-out two-phase [internalize] -- confirm this is intended. *)
  module RecHash = Hashtbl.Make(
    struct
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

459
460
461

(** Two-phases recursive hash-consing **)
(*
462
463
464
  let gtable = RecHash.create 17577

  let internalize n =
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
    let local = SmallHash.create 17 in
    minimize (SmallHash.mem local, SmallHash.add local) n; 
    minimize (RecHash.mem gtable, RecHash.add gtable) n;
    ()
*)

(** Single-phase hash-consing **)
  (* Global hash-consing table. *)
  let gtable = SmallHash.create 17

  (* [internalize n] walks the graph below [n] with [minimize], adding each
     node to [gtable] unless the table already contains one it unifies
     with. *)
  let internalize n =
    minimize (SmallHash.mem gtable, SmallHash.add gtable) n



(*  let internalize n = () *)
480
481
482
483
484
485
486
487
488

(* Compute free variables *)

  (* [fv n] returns the set of identifiers occurring in [ICapture] or
     [IConstant] nodes reachable from [n], caching the result in [n.fv].
     The [sid] field serves as a "visited" mark during the traversal and is
     reset with [clear] afterwards.  [to_clear] must be empty on entry. *)
  let fv n =
    let acc = ref IdSet.empty in
    let rec visit n =
      let n = repr n in
      if n.sid = 0 then begin
        n.sid <- 1;
        to_clear := n :: !to_clear;
        match n.fv, n.desc with
        | Some known, _ -> acc := IdSet.cup !acc known
        | None, (ICapture x | IConstant (x, _)) -> acc := IdSet.add x !acc
        | None, d -> iter visit d
      end
    in
    assert (!to_clear == []);
    match n.fv with
    | Some known -> known
    | None ->
        visit n;
        clear ();
        n.fv <- Some !acc;
        !acc

501
502
503
(* optimized version to check closedness *)

  (* Shared cache value meaning "no free variables"; stored in [n.fv] by
     [peek_fv]. *)
  let no_fv = Some IdSet.empty
504
505
506
  (* Raised by [peek_fv] with one of the variables it found. *)
  exception FoundFv of id

  (* [peek_fv n] checks that no capture or constant variable is reachable
     from [n]; it raises [FoundFv x] as soon as one is found.  On success
     every visited node gets its [fv] cache set to [no_fv].  Visit marks are
     reset in both cases.  [to_clear] must be empty on entry. *)
  let peek_fv n =
    let err x = raise (FoundFv x) in
    (* Outcome for a node whose free variables are already cached. *)
    let check_known vars =
      if IdSet.is_empty vars then () else err (IdSet.choose vars) in
    let rec aux n =
      let n = repr n in
      if n.sid = 0 then begin
        n.sid <- 1;
        to_clear := n :: !to_clear;
        match n.fv, n.desc with
        | Some vars, _ -> check_known vars
        | None, (ICapture x | IConstant (x, _)) -> err x
        | None, d -> iter aux d
      end
    in
    assert (!to_clear == []);
    try
      match n.fv with
      | Some vars -> check_known vars
      | None ->
          aux n;
          List.iter (fun m -> m.sid <- 0; m.fv <- no_fv) !to_clear;
          to_clear := []
    with exn ->
      clear ();
      raise exn

529
530
531
532
533
534
535
536
537
538
539
  (* [check_no_fv loc n] reports an error at [loc] when [n] contains a
     capture variable. *)
  let check_no_fv loc n =
    try peek_fv n
    with FoundFv x ->
      let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
      raise_loc_generic loc msg

  (* [has_no_fv n] tells whether [n] is free of capture variables. *)
  let has_no_fv n =
    try
      peek_fv n;
      true
    with FoundFv _ -> false


540
(* From the intermediate representation to the internal one *)
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565


  (* [typ n] returns the type denoted by the node [n] (after following
     links).  The result is computed by [compute_typ] on first use and
     cached in [n.t]. *)
  let rec typ n =
    let n = repr n in
    match n.t with
      | Some t -> t
      | None -> let t = compute_typ n.desc in n.t <- Some t; t
  (* Translates one descriptor.  Boolean connectives work on descriptors
     ([typ]); products, XML, arrows and records need type nodes
     ([typ_node]).  Links must already have been resolved by [repr], and
     captures/constants must not occur: both cases are [assert false]. *)
  and compute_typ = function
    | IType (t,_) -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | IRecord (o,r) ->  Types.record' (o, LabelMap.map compute_typ_field r)
    | ILink _ -> assert false
    | ICapture _ | IConstant (_,_) -> assert false
  (* A record field of a type may not carry an or-else alternative; this
     raises [Patterns.Error] if it does. *)
  and compute_typ_field = function
    | (s, None) -> typ_node s
    | (s, Some _) -> 
	raise (Patterns.Error "Or-else clauses are not allowed in types")

  (* [typ_node n] returns the type node associated with [n] (after following
     links), creating and defining it on first use.  The fresh node is
     stored in [n.tnode] before its definition is computed, so that
     recursive references met while computing [typ n] resolve to it.

     If computing the definition raises, [n.tnode] is reset to [None] before
     the exception is re-raised, as [pat_node] does for [n.pnode].
     Otherwise a later call on the same node would return a type node that
     was never defined.  Only [n] itself is rolled back: sub-nodes that were
     fully translated before the failure keep their caches, exactly as in
     [pat_node]. *)
  and typ_node n =
    let n = repr n in
    match n.tnode with
      | Some t -> t
      | None ->
          let x = Types.make () in
          try
            n.tnode <- Some x;
            Types.define x (typ n);
            x
          with exn -> n.tnode <- None; raise exn
      
  (* [pat n] returns the pattern denoted by [n].  A node without free
     variables is translated as a type and wrapped with [Patterns.constr];
     otherwise the pattern is computed by [compute_pat] and cached in
     [n.p]. *)
  let rec pat n =
    let n = repr n in
    if IdSet.is_empty (fv n)
    then Patterns.constr (typ n)
    else match n.p with
      | Some p -> p
      | None -> let p = compute_pat n.desc in n.p <- Some p; p

  (* Translates one descriptor that has free variables.  A difference is
     accepted only when its right-hand side has no free variable (it then
     becomes an intersection with the negated type).  Arrows, optional
     fields and other differences raise [Patterns.Error].  [IType] and
     [ILink] cannot reach this point: they are [assert false]. *)
  and compute_pat = function
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    | IDiff (s1,s2) when IdSet.is_empty (fv s2) ->
	let s2 = Types.neg (typ s2) in
	Patterns.cap (pat s1) (Patterns.constr s2)
    | IDiff _ ->
	raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ -> 
	raise (Patterns.Error "Optional fields are not allowed in record patterns")
    | IRecord (o,r) ->
	(* Fields without free variables go into a record type constraint;
	   the others contribute one record pattern each, collected in
	   [pats] and intersected with the constraint at the end. *)
	let pats = ref [] in
	let aux l = function
	  | (s,None) ->
	      if IdSet.is_empty (fv s) then typ_node s
	      else
		( pats := Patterns.record l (pat_node s) :: !pats;
		  Types.any_node )
	  | (s,Some e) ->
	      if IdSet.is_empty (fv s) then
		raise (Patterns.Error "Or-else clauses are not allowed in types")
	      else
		( pats := Patterns.cup 
		    (Patterns.record l (pat_node s))
		    (pat e) :: !pats;
		  Types.Record.any_or_absent_node )
	in
	let constr = Types.record' (o,LabelMap.mapi aux r) in
	List.fold_left Patterns.cap (Patterns.constr constr) !pats
	  (* TODO: can avoid constr when o=true, and all fields have fv *)
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IArrow _ ->
	raise (Patterns.Error "Arrows are not allowed in patterns")
    | IType _ | ILink _ -> assert false
      
  (* [pat_node n] returns the pattern node associated with [n], creating and
     defining it on first use.  The fresh node is stored in [n.pnode] before
     its definition is computed, so that recursive references resolve to it;
     if the computation raises, [n.pnode] is reset and the exception
     re-raised. *)
  and pat_node n =
    let n = repr n in
    match n.pnode with
    | Some p -> p
    | None ->
        let x = Patterns.make (fv n) in
        n.pnode <- Some x;
        (try Patterns.define x (pat n)
         with exn -> n.pnode <- None; raise exn);
        x

(* From AST to the intermediate representation *)

  (* Environment used while translating the AST: the typing environment plus
     the nodes of the recursive definitions currently in scope. *)
  type penv = {
    penv_tenv : t;
    penv_derec : node Env.t;
  }

  (* Initial translation environment, with no recursive definition. *)
  let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }

  (* [mk d] is a fresh node with descriptor [d] and empty caches.
     [mk_delayed ()] is a fresh placeholder whose descriptor is meant to be
     overwritten later.  [itype t] wraps a type together with its hash;
     [iempty] is the node of the empty type. *)
  let mk d = { node_temp with desc = d }
  let mk_delayed () = { node_temp with desc = ILink node_temp }
  let itype t = mk (IType (t, Types.hash t))
  let iempty = itype Types.empty

  (* Union of two nodes, simplified when one side is (physically) the
     descriptor of [iempty]. *)
  let ior p1 p2 =
    let is_iempty p = p.desc == iempty.desc in
    if is_iempty p1 then p2
    else if is_iempty p2 then p1
    else mk (IOr (p1, p2))

  (* Intersection of two nodes; yields [iempty] when either side is
     (physically) the descriptor of [iempty]. *)
  let iand p1 p2 =
    let is_iempty p = p.desc == iempty.desc in
    if is_iempty p1 || is_iempty p2 then iempty
    else mk (IAnd (p1, p2))

  (* Regular expressions over nodes, as consumed by [remove_regexp]:
     - [PElem p] matches one sequence element (it becomes a product with the
       continuation);
     - [PGuard p] consumes nothing and is intersected with the continuation;
     - [PSeq] / [PAlt] are concatenation and alternation;
     - [PStar] and [PWeakStar] are both iterations; they differ in the order
       in which the "iterate" and "stop" alternatives are put in the
       generated union. *)
  type regexp =
    | PElem of node
    | PGuard of node
    | PSeq of regexp list
    | PAlt of regexp list
    | PStar of regexp
    | PWeakStar of regexp

664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
  (* [nullable r] tells whether [r] is treated as accepting the empty
     sequence: guards and stars always are, elements never are. *)
  let rec nullable r =
    match r with
    | PElem _ -> false
    | PGuard _ | PStar _ | PWeakStar _ -> true
    | PSeq rl -> List.for_all nullable rl
    | PAlt rl -> List.exists nullable rl

  (* The empty concatenation and the empty alternation. *)
  let eps = PSeq []
  let emp = PAlt []

  (* [seq r1 r2] concatenates two regexps, flattening top-level [PSeq]s; a
     one-element result is returned unwrapped. *)
  let seq r1 r2 =
    let items = function PSeq l -> l | x -> [ x ] in
    match items r1 @ items r2 with
    | [ x ] -> x
    | l -> PSeq l

  (* [alt r1 r2] is the alternation of two regexps, flattening top-level
     [PAlt]s; a one-element result is returned unwrapped. *)
  let alt r1 r2 =
    let items = function PAlt l -> l | x -> [ x ] in
    match items r1 @ items r2 with
    | [ x ] -> x
    | l -> PAlt l

  (* Merges runs of adjacent [PElem] alternatives into a single [PElem] of
     their union; other alternatives are kept in place. *)
  let rec merge_alt alts =
    match alts with
    | [] -> []
    | PElem p :: PElem q :: rest -> merge_alt (PElem (ior p q) :: rest)
    | r :: rest -> r :: merge_alt rest
691
692
693
694
695
696
697
698
699

(* Works only for types, not patterns, because
   [ (x&Int|_) R' ] is possible *)
  (* Simplifies a regexp: alternatives are merged with [merge_alt] and weak
     stars are turned into ordinary stars.  Only valid for types, not for
     patterns (see the remark above). *)
  let rec simplify_regexp r =
    match r with
    | PStar body | PWeakStar body -> PStar (simplify_regexp body)
    | PAlt l -> PAlt (merge_alt (List.map simplify_regexp l))
    | PSeq l -> PSeq (List.map simplify_regexp l)
    | (PElem _ | PGuard _) as leaf -> leaf

700
701
702
703
704
705
706
707
708
  (* Debug printer showing the structure of a regexp; the nodes inside
     [PElem] and [PGuard] are not printed. *)
  let rec print_regexp ppf r =
    match r with
    | PElem _ -> Format.fprintf ppf "Elem"
    | PGuard _ -> Format.fprintf ppf "Guard"
    | PSeq l -> Format.fprintf ppf "Seq(%a)" print_regexp_list l
    | PAlt l -> Format.fprintf ppf "Alt(%a)" print_regexp_list l
    | PStar body -> Format.fprintf ppf "Star(%a)" print_regexp body
    | PWeakStar body -> Format.fprintf ppf "WStar(%a)" print_regexp body

  (* Prints each regexp of the list followed by a semicolon. *)
  and print_regexp_list ppf l =
    let print_one r = Format.fprintf ppf "%a;" print_regexp r in
    List.iter print_one l
709

710
711
  (* [remove_regexp r q] builds the node for "[r] followed by the
     continuation [q]".  For a star, a delayed node [x] stands for the loop:
     the result is the union of [x] and [q] (in an order that depends on the
     kind of star) and [x] is then tied to one iteration of the body
     followed by that result. *)
  let rec remove_regexp r q =
    let loop body combine =
      let x = mk_delayed () in
      let res = combine x in
      x.desc <- ILink (remove_regexp_nullable body res iempty);
      res
    in
    match r with
    | PElem p -> mk (ITimes (p, q))
    | PGuard p -> iand p q
    | PSeq l -> List.fold_right remove_regexp l q
    | PAlt rl ->
        List.fold_left (fun acc r -> ior acc (remove_regexp r q)) iempty rl
    | PStar body -> loop body (fun x -> ior x q)
    | PWeakStar body -> loop body (fun x -> ior q x)
730
731
732
733
734

  (* Dispatches on nullability: a nullable [r] needs both continuations
     ([remove_regexp2]); otherwise only [q_nonempty] is used. *)
  and remove_regexp_nullable r q_nonempty q_empty =
    match nullable r with
    | true -> remove_regexp2 r q_nonempty q_empty
    | false -> remove_regexp r q_nonempty

735
  (* [remove_regexp2 r q_nonempty q_empty] is [remove_regexp] with two
     continuations: [q_nonempty] follows [r] when [r] has consumed at least
     one element, [q_empty] when it has consumed nothing.  [r] is assumed to
     be nullable, hence the [assert false] on [PElem]. *)
  and remove_regexp2 r q_nonempty q_empty =
    if q_nonempty == q_empty then remove_regexp r q_nonempty
    else
      match r with
      | PSeq [] -> q_empty
      | PElem _ -> assert false
      | PGuard p -> iand p q_empty
      | PSeq (first :: rest) ->
          remove_regexp2 first
            (remove_regexp (PSeq rest) q_nonempty)
            (remove_regexp2 (PSeq rest) q_nonempty q_empty)
      | PAlt rl ->
          List.fold_left
            (fun acc r ->
               ior acc (remove_regexp_nullable r q_nonempty q_empty))
            iempty rl
      | PStar body ->
          let x = mk_delayed () in
          x.desc <-
            ILink (remove_regexp_nullable body (ior x q_nonempty) iempty);
          ior x q_empty
      | PWeakStar body ->
          let x = mk_delayed () in
          x.desc <-
            ILink (remove_regexp_nullable body (ior q_nonempty x) iempty);
          ior q_empty x


  (* The constant for the empty sequence. *)
  let cst_nil = Types.Atom Sequence.nil_atom

  (* [capture_all vars p] intersects [p] with a capture of every variable of
     [vars]. *)
  let capture_all vars p =
    let add_capture acc x = iand acc (mk (ICapture x)) in
    IdSet.fold add_capture p vars

  (* [termin b vars p]: when [b] is false, appends to [p] one guard per
     variable of [vars] binding it to the empty-sequence constant; when [b]
     is true, [p] is returned unchanged. *)
  let termin b vars p =
    if b then p
    else
      let add_nil acc x = seq acc (PGuard (mk (IConstant (x, cst_nil)))) in
      IdSet.fold add_nil p vars
770
771
772

  (* Translates a complete regexp, using the empty-sequence type as final
     continuation. *)
  let rexp r = remove_regexp r (itype Sequence.nil_type)

773
774
  (* Delayed nodes created by [delayed] and not yet checked by
     [check_delayed], each paired with its source location. *)
  let all_delayed = ref []

775
776
  (* Drops the pending delayed nodes; called by the API functions below when
     a translation fails. *)
  let clean_on_err () = all_delayed := []

777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
  (* [delayed loc] creates a placeholder node and registers it, with [loc],
     for the later well-formedness check. *)
  let delayed loc =
    let placeholder = mk_delayed () in
    all_delayed := (loc, placeholder) :: !all_delayed;
    placeholder

  (* [check_one_delayed (loc, p)] reports "Ill-formed recursion" at [loc]
     when [p] can be reached from its own definition by going only through
     links, unions, intersections and differences. *)
  let check_one_delayed (loc, p) =
    let rec scan = function
      | IOr (a, b) | IAnd (a, b) | IDiff (a, b) ->
          visit a;
          visit b
      | ILink q -> visit q
      | _ -> ()
    and visit q =
      if p == q then raise Exit else scan q.desc
    in
    try scan p.desc
    with Exit -> error loc "Ill-formed recursion"
    
  (* Checks every pending delayed node.  The pending list is emptied before
     the checks run, so it stays empty even if one of them fails. *)
  let check_delayed () =
    let pending = !all_delayed in
    all_delayed := [];
    List.iter (fun entry -> check_one_delayed entry) pending
    
797
  (* [derecurs env p] translates the AST type/pattern [p] into the
     intermediate representation.  Names are resolved through
     [derecurs_var], recursive definitions through [derecurs_def] and
     regular expressions through [derecurs_regexp] followed by [rexp]. *)
  let rec derecurs env p =
    let tenv = env.penv_tenv and loc = p.loc in
    match p.descr with
    | PatVar (cu, v) -> derecurs_var env loc cu v
    | SchemaVar (kind, schema_name, component_name) ->
        let name = qname tenv loc component_name in
        itype (find_schema_descr tenv kind schema_name name)
    | Recurs (body, defs) -> derecurs (derecurs_def env defs) body
    | Internal t -> itype t
    | NsT ns -> itype (Types.atom (Atoms.any_in_ns (parse_ns tenv loc ns)))
    | Or (p1, p2) -> mk (IOr (derecurs env p1, derecurs env p2))
    | And (p1, p2) -> mk (IAnd (derecurs env p1, derecurs env p2))
    | Diff (p1, p2) -> mk (IDiff (derecurs env p1, derecurs env p2))
    | Prod (p1, p2) -> mk (ITimes (derecurs env p1, derecurs env p2))
    | XmlT (p1, p2) -> mk (IXml (derecurs env p1, derecurs env p2))
    | Arrow (p1, p2) -> mk (IArrow (derecurs env p1, derecurs env p2))
    | Optional q -> mk (IOptional (derecurs env q))
    | Record (o, r) ->
        (* A field is a pattern with an optional or-else alternative. *)
        let field = function
          | (q, Some e) -> (derecurs env q, Some (derecurs env e))
          | (q, None) -> derecurs env q, None
        in
        mk (IRecord (o, parse_record tenv loc field r))
    | Constant (x, c) -> mk (IConstant (ident tenv loc x, const tenv loc c))
    | Cst c -> itype (Types.constant (const tenv loc c))
    | Regexp r ->
        let translated, _ =
          derecurs_regexp IdSet.empty false IdSet.empty true env r in
        rexp translated
	  
  (* Translates an AST regular expression.
     - vars: seq variables to be propagated top-down and added to each
       captured element
     - b: below a star ?
     - rvars: seq variables that appear on the right of the regexp
     - f: tail position

     Returns the translated regexp together with the set of seq variables
     of the regexp minus rvars (they have already been terminated if not
     below a star). *)
  and derecurs_regexp vars b rvars f env re =
    match re with
    | Epsilon -> PSeq [], IdSet.empty
    | Elem p -> PElem (capture_all vars (derecurs env p)), IdSet.empty
    | Guard p -> PGuard (derecurs env p), IdSet.empty
    | Seq (left, right) ->
        (* The right part is translated first: its variables count as
           "on the right" of the left part. *)
        let (r2, v2) = derecurs_regexp vars b rvars f env right in
        let (r1, v1) =
          derecurs_regexp vars b (IdSet.cup rvars v2) false env left in
        seq r1 r2, IdSet.cup v1 v2
    | Alt (left, right) ->
        let (r1, v1) = derecurs_regexp vars b rvars f env left
        and (r2, v2) = derecurs_regexp vars b rvars f env right in
        (* Each branch terminates the variables bound only by the other. *)
        alt (termin b (IdSet.diff v2 v1) r1) (termin b (IdSet.diff v1 v2) r2),
        IdSet.cup v1 v2
    | Star body ->
        let (r, v) = derecurs_regexp vars true rvars false env body in
        termin b v (PStar r), v
    | WeakStar body ->
        let (r, v) = derecurs_regexp vars true rvars false env body in
        termin b v (PWeakStar r), v
    | SeqCapture (loc, x, body) ->
        let x = ident env.penv_tenv loc x in
        let vars = if f then vars else IdSet.add x vars in
        let after = IdSet.mem rvars x in
        let rvars = IdSet.add x rvars in
        let (r, v) = derecurs_regexp vars b rvars false env body in
        (if f
         then seq (PGuard (mk (ICapture x))) r
         else termin (after || b) (IdSet.singleton x) r),
        (if after then v else IdSet.add x v)
	  
	  
870
871
872
873
874
  (* Resolves a name occurring in a type or pattern.  Without a compilation
     unit qualifier the name is looked up first among the recursive
     definitions in scope, then among the types of the environment, and is
     otherwise taken to be a capture variable.  With a qualifier, the type
     must exist in that unit; if not, an error is reported at [loc]. *)
  and derecurs_var env loc cu v =
    let v = ident env.penv_tenv loc v in
    let local () =
      try Env.find v env.penv_derec
      with Not_found ->
        (try itype (find_type v env.penv_tenv)
         with Not_found -> mk (ICapture v))
    in
    let qualified cu =
      try itype (find_type_global loc cu v env.penv_tenv)
      with Not_found ->
        raise_loc_generic loc
          ("Unbound external type " ^ (U.get_str cu) ^ "." ^
             (Ident.to_string v))
    in
    match cu with
    | None -> local ()
    | Some cu -> qualified cu
884
885
	      
  (* [derecurs_def env b] processes a group of (possibly mutually recursive)
     definitions [b], given as (location, name, body) triples, and returns
     the environment extended with them.

     It works in two passes: a delayed node is first created for every name
     and entered in the environment, then every body is translated in that
     extended environment and tied to its delayed node.  Defining the same
     name twice in the group is reported at the location of the second
     definition.  (The error message used to read "identifer".) *)
  and derecurs_def env b =
    let seen = ref IdSet.empty in
    let b =
      List.map
        (fun (loc,v,p) ->
           let v = ident env.penv_tenv loc v in
           if IdSet.mem !seen v then
             raise_loc_generic loc
               ("Multiple definitions for the type identifier " ^
                  (Ident.to_string v));
           seen := IdSet.add v !seen;
           (v,p,delayed loc))
        b in

    let n =
      List.fold_left (fun env (v,p,s) -> Env.add v s env) env.penv_derec b in
    let env = { env with penv_derec = n } in
    List.iter (fun (v,p,s) -> s.desc <- ILink (derecurs env p)) b;
    env

905
906
907
908
909
  (* [derec penv p] translates [p], checks the delayed nodes created on the
     way for ill-formed recursion, hash-conses the result with
     [internalize] and returns it. *)
  let derec penv p =
    let d = derecurs penv p in
    check_delayed ();
    internalize d;
    d
910
911


912
(* API *)
913
914
915
916

  module Ids = Set.Make(Id)
  (* [type_defs env b] translates a group of type definitions, given as
     (location, name, body) triples, and returns the list of
     (identifier, type) pairs.  Capture variables are rejected, a warning is
     printed for a definition that yields an empty type (unless its location
     is [noloc]), and every resulting type is registered with the printer
     under the current compilation unit. *)
  let type_defs env b =
    let penv = derecurs_def (penv env) b in
    let translate t =
      let d = derec penv t in
      check_no_fv t.loc d;
      try typ d
      with Patterns.Error s -> raise_loc_generic t.loc s
    in
    let define (loc, v, p) =
      let t = translate p in
      if (loc <> noloc) && (Types.is_empty t) then
        warning loc
          ("This definition yields an empty type for " ^ (U.to_string v));
      let v = ident env loc v in
      (v, t)
    in
    let register (v, t) =
      Types.Print.register_global
        (Types.CompUnit.get_current ()) (Id.value v) t
    in
    let defs = List.map define b in
    List.iter register defs;
    defs

936
937
938
939
  (* Wrapper around the previous [type_defs]: on any exception the pending
     delayed nodes are dropped before the exception is re-raised. *)
  let type_defs env b =
    try type_defs env b
    with exn -> clean_on_err (); raise exn

940

941
  (* [typ_descr d] hash-conses the node [d] and returns the type it denotes.
     On any exception the pending delayed nodes are dropped before the
     exception is re-raised. *)
  let typ_descr d =
    try
      internalize d;
      typ d
    with exn ->
      clean_on_err ();
      raise exn
944

945
  (* [typ env t] translates the AST type [t] into a type node.  Capture
     variables are rejected, and a [Patterns.Error] raised during the
     translation is reported at the location of [t].  On any exception the
     pending delayed nodes are dropped before the exception is re-raised. *)
  let typ env t =
    let translate () =
      let d = derec (penv env) t in
      check_no_fv t.loc d;
      try typ_node d
      with Patterns.Error s -> raise_loc_generic t.loc s
    in
    try translate ()
    with exn ->
      clean_on_err ();
      raise exn
952
953

  (* [pat env t] translates the AST pattern [t] into a pattern node.  A
     [Patterns.Error] raised during the translation is reported at the
     location of [t].  On any exception the pending delayed nodes are
     dropped before the exception is re-raised. *)
  let pat env t =
    let translate () =
      let d = derec (penv env) t in
      try pat_node d
      with Patterns.Error s -> raise_loc_generic t.loc s
    in
    try translate ()
    with exn ->
      clean_on_err ();
      raise exn
959
end
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975

(* Top-level shortcuts for the corresponding [IType] functions, so that
   the rest of this file can call them unqualified. *)
let typ = IType.typ
let pat = IType.pat
let type_defs = IType.type_defs

(* [dump_types ppf env] prints on [ppf] the identifier of every entry of
   [env.ids] that is bound to a [Type] item, each preceded by a space.
   [Val] entries are skipped. *)
let dump_types ppf env =
  let print_if_type v item =
    match item with
      | Type _ -> Format.fprintf ppf " %a" Ident.print v
      | Val _ -> ()
  in
  Env.iter print_if_type env.ids

(* [dump_ns ppf env] prints the namespace table of [env] on [ppf] using
   [Ns.dump_table]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table



976

977
978
(* II. Build skeleton *)

979

980
(* Signature of a typing function: it takes a type and a boolean flag and
   returns a type.
   NOTE(review): the meaning of the boolean is not visible from this part
   of the file -- confirm against the callers. *)
type type_fun = Types.t -> bool -> Types.t
981

982
(* Sets of free variables are plain identifier sets; [exp] and [expr]
   return one alongside each typed expression. *)
module Fv = IdSet
983

984
985
986
(* A tree of branches: a typed branch together with a list of
   sub-branches. *)
type branch = Branch of Typed.branch * branch list

(* Mutable list of branch trees, initially empty.
   NOTE(review): presumably collects the branches met while building the
   skeleton -- confirm against the code that updates it. *)
let cur_branch : branch list ref = ref []
987

988
(* [exp loc fv e] builds a typed-expression node located at [loc] with
   descriptor [e] and returns it paired with the free-variable set [fv].
   The type field of the node is initialised to [Types.empty]. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e } in
  (fv, node)
994

995
(* Table of the registered operators.  Entries are the [(arity, f)] pairs
   stored by [register_op]; [is_op] looks operators up by their name as a
   string.  13 is only the initial size hint given to [Hashtbl.create]. *)
let ops = Hashtbl.create 13
996
997
(* [register_op op arity f] records in [ops] that the operator [op] has
   the given [arity] and is associated with [f]. *)
let register_op op arity f =
  let entry = (arity, f) in
  Hashtbl.add ops op entry
(* [typ_op op] returns the second component of the entry registered for
   [op] in [ops].  Raises [Not_found] if [op] was never registered. *)
let typ_op op =
  let (_, f) = Hashtbl.find ops op in
  f
998

999
1000
1001
1002
1003
(* [fun_name env a] returns the name of the abstraction [a], if it has
   one, converted to an identifier by [ident env]; [None] otherwise. *)
let fun_name env a =
  match a.fun_name with
    | Some (loc,name) ->
        let id = ident env loc name in
        Some id
    | None -> None

1004
(* [is_op env s] tells whether the identifier [s] denotes a registered
   operator.

   The answer is [None] when [s] is bound in [env.ids], when its
   namespace is not [Ns.empty], or when its local name has no entry in
   [ops].  Otherwise it is [Some (name, arity)], where [name] is the local
   name as a string and [arity] is the first component of the entry stored
   by [register_op]. *)
let is_op env s =
  if Env.mem s env.ids then None
  else
    let (ns,local) = Id.value s in
    if not (Ns.equal ns Ns.empty) then None
    else
      let name = U.get_str local in
      try
        let (arity,_) = Hashtbl.find ops name in
        Some (name, arity)
      with Not_found -> None
1015

1016
1017
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
1018
  | Forget (e,t) ->
1019
      let (fv,e) = expr env loc e and t = typ env t in
1020
      exp loc fv (Typed.Forget (e,t))
1021
1022
  | Check (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
1023
      exp loc fv (Typed.Check (ref Types.empty,e,t))
1024
  | Var s -> var env loc s
1025
  | Apply (e1,e2) -> 
1026
1027
1028
1029
1030
1031
1032
1033
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      let fv = Fv.cup fv1 fv2 in
      (match e1.Typed.exp_descr with
	 | Typed.Op (op,arity,args) when arity > 0 -> 
	     exp loc fv (Typed.Op (op,arity - 1,args @ [e2]))
	 | _ ->
	     exp loc fv (Typed.Apply (e1,e2)))
  | Abstraction a -> abstraction env loc a
1034
  | (Integer _ | Char _ | Atom _ | Const _) as c -> 
1035
      exp loc Fv.empty (Typed.Cst (const env loc c))
1036
  | Pair (e1,e2) ->
1037
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
1038
1039
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
1040
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
1041
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
  | Dot (LocatedExpr (_,Var cu), id) when not (has_value cu env) ->
      if is_cu cu env then (
	let cu = find_cu cu env in
	let id = ident env loc id in
	let t =
	  try find_value_global cu id env
	  with Not_found ->
	    raise_loc loc (UnboundExtId (cu,id) ) in
	exp loc Fv.empty (Typed.ExtVar (cu, id, t))
      )
      else extern loc env (U.get_str cu ^ "." ^ U.get_str id) []
	(* TODO: allow nested OCaml modules A.B.C.x *)
1054
  | Dot (e,l) ->
1055
1056
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
1057
  | RemoveField (e,l) ->
1058
1059
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
1060
1061
  | RecordLitt r -> 
      let fv = ref Fv.empty in
1062
      let r = parse_record env loc
1063
		(fun e -> 
1064
		   let (fv2,e) = expr env loc e 
1065
1066
1067
		   in fv := Fv.cup !fv fv2; e)
		r in
      exp loc !fv (Typed.RecordLitt r)
1068
  | String (i,j,s,e) ->
1069
      let (fv,e) = expr env loc e in
1070
      exp loc fv (Typed.String (i,j,s,e))
1071
  | Match (e,b) -> 
1072
1073
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
1074
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
1075
  | Map (e,b) ->
1076
1077
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
1078
1079
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
1080
1081
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
1082
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
1083
  | Xtrans (e,b) ->
1084
1085
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
1086
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
1087
  | Validate (e,kind,schema,elt) ->
1088
      let (fv,e) = expr env loc e in
1089
      let uri = find_schema schema env in
1090
      exp loc fv (Typed.Validate (e, kind, uri, qname env loc elt))
1091
  | Try (e,b) ->
1092
1093
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
1094
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
1095
  | NamespaceIn (pr,ns,e) ->
1096
1097
      let env = enter_ns pr ns env in
      expr env loc e
1098
  | Ref (e,t) ->
1099
      let (fv,e) = expr env loc e and t = typ env t in
1100
      exp loc fv (Typed.Ref (e,t))
1101
  | External (s,args) ->
1102
      extern loc env s args
1103
1104
1105
1106
110