typer.ml 53.3 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraint
 - check whether it is worth using recursive hash-consing internally
*)

6
7
8
open Location
open Ast
open Ident
9

10
11
12
13
14
15
(* Rebind the comparison operators at type [int -> int -> bool], so that
   every later use of them in this file is an integer comparison. *)
let ( = ) : int -> int -> bool = ( = )
let ( <= ) : int -> int -> bool = ( <= )
let ( < ) : int -> int -> bool = ( < )
let ( >= ) : int -> int -> bool = ( >= )
let ( > ) : int -> int -> bool = ( > )

16
17
(* Boolean switch, off by default; it is not read anywhere in the part of the
   file visible here. *)
let debug_schema = false

18
(* [warning loc msg] prints a warning for the source location [loc].
   The output goes to the HTML formatter of the current viewport when the
   viewport is an HTML one, and to [Format.err_formatter] otherwise.  The
   location is printed first, then highlighted through
   [Location.html_hilight], then [msg] is printed followed by a flush. *)
let warning loc msg =
  let v = Location.get_viewport () in
  let ppf = if Html.is_html v then Html.ppf v else Format.err_formatter in
  Format.fprintf ppf "Warning %a:@\n" Location.print_loc (loc,`Full);
  Location.html_hilight (loc,`Full);
  Format.fprintf ppf "%s@." msg
24

25
26
27
28
29
30
31
32
33
(* Exceptions declared by the typer.  Only [Error] is raised in the part of
   the file visible here (through [error] and the schema lookups); the others
   are presumably raised by the type-checking code further down. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string

(* A warning message together with a type. *)
exception Warning of string * Types.t

37
38
39
40
(* Helpers that wrap an exception in [Location] before raising it.
   [raise_loc] tags the whole location ([`Full]); [raise_loc_str] tags a
   character offset ([`Char ofs]); [error] raises [Error msg] at a full
   location. *)
let raise_loc loc exn =
  let located = Location (loc,`Full,exn) in
  raise located
let raise_loc_str loc ofs exn =
  let located = Location (loc,`Char ofs,exn) in
  raise located
let error loc msg =
  raise_loc loc (Error msg)

41
42
(* What an identifier can be bound to in the environment: a type definition
   or a value (with its type). *)
type item =
  | Type of Types.t
  | Val of Types.t
44

45
46
(* Maps keyed by [U.t]; used below for the compilation-unit and schema
   tables of the environment. *)
module UEnv = Map.Make(U)

47
(* Typing environment. *)
type t = {
  ids : item Env.t;             (* identifiers bound to types or values *)
  ns: Ns.table;                 (* namespace prefix table *)
  cu: Types.CompUnit.t UEnv.t;  (* compilation units added by [enter_cu] *)
  schemas: string UEnv.t        (* schema name -> uri, added by [enter_schema] *)
}
53

54
55
56
57
58
(* Placeholder operations on environments: each one raises [Failure] with a
   message naming the function. *)
let hash = fun _ -> failwith "Typer.hash"
let compare = fun _ _ -> failwith "Typer.compare"
let dump = fun _ppf _ -> failwith "Typer.dump"
let equal = fun _ _ -> failwith "Typer.equal"
let check = fun _ -> failwith "Typer.check"
59

60
61
62

(* Forward reference to the schema loader.  The initial function aborts with
   [assert false]; the real one is presumably installed by another module.
   [enter_schema] calls it as [!load_schema_fwd x uri] and expects a pair
   (schema, registration thunk) back. *)
let load_schema_fwd = ref (fun x uri -> assert false)

63
64
65
66
67
68
69
70
71
72
73
74
75
(* [enter_schema ?prefix x uri env] loads the schema through
   [!load_schema_fwd x uri], records the binding [x -> uri] in
   [env.schemas] and, when [prefix] is given, binds it to the schema's target
   namespace.  The registration thunk returned by the loader is run only
   after the new environment has been built (the namespace prefix is set
   before registration for better pretty printing).  Returns the extended
   environment. *)
let enter_schema ?prefix x uri env =
  let sch,reg = !load_schema_fwd x uri in
  let ns =
    match prefix with
      | None -> env.ns
      | Some p -> Ns.add_prefix p sch.Schema_types.targetNamespace env.ns in
  let extended = { env with schemas = UEnv.add x uri env.schemas; ns = ns } in
  reg ();
  extended
76
77


78
(* TODO: filter out builtin defs ? *)
79
80
81
82
(* Writes one environment item to [s]: a 1-bit tag (0 for [Type], 1 for
   [Val]) followed by the serialized type. *)
let serialize_item s item =
  let tag, t =
    match item with
      | Type t -> 0, t
      | Val t -> 1, t in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

83
(* [serialize s env] writes, in this order: the identifier bindings, the
   namespace table, and the list of (name, uri) schema bindings.  The [cu]
   field is not written. *)
let serialize s env =
  Serialize.Put.env Id.serialize serialize_item Env.iter s env.ids;
  Ns.serialize_table s env.ns;
  let schs =
    UEnv.fold (fun name uri accu -> (name,uri)::accu) env.schemas [] in
  Serialize.Put.list (Serialize.Put.pair U.serialize Serialize.Put.string) s schs
90

91
92
93
94
95
(* Reads back an item written by [serialize_item]: a 1-bit tag selects
   [Type] (0) or [Val] (1); any other tag is an internal error. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

96
(* [deserialize s] reads what [serialize] wrote, in the same order
   (identifiers, namespace table, schema list).  The resulting environment
   starts with empty [cu] and [schemas] tables; every (name, uri) pair read
   is then re-entered with [enter_schema] (without a prefix), which loads the
   schema again. *)
let deserialize s =
  let ids = Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let ns = Ns.deserialize_table s in
  let schs =
    Serialize.Get.list
      (Serialize.Get.pair U.deserialize Serialize.Get.string) s in
  let env =
    { ids = ids; ns = ns; cu = UEnv.empty; schemas = UEnv.empty } in
  List.fold_left (fun env (name,uri) -> enter_schema name uri env) env schs
105
106


107
108
(* The environment with no identifiers, an empty namespace table, and no
   compilation units or schemas. *)
let empty_env = {
  ids = Env.empty;
  ns = Ns.empty_table;
  cu = UEnv.empty;
  schemas = UEnv.empty
}

114
115
(* Forward reference mapping a compilation unit to its typing environment.
   The initial function aborts with [assert false]; the real one is
   presumably installed by another module. *)
let from_comp_unit = ref (fun cu -> assert false)

116
(* [enter_cu x cu env] binds the name [x] to the compilation unit [cu]. *)
let enter_cu x cu env =
  { env with cu = UEnv.add x cu env.cu }
118

119
120
121
(* [find_cu x env] returns the compilation unit bound to [x]; when [x] is
   not bound, a unit is built from the name with [Types.CompUnit.mk]. *)
let find_cu x env =
  let units = env.cu in
  try UEnv.find x units with Not_found -> Types.CompUnit.mk x
122
123


124
125
126
127
(* [find_schema x env] returns the uri registered for the schema name [x].
   Raises [Error] with the message "<name>: no such schema" when [x] is not
   registered. *)
let find_schema x env =
  try UEnv.find x env.schemas
  with Not_found ->
    let msg = Printf.sprintf "%s: no such schema" (U.get_str x) in
    raise (Error msg)

128
129
130
131
132
133
134
135
(* [enter_type id t env] binds [id] to the type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* [enter_types l env] binds every [(id, t)] of [l] as a type, from left to
   right. *)
let enter_types l env =
  let bind ids (id,t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left bind env.ids l }
(* [find_type id env] returns the type bound to [id].
   Raises [Not_found] when [id] is bound to a value; an unbound [id] is
   reported by [Env.find] (presumably also with [Not_found]; callers in this
   file catch that exception). *)
let find_type id env =
  match Env.find id env.ids with
    | Type t -> t
    | Val _ -> raise Not_found
137

138
(* [find_type_global loc cu id env] looks up the type [id] in the
   environment of the compilation unit named [cu]: the unit is resolved with
   [find_cu], turned into an environment with [!from_comp_unit], and searched
   with [find_type].  [loc] is accepted but not used. *)
let find_type_global loc cu id env =
  let cu = find_cu cu env in
  let env = !from_comp_unit cu in
  find_type id env

143
(* [enter_value id t env] binds [id] to a value of type [t]. *)
let enter_value id t env =
  { env with ids = Env.add id (Val t) env.ids }
145
146
(* [enter_values l env] binds every [(id, t)] of [l] as a value, from left to
   right. *)
let enter_values l env =
  { env with ids =
      List.fold_left (fun accu (id,t) -> Env.add id (Val t) accu) env.ids l }
148
149
150
(* [enter_values_dummy l env] binds every identifier of [l] as a value with
   the placeholder type [Types.empty]. *)
let enter_values_dummy l env =
  let bind ids id = Env.add id (Val Types.empty) ids in
  { env with ids = List.fold_left bind env.ids l }
151
152
(* [find_value id env] returns the type of the value bound to [id].
   Raises [Not_found] when [id] is bound to something that is not a value. *)
let find_value id env =
  match Env.find id env.ids with
    | Val t -> t
    | _ -> raise Not_found
155
156
157
(* [find_value_global cu id env] looks [id] up as a value in the environment
   that [!from_comp_unit] associates with [cu].  The [env] argument itself is
   not consulted. *)
let find_value_global cu id env =
  find_value id (!from_comp_unit cu)
158
	
159
160
161
162
163
164
(* [value_name_ok id env] is [true] when [id] is unbound or already bound to
   a value, and [false] when it is bound to a type. *)
let value_name_ok id env =
  try
    (match Env.find id env.ids with
       | Val _ -> true
       | Type _ -> false)
  with Not_found -> true

165
166
167
168
(* [iter_values env f] calls [f id t] for every identifier bound to a value
   of type [t]; type bindings are skipped. *)
let iter_values env f =
  let visit x item =
    match item with
      | Val t -> f x t
      | Type _ -> () in
  Env.iter visit env.ids
169

170

171
172
173
174
175
176
177
178
179
(* [register_types cu env] registers every type binding of [env] with the
   type printer under the name "<unit>:<id>", where <unit> is the value of
   the compilation unit [cu].  Value bindings are ignored. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x item =
    match item with
      | Type t -> Types.Print.register_global (U.concat prefix (Id.value x)) t
      | Val _ -> () in
  Env.iter register env.ids

180

181
(* Namespaces *)
182

183
(* Installs the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  Ns.InternalPrinter.set_table env.ns
185

186
(* Returns the namespace table of the environment. *)
let get_ns_table tenv = tenv.ns
187

188
(* [enter_ns p ns env] binds the prefix [p] to the namespace [ns]. *)
let enter_ns p ns env =
  { env with ns = Ns.add_prefix p ns env.ns }
190

191
192
193
194
195
(* [protect_error_ns loc f x] evaluates [f x]; an [Ns.UnknownPrefix] escaping
   from it is turned into a located "Undefined namespace prefix" error at
   [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
196

197
198
199
(* [qname env loc t] resolves the tag [t] with [Ns.map_tag] against the
   namespace table of [env]; unknown prefixes are reported at [loc]. *)
let qname env loc t =
  let resolve = Ns.map_tag env.ns in
  protect_error_ns loc resolve t
    
200
(* [parse_atom env loc t] resolves [t] with [qname] and builds the
   corresponding atom. *)
let parse_atom env loc t =
  Atoms.V.of_qname (qname env loc t)
202
203
 
(* [parse_ns env loc ns] resolves the prefix [ns] with [Ns.map_prefix];
   unknown prefixes are reported at [loc]. *)
let parse_ns env loc ns =
  protect_error_ns loc (Ns.map_prefix env.ns) ns
205

206
(* [parse_label env loc t] resolves the attribute name [t] with
   [Ns.map_attr] (unknown prefixes are reported at [loc]) and interns the
   resulting (namespace, local name) pair in [LabelPool]. *)
let parse_label env loc t =
  let (ns,l) = protect_error_ns loc (Ns.map_attr env.ns) t in
  LabelPool.mk (ns,l)
209

210
211
212
213
214
215
216
217
218
219
220
221
222
(* [parse_record env loc f r] turns the association list [r] into a label
   map: labels are resolved with [parse_label] and contents mapped through
   [f].  A label occurring twice is reported at [loc] as
   "Duplicated record field". *)
let parse_record env loc f r =
  let field (l,x) = (parse_label env loc l, f x) in
  let duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  LabelMap.from_list duplicate (List.map field r)

(* [const env loc e] converts a constant expression of the AST into a
   [Types] constant.  [LocatedExpr] nodes update the location used for error
   reporting.  Any expression form not listed here is rejected with a located
   error. *)
let rec const env loc = function
  | LocatedExpr (loc,e) -> const env loc e
  | Pair (x,y) -> Types.Pair (const env loc x, const env loc y)
  | Xml (x,y) -> Types.Xml (const env loc x, const env loc y)
  | RecordLitt x -> Types.Record (parse_record env loc (const env loc) x)
  | String (i,j,s,c) -> Types.String (i,j,s,const env loc c)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
228

229

230
(* Schema *)
231

232
233
234
(* [is_registered_schema env s] tells whether the schema name [s] is bound in
   [env.schemas]. *)
let is_registered_schema env s = UEnv.mem s env.schemas

(* uri -> schema binding *)
235
(* Global schema tables, each held in a named [State.ref].  The component
   tables are looked up by (uri, qualified name) pairs in
   [find_schema_descr_uri]. *)
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)

let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
244

245

246
247
248
249
250
(*
let get_schema uri =
  try Hashtbl.find !schemas uri
  with Not_found -> assert false
*)
251

252
(* [find_schema_descr_uri kind uri name] looks up the schema component
   [name] of the schema identified by [uri].
   - [kind = Some k] restricts the search to the table of that component
     kind;
   - [kind = None] tries elements, types, attributes, attribute groups and
     model groups, in that order, and returns the first hit.
   Raises [Error] with a "No ... named ... found in schema ..." message when
   nothing is found. *)
let find_schema_descr_uri kind uri (name : Ns.qname) =
  try
    let elt () = Hashtbl.find !schema_elements (uri, name) in
    let typ () = Hashtbl.find !schema_types (uri, name) in
    let att () = Hashtbl.find !schema_attributes (uri, name) in
    let att_group () = Hashtbl.find !schema_attribute_groups (uri, name) in
    let mod_group () = Hashtbl.find !schema_model_groups (uri, name) in
    (* Returns the result of the first lookup that does not raise
       [Not_found]. *)
    let rec do_try = function
      | [] -> raise Not_found
      | f :: rem -> (try f () with Not_found -> do_try rem)
    in
    match kind with
      | Some `Element -> do_try [ elt ]
      | Some `Type -> do_try [ typ ]
      | Some `Attribute -> do_try [ att ]
      | Some `Attribute_group -> do_try [ att_group ]
      | Some `Model_group -> do_try [ mod_group ]
      | None ->
          (* policy for unqualified schema component resolution. This order should
           * be consistent with Schema_component.get_component *)
          do_try [ elt; typ; att; att_group; mod_group ]
    with Not_found ->
      raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
                      (Schema_common.string_of_component_kind kind) (Ns.QName.to_string name) uri))
276
277
278
279
280

(* [find_schema_descr env kind schema name] resolves the schema name to its
   uri with [find_schema] and then looks the component up with
   [find_schema_descr_uri]. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

281

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
module IType = struct
  (* Intermediate representation of types and patterns: a graph of mutable
     nodes.  The option fields cache results computed later in this module. *)
  type node = {
    mutable desc: desc;
    mutable smallhash: int;  (* Local hash *)
    mutable rechash: int;    (* Global (recursive) hash *)
    mutable sid: int;        (* Sequential id used to compute rechash *)
    mutable t: Types.t option;            (* cache filled by [typ] *)
    mutable tnode: Types.Node.t option;   (* cache filled by [typ_node] *)
    mutable p: Patterns.descr option;     (* cache filled by [pat] *)
    mutable pnode: Patterns.node option;  (* cache filled by [pat_node] *)
    mutable fv: fv option                 (* cache filled by [fv] / [peek_fv] *)
  } 
  and desc =
    | ILink of node   (* indirection to another node; followed by [repr] *)
    | IType of Types.descr * int   (* a type and its hash, see [itype] *)
    | IOr of node * node
    | IAnd of node * node
    | IDiff of node * node
    | ITimes of node * node
    | IXml of node * node
    | IArrow of node * node
    | IOptional of node
    | IRecord of bool * (node * node option) label_map
    | ICapture of id
    | IConstant of id * Types.const

  (* Template node with every hash at 0 and every cache empty; its [desc] is
     a link to itself.  [mk] and [mk_delayed] copy it with another [desc]. *)
  let rec node_temp = { 
    desc = ILink node_temp;
    smallhash = 0; rechash = 0; sid = 0;
    t = None; tnode = None; p = None; pnode = None;
    fv = None
  }
			
315
(* Recursive hash-consing *)
316

317
318
319
320
321
322
  (* [hash_field f (p, e)] hashes a record field made of a pattern [p] and
     an optional or-else part [e], using [f] to hash the components. *)
  let hash_field f (p, e) =
    match e with
      | Some e -> 1 + 17 * f p + 257 * f e
      | None -> 2 + 17 * f p

  (* [hash f n] computes a hash of the node [n] from its constructor, using
     [f] to hash the sub-nodes.  Links are followed transparently; for
     [IType] the hash stored in the node is used. *)
  let rec hash f n = match n.desc with
    | ILink n -> hash f n
    | IType (t,h) -> 1 + 17 * h
    | IOr (p1,p2) -> 2 + 17 * f p1 + 257 * f p2
    | IAnd (p1,p2) -> 3 + 17 * f p1 + 257 * f p2
    | IDiff (p1,p2) -> 4 + 17 * f p1 + 257 * f p2
    | ITimes (p1,p2) -> 5 + 17 * f p1 + 257 * f p2
    | IXml (p1,p2) -> 6 + 17 * f p1 + 257 * f p2
    | IArrow (p1,p2) -> 7 + 17 * f p1 + 257 * f p2
    | IOptional p -> 8 + 17 * f p
    | IRecord (o,r)->9+(if o then 17 else 0)+
        257*(LabelMap.hash (hash_field f) r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257*(Types.Const.hash c)

336
337
338
339
340
  (* Bounded-depth hashes: [hash0] hashes every sub-node to 1, and each
     following function hashes sub-nodes with the previous one, i.e. looks
     one level deeper. *)
  let hash0 = hash (fun _ -> 1)
  let hash1 = hash hash0
  let hash2 = hash hash1
  let hash3 = hash hash2

341
342
  (* [smallhash n] returns the local hash of [n] ([hash2]), computing it on
     first use and storing it in the node.  A stored value of 0 means "not
     computed yet". *)
  let smallhash n =
    if n.smallhash != 0 then n.smallhash
    else (
      let h = hash2 n in
      n.smallhash <- h; h
    )
347
348

  (* [repr n] follows [ILink] indirections from [n] to the first node that is
     not a link.  Every link traversed is redirected to that node, which
     shortens later lookups. *)
  let rec repr = function
    | { desc = ILink n } as m -> let z = repr n in m.desc <- ILink z; z
    | n -> n

  (* Undo log: (node, previous desc) pairs pushed by [link], replayed by
     [may_unify] when a unification attempt fails. *)
  let back = ref []

354
355
356
357
  (* Like [repr], but leaves the [desc] of its own argument untouched: only
     the links behind it are followed (and shortened) by [repr].  The function
     does not call itself, so it is not declared recursive. *)
  let prot_repr = function
    | { desc = ILink n } -> repr n
    | n -> n

358
359
360
361
362
363
364
365
366
  (* [link x y] turns one of the two nodes into a link to the other.  The
     node that is overwritten must have an empty [t] cache; the first
     argument is preferred when both qualify.  Its previous [desc] is pushed
     on [back] so that the change can be undone.  When both nodes have a [t],
     this is an internal error. *)
  let link x y = match x,y with
    | { t = None } as x, y 
    | y, ({ t = None } as x) -> back := (x,x.desc) :: !back; x.desc <- ILink y
    | _ -> assert false

  (* Raised by [unify] when two nodes cannot be identified. *)
  exception Unify

  (* [unify x y] tries to identify the graphs rooted at [x] and [y].
     Nodes with the same shape are linked together with [link] before their
     children are visited.  Raises [Unify] on any mismatch: different small
     hashes, both nodes already carrying a [t], or different constructors or
     payloads.  The links made before the failure are left in place; undoing
     them is the job of [may_unify]. *)
  let rec unify x y =
    if x == y then ()
    else let x = prot_repr x and y = prot_repr y in if x == y then ()
    else if (smallhash x != smallhash y) then raise Unify
    else if (x.t != None) && (y.t != None) then raise Unify
      (* x and y have been internalized; if they were equivalent,
         they would be equal *)
    else match x.desc,y.desc with
      | IType (tx,_), IType (ty,_) when Types.equal tx ty -> link x y
      | IOr (x1,x2), IOr (y1,y2)
      | IAnd (x1,x2), IAnd (y1,y2)
      | IDiff (x1,x2), IDiff (y1,y2)
      | ITimes (x1,x2), ITimes (y1,y2)
      | IXml (x1,x2), IXml (y1,y2)
      | IArrow (x1,x2), IArrow (y1,y2) -> link x y; unify x1 y1; unify x2 y2
      | IOptional x1, IOptional y1 -> link x y; unify x1 y1
      | IRecord (xo,xr), IRecord (yo,yr) when xo == yo ->
          link x y; LabelMap.may_collide unify_field Unify xr yr
      | ICapture xv, ICapture yv when Id.equal xv yv -> ()
      | IConstant (xv,xc), IConstant (yv,yc) when
          Id.equal xv yv && Types.Const.equal xc yc -> ()
      | _ -> raise Unify
  (* Unifies two record fields: both must have, or both must lack, an
     or-else part. *)
  and unify_field f1 f2 = match f1,f2 with
    | (p1, Some e1), (p2, Some e2) -> unify p1 p2; unify e1 e2
    | (p1, None), (p2, None) -> unify p1 p2
    | _ -> raise Unify

392

393
394
  (* [may_unify x y] attempts [unify x y].  On success the links are kept,
     the undo log is dropped and the result is [true].  On [Unify] every
     change recorded in [back] is reverted, the log is emptied and the result
     is [false]. *)
  let may_unify x y =
    try unify x y; back := []; true
    with Unify ->
      List.iter (fun (x,xd) -> x.desc <- xd) !back; back := []; false

  (* Hash tables over nodes, hashed with [smallhash] and compared with
     [may_unify].  Note that the equality test has side effects: a successful
     comparison links the two nodes together. *)
  module SmallHash = Hashtbl.Make(
    struct 
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

  (* [iter_field f (x, e)] applies [f] to [x], then to the or-else part when
     there is one. *)
  let iter_field f field =
    match field with
      | (x, None) -> f x
      | (x, Some y) -> f x; f y
  (* [iter f d] applies [f] to the direct sub-nodes of the description [d],
     left to right.  Links, types, captures and constants have no sub-node
     as far as this traversal is concerned. *)
  let iter f d =
    match d with
      | IOr (x,y) | IAnd (x,y) | IDiff (x,y)
      | ITimes (x,y) | IXml (x,y) | IArrow (x,y) -> f x; f y
      | IOptional x -> f x
      | IRecord (_,r) -> LabelMap.iter (iter_field f) r
      | ILink _ | IType _ | ICapture _ | IConstant _ -> ()

  (* [minimize (mem, add)] returns a traversal function.  Applied to a node,
     it visits the graph below it: a node for which [mem] answers [true] is
     skipped; any other node is recorded with [add] and, unless it already
     carries a [t], its sub-nodes are visited in turn.
     The node is normalised with [repr] a second time after the [mem] test;
     when [mem] is [SmallHash.mem] the test runs [may_unify], which mutates
     links while it runs.  NOTE(review): the second [repr] looks purely
     defensive -- confirm before removing it. *)
  let minimize (mem,add) =
    let rec aux n =
      let n = repr n in
      if mem n then () else (
        let n = repr n in add n ();
        if n.t == None then iter aux n.desc
      )
    in aux

  (* Scratch state for graph traversals: [to_clear] lists the nodes whose
     [sid] mark has been set, [sid] is the last sequential id handed out. *)
  let to_clear = ref []
  let sid = ref 0

  (* Recursive hash: the first visit of a node gives it the next sequential
     id and hashes its contents with [hash]; meeting the node again
     contributes [17 * sid] instead of recursing.  Marked nodes are recorded
     in [to_clear]. *)
  let rec rechash n =
    let n = repr n in
    if (n.sid != 0) then 17 * n.sid
    else (incr sid; n.sid <- !sid; to_clear := n :: !to_clear; hash rechash n)

  (* Resets the id counter and the [sid] mark of every node in [to_clear],
     then empties the list. *)
  let clear () =
    sid := 0; List.iter (fun x -> x.sid <- 0) !to_clear;
    to_clear := []
435
436
437
438
439
440
441
442
443
444
445
446
447
448

  (* Cached front-end to the recursive hash defined above: the value is
     computed once per representative node, the traversal marks are cleared,
     and the result is stored in the node.  A stored value of 0 means "not
     computed yet". *)
  let rechash n =
    let n = repr n in
    if n.rechash != 0 then n.rechash
    else begin
      let h = rechash n in
      clear ();
      n.rechash <- h;
      h
    end

  (* Second node hash table.  It is only referenced from the commented-out
     two-phase [internalize] below.
     NOTE(review): the hash function here is [smallhash], exactly as in
     [SmallHash]; given the module name, [rechash] may have been intended --
     confirm before re-enabling the two-phase code. *)
  module RecHash = Hashtbl.Make(
    struct
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

449
450
451

(** Two-phases recursive hash-consing **)
(*
452
453
454
  let gtable = RecHash.create 17577

  let internalize n =
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
    let local = SmallHash.create 17 in
    minimize (SmallHash.mem local, SmallHash.add local) n; 
    minimize (RecHash.mem gtable, RecHash.add gtable) n;
    ()
*)

(** Single-phase hash-consing **)
  (* Global table of internalized nodes. *)
  let gtable = SmallHash.create 17

  (* [internalize n] runs [minimize] over the graph rooted at [n], using the
     global table for membership tests and insertions. *)
  let internalize n =
    let mem = SmallHash.mem gtable in
    let add = SmallHash.add gtable in
    minimize (mem, add) n



(*  let internalize n = () *)
470
471
472
473
474
475
476
477
478

(* Compute free variables *)

  (* [fv n] returns the set of identifiers occurring in captures or
     constants in the graph rooted at [n].  The result is cached in [n.fv].
     Visited nodes are marked through their [sid] field, and the marks are
     reset with [clear] at the end.  A node whose [fv] is already known
     contributes its cached set without being explored.  The traversal
     expects [to_clear] to be empty on entry. *)
  let fv n =
    let fv = ref IdSet.empty in
    let rec aux n =
      let n = repr n in
      if (n.sid = 0) then (
        n.sid <- 1;
        to_clear := n :: !to_clear;
        match n.fv, n.desc with
          | Some x, _ -> fv := IdSet.cup !fv x
          | None, (ICapture x | IConstant (x,_)) -> fv := IdSet.add x !fv
          | None, d -> iter aux d
      )
    in
    assert(!to_clear == []);
    match n.fv with
      | Some x -> x
      | None -> aux n; clear (); n.fv <- Some !fv; !fv

491
492
493
(* optimized version to check closedness *)

  (* Shared cached value for "no free variable". *)
  let no_fv = Some IdSet.empty

  (* Raised by [peek_fv] with one identifier found in the node. *)
  exception FoundFv of id

  (* [peek_fv n] checks that the graph rooted at [n] contains no capture or
     constant identifier.  It raises [FoundFv x] as soon as one identifier
     [x] is found.  When the traversal completes, every visited node gets
     [no_fv] as its cached [fv] and its mark is reset.  On any exception the
     marks are reset with [clear] and the exception is re-raised. *)
  let peek_fv n =
    let err x = raise (FoundFv x) in
    let rec aux n =
      let n = repr n in
      if (n.sid = 0) then (
        n.sid <- 1;
        to_clear := n :: !to_clear;
        match n.fv, n.desc with
          | Some x, _ -> (match IdSet.pick x with Some x -> err x | None -> ())
          | None, (ICapture x | IConstant (x,_)) -> err x
          | None, d -> iter aux d
      )
    in
    assert(!to_clear == []);
    try
      match n.fv with
        | Some x -> (match IdSet.pick x with Some x -> err x | None -> ())
        | None -> aux n;
            List.iter (fun n -> n.sid <- 0; n.fv <- no_fv) !to_clear;
            to_clear := []
    with exn -> clear (); raise exn

517
518
519
520
521
522
523
524
525
526
527
  (* [check_no_fv loc n] reports a located "Capture variable not allowed"
     error at [loc] when [n] contains a capture or constant identifier. *)
  let check_no_fv loc n =
    try peek_fv n
    with FoundFv x ->
      let msg = "Capture variable not allowed: " ^ (Ident.to_string x) in
      raise_loc_generic loc msg

  (* [has_no_fv n] is [true] when [peek_fv] finds no identifier in [n]. *)
  let has_no_fv n =
    let closed =
      try peek_fv n; true
      with FoundFv _ -> false in
    closed


528
(* From the intermediate representation to the internal one *)
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553


  (* [typ n] translates the node [n] into a type descriptor and caches the
     result in [n.t]. *)
  let rec typ n =
    let n = repr n in
    match n.t with
      | Some t -> t
      | None -> let t = compute_typ n.desc in n.t <- Some t; t
  (* Translation of one description.  Boolean connectives work on
     descriptors ([typ]); products, XML elements, arrows and record fields go
     through type nodes ([typ_node]).  Captures and constants are not
     expected here (callers check with [check_no_fv] first), nor are links,
     since [typ] starts from a representative. *)
  and compute_typ = function
    | IType (t,_) -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | IRecord (o,r) ->  Types.record' (o, LabelMap.map compute_typ_field r)
    | ILink _ -> assert false
    | ICapture _ | IConstant (_,_) -> assert false
  (* A record field of a type must not have an or-else part. *)
  and compute_typ_field = function
    | (s, None) -> typ_node s
    | (s, Some _) ->
        raise (Patterns.Error "Or-else clauses are not allowed in types")

  (* [typ_node n] returns the type node of [n], cached in [n.tnode].  The
     node is created and stored before its contents are computed, so a
     recursive reference to [n] reached during that computation finds it. *)
  and typ_node n =
    let n = repr n in
    match n.tnode with
      | Some t -> t
      | None ->
          let x = Types.make () in
          n.tnode <- Some x;
          Types.define x (typ n);
          x
      
  (* [pat n] translates the node [n] into a pattern descriptor.  A node
     without free variables is translated as a type and wrapped with
     [Patterns.constr]; otherwise the result of [compute_pat] is cached in
     [n.p]. *)
  let rec pat n =
    let n = repr n in
    if IdSet.is_empty (fv n)
    then Patterns.constr (typ n)
    else match n.p with
      | Some p -> p
      | None -> let p = compute_pat n.desc in n.p <- Some p; p

  (* Translation of one description that has free variables.  Constructs
     with no pattern counterpart raise [Patterns.Error]. *)
  and compute_pat = function
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    | IDiff (s1,s2) when IdSet.is_empty (fv s2) ->
        (* A difference with a closed right-hand side is an intersection
           with the negated type. *)
        let s2 = Types.neg (typ s2) in
        Patterns.cap (pat s1) (Patterns.constr s2)
    | IDiff _ ->
        raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ ->
        raise (Patterns.Error "Optional fields are not allowed in record patterns")
    | IRecord (o,r) ->
        (* Fields without free variables go into a record type constraint;
           the other fields each add a record pattern to [pats]. *)
        let pats = ref [] in
        let aux l = function
          | (s,None) ->
              if IdSet.is_empty (fv s) then typ_node s
              else
                ( pats := Patterns.record l (pat_node s) :: !pats;
                  Types.any_node )
          | (s,Some e) ->
              if IdSet.is_empty (fv s) then
                raise (Patterns.Error "Or-else clauses are not allowed in types")
              else
                ( pats := Patterns.cup
                    (Patterns.record l (pat_node s))
                    (pat e) :: !pats;
                  Types.Record.any_or_absent_node )
        in
        let constr = Types.record' (o,LabelMap.mapi aux r) in
        List.fold_left Patterns.cap (Patterns.constr constr) !pats
          (* TODO: can avoid constr when o=true, and all fields have fv *)
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IArrow _ ->
        raise (Patterns.Error "Arrows are not allowed in patterns")
    | IType _ | ILink _ -> assert false

  (* [pat_node n] returns the pattern node of [n], cached in [n.pnode].  The
     node is stored before its contents are computed, so recursive
     references find it; the cache entry is removed again if the computation
     raises. *)
  and pat_node n =
    let n = repr n in
    match n.pnode with
      | Some p -> p
      | None ->
          let x = Patterns.make (fv n) in
          try
            n.pnode <- Some x;
            Patterns.define x (pat n);
            x
          with exn -> n.pnode <- None; raise exn

(* From AST to the intermediate representation *)

  (* Environment used while translating the AST: the typing environment plus
     the nodes of the recursive definitions currently in scope. *)
  type penv = {
    penv_tenv : t;
    penv_derec : node Env.t;
  }

  (* Builds a translation environment with no recursive definition in
     scope. *)
  let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }

  (* Node constructors.  [mk d] is a fresh node with description [d];
     [mk_delayed ()] is a fresh node whose description is filled in later
     (it starts as a link to [node_temp]); [itype t] wraps a type together
     with its hash; [iempty] is the node of the empty type. *)
  let mk d = { node_temp with desc = d }
  let mk_delayed () = { node_temp with desc = ILink node_temp }
  let itype t = mk (IType (t, Types.hash t))
  let iempty = itype Types.empty

  (* [ior p1 p2] is the union of two nodes.  An argument whose description
     is physically the one of [iempty] is dropped. *)
  let ior p1 p2 =
    if p1.desc == iempty.desc then p2
    else if p2.desc == iempty.desc then p1
    else mk (IOr (p1,p2))

  (* [iand p1 p2] is the intersection of two nodes; it is [iempty] as soon
     as one argument has physically the description of [iempty]. *)
  let iand p1 p2 =
    if (p1.desc == iempty.desc) || (p2.desc == iempty.desc) then iempty
    else mk (IAnd (p1,p2))

  (* Regular expressions over nodes, after variables have been resolved. *)
  type regexp =
    | PElem of node        (* one element *)
    | PGuard of node       (* constraint on the rest; see [remove_regexp] *)
    | PSeq of regexp list
    | PAlt of regexp list
    | PStar of regexp
    | PWeakStar of regexp

652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
  (* [nullable r] tells whether [r] accepts the empty sequence: stars and
     guards always do, an element never does, a sequence does when all its
     parts do, an alternative when one of its branches does. *)
  let rec nullable r =
    match r with
      | PStar _ | PWeakStar _ | PGuard _ -> true
      | PElem _ -> false
      | PSeq rl -> List.for_all nullable rl
      | PAlt rl -> List.exists nullable rl

  (* The empty sequence and the empty alternative. *)
  let eps = PSeq []
  let emp = PAlt []

  (* [seq r1 r2] concatenates two regexps, flattening top-level [PSeq]
     arguments; a concatenation with a single component is returned as that
     component. *)
  let seq r1 r2 =
    let parts = function PSeq l -> l | x -> [ x ] in
    match parts r1 @ parts r2 with
      | [ x ] -> x
      | l -> PSeq l

  (* [alt r1 r2] is the alternative of two regexps, flattening top-level
     [PAlt] arguments; an alternative with a single branch is returned as
     that branch. *)
  let alt r1 r2 =
    let branches = function PAlt l -> l | x -> [ x ] in
    match branches r1 @ branches r2 with
      | [ x ] -> x
      | l -> PAlt l

  (* [merge_alt l] merges adjacent [PElem] branches of an alternative into a
     single element whose node is their union; other branches are kept in
     place. *)
  let rec merge_alt = function
    | PElem p::PElem q::l -> merge_alt (PElem (ior p q) :: l)
    | r::l -> r::(merge_alt l)
    | [] -> []
679
680
681
682
683
684
685
686
687

(* Works only for types, not patterns, because
   [ (x&Int|_) R' ] is possible *)
  (* [simplify_regexp r] simplifies [r] bottom-up: adjacent elements of an
     alternative are merged with [merge_alt], and weak stars become ordinary
     stars.  Elements and guards are returned unchanged. *)
  let rec simplify_regexp r =
    match r with
      | PSeq l -> PSeq (List.map simplify_regexp l)
      | PAlt l -> PAlt (merge_alt (List.map simplify_regexp l))
      | PStar r | PWeakStar r -> PStar (simplify_regexp r)
      | PElem _ | PGuard _ -> r

688
689
690
691
692
693
694
695
696
  (* Debug printer for regexps: prints the constructor structure only, not
     the nodes.  List items are each followed by ';'. *)
  let rec print_regexp ppf r =
    match r with
      | PElem _ -> Format.fprintf ppf "Elem"
      | PGuard _ -> Format.fprintf ppf "Guard"
      | PSeq l -> Format.fprintf ppf "Seq(%a)" print_regexp_list l
      | PAlt l -> Format.fprintf ppf "Alt(%a)" print_regexp_list l
      | PStar r -> Format.fprintf ppf "Star(%a)" print_regexp r
      | PWeakStar r -> Format.fprintf ppf "WStar(%a)" print_regexp r
  and print_regexp_list ppf l =
    let item x = Format.fprintf ppf "%a;" print_regexp x in
    List.iter item l
697

698
699
  (* [remove_regexp r q] compiles the regexp [r], followed by the
     continuation node [q], into a node:
     - an element becomes a product of the element with [q];
     - a guard becomes an intersection with [q];
     - a sequence is folded from the right, an alternative is a union;
     - stars introduce a delayed node [x] for the loop.  [PStar] puts the
       loop first in the union ([ior x q]) and [PWeakStar] puts the exit
       first ([ior q x]).  The loop body is compiled with
       [remove_regexp_nullable], with [iempty] as the continuation for an
       empty iteration. *)
  let rec remove_regexp r q =
    match r with
    | PElem p ->
        mk (ITimes (p, q))
    | PGuard p ->
        iand p q
    | PSeq l ->
        List.fold_right (fun r a -> remove_regexp r a) l q
    | PAlt rl ->
        List.fold_left (fun a r -> ior a (remove_regexp r q)) iempty rl
    | PStar r ->
        let x = mk_delayed () in
        let res = ior x q in
        x.desc <- ILink (remove_regexp_nullable r res iempty);
        res
    | PWeakStar r ->
        let x = mk_delayed () in
        let res = ior q x in
        x.desc <- ILink (remove_regexp_nullable r res iempty);
        res

  (* Chooses between the two compilers: [remove_regexp2] when [r] is
     nullable, [remove_regexp] (with the non-empty continuation) otherwise. *)
  and remove_regexp_nullable r q_nonempty q_empty =
    if nullable r then remove_regexp2 r q_nonempty q_empty
    else remove_regexp r q_nonempty

  (* Variant with two continuations: [q_nonempty] is used after [r] has
     matched at least one element, [q_empty] when it matched the empty
     sequence. *)
  and remove_regexp2 r q_nonempty q_empty =
    (* Assume r is nullable *)
    if q_nonempty == q_empty then remove_regexp r q_nonempty
    else match r with
      | PSeq [] ->
          q_empty
      | PElem p ->
          (* an element is never nullable *)
          assert false
      | PGuard p ->
          iand p q_empty
      | PSeq (r::rl) ->
          remove_regexp2 r
            (remove_regexp (PSeq rl) q_nonempty)
            (remove_regexp2 (PSeq rl) q_nonempty q_empty)
      | PAlt rl ->
          List.fold_left
            (fun a r -> ior a (remove_regexp_nullable r q_nonempty q_empty))
            iempty rl
      | PStar r ->
          let x = mk_delayed () in
          x.desc <- ILink (remove_regexp_nullable r (ior x q_nonempty) iempty);
          ior x q_empty
      | PWeakStar r ->
          let x = mk_delayed () in
          x.desc <- ILink (remove_regexp_nullable r (ior q_nonempty x) iempty);
          ior q_empty x


  (* The constant for the empty sequence. *)
  let cst_nil = Types.Atom Sequence.nil_atom

  (* [capture_all vars p] intersects [p] with a capture of every variable in
     [vars]. *)
  let capture_all vars p =
    IdSet.fold (fun p x -> iand p (mk (ICapture x))) p vars

  (* [termin b vars p] returns [p] unchanged when [b] is true; otherwise it
     appends to [p], for every variable of [vars], a guard binding that
     variable to the constant [cst_nil]. *)
  let termin b vars p =
    if b then p
    else IdSet.fold
      (fun p x -> seq p (PGuard (mk (IConstant (x,cst_nil))))) p vars
758
759
760

  (* [rexp r] compiles the regexp [r] with the empty-sequence type as final
     continuation. *)
  let rexp r = remove_regexp r (itype Sequence.nil_type)

761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
  (* Delayed nodes created by [delayed] and not yet checked by
     [check_delayed], each with the location of its definition. *)
  let all_delayed = ref []

  (* [delayed loc] creates a delayed node and registers it, together with
     [loc], for the well-formedness check. *)
  let delayed loc =
    let node = mk_delayed () in
    all_delayed := (loc,node) :: !all_delayed;
    node

  (* [check_one_delayed (loc, p)] rejects a definition whose node [p] can be
     reached from itself by going only through unions, intersections,
     differences and links.  Such a definition is reported at [loc] as
     "Ill-formed recursion".  Other constructors stop the search. *)
  let check_one_delayed (loc,p) =
    let rec visit q =
      if p == q then raise Exit;
      visit_desc q.desc
    and visit_desc d =
      match d with
        | IOr (q1,q2) | IAnd (q1,q2) | IDiff (q1,q2) -> visit q1; visit q2
        | ILink q -> visit q
        | IType _ | ITimes _ | IXml _ | IArrow _ | IOptional _
        | IRecord _ | ICapture _ | IConstant _ -> ()
    in
    try visit_desc p.desc
    with Exit -> error loc "Ill-formed recursion"
    
  (* Checks every pending delayed node with [check_one_delayed].  The
     pending list is emptied before the checks run, so it is empty even when
     a check raises. *)
  let check_delayed () =
    let pending = !all_delayed in
    all_delayed := [];
    List.iter check_one_delayed pending
    
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  (* [derecurs env p] translates the AST of a type or pattern into the
     intermediate node representation.  Names are resolved with
     [derecurs_var], recursive definitions with [derecurs_def], and regular
     expressions are compiled with [derecurs_regexp] followed by [rexp]. *)
  let rec derecurs env p = match p.descr with
    | PatVar v -> derecurs_var env p.loc v
    | SchemaVar (kind, schema_name, component_name) ->
        let name = qname env.penv_tenv  p.loc component_name in
        itype (find_schema_descr env.penv_tenv kind schema_name name)
    | Recurs (p,b) -> derecurs (derecurs_def env b) p
    | Internal t -> itype t
    | NsT ns ->
        itype (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
    | Or (p1,p2) -> mk (IOr (derecurs env p1, derecurs env p2))
    | And (p1,p2) -> mk (IAnd (derecurs env p1, derecurs env p2))
    | Diff (p1,p2) -> mk (IDiff (derecurs env p1, derecurs env p2))
    | Prod (p1,p2) -> mk (ITimes (derecurs env p1, derecurs env p2))
    | XmlT (p1,p2) -> mk (IXml (derecurs env p1, derecurs env p2))
    | Arrow (p1,p2) -> mk (IArrow (derecurs env p1, derecurs env p2))
    | Optional p -> mk (IOptional (derecurs env p))
    | Record (o,r) ->
        let aux = function
          | (p,Some e) -> (derecurs env p, Some (derecurs env e))
          | (p,None) -> derecurs env p, None in
        mk (IRecord (o, parse_record env.penv_tenv p.loc aux r))
    | Constant (x,c) -> mk (IConstant (x,const env.penv_tenv p.loc c))
    | Cst c -> itype (Types.constant (const env.penv_tenv p.loc c))
    | Regexp r ->
        let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
        rexp r

  and derecurs_regexp vars b rvars f env = function
      (* - vars: seq variables to be propagated top-down and added
         to each captured element
         - b: below a star ?
         - rvars: seq variables that appear on the right of the regexp
         - f: tail position

         returns the set of seq variable of the regexp minus rvars
         (they have already been terminated if not below a star)
      *)
    | Epsilon ->
        PSeq [], IdSet.empty
    | Elem p ->
        PElem (capture_all vars (derecurs env p)), IdSet.empty
    | Guard p ->
        PGuard (derecurs env p), IdSet.empty
    | Seq (p1,p2) ->
        (* The right part is translated first: its variables are needed as
           [rvars] for the left part. *)
        let (p2,v2) = derecurs_regexp vars b rvars f env p2 in
        let (p1,v1) = derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
        seq p1 p2, IdSet.cup v1 v2
    | Alt (p1,p2) ->
        (* Each branch terminates the variables that only the other branch
           binds. *)
        let (p1,v1) = derecurs_regexp vars b rvars f env p1
        and (p2,v2) = derecurs_regexp vars b rvars f env p2 in
        alt (termin b (IdSet.diff v2 v1) p1) (termin b (IdSet.diff v1 v2) p2),
        IdSet.cup v1 v2
    | Star p ->
        let (p,v) = derecurs_regexp vars true rvars false env p in
        termin b v (PStar p), v
    | WeakStar p ->
        let (p,v) = derecurs_regexp vars true rvars false env p in
        termin b v (PWeakStar p), v
    | SeqCapture (x,p) ->
        (* In tail position the variable captures the whole rest through a
           guard; otherwise it is added to [vars] so that every element
           below captures it. *)
        let vars = if f then vars else IdSet.add x vars in
        let after = IdSet.mem rvars x in
        let rvars = IdSet.add x rvars in
        let (p,v) = derecurs_regexp vars b rvars false env p in
        (if f
         then seq (PGuard (mk (ICapture x))) p
         else termin (after || b) (IdSet.singleton x) p),
        (if after then v else IdSet.add x v)

  (* Resolution of a name.  An unqualified name is looked up among the
     recursive definitions in scope, then among the types of the
     environment, and otherwise becomes a capture variable.  A qualified
     name "cu:v" must denote a type of the compilation unit [cu]. *)
  and derecurs_var env loc v =
    match Ns.split_qname v with
      | "", v ->
          let v = ident v in
          (try Env.find v env.penv_derec
           with Not_found ->
             try itype (find_type v env.penv_tenv)
             with Not_found -> mk (ICapture v))
      | cu, v ->
          try
            let cu = U.mk cu in
            itype (find_type_global loc cu (ident v) env.penv_tenv)
          with Not_found ->
            raise_loc_generic loc
              ("Unbound external type " ^ cu ^ ":" ^ (U.to_string v))

  (* [derecurs_def env b] enters the mutually recursive definitions [b]:
     every name is first bound to a delayed node, then each body is
     translated in the extended environment and linked to its node.  Returns
     the extended environment. *)
  and derecurs_def env b =
    let b = List.map (fun (v,p) -> (v,p,delayed p.loc)) b in
    let n =
      List.fold_left (fun env (v,p,s) -> Env.add v s env) env.penv_derec b in
    let env = { env with penv_derec = n } in
    List.iter (fun (v,p,s) -> s.desc <- ILink (derecurs env p)) b;
    env

878
879
880
881
882
  (* [derec penv p] translates [p] with [derecurs], checks the delayed nodes
     created on the way, internalizes the result and returns it. *)
  let derec penv p =
    let node = derecurs penv p in
    check_delayed ();
    internalize node;
    node
883
884


885
(* API *)
886
887
888
889
890
891
892
893
894
895
896
897
898
899

  module Ids = Set.Make(Id)

  (* [type_defs env b] processes a group of mutually recursive type
     definitions [b], a list of (identifier, AST) pairs.
     - A name defined twice in the group is reported at the location of the
       second definition.
     - Every body is translated, must be free of capture variables, and is
       turned into a type; [Patterns.Error] is reported at the location of
       the definition.
     - A definition with a real location that yields an empty type triggers
       a warning.
     - Every resulting type is registered with the type printer under the
       name of its identifier.
     Returns the list of (identifier, type) pairs. *)
  let type_defs env b =
    ignore
      (List.fold_left
         (fun seen (v,p) ->
            if Ids.mem v seen then
              raise_loc_generic p.loc
                ("Multiple definitions for the type identifier " ^
                   (Ident.to_string v));
            Ids.add v seen
         ) Ids.empty b);

    let penv = derecurs_def (penv env) b in
    let aux t =
      let d = derec penv t in
      check_no_fv t.loc d;
      try typ d
      with Patterns.Error s -> raise_loc_generic t.loc s
    in
    let b =
      List.map
        (fun (v,p) ->
           let t = aux p in
           if (p.loc <> noloc) && (Types.is_empty t) then
             warning p.loc
               ("This definition yields an empty type for " ^ (Ident.to_string v));
           (v,t)) b in
    List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
    b

917

918
919
920
  (* [typ_descr d] internalizes the node [d] and returns its type
     descriptor. *)
  let typ_descr d =
    let () = internalize d in
    typ d
921

922
923
924
925
926
927
928
929
930
931
932
  (* [typ env t] translates the AST [t] into a type node.  Capture variables
     are rejected, and [Patterns.Error] is reported at the location of
     [t]. *)
  let typ env t =
    let node = derec (penv env) t in
    check_no_fv t.loc node;
    try typ_node node
    with Patterns.Error msg -> raise_loc_generic t.loc msg

  (* [pat env t] translates the AST [t] into a pattern node;
     [Patterns.Error] is reported at the location of [t]. *)
  let pat env t =
    let node = derec (penv env) t in
    try pat_node node
    with Patterns.Error msg -> raise_loc_generic t.loc msg
end
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948

(* Entry points of [IType] re-exported at top level. *)
let typ = IType.typ
let pat = IType.pat
let type_defs = IType.type_defs

(* [dump_types ppf env] prints the name of every identifier bound to a type,
   each preceded by a space. *)
let dump_types ppf env =
  let print_type v item =
    match item with
      | Type _ -> Format.fprintf ppf " %a" Ident.print v
      | Val _ -> () in
  Env.iter print_type env.ids

(* [dump_ns ppf env] prints the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  let table = env.ns in
  Ns.dump_table ppf table



949

950
951
(* II. Build skeleton *)

952

953
type type_fun = Types.t -> bool -> Types.t
954

955
(* Identifier sets, used below for the free variables computed alongside
   each typed expression. *)
module Fv = IdSet
956

957
958
959
(* A typed branch paired with a list of further [branch] values; in
   [branches] this list holds the branches collected while the body of
   that branch was being translated. *)
type branch = Branch of Typed.branch * branch list

(* Global accumulator of the branches built so far at the current nesting
   level; saved, reset and updated by [branches]. *)
let cur_branch : branch list ref = ref []
960

961
(* [exp loc fv e] builds the result of translating an expression: the pair
   of the free-variable set [fv] and a [Typed] expression record with
   location [loc] and description [e].  The type field is initialised to
   [Types.empty]. *)
let exp loc fv e =
  fv,
  { Typed.exp_loc = loc;
    Typed.exp_typ = Types.empty;
    Typed.exp_descr = e;
  }
967

968
(* Table of registered operators: maps an operator name to a pair
   (arity, typing function); filled by [register_op]. *)
let ops = Hashtbl.create 13
969
970
(* [register_op op arity f] adds to [ops] a binding from the operator name
   [op] to the pair of its [arity] and the function [f]. *)
let register_op op arity f =
  let entry = (arity, f) in
  Hashtbl.add ops op entry
(* [typ_op op] returns the function registered for the operator [op].
   Raises [Not_found] when [op] is not in [ops]. *)
let typ_op op =
  let (_, f) = Hashtbl.find ops op in
  f
971

972
973
974
975
976
(* [is_op env s] decides whether the name [s] denotes a registered
   operator.  A binding of [s] in [env.ids] takes precedence: in that case
   the result is [None].  Otherwise the result is [Some (name, arity)] when
   the string form of [s] is found in [ops], and [None] when the lookup
   raises [Not_found]. *)
let is_op env s =
  let shadowed = Env.mem (ident s) env.ids in
  if not shadowed then
    (try
       let name = U.get_str s in
       let (arity, _) = Hashtbl.find ops name in
       Some (name, arity)
     with Not_found -> None)
  else None
977

978
979
(* [expr env loc e] translates the surface expression [e] into a pair
   (free variables, [Typed] expression), built with [exp].

   [loc] is the location attached to the node being built; a
   [LocatedExpr] replaces it for the expression it wraps.  Types appearing
   in expressions are translated with [typ], labels with [parse_label],
   records with [parse_record] and branch lists with [branches].  The free
   variables of a compound expression are the union of those of its
   sub-expressions. *)
let rec expr env loc = function
  | LocatedExpr (loc,e) -> expr env loc e
  | Forget (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Forget (e,t))
  | Check (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Check (ref Types.empty,e,t))
  | Var s -> var env loc s
  | Apply (e1,e2) -> 
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      let fv = Fv.cup fv1 fv2 in
      (* Applying an operator that still expects arguments extends its
         argument list and decrements the remaining arity; anything else
         is an ordinary application. *)
      (match e1.Typed.exp_descr with
         | Typed.Op (op,arity,args) when arity > 0 -> 
             exp loc fv (Typed.Op (op,arity - 1,args @ [e2]))
         | _ ->
             exp loc fv (Typed.Apply (e1,e2)))
  | Abstraction a -> abstraction env loc a
  | (Integer _ | Char _ | Atom _ | Const _) as c -> 
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Pair (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Pair (e1,e2))
  | Xml (e1,e2) ->
      let (fv1,e1) = expr env loc e1 and (fv2,e2) = expr env loc e2 in
      exp loc (Fv.cup fv1 fv2) (Typed.Xml (e1,e2))
  | Dot (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.Dot (e,parse_label env loc l))
  | RemoveField (e,l) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.RemoveField (e,parse_label env loc l))
  | RecordLitt r -> 
      (* The free variables of the fields are accumulated in a reference
         while [parse_record] translates them. *)
      let fv = ref Fv.empty in
      let r = parse_record env loc
                (fun e -> 
                   let (fv2,e) = expr env loc e 
                   in fv := Fv.cup !fv fv2; e)
                r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Match (e,b) -> 
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Match (e, b))
  | Map (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Map (e, b))
  | Transform (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Transform (e, b))
  | Xtrans (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Xtrans (e, b))
  | Validate (e,kind,schema,elt) ->
      let (fv,e) = expr env loc e in
      let uri = find_schema schema env in
      exp loc fv (Typed.Validate (e, kind, uri, qname env loc elt))
  | Try (e,b) ->
      let (fv1,e) = expr env loc e
      and (fv2,b) = branches env b in
      exp loc (Fv.cup fv1 fv2) (Typed.Try (e, b))
  | NamespaceIn (pr,ns,e) ->
      (* The namespace binding is only visible while translating [e]. *)
      let env = enter_ns pr ns env in
      expr env loc e
  | Ref (e,t) ->
      let (fv,e) = expr env loc e and t = typ env t in
      exp loc fv (Typed.Ref (e,t))
  | External (s,args) ->
      extern loc env s args
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
	
(* [extern loc env s args] translates a reference to the external [s]
   applied to the type arguments [args].  The arguments are translated
   with [typ], then [Externals.resolve] is called; its result is wrapped
   in a [Typed.External] node with no free variables.  Any exception
   raised after the arguments have been translated is re-raised with the
   location [loc]. *)
and extern loc env s args =
  let arg_types = List.map (fun a -> typ env a) args in
  try
    let (resolved, ty) = Externals.resolve s arg_types in
    let descr = Typed.External (ty, resolved) in
    exp loc Fv.empty descr
  with err -> raise_loc loc err
    
(* [var env loc s] translates an occurrence of the name [s].

   - If [is_op] recognises [s] as an operator, the result is a [Typed.Op]
     node with an empty argument list; for print_xml and print_xml_utf8
     the node is wrapped in [Typed.NsTable] together with the namespace
     table of [env].
   - Otherwise [s] is split with [Ns.split_qname]:
     - with an empty prefix, a name containing a dot is resolved as an
       external with no type arguments; any other name must be bound as a
       value in [env] and becomes a [Typed.Var] whose only free variable
       is itself.  An unbound name raises [UnboundId] at [loc]; its
       boolean tells whether the identifier is nevertheless present in
       [env.ids];
     - with a non-empty prefix, the prefix names a compilation unit found
       with [find_cu] and the result is a [Typed.ExtVar]; a name missing
       from that unit raises [UnboundExtId] at [loc]. *)
and var env loc s =
  match is_op env s with
    | Some (s,arity) -> 
        let need_ns = match s with "print_xml" | "print_xml_utf8" -> true
          | _ -> false in
        let e = Typed.Op (s, arity, []) in
        let e = if need_ns then Typed.NsTable (env.ns,e) else e in
        exp loc Fv.empty e
    | None ->
        match Ns.split_qname s with
          | "", id -> 
              let s = U.get_str id in
              if String.contains s '.' then
                extern loc env s []
              else
                let id = ident id in
                (* The lookup is only a binding check: its result is
                   discarded. *)
                (try ignore (find_value id env)
                 with Not_found -> raise_loc loc (UnboundId (id, Env.mem id env.ids)));
                exp loc (Fv.singleton id) (Typed.Var id)
          | cu, id -> 
              let cu = find_cu (U.mk cu) env in
              let id = ident id in
              let t =
                try find_value_global cu id env
                with Not_found ->
                  raise_loc loc (UnboundExtId (cu,id) ) in
              exp loc Fv.empty (Typed.ExtVar (cu, id, t))

(* [abstraction env loc a] translates the function abstraction [a].

   Every (domain, codomain) pair of the interface is translated with
   [typ]; the type of the function is the intersection, starting from
   [Types.any], of the corresponding arrow types.  When the function is
   named, its name is entered in the environment with
   [enter_values_dummy] before the body is translated with [branches],
   and is then removed from the free variables of the body.  The result
   is a [Typed.Abstraction] node whose interface is stored as type
   descriptors. *)
and abstraction env loc a =
  let typed_iface =
    List.map
      (fun (dom,codom) -> (typ env dom, typ env codom))
      a.fun_iface in
  let arrow_of (dom,codom) = Types.arrow dom codom in
  let fun_typ =
    List.fold_left
      (fun acc signature -> Types.cap acc (arrow_of signature))
      Types.any typed_iface in
  let descr_iface =
    List.map
      (fun (dom,codom) -> (Types.descr dom, Types.descr codom))
      typed_iface in
  let body_env =
    match a.fun_name with
      | Some f -> enter_values_dummy [ f ] env
      | None -> env
  in
  let (body_fv,body) = branches body_env a.fun_body in
  let fv =
    match a.fun_name with
      | Some f -> Fv.remove f body_fv
      | None -> body_fv
  in
  exp loc fv
    (Typed.Abstraction
       { Typed.fun_name = a.fun_name;
         Typed.fun_iface = descr_iface;
         Typed.fun_body = body;
         Typed.fun_typ = fun_typ;
         Typed.fun_fv = fv
       })
    
(* [branches env b] translates the list [b] of (pattern, body) branches.

   Returns the pair of
   - the free variables of all bodies, minus the variables captured by
     the pattern of their own branch, and
   - a [Typed] branches record holding the translated branches and the
     union of the types accepted by their patterns.  Its type is
     initialised to [Types.empty] and its compiled form to [None].

   For every branch:
   - the pattern is translated with [pat] and its capture variables are
     entered in the environment with [enter_values_dummy] before the body
     is translated;
   - a warning is emitted when some capture variable of the pattern does
     not occur free in the body;
   - [cur_branch] is saved and reset before the branch is processed, so
     that the branches met inside it are collected separately; the new
     branch, together with those nested branches, is then pushed on the
     saved list. *)
and branches env b = 
  let fv = ref Fv.empty in
  let accept = ref Types.empty in
  let branch (p,e) = 
    let cur_br = !cur_branch in
    cur_branch := [];
    let p' = pat env p in
    let fvp = Patterns.fv p' in
    let env' = enter_values_dummy fvp env in
    let (fv2,e) = expr env' noloc e in
    let br_loc = merge_loc p.loc e.Typed.exp_loc in
    (match Fv.pick (Fv.diff fvp fv2) with
       | None -> ()
       | Some x ->
           let x = U.to_string (Id.value x) in
           warning br_loc 
             ("The capture variable " ^ x ^ 
              " is declared in the pattern but not used in the body of this branch. It might be a misspelled or undeclared type or name (if it isn't, use _ instead)."));
    let fv2 = Fv.diff fv2 fvp in
    fv := Fv.cup !fv fv2;
    accept := Types.cup !accept (Types.descr (Patterns.accept p'));
    let br = 
      { 
        Typed.br_loc = br_loc;
        (* NOTE(review): physical equality with [noloc]; a branch starts
           as used only when [merge_loc] returned [noloc] itself -
           confirm this is intended. *)
        Typed.br_used = br_loc == noloc;
        Typed.br_vars_empty = Patterns.fv p';
        Typed.br_pat = p';
        Typed.br_body = e } in
    cur_branch := Branch (br, !cur_branch) :: cur_br;
    br in
  let b = List.map branch b in
  (!fv, 
   { 
     Typed.br_typ = Types.empty; 
     Typed.br_branches = b; 
     Typed.br_accept = !accept;
     Typed.br_compiled = None;
   } 
  )
1158

1159
(* Public entry point: translates [e] starting from [noloc] and returns
   only the typed expression, dropping the set of free variables. *)
let expr env e =
  let (_, typed) = expr env noloc e in
  typed
1160

1161
1162
(* [let_decl env p e] builds the [Typed] record of a let-declaration: the
   pattern [p] is translated with [pat] and the bound expression [e] with
   [expr], both in [env]; the compiled form is initialised to [None]. *)
let let_decl env p e =
  { Typed.let_pat = pat env p;
    Typed.let_body = expr env e;
    Typed.let_compiled = None }

1166
1167
1168

(* Hide global "typing/parsing" environment *)

1169

1170
1171
(* III. Type-checks *)

1172
1173
open Typed

1174
1175
1176
1177
1178
1179
(* [localize loc f x] evaluates [f x] and attaches the location [loc] to
   what it signals:
   - an [Error] or [Constraint] exception is re-raised wrapped in
     [Location.Location] with [loc];
   - a [Warning (msg, t)] exception is reported with [warning] at [loc]
     and [t] becomes the result.
   Other exceptions propagate unchanged. *)
let localize loc f x =
  try f x
  with
    | Warning (msg,t) ->
        warning loc msg;
        t
    | Error _ as exn ->
        raise (Location.Location (loc,`Full,exn))
    | Constraint (_,_) as exn ->
        raise (Location.Location (loc,`Full,exn))

1180
1181
(* [require loc t s] checks that [t] is a subtype of [s]; otherwise it
   raises [Constraint (t, s)] located at [loc]. *)
let require loc t s =
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
1182

1183
(* [verify loc t s] is [require loc t s] followed by returning [t]: it
   yields [t] when [t] is a subtype of [s] and otherwise raises the
   located [Constraint] error from [require]. *)
let verify loc t s = 
  require loc t s; t

1186
1187
1188
1189
(* [verify_noloc t s] returns [t] when [t] is a subtype of [s]; otherwise
   it raises [Constraint (t, s)] without any location. *)
let verify_noloc t s =
  if Types.subtype t s then t
  else raise (Constraint (t, s))

1190
1191
1192
1193
1194
(* [check_str loc ofs t s] returns [t] when [t] is a subtype of [s];
   otherwise it raises [Constraint (t, s)] through [raise_loc_str], which
   attaches the location [loc] and the character offset [ofs]. *)
let check_str loc ofs t s =
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

let should_have loc constr s = 
1195
1196