typer.ml 54.4 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraints
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
13
14
15
16
(* Shadow the polymorphic comparison operators with versions whose
   arguments are restricted to [int]; any other use of these operators
   below this point is a type error. *)
let ( = ) : int -> int -> bool = ( = )
let ( <= ) : int -> int -> bool = ( <= )
let ( < ) : int -> int -> bool = ( < )
let ( >= ) : int -> int -> bool = ( >= )
let ( > ) : int -> int -> bool = ( > )

17
18
let debug_schema = false

19
(* Print the warning [msg] for the source location [loc].  The output goes
   to the HTML formatter of the current viewport when that viewport is an
   HTML one, and to [Format.err_formatter] otherwise. *)
let warning loc msg =
  let viewport = Location.get_viewport () in
  let out =
    if Html.is_html viewport then Html.ppf viewport
    else Format.err_formatter
  in
  Format.fprintf out "Warning %a:@\n" Location.print_loc (loc, `Full);
  Location.html_hilight (loc, `Full);
  Format.fprintf out "%s@." msg
25

26
27
28
29
30
31
32
33
34
(* Exceptions declared by the typer.
   NOTE(review): the precise meaning of each payload (for instance the
   [bool] carried by [UnboundId]) is not visible from these declarations;
   confirm at the raise sites. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
(* Generic error carrying a human-readable message. *)
exception Error of string

35
36
37

exception Warning of string * Types.t

38
39
40
41
(* [raise_loc loc exn] raises [exn] wrapped in a [Location] exception
   tagged [`Full] for [loc]. *)
let raise_loc loc exn =
  let located = Location (loc, `Full, exn) in
  raise located

(* Same as [raise_loc], but the wrapper is tagged [`Char ofs]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc, `Char ofs, exn) in
  raise located

(* [error loc msg] raises [Error msg] located at [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn

42
43
(* What an identifier is bound to in the environment.  Both cases carry a
   [Types.t]. *)
type item =
  | Type of Types.t  (* a type definition *)
  | Val of Types.t   (* a value, with its type *)
45

46
47
module UEnv = Map.Make(U)

48
(* The typing environment. *)
type t = {
  (* identifier bindings: type definitions and values *)
  ids : item Env.t;
  (* namespace table *)
  ns : Ns.table;
  (* known compilation units, by name (see [enter_cu] / [find_cu]) *)
  cu : Types.CompUnit.t UEnv.t;
  (* schema name -> uri (see [enter_schema] / [find_schema]) *)
  schemas : string UEnv.t;
}
54

55
56
57
58
59
(* Placeholder operations: each one raises [Failure] with the name of the
   operation as soon as it is fully applied. *)
let hash = fun _ -> failwith "Typer.hash"
let compare = fun _ _ -> failwith "Typer.compare"
let dump = fun _ppf _ -> failwith "Typer.dump"
let equal = fun _ _ -> failwith "Typer.equal"
let check = fun _ -> failwith "Typer.check"
60
61

(* TODO: filter out builtin defs ? *)
62
63
64
65
(* Write one binding to [s]: a one-bit tag (0 for [Type], 1 for [Val])
   followed by the serialized type. *)
let serialize_item s item =
  let tag, t =
    match item with
    | Type t -> 0, t
    | Val t -> 1, t
  in
  Serialize.Put.bits 1 s tag;
  Types.serialize s t

66
(* Write the identifier bindings of [env] to [s], then its namespace table.
   The [cu] and [schemas] fields are not written. *)
let serialize s env =
  let bindings = env.ids in
  Serialize.Put.env Id.serialize serialize_item Env.iter s bindings;
  let table = env.ns in
  Ns.serialize_table s table
69

70
71
72
73
74
(* Read back one binding written by [serialize_item]: a one-bit tag, then
   the type. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

75
(* Rebuild an environment from [s]: identifier bindings first, then the
   namespace table (the order used by [serialize]).  The [cu] and [schemas]
   fields start empty. *)
let deserialize s =
  let bindings =
    Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s
  in
  let table = Ns.deserialize_table s in
  { ids = bindings; ns = table; cu = UEnv.empty; schemas = UEnv.empty }
79
80


81
82
(* The environment with no identifier, compilation unit or schema, and the
   empty namespace table. *)
let empty_env =
  { schemas = UEnv.empty;
    cu = UEnv.empty;
    ns = Ns.empty_table;
    ids = Env.empty }

88
89
(* Forward reference dereferenced by [find_type_global] and
   [find_value_global] to obtain the environment of a compilation unit.
   The initial function fails an assertion when called. *)
let from_comp_unit = ref (fun _cu -> assert false)

90
(* Return [env] extended with the binding of name [x] to the compilation
   unit [cu]. *)
let enter_cu x cu env =
  let units = UEnv.add x cu env.cu in
  { env with cu = units }
92

93
94
95
(* Compilation unit bound to [x] in [env]; when [x] is unbound, the unit
   built by [Types.CompUnit.mk x]. *)
let find_cu x env =
  match (try Some (UEnv.find x env.cu) with Not_found -> None) with
  | Some cu -> cu
  | None -> Types.CompUnit.mk x
96
97


98
99
100
101
102
103
(* Return [env] extended with the binding of schema name [x] to [uri]. *)
let enter_schema x uri env =
  let bound = UEnv.add x uri env.schemas in
  { env with schemas = bound }
(* Uri bound to the schema name [x] in [env].
   Raises [Error] when [x] is not a registered schema. *)
let find_schema x env =
  match (try Some (UEnv.find x env.schemas) with Not_found -> None) with
  | Some uri -> uri
  | None ->
      raise (Error (Printf.sprintf "%s: no such schema" (U.get_str x)))

104
105
106
107
108
109
110
111
(* Return [env] with [id] bound to the type definition [t]. *)
let enter_type id t env =
  let bindings = Env.add id (Type t) env.ids in
  { env with ids = bindings }
(* Return [env] with every [(id, t)] of [l] bound as a type definition,
   processed from left to right. *)
let enter_types l env =
  let add_type bindings (id, t) = Env.add id (Type t) bindings in
  { env with ids = List.fold_left add_type env.ids l }
(* Type definition bound to [id] in [env].
   Raises [Not_found] when [id] is unbound or bound to a value. *)
let find_type id env =
  let binding = Env.find id env.ids in
  (match binding with
   | Val _ -> raise Not_found
   | Type t -> t)
113

114
(* Look up the type [id] in the environment of the compilation unit named
   [cu]: the unit is resolved with [find_cu], its environment obtained
   through [!from_comp_unit].  The location argument is not used.
   Raises [Not_found] as [find_type] does. *)
let find_type_global _loc cu id env =
  let unit_env = !from_comp_unit (find_cu cu env) in
  find_type id unit_env

119
(* Return [env] with [id] bound to a value of type [t]. *)
let enter_value id t env =
  let bindings = Env.add id (Val t) env.ids in
  { env with ids = bindings }
121
122
(* Return [env] with every [(id, t)] of [l] bound as a value of type [t],
   processed from left to right. *)
let enter_values l env =
  let add_value bindings (id, t) = Env.add id (Val t) bindings in
  { env with ids = List.fold_left add_value env.ids l }
124
125
126
(* Return [env] with every identifier of [l] bound as a value whose type is
   the placeholder [Types.empty]. *)
let enter_values_dummy l env =
  let add_dummy bindings id = Env.add id (Val Types.empty) bindings in
  { env with ids = List.fold_left add_dummy env.ids l }
127
128
(* Type of the value bound to [id] in [env].
   Raises [Not_found] when [id] is unbound or bound to a type definition. *)
let find_value id env =
  let binding = Env.find id env.ids in
  (match binding with
   | Type _ -> raise Not_found
   | Val t -> t)
131
132
133
(* Look up the value [id] in the environment that [!from_comp_unit] returns
   for [cu].  The environment passed as last argument is not consulted. *)
let find_value_global cu id _env =
  let unit_env = !from_comp_unit cu in
  find_value id unit_env
134
	
135
136
137
138
139
140
(* [true] when [id] is unbound in [env] or already bound to a value;
   [false] when it is bound to a type definition. *)
let value_name_ok id env =
  match (try Some (Env.find id env.ids) with Not_found -> None) with
  | None | Some (Val _) -> true
  | Some (Type _) -> false

141
142
143
144
(* Apply [f id t] to every value binding of [env]; type definitions are
   skipped. *)
let iter_values env f =
  let visit x item =
    match item with
    | Val t -> f x t
    | Type _ -> ()
  in
  Env.iter visit env.ids
145

146

147
148
149
150
151
152
153
154
155
(* Register every type definition of [env] with the type printer, under
   the name "<value of cu>:<identifier>". *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x item =
    match item with
    | Type t ->
        let qualified = U.concat prefix (Id.value x) in
        Types.Print.register_global qualified t
    | Val _ -> ()
  in
  Env.iter register env.ids

156

157
(* Namespaces *)
158

159
(* Install the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  let table = env.ns in
  Ns.InternalPrinter.set_table table
161

162
let get_ns_table tenv = tenv.ns
163

164
(* Return [env] whose namespace table additionally maps the prefix [p] to
   the namespace [ns]. *)
let enter_ns p ns env =
  let table = Ns.add_prefix p ns env.ns in
  { env with ns = table }
166

167
168
169
170
171
(* Evaluate [f x], turning an [Ns.UnknownPrefix] exception into an error
   located at [loc]. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix prefix ->
    let msg = "Undefined namespace prefix " ^ (U.to_string prefix) in
    raise_loc_generic loc msg
172

173
174
175
(* Resolve the tag name [t] with [Ns.map_tag] against the namespace table
   of [env]; an unknown prefix is reported at [loc]. *)
let qname env loc t =
  let resolve = Ns.map_tag env.ns in
  protect_error_ns loc resolve t
    
176
(* Atom value for the qualified name obtained from [t] by [qname]. *)
let parse_atom env loc t =
  let name = qname env loc t in
  Atoms.V.of_qname name
178
179
 
(* Resolve the prefix [ns] with [Ns.map_prefix] against the namespace table
   of [env]; an unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  let resolve = Ns.map_prefix env.ns in
  protect_error_ns loc resolve ns
181

182
(* Resolve the attribute name [t] with [Ns.map_attr] and intern the
   resulting (namespace, local name) pair in [LabelPool]. *)
let parse_label env loc t =
  let resolve = Ns.map_attr env.ns in
  let (ns, local) = protect_error_ns loc resolve t in
  LabelPool.mk (ns, local)
185

186
187
188
189
190
191
192
193
194
195
196
197
198
(* Build a label map from the association list [r]: each label is resolved
   with [parse_label] and each content transformed by [f].  A label that
   occurs twice is reported at [loc]. *)
let parse_record env loc f r =
  let field (l, x) = (parse_label env loc l, f x) in
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  LabelMap.from_list on_duplicate (List.map field r)

(* Translate a constant expression of the AST into a [Types] constant.
   [loc] is the location used for error reports; it is replaced by the
   inner location when a [LocatedExpr] is crossed.  Any expression that is
   not a constant form is rejected with an error at [loc]. *)
let rec const env loc e =
  let sub = const env loc in
  match e with
  | LocatedExpr (inner_loc, e') -> const env inner_loc e'
  | Pair (x, y) -> Types.Pair (sub x, sub y)
  | Xml (x, y) -> Types.Xml (sub x, sub y)
  | RecordLitt fields -> Types.Record (parse_record env loc sub fields)
  | String (i, j, s, rest) -> Types.String (i, j, s, sub rest)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
204

205

206
(* Schema *)
207

208
209
210
let is_registered_schema env s = UEnv.mem s env.schemas

(* uri -> schema binding *)
211
(* Table registered with [State] as "Typer.schemas" (uri -> schema). *)
let schemas =
  State.ref "Typer.schemas" (Hashtbl.create 3)

(* Component tables, one per kind of schema component.
   [find_schema_descr_uri] queries them with (uri, qualified name) keys. *)
let schema_types =
  State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements =
  State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes =
  State.ref "Typer.schema_attributes" (Hashtbl.create 51)
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
220

221
222
223

let get_schema_fwd = ref (fun _ -> assert false)

224
(* [find_schema_descr_uri kind uri name] returns the descriptor stored
   under [(uri, name)] in the schema component tables, after calling
   [!get_schema_fwd uri] (whose result is ignored).

   [kind] selects the table: elements, types, attributes, attribute groups
   or model groups.  With [None] the tables are tried in that order and the
   first hit wins.

   Raises [Error] when no selected table contains the component, or when
   [!get_schema_fwd uri] itself raises [Not_found]. *)
let find_schema_descr_uri kind uri (name : Ns.qname) =
  try
    ignore (!get_schema_fwd uri);
    let key = (uri, name) in
    let elt () = Hashtbl.find !schema_elements key in
    let typ () = Hashtbl.find !schema_types key in
    let att () = Hashtbl.find !schema_attributes key in
    let att_group () = Hashtbl.find !schema_attribute_groups key in
    let mod_group () = Hashtbl.find !schema_model_groups key in
    (* Result of the first lookup that does not raise [Not_found]. *)
    let rec first_found = function
      | [] -> raise Not_found
      | f :: rem -> (try f () with Not_found -> first_found rem)
    in
    match kind with
      | Some `Element -> first_found [ elt ]
      | Some `Type -> first_found [ typ ]
      | Some `Attribute -> first_found [ att ]
      | Some `Attribute_group -> first_found [ att_group ]
      | Some `Model_group -> first_found [ mod_group ]
      | None ->
          (* policy for unqualified schema component resolution. This order should
           * be consistent with Schema_component.get_component *)
          first_found [ elt; typ; att; att_group; mod_group ]
  with Not_found ->
    raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
		    (Schema_common.string_of_component_kind kind)
		    (Ns.QName.to_string name) uri))
249
250
251
252
253

(* Same as [find_schema_descr_uri], with the schema designated by the name
   it was registered under in [env].  Raises [Error] when [schema] is not
   registered. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

254

255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
module IType = struct
  (* Intermediate representation shared by types and patterns: a graph of
     mutable nodes.  The option fields cache results computed later in this
     module. *)
  type node = {
    (* Shape of the node; [ILink] makes it an alias of another node. *)
    mutable desc: desc;
    mutable smallhash: int;  (* Local hash *)
    mutable rechash: int;    (* Global (recursive) hash *)
    mutable sid: int;        (* Sequential id used to compute rechash *)
    (* Cache filled by [typ]. *)
    mutable t: Types.t option;
    (* Cache filled by [typ_node]. *)
    mutable tnode: Types.Node.t option;
    (* Cache filled by [pat]. *)
    mutable p: Patterns.descr option;
    (* Cache filled by [pat_node]. *)
    mutable pnode: Patterns.node option;
    (* Cache filled by [fv] and [check_no_fv]. *)
    mutable fv: fv option
  } 
  and desc =
    (* Indirection, followed by [repr]. *)
    | ILink of node
    (* An already built type, paired with an integer ([itype] stores
       [Types.hash t] there). *)
    | IType of Types.descr * int
    | IOr of node * node
    | IAnd of node * node
    | IDiff of node * node
    | ITimes of node * node
    | IXml of node * node
    | IArrow of node * node
    | IOptional of node
    (* The boolean is handed unchanged to [Types.record'].  Each field is a
       node plus an optional "or-else" node, accepted in patterns only. *)
    | IRecord of bool * (node * node option) label_map
    | ICapture of id
    | IConstant of id * Types.const

  (* Template copied by [mk] and [mk_delayed]: it links to itself, and has
     zero hashes and empty caches. *)
  let rec node_temp = { 
    desc = ILink node_temp;
    smallhash = 0; rechash = 0; sid = 0;
    t = None; tnode = None; p = None; pnode = None;
    fv = None
  }
			
288
(* Recursive hash-consing *)
289

290
291
292
293
294
295
  (* Hash of a record field [(p, alt)], where [f] hashes the components.
     Fields with and without an alternative get distinct base values. *)
  let hash_field f (p, alt) =
    match alt with
    | Some e -> 1 + 17 * f p + 257 * f e
    | None -> 2 + 17 * f p

  (* [hash f n] hashes the top constructor of [n], using [f] to hash the
     sub-nodes.  Links are followed transparently.  Every constructor gets
     its own small constant; sub-hashes are weighted by 17 and 257. *)
  let rec hash f n =
    (* Same arithmetic expression as for every binary constructor, so the
       two calls to [f] are evaluated in the same order as before. *)
    let binary tag p1 p2 = tag + 17 * f p1 + 257 * f p2 in
    match n.desc with
    | ILink target -> hash f target
    | IType (_,h) -> 1 + 17 * h
    | IOr (p1,p2) -> binary 2 p1 p2
    | IAnd (p1,p2) -> binary 3 p1 p2
    | IDiff (p1,p2) -> binary 4 p1 p2
    | ITimes (p1,p2) -> binary 5 p1 p2
    | IXml (p1,p2) -> binary 6 p1 p2
    | IArrow (p1,p2) -> binary 7 p1 p2
    | IOptional p -> 8 + 17 * f p
    | IRecord (o,r) ->
        let flag = if o then 17 else 0 in
        9 + flag + 257 * (LabelMap.hash (hash_field f) r)
    | ICapture x -> 10 + 17 * (Id.hash x)
    | IConstant (x,c) -> 11 + 17 * (Id.hash x) + 257*(Types.Const.hash c)

309
310
311
312
313
  (* Hashes of bounded depth: [hash0] sees only the top constructor (every
     sub-node counts for 1), and [hashN] hashes sub-nodes with
     [hash(N-1)]. *)
  let hash0 n = hash (fun _ -> 1) n
  let hash1 n = hash hash0 n
  let hash2 n = hash hash1 n
  let hash3 n = hash hash2 n

314
315
  (* Local hash of [n] ([hash2]), memoised in [n.smallhash].  A stored 0
     means "not computed yet", so a hash that is really 0 is recomputed on
     every call. *)
  let smallhash n =
    match n.smallhash with
    | 0 ->
        let h = hash2 n in
        n.smallhash <- h;
        h
    | cached -> cached
320
321

  (* Follow [ILink] indirections down to the representative node.  Every
     link crossed is redirected straight to the representative. *)
  let rec repr m =
    match m.desc with
    | ILink next ->
        let target = repr next in
        m.desc <- ILink target;
        target
    | _ -> m

  let back = ref []

327
328
329
330
  (* Representative of a node, like [repr], except that the link stored in
     the argument itself is left untouched (links further down the chain
     are still shortened by [repr]).
     NOTE(review): presumably this keeps the descriptors that [link]
     records in [back] restorable; confirm.
     The function is not recursive, so it is no longer declared [rec]. *)
  let prot_repr = function
    | { desc = ILink n } -> repr n
    | n -> n

331
332
333
334
335
336
337
338
339
  (* Make one of [x], [y] an alias of the other.  The node that is
     redirected is one whose [t] cache is still empty ([x] when both
     qualify).  Its previous descriptor is pushed on [back] so the link can
     be undone.  Fails an assertion when both caches are filled. *)
  let link x y =
    let redirect src dst =
      back := (src, src.desc) :: !back;
      src.desc <- ILink dst
    in
    match x.t, y.t with
    | None, _ -> redirect x y
    | _, None -> redirect y x
    | _ -> assert false

  exception Unify

  (* [unify x y] tries to merge the graphs rooted at [x] and [y].  Nodes
     found equal are linked together with [link], before their sub-nodes
     are visited.  Raises [Unify] at the first mismatch; the links made so
     far stay recorded in [back]. *)
  let rec unify x y =
    if x != y then begin
      let x = prot_repr x and y = prot_repr y in
      if x != y then begin
        if smallhash x != smallhash y then raise Unify;
        (* x and y have been internalized; if they were equivalent,
           they would be equal *)
        if x.t != None && y.t != None then raise Unify;
        match x.desc, y.desc with
        | IType (tx,_), IType (ty,_) when Types.equal tx ty -> link x y
        | IOr (x1,x2), IOr (y1,y2)
        | IAnd (x1,x2), IAnd (y1,y2)
        | IDiff (x1,x2), IDiff (y1,y2)
        | ITimes (x1,x2), ITimes (y1,y2)
        | IXml (x1,x2), IXml (y1,y2)
        | IArrow (x1,x2), IArrow (y1,y2) ->
            link x y;
            unify x1 y1;
            unify x2 y2
        | IOptional x1, IOptional y1 ->
            link x y;
            unify x1 y1
        | IRecord (xo,xr), IRecord (yo,yr) when xo == yo ->
            link x y;
            LabelMap.may_collide unify_field Unify xr yr
        | ICapture xv, ICapture yv when Id.equal xv yv -> ()
        | IConstant (xv,xc), IConstant (yv,yc)
          when Id.equal xv yv && Types.Const.equal xc yc -> ()
        | _ -> raise Unify
      end
    end

  (* Unify two record fields: both must agree on having an alternative. *)
  and unify_field f1 f2 =
    match f1, f2 with
    | (p1, None), (p2, None) -> unify p1 p2
    | (p1, Some e1), (p2, Some e2) -> unify p1 p2; unify e1 e2
    | _ -> raise Unify

365

366
367
  (* [may_unify x y] is [true] when [unify x y] succeeds, in which case the
     links it made are kept.  On failure every descriptor saved in [back]
     is restored and the result is [false].  [back] is emptied either
     way. *)
  let may_unify x y =
    let undo (n, saved) = n.desc <- saved in
    let unified =
      try unify x y; true
      with Unify -> List.iter undo !back; false
    in
    back := [];
    unified

  (* Hash tables keyed by nodes.  Keys are compared with [may_unify], so a
     successful lookup links the probed node to the stored one as a side
     effect. *)
  module SmallHash = Hashtbl.Make(
    struct 
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

  (* Apply [f] to the node of a record field, then to its alternative when
     there is one. *)
  let iter_field f (x, alt) =
    f x;
    (match alt with
     | Some y -> f y
     | None -> ())
  (* Apply [f] to the direct sub-nodes of a descriptor, from left to right.
     Links, built types, captures and constants have none here. *)
  let iter f d =
    match d with
    | IOr (x,y) | IAnd (x,y) | IDiff (x,y)
    | ITimes (x,y) | IXml (x,y) | IArrow (x,y) ->
        f x;
        f y
    | IOptional x -> f x
    | IRecord (_,r) -> LabelMap.iter (iter_field f) r
    | ILink _ | IType _ | ICapture _ | IConstant _ -> ()

  (* [minimize (mem, add)] returns a traversal that enters every node
     reachable from its argument into the table seen through [mem] and
     [add].  A node already known to the table is not visited further, and
     sub-nodes are not explored once a node has its [t] cache filled. *)
  let minimize (mem, add) =
    let rec visit n =
      let n = repr n in
      if not (mem n) then begin
        let n = repr n in
        add n ();
        if n.t == None then iter visit n.desc
      end
    in
    visit

  let to_clear = ref []
  let sid = ref 0
  (* Recursive hash of the graph below [n].  The first visit of a node
     gives it the next sequential id (remembered in [to_clear]) and hashes
     its structure; a later visit returns 17 times that id. *)
  let rec rechash n =
    let n = repr n in
    match n.sid with
    | 0 ->
        incr sid;
        n.sid <- !sid;
        to_clear := n :: !to_clear;
        hash rechash n
    | id -> 17 * id
404
405

  (* Reset the sequential-id counter and the [sid] of every node listed in
     [to_clear], then empty that list. *)
  let clear () =
    let reset n = n.sid <- 0 in
    List.iter reset !to_clear;
    to_clear := [];
    sid := 0
408
409
410
411
412
413
414
415
416
417
418
419
420
421

  (* Recursive hash of [n], memoised in the [rechash] field of its
     representative (0 meaning "not computed yet").  The sequential ids
     used during the computation are cleared afterwards. *)
  let rechash n =
    let n = repr n in
    match n.rechash with
    | 0 ->
        let h = rechash n in
        clear ();
        n.rechash <- h;
        h
    | cached -> cached

  (* Node-keyed hash tables, only referenced from the commented-out
     two-phase code below.
     NOTE(review): despite its name this table hashes with [smallhash],
     exactly like [SmallHash], and not with [rechash] -- confirm whether
     that is intended. *)
  module RecHash = Hashtbl.Make(
    struct
      type t = node
      let equal = may_unify
      let hash = smallhash
    end
  )

422
423
424

(** Two-phase recursive hash-consing **)
(*
425
426
427
  let gtable = RecHash.create 17577

  let internalize n =
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
    let local = SmallHash.create 17 in
    minimize (SmallHash.mem local, SmallHash.add local) n; 
    minimize (RecHash.mem gtable, RecHash.add gtable) n;
    ()
*)

(** Single-phase hash-consing **)
  let gtable = SmallHash.create 17

  (* Hash-cons the graph rooted at [n] against the global table [gtable]. *)
  let internalize n =
    let known = SmallHash.mem gtable in
    let remember = SmallHash.add gtable in
    minimize (known, remember) n



(*  let internalize n = () *)
443
444
445
446
447
448
449
450
451

(* Compute free variables *)

  (* Set of identifiers bound by captures and constants in the graph below
     [n], memoised in [n.fv].  The traversal marks visited nodes through
     their [sid] field and clears the marks before returning. *)
  let fv n =
    match n.fv with
    | Some vars -> vars
    | None ->
        let acc = ref IdSet.empty in
        let rec visit m =
          let m = repr m in
          if m.sid = 0 then begin
            m.sid <- 1;
            to_clear := m :: !to_clear;
            match m.fv, m.desc with
            | Some vars, _ -> acc := IdSet.cup !acc vars
            | None, (ICapture x | IConstant (x,_)) -> acc := IdSet.add x !acc
            | None, d -> iter visit d
          end
        in
        visit n;
        clear ();
        n.fv <- Some !acc;
        !acc

463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
(* optimized version to check closedness *)

  let no_fv = Some IdSet.empty
  (* Check that the graph below [n] binds no variable; the first offending
     identifier found is reported at [loc].  On success every visited node
     gets its [fv] cache set to the empty set.  The [sid] marks used by the
     traversal are cleared on both paths. *)
  let check_no_fv loc n =
    let reject x =
      raise_loc_generic loc
        ("Capture variable not allowed: " ^ (Ident.to_string x))
    in
    let reject_any vars =
      match IdSet.pick vars with
      | Some x -> reject x
      | None -> ()
    in
    let rec visit m =
      let m = repr m in
      if m.sid = 0 then begin
        m.sid <- 1;
        to_clear := m :: !to_clear;
        match m.fv, m.desc with
        | Some vars, _ -> reject_any vars
        | None, (ICapture x | IConstant (x,_)) -> reject x
        | None, d -> iter visit d
      end
    in
    try
      match n.fv with
      | Some vars -> reject_any vars
      | None ->
          visit n;
          List.iter (fun m -> m.sid <- 0; m.fv <- no_fv) !to_clear;
          to_clear := []
    with exn -> clear (); raise exn

(* From the intermediate representation to the internal one *)
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515


  (* [typ n] is the type denoted by the node [n], memoised in the [t] field
     of its representative. *)
  let rec typ n =
    let n = repr n in
    match n.t with
      | Some t -> t
      | None -> let t = compute_typ n.desc in n.t <- Some t; t
  (* Translate one descriptor.  Boolean connectives work on types directly;
     products, XML elements, arrows and record fields go through type nodes
     ([typ_node]). *)
  and compute_typ = function
    | IType (t,_) -> t
    | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
    | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
    | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
    | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
    | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
    | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
    | IOptional s -> Types.Record.or_absent (typ s)
    | IRecord (o,r) ->  Types.record' (o, LabelMap.map compute_typ_field r)
    (* [typ] only passes the descriptor of a representative. *)
    | ILink _ -> assert false
    (* Callers in this module run [check_no_fv] before translating a type. *)
    | ICapture _ | IConstant (_,_) -> assert false
  (* A record field of a type must not have an "or-else" alternative. *)
  and compute_typ_field = function
    | (s, None) -> typ_node s
    | (s, Some _) -> 
	raise (Patterns.Error "Or-else clauses are not allowed in types")

  (* [typ_node n] is the type node for [n], memoised in the [tnode] field
     of its representative.  The fresh node is stored in the cache before
     its definition is computed, which is what lets recursive types refer
     to themselves.

     If computing the definition raises, the cache entry is removed again
     before the exception is re-raised, as [pat_node] does.  Otherwise a
     node left in the hash-consing table would keep handing out a type node
     that was never defined. *)
  and typ_node n =
    let n = repr n in
    match n.tnode with
      | Some t -> t
      | None ->
	  let x = Types.make () in
	  try
	    n.tnode <- Some x;
	    Types.define x (typ n);
	    x
	  with exn -> n.tnode <- None; raise exn
      
  (* [pat n] is the pattern denoted by [n].  A node that binds no variable
     is translated as a type constraint; otherwise the translation is
     memoised in the [p] field of the representative. *)
  let rec pat n =
    let n = repr n in
    if IdSet.is_empty (fv n)
    then Patterns.constr (typ n)
    else match n.p with
      | Some p -> p
      | None -> let p = compute_pat n.desc in n.p <- Some p; p

  (* Translate one descriptor into a pattern.  Raises [Patterns.Error] for
     the constructions that have no meaning in a pattern. *)
  and compute_pat = function
    | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
    | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
    (* A difference is accepted when its right-hand side binds nothing: it
       becomes an intersection with the negated type. *)
    | IDiff (s1,s2) when IdSet.is_empty (fv s2) ->
	let s2 = Types.neg (typ s2) in
	Patterns.cap (pat s1) (Patterns.constr s2)
    | IDiff _ ->
	raise (Patterns.Error "Differences are not allowed in patterns")
    | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
    | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
    | IOptional _ -> 
	raise (Patterns.Error "Optional fields are not allowed in record patterns")
    | IRecord (o,r) ->
	(* Fields that bind nothing contribute to one record type constraint;
	   each field that binds variables adds a record pattern, collected
	   in [pats]. *)
	let pats = ref [] in
	let aux l = function
	  | (s,None) ->
	      if IdSet.is_empty (fv s) then typ_node s
	      else
		( pats := Patterns.record l (pat_node s) :: !pats;
		  Types.any_node )
	  | (s,Some e) ->
	      if IdSet.is_empty (fv s) then
		raise (Patterns.Error "Or-else clauses are not allowed in types")
	      else
		( pats := Patterns.cup 
		    (Patterns.record l (pat_node s))
		    (pat e) :: !pats;
		  Types.Record.any_or_absent_node )
	in
	let constr = Types.record' (o,LabelMap.mapi aux r) in
	List.fold_left Patterns.cap (Patterns.constr constr) !pats
	  (* TODO: can avoid constr when o=true, and all fields have fv *)
    | ICapture x -> Patterns.capture x
    | IConstant (x,c) -> Patterns.constant x c
    | IArrow _ ->
	raise (Patterns.Error "Arrows are not allowed in patterns")
    (* [IType] binds nothing, so [pat] handles it as a type constraint;
       [pat] only passes the descriptor of a representative. *)
    | IType _ | ILink _ -> assert false
      
  (* [pat_node n] is the pattern node for [n], memoised in the [pnode]
     field of its representative.  The fresh node is cached before its
     definition is computed; the cache entry is removed again if that
     computation raises. *)
  and pat_node n =
    let n = repr n in
    match n.pnode with
    | Some p -> p
    | None ->
        let x = Patterns.make (fv n) in
        n.pnode <- Some x;
        (try Patterns.define x (pat n)
         with exn -> n.pnode <- None; raise exn);
        x

(* From AST to the intermediate representation *)

  (* Environment used while translating the AST. *)
  type penv = {
    penv_tenv : t;            (* the typing environment *)
    penv_derec : node Env.t;  (* nodes of the recursive definitions in scope *)
  }

  (* Translation environment over [tenv] with no recursive definition. *)
  let penv tenv = { penv_tenv = tenv; penv_derec = Env.empty }

  (* Fresh node with descriptor [d] and empty caches. *)
  let mk d = { node_temp with desc = d }
  (* Fresh node that links to [node_temp] until its [desc] is assigned. *)
  let mk_delayed () = { node_temp with desc = ILink node_temp }
  (* Node for an already built type, stored with [Types.hash t]. *)
  let itype t = mk (IType (t, Types.hash t))
  (* The node of the empty type; [ior] and [iand] recognise it by the
     physical identity of its descriptor. *)
  let iempty = itype Types.empty

  (* Union of two nodes.  An operand whose descriptor is physically the one
     of [iempty] is dropped. *)
  let ior p1 p2 =
    let is_empty p = p.desc == iempty.desc in
    match is_empty p1, is_empty p2 with
    | true, _ -> p2
    | false, true -> p1
    | false, false -> mk (IOr (p1,p2))

  (* Intersection of two nodes; [iempty] as soon as one operand has the
     descriptor of [iempty]. *)
  let iand p1 p2 =
    let is_empty p = p.desc == iempty.desc in
    if not (is_empty p1) && not (is_empty p2) then mk (IAnd (p1,p2))
    else iempty

  (* Regular expressions over nodes, as produced by [derecurs_regexp] and
     compiled away by [remove_regexp]. *)
  type regexp =
    | PEpsilon
    | PElem of node               (* one element *)
    | PGuard of node              (* intersected with what follows, see [remove_regexp] *)
    | PSeq of regexp * regexp
    | PAlt of regexp * regexp
    | PStar of regexp
    | PWeakStar of regexp         (* as [PStar], with the alternatives in the other order *)

  (* [remove_regexp r q] builds the node for "[r] followed by [q]", where
     [q] is the node of the continuation.  Stars introduce a delayed node
     that is tied back once its body is translated. *)
  let rec remove_regexp r q = match r with
    | PEpsilon ->
	q
    | PElem p ->
	mk (ITimes (p, q))
    | PGuard p ->
	iand p q
    | PSeq (r1,r2) ->
	remove_regexp r1 (remove_regexp r2 q)
    | PAlt (r1,r2) ->
	ior (remove_regexp r1 q) (remove_regexp r2 q)
    | PStar r ->
	(* The loop alternative comes first. *)
	let x = mk_delayed () in
	let res = ior x q in
	x.desc <- ILink (remove_regexp2 r res iempty);
	res
    | PWeakStar r ->
	(* Same as [PStar], with the exit alternative first. *)
	let x = mk_delayed () in
	let res = ior q x in
	x.desc <- ILink (remove_regexp2 r res iempty);
	res
	  
  (* Variant with two continuations: [q_nonempty] is used after [r] has
     gone through an element, [q_empty] when it has not.  The stars above
     pass [iempty] as [q_empty]. *)
  and remove_regexp2 r q_nonempty q_empty =
    if q_nonempty == q_empty then remove_regexp r q_empty
    else match r with
      | PEpsilon ->
          q_empty
      | PElem p ->
          mk (ITimes (p, q_nonempty))
      | PGuard p ->
	  iand p q_empty
      | PSeq (r1,r2) ->
          remove_regexp2 r1
            (remove_regexp2 r2 q_nonempty q_nonempty)
            (remove_regexp2 r2 q_nonempty q_empty)
      | PAlt (r1,r2) ->
          ior
            (remove_regexp2 r1 q_nonempty q_empty)
            (remove_regexp2 r2 q_nonempty q_empty)
      | PStar r ->
 	  let x = mk_delayed () in
          x.desc <- ILink (remove_regexp2 r (ior x q_nonempty) iempty);
          ior x q_empty
      | PWeakStar r ->
 	  let x = mk_delayed () in
          x.desc <- ILink (remove_regexp2 r (ior q_nonempty x) iempty);
          ior q_empty x


  let cst_nil = Types.Atom Sequence.nil_atom
  (* Intersect [p] with a capture of every variable of [vars]. *)
  let capture_all vars p =
    let add_capture acc x = iand acc (mk (ICapture x)) in
    IdSet.fold add_capture p vars
  (* [termin b vars p] is [p] itself when [b] holds.  Otherwise [p] is
     followed, for every variable of [vars], by a guard binding that
     variable to the constant [cst_nil]. *)
  let termin b vars p =
    match b with
    | true -> p
    | false ->
        let close acc x = PSeq (acc, PGuard (mk (IConstant (x,cst_nil)))) in
        IdSet.fold close p vars

  let rexp r = remove_regexp r (itype Sequence.nil_type)

674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
  let all_delayed = ref []

  (* Create a delayed node and record it, with [loc], in [all_delayed] for
     the next [check_delayed]. *)
  let delayed loc =
    let slot = mk_delayed () in
    all_delayed := (loc, slot) :: !all_delayed;
    slot

  (* Reject the delayed node [p] when it can reach itself through links,
     unions, intersections and differences only, i.e. without crossing any
     other constructor.  The error is reported at [loc]. *)
  let check_one_delayed (loc,p) =
    let rec hits q = p == q || under q.desc
    and under = function
      | IOr (q1,q2) | IAnd (q1,q2) | IDiff (q1,q2) -> hits q1 || hits q2
      | ILink q -> hits q
      | _ -> false
    in
    if under p.desc then error loc "Ill-formed recursion"
    
  (* Check every delayed node recorded so far.  The record is emptied
     first, so it is empty even when a check fails. *)
  let check_delayed () =
    let pending = !all_delayed in
    all_delayed := [];
    List.iter check_one_delayed pending
    
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
  (* [derecurs env p] translates the AST of a type or pattern [p] into the
     intermediate representation.  Namespace, label and schema errors are
     reported at [p.loc]. *)
  let rec derecurs env p = match p.descr with
    | PatVar v -> derecurs_var env p.loc v
    | SchemaVar (kind, schema_name, component_name) ->

	let name = qname env.penv_tenv  p.loc component_name in
	itype (find_schema_descr env.penv_tenv kind schema_name name)

    (* Local recursive definitions: translate the body in the environment
       extended with them. *)
    | Recurs (p,b) -> derecurs (derecurs_def env b) p
    | Internal t -> itype t
    | NsT ns -> 
	itype (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
    | Or (p1,p2) -> mk (IOr (derecurs env p1, derecurs env p2))
    | And (p1,p2) -> mk (IAnd (derecurs env p1, derecurs env p2))
    | Diff (p1,p2) -> mk (IDiff (derecurs env p1, derecurs env p2))
    | Prod (p1,p2) -> mk (ITimes (derecurs env p1, derecurs env p2))
    | XmlT (p1,p2) -> mk (IXml (derecurs env p1, derecurs env p2))
    | Arrow (p1,p2) -> mk (IArrow (derecurs env p1, derecurs env p2))
    | Optional p -> mk (IOptional (derecurs env p))
    | Record (o,r) -> 
	let aux = function
	  | (p,Some e) -> (derecurs env p, Some (derecurs env e))
	  | (p,None) -> derecurs env p, None in
	mk (IRecord (o, parse_record env.penv_tenv p.loc aux r))
    | Constant (x,c) -> mk (IConstant (x,const env.penv_tenv p.loc c))
    | Cst c -> itype (Types.constant (const env.penv_tenv p.loc c))
    (* A regular expression is translated, then compiled by [rexp]. *)
    | Regexp r ->
	let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
	rexp r
	  
  (* Translate a regular expression of the AST into a [regexp], together
     with a set of sequence variables (see the comment below). *)
  and derecurs_regexp vars b rvars f env = function
      (* - vars: seq variables to be propagated top-down and added
	 to each captured element
	 - b: below a star ?
	 - rvars: seq variables that appear on the right of the regexp
	 - f: tail position
	 
	 returns the set of seq variable of the regexp minus rvars
	 (they have already been terminated if not below a star)
      *)
    | Epsilon -> 
	PEpsilon, IdSet.empty
    | Elem p -> 
	PElem (capture_all vars (derecurs env p)), IdSet.empty
    | Guard p ->
	PGuard (derecurs env p), IdSet.empty
    | Seq (p1,p2) -> 
	(* The right part is translated first: its variables are needed as
	   [rvars] for the left part. *)
	let (p2,v2) = derecurs_regexp vars b rvars f env p2 in
	let (p1,v1) = derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
	PSeq (p1,p2), IdSet.cup v1 v2
    | Alt (p1,p2) -> 
	(* Each branch is passed through [termin] for the variables that only
	   the other branch returns. *)
	let (p1,v1) = derecurs_regexp vars b rvars f env p1
	and (p2,v2) = derecurs_regexp vars b rvars f env p2 in
	PAlt (termin b (IdSet.diff v2 v1) p1, termin b (IdSet.diff v1 v2) p2),
	IdSet.cup v1 v2
    | Star p -> 
	let (p,v) = derecurs_regexp vars true rvars false env p in
	termin b v (PStar p), v
    | WeakStar p -> 
	let (p,v) = derecurs_regexp vars true rvars false env p in
	termin b v (PWeakStar p), v
    | SeqCapture (x,p) -> 
	(* In tail position the variable guards the regexp with one capture;
	   otherwise it is added to [vars] for the elements below. *)
	let vars = if f then vars else IdSet.add x vars in
	let after = IdSet.mem rvars x in
	let rvars = IdSet.add x rvars in
	let (p,v) = derecurs_regexp vars b rvars false env p in
	(if f 
	 then PSeq (PGuard (mk (ICapture x)), p) 
	 else termin (after || b) (IdSet.singleton x) p), 
	(if after then v else IdSet.add x v)
	  
	  
  (* Translate an identifier occurring in a type or pattern.
     An unqualified name is looked up among the recursive definitions in
     scope, then among the types of the environment; when both fail it is
     a capture variable.  A qualified name "cu:v" designates the type [v]
     of the compilation unit [cu] and must exist. *)
  and derecurs_var env loc v =
    match Ns.split_qname v with
    | "", local ->
        let id = ident local in
        begin
          try Env.find id env.penv_derec
          with Not_found ->
            try itype (find_type id env.penv_tenv)
            with Not_found -> mk (ICapture id)
        end
    | unit_name, local ->
        begin
          try
            let cu = U.mk unit_name in
            itype (find_type_global loc cu (ident local) env.penv_tenv)
          with Not_found ->
            raise_loc_generic loc
              ("Unbound external type " ^ unit_name ^ ":" ^ (U.to_string local))
        end
	      
  (* Extend [env] with the recursive definitions [b].  Every name is first
     bound to a delayed node, then the bodies are translated in the
     extended environment and tied to their node. *)
  and derecurs_def env b =
    let slots = List.map (fun (v,p) -> (v, p, delayed p.loc)) b in
    let bind derec (v,_,slot) = Env.add v slot derec in
    let derec = List.fold_left bind env.penv_derec slots in
    let env = { env with penv_derec = derec } in
    let tie (_,p,slot) = slot.desc <- ILink (derecurs env p) in
    List.iter tie slots;
    env

791
792
793
794
795
  (* Translate [p], check the recursive definitions it introduced, and
     hash-cons the resulting graph. *)
  let derec penv p =
    let node = derecurs penv p in
    check_delayed ();
    internalize node;
    node
796
797


798
(* API *)
799
800
801
802
803
804
805
806
807
808
809
810
811
812

  module Ids = Set.Make(Id)
  (* [type_defs env b] translates the group of (possibly mutually
     recursive) type definitions [b], given as (identifier, AST) pairs, and
     returns the (identifier, type) pairs in the same order.

     - An identifier defined twice in [b] is an error, reported at the
       second definition.
     - A definition containing a capture variable is an error.
     - A definition with a real location that yields the empty type
       produces a warning.
     - Every resulting type is registered with the type printer under its
       identifier. *)
  let type_defs env b =
    (* Reject duplicate identifiers within the group. *)
    ignore 
      (List.fold_left 
	 (fun seen (v,p) ->
	    if Ids.mem v seen then 
	      raise_loc_generic p.loc 
		("Multiple definitions for the type identifier " ^ 
		   (Ident.to_string v));
	    Ids.add v seen
	 ) Ids.empty b);
    
    let penv = derecurs_def (penv env) b in
    (* Translate one definition body into a closed type. *)
    let aux t =
      let d = derec penv t in
      check_no_fv t.loc d;
      try typ d
      with Patterns.Error s -> raise_loc_generic t.loc s
    in
    let b = 
      List.map 
	(fun (v,p) ->
	   let t = aux p in
	   if (p.loc <> noloc) && (Types.is_empty t) then
	     warning p.loc 
	       ("This definition yields an empty type for " ^ (Ident.to_string v));
	   (v,t)) b in
    List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
    b

830

831
832
833
  (* Hash-cons the already translated node [d], then return its type. *)
  let typ_descr d =
    let () = internalize d in
    typ d
834

835
836
837
838
839
840
841
842
843
844
845
  (* Translate the AST [t] into a type node.  Capture variables are
     rejected, and a [Patterns.Error] raised by the translation is reported
     at [t.loc]. *)
  let typ env t =
    let node = derec (penv env) t in
    check_no_fv t.loc node;
    try typ_node node
    with Patterns.Error msg -> raise_loc_generic t.loc msg

  (* Translate the AST [t] into a pattern node.  A [Patterns.Error] raised
     by the translation is reported at [t.loc]. *)
  let pat env t =
    let node = derec (penv env) t in
    try pat_node node
    with Patterns.Error msg -> raise_loc_generic t.loc msg
end
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864

let typ = IType.typ
let pat = IType.pat
let type_defs = IType.type_defs

(* Print on [ppf] the name of every type definition of [env], each
   preceded by a space. *)
let dump_types ppf env =
  let show v item =
    match item with
    | Type _ -> Format.fprintf ppf " %a" Ident.print v
    | Val _ -> ()
  in
  Env.iter show env.ids
(* Print on [ppf] the type definition bound to [name] in [env].
   Raises [Error] when [name] is unbound or bound to a value. *)
let dump_type ppf env name =
  try
    let binding = Env.find (Ident.ident name) env.ids in
    (match binding with
     | Type t -> Types.Print.print ppf t
     | Val _ -> raise Not_found)
  with Not_found ->
    raise (Error (Printf.sprintf "Type %s not found" (U.get_str name)))

(* Print on [ppf] the schema component designated by (kind, schema name,
   component name).  The component name is resolved first, then the schema
   is looked up in [env]. *)
let dump_schema_type ppf env (k, s, n) =
  let name = qname env noloc n in
  let descr = find_schema_descr_uri k (find_schema s env) name in
  Types.Print.print ppf descr

let dump_ns ppf env =
  Ns.dump_table ppf env.ns



875

876
877
(* II. Build skeleton *)

878

879
type type_fun = Types.t -> bool -> Types.t
880

881
module Fv = IdSet
882

883
884
885
type branch = Branch of Typed.branch * branch list

let cur_branch : branch list ref = ref []
886

887
(* Pair the free-variable set [fv] with a fresh typed-expression node for
   [e] at [loc].  The type field starts as [Types.empty]. *)
let exp loc fv e =
  let node =
    { Typed.exp_loc = loc;
      Typed.exp_typ = Types.empty;
      Typed.exp_descr = e;
    }
  in
  (fv, node)
893

894
let ops = Hashtbl.create 13
895
896
(* Record the operator [op] in [ops] with its arity and typing function. *)
let register_op op arity f =
  let entry = (arity, f) in
  Hashtbl.add ops op entry

(* Typing function registered for [op]; raises [Not_found] when [op] is not
   registered. *)
let typ_op op =
  let (_arity, f) = Hashtbl.find ops op in
  f
897

898
899
900
901
902
(* [Some (name, arity)] when [s] names a registered operator that is not
   hidden by a binding of the same identifier in [env]; [None]
   otherwise. *)
let is_op env s =
  if Env.mem (ident s) env.ids then None
  else
    try
      let name = U.get_str s in
      let (arity, _) = Hashtbl.find ops name in
      Some (name, arity)
    with Not_found -> None
903

904
905
(* [expr env loc e] translates the parsed expression [e] into a pair
   [(fv, node)]: the set of identifiers occurring free in [e] and the
   corresponding [Typed] node, built with [exp] (so its type starts out as
   [Types.empty]).  [loc] is the location attached to the produced node; a
   [LocatedExpr] wrapper replaces it for the wrapped sub-expression.

   The local helpers below factor the recurring shapes.  Each keeps the
   simultaneous [let ... and ...] binding of the sub-translations. *)
let rec expr env loc e0 =
  (* Two sub-expressions: free variables are merged, node built by [build]. *)
  let binary build a b =
    let (fv_a,a') = expr env loc a and (fv_b,b') = expr env loc b in
    exp loc (Fv.cup fv_a fv_b) (build a' b')
  in
  (* One sub-expression together with a type annotation. *)
  let annotated build e t =
    let (fv,e') = expr env loc e and t' = typ env t in
    exp loc fv (build e' t')
  in
  (* One sub-expression followed by a list of pattern-matching branches. *)
  let branching build e b =
    let (fv_e,e') = expr env loc e and (fv_b,b') = branches env b in
    exp loc (Fv.cup fv_e fv_b) (build e' b')
  in
  (* One sub-expression combined with a record label. *)
  let labelled build e l =
    let (fv,e') = expr env loc e in
    exp loc fv (build e' (parse_label env loc l))
  in
  match e0 with
  | LocatedExpr (inner_loc,e) -> expr env inner_loc e
  | Forget (e,t) ->
      annotated (fun e t -> Typed.Forget (e,t)) e t
  | Check (e,t) ->
      (* The reference starts at [Types.empty]; it is filled in during
         type-checking. *)
      annotated (fun e t -> Typed.Check (ref Types.empty,e,t)) e t
  | Var s -> var env loc s
  | Apply (e1,e2) ->
      (* Applying a partially applied operator just feeds it one more
         argument; anything else is a regular application. *)
      let apply f arg =
        match f.Typed.exp_descr with
        | Typed.Op (op,arity,args) when arity > 0 ->
            Typed.Op (op,arity - 1,args @ [arg])
        | _ ->
            Typed.Apply (f,arg)
      in
      binary apply e1 e2
  | Abstraction a -> abstraction env loc a
  | (Integer _ | Char _ | Atom _ | Const _) as c ->
      exp loc Fv.empty (Typed.Cst (const env loc c))
  | Pair (e1,e2) ->
      binary (fun a b -> Typed.Pair (a,b)) e1 e2
  | Xml (e1,e2) ->
      binary (fun a b -> Typed.Xml (a,b)) e1 e2
  | Dot (e,l) ->
      labelled (fun e l -> Typed.Dot (e,l)) e l
  | RemoveField (e,l) ->
      labelled (fun e l -> Typed.RemoveField (e,l)) e l
  | RecordLitt r ->
      (* Free variables of all the fields are accumulated as a side effect
         of the per-field callback given to [parse_record]. *)
      let fv = ref Fv.empty in
      let field e =
        let (fv_e,e') = expr env loc e in
        fv := Fv.cup !fv fv_e;
        e'
      in
      let r = parse_record env loc field r in
      exp loc !fv (Typed.RecordLitt r)
  | String (i,j,s,e) ->
      let (fv,e) = expr env loc e in
      exp loc fv (Typed.String (i,j,s,e))
  | Match (e,b) ->
      branching (fun e b -> Typed.Match (e, b)) e b
  | Map (e,b) ->
      branching (fun e b -> Typed.Map (e, b)) e b
  | Transform (e,b) ->
      branching (fun e b -> Typed.Transform (e, b)) e b
  | Xtrans (e,b) ->
      branching (fun e b -> Typed.Xtrans (e, b)) e b
  | Validate (e,kind,schema,elt) ->
      let (fv,e) = expr env loc e in
      let uri = find_schema schema env in
      exp loc fv (Typed.Validate (e, kind, uri, qname env loc elt))
  | Try (e,b) ->
      branching (fun e b -> Typed.Try (e, b)) e b
  | NamespaceIn (pr,ns,e) ->
      let env = enter_ns pr ns env in
      expr env loc e
  | Ref (e,t) ->
      annotated (fun e t -> Typed.Ref (e,t)) e t
  | External (s,args) ->
      extern loc env s args
979
980
981
982
983
984
985
986
987
988
	
(* [extern loc env s args] builds the node for the external value named [s].
   The type arguments [args] are translated with [typ], then
   [Externals.resolve] is asked for the pair stored in [Typed.External]
   (its second component goes first in the node).  The result has no free
   variables.  Any exception raised while resolving is re-raised wrapped
   with the location [loc]. *)
and extern loc env s args =
  let type_args = List.map (fun a -> typ env a) args in
  try
    let (slot,ty) = Externals.resolve s type_args in
    exp loc Fv.empty (Typed.External (ty,slot))
  with exn -> raise_loc loc exn
    
(* [var env loc s] translates an occurrence of the name [s].
   - If [is_op] recognises [s] as an operator, an unapplied [Typed.Op] node
     is produced; for "print_xml" and "print_xml_utf8" it is wrapped in
     [Typed.NsTable] together with the current namespace table.
   - Otherwise [s] is split with [Ns.split_qname]:
     * with an empty first component, a name containing '.' is treated as
       an external (no type arguments); any other name must be a value of
       [env], else [UnboundId] is raised at [loc] (its boolean tells whether
       the identifier is bound at all in [env.ids]);
     * with a non-empty first component, the identifier is looked up in the
       compilation unit it names, raising [UnboundExtId] at [loc] when it
       is missing there. *)
and var env loc s =
  match is_op env s with
  | Some (op,arity) ->
      let base = Typed.Op (op, arity, []) in
      let descr =
        match op with
        | "print_xml" | "print_xml_utf8" -> Typed.NsTable (env.ns,base)
        | _ -> base
      in
      exp loc Fv.empty descr
  | None ->
      begin match Ns.split_qname s with
      | "", local ->
          let name = U.get_str local in
          if String.contains name '.' then
            extern loc env name []
          else begin
            let id = ident local in
            (* Only the existence of the value matters here. *)
            (try ignore (find_value id env)
             with Not_found ->
               raise_loc loc (UnboundId (id, Env.mem id env.ids)));
            exp loc (Fv.singleton id) (Typed.Var id)
          end
      | cu_name, local ->
          let cu = find_cu (U.mk cu_name) env in
          let id = ident local in
          let t =
            try find_value_global cu id env
            with Not_found ->
              raise_loc loc (UnboundExtId (cu,id))
          in
          exp loc Fv.empty (Typed.ExtVar (cu, id, t))
      end

(* [abstraction env loc a] translates a function abstraction.
   Each arrow of the declared interface is translated with [typ]; the type
   of the function is the intersection ([Types.cap], starting from
   [Types.any]) of the corresponding [Types.arrow]s.  The body is translated
   in [env] extended, for a named function, with a dummy binding for its own
   name; that name is then removed from the free variables of the result. *)
and abstraction env loc a =
  let arrows =
    List.map (fun (dom,codom) -> (typ env dom, typ env codom)) a.fun_iface
  in
  let fun_typ =
    List.fold_left
      (fun acc (dom,codom) -> Types.cap acc (Types.arrow dom codom))
      Types.any
      arrows
  in
  let iface =
    List.map (fun (dom,codom) -> (Types.descr dom, Types.descr codom)) arrows
  in
  let (fv_body,body) =
    let body_env =
      match a.fun_name with
      | Some f -> enter_values_dummy [ f ] env
      | None -> env
    in
    branches body_env a.fun_body
  in
  let fv =
    match a.fun_name with
    | Some f -> Fv.remove f fv_body
    | None -> fv_body
  in
  let lambda =
    { Typed.fun_name = a.fun_name;
      Typed.fun_iface = iface;
      Typed.fun_body = body;
      Typed.fun_typ = fun_typ;
      Typed.fun_fv = fv }
  in
  exp loc fv (Typed.Abstraction lambda)
    
(* [branches env b] translates a list of (pattern, body) branches.
   Returns the free variables of all the bodies (minus the variables each
   pattern captures) together with a [Typed.branches] record whose accepted
   type is the union of the types accepted by the patterns.

   For every branch:
   - the body is translated in [env] extended with dummy bindings for the
     capture variables of the pattern;
   - a warning is emitted if some capture variable is not used in the body;
   - the branch is pushed on [cur_branch], carrying as children the
     branches recorded while its own pattern and body were translated. *)
and branches env b =
  let fv_acc = ref Fv.empty in
  let accept_acc = ref Types.empty in
  let translate (p,body) =
    (* Record nested branches separately from those of the enclosing level. *)
    let enclosing = !cur_branch in
    cur_branch := [];
    let p' = pat env p in
    let captured = Patterns.fv p' in
    let body_env = enter_values_dummy captured env in
    let (fv_body,body') = expr body_env noloc body in
    let br_loc = merge_loc p.loc body'.Typed.exp_loc in
    begin match Fv.pick (Fv.diff captured fv_body) with
    | Some x ->
        let x = U.to_string (Id.value x) in
        warning br_loc
          ("The capture variable " ^ x ^
           " is declared in the pattern but not used in the body of this branch. It might be a misspelled or undeclared type or name (if it isn't, use _ instead).")
    | None -> ()
    end;
    fv_acc := Fv.cup !fv_acc (Fv.diff fv_body captured);
    accept_acc := Types.cup !accept_acc (Types.descr (Patterns.accept p'));
    let br =
      { Typed.br_loc = br_loc;
        (* Physical comparison with the [noloc] sentinel. *)
        Typed.br_used = br_loc == noloc;
        Typed.br_vars_empty = Patterns.fv p';
        Typed.br_pat = p';
        Typed.br_body = body' }
    in
    cur_branch := Branch (br, !cur_branch) :: enclosing;
    br
  in
  let typed = List.map translate b in
  (!fv_acc,
   { Typed.br_typ = Types.empty;
     Typed.br_branches = typed;
     Typed.br_accept = !accept_acc;
     Typed.br_compiled = None })
1084

1085
(* Public entry point: translate [e] starting at [noloc] and keep only the
   typed node, dropping the free-variable set. *)
let expr env e =
  let (_, node) = expr env noloc e in
  node
1086

1087
1088
(* [let_decl env p e] builds the typed form of a [let p = e] declaration:
   the pattern is translated with [pat], the body with [expr], and the
   compiled form is left empty ([None]).  Both translations stay inside the
   record expression so that their relative evaluation order is unchanged. *)
let let_decl env p e =
  {
    Typed.let_pat = pat env p;
    Typed.let_body = expr env e;
    Typed.let_compiled = None;
  }

1092
1093
1094

(* Hide global "typing/parsing" environment *)

1095

1096
1097
(* III. Type-checks *)

1098
1099
open Typed

1100
1101
1102
1103
1104
1105
(* [localize loc f x] evaluates [f x], attaching the location [loc] to what
   it reports:
   - [Error] and [Constraint] exceptions are re-raised wrapped in
     [Location.Location] with the [`Full] marker;
   - a [Warning (msg, t)] exception is turned into a printed warning at
     [loc], and [t] is returned as the result.
   Any other exception propagates unchanged. *)
let localize loc f x =
  try f x with
  | Warning (msg,fallback) ->
      warning loc msg;
      fallback
  | (Error _ | Constraint (_,_)) as exn ->
      raise (Location.Location (loc,`Full,exn))

1106
1107
(* [require loc t s] checks that [t] is a subtype of [s]; otherwise raises
   [Constraint (t, s)] located at [loc]. *)
let require loc t s =
  if Types.subtype t s then ()
  else raise_loc loc (Constraint (t, s))
1108

1109
(* [verify loc t s] returns [t] when it is a subtype of [s]; otherwise
   raises [Constraint (t, s)] located at [loc]. *)
let verify loc t s =
  if Types.subtype t s then t
  else raise_loc loc (Constraint (t, s))

1112
1113
1114
1115
(* [verify_noloc t s] returns [t] when it is a subtype of [s]; otherwise
   raises a bare [Constraint (t, s)], with no location attached. *)
let verify_noloc t s =
  if Types.subtype t s then t
  else raise (Constraint (t, s))

1116
1117
1118
1119
1120
(* [check_str loc ofs t s] returns [t] when it is a subtype of [s];
   otherwise raises [Constraint (t, s)] through [raise_loc_str], i.e.
   located at character offset [ofs] within [loc]. *)
let check_str loc ofs t s =
  if Types.subtype t s then t
  else raise_loc_str loc ofs (Constraint (t, s))

(* [should_have loc constr s] always raises [ShouldHave (constr, s)] located
   at [loc]; [s] is the explanatory message. *)
let should_have loc constr s =
  let failure = ShouldHave (constr,s) in
  raise_loc loc failure

1123
1124
1125
(* [should_have_str loc ofs constr s] always raises [ShouldHave (constr, s)]
   through [raise_loc_str], i.e. located at character offset [ofs] within
   [loc]. *)
let should_have_str loc ofs constr s =
  let failure = ShouldHave (constr,s) in
  raise_loc_str loc ofs failure

1126
(* [flatten arg constr precise] types a flattening operation whose result
   must satisfy [constr].  [arg] is the typing function of the argument: it
   receives a constraint and a "precise" flag and returns a type.

   [constr'] is the star of the [Sequence.approx] of [constr] restricted to
   sequences, and the argument is typed against [Sequence.star constr'].
   - When [constr'] is itself a subtype of [constr], the argument is typed
     with the caller's [precise] flag; the flattened type is returned only
     if [precise] holds, otherwise [constr] is returned.
   - Otherwise the argument is typed precisely and its flattened type is
     checked against [constr] with [verify_noloc]. *)
let flatten arg constr precise =
  let approx = Sequence.approx (Types.cap Sequence.any constr) in
  let constr' = Sequence.star approx in
  let arg_constr = Sequence.star constr' in
  if Types.subtype constr' constr then begin
    (* [arg] is called even when its result is not needed. *)
    let t = arg arg_constr precise in
    if precise then Sequence.flatten t else constr
  end else begin
    let t = arg arg_constr true in
    verify_noloc (Sequence.flatten t) constr
  end
1137

1138
1139
(* [type_check env e constr precise] type-checks the typed expression [e]
   against the constraint [constr] by delegating to [type_check'] on its
   descriptor and location.  The result is the computed type when [precise]
   holds, and [constr] itself otherwise.  That result is also accumulated
   (by union) into the mutable field [e.exp_typ]. *)
let rec type_check env e constr precise =
  let computed = type_check' e.exp_loc env e.exp_descr constr precise in
  let result = if precise then computed else constr in
  e.exp_typ <- Types.cup e.exp_typ result;
  result

1144
and type_check' loc env e constr precise = match e with
1145
1146
1147
  | Forget (e,t) ->
      let t = Types.descr t in
      ignore (type_check env e t false);
1148
      verify loc t constr
1149

1150
1151
1152
  | Check (t0,e,t) ->
      let te = type_check env e Types.any true in
      t0 := Types.cup !t0 te;
1153
      verify loc (Types.cap te (Types.descr t)) constr
1154

1155
  | Abstraction a ->
1156
1157
1158
      let t =
	try Types.Arrow.check_strenghten a.fun_typ constr 
	with Not_found -> 
1159
1160
	  should_have loc constr
	    "but the interface of the abstraction is not compatible"
1161
      in
1162
1163
      let env = match a.fun_name with
	| None -> env
1164
	| Some f -> enter_value f a.fun_typ env in
1165
1166
      List.iter 
	(fun (t1,t2) ->
1167
1168
1169
	   let acc = a.fun_body.br_accept in 
	   if not (Types.subtype t1 acc) then
	     raise_loc loc (NonExhaustive (Types.diff t1 acc));
1170
	   ignore (type_check_branches loc env t1 a.fun_body t2 false)
1171
1172
	) a.fun_iface;
      t
1173

1174
1175
  | Match (e,b) ->
      let t = type_check env e b.br_accept true in
1176
      type_check_branches loc env t b constr precise
1177
1178
1179

  | Try (e,b) ->
      let te = type_check env e constr precise in
1180
      let tb = type_check_branches loc env Types.any b constr precise in
1181
      Types.cup te tb
1182

1183
1184
  | Pair (e1,e2) ->
      type_check_pair loc env e1 e2 constr precise
1185

1186
1187
  | Xml (e1,e2) ->
      type_check_pair ~kind:`XML loc env e1 e2 constr precise
1188

1189
  | RecordLitt r ->
Pietro Abate's avatar