typer.ml 59.1 KB
Newer Older
1
(* TODO:
 - rewrite type-checking of operators to propagate constraint
 - optimize computation of pattern free variables
 - check whether it is worth using recursive hash-consing internally
*)

7
8
9
open Location
open Ast
open Ident
10

11
12
13
14
15
16
(* Shadow the polymorphic comparison operators with versions restricted to
   [int]; from here on, using these operators on any other type is a
   compile-time type error. *)
let ( = ) : int -> int -> bool = fun a b -> a = b
let ( <= ) : int -> int -> bool = fun a b -> a <= b
let ( < ) : int -> int -> bool = fun a b -> a < b
let ( >= ) : int -> int -> bool = fun a b -> a >= b
let ( > ) : int -> int -> bool = fun a b -> a > b

17
18
(* Boolean flag, constantly [false] here.  Presumably enables schema-related
   debugging output; no use of it is visible in this part of the file. *)
let debug_schema = false

19
(* [warning loc msg] prints a warning about location [loc]: a header line
   with the location, then [msg].  The location is also passed to
   [Location.html_hilight].  Output goes to the HTML formatter of the current
   viewport when [Html.is_html] holds for it, and to [Format.err_formatter]
   otherwise. *)
let warning loc msg =
  let viewport = Location.get_viewport () in
  let out =
    match Html.is_html viewport with
    | true -> Html.ppf viewport
    | false -> Format.err_formatter
  in
  Format.fprintf out "Warning %a:@\n" Location.print_loc (loc,`Full);
  Location.html_hilight (loc,`Full);
  Format.fprintf out "%s@." msg
25

26
27
28
29
30
31
32
33
34
(* Exceptions declared by the typer.  Within the part of the file visible
   here only [Error] is raised (by [error], [find_schema],
   [find_schema_descr_uri] and [dump_type]); the others are presumably
   raised by later type-checking code -- TODO confirm. *)
exception NonExhaustive of Types.descr
exception Constraint of Types.descr * Types.descr
exception ShouldHave of Types.descr * string
exception ShouldHave2 of Types.descr * string * Types.descr
exception WrongLabel of Types.descr * label
exception UnboundId of id * bool
exception UnboundExtId of Types.CompUnit.t * id
exception Error of string

35
36
37

(* Carries a message and a type.  Not raised in the visible part of this
   file. *)
exception Warning of string * Types.t

38
39
40
41
(* [raise_loc loc exn] raises [exn] wrapped in the [Location] exception,
   attached to the whole of [loc]. *)
let raise_loc loc exn =
  let located = Location (loc,`Full,exn) in
  raise located

(* Same as [raise_loc], but the position marker is [`Char ofs]. *)
let raise_loc_str loc ofs exn =
  let located = Location (loc,`Char ofs,exn) in
  raise located

(* [error loc msg] raises [Error msg] located at [loc]. *)
let error loc msg =
  let exn = Error msg in
  raise_loc loc exn

42
43
(* Binding stored for an identifier in the environment: [Type t] for a named
   type (see [enter_type]) and [Val t] for a value of type [t] (see
   [enter_value]). *)
type item =
  | Type of Types.t
  | Val of Types.t
45

46
47
(* Finite maps whose keys are [U.t] values. *)
module UEnv = Map.Make(U)

48
(* Typing environment.
   - [ids]: identifier bindings ([Type] or [Val] items);
   - [ns]: namespace table used to resolve prefixes;
   - [cu]: compilation units registered with [enter_cu];
   - [schemas]: schema bindings registered with [enter_schema]; the bound
     string is used as a URI by [find_schema_descr]. *)
type t = {
  ids : item Env.t;
  ns: Ns.table;
  cu: Types.CompUnit.t UEnv.t;
  schemas: string UEnv.t
}
54

55
56
57
58
59
(* Placeholder operations on environments: each one ignores its arguments
   and raises [Failure] carrying its own qualified name. *)
let hash _ = raise (Failure "Typer.hash")
let compare _ _ = raise (Failure "Typer.compare")
let dump _ppf _ = raise (Failure "Typer.dump")
let equal _ _ = raise (Failure "Typer.equal")
let check _ = raise (Failure "Typer.check")
60
61

(* TODO: filter out builtin defs ? *)
62
63
64
65
(* Writes one environment item to [s]: a one-bit tag (0 for [Type], 1 for
   [Val]) followed by the serialized type. *)
let serialize_item s item =
  let put_tagged tag t =
    Serialize.Put.bits 1 s tag;
    Types.serialize s t
  in
  match item with
  | Type t -> put_tagged 0 t
  | Val t -> put_tagged 1 t

66
(* Writes the environment to [s]: first the identifier bindings, then the
   namespace table.  The [cu] and [schemas] fields are not written. *)
let serialize s env =
  Serialize.Put.env Id.serialize serialize_item Env.iter s env.ids;
  Ns.serialize_table s env.ns
69

70
71
72
73
74
(* Reads back one item written by [serialize_item]: a one-bit tag selects
   between [Type] (0) and [Val] (1); any other tag value is considered
   impossible. *)
let deserialize_item s =
  let tag = Serialize.Get.bits 1 s in
  if tag = 0 then Type (Types.deserialize s)
  else if tag = 1 then Val (Types.deserialize s)
  else assert false

75
(* Reads an environment from [s] in the order used by [serialize]:
   identifier bindings first, then the namespace table.  The [cu] and
   [schemas] fields of the result are empty. *)
let deserialize s =
  let ids = Serialize.Get.env Id.deserialize deserialize_item Env.add Env.empty s in
  let ns = Ns.deserialize_table s in
  { ids = ids; ns = ns; cu = UEnv.empty; schemas = UEnv.empty }
79
80


81
82
(* Environment with no identifier, compilation unit or schema binding, and
   the empty namespace table. *)
let empty_env = {
  ids = Env.empty;
  ns = Ns.empty_table;
  cu = UEnv.empty;
  schemas = UEnv.empty
}

88
89
(* Forward reference, dereferenced by [find_type_global] and
   [find_value_global] to obtain the environment of a compilation unit.
   The initial function fails with [assert false]; it is presumably
   replaced by another module at initialisation -- TODO confirm. *)
let from_comp_unit = ref (fun cu -> assert false)

90
(* [enter_cu x cu env] returns [env] extended with the binding of name [x]
   to compilation unit [cu]. *)
let enter_cu x cu env =
  { env with cu = UEnv.add x cu env.cu }
92

93
94
95
(* [find_cu x env] returns the compilation unit bound to [x] in [env]; when
   there is no such binding, a compilation unit is built from [x] with
   [Types.CompUnit.mk]. *)
let find_cu x env =
  if UEnv.mem x env.cu then UEnv.find x env.cu
  else Types.CompUnit.mk x
96
97


98
99
100
101
102
103
(* [enter_schema x uri env] returns [env] extended with the binding of
   schema name [x] to [uri]. *)
let enter_schema x uri env =
  let schemas = UEnv.add x uri env.schemas in
  { env with schemas = schemas }
(* [find_schema x env] returns the string bound to schema name [x].
   Raises [Error] (not located) when [x] is not a registered schema. *)
let find_schema x env =
  try UEnv.find x env.schemas
  with Not_found ->
    let msg = Printf.sprintf "%s: no such schema" (U.get_str x) in
    raise (Error msg)

104
105
106
107
108
109
110
111
(* [enter_type id t env] returns [env] where [id] is bound to the named
   type [t]. *)
let enter_type id t env =
  let ids = Env.add id (Type t) env.ids in
  { env with ids = ids }
(* [enter_types l env] binds every [(id, t)] of [l] as a named type, from
   left to right. *)
let enter_types l env =
  let add_type ids (id,t) = Env.add id (Type t) ids in
  { env with ids = List.fold_left add_type env.ids l }
(* [find_type id env] returns the type bound to [id].
   Raises [Not_found] when [id] is bound to a value; an unbound [id] is
   reported by whatever [Env.find] raises (callers in this file catch
   [Not_found]). *)
let find_type id env =
  match Env.find id env.ids with
    | Type t -> t
    | Val _ -> raise Not_found
113

114
(* [find_type_global loc cu id env] looks up the type [id] in the
   environment of compilation unit [cu]: [cu] is resolved with [find_cu],
   its environment is obtained through [!from_comp_unit], and [find_type]
   is applied to it.  [loc] is currently unused. *)
let find_type_global loc cu id env =
  let cu = find_cu cu env in
  let env = !from_comp_unit cu in
  find_type id env

119
(* [enter_value id t env] returns [env] where [id] is bound to a value of
   type [t]. *)
let enter_value id t env =
  { env with ids = Env.add id (Val t) env.ids }
121
122
(* [enter_values l env] binds every [(id, t)] of [l] as a value of type
   [t], from left to right. *)
let enter_values l env =
  { env with ids =
      List.fold_left (fun accu (id,t) -> Env.add id (Val t) accu) env.ids l }
124
125
126
(* [enter_values_dummy l env] binds every identifier of [l] as a value
   whose type is [Types.empty]. *)
let enter_values_dummy l env =
  let add_dummy ids id = Env.add id (Val Types.empty) ids in
  { env with ids = List.fold_left add_dummy env.ids l }
127
128
(* [find_value id env] returns the type of the value bound to [id].
   Raises [Not_found] when [id] is bound to something other than a value. *)
let find_value id env =
  match Env.find id env.ids with
    | Val t -> t
    | _ -> raise Not_found
131
132
133
(* [find_value_global cu id env] looks up the value [id] in the environment
   that [!from_comp_unit] returns for [cu].  The [env] argument is not
   consulted. *)
let find_value_global cu id env =
  let unit_env = !from_comp_unit cu in
  find_value id unit_env
134
	
135
136
137
138
139
140
(* [value_name_ok id env] is [true] when [id] is unbound or already bound to
   a value, and [false] when it is bound to a type. *)
let value_name_ok id env =
  let bound = try Some (Env.find id env.ids) with Not_found -> None in
  match bound with
  | None -> true
  | Some (Val _) -> true
  | Some (Type _) -> false

141
142
143
144
(* [iter_values env f] applies [f id t] to every identifier of [env] bound
   to a value of type [t]; type bindings are skipped. *)
let iter_values env f =
  let visit x item =
    match item with
    | Val t -> f x t
    | Type _ -> ()
  in
  Env.iter visit env.ids
145

146

147
148
149
150
151
152
153
154
155
(* [register_types cu env] registers every named type of [env] with the type
   printer under the name made of the value of [cu], a colon, and the
   identifier.  Value bindings are skipped. *)
let register_types cu env =
  let prefix = U.concat (Types.CompUnit.value cu) (U.mk ":") in
  let register x item =
    match item with
    | Type t -> Types.Print.register_global (U.concat prefix (Id.value x)) t
    | Val _ -> ()
  in
  Env.iter register env.ids

156

157
(* Namespaces *)
158

159
(* Installs the namespace table of [env] in [Ns.InternalPrinter]. *)
let set_ns_table_for_printer env =
  Ns.InternalPrinter.set_table env.ns
161

162
(* Returns the namespace table of the environment. *)
let get_ns_table tenv = tenv.ns
163

164
(* [enter_ns p ns env] returns [env] whose namespace table additionally maps
   prefix [p] to namespace [ns]. *)
let enter_ns p ns env =
  { env with ns = Ns.add_prefix p ns env.ns }
166

167
168
169
170
171
(* [protect_error_ns loc f x] computes [f x], turning an
   [Ns.UnknownPrefix] exception into a located error at [loc] that names
   the offending prefix. *)
let protect_error_ns loc f x =
  try f x
  with Ns.UnknownPrefix ns ->
    let msg = "Undefined namespace prefix " ^ (U.to_string ns) in
    raise_loc_generic loc msg
172

173
174
175
(* [qname env loc t] resolves [t] with [Ns.map_tag] in the namespace table
   of [env]; an unknown prefix is reported at [loc]. *)
let qname env loc t =
  let resolve = Ns.map_tag env.ns in
  protect_error_ns loc resolve t
    
176
(* [parse_atom env loc t] resolves [t] with [qname] and builds the
   corresponding atom. *)
let parse_atom env loc t =
  Atoms.V.of_qname (qname env loc t)
178
179
 
(* [parse_ns env loc ns] resolves prefix [ns] with [Ns.map_prefix] in the
   namespace table of [env]; an unknown prefix is reported at [loc]. *)
let parse_ns env loc ns =
  protect_error_ns loc (Ns.map_prefix env.ns) ns
181

182
(* [parse_label env loc t] resolves [t] with [Ns.map_attr] and returns the
   label made from the resulting pair with [LabelPool.mk]; an unknown
   prefix is reported at [loc]. *)
let parse_label env loc t =
  let (ns,l) = protect_error_ns loc (Ns.map_attr env.ns) t in
  LabelPool.mk (ns,l)
185

186
187
188
189
190
191
192
193
194
195
196
197
198
(* [parse_record env loc f r] turns the association list [r] into a label
   map: each label is resolved with [parse_label] and each content is
   transformed by [f].  A label occurring twice is reported as a located
   error at [loc]. *)
let parse_record env loc f r =
  let on_duplicate _ _ = raise_loc_generic loc "Duplicated record field" in
  let fields = List.map (fun (l,x) -> (parse_label env loc l, f x)) r in
  LabelMap.from_list on_duplicate fields

(* [const env loc e] converts a constant expression into a [Types] constant.
   Located sub-expressions update the location used for error reporting.
   Any expression form not listed below is rejected with a located error. *)
let rec const env loc = function
  | LocatedExpr (loc,e) -> const env loc e
  | Pair (x,y) -> Types.Pair (const env loc x, const env loc y)
  | Xml (x,y) -> Types.Xml (const env loc x, const env loc y)
  | RecordLitt x -> Types.Record (parse_record env loc (const env loc) x)
  | String (i,j,s,c) -> Types.String (i,j,s,const env loc c)
  | Atom t -> Types.Atom (parse_atom env loc t)
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | Const c -> c
  | _ -> raise_loc_generic loc "This should be a scalar or structured constant"

(* I. Transform the abstract syntax of types and patterns into
      the internal form *)
204

205

206
(* Schema *)
207

208
209
210
(* [is_registered_schema env s] tells whether schema name [s] has a binding
   in [env]. *)
let is_registered_schema env s = UEnv.mem s env.schemas

(* uri -> schema binding.  The table is registered with [State.ref] under
   the name given as first argument. *)
let schemas = State.ref "Typer.schemas" (Hashtbl.create 3)
212
213
214

(* Tables of schema components, one per component kind.  They are looked up
   in [find_schema_descr_uri] with keys of the form [(uri, name)]. *)
let schema_types = State.ref "Typer.schema_types" (Hashtbl.create 51)
let schema_elements = State.ref "Typer.schema_elements" (Hashtbl.create 51)
let schema_attributes = State.ref "Typer.schema_attributes" (Hashtbl.create 51)
let schema_attribute_groups =
  State.ref "Typer.schema_attribute_groups" (Hashtbl.create 51)
let schema_model_groups =
  State.ref "Typer.schema_model_groups" (Hashtbl.create 51)
220

221
222


223
224
  (* raise Not_found *)

225
226
227

(* Forward reference called by [find_schema_descr_uri] with a schema URI
   before the component tables are consulted; its result is ignored there.
   Presumably it loads the schema and fills the tables -- TODO confirm.
   The initial function fails with [assert false]. *)
let get_schema_fwd = ref (fun _ -> assert false)

228
(* [find_schema_descr_uri kind uri name] returns the entry registered for
   component [name] of the schema at [uri].

   [!get_schema_fwd uri] is called first.  With [Some k], only the table of
   kind [k] is searched; with [None], the tables are tried in the order
   element, type, attribute, attribute group, model group.

   Raises [Error] (not located) when no table yields an entry, or when
   [Not_found] escapes from [!get_schema_fwd]. *)
let find_schema_descr_uri kind uri (name : Ns.qname) =
  try
    ignore (!get_schema_fwd uri);
    let elt () = Hashtbl.find !schema_elements (uri, name) in
    let typ () = Hashtbl.find !schema_types (uri, name) in
    let att () = Hashtbl.find !schema_attributes (uri, name) in
    let att_group () = Hashtbl.find !schema_attribute_groups (uri, name) in
    let mod_group () = Hashtbl.find !schema_model_groups (uri, name) in
    (* Returns the result of the first lookup that does not raise
       [Not_found]. *)
    let rec do_try = function
      | [] -> raise Not_found
      | f :: rem -> (try f () with Not_found -> do_try rem)
    in
    match kind with
      | Some `Element -> do_try [ elt ]
      | Some `Type -> do_try [ typ ]
      | Some `Attribute -> do_try [ att ]
      | Some `Attribute_group -> do_try [ att_group ]
      | Some `Model_group -> do_try [ mod_group ]
      | None ->
          (* policy for unqualified schema component resolution. This order should
           * be consistent with Schema_component.get_component *)
          do_try [ elt; typ; att; att_group; mod_group ]
    with Not_found ->
      raise (Error (Printf.sprintf "No %s named '%s' found in schema '%s'"
		      (Schema_common.string_of_component_kind kind) (Ns.QName.to_string name) uri))
253
254
255
256
257

(* [find_schema_descr env kind schema name] resolves schema name [schema] to
   its URI with [find_schema], then looks the component up with
   [find_schema_descr_uri]. *)
let find_schema_descr env kind schema name =
  find_schema_descr_uri kind (find_schema schema env) name

258

259
260
(* Eliminate Recursion, propagate Sequence Capture Variables *)

261
262
263
264
265
266
267
268
269
270
271
272
273
274
(* We use two intermediate representations between AST types/patterns
   and the internal ones:

      AST -(1)-> derecurs -(2)-> slot -(3)-> internal

   (1) eliminate recursion and schema references,
       propagate sequence capture variables, keep regexps

   (2) stratify, detect ill-formed recursion, compile regexps

   (3) check additional constraints on types / patterns;
       deep (recursive) hash-consing
*)     

275
276
277
278
(* First intermediate representation (see the overview comment above).

   A [derecurs_slot] stands for a recursively defined name:
   - [ploc]: source location given at creation;
   - [pid]: number taken from a global counter by [mk_derecurs_slot];
   - [ploop]: set while the slot is being compiled, to detect unguarded
     recursion (see [real_compile]);
   - [pdescr]: the definition, [PDummy] until it is filled in.

   In [PType (t, h)] the integer is the hash of [t] (see [ptype]). *)
type derecurs_slot = {
  ploc : Location.loc;
  pid  : int;
  mutable ploop : bool;
  mutable pdescr : derecurs;
} and derecurs =
  | PDummy
  | PAlias of derecurs_slot
  | PType of Types.descr * int
  | POr of derecurs * derecurs
  | PAnd of derecurs * derecurs
  | PDiff of derecurs * derecurs
  | PTimes of derecurs * derecurs
  | PXml of derecurs * derecurs
  | PArrow of derecurs * derecurs
  | POptional of derecurs
  | PRecord of bool * (derecurs * derecurs option) label_map
  | PCapture of id
  | PConstant of id * Types.const
  | PRegexp of derecurs_regexp
and derecurs_regexp =
  | PEpsilon
  | PElem of derecurs
  | PGuard of derecurs
  | PSeq of derecurs_regexp * derecurs_regexp
  | PAlt of derecurs_regexp * derecurs_regexp
  | PStar of derecurs_regexp
  | PWeakStar of derecurs_regexp

304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

(* Debugging printers for the derecurs representation.  Aliases are shown by
   slot number and types are not expanded.  Arrows, optionals, records,
   captures and constants all print as Other.  Both kinds of star print the
   same way. *)
let rec print_derecurs ppf d =
  (* Prints a binary node with the given format. *)
  let binary fmt left right =
    Format.fprintf ppf fmt print_derecurs left print_derecurs right
  in
  match d with
  | PDummy -> Format.fprintf ppf "Dummy"
  | PAlias a -> Format.fprintf ppf "Alias %i" a.pid
  | PType _ -> Format.fprintf ppf "Type"
  | POr (r1,r2) -> binary "Or(%a,%a)" r1 r2
  | PAnd (r1,r2) -> binary "And(%a,%a)" r1 r2
  | PDiff (r1,r2) -> binary "Diff(%a,%a)" r1 r2
  | PTimes (r1,r2) -> binary "Times(%a,%a)" r1 r2
  | PXml (r1,r2) -> binary "Xml(%a,%a)" r1 r2
  | PRegexp r -> Format.fprintf ppf "Regexp(%a)" print_regexp r
  | _ -> Format.fprintf ppf "Other"
and print_regexp ppf re =
  match re with
  | PEpsilon -> Format.fprintf ppf "e"
  | PElem r -> Format.fprintf ppf "(%a)" print_derecurs r
  | PGuard r -> Format.fprintf ppf "/(%a)" print_derecurs r
  | PSeq (r1,r2) -> Format.fprintf ppf "%a,%a" print_regexp r1 print_regexp r2
  | PAlt (r1,r2) -> Format.fprintf ppf "%a|%a" print_regexp r1 print_regexp r2
  | PStar r -> Format.fprintf ppf "%a*" print_regexp r
  | PWeakStar r -> Format.fprintf ppf "%a*" print_regexp r

329
330
(* Second intermediate representation (see the overview comment above).

   Fields of a [slot]:
   - [fv]: cached free variables, filled by [compute_fv];
   - [hash]: cached hash value, filled by [SlotTable];
   - [rank1], [gen1], [rank2], [gen2]: visit marks used by [hash_slot],
     [equal_slot] and [fv_slot]; a mark is valid when its generation equals
     the global [gen];
   - [d]: the description, [IDummy] until [flush_defs] fills it in. *)
type descr =
  | IDummy
  | IType of Types.descr * int
  | IOr of descr * descr
  | IAnd of descr * descr
  | IDiff of descr * descr
  | ITimes of slot * slot
  | IXml of slot * slot
  | IArrow of slot * slot
  | IOptional of descr
  | IRecord of bool * (slot * descr option) label_map
  | ICapture of id
  | IConstant of id * Types.const
and slot = {
  mutable fv : fv option;
  mutable hash : int option;
  mutable rank1: int; mutable rank2: int;
  mutable gen1 : int; mutable gen2: int;
  mutable d    : descr;
}
349
350
351
352
353
354
355
356
357
358
359
360
361
    

(* Number of derecurs slots created so far. *)
let counter = ref 0

(* [mk_derecurs_slot loc] creates a fresh slot located at [loc], numbered
   with the next value of [counter], with a dummy description. *)
let mk_derecurs_slot loc =
  counter := !counter + 1;
  let id = !counter in
  { ploc = loc; pid = id; ploop = false; pdescr = PDummy }
	  
(* Creates a fresh slot: dummy description, no cached free variables or
   hash, all visit marks at zero. *)
let mk_slot () =
  { fv = None;
    hash = None;
    rank1 = 0; rank2 = 0;
    gen1 = 0; gen2 = 0;
    d = IDummy }


(* This environment is used in phase (1) to eliminate recursion *)
(* Environment of phase (1): the typing environment plus the slots of the
   recursive definitions currently in scope. *)
type penv = {
  penv_tenv : t;
  penv_derec : derecurs_slot Env.t;
}

(* Phase (1) environment over [tenv] with no recursive definition in
   scope. *)
let penv tenv =
  { penv_derec = Env.empty; penv_tenv = tenv }
367

368
(* Structural hash on derecurs terms, compatible with [equal_derecurs].
   Aliases hash to their slot number (slot contents are not traversed) and
   types to a function of their stored hash.  [PDummy] must not occur. *)
let rec hash_derecurs = function
  | PDummy -> assert false
  | PAlias s ->
      s.pid
  | PType (_,hash) ->
      1 + 17 * hash
  | POr (p1,p2) ->
      2 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PAnd (p1,p2) ->
      3 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PDiff (p1,p2) ->
      4 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PTimes (p1,p2) ->
      5 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PXml (p1,p2) ->
      6 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | PArrow (p1,p2) ->
      7 + 17 * (hash_derecurs p1) + 257 * (hash_derecurs p2)
  | POptional p ->
      8 + 17 * (hash_derecurs p)
  | PRecord (o,r) ->
      (if o then 9 else 10) + 17 * (LabelMap.hash hash_derecurs_field r)
  | PCapture x ->
      11 + 17 * (Id.hash x)
  | PConstant (x,c) ->
      12 + 17 * (Id.hash x) + 257 * (Types.Const.hash c)
  | PRegexp p ->
      13 + 17 * (hash_derecurs_regexp p)
(* Hash of a record field: content plus optional or-else part. *)
and hash_derecurs_field = function
  | (p, Some e) -> 1 + 17 * hash_derecurs p + 257 * hash_derecurs e
  | (p, None) -> 2 + 17 * hash_derecurs p
and hash_derecurs_regexp = function
  | PEpsilon ->
      1
  | PElem p ->
      2 + 17 * (hash_derecurs p)
  | PSeq (p1,p2) ->
      3 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PAlt (p1,p2) ->
      4 + 17 * (hash_derecurs_regexp p1) + 257 * (hash_derecurs_regexp p2)
  | PStar p ->
      5 + 17 * (hash_derecurs_regexp p)
  | PWeakStar p ->
      6 + 17 * (hash_derecurs_regexp p)
  | PGuard p ->
      7 + 17 * (hash_derecurs p)
414
415

(* Structural equality on derecurs terms.  Physically equal terms are equal;
   aliases are equal only when they are the same slot (slot contents are
   not compared); types are compared with [Types.equal] after checking
   their stored hashes. *)
let rec equal_derecurs p1 p2 = (p1 == p2) || match p1,p2 with
  | PAlias s1, PAlias s2 ->
      s1 == s2
  | PType (t1,h1), PType (t2,h2) ->
      (h1 == h2) && (Types.equal t1 t2)
  | POr (p1,q1), POr (p2,q2)
  | PAnd (p1,q1), PAnd (p2,q2)
  | PDiff (p1,q1), PDiff (p2,q2)
  | PTimes (p1,q1), PTimes (p2,q2)
  | PXml (p1,q1), PXml (p2,q2)
  | PArrow (p1,q1), PArrow (p2,q2) ->
      (equal_derecurs p1 p2) && (equal_derecurs q1 q2)
  | POptional p1, POptional p2 ->
      equal_derecurs p1 p2
  | PRecord (o1,r1), PRecord (o2,r2) ->
      (o1 == o2) && (LabelMap.equal equal_derecurs_field r1 r2)
  | PCapture x1, PCapture x2 ->
      Id.equal x1 x2
  | PConstant (x1,c1), PConstant (x2,c2) ->
      (Id.equal x1 x2) && (Types.Const.equal c1 c2)
  | PRegexp p1, PRegexp p2 ->
      equal_derecurs_regexp p1 p2
  | _ -> false
(* Two record fields are equal when both contents and both or-else parts
   (if any) are equal. *)
and equal_derecurs_field r1 r2 = match (r1,r2) with
  | (p1,None),(p2,None) -> equal_derecurs p1 p2
  | (p1, Some e1), (p2, Some e2) -> equal_derecurs p1 p2 && equal_derecurs e1 e2
  | _ -> false
and equal_derecurs_regexp r1 r2 = match r1,r2 with
  | PEpsilon, PEpsilon ->
      true
  | PElem p1, PElem p2 ->
      equal_derecurs p1 p2
  | PGuard p1, PGuard p2 ->
      equal_derecurs p1 p2
  | PSeq (p1,q1), PSeq (p2,q2)
  | PAlt (p1,q1), PAlt (p2,q2) ->
      (equal_derecurs_regexp p1 p2) && (equal_derecurs_regexp q1 q2)
  | PStar p1, PStar p2
  | PWeakStar p1, PWeakStar p2 ->
      equal_derecurs_regexp p1 p2
  | _ -> false
456

457
458
459
460
461
462
463
464
(* Hash tables keyed by derecurs terms, using [hash_derecurs] and
   [equal_derecurs]. *)
module DerecursTable = Hashtbl.Make(
  struct 
    type t = derecurs 
    let hash = hash_derecurs
    let equal = equal_derecurs
  end
)

465
466
467
468
(* Global traversal state for slots.  [gen] is the current generation:
   a visit mark stored in a slot is valid only when its generation equals
   [!gen].  [rank] numbers the slots in the order they are first visited
   by [hash_slot] / [equal_slot]. *)
let gen = ref 0
let rank = ref 0
	     
(* Hash on descriptions that follows slots, hence on possibly cyclic
   structures.  The caller must start a new generation first (as done in
   [SlotTable.hash]).  [IDummy] must not occur. *)
let rec hash_descr = function
  | IDummy -> assert false
  | IType (t,h) -> h
  | IOr (d1,d2) -> 1 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IAnd (d1,d2) -> 2 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IDiff (d1,d2) -> 3 + 17 * (hash_descr d1) + 257 * (hash_descr d2)
  | IOptional d -> 4 + 17 * (hash_descr d)
  | ITimes (s1,s2) -> 5 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IXml (s1,s2) -> 6 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IArrow (s1,s2) -> 7 + 17 * (hash_slot s1) + 257 * (hash_slot s2)
  | IRecord (o,r) -> (if o then 8 else 9) + 17 * (LabelMap.hash hash_descr_field r)
  | ICapture x -> 10 + 17 * (Id.hash x)
  | IConstant (x,y) -> 11 + 17 * (Id.hash x) + 257 * (Types.Const.hash y)
and hash_descr_field = function
  | (d, Some e) -> 1 + 17 * hash_slot d + 257 * hash_descr e
  | (d, None) -> 2 + 17 * hash_slot d
(* A slot already visited in the current generation hashes to a function of
   its rank, which stops the traversal on cycles.  Otherwise the slot is
   stamped with the next rank and its description is hashed. *)
and hash_slot s =
  if s.gen1 = !gen then 13 * s.rank1
  else (
    incr rank;
    s.rank1 <- !rank; s.gen1 <- !gen;
    hash_descr s.d
  )
    
(* Equality on descriptions that follows slots, hence on possibly cyclic
   structures.  The caller must start a new generation first (as done in
   [SlotTable.equal]). *)
let rec equal_descr d1 d2 =
  match (d1,d2) with
  | IType (x1,h1), IType (x2,h2) -> (h1 == h2) && (Types.equal x1 x2)
  | IOr (x1,y1), IOr (x2,y2)
  | IAnd (x1,y1), IAnd (x2,y2)
  | IDiff (x1,y1), IDiff (x2,y2) -> (equal_descr x1 x2) && (equal_descr y1 y2)
  | IOptional x1, IOptional x2 -> equal_descr x1 x2
  | ITimes (x1,y1), ITimes (x2,y2)
  | IXml (x1,y1), IXml (x2,y2)
  | IArrow (x1,y1), IArrow (x2,y2) -> (equal_slot x1 x2) && (equal_slot y1 y2)
  | IRecord (o1,r1), IRecord (o2,r2) ->
      (o1 == o2) && (LabelMap.equal equal_descr_field r1 r2)
  | ICapture x1, ICapture x2 -> Id.equal x1 x2
  | IConstant (x1,y1), IConstant (x2,y2) ->
      (Id.equal x1 x2) && (Types.Const.equal y1 y2)
  | _ -> false
and equal_descr_field d1 d2 = match (d1,d2) with
  | (d1,None),(d2,None) -> equal_slot d1 d2
  | (d1, Some e1), (d2, Some e2) -> equal_slot d1 d2 && equal_descr e1 e2
  | _ -> false
(* The left slot uses marks [rank1]/[gen1], the right one [rank2]/[gen2].
   Two slots both visited in this generation are equal iff they carry the
   same rank.  Two slots both unvisited are stamped with the same fresh
   rank and their descriptions are compared.  If only one of them was
   visited, the result is [false]. *)
and equal_slot s1 s2 =
  ((s1.gen1 = !gen) && (s2.gen2 = !gen) && (s1.rank1 = s2.rank2))
  ||
  ((s1.gen1 <> !gen) && (s2.gen2 <> !gen) && (
     incr rank;
     s1.rank1 <- !rank; s1.gen1 <- !gen;
     s2.rank2 <- !rank; s2.gen2 <- !gen;
     equal_descr s1.d s2.d
   ))
  
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
(* Hash tables keyed by slots, compared up to the recursive equality
   [equal_slot]. *)
module SlotTable = Hashtbl.Make(
  struct
    type t = slot
	
    (* Returns the hash cached in the slot when there is one.  Otherwise
       starts a new generation with rank reset to 0, computes the hash with
       [hash_slot] and caches it. *)
    let hash s =
      match s.hash with
	| Some h -> h
	| None ->
	    incr gen; rank := 0; 
	    let h = hash_slot s in
	    s.hash <- Some h;
	    h
	      
    (* Physically equal slots are equal; otherwise a new generation is
       started (rank reset to 0) and [equal_slot] decides. *)
    let equal s1 s2 = 
      (s1 == s2) || 
      (incr gen; rank := 0; 
       let e = equal_slot s1 s2 in
       (*     if e then Printf.eprintf "Recursive hash-consing: Equal\n";  *)
       e)
  end)

543
(* [ptype t] wraps type [t] as a derecurs term, storing its hash alongside. *)
let ptype t = PType (t, Types.hash t)
544

545
(* The derecurs term for the empty type.  [por] and [pand] recognise it by
   physical equality, so only this very value is simplified away. *)
let pempty = ptype Types.empty
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602

(* Union of two derecurs terms, dropping an operand that is physically
   [pempty]. *)
let por p1 p2 =
  match p1 == pempty, p2 == pempty with
  | true, _ -> p2
  | false, true -> p1
  | false, false -> POr (p1,p2)

(* Intersection of two derecurs terms; the result is [pempty] as soon as one
   operand is physically [pempty]. *)
let pand p1 p2 =
  if p1 == pempty then pempty
  else if p2 == pempty then pempty
  else PAnd (p1,p2)

(* [remove_regexp r q] translates regexp [r] followed by continuation [q]
   into a regexp-free derecurs term.  An element becomes a product with the
   continuation, a guard an intersection with it, an alternative a union.
   Stars introduce a fresh slot (located at [noloc]) standing for the loop;
   [PStar] and [PWeakStar] differ only in the order of the two operands of
   the resulting union. *)
let rec remove_regexp r q = match r with
  | PEpsilon ->
      q
  | PElem p ->
      PTimes (p, q)
  | PGuard p ->
      pand p q
  | PSeq (r1,r2) ->
      remove_regexp r1 (remove_regexp r2 q)
  | PAlt (r1,r2) ->
      por (remove_regexp r1 q) (remove_regexp r2 q)
  | PStar r ->
      let x = mk_derecurs_slot noloc in
      let res = POr (PAlias x, q) in
      (* The loop body continues with [res] once it has matched an element,
         and with [pempty] if it matched nothing. *)
      x.pdescr <- remove_regexp2 r res pempty;
      res
  | PWeakStar r ->
      let x = mk_derecurs_slot noloc in
      let res = POr (q, PAlias x) in
      x.pdescr <- remove_regexp2 r res pempty;
      res

(* [remove_regexp2 r q_nonempty q_empty] is like [remove_regexp] with two
   continuations: [q_nonempty] is used after an element ([PElem]) has been
   matched, [q_empty] where none has ([PEpsilon], [PGuard]).  When both
   continuations are physically the same, it falls back to
   [remove_regexp].  Presumably this keeps star bodies that can match the
   empty sequence from producing unguarded loops -- TODO confirm. *)
and remove_regexp2 r q_nonempty q_empty =
  if q_nonempty == q_empty then remove_regexp r q_empty
  else match r with
    | PEpsilon ->
        q_empty
    | PElem p ->
        PTimes (p, q_nonempty)
    | PGuard p ->
	pand p q_empty
    | PSeq (r1,r2) ->
        remove_regexp2 r1
        (remove_regexp2 r2 q_nonempty q_nonempty)
        (remove_regexp2 r2 q_nonempty q_empty)
    | PAlt (r1,r2) ->
        por
        (remove_regexp2 r1 q_nonempty q_empty)
        (remove_regexp2 r2 q_nonempty q_empty)
    | PStar r ->
        let x = mk_derecurs_slot noloc in
        x.pdescr <- remove_regexp2 r (POr (PAlias x, q_nonempty)) pempty;
        por (PAlias x) q_empty
    | PWeakStar r ->
        let x = mk_derecurs_slot noloc in
        x.pdescr <- remove_regexp2 r (POr (q_nonempty, PAlias x)) pempty;
        por q_empty (PAlias x)

603
604
605
606
607
608
(* The constant built from the nil atom of sequences. *)
let cst_nil = Types.Atom Sequence.nil_atom

(* [capture_all vars p] intersects [p] with a capture of every variable of
   [vars]. *)
let capture_all vars p =
  let add_capture acc x = PAnd (acc, PCapture x) in
  IdSet.fold add_capture p vars

(* [termin b vars p] returns [p] unchanged when [b] holds; otherwise it
   appends to regexp [p], for every variable of [vars], a guard binding that
   variable to the constant [cst_nil]. *)
let termin b vars p =
  if b then p
  else
    let close acc x = PSeq (acc, PGuard (PConstant (x,cst_nil))) in
    IdSet.fold close p vars

609
(* Phase (1): [derecurs env p] translates the AST type/pattern [p] into a
   derecurs term.  Names are resolved by [derecurs_var], schema components
   through [find_schema_descr], local recursive definitions by
   [derecurs_def]; regexps are kept as [PRegexp] after propagation of
   sequence capture variables. *)
let rec derecurs env p = match p.descr with
  | PatVar v -> derecurs_var env p.loc v
  | SchemaVar (kind, schema_name, component_name) ->
      let name = qname env.penv_tenv  p.loc component_name in
      ptype (find_schema_descr env.penv_tenv kind schema_name name)
  | Recurs (p,b) -> derecurs (derecurs_def env b) p
  | Internal t -> ptype t
  | NsT ns -> ptype (Types.atom (Atoms.any_in_ns (parse_ns env.penv_tenv p.loc ns)))
  | Or (p1,p2) -> POr (derecurs env p1, derecurs env p2)
  | And (p1,p2) -> PAnd (derecurs env p1, derecurs env p2)
  | Diff (p1,p2) -> PDiff (derecurs env p1, derecurs env p2)
  | Prod (p1,p2) -> PTimes (derecurs env p1, derecurs env p2)
  | XmlT (p1,p2) -> PXml (derecurs env p1, derecurs env p2)
  | Arrow (p1,p2) -> PArrow (derecurs env p1, derecurs env p2)
  | Optional p -> POptional (derecurs env p)
  | Record (o,r) ->
      let aux = function
        | (p,Some e) -> (derecurs env p, Some (derecurs env e))
        | (p,None) -> derecurs env p, None in
      PRecord (o, parse_record env.penv_tenv p.loc aux r)
  | Constant (x,c) -> PConstant (x,const env.penv_tenv p.loc c)
  | Cst c -> ptype (Types.constant (const env.penv_tenv p.loc c))
  | Regexp r ->
      let r,_ = derecurs_regexp IdSet.empty false IdSet.empty true env r in
      PRegexp r
        (* Note: computing remove_regexp here is slower (because
           of caching ?) *)

and derecurs_regexp vars b rvars f env = function
(* - vars: seq variables to be propagated top-down and added
     to each captured element
   - b: below a star ?
   - rvars: seq variables that appear on the right of the regexp
   - f: tail position

  returns the set of seq variable of the regexp minus rvars
  (they have already been terminated if not below a star)
*)
  | Epsilon ->
      PEpsilon, IdSet.empty
  | Elem p ->
      PElem (capture_all vars (derecurs env p)), IdSet.empty
  | Guard p ->
      PGuard (derecurs env p), IdSet.empty
  | Seq (p1,p2) ->
      (* The right part is processed first so that its variables are known
         when processing the left part. *)
      let (p2,v2) = derecurs_regexp vars b rvars f env p2 in
      let (p1,v1) = derecurs_regexp vars b (IdSet.cup rvars v2) false env p1 in
      PSeq (p1,p2), IdSet.cup v1 v2
  | Alt (p1,p2) ->
      let (p1,v1) = derecurs_regexp vars b rvars f env p1
      and (p2,v2) = derecurs_regexp vars b rvars f env p2 in
      (* Each branch terminates the variables that only the other branch
         binds. *)
      PAlt (termin b (IdSet.diff v2 v1) p1, termin b (IdSet.diff v1 v2) p2),
      IdSet.cup v1 v2
  | Star p ->
      let (p,v) = derecurs_regexp vars true rvars false env p in
      termin b v (PStar p), v
  | WeakStar p ->
      let (p,v) = derecurs_regexp vars true rvars false env p in
      termin b v (PWeakStar p), v
  | SeqCapture (x,p) ->
      let vars = if f then vars else IdSet.add x vars in
      let after = IdSet.mem rvars x in
      let rvars = IdSet.add x rvars in
      let (p,v) = derecurs_regexp vars b rvars false env p in
      (if f
       then PSeq (PGuard (PCapture x), p)
       else termin (after || b) (IdSet.singleton x) p),
      (if after then v else IdSet.add x v)

(* Resolution of a name.  An unqualified name is, in this order, a recursive
   definition in scope, a named type of the environment, or else a capture
   variable.  A qualified name must be a type of the given compilation unit;
   otherwise a located error is raised. *)
and derecurs_var env loc v =
  match Ns.split_qname v with
    | "", v ->
        let v = ident v in
        (try PAlias (Env.find v env.penv_derec)
         with Not_found ->
           try ptype (find_type v env.penv_tenv)
           with Not_found -> PCapture v)
    | cu, v ->
        try
          let cu = U.mk cu in
          ptype (find_type_global loc cu (ident v) env.penv_tenv)
        with Not_found ->
          raise_loc_generic loc
          ("Unbound external type " ^ cu ^ ":" ^ (U.to_string v))

(* [derecurs_def env b] enters the mutually recursive definitions [b]: a
   fresh slot is created for every name, all slots are put in scope, and
   only then each definition is translated and stored in its slot.  Returns
   the extended environment. *)
and derecurs_def env b =
  let b = List.map (fun (v,p) -> (v,p,mk_derecurs_slot p.loc)) b in
  let n =
    List.fold_left (fun env (v,p,s) -> Env.add v s env) env.penv_derec b in
  let env = { env with penv_derec = n } in
  List.iter (fun (v,p,s) -> s.pdescr <- derecurs env p) b;
  env

703

704
705
706
707
708
(* Free (capture) variables of slots and descriptions.

   [fv_slot] returns the cached set when the slot has one.  Otherwise a slot
   already visited in the current generation contributes the empty set
   (which cuts cycles), and an unvisited slot is marked and its description
   traversed.  The caller is expected to start a new generation (see
   [compute_fv]). *)
let rec fv_slot s =
  match s.fv with
    | Some x -> x
    | None ->
        if s.gen1 = !gen then IdSet.empty
        else (s.gen1 <- !gen; fv_descr s.d)
and fv_descr = function
  | IDummy -> assert false
  | IType _ -> IdSet.empty
  | IOr (d1,d2)
  | IAnd (d1,d2)
  | IDiff (d1,d2) -> IdSet.cup (fv_descr d1) (fv_descr d2)
  | IOptional d -> fv_descr d
  | ITimes (s1,s2)
  | IXml (s1,s2)
  | IArrow (s1,s2) -> IdSet.cup (fv_slot s1) (fv_slot s2)
  | IRecord (o,r) ->
      List.fold_left IdSet.cup IdSet.empty (LabelMap.map_to_list fv_field r)
  | ICapture x | IConstant (x,_) -> IdSet.singleton x
and fv_field = function
  | (d,Some e) -> IdSet.cup (fv_slot d) (fv_descr e)
  | (d,None) -> fv_slot d

727

728
729
730
731
732
733
734
735
(* [compute_fv s] fills the free-variable cache of [s] if it is still empty:
   a new generation is started, then the set computed by [fv_slot] is
   stored. *)
let compute_fv s =
  match s.fv with
  | None ->
      incr gen;
      s.fv <- Some (fv_slot s)
  | Some _ -> ()
	  
736
737
738
(* [check_no_capture loc s] raises a located error at [loc] naming one
   variable of [s] when the set [s] is not empty. *)
let check_no_capture loc s =
  match IdSet.pick s with
    | Some x ->
        raise_loc_generic loc ("Capture variable not allowed: " ^ (Ident.to_string x))
    | None -> ()
741
    
742
743
(* [compile_slot_hash] maps derecurs terms to the slot allocated for them by
   [compile_slot].  [compile_hash] is only referenced from commented-out
   code in the visible part of the file. *)
let compile_slot_hash = DerecursTable.create 15067
let compile_hash = DerecursTable.create 15067
744

745
746
(* Work lists filled by [compile_slot] and emptied by [flush_defs]:
   [todo_defs] holds the slots whose description is still to be compiled
   (with their derecurs term), [todo_fv] the slots whose free variables are
   still to be computed. *)
let todo_defs = ref []
let todo_fv = ref []
747
748

(* Phase (2): translation of derecurs terms into descriptions.  [compile]
   currently just calls [real_compile]; the caching variant is kept below
   as a comment. *)
let rec compile p =
  real_compile p
(*
  print_char '*'; flush stdout;
  try Stats.InOut.wrap "lookup" (DerecursTable.find compile_hash) p; 
  with Not_found ->
    Stats.InOut.enter "compile";
    let c = real_compile p in
    DerecursTable.replace compile_hash p c;
    Stats.InOut.leave "compile";
    c
*)
and real_compile = function
  | PDummy -> assert false
  | PAlias v ->
      (* [ploop] is set while the definition of [v] is being compiled:
         meeting [v] again before crossing a slot boundary means the
         recursion is unguarded.  Note that the flag is left set if
         compilation raises. *)
      if v.ploop then
        raise_loc_generic v.ploc ("Unguarded recursion on type/pattern");
      v.ploop <- true;
      let r = compile v.pdescr in
      v.ploop <- false;
      r
  | PType (t,h) -> IType (t,h)
  | POr (t1,t2) -> IOr (compile t1, compile t2)
  | PAnd (t1,t2) -> IAnd (compile t1, compile t2)
  | PDiff (t1,t2) -> IDiff (compile t1, compile t2)
  | PTimes (t1,t2) -> ITimes (compile_slot t1, compile_slot t2)
  | PXml (t1,t2) -> IXml (compile_slot t1, compile_slot t2)
  | PArrow (t1,t2) -> IArrow (compile_slot t1, compile_slot t2)
  | POptional t -> IOptional (compile t)
  | PRecord (o,r) ->  IRecord (o, LabelMap.map compile_field r)
  | PConstant (x,v) -> IConstant (x,v)
  | PCapture x -> ICapture x
  | PRegexp r -> compile (remove_regexp r (ptype Sequence.nil_type))

and compile_field = function
  | (p, Some e) -> (compile_slot p, Some (compile e))
  | (p, None) -> (compile_slot p, None)

(* [compile_slot p] returns the slot associated with [p], allocating one if
   needed.  A new slot is only registered in the work lists here; its
   description is filled in later by [flush_defs]. *)
and compile_slot p =
  try DerecursTable.find compile_slot_hash p
  with Not_found ->
    let s = mk_slot () in
    todo_defs := (s,p) :: !todo_defs;
    todo_fv := s :: !todo_fv;
    DerecursTable.add compile_slot_hash p s;
    s
794

795
      
796
(* Timer started and stopped by [flush_defs] around the computation of free
   variables. *)
let timer_fv = Stats.Timer.create "Typer.fv"
797
(* [flush_defs ()] compiles the description of every pending slot of
   [todo_defs], including those added while compiling, until the list is
   empty; it then computes the free variables of all slots of [todo_fv]
   and empties that list. *)
let rec flush_defs () =
  match !todo_defs with
    | [] ->
        Stats.Timer.start timer_fv;
        List.iter compute_fv !todo_fv;
        todo_fv := [];
        Stats.Timer.stop timer_fv ()
    | (s,p)::t ->
(*	Format.fprintf Format.std_formatter "flush slot:%a@."
	  print_derecurs p; *)
        (* The entry is removed before compiling, since [compile] may push
           new entries. *)
        todo_defs := t;
        s.d <- compile p;
        flush_defs ()
810
811
812
813
814
	
(* Tables from slots to the type node ([typ_node]) and the pattern node
   ([pat_node]) already built for them. *)
let typ_nodes = SlotTable.create 67
let pat_nodes = SlotTable.create 67
		  
(* Phase (3) for types: [typ d] builds the internal type of description
   [d].  Dummy descriptions, captures and constants must not occur here
   (callers check the absence of capture variables first). *)
let rec typ = function
  | IType (t,_) -> t
  | IOr (s1,s2) -> Types.cup (typ s1) (typ s2)
  | IAnd (s1,s2) ->  Types.cap (typ s1) (typ s2)
  | IDiff (s1,s2) -> Types.diff (typ s1) (typ s2)
  | ITimes (s1,s2) -> Types.times (typ_node s1) (typ_node s2)
  | IXml (s1,s2) -> Types.xml (typ_node s1) (typ_node s2)
  | IArrow (s1,s2) -> Types.arrow (typ_node s1) (typ_node s2)
  | IOptional s -> Types.Record.or_absent (typ s)
  | IRecord (o,r) ->  Types.record' (o, LabelMap.map typ_field r)
  | IDummy | ICapture _ | IConstant (_,_) -> assert false

and typ_field = function
  | (s, None) -> typ_node s
  | (s, Some _) ->
      raise (Patterns.Error "Or-else clauses are not allowed in types")

(* [typ_node s] returns the type node of slot [s], creating it on first
   use.  The node is recorded in [typ_nodes] before its definition is
   computed, so that recursive references find it. *)
and typ_node s : Types.Node.t =
  try SlotTable.find typ_nodes s
  with Not_found ->
    let x = Types.make () in
    SlotTable.add typ_nodes s x;
    Types.define x (typ s.d);
    x
      
(* Phase (3) for patterns: [pat d] builds the internal pattern of
   description [d].  A description without capture variables is translated
   as a type and turned into a constraint pattern; otherwise [pat_aux]
   translates it constructor by constructor. *)
let rec pat d : Patterns.descr =
  if IdSet.is_empty (fv_descr d)
  then Patterns.constr (typ d)
  else pat_aux d

(* Only called on descriptions that have capture variables, hence [IType]
   cannot occur.  Constructs that have no meaning in patterns raise
   [Patterns.Error]. *)
and pat_aux = function
  | IDummy -> assert false
  | IOr (s1,s2) -> Patterns.cup (pat s1) (pat s2)
  | IAnd (s1,s2) -> Patterns.cap (pat s1) (pat s2)
  | IDiff (s1,s2) when IdSet.is_empty (fv_descr s2) ->
      (* A difference with a pure type is an intersection with its
         negation. *)
      let s2 = Types.neg (typ s2) in
      Patterns.cap (pat s1) (Patterns.constr s2)
  | IDiff _ ->
      raise (Patterns.Error "Differences are not allowed in patterns")
  | ITimes (s1,s2) -> Patterns.times (pat_node s1) (pat_node s2)
  | IXml (s1,s2) -> Patterns.xml (pat_node s1) (pat_node s2)
  | IOptional _ ->
      raise (Patterns.Error "Optional fields are not allowed in record patterns")
  | IRecord (o,r) ->
      (* Fields without capture variables go into a record type constraint;
         each field with capture variables contributes a record pattern,
         accumulated in [pats], and an unconstrained field in the type. *)
      let pats = ref [] in
      let aux l = function
        | (s,None) ->
            if IdSet.is_empty (fv_slot s) then typ_node s
            else
              ( pats := Patterns.record l (pat_node s) :: !pats;
                Types.any_node )
        | (s,Some e) ->
            if IdSet.is_empty (fv_slot s) then
              raise (Patterns.Error "Or-else clauses are not allowed in types")
            else
              ( pats := Patterns.cup
                  (Patterns.record l (pat_node s))
                  (pat e) :: !pats;
                Types.Record.any_or_absent_node )
      in
      let constr = Types.record' (o,LabelMap.mapi aux r) in
      List.fold_left Patterns.cap (Patterns.constr constr) !pats
        (* TODO: can avoid constr when o=true, and all fields have fv *)
  | ICapture x -> Patterns.capture x
  | IConstant (x,c) -> Patterns.constant x c
  | IArrow _ ->
      raise (Patterns.Error "Arrows are not allowed in patterns")
  | IType _ -> assert false

(* [pat_node s] returns the pattern node of slot [s], creating it on first
   use.  The node is recorded in [pat_nodes] before its definition is
   computed; if the computation raises, the entry is removed again and the
   exception re-raised. *)
and pat_node s : Patterns.node =
  try SlotTable.find pat_nodes s
  with Not_found ->
    let x = Patterns.make (fv_slot s) in
    try
      SlotTable.add pat_nodes s x;
      Patterns.define x (pat s.d);
      x
    with exn -> SlotTable.remove pat_nodes s; raise exn
      (* For the toplevel ... *)
893

894

895
(* Sets of identifiers; used by [type_defs] to detect duplicate names. *)
module Ids = Set.Make(Id)
896
(* [type_defs env b] processes the mutually recursive type definitions [b],
   a list of [(name, ast)] pairs, and returns the list of [(name, type)]
   pairs in the same order.

   - A name defined twice raises a located error.
   - A definition containing a capture variable raises a located error.
   - A definition with a real location that yields an empty type produces a
     warning.
   Every resulting type is registered with the type printer under its
   name. *)
let type_defs env b =
  ignore
    (List.fold_left
       (fun seen (v,p) ->
          if Ids.mem v seen then
            raise_loc_generic p.loc
              ("Multiple definitions for the type identifier " ^
               (Ident.to_string v));
          Ids.add v seen
       ) Ids.empty b);

  let penv = derecurs_def (penv env) b in
  let b = List.map (fun (v,p) -> (v,p,compile (derecurs penv p))) b in
  flush_defs ();
  let b =
    List.map
      (fun (v,p,s) ->
         check_no_capture p.loc (fv_descr s);
         let t = typ s in
         if (p.loc <> noloc) && (Types.is_empty t) then
           warning p.loc
             ("This definition yields an empty type for " ^ (Ident.to_string v));
         (v,t)) b in
  List.iter (fun (v,t) -> Types.Print.register_global (Id.value v) t) b;
  b
921
922


923
924
925
926
927
(* [dump_types ppf env] prints on [ppf] the name of every identifier of
   [env] bound to a type, each preceded by a space. *)
let dump_types ppf env =
  let show v item =
    match item with
    | Type _ -> Format.fprintf ppf " %a" Ident.print v
    | _ -> ()
  in
  Env.iter show env.ids
928
929
(* [dump_type ppf env name] prints on [ppf] the type bound to [name].
   Raises [Error] when [name] is unbound or bound to a value. *)
let dump_type ppf env name =
  try
    (match Env.find (Ident.ident name) env.ids with
    | Type t -> Types.Print.print ppf t
    | _ -> raise Not_found)
  with Not_found ->
    raise (Error (Printf.sprintf "Type %s not found" (U.get_str name)))
935

936
937
938
(* [dump_schema_type ppf env (k, s, n)] prints on [ppf] the component [n]
   (of kind [k], if given) of the schema registered under name [s].
   Raises [Error] when the schema or the component is unknown. *)
let dump_schema_type ppf env (k, s, n) =
  let uri = find_schema s env in
  let descr = find_schema_descr_uri k uri n in
  Types.Print.print ppf descr
940

941
(* Prints the namespace table of [env] on [ppf]. *)
let dump_ns ppf env =
  Ns.dump_table ppf env.ns
943

944

945
(* [do_typ loc r] compiles the derecurs term [r] into a type node.
   Raises a located error at [loc] if [r] contains a capture variable. *)
let do_typ loc r =
(*
  DerecursTable.clear compile_slot_hash; 
  DerecursTable.clear compile_hash;
*)
  let s = compile_slot r in
  flush_defs ();
  check_no_capture loc (fv_slot s);
  typ_node s
954
   
955
956
(* [typ env p] translates the AST type [p] into a type node, resolving names
   in [env].  This definition shadows the internal [typ] on descriptions. *)
let typ env p =
  let d = derecurs (penv env) p in
  do_typ p.loc d
957
    
958
959
(* [pat env p] translates the AST pattern [p] into a pattern node, resolving
   names in [env].  This definition shadows the internal [pat] on
   descriptions.  A [Patterns.Error] becomes a located error at the
   location of [p], and an error located at [noloc] is relocated there. *)
let pat env p =
  let s = compile_slot (derecurs (penv env) p) in
  flush_defs ();
  try pat_node s
  with Patterns.Error e -> raise_loc_generic p.loc e
    | Location (loc,_,exn) when loc == noloc ->
        raise (Location (p.loc, `Full, exn))
965
966


967
968
(* II. Build skeleton *)

969

970
(* Function from a type and a boolean to a type.  Its use is not visible in
   this part of the file. *)
type type_fun = Types.t -> bool -> Types.t
971

972
(* Alias for [IdSet]. *)
module Fv = IdSet
973

Pietro Abate's avatar