atoms.ml 10.8 KB
Newer Older
1
open Encodings
2

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
module Ns = struct
  include Pool.Make(Utf8)

  let prefixes_to_ns = State.ref "Ns.prefixes" (Hashtbl.create 63)
  let ns_to_prefixes = State.ref "Ns.prefixes" (Hashtbl.create 63)

  let register_prefix p ns =
    if (Hashtbl.mem !prefixes_to_ns p) || 
       (Hashtbl.mem !ns_to_prefixes ns) 
    then ()
    else ( Hashtbl.add !ns_to_prefixes ns p;
	   Hashtbl.add !prefixes_to_ns p ns )

  let counter = State.ref "Ns.prefixes" 0

  let rec fresh_prefix () =
    incr counter;
    let s = Utf8.mk (Printf.sprintf "ns%i" !counter) in
    if (Hashtbl.mem !prefixes_to_ns s) then fresh_prefix () else s

  let prefix ns =
    try Hashtbl.find !ns_to_prefixes ns
    with Not_found ->
      let p = fresh_prefix () in
      register_prefix p ns;
      p

  let dump_prefix_table ppf =
    Hashtbl.iter
      (fun ns p ->
	 Format.fprintf ppf "%a=>%a@." Utf8.print p Utf8.print (value ns))
      !ns_to_prefixes

  let empty = mk (Utf8.mk "")
  let _ = register_prefix (Utf8.mk "") empty
end

module Symbol = Pool.Make(Utf8)

type v = Ns.t * Symbol.t
let vcompare (ns1,x1) (ns2,x2) =
  let c = Ns.compare ns1 ns2 in if c <> 0 then c else Symbol.compare x1 x2
let vhash (ns,x) = ns + 17 * x

let atom_table = Hashtbl.create 63


(* Hash-consing: only to reduce memory usage *)
let mk ns x =
  let a = (ns, x) in
  try Hashtbl.find atom_table a 
  with Not_found ->
    let b = (ns, Symbol.mk x) in
    Hashtbl.add atom_table a b;
    b

let mk_ascii s = mk Ns.empty (Utf8.mk s)

let value (ns,x) = 
  assert (ns == Ns.empty);
  Symbol.value x
  (* get rid of this function *)

let print_prefix ppf ns =
  if ns == Ns.empty then () else
    Format.fprintf ppf "%a:" Utf8.print (Ns.prefix ns)

let print_symbol ppf x =
  Utf8.print ppf (Symbol.value x)

let vprint ppf ((ns,x) : v) = 
  Format.fprintf ppf "%a%a" print_prefix ns print_symbol x

let print_any_in_ns ppf ns =
  let ns = Ns.prefix ns in
  if Utf8.get_str ns = "" then Format.fprintf ppf ".:*"
  else Format.fprintf ppf "%a:*" Utf8.print ns

let print_v ppf a = 
  Format.fprintf ppf "`%a" vprint a

module SymbolSet = struct
  module SList = SortedList.Make_transp(SortedList.Lift(Symbol))
  type t = Finite of unit SList.t | Cofinite of unit SList.t

  let hash = function
    | Finite l -> SList.hash l
    | Cofinite l -> 17 * SList.hash l + 1

  let compare l1 l2 =
    match (l1,l2) with
      | Finite l1, Finite l2 
      | Cofinite l1, Cofinite l2 -> SList.compare l1 l2
      | Finite _, Cofinite _ -> -1
      | _ -> 1

  let empty = Finite []
  let any = Cofinite []
  let atom x = Finite [x]

  let cup s t =
    match (s,t) with
      | (Finite s, Finite t) -> Finite (SList.cup s t)
      | (Finite s, Cofinite t) -> Cofinite (SList.diff t s)
      | (Cofinite s, Finite t) -> Cofinite (SList.diff s t)
      | (Cofinite s, Cofinite t) -> Cofinite (SList.cap s t)

  let cap s t =
    match (s,t) with
      | (Finite s, Finite t) -> Finite (SList.cap s t)
      | (Finite s, Cofinite t) -> Finite (SList.diff s t)
      | (Cofinite s, Finite t) -> Finite (SList.diff t s)
      | (Cofinite s, Cofinite t) -> Cofinite (SList.cup s t)

  let diff s t =
    match (s,t) with
      | (Finite s, Cofinite t) -> Finite (SList.cap s t)
      | (Finite s, Finite t) -> Finite (SList.diff s t)
      | (Cofinite s, Cofinite t) -> Finite (SList.diff t s)
      | (Cofinite s, Finite t) -> Cofinite (SList.cup s t)

  let neg = function
      | Finite s -> Cofinite s
      | Cofinite s -> Finite s
	
  let contains x = function
    | Finite s -> SList.mem s x
    | Cofinite s -> not (SList.mem s x)
	
  let disjoint s t =
    match (s,t) with
      | (Finite s, Finite t) -> SList.disjoint s t
      | (Finite s, Cofinite t) -> SList.subset s t
      | (Cofinite s, Finite t) -> SList.subset t s
      | (Cofinite s, Cofinite t) -> false

  let rec iter_sep sep f = function
    | [] -> ()
    | [ h ] -> f h
    | h :: t -> f h; sep (); iter_sep sep f t

(* Atom 
   bla:*   bla:x
   :* :x *)

  let print ns ppf = function
    | Finite l -> 
	iter_sep 
	  (fun () -> Format.fprintf ppf " |@ ") 
	  (fun x -> print_v ppf (ns,x)) l
    | Cofinite t ->
	Format.fprintf ppf "@[`%a" print_any_in_ns ns;
	List.iter (fun x -> Format.fprintf ppf " \@ %a" print_v (ns,x)) t;
	Format.fprintf ppf "@]"
end

module T0 = SortedList.Make_transp(SortedList.Lift(Ns))
module T = T0.Map
type t = Finite of (unit,SymbolSet.t) T.map | Cofinite of (unit,SymbolSet.t) T.map
162

163
164
165
let empty = Finite T.empty
let any   = Cofinite T.empty
let any_in_ns ns = Finite (T.singleton ns SymbolSet.any)
166

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
let finite l =
  let l = 
    T.filter 
      (fun _ x -> match x with SymbolSet.Finite [] -> false | _ -> true)
      l in
  Finite l

let cofinite l =
  let l = 
    T.filter 
      (fun _ x -> match x with SymbolSet.Cofinite [] -> false | _ -> true)
      l in
  Cofinite l
  

let atom (ns,x) = Finite (T.singleton ns (SymbolSet.atom x))
183
184
185

let cup s t =
  match (s,t) with
186
187
188
189
    | (Finite s, Finite t) -> finite (T.merge SymbolSet.cup s t)
    | (Finite s, Cofinite t) -> cofinite (T.sub SymbolSet.diff t s)
    | (Cofinite s, Finite t) -> cofinite (T.sub SymbolSet.diff s t)
    | (Cofinite s, Cofinite t) -> cofinite (T.cap SymbolSet.cap s t)
190
191
192

let cap s t =
  match (s,t) with
193
194
195
196
    | (Finite s, Finite t) -> finite (T.cap SymbolSet.cap s t)
    | (Finite s, Cofinite t) -> finite (T.sub SymbolSet.diff s t)
    | (Cofinite s, Finite t) -> finite (T.sub SymbolSet.diff t s)
    | (Cofinite s, Cofinite t) -> cofinite (T.merge SymbolSet.cup s t)
197
	
198
let diff s t =
199
  match (s,t) with
200
201
202
203
    | (Finite s, Cofinite t) -> finite (T.cap SymbolSet.cap s t)
    | (Finite s, Finite t) -> finite (T.sub SymbolSet.diff s t)
    | (Cofinite s, Cofinite t) -> finite (T.sub SymbolSet.diff t s)
    | (Cofinite s, Finite t) -> cofinite (T.merge SymbolSet.cup s t)
204

205
let is_empty = function
206
  | Finite l -> T.is_empty l
207
  | _ -> false
208

209
210
211
212
213
214
215
216
217
218
219
220
221
let print_tag = function
  | Finite l ->
      (match T.get l with 
	| [ns, SymbolSet.Finite [a]] -> 
	    Some (fun ppf -> vprint ppf (ns,a))
	| [ns, SymbolSet.Cofinite []] -> 
	    Some (fun ppf -> Format.fprintf ppf "%a" print_any_in_ns ns)
	| _ -> None)
  | Cofinite l ->
      (match T.get l with
	 | [] -> 
	     Some (fun ppf -> Format.fprintf ppf "_")
	 | _ -> None)
222

223
224
225
226
227
228
229
230
231
232
233
234
235
236
let symbol_set ns = function
  | Finite s ->
      (try T.assoc ns s with Not_found -> SymbolSet.empty)
  | Cofinite s ->
      (try SymbolSet.neg (T.assoc ns s) with Not_found -> SymbolSet.any)

let contains (ns,x) = function
  | Finite s -> 
      (try SymbolSet.contains x (T.assoc ns s) with Not_found -> false)
  | Cofinite s -> 
      (try not (SymbolSet.contains x (T.assoc ns s)) with Not_found -> true)
	
let disjoint s t = 
  is_empty (cap t s) (* TODO: OPT *)
237

238
let print = function
239
240
241
242
243
244
245
246
247
248
249
250
  | Finite l -> 
      List.map (fun (ns,s) ppf -> SymbolSet.print ns ppf s) (T.get l)
  | Cofinite l -> 
      match T.get l with
	| [] -> [ fun ppf -> Format.fprintf ppf "Atom" ]
	| l ->
	    [ fun ppf ->
		Format.fprintf ppf "Atom";
		List.iter 
		  (fun (ns,s) -> 
		     Format.fprintf ppf " \@ (%a)" (SymbolSet.print ns) s)
		  l ]
251

252

253
254
255
let hash accu = function
  | Finite l -> 17 * accu + (T.hash SymbolSet.hash l)
  | Cofinite l -> 17 * (accu + 2) +  (T.hash SymbolSet.hash l)
256

257
let compare l1 l2 =
258
  match (l1,l2) with
259
260
261
    | Finite l1, Finite l2 
    | Cofinite l1, Cofinite l2 -> T.compare SymbolSet.compare l1 l2
    | Finite _, Cofinite _ -> -1
262
263
    | _ -> 1

264
265
let equal t1 t2 = 
  compare t1 t2 = 0
266

267
268
269
270
(* Optimize lookup:
   - decision tree
   - merge adjacent segment with same result
*)
271

272
273
274
275
276
(*
type 'a map = v -> 'a

let rec mk_map l v = 
  match l with
277
    | [] -> assert false
278
279
280
281
282
283
284
285
286
287
288
289
290
    | (x,y) :: rem -> if (contains v x) then y else mk_map rem v

let get_map v m = m v
*)


(* Patricia trees; code adapted from http://www.lri.fr/~filliatr/ftp/ocaml/misc/ptmap.ml *)

module IMap = struct
  type 'a t =
    | Empty
    | Leaf of int * 'a
    | Branch of int * int * 'a t * 'a t
291
292
293
294
295
296

  type 'a s =
    | DError
    | DReturn of 'a
    | DLeaf of int * 'a * 'a
    | DBranch of int * int * 'a s * 'a s
297
298
299
300
301
302
303
	
  let zero_bit k m = (k land m) == 0
  let lowest_bit x = x land (-x)
  let branching_bit p0 p1 = lowest_bit (p0 lxor p1)
  let mask p m = p land (m-1)
  let match_prefix k p m = (mask k m) == p

304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
  let rec prepare_def y = function
    | Empty -> DReturn y
    | Leaf (k,x) -> DLeaf (k,x,y)
    | Branch (p,m,t0,t1) -> 
	DBranch (p,m,prepare_def y t0, prepare_def y t1)

  let rec prepare_nodef = function
    | Empty -> DError
    | Leaf (k,x) -> DReturn x
    | Branch (p,m,t0,t1) ->
	match (prepare_nodef t0, prepare_nodef t1) with
	  | (DReturn x0, DReturn x1) when x0 == x1 -> DReturn x0
	  | (t0,t1) -> DBranch (p,m,t0,t1)

  let prepare def y =
    match def with 
      | None -> prepare_nodef y
      | Some def -> prepare_def def y
322
323

  let rec find k = function
324
325
326
327
    | DError -> assert false
    | DReturn y -> y
    | DLeaf (j,x,y) -> if k == j then x else y
    | DBranch (_, m, l, r) -> find k (if zero_bit k m then l else r)
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347

  let join p0 t0 p1 t1 =
    let m = branching_bit p0 p1 in
    if zero_bit p0 m 
    then Branch (mask p0 m, m, t0, t1)
    else Branch (mask p0 m, m, t1, t0)

  let rec add k x = function
    | Empty -> Leaf (k,x)
    | Leaf (j,_) as t -> 
	if j == k then Leaf (k,x) else join k (Leaf (k,x)) j t
    | Branch (p,m,t0,t1) as t ->
	if match_prefix k p m 
	then
	  if zero_bit k m 
	  then Branch (p, m, add k x t0, t1)
	  else Branch (p, m, t0, add k x t1)
	else
	  join k (Leaf (k,x)) p t

348
349
350
351
352
353
354
355
  let rec dump f ppf = function
    | DError -> Format.fprintf ppf "Error" 
    | DReturn x -> Format.fprintf ppf "Return %a" f x 
    | DLeaf(j,x,y) -> Format.fprintf ppf "Leaf(%i,%a,%a)" j f x f y
    | DBranch (p,m,t0,t1) -> 
	Format.fprintf ppf "B(%i,%i,%a,%a)" p m (dump f) t0 (dump f) t1
	
end
356

357
type 'a map = 'a IMap.s IMap.s
358

359
let get_map (ns,x) m = IMap.find x (IMap.find ns m)
360

361
362
module IntSet = 
  Set.Make(struct type t = int let compare (x:int) y = Pervasives.compare x y end)
363
364

let mk_map l =
365
  let all_ns = ref IntSet.empty in
366
367
368
369
  let def = ref None in
  List.iter 
    (function
       | (Finite s, _) -> 
370
371
372
373
374
375
	   List.iter (fun (ns,_) -> all_ns := IntSet.add ns !all_ns) (T.get s)
       | (Cofinite _, y) -> def := Some (IMap.DReturn y)) l;

  let one_ns ns =
    let def = ref None in
    let t = 
376
377
378
379
380
381
      List.fold_left
        (fun accu (s, y) -> 
	   match (symbol_set ns s) with
	     | SymbolSet.Finite syms ->
		 List.fold_left (fun accu x -> IMap.add x y accu) accu syms
	     | SymbolSet.Cofinite syms ->
382
		 def := Some y; accu)
383
        IMap.Empty 
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
        l in
    IMap.prepare !def t
  in

  let t = 
    List.fold_left (fun accu ns -> IMap.add ns (one_ns ns) accu)
      IMap.Empty 
      (IntSet.elements !all_ns) in
  let t = IMap.prepare !def t in

(*
  let rec rank y i = function
    | (_,x)::_ when x == y -> i
    | _::r -> rank y (succ i) r
    | [] -> assert false in

  let dump_ns =
    IMap.dump (fun ppf y -> Format.fprintf ppf "[%i]" (rank y 0 l)) in

  Format.fprintf Format.std_formatter "table: %a@." 
    (IMap.dump (fun ppf y -> Format.fprintf ppf "[%a]" dump_ns y)) t;
*)

  t




412