atoms.ml 10.2 KB
Newer Older
1
open Encodings
2

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
(* TODO:
   - pretty-printing
   - efficient dispatch
*)

module Ns = struct
  include Pool.Make(Utf8)

  let prefixes_to_ns = State.ref "Ns.prefixes" (Hashtbl.create 63)
  let ns_to_prefixes = State.ref "Ns.prefixes" (Hashtbl.create 63)

  let register_prefix p ns =
    if (Hashtbl.mem !prefixes_to_ns p) || 
       (Hashtbl.mem !ns_to_prefixes ns) 
    then ()
    else ( Hashtbl.add !ns_to_prefixes ns p;
	   Hashtbl.add !prefixes_to_ns p ns )

  let counter = State.ref "Ns.prefixes" 0

  let rec fresh_prefix () =
    incr counter;
    let s = Utf8.mk (Printf.sprintf "ns%i" !counter) in
    if (Hashtbl.mem !prefixes_to_ns s) then fresh_prefix () else s

  let prefix ns =
    try Hashtbl.find !ns_to_prefixes ns
    with Not_found ->
      let p = fresh_prefix () in
      register_prefix p ns;
      p

  let dump_prefix_table ppf =
    Hashtbl.iter
      (fun ns p ->
	 Format.fprintf ppf "%a=>%a@." Utf8.print p Utf8.print (value ns))
      !ns_to_prefixes

  let empty = mk (Utf8.mk "")
  let _ = register_prefix (Utf8.mk "") empty
end

module Symbol = Pool.Make(Utf8)

type v = Ns.t * Symbol.t
let vcompare (ns1,x1) (ns2,x2) =
  let c = Ns.compare ns1 ns2 in if c <> 0 then c else Symbol.compare x1 x2
let vhash (ns,x) = ns + 17 * x

let atom_table = Hashtbl.create 63


(* Hash-consing: only to reduce memory usage *)
let mk ns x =
  let a = (ns, x) in
  try Hashtbl.find atom_table a 
  with Not_found ->
    let b = (ns, Symbol.mk x) in
    Hashtbl.add atom_table a b;
    b

let mk_ascii s = mk Ns.empty (Utf8.mk s)

let value (ns,x) = 
  assert (ns == Ns.empty);
  Symbol.value x
  (* get rid of this function *)

let print_prefix ppf ns =
  if ns == Ns.empty then () else
    Format.fprintf ppf "%a:" Utf8.print (Ns.prefix ns)

let print_symbol ppf x =
  Utf8.print ppf (Symbol.value x)

let vprint ppf ((ns,x) : v) = 
  Format.fprintf ppf "%a%a" print_prefix ns print_symbol x

let print_any_in_ns ppf ns =
  let ns = Ns.prefix ns in
  if Utf8.get_str ns = "" then Format.fprintf ppf ".:*"
  else Format.fprintf ppf "%a:*" Utf8.print ns

let print_v ppf a = 
  Format.fprintf ppf "`%a" vprint a

module SymbolSet = struct
  module SList = SortedList.Make_transp(SortedList.Lift(Symbol))
  type t = Finite of unit SList.t | Cofinite of unit SList.t

  let hash = function
    | Finite l -> SList.hash l
    | Cofinite l -> 17 * SList.hash l + 1

  let compare l1 l2 =
    match (l1,l2) with
      | Finite l1, Finite l2 
      | Cofinite l1, Cofinite l2 -> SList.compare l1 l2
      | Finite _, Cofinite _ -> -1
      | _ -> 1

  let empty = Finite []
  let any = Cofinite []
  let atom x = Finite [x]

  let cup s t =
    match (s,t) with
      | (Finite s, Finite t) -> Finite (SList.cup s t)
      | (Finite s, Cofinite t) -> Cofinite (SList.diff t s)
      | (Cofinite s, Finite t) -> Cofinite (SList.diff s t)
      | (Cofinite s, Cofinite t) -> Cofinite (SList.cap s t)

  let cap s t =
    match (s,t) with
      | (Finite s, Finite t) -> Finite (SList.cap s t)
      | (Finite s, Cofinite t) -> Finite (SList.diff s t)
      | (Cofinite s, Finite t) -> Finite (SList.diff t s)
      | (Cofinite s, Cofinite t) -> Cofinite (SList.cup s t)

  let diff s t =
    match (s,t) with
      | (Finite s, Cofinite t) -> Finite (SList.cap s t)
      | (Finite s, Finite t) -> Finite (SList.diff s t)
      | (Cofinite s, Cofinite t) -> Finite (SList.diff t s)
      | (Cofinite s, Finite t) -> Cofinite (SList.cup s t)

  let neg = function
      | Finite s -> Cofinite s
      | Cofinite s -> Finite s
	
  let contains x = function
    | Finite s -> SList.mem s x
    | Cofinite s -> not (SList.mem s x)
	
  let disjoint s t =
    match (s,t) with
      | (Finite s, Finite t) -> SList.disjoint s t
      | (Finite s, Cofinite t) -> SList.subset s t
      | (Cofinite s, Finite t) -> SList.subset t s
      | (Cofinite s, Cofinite t) -> false

  let rec iter_sep sep f = function
    | [] -> ()
    | [ h ] -> f h
    | h :: t -> f h; sep (); iter_sep sep f t

(* Atom 
   bla:*   bla:x
   :* :x *)

  let print ns ppf = function
    | Finite l -> 
	iter_sep 
	  (fun () -> Format.fprintf ppf " |@ ") 
	  (fun x -> print_v ppf (ns,x)) l
    | Cofinite t ->
	Format.fprintf ppf "@[`%a" print_any_in_ns ns;
	List.iter (fun x -> Format.fprintf ppf " \@ %a" print_v (ns,x)) t;
	Format.fprintf ppf "@]"
end

module T0 = SortedList.Make_transp(SortedList.Lift(Ns))
module T = T0.Map
type t = Finite of (unit,SymbolSet.t) T.map | Cofinite of (unit,SymbolSet.t) T.map
167

168
169
170
let empty = Finite T.empty
let any   = Cofinite T.empty
let any_in_ns ns = Finite (T.singleton ns SymbolSet.any)
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
let finite l =
  let l = 
    T.filter 
      (fun _ x -> match x with SymbolSet.Finite [] -> false | _ -> true)
      l in
  Finite l

let cofinite l =
  let l = 
    T.filter 
      (fun _ x -> match x with SymbolSet.Cofinite [] -> false | _ -> true)
      l in
  Cofinite l
  

let atom (ns,x) = Finite (T.singleton ns (SymbolSet.atom x))
188
189
190

let cup s t =
  match (s,t) with
191
192
193
194
    | (Finite s, Finite t) -> finite (T.merge SymbolSet.cup s t)
    | (Finite s, Cofinite t) -> cofinite (T.sub SymbolSet.diff t s)
    | (Cofinite s, Finite t) -> cofinite (T.sub SymbolSet.diff s t)
    | (Cofinite s, Cofinite t) -> cofinite (T.cap SymbolSet.cap s t)
195
196
197

let cap s t =
  match (s,t) with
198
199
200
201
    | (Finite s, Finite t) -> finite (T.cap SymbolSet.cap s t)
    | (Finite s, Cofinite t) -> finite (T.sub SymbolSet.diff s t)
    | (Cofinite s, Finite t) -> finite (T.sub SymbolSet.diff t s)
    | (Cofinite s, Cofinite t) -> cofinite (T.merge SymbolSet.cup s t)
202
	
203
let diff s t =
204
  match (s,t) with
205
206
207
208
    | (Finite s, Cofinite t) -> finite (T.cap SymbolSet.cap s t)
    | (Finite s, Finite t) -> finite (T.sub SymbolSet.diff s t)
    | (Cofinite s, Cofinite t) -> finite (T.sub SymbolSet.diff t s)
    | (Cofinite s, Finite t) -> cofinite (T.merge SymbolSet.cup s t)
209

210
let is_empty = function
211
  | Finite l -> T.is_empty l
212
  | _ -> false
213

214
215
216
217
218
219
220
221
222
223
224
225
226
let print_tag = function
  | Finite l ->
      (match T.get l with 
	| [ns, SymbolSet.Finite [a]] -> 
	    Some (fun ppf -> vprint ppf (ns,a))
	| [ns, SymbolSet.Cofinite []] -> 
	    Some (fun ppf -> Format.fprintf ppf "%a" print_any_in_ns ns)
	| _ -> None)
  | Cofinite l ->
      (match T.get l with
	 | [] -> 
	     Some (fun ppf -> Format.fprintf ppf "_")
	 | _ -> None)
227

228
229
230
231
232
233
234
235
236
237
238
239
240
241
let symbol_set ns = function
  | Finite s ->
      (try T.assoc ns s with Not_found -> SymbolSet.empty)
  | Cofinite s ->
      (try SymbolSet.neg (T.assoc ns s) with Not_found -> SymbolSet.any)

let contains (ns,x) = function
  | Finite s -> 
      (try SymbolSet.contains x (T.assoc ns s) with Not_found -> false)
  | Cofinite s -> 
      (try not (SymbolSet.contains x (T.assoc ns s)) with Not_found -> true)
	
let disjoint s t = 
  is_empty (cap t s) (* TODO: OPT *)
242

243
let print = function
244
245
246
247
248
249
250
251
252
253
254
255
  | Finite l -> 
      List.map (fun (ns,s) ppf -> SymbolSet.print ns ppf s) (T.get l)
  | Cofinite l -> 
      match T.get l with
	| [] -> [ fun ppf -> Format.fprintf ppf "Atom" ]
	| l ->
	    [ fun ppf ->
		Format.fprintf ppf "Atom";
		List.iter 
		  (fun (ns,s) -> 
		     Format.fprintf ppf " \@ (%a)" (SymbolSet.print ns) s)
		  l ]
256

257

258
259
260
let hash accu = function
  | Finite l -> 17 * accu + (T.hash SymbolSet.hash l)
  | Cofinite l -> 17 * (accu + 2) +  (T.hash SymbolSet.hash l)
261

262
let compare l1 l2 =
263
  match (l1,l2) with
264
265
266
    | Finite l1, Finite l2 
    | Cofinite l1, Cofinite l2 -> T.compare SymbolSet.compare l1 l2
    | Finite _, Cofinite _ -> -1
267
268
    | _ -> 1

269
270
let equal t1 t2 = 
  compare t1 t2 = 0
271

272
273
274
275
(* Optimize lookup:
   - decision tree
   - merge adjacent segment with same result
*)
276

277
278
279
280
281
(*
type 'a map = v -> 'a

let rec mk_map l v = 
  match l with
282
    | [] -> assert false
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
    | (x,y) :: rem -> if (contains v x) then y else mk_map rem v

let get_map v m = m v
*)


(* Patricia trees; code adapted from http://www.lri.fr/~filliatr/ftp/ocaml/misc/ptmap.ml *)

module IMap = struct
  type 'a t =
    | Empty
    | Leaf of int * 'a
    | Branch of int * int * 'a t * 'a t
	
  let zero_bit k m = (k land m) == 0
  let lowest_bit x = x land (-x)
  let branching_bit p0 p1 = lowest_bit (p0 lxor p1)
  let mask p m = p land (m-1)
  let match_prefix k p m = (mask k m) == p

  let rec find_def def k = function
    | Empty -> def
    | Leaf (j,x) -> if k == j then x else def
    | Branch (_, m, l, r) -> find_def def k (if zero_bit k m then l else r)

  let rec find k = function
    | Empty -> assert false
    | Leaf (j,x) -> x
    | Branch (_, m, l, r) -> find k (if zero_bit k m then l else r)

  let join p0 t0 p1 t1 =
    let m = branching_bit p0 p1 in
    if zero_bit p0 m 
    then Branch (mask p0 m, m, t0, t1)
    else Branch (mask p0 m, m, t1, t0)

  let rec add k x = function
    | Empty -> Leaf (k,x)
    | Leaf (j,_) as t -> 
	if j == k then Leaf (k,x) else join k (Leaf (k,x)) j t
    | Branch (p,m,t0,t1) as t ->
	if match_prefix k p m 
	then
	  if zero_bit k m 
	  then Branch (p, m, add k x t0, t1)
	  else Branch (p, m, t0, add k x t1)
	else
	  join k (Leaf (k,x)) p t
end

(* TODO: avoid option (using functional types instead ?) *)

type 'a map = 
    { min_ns : int;
      table  : 'a IMap.t array;
      table_def : 'a option array;
      def    : 'a option }

let get_map (ns,x) m =
  let i = ns - m.min_ns in
  if (i < 0) || (i >= Array.length m.table)
  then (match m.def with Some y -> y | None -> assert false)
  else 
    match m.table_def.(i) with
      | Some def -> IMap.find_def def x m.table.(i)
      | None -> IMap.find x m.table.(i)

let rec get_max = function
  | [ (ns,_) ] -> ns
  | _ :: l -> get_max l
  | [] -> assert false

let mk_map l =
  let min_ns = ref max_int and max_ns = ref min_int in
  let def = ref None in
  List.iter 
    (function
       | (Finite s, _) -> 
	   (match T.get s with
	     | [] -> ()
	     | (ns,_)::_ as l ->
		 min_ns := min !min_ns ns;
		 max_ns := max !max_ns (get_max l))
       | (Cofinite _, y) -> def := Some y) l;

  let n = !max_ns - !min_ns + 1 in
  let table = Array.make n IMap.Empty in
  let table_def = Array.make n None in
371

372
373
374
375
376
377
378
379
380
381
382
383
384
385
  let ofs = !min_ns in
  for ns = ofs to !max_ns do
    table.(ns - ofs) <-
      List.fold_left
        (fun accu (s, y) -> 
	   match (symbol_set ns s) with
	     | SymbolSet.Finite syms ->
		 List.fold_left (fun accu x -> IMap.add x y accu) accu syms
	     | SymbolSet.Cofinite syms ->
		 table_def.(ns - ofs) <- Some y; accu)
        IMap.Empty 
        l;
  done;
  { min_ns = ofs; table = table; table_def = table_def; def = !def }
386