value.ml 16 KB
Newer Older
1
open Ident
2
open Encodings
3

4
5
type t =
  | Pair of t * t
6
  | Xml of t * t * t
7
  | Record of t label_map
8
9
10
  | Atom of Atoms.V.t
  | Integer of Intervals.V.t
  | Char of Chars.V.t
11
  | Abstraction of (Types.descr * Types.descr) list * (t -> t)
12
  | Abstraction2 of t array * (Types.t * Types.t) list * Lambda.branches
13
  | String_latin1 of int * int * string * t
14
  | String_utf8 of Utf8.uindex * Utf8.uindex * Utf8.t * t
15
  | Concat of t * t
16
  | Absent
17

18
19
  | Delayed of t ref

20

21
22
exception CDuceExn of t

23
let nil = Atom Sequence.nil_atom
24
let string_latin1 s = String_latin1 (0,String.length s, s, nil)
25
let string_utf8 s = String_utf8 (Utf8.start_index s,Utf8.end_index s, s, nil)
26
27
let vtrue = Atom (Atoms.V.mk_ascii "true")
let vfalse = Atom (Atoms.V.mk_ascii "false")
28
29
let vbool x = if x then vtrue else vfalse

30
let vrecord l =
31
  let l = List.map (fun (qname,v) -> LabelPool.mk qname, v) l in
32
  Record (LabelMap.from_list_disj l)
33
34

let get_fields = function
35
  | Record map -> LabelMap.mapi_to_list (fun k v -> LabelPool.value k, v) map
36
  | _ -> raise (Invalid_argument "Value.get_fields")
37

38
39
40
let rec sequence = function
  | [] -> nil
  | h::t -> Pair (h, sequence t)
41

42
43
44
45
let concat v1 v2 = 
  match (v1,v2) with
    | (Atom _, v) | (v, Atom _) -> v
    | (v1,v2) -> Concat (v1,v2)
46
47


48
let rec const = function
49
50
51
  | Types.Integer i -> Integer i
  | Types.Atom a -> Atom a
  | Types.Char c -> Char c
52
53
54
55
56
57
  | Types.Pair (x,y) -> Pair (const x, const y)
  | Types.Xml (x, Types.Pair (y, z)) -> Xml (const x, const y, const z)
  | Types.Xml (_,_) -> assert false
  | Types.Record x -> Record (LabelMap.map const x)
  | Types.String (i,j,s,c) -> String_utf8 (i,j,s, const c)

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
let rec inv_const = function
  | Pair (x, y) -> Types.Pair (inv_const x, inv_const y)
  | Xml (x, y, z) ->
      Types.Pair (inv_const x, Types.Pair (inv_const y, inv_const z))
  | Record x -> Types.Record (LabelMap.map inv_const x)
  | Atom a -> Types.Atom a
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | String_latin1 (_, _, s, v) ->
      let s = Utf8.mk s in
      Types.String (Utf8.start_index s, Utf8.end_index s, s, inv_const v)
  | String_utf8 (i, j, s, v) -> Types.String (i, j, s, inv_const v)
  | Concat (x, y) as v ->
      let rec children = function
        | Concat (x, y) -> children x @ children y
        | x -> [x]
      in
      inv_const (sequence (children v))
  | _ -> failwith "inv_const"
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159

let normalize_string_latin1 i j s q = 
  if i = j then q else
    Pair (Char (Chars.V.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q))

let normalize_string_utf8 i j s q = 
  if Utf8.equal_index i j then q 
  else
    let (c,i) = Utf8.next s i in
    Pair (Char (Chars.V.mk_int c), String_utf8 (i,j,s,q))



(***** The dirty things **********)

type pair = { dummy : t; mutable pair_tl : t }
type str  = { dummy1 : t; dummy2 : t; dummy3 : t; mutable str_tl : t }

(* Could optimize this function by changing the order of the fields
   in String_latin1, String_utf8 *)

let set_cdr cell tl =
  match cell with
    | Pair (_,_) -> (Obj.magic cell).pair_tl <- tl
    | String_latin1 (_,_,_,_) 
    | String_utf8(_,_,_,_)-> (Obj.magic cell).str_tl <- tl
    | _ -> assert false

let rec append_cdr cell tl =
  match tl with
    | Concat (x,y) ->
	append_cdr (append_cdr cell x) y
    | Pair (x,tl) -> 
	let cell' = Pair (x,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | String_latin1 (s,i,j,tl) ->
	let cell' = String_latin1 (s,i,j,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | String_utf8 (s,i,j,tl) ->
	let cell' = String_utf8 (s,i,j,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | _ -> cell


let rec flatten = function
  | Pair (x,y) -> concat x (flatten y)
  | Concat (x,y) -> concat (flatten x) (flatten y)
  | q -> q

let eval_lazy_concat v =
  let accu = Obj.magic (Pair (nil,Absent)) in
  let rec aux accu = function
    | Concat (x,y) -> aux (append_cdr accu x) y
    | v -> set_cdr accu v
  in
  aux accu v;
  let nv = match snd accu with 
    | Pair (_,_) as nv -> nv
    | String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
    | String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
    | _ -> assert false in
  let v = Obj.repr v in
  let nv = Obj.repr nv in
  Obj.set_tag v (Obj.tag nv);
  Obj.set_field v 0 (Obj.field nv 0);
  Obj.set_field v 1 (Obj.field nv 1)
	  
  

(******************************)

let normalize = function
  | String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
  | String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
  | Concat (_,_) as v -> eval_lazy_concat v; v
  | v -> assert false




160
161
162
163
164
165
166
167
168
169
let buf = Buffer.create 100

let rec add_buf_utf8_to_latin1 src i j =
  if Utf8.equal_index i j  then ()
  else
    let (c,i) = Utf8.next src i in
    if (c > 255) then failwith "get_string_latin1";
    Buffer.add_char buf (Char.chr c);
    add_buf_utf8_to_latin1 src i j

170
171
172
let rec add_buf_latin1_to_utf8 src i j =
  for k = i to j - 1 do 
    Utf8.store buf (Char.code src.[k]) 
173
174
175
176
  done

let get_string_latin1 e =
  let rec aux = function
177
    | Pair (Char x,y) -> Buffer.add_char buf (Chars.V.to_char x); aux y
178
179
    | String_latin1 (i,j,src,y) -> Buffer.add_substring buf src i (j - i); aux y
    | String_utf8 (i,j,src,y) -> add_buf_utf8_to_latin1 src i j; aux y
180
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
181
182
183
184
185
186
187
188
    | _ -> () in
  aux e;
  let s = Buffer.contents buf in
  Buffer.clear buf;
  s

let get_string_utf8 e =
  let rec aux = function
189
    | Pair (Char x,y) -> Utf8.store buf (Chars.V.to_int x); aux y
190
    | String_latin1 (i,j,src,y) -> add_buf_latin1_to_utf8 src i j; aux y
191
    | String_utf8 (i,j,src,y) -> Utf8.copy buf src i j; aux y
192
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
193
194
    | q -> q in
  let q = aux e in
195
196
  let s = Buffer.contents buf in
  Buffer.clear buf;
197
  (Utf8.mk s, q)
198

199
let get_int = function
200
  | Integer i when Intervals.V.is_int i -> Intervals.V.get_int i
201
  | _ -> raise (Invalid_argument "Value.get_int")
202

203
204
205
let rec is_seq = function
  | Pair (_, y) when is_seq y -> true
  | Atom a when a = Sequence.nil_atom -> true
206
  | String_latin1 (_,_,_,y) | String_utf8 (_,_,_,y) when is_seq y  -> true
207
  | Concat (_,_) as v -> eval_lazy_concat v; is_seq v
208
209
210
  | _ -> false

let rec is_str = function
211
  | Pair (Char _, y) -> is_str y
212
  | Atom a when a = Sequence.nil_atom -> true
213
  | String_latin1 (_,_,_,q) | String_utf8(_,_,_,q) -> is_str q
214
  | Concat (_,_) as v -> eval_lazy_concat v; is_str v
215
216
217
  | _ -> false

let rec print ppf v =
218
219
220
221
222
223
  if is_str v then 
    (Format.fprintf ppf "\"";
     print_quoted_str ppf v;
     Format.fprintf ppf "\"")
  else if is_seq v then Format.fprintf ppf "[ @[<hv>%a@]]" print_seq v
  else  match v with
224
    | Pair (x,y) -> Format.fprintf ppf "(%a,%a)" print x print y
225
    | Xml (x,y,z)  -> print_xml ppf x y z
226
    | Record l -> Format.fprintf ppf "{%a }" print_record (LabelMap.get l)
227
228
229
    | Atom a ->	Atoms.V.print_quote ppf a
    | Integer i -> Intervals.V.print ppf i
    | Char c ->	Chars.V.print ppf c
230
    | Abstraction _ -> Format.fprintf ppf "<fun>"
231
    | Abstraction2 _ -> Format.fprintf ppf "<fun>"
232
233
234
235
236
    | String_latin1 (i,j,s,q) -> 
	Format.fprintf ppf "<string_latin1:%i-%i,%S,%a>" i j s print q
    | String_utf8 (i,j,s,q) -> 
	Format.fprintf ppf "<string_utf8:%i-%i,%S,%a>" 
	(Utf8.get_idx i) (Utf8.get_idx j) (Utf8.get_str s) print q
237
238
    | Concat (x,y) ->
	Format.fprintf ppf "<concat:%a;%a>" print x print y
239
240
    | Absent -> 	
	Format.fprintf ppf "<[absent]>"
241
242
    | Delayed x ->
	Format.fprintf ppf "<[delayed]>"
243
and print_quoted_str ppf = function
244
  | Pair (Char c, q) -> 
245
      Chars.V.print_in_string ppf c; 
246
      print_quoted_str ppf q
247
  | String_latin1 (i,j,s, q) ->
248
      for k = i to j - 1 do
249
	Chars.V.print_in_string ppf (Chars.V.mk_char s.[k])
250
      done;
251
      print_quoted_str ppf q
252
  | String_utf8 (i,j,s, q) ->
253
254
255
256
257
(*      Format.fprintf ppf "UTF8:{"; *)
      let rec aux i =
	if Utf8.equal_index i j then q
	else 
	  let (c,i) =Utf8.next s i in
258
	  Chars.V.print_in_string ppf (Chars.V.mk_int c);
259
260
261
262
263
264
	  aux i
      in
      let q = aux i in
(*      Format.fprintf ppf "}"; *)
      print_quoted_str ppf q
  | q -> q
265
and print_seq ppf = function
266
  | (Pair(Char _, _)|String_latin1 (_,_,_,_)|String_utf8 (_,_,_,_)) as s ->
267
      Format.fprintf ppf "'";
268
269
270
271
272
273
      let q = print_quoted_str ppf s in
      Format.fprintf ppf "'@ ";
      print_seq ppf q
  | Pair (x,y) -> 
      Format.fprintf ppf "@[%a@]@ " print x;
      print_seq ppf y
274
275
  | _ -> ()

276
and print_xml ppf tag attr content =
277
278
279
280
281
282
283
284
285
286
  if is_seq content then
    Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]"
      print_tag tag
      print_attr attr
      print_seq content
  else
    Format.fprintf ppf "@[<hv2><%a%a>@ %a@]"
      print_tag tag
      print_attr attr
      print content
287
and print_tag ppf = function
288
  | Atom tag -> Atoms.V.print ppf tag
289
290
291
292
  | tag -> Format.fprintf ppf "(%a)" print tag
and print_attr ppf = function
  | Record attr -> print_record ppf (LabelMap.get attr)
  | attr -> Format.fprintf ppf "(%a)" print attr
293
294
295

and print_record ppf = function
  | [] -> ()
296
297
  | [f] -> Format.fprintf ppf " %a" print_field f
  | f :: rem -> Format.fprintf ppf " %a;%a" print_field f print_record rem
298
299

and print_field ppf (l,v) = 
300
  Format.fprintf ppf "%a=%a" Label.print (LabelPool.value l) print v  
301

302
303
304
305
306
307
308
309
310
311
312
313
314
315
let dump_xml ppf v =
  let rec aux ppf = function
    | Pair (x, y) ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<pair>@,%a@,%a@,</pair>@]" aux x aux y
    | Xml (x, y, z) ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<xml>@,%a@,%a@,%a@,</xml>@]" aux x aux y aux z
    | Record x ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<record>@,%a@,</record>@]"
          (fun ppf x -> print_record ppf (LabelMap.get x)) x
    | Atom a ->
        Format.fprintf ppf "@[<hv1>";
316
        Format.fprintf ppf "<atom>@,%a@,</atom>@]"
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
          (fun ppf x -> Atoms.V.print ppf x) a
    | Integer i ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<integer>@,%a@,</integer>@]"
          (fun ppf x -> Intervals.V.print ppf x) i
    | Char c ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<char>@,%a@,</char>@]"
          (fun ppf x -> Chars.V.print ppf x) c
    | Abstraction _ ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<abstraction />@]"
    | Abstraction2 _ ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<abstraction2 />@]"
    | String_latin1 (_, _, s, v) ->
        Format.fprintf ppf "@[<hv1>";
334
        Format.fprintf ppf "<string_latin1>@,%s@,</string_latin1>@," s;
335
336
337
338
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<follow>@,%a@,</follow>@]</string_latin1>@]" aux v
    | String_utf8 (_, _, s, v) ->
        Format.fprintf ppf "@[<hv1>";
339
340
        Format.fprintf ppf "<string_utf8>@,%s@,</string_utf8>@,"
          (Utf8.get_str s);
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<follow>@,%a@,</follow>@]</string_utf8>@]" aux v
    | Concat (x, y) ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<concat>@,%a@,%a@,</concat>@]" aux x aux y
    | Absent ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<absent />@]"
    | Delayed _ ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<delayed />@]"
  in
  Format.fprintf ppf "@[<hv1>";
  Format.fprintf ppf "<value>@,%a@,</value>@]" aux v

356
357
358
359
let rec compare x y =
  if (x == y) then 0
  else
    match (x,y) with
360
      | Pair (x1,x2), Pair (y1,y2) ->
361
362
	  let c = compare x1 y1 in if c <> 0 then c 
	  else compare x2 y2	    
363
364
      | Xml (x1,x2,x3), Xml (y1,y2,y3) ->
	  let c = compare x1 y1 in if c <> 0 then c 
365
	  else let c = compare x2 y2 in if c <> 0 then c 
366
	  else compare x3 y3
367
      | Record rx, Record ry -> LabelMap.compare compare rx ry
368
369
370
      | Atom x, Atom y -> Atoms.V.compare x y
      | Integer x, Integer y -> Intervals.V.compare x y
      | Char x, Char y -> Chars.V.compare x y
371
372
      | Abstraction (_,_), _
      | _, Abstraction (_,_) -> 
373
	  raise (CDuceExn (string_latin1 "comparing functional values"))
374
375
376
      | Abstraction2 (_,_,_), _
      | _, Abstraction2 (_,_,_) -> 
	  raise (CDuceExn (string_latin1 "comparing functional values"))
377
378
      | Absent,_ | _,Absent 
      | Delayed _, _ | _, Delayed _ -> assert false
379
380
      | Concat (_,_) as x, y -> eval_lazy_concat x; compare x y
      | x, (Concat (_,_) as y) -> eval_lazy_concat y; compare x y
381
      | String_latin1 (ix,jx,sx,qx), String_latin1 (iy,jy,sy,qy) ->
382
383
384
385
386
387
388
	  if (sx == sy) && (ix = iy) && (jx = jy) then compare qx qy
	  else
	    (* Note: we would like to compare first jx-ix and jy-iy,
	       but this is not compatible with the equivalence of values *)
	    let rec aux ix iy =
	      if (ix = jx) then
		if (iy = jy) then compare qx qy
389
		else compare qx (normalize_string_latin1 iy jy sy qy)
390
	      else
391
		if (iy = jy) then compare (normalize_string_latin1 ix jx sx qx) qy
392
393
394
395
396
397
398
		else
		  let c1 = String.unsafe_get sx ix
		  and c2 = String.unsafe_get sy iy in
		  if c1 < c2 then -1 else
		    if c1 > c2 then 1 else aux (ix + 1) (iy + 1)
	    in
	    aux ix iy
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
      | String_utf8 (ix,jx,sx,qx), String_utf8 (iy,jy,sy,qy) ->
	  if (sx == sy) && (Utf8.equal_index ix iy) && (Utf8.equal_index jx jy) then compare qx qy
	  else
	    let rec aux ix iy =
	      if (Utf8.equal_index ix jx) then
		if (Utf8.equal_index iy jy) then compare qx qy
		else compare qx (normalize_string_utf8 iy jy sy qy)
	      else
		if (Utf8.equal_index iy jy) then compare (normalize_string_utf8 ix jx sx qx) qy
		else
		  let (c1,ix) = Utf8.next sx ix in
		  let (c2,iy) = Utf8.next sy iy in
		  if c1 < c2 then -1 else
		    if c1 > c2 then 1 else aux ix iy
	    in
	    aux ix iy
      | String_latin1 (i,j,s,q), _ -> compare (normalize_string_latin1 i j s q) y
      | _, String_latin1 (i,j,s,q) -> compare x (normalize_string_latin1 i j s q)
      | String_utf8 (i,j,s,q), _   -> compare (normalize_string_utf8 i j s q) y
      | _, String_utf8 (i,j,s,q)   -> compare x (normalize_string_utf8 i j s q)
419
420
421
422
423
424
425

      | Pair (_,_), _ -> -1 | _, Pair(_,_) -> 1
      | Xml (_,_,_),_ -> -1 | _, Xml(_,_,_) -> 1
      | Record _,_ -> -1 | _, Record _ -> 1
      | Atom _,_ -> -1 | _, Atom _ -> 1
      | Integer _,_ -> -1 | _, Integer _ -> 1

426
427
428
let iter_xml pcdata_callback other_callback =
  let rec aux = function
    | v when compare v nil = 0 -> ()
429
    | Pair (Char c, tl) ->
430
        pcdata_callback (U.mk_char (Chars.V.to_int c));
431
        aux tl
432
433
    | String_latin1 (i,j,s,tl) ->
        pcdata_callback (U.mk_latin1 (String.sub s i j));
434
        aux tl
435
    | String_utf8 (i,j,s,tl) ->
436
437
438
439
440
        pcdata_callback (U.mk (U.get_substr s i j));
        aux tl
    | Pair (hd, tl) ->
        other_callback hd;
        aux tl
441
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
442
    | v -> raise (Invalid_argument "Value.iter_xml")
443
444
445
446
  in
  function
    | Xml (_,_,cont) -> aux cont
    | _ -> raise (Invalid_argument "Value.iter_xml")
447

448
449
450
451
452
453
let map_xml map_pcdata map_other =
  let patch_string_utf8 cont = function
    | String_utf8 (i, j, u, v) when compare v nil = 0 ->
        String_utf8 (i, j, u, cont)
    | _ -> assert false
  in
454
455
456
457
458
  let rec aux v =
    match v with
    | Pair (Char _, _) | String_latin1 _ | String_utf8 _ ->
        let (u, rest) = get_string_utf8 v in
        patch_string_utf8 (aux rest) (string_utf8 (map_pcdata u))
459
460
461
462
463
464
465
466
    | Pair (hd, tl) -> Pair (map_other hd, aux tl)
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
    | v when compare v nil = 0 -> v
    | v -> raise (Invalid_argument "Value.map_xml")
  in
  function
    | Xml (tag,attrs,cont) -> Xml (tag, attrs, aux cont)
    | _ -> raise (Invalid_argument "Value.map_xml")
467

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
  (** set of values *)

type tmp = t
module OrderedValue =
  struct
    type t = tmp
    let compare = compare
  end
module ValueSet = Set.Make(OrderedValue)

let ( |<| ) x y = compare x y < 0
let ( |>| ) x y = compare x y > 0
let ( |<=| ) x y = let c = compare x y in c < 0 || c = 0
let ( |>=| ) x y = let c = compare x y in c > 0 || c = 0
let ( |=| ) x y = compare x y = 0
483
let equal = ( |=| )
484
let ( |<>| ) x y = compare x y <> 0
485

486
487
488
489
490
491
(*
let rec concat l1 l2 = match l1 with
  | Pair (x,y) -> Pair (x, concat y l2)
  | String_latin1 (s,i,j,q) -> String_latin1 (s,i,j, concat q l2)
  | String_utf8 (s,i,j,q) -> String_utf8 (s,i,j, concat q l2)
  | q -> l2
492

493
494
495
let rec flatten = function
  | Pair (x,y) -> concat x (flatten y)
  | q -> q
496

497
*)
498
499
500
501

let failwith' s = raise (CDuceExn (string_latin1 s))
let raise' v = raise (CDuceExn v)