value.ml 25 KB
Newer Older
1
open Ident
2
open Encodings
3

4
5
type t =
  | Pair of t * t
6
  | Xml of t * t * t
7
  | XmlNs of t * t * t * Ns.table
8
  | Record of t Imap.t
9
10
11
  | Atom of Atoms.V.t
  | Integer of Intervals.V.t
  | Char of Chars.V.t
12
  | Abstraction of (Types.descr * Types.descr) list option * (t -> t)
13
  | Abstraction2 of t array * (Types.t * Types.t) list * Lambda.branches
14
  | Abstract of Types.Abstract.V.t
15
  | String_latin1 of int * int * string * t
16
  | String_utf8 of Utf8.uindex * Utf8.uindex * Utf8.t * t
17
  | Concat of t * t
18
  | Absent
19

20
21
22
23
24
25
(* 
  The only representation of the empty sequence is nil.
  In particular, in String_latin1 and String_utf8, the string cannot be empty.
*)

let dump_forward = ref (fun _ _ -> assert false)
26

27
28
exception CDuceExn of t

29
let nil = Atom Sequence.nil_atom
30
31
32
33
34
35
36
37
38
let string_latin1 s = 
  if String.length s = 0 then nil
  else String_latin1 (0,String.length s, s, nil)
let string_utf8 s = 
  if String.length (Utf8.get_str s) = 0 then nil
  else String_utf8 (Utf8.start_index s,Utf8.end_index s, s, nil)
let substring_utf8 i j s q =
  if Utf8.equal_index i j then q 
  else String_utf8 (i,j,s,q)
39
40
let vtrue = Atom (Atoms.V.mk_ascii "true")
let vfalse = Atom (Atoms.V.mk_ascii "false")
41
42
let vbool x = if x then vtrue else vfalse

43
let vrecord l =
44
  let l = List.map (fun (qname,v) -> LabelPool.mk qname, v) l in
45
  Record (Imap.create (Array.of_list l))
46
47

let get_fields = function
48
  | Record map -> List.map (fun (k,v) -> LabelPool.value k, v) (Imap.elements map)
49
  | _ -> raise (Invalid_argument "Value.get_fields")
50

51
52
53
let rec sequence = function
  | [] -> nil
  | h::t -> Pair (h, sequence t)
54

55
56
57
58
59
60
let rec sequence_rev accu = function
  | [] -> accu
  | h::t -> sequence_rev (Pair (h,accu)) t

let sequence_rev l = sequence_rev nil l

61
62
63
64
65
66
let sequence_of_array a =
  let rec aux accu i =
    if (i = 0) then accu
    else let i = pred i in aux (Pair (a.(i), accu)) i in
  aux nil (Array.length a)

67
68
69
70
71
72
73
let tuple_of_array a =
  let rec aux accu i =
    if (i = 0) then accu
    else let i = pred i in aux (Pair (a.(i), accu)) i in
  let n = Array.length a in
  aux a.(n) (pred n)

74
75
76
77
let concat v1 v2 = 
  match (v1,v2) with
    | (Atom _, v) | (v, Atom _) -> v
    | (v1,v2) -> Concat (v1,v2)
78

79
80
81
let append v1 v2 =
  concat v1 (Pair (v2,nil))

82
let raise' v = raise (CDuceExn v)
83
let failwith' s = raise' (string_latin1 s)
84

85

86
let rec const = function
87
88
89
  | Types.Integer i -> Integer i
  | Types.Atom a -> Atom a
  | Types.Char c -> Char c
90
91
92
  | Types.Pair (x,y) -> Pair (const x, const y)
  | Types.Xml (x, Types.Pair (y, z)) -> Xml (const x, const y, const z)
  | Types.Xml (_,_) -> assert false
93
94
95
  | Types.Record x -> 
      let x = LabelMap.mapi_to_list (fun l c -> (l,const c)) x in
      Record (Imap.create (Array.of_list x))
96
97
  | Types.String (i,j,s,c) -> String_utf8 (i,j,s, const c)

98
99
let rec inv_const = function
  | Pair (x, y) -> Types.Pair (inv_const x, inv_const y)
100
  | Xml (x, y, z) | XmlNs (x,y,z,_) ->
101
      Types.Pair (inv_const x, Types.Pair (inv_const y, inv_const z))
102
103
104
105
  | Record x -> 
      let x = Imap.elements x in
      let x = List.map (fun (l,c) -> (l,inv_const c)) x in
      Types.Record (LabelMap.from_list_disj x)
106
107
108
109
110
111
112
113
114
115
116
117
118
119
  | Atom a -> Types.Atom a
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | String_latin1 (_, _, s, v) ->
      let s = Utf8.mk s in
      Types.String (Utf8.start_index s, Utf8.end_index s, s, inv_const v)
  | String_utf8 (i, j, s, v) -> Types.String (i, j, s, inv_const v)
  | Concat (x, y) as v ->
      let rec children = function
        | Concat (x, y) -> children x @ children y
        | x -> [x]
      in
      inv_const (sequence (children v))
  | _ -> failwith "inv_const"
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

let normalize_string_latin1 i j s q = 
  if i = j then q else
    Pair (Char (Chars.V.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q))

let normalize_string_utf8 i j s q = 
  if Utf8.equal_index i j then q 
  else
    let (c,i) = Utf8.next s i in
    Pair (Char (Chars.V.mk_int c), String_utf8 (i,j,s,q))



(***** The dirty things **********)

type pair = { dummy : t; mutable pair_tl : t }
type str  = { dummy1 : t; dummy2 : t; dummy3 : t; mutable str_tl : t }

(* Could optimize this function by changing the order of the fields
   in String_latin1, String_utf8 *)

let set_cdr cell tl =
  match cell with
    | Pair (_,_) -> (Obj.magic cell).pair_tl <- tl
    | String_latin1 (_,_,_,_) 
    | String_utf8(_,_,_,_)-> (Obj.magic cell).str_tl <- tl
    | _ -> assert false

let rec append_cdr cell tl =
  match tl with
    | Concat (x,y) ->
	append_cdr (append_cdr cell x) y
    | Pair (x,tl) -> 
	let cell' = Pair (x,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | String_latin1 (s,i,j,tl) ->
	let cell' = String_latin1 (s,i,j,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | String_utf8 (s,i,j,tl) ->
	let cell' = String_utf8 (s,i,j,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | _ -> cell


let rec flatten = function
  | Pair (x,y) -> concat x (flatten y)
  | Concat (x,y) -> concat (flatten x) (flatten y)
  | q -> q

let eval_lazy_concat v =
  let accu = Obj.magic (Pair (nil,Absent)) in
  let rec aux accu = function
    | Concat (x,y) -> aux (append_cdr accu x) y
    | v -> set_cdr accu v
  in
  aux accu v;
  let nv = match snd accu with 
    | Pair (_,_) as nv -> nv
    | String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
    | String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
    | _ -> assert false in
  let v = Obj.repr v in
  let nv = Obj.repr nv in
  Obj.set_tag v (Obj.tag nv);
  Obj.set_field v 0 (Obj.field nv 0);
  Obj.set_field v 1 (Obj.field nv 1)
	  
  

(******************************)

let normalize = function
  | String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
  | String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
  | Concat (_,_) as v -> eval_lazy_concat v; v
198
  | v -> v
199
200
201
202




203
204
205
206
207
208
let buf = Buffer.create 100

let rec add_buf_utf8_to_latin1 src i j =
  if Utf8.equal_index i j  then ()
  else
    let (c,i) = Utf8.next src i in
209
    if (c > 255) then failwith' "get_string_latin1";
210
211
212
    Buffer.add_char buf (Char.chr c);
    add_buf_utf8_to_latin1 src i j

213
214
215
let rec add_buf_latin1_to_utf8 src i j =
  for k = i to j - 1 do 
    Utf8.store buf (Char.code src.[k]) 
216
217
218
219
  done

let get_string_latin1 e =
  let rec aux = function
220
    | Pair (Char x,y) -> Buffer.add_char buf (Chars.V.to_char x); aux y
221
222
    | String_latin1 (i,j,src,y) -> Buffer.add_substring buf src i (j - i); aux y
    | String_utf8 (i,j,src,y) -> add_buf_utf8_to_latin1 src i j; aux y
223
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
224
    | _ -> () in
225
  Buffer.clear buf;
226
227
228
229
230
231
232
  aux e;
  let s = Buffer.contents buf in
  Buffer.clear buf;
  s

let get_string_utf8 e =
  let rec aux = function
233
    | Pair (Char x,y) -> Utf8.store buf (Chars.V.to_int x); aux y
234
    | String_latin1 (i,j,src,y) -> add_buf_latin1_to_utf8 src i j; aux y
235
    | String_utf8 (i,j,src,y) -> Utf8.copy buf src i j; aux y
236
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
237
238
    | q -> q in
  let q = aux e in
239
240
  let s = Buffer.contents buf in
  Buffer.clear buf;
241
  (Utf8.mk s, q)
242

243
let get_int = function
244
  | Integer i when Intervals.V.is_int i -> Intervals.V.get_int i
245
  | _ -> raise (Invalid_argument "Value.get_int")
246

247
248
249
250
let get_integer = function
  | Integer i -> i
  | _ -> assert false

251
252
253
let rec is_seq = function
  | Pair (_, y) when is_seq y -> true
  | Atom a when a = Sequence.nil_atom -> true
254
  | String_latin1 (_,_,_,y) | String_utf8 (_,_,_,y) when is_seq y  -> true
255
  | Concat (_,_) as v -> eval_lazy_concat v; is_seq v
256
257
258
  | _ -> false

let rec is_str = function
259
  | Pair (Char _, y) -> is_str y
260
  | Atom a when a = Sequence.nil_atom -> true
261
  | String_latin1 (_,_,_,q) | String_utf8(_,_,_,q) -> is_str q
262
  | Concat (_,_) as v -> eval_lazy_concat v; is_str v
263
264
265
  | _ -> false

let rec print ppf v =
266
267
268
269
270
271
  if is_str v then 
    (Format.fprintf ppf "\"";
     print_quoted_str ppf v;
     Format.fprintf ppf "\"")
  else if is_seq v then Format.fprintf ppf "[ @[<hv>%a@]]" print_seq v
  else  match v with
272
    | Pair (x,y) -> Format.fprintf ppf "(%a,%a)" print x print y
273
    | Xml (x,y,z) | XmlNs (x,y,z,_) -> print_xml ppf x y z
274
    | Record l -> Format.fprintf ppf "{%a }" print_record (Imap.elements l)
275
276
277
    | Atom a ->	Atoms.V.print_quote ppf a
    | Integer i -> Intervals.V.print ppf i
    | Char c ->	Chars.V.print ppf c
278
    | Abstraction _ -> Format.fprintf ppf "<fun>"
279
    | Abstraction2 _ -> Format.fprintf ppf "<fun>"
280
281
282
283
284
    | String_latin1 (i,j,s,q) -> 
	Format.fprintf ppf "<string_latin1:%i-%i,%S,%a>" i j s print q
    | String_utf8 (i,j,s,q) -> 
	Format.fprintf ppf "<string_utf8:%i-%i,%S,%a>" 
	(Utf8.get_idx i) (Utf8.get_idx j) (Utf8.get_str s) print q
285
286
    | Concat (x,y) ->
	Format.fprintf ppf "<concat:%a;%a>" print x print y
287
288
    | Abstract ("float",o) ->
	Format.fprintf ppf "%f" (Obj.magic o : float)
289
290
    | Abstract (s,_) ->
	Format.fprintf ppf "<abstract=%s>" s
291
292
    | Absent -> 	
	Format.fprintf ppf "<[absent]>"
293
and print_quoted_str ppf = function
294
  | Pair (Char c, q) -> 
295
      Chars.V.print_in_string ppf c; 
296
      print_quoted_str ppf q
297
  | String_latin1 (i,j,s, q) ->
298
      for k = i to j - 1 do
299
	Chars.V.print_in_string ppf (Chars.V.mk_char s.[k])
300
      done;
301
      print_quoted_str ppf q
302
  | String_utf8 (i,j,s, q) ->
303
304
305
306
307
(*      Format.fprintf ppf "UTF8:{"; *)
      let rec aux i =
	if Utf8.equal_index i j then q
	else 
	  let (c,i) =Utf8.next s i in
308
	  Chars.V.print_in_string ppf (Chars.V.mk_int c);
309
310
311
312
313
314
	  aux i
      in
      let q = aux i in
(*      Format.fprintf ppf "}"; *)
      print_quoted_str ppf q
  | q -> q
315
and print_seq ppf = function
316
  | (Pair(Char _, _)|String_latin1 (_,_,_,_)|String_utf8 (_,_,_,_)) as s ->
317
      Format.fprintf ppf "'";
318
319
320
321
322
323
      let q = print_quoted_str ppf s in
      Format.fprintf ppf "'@ ";
      print_seq ppf q
  | Pair (x,y) -> 
      Format.fprintf ppf "@[%a@]@ " print x;
      print_seq ppf y
324
325
  | _ -> ()

326
and print_xml ppf tag attr content =
327
328
329
330
331
332
333
334
335
336
  if is_seq content then
    Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]"
      print_tag tag
      print_attr attr
      print_seq content
  else
    Format.fprintf ppf "@[<hv2><%a%a>@ %a@]"
      print_tag tag
      print_attr attr
      print content
337
and print_tag ppf = function
338
  | Atom tag -> Atoms.V.print ppf tag
339
340
  | tag -> Format.fprintf ppf "(%a)" print tag
and print_attr ppf = function
341
  | Record attr -> print_record ppf (Imap.elements attr)
342
  | attr -> Format.fprintf ppf "(%a)" print attr
343
344
345

and print_record ppf = function
  | [] -> ()
346
  | f :: rem -> Format.fprintf ppf " %a" print_field f; print_record ppf rem
347
348

and print_field ppf (l,v) = 
349
  Format.fprintf ppf "%a=%a" Label.print (LabelPool.value l) print v  
350

351
352
353
354
355
let dump_xml ppf v =
  let rec aux ppf = function
    | Pair (x, y) ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<pair>@,%a@,%a@,</pair>@]" aux x aux y
356
    | Xml (x, y, z) | XmlNs (x,y,z,_) ->
357
358
359
360
361
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<xml>@,%a@,%a@,%a@,</xml>@]" aux x aux y aux z
    | Record x ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<record>@,%a@,</record>@]"
362
          (fun ppf x -> print_record ppf (Imap.elements x)) x
363
364
    | Atom a ->
        Format.fprintf ppf "@[<hv1>";
365
        Format.fprintf ppf "<atom>@,%a@,</atom>@]"
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
          (fun ppf x -> Atoms.V.print ppf x) a
    | Integer i ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<integer>@,%a@,</integer>@]"
          (fun ppf x -> Intervals.V.print ppf x) i
    | Char c ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<char>@,%a@,</char>@]"
          (fun ppf x -> Chars.V.print ppf x) c
    | Abstraction _ ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<abstraction />@]"
    | Abstraction2 _ ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<abstraction2 />@]"
381
382
    | Abstract (s,_) ->
	Format.fprintf ppf "<abstract>%s</abstract>" s
383
384
    | String_latin1 (_, _, s, v) ->
        Format.fprintf ppf "@[<hv1>";
385
        Format.fprintf ppf "<string_latin1>@,%s@,</string_latin1>@," s;
386
387
388
389
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<follow>@,%a@,</follow>@]</string_latin1>@]" aux v
    | String_utf8 (_, _, s, v) ->
        Format.fprintf ppf "@[<hv1>";
390
391
        Format.fprintf ppf "<string_utf8>@,%s@,</string_utf8>@,"
          (Utf8.get_str s);
392
393
394
395
396
397
398
399
400
401
402
403
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<follow>@,%a@,</follow>@]</string_utf8>@]" aux v
    | Concat (x, y) ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<concat>@,%a@,%a@,</concat>@]" aux x aux y
    | Absent ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<absent />@]"
  in
  Format.fprintf ppf "@[<hv1>";
  Format.fprintf ppf "<value>@,%a@,</value>@]" aux v

404
405
406
407
let rec compare x y =
  if (x == y) then 0
  else
    match (x,y) with
408
      | Pair (x1,x2), Pair (y1,y2) ->
409
410
	  let c = compare x1 y1 in if c <> 0 then c 
	  else compare x2 y2	    
411
412
      | (Xml (x1,x2,x3) | XmlNs (x1,x2,x3,_)), 
	  (Xml (y1,y2,y3) | XmlNs(y1,y2,y3,_)) ->
413
	  let c = compare x1 y1 in if c <> 0 then c 
414
	  else let c = compare x2 y2 in if c <> 0 then c 
415
	  else compare x3 y3
416
      | Record rx, Record ry -> Imap.compare compare rx ry
417
418
419
      | Atom x, Atom y -> Atoms.V.compare x y
      | Integer x, Integer y -> Intervals.V.compare x y
      | Char x, Char y -> Chars.V.compare x y
420
      | Abstraction (_,_), _
421
      | _, Abstraction (_,_)
422
423
424
      | Abstraction2 (_,_,_), _
      | _, Abstraction2 (_,_,_) -> 
	  raise (CDuceExn (string_latin1 "comparing functional values"))
425
426
      | Abstract (s1,v1), Abstract (s2,v2) ->
	  let c = Types.Abstract.T.compare s1 s2 in if c <> 0 then c 
427
428
	  else Pervasives.compare (Obj.magic v1 : float) (Obj.magic v2 : float)
	    (* raise (CDuceExn (string_latin1 "comparing abstract values")) *)
429
430
431
432
      | Absent,_ | _,Absent ->
	  Format.fprintf Format.std_formatter
	    "ERR: Compare %a %a@." print x print y;
	  assert false
433
434
      | Concat (_,_) as x, y -> eval_lazy_concat x; compare x y
      | x, (Concat (_,_) as y) -> eval_lazy_concat y; compare x y
435
      | String_latin1 (ix,jx,sx,qx), String_latin1 (iy,jy,sy,qy) ->
436
437
438
439
440
441
442
	  if (sx == sy) && (ix = iy) && (jx = jy) then compare qx qy
	  else
	    (* Note: we would like to compare first jx-ix and jy-iy,
	       but this is not compatible with the equivalence of values *)
	    let rec aux ix iy =
	      if (ix = jx) then
		if (iy = jy) then compare qx qy
443
		else compare qx (normalize_string_latin1 iy jy sy qy)
444
	      else
445
		if (iy = jy) then compare (normalize_string_latin1 ix jx sx qx) qy
446
447
448
449
450
451
452
		else
		  let c1 = String.unsafe_get sx ix
		  and c2 = String.unsafe_get sy iy in
		  if c1 < c2 then -1 else
		    if c1 > c2 then 1 else aux (ix + 1) (iy + 1)
	    in
	    aux ix iy
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
      | String_utf8 (ix,jx,sx,qx), String_utf8 (iy,jy,sy,qy) ->
	  if (sx == sy) && (Utf8.equal_index ix iy) && (Utf8.equal_index jx jy) then compare qx qy
	  else
	    let rec aux ix iy =
	      if (Utf8.equal_index ix jx) then
		if (Utf8.equal_index iy jy) then compare qx qy
		else compare qx (normalize_string_utf8 iy jy sy qy)
	      else
		if (Utf8.equal_index iy jy) then compare (normalize_string_utf8 ix jx sx qx) qy
		else
		  let (c1,ix) = Utf8.next sx ix in
		  let (c2,iy) = Utf8.next sy iy in
		  if c1 < c2 then -1 else
		    if c1 > c2 then 1 else aux ix iy
	    in
	    aux ix iy
      | String_latin1 (i,j,s,q), _ -> compare (normalize_string_latin1 i j s q) y
      | _, String_latin1 (i,j,s,q) -> compare x (normalize_string_latin1 i j s q)
      | String_utf8 (i,j,s,q), _   -> compare (normalize_string_utf8 i j s q) y
      | _, String_utf8 (i,j,s,q)   -> compare x (normalize_string_utf8 i j s q)
473
474

      | Pair (_,_), _ -> -1 | _, Pair(_,_) -> 1
475
476
      | (Xml (_,_,_) | XmlNs (_,_,_,_)),_ -> -1 
      | _, (Xml(_,_,_) | XmlNs(_,_,_,_)) -> 1
477
478
479
      | Record _,_ -> -1 | _, Record _ -> 1
      | Atom _,_ -> -1 | _, Atom _ -> 1
      | Integer _,_ -> -1 | _, Integer _ -> 1
480
      | Abstract _, _ -> -1 | _, Abstract _ -> 1
481

482
483
let rec hash = function
  | Pair (x1,x2) ->
484
      1 + hash x1 * 257 + hash x2 * 17 
485
  | (Xml (x1,x2,x3) | XmlNs (x1,x2,x3,_)) ->
486
487
      2 + hash x1 * 65537 + hash x2 * 257 + hash x3 * 17
  | Record rx -> 
488
      3 + 17 * Imap.hash hash rx
489
490
491
492
493
494
  | Atom x -> 
      4 + 17 * Atoms.V.hash x
  | Integer x -> 
      5 + 17 * Intervals.V.hash x
  | Char x -> 
      6 + 17 * Chars.V.hash x
495
  | Abstraction _
496
497
  | Abstraction2 _ -> 7
  | Abstract _ -> 8
498
499
500
501
502
  | Absent -> assert false
  | Concat _ as x -> eval_lazy_concat x; hash x
  | String_latin1 (i,j,s,q) -> hash (normalize_string_latin1 i j s q)
  | String_utf8 (i,j,s,q) ->hash (normalize_string_utf8 i j s q)

503
504
505
let iter_xml pcdata_callback other_callback =
  let rec aux = function
    | v when compare v nil = 0 -> ()
506
    | Pair (Char c, tl) ->
507
        pcdata_callback (U.mk_char (Chars.V.to_int c));
508
        aux tl
509
510
    | String_latin1 (i,j,s,tl) ->
        pcdata_callback (U.mk_latin1 (String.sub s i j));
511
        aux tl
512
    | String_utf8 (i,j,s,tl) ->
513
514
515
516
517
        pcdata_callback (U.mk (U.get_substr s i j));
        aux tl
    | Pair (hd, tl) ->
        other_callback hd;
        aux tl
518
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
519
    | v -> raise (Invalid_argument "Value.iter_xml")
520
521
  in
  function
522
    | Xml (_,_,cont) | XmlNs (_,_,cont,_) -> aux cont
523
    | _ -> raise (Invalid_argument "Value.iter_xml")
524

525
(*
526
527
528
529
530
531
let map_xml map_pcdata map_other =
  let patch_string_utf8 cont = function
    | String_utf8 (i, j, u, v) when compare v nil = 0 ->
        String_utf8 (i, j, u, cont)
    | _ -> assert false
  in
532
533
534
535
536
  let rec aux v =
    match v with
    | Pair (Char _, _) | String_latin1 _ | String_utf8 _ ->
        let (u, rest) = get_string_utf8 v in
        patch_string_utf8 (aux rest) (string_utf8 (map_pcdata u))
537
538
539
540
541
542
543
544
    | Pair (hd, tl) -> Pair (map_other hd, aux tl)
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
    | v when compare v nil = 0 -> v
    | v -> raise (Invalid_argument "Value.map_xml")
  in
  function
    | Xml (tag,attrs,cont) -> Xml (tag, attrs, aux cont)
    | _ -> raise (Invalid_argument "Value.map_xml")
545
*)
546
547
548

let tagged_tuple tag vl =
  let ct = sequence vl in
549
  let at = Record Imap.empty in
550
551
552
  let tag = Atom (Atoms.V.mk_ascii tag) in
  Xml (tag, at, ct)

553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
  (** set of values *)

type tmp = t
module OrderedValue =
  struct
    type t = tmp
    let compare = compare
  end
module ValueSet = Set.Make(OrderedValue)

let ( |<| ) x y = compare x y < 0
let ( |>| ) x y = compare x y > 0
let ( |<=| ) x y = let c = compare x y in c < 0 || c = 0
let ( |>=| ) x y = let c = compare x y in c > 0 || c = 0
let ( |=| ) x y = compare x y = 0
568
let equal = ( |=| )
569
let ( |<>| ) x y = compare x y <> 0
570

571
572
573
574
575
576
(*
let rec concat l1 l2 = match l1 with
  | Pair (x,y) -> Pair (x, concat y l2)
  | String_latin1 (s,i,j,q) -> String_latin1 (s,i,j, concat q l2)
  | String_utf8 (s,i,j,q) -> String_utf8 (s,i,j, concat q l2)
  | q -> l2
577

578
579
580
let rec flatten = function
  | Pair (x,y) -> concat x (flatten y)
  | q -> q
581

582
*)
583
584


585
let () = dump_forward := dump_xml
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605

let get_pair v =
  match normalize v with
    | Pair (x,y) -> (x,y)
    | _ -> assert false

(* TODO: tail-rec version of get_sequence *)

let rec get_sequence v =
  match normalize v with
    | Pair (x,y) -> x :: (get_sequence y)
    | _ -> []

let rec get_sequence_rev accu v =
  match normalize v with
    | Pair (x,y) -> get_sequence_rev (x::accu) y
    | _ -> accu

let get_sequence_rev v = get_sequence_rev [] v

606
607
608
609
610
let rec fold_sequence f accu v =
  match normalize v with
    | Pair (x,y) -> fold_sequence f (f accu x) y
    | _ -> accu

611
612
613
614
615
616
617
618
619
620
621
622
623
let atom_ascii s =
  Atom (Atoms.V.mk_ascii s)

let get_variant = function
  | Atom a -> Atoms.V.get_ascii a, None
  | v -> match normalize v with
      | Pair (Atom a,x) -> Atoms.V.get_ascii a, Some x
      | _ -> assert false

let label_ascii s =
  LabelPool.mk (Ns.empty, U.mk s)

let record l =
624
  Record (Imap.create (Array.of_list l))
625

626
627
628
let record_ascii l =
  record (List.map (fun (l,v) -> (label_ascii l, v)) l)

629
630
631

let get_field v l =
  match v with
632
    | Record fields -> Imap.find fields l
633
    | _ -> raise Not_found
634

635
636
let get_field_ascii v l = get_field v (label_ascii l)

637
638
639
640
641
642
643
let abstract a v =
  Abstract (a,Obj.repr v)

let get_abstract = function
  | Abstract (_,v) -> Obj.magic v
  | _ -> assert false
  
644

645
646
647
let get_label = LabelPool.mk (Ns.empty, U.mk "get") 
let set_label = LabelPool.mk (Ns.empty, U.mk "set")
let mk_rf ~get ~set =
648
  Imap.create [| get_label, get; set_label, set |]
649

650
651
let mk_ref t v =
  let r = ref v in
652
653
  let get = Abstraction (Some [Sequence.nil_type, t], fun _ -> !r)
  and set = Abstraction (Some [t, Sequence.nil_type], fun x -> r := x; nil) in
654
  Record (mk_rf ~get ~set)
655
656
657


let mk_ext_ref t get set =
658
659
660
661
662
663
  let get = Abstraction (
    (match t with Some t -> Some [Sequence.nil_type, t] | None -> None),
    fun _ -> get ())
  and set = Abstraction (
    (match t with Some t -> Some [t, Sequence.nil_type] | None -> None), 
    fun v -> set v; nil) in
664
  Record (mk_rf ~get ~set)
665
  
666
667
668
669
670
671
672
673

let ocaml2cduce_int i =
  Integer (Intervals.V.from_int i)

let cduce2ocaml_int = function
  | Integer i -> Intervals.V.get_int i
  | _ -> assert false

674
675
676
677
678
679
680
681
682
683
684
685
686
687
let ocaml2cduce_int32 i =
  Integer (Intervals.V.from_int32 i)

let cduce2ocaml_int32 = function
  | Integer i -> Intervals.V.to_int32 i
  | _ -> assert false

let ocaml2cduce_int64 i =
  Integer (Intervals.V.from_int64 i)

let cduce2ocaml_int64 = function
  | Integer i -> Intervals.V.to_int64 i
  | _ -> assert false

688
let ocaml2cduce_string s = string_latin1 (String.copy s)
689

690
let cduce2ocaml_string = get_string_latin1 (* Result is already fresh *)
691

692
let ocaml2cduce_string_utf8 s = string_utf8 (U.mk (String.copy (U.get_str s)))
693

694
let cduce2ocaml_string_utf8 s = fst (get_string_utf8 s) (* Result is already fresh *)
695

696
697
698
let ocaml2cduce_char c =
  Char (Chars.V.mk_char c)

699
700
701
let ocaml2cduce_wchar c =
  Char (Chars.V.mk_int c)

702
703
704
let cduce2ocaml_char = function
  | Char c -> Chars.V.to_char c 
  | _ -> assert false
705

706
707
708
709
710
711
let ocaml2cduce_bigint i =
  Integer (Intervals.V.from_bigint i)

let cduce2ocaml_bigint = function
  | Integer i -> Intervals.V.get_bigint i
  | _ -> assert false
712
713
714

let ocaml2cduce_atom ns l =
  Atom (Atoms.V.mk (Ns.mk ns) l)
715
716
717
718

let print_utf8 v =
  print_string (U.get_str v);
  flush stdout
719

720
721

let float n =
722
  Abstract ("float", Obj.repr n)
723
724
725
726
727
728
729
730
731
732
733
734
735

let cduce2ocaml_option f v =
  match normalize v with
    | Pair (x,y) -> Some (f x)
    | _ -> None


let ocaml2cduce_option f = function
  | Some x -> Pair (f x, nil)
  | None -> nil



736
737
let add v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.add x y)
738
  | (Record r1, Record r2) -> Record (Imap.merge r1 r2)
739
740
  | _ -> assert false

741
let merge v1 v2 = match (v1,v2) with
742
  | (Record r1, Record r2) -> Record (Imap.merge r1 r2)
743
744
  | _ -> assert false

745
746
747
748
let sub v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.sub x y)
  | _ -> assert false

749
750
751
752
753
754
755
756
let mul v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.mult x y)
  | _ -> assert false

let div v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.div x y)
  | _ -> assert false

757
758
759
760
let modulo v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.modulo x y)
  | _ -> assert false

761

762
763
764
765
766
767
768
let pair v1 v2 = Pair (v1,v2)
let xml v1 v2 v3 = Xml (v1,v2,v3)

let mk_record labels fields =
  let l = ref [] in
  assert (Array.length labels == Array.length fields);
  for i = 0 to Array.length labels - 1 do
769
    l := (labels.(i),fields.(i)) :: !l;
770
  done;
771
  record !l
772
  
773

774
775
776
777
(* TODO: optimize cases
     - (f x = [])
     - all chars copied or deleted *)

778
let rec transform_aux f accu = function
779
  | Pair (x,y) -> let accu = concat accu (f x) in transform_aux f accu y
780
781
782
783
  | Atom _ -> accu
  | v -> transform_aux f accu (normalize v)

let transform f v = transform_aux f nil v
784

785
786
787
788
789
790
791
792
793
794
795
796
797

let rec xtransform_aux f accu = function
  | Pair (x,y) -> 
      let accu = match f x with
	| Absent ->
	    let x = match x with
	      | Xml (tag, attr, child) -> 
		  let child = xtransform_aux f nil child in
		  Xml (tag, attr, child)
	      | XmlNs (tag, attr, child, ns) ->
		  let child = xtransform_aux f nil child in
		  XmlNs (tag, attr, child, ns)
	      | x -> x in
798
799
	    concat accu (Pair (x,nil))
	| x -> concat accu x
800
801
802
803
804
805
806
      in 
      xtransform_aux f accu y
  | Atom _ -> accu
  | v -> xtransform_aux f accu (normalize v)

let xtransform f v = xtransform_aux f nil v

807
let remove_field l = function
808
  | Record r -> Record (Imap.remove r l)
809
  | _ -> assert false
810
811
812
813
814

let rec ocaml2cduce_list f = function
  | [] -> nil
  | hd::tl -> Pair (f hd, ocaml2cduce_list f tl)

815
816
let rec cduce2ocaml_list f v =
  match normalize v with
817
    | Pair (x,y) -> f x :: (cduce2ocaml_list f y)
818
819
    | _ -> []

820
let no_attr = Record Imap.empty
821

822
823
let ocaml2cduce_constr tag va = 
  Xml (tag, no_attr, sequence_of_array va)
824

825
826
827
let rec cduce2ocaml_constr m = function
  | Atom v ->
      Obj.repr (Atoms.get_map v m)
828
  | Xml (Atom v,_,f) | XmlNs (Atom v,_,f,_) ->
829
830
831
832
	let tag = Atoms.get_map v m in
	let x = Obj.repr (Array.of_list (get_sequence f)) in
	Obj.set_tag x tag;
	x
833
  | _ -> assert false
834

835
836
837
838
839
840
841
842
843
let rec cduce2ocaml_variant m = function
  | Atom v ->
      Obj.repr (Atoms.get_map v m)
  | Xml (Atom v,_,f) | XmlNs (Atom v,_,f,_) ->
	let tag = Atoms.get_map v m in
	let (x,_) = get_pair f in
	Obj.repr (tag,x)
  | _ -> assert false

844
      
845
846
847
848
849
850
851
852
853
854
let ocaml2cduce_fun farg fres f =
  Abstraction (None, fun x -> fres (f (farg x)))

let cduce2ocaml_fun farg fres = function
  | Abstraction (_, f) -> (fun x -> fres (f (farg x)))
  | _ -> assert false

let apply f arg = match f with
  | Abstraction (_,f) -> f arg
  | _ -> assert false