value.ml 29.9 KB
Newer Older
1
open Ident
2
open Encodings
3

Pietro Abate's avatar
Pietro Abate committed
4
type iface = (Types.t * Types.t) list
5
6
7
type sigma =
  | List of Types.Tallying.CS.sl
  | Comp of (sigma * sigma)
Pietro Abate's avatar
Pietro Abate committed
8
  | Sel of (int * iface * sigma)
9
  | Identity
10
  | Mono
11

12
and t =
13
14
15
16
  | Pair of t * t * sigma
  | Xml of t * t * t * sigma
  | XmlNs of t * t * t * Ns.table * sigma
  | Record of t Imap.t * sigma
17
18
19
  | Atom of Atoms.V.t
  | Integer of Intervals.V.t
  | Char of Chars.V.t
20
  | Abstraction of (Types.descr * Types.descr) list option * (t -> t) * sigma
21
  | Abstract of Types.Abstract.V.t
22
  | String_latin1 of int * int * string * t
23
  | String_utf8 of Utf8.uindex * Utf8.uindex * Utf8.t * t
24
  | Concat of t * t
25
  | Absent
26

27
28
29
30
31
32
33
34
35
36
37
38
let rec domain = function
  | Identity | Mono -> Var.Set.empty
  | List(l) -> Types.Tallying.domain l
  | Comp(s1,s2) -> Var.Set.union (domain s1) (domain s2)
  | Sel(_,_,sigma) -> (domain sigma)

let rec codomain = function
  | Identity | Mono -> Var.Set.empty
  | List(l) -> Types.Tallying.codomain l
  | Comp(s1,s2) -> Var.Set.union (codomain s1) (codomain s2)
  | Sel(_,_,sigma) -> (codomain sigma)

Julien Lopez's avatar
Julien Lopez committed
39
(* Comp for Value.sigma but simplify if possible. *)
Julien Lopez's avatar
Julien Lopez committed
40
let rec comp s1 s2 = match s1, s2 with
41
42
  | Identity, _ | Mono, _ -> s2
  | _, Identity | _, Mono -> s1
Julien Lopez's avatar
Julien Lopez committed
43

Julien Lopez's avatar
Julien Lopez committed
44
  | Comp(s3, s4), List(_) -> (match comp s4 s2 with
45
      | Comp(_) as s5 when s4 = s5 -> s1
Julien Lopez's avatar
Julien Lopez committed
46
47
48
      | Comp(_) -> Comp(s1, s2)
      | res -> comp s3 res)
  | List(_), Comp(s3, s4) | Sel(_), Comp(s3, s4) -> (match comp s1 s3 with
49
      | Comp(_) as s5 when s3 = s5 -> s2
Julien Lopez's avatar
Julien Lopez committed
50
51
52
53
54
55
      | Comp(_) -> Comp(s1, s2)
      | res -> comp res s4)
  | Comp(s3, s4), Comp(s5, s6) -> (match comp s4 s5 with
      | Comp(_) -> Comp(s1, s2)
      | res -> comp s3 (comp res s6))

Julien Lopez's avatar
Julien Lopez committed
56
57
58
59
  (* If a variable in the image of s2 is in the domain of s1 we can't simplify *)
  | _, _ when not (Var.Set.is_empty (Var.Set.inter (domain s1) (codomain s2)))
      -> Comp(s1, s2)

60
  | List(_), List(_) | Sel(_), List(_) ->
Julien Lopez's avatar
Julien Lopez committed
61
62
    if Var.Set.subset (domain s1) (domain s2) then s2 else Comp(s1, s2)

Julien Lopez's avatar
Julien Lopez committed
63
  (* Default: comp s1 s2 -> Comp(s1, s2). *)
64
65
  | _, _ -> Comp(s1, s2)

66
67
68
69
70
71
(* 
  The only representation of the empty sequence is nil.
  In particular, in String_latin1 and String_utf8, the string cannot be empty.
*)

let dump_forward = ref (fun _ _ -> assert false)
72

73
74
exception CDuceExn of t

75
let nil = Atom Sequence.nil_atom
76
77
78
79
80
81
82
83
84
let string_latin1 s = 
  if String.length s = 0 then nil
  else String_latin1 (0,String.length s, s, nil)
let string_utf8 s = 
  if String.length (Utf8.get_str s) = 0 then nil
  else String_utf8 (Utf8.start_index s,Utf8.end_index s, s, nil)
let substring_utf8 i j s q =
  if Utf8.equal_index i j then q 
  else String_utf8 (i,j,s,q)
85
86
let vtrue = Atom (Atoms.V.mk_ascii "true")
let vfalse = Atom (Atoms.V.mk_ascii "false")
87
88
let vbool x = if x then vtrue else vfalse

89
let vrecord l =
90
  let l = List.map (fun (lab,v) -> Upool.int lab, v) l in
91
  Record (Imap.create (Array.of_list l),Mono)
92
93

let get_fields = function
94
  | Record (map,_) -> Obj.magic (Imap.elements map)
95
  | _ -> raise (Invalid_argument "Value.get_fields")
96

97
98
let rec sequence = function
  | [] -> nil
99
  | h::t -> Pair (h, sequence t,Mono)
100

101
102
let rec sequence_rev accu = function
  | [] -> accu
103
  | h::t -> sequence_rev (Pair (h,accu,Mono)) t
104
105
106

let sequence_rev l = sequence_rev nil l

107
108
109
let sequence_of_array a =
  let rec aux accu i =
    if (i = 0) then accu
110
    else let i = pred i in aux (Pair (a.(i), accu,Mono)) i in
111
112
  aux nil (Array.length a)

113
114
115
let tuple_of_array a =
  let rec aux accu i =
    if (i = 0) then accu
116
    else let i = pred i in aux (Pair (a.(i), accu,Mono)) i in
117
118
119
  let n = Array.length a in
  aux a.(n) (pred n)

120
121
122
123
let concat v1 v2 = 
  match (v1,v2) with
    | (Atom _, v) | (v, Atom _) -> v
    | (v1,v2) -> Concat (v1,v2)
124

125
let append v1 v2 =
126
  concat v1 (Pair (v2,nil,Mono))
127

128
let failwith' s = raise (CDuceExn (string_latin1 s))
129

130
let rec const = function
131
132
133
  | Types.Integer i -> Integer i
  | Types.Atom a -> Atom a
  | Types.Char c -> Char c
134
135
  | Types.Pair (x,y) -> Pair (const x, const y,Mono)
  | Types.Xml (x, Types.Pair (y, z)) -> Xml (const x, const y, const z,Mono)
136
  | Types.Xml (_,_) -> assert false
137
  | Types.Record x -> 
138
      let x = LabelMap.mapi_to_list (fun l c -> (Upool.int l,const c)) x in
139
      Record (Imap.create (Array.of_list x),Mono)
140
141
  | Types.String (i,j,s,c) -> String_utf8 (i,j,s, const c)

142
let rec inv_const = function
143
144
  | Pair (x, y,_) -> Types.Pair (inv_const x, inv_const y)
  | Xml (x, y, z,_) | XmlNs (x,y,z,_,_) ->
145
      Types.Pair (inv_const x, Types.Pair (inv_const y, inv_const z))
146
  | Record (x,_) -> 
147
      let x = Imap.elements x in
148
      let x = List.map (fun (l,c) -> (Label.from_int l,inv_const c)) x in
149
      Types.Record (LabelMap.from_list_disj x)
150
151
152
153
154
155
156
157
158
159
160
161
162
163
  | Atom a -> Types.Atom a
  | Integer i -> Types.Integer i
  | Char c -> Types.Char c
  | String_latin1 (_, _, s, v) ->
      let s = Utf8.mk s in
      Types.String (Utf8.start_index s, Utf8.end_index s, s, inv_const v)
  | String_utf8 (i, j, s, v) -> Types.String (i, j, s, inv_const v)
  | Concat (x, y) as v ->
      let rec children = function
        | Concat (x, y) -> children x @ children y
        | x -> [x]
      in
      inv_const (sequence (children v))
  | _ -> failwith "inv_const"
164
165
166

let normalize_string_latin1 i j s q = 
  if i = j then q else
167
    Pair (Char (Chars.V.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q),Mono)
168
169

let normalize_string_utf8 i j s q = 
170
  if Utf8.equal_index i j then q
171
172
  else
    let (c,i) = Utf8.next s i in
173
    Pair (Char (Chars.V.mk_int c), String_utf8 (i,j,s,q),Mono)
174
175
176
177
178
179
180
181
182
183
184

(***** The dirty things **********)

type pair = { dummy : t; mutable pair_tl : t }
type str  = { dummy1 : t; dummy2 : t; dummy3 : t; mutable str_tl : t }

(* Could optimize this function by changing the order of the fields
   in String_latin1, String_utf8 *)

let set_cdr cell tl =
  match cell with
185
    | Pair (_,_,_) -> (Obj.magic cell).pair_tl <- tl
186
187
188
189
190
191
192
193
    | String_latin1 (_,_,_,_) 
    | String_utf8(_,_,_,_)-> (Obj.magic cell).str_tl <- tl
    | _ -> assert false

let rec append_cdr cell tl =
  match tl with
    | Concat (x,y) ->
	append_cdr (append_cdr cell x) y
194
195
    | Pair (x,tl,sigma) -> 
	let cell' = Pair (x,Absent,sigma) in
196
197
198
199
200
201
202
203
204
205
206
207
208
209
	set_cdr cell cell';
	append_cdr cell' tl
    | String_latin1 (s,i,j,tl) ->
	let cell' = String_latin1 (s,i,j,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | String_utf8 (s,i,j,tl) ->
	let cell' = String_utf8 (s,i,j,Absent) in
	set_cdr cell cell';
	append_cdr cell' tl
    | _ -> cell


let rec flatten = function
210
  | Pair (x,y,_) -> concat x (flatten y)
211
212
213
214
  | Concat (x,y) -> concat (flatten x) (flatten y)
  | q -> q

let eval_lazy_concat v =
215
  let accu = Obj.magic (Pair (nil,Absent,Mono)) in
216
217
218
219
220
221
  let rec aux accu = function
    | Concat (x,y) -> aux (append_cdr accu x) y
    | v -> set_cdr accu v
  in
  aux accu v;
  let nv = match snd accu with 
222
    | Pair (_,_,sigma) as nv -> nv
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
    | String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
    | String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
    | _ -> assert false in
  let v = Obj.repr v in
  let nv = Obj.repr nv in
  Obj.set_tag v (Obj.tag nv);
  Obj.set_field v 0 (Obj.field nv 0);
  Obj.set_field v 1 (Obj.field nv 1)
	  
  
(******************************)

let normalize = function
  | String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
  | String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
  | Concat (_,_) as v -> eval_lazy_concat v; v
239
  | v -> v
240

241
242
243
244
245
246
let buf = Buffer.create 100

let rec add_buf_utf8_to_latin1 src i j =
  if Utf8.equal_index i j  then ()
  else
    let (c,i) = Utf8.next src i in
247
    if (c > 255) then failwith' "get_string_latin1";
248
249
250
    Buffer.add_char buf (Char.chr c);
    add_buf_utf8_to_latin1 src i j

251
252
253
let rec add_buf_latin1_to_utf8 src i j =
  for k = i to j - 1 do 
    Utf8.store buf (Char.code src.[k]) 
254
255
256
257
  done

let get_string_latin1 e =
  let rec aux = function
258
    | Pair (Char x,y,_) -> Buffer.add_char buf (Chars.V.to_char x); aux y
259
260
    | String_latin1 (i,j,src,y) -> Buffer.add_substring buf src i (j - i); aux y
    | String_utf8 (i,j,src,y) -> add_buf_utf8_to_latin1 src i j; aux y
261
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
262
    | _ -> () in
263
  Buffer.clear buf;
264
265
266
267
268
269
270
  aux e;
  let s = Buffer.contents buf in
  Buffer.clear buf;
  s

let get_string_utf8 e =
  let rec aux = function
271
    | Pair (Char x,y,_) -> Utf8.store buf (Chars.V.to_int x); aux y
272
    | String_latin1 (i,j,src,y) -> add_buf_latin1_to_utf8 src i j; aux y
273
    | String_utf8 (i,j,src,y) -> Utf8.copy buf src i j; aux y
274
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
275
276
    | q -> q in
  let q = aux e in
277
278
  let s = Buffer.contents buf in
  Buffer.clear buf;
279
  (Utf8.mk s, q)
280

281
let get_int = function
282
  | Integer i when Intervals.V.is_int i -> Intervals.V.get_int i
283
  | _ -> raise (Invalid_argument "Value.get_int")
284

285
286
287
288
let get_integer = function
  | Integer i -> i
  | _ -> assert false

289
let rec is_seq = function
290
  | Pair (_, y,_) when is_seq y -> true
291
  | Atom a when a = Sequence.nil_atom -> true
292
  | String_latin1 (_,_,_,y) | String_utf8 (_,_,_,y) when is_seq y  -> true
293
  | Concat (_,_) as v -> eval_lazy_concat v; is_seq v
294
295
296
  | _ -> false

let rec is_str = function
297
  | Pair (Char _, y,_) -> is_str y
298
  | Atom a when a = Sequence.nil_atom -> true
299
  | String_latin1 (_,_,_,q) | String_utf8(_,_,_,q) -> is_str q
300
  | Concat (_,_) as v -> eval_lazy_concat v; is_str v
301
302
  | _ -> false

Pietro Abate's avatar
Pietro Abate committed
303
304
305
306
307
308
module Print = struct

  let rec pp_sigma ppf =
    let pp_aux ppf =
      Utils.pp_list (fun ppf (t1,t2) ->
        Format.fprintf ppf "(%a -> %a)"
309
310
        Types.Print.pp_type t1
        Types.Print.pp_type t2
Pietro Abate's avatar
Pietro Abate committed
311
312
313
314
315
316
317
318
319
320
321
322
      ) ppf
    in
    function
    |List ll -> Types.Tallying.CS.pp_sl ppf ll
    |Comp(s1,s2) -> Format.fprintf ppf "Comp(%a,%a)" pp_sigma s1 pp_sigma s2
    |Sel(x,iface,s) -> Format.fprintf ppf "Sel(%d,%a,%a)" x pp_aux iface pp_sigma s
    |Identity -> Format.fprintf ppf "Id"
    |Mono -> Format.fprintf ppf "Mono"

  let pp_iface ppf l =
    let f ppf (t1,t2) =
      Format.fprintf ppf "(%a,%a)"
323
324
      Types.Print.pp_type t1
      Types.Print.pp_type t2
Pietro Abate's avatar
Pietro Abate committed
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
    in
    Utils.pp_list f ppf l

  let rec pp_value ppf = function
    | Pair(v1, v2, sigma) ->
        Format.fprintf ppf "(%a,%a,%a)"
        pp_value v1
        pp_value v2
        pp_sigma sigma
    | Xml(_,_,_,sigma) -> Format.fprintf ppf "Xml(%a)" pp_sigma sigma
    | XmlNs(_,_,_,_,sigma) -> Format.fprintf ppf "XmlNs(%a)" pp_sigma sigma
    | Record(_,sigma) -> Format.fprintf ppf "Record(%a)" pp_sigma sigma
    | Atom(a) -> Format.fprintf ppf "Atom(%a)" Atoms.V.print a
    | Integer(i) -> Format.fprintf ppf "%d" (Intervals.V.get_int i)
    | Char(i) -> Format.fprintf ppf "'%c'" (Chars.V.to_char i)
    | Abstraction(None, _, sigma) ->
        Format.fprintf ppf  "Abstraction(None,%a)" pp_sigma sigma
    | Abstraction(Some t, _, sigma) ->
        Format.fprintf ppf  "Abstraction(%a,%a)"
        pp_iface t
        pp_sigma sigma
    | Abstract((name, _)) -> Format.fprintf ppf "Abstract(%s)" name
    | String_latin1(_,_,s,_) -> Format.fprintf ppf "\"%s\"" s
    | String_utf8(_,_,s,_) -> Format.fprintf ppf "\"%s\"" (Encodings.Utf8.get_str s)
    | Concat(v1, v2) ->
        Format.fprintf ppf "Concat(%a, %a)"
        pp_value v1
        pp_value v2
    | Absent -> Format.fprintf ppf "Absent"

  let string_of_value = Utils.string_of_formatter pp_value

end
358

359
let rec print ppf v =
360
361
  if is_str v then 
    (Format.fprintf ppf "\"";
Pietro Abate's avatar
Pietro Abate committed
362
     ignore (pp_quoted_str ppf v);
363
     Format.fprintf ppf "\"")
Pietro Abate's avatar
Pietro Abate committed
364
  else if is_seq v then Format.fprintf ppf "[ @[<hv>%a@]]" pp_seq v
365
  else  match v with
366
    | Pair (x,y,_) -> Format.fprintf ppf "(%a,%a)" print x print y
Pietro Abate's avatar
Pietro Abate committed
367
    | Xml (x,y,z,sigma) | XmlNs (x,y,z,_,sigma) -> pp_xml ppf x y z (* sigma *)
368
    | Record (l,_) -> Format.fprintf ppf "@[{%a }@]" pp_record (Imap.elements l)
Pietro Abate's avatar
Pietro Abate committed
369
    | Atom a ->	Atoms.V.print ppf a
370
371
    | Integer i -> Intervals.V.print ppf i
    | Char c ->	Chars.V.print ppf c
372
    | Abstraction _ -> Format.fprintf ppf "<fun>"
373
    | String_latin1 (i,j,s,q) -> 
Pietro Abate's avatar
Pietro Abate committed
374
      Format.fprintf ppf "<string_latin1:%i-%i,%S,%a>" i j s print q
375
    | String_utf8 (i,j,s,q) -> 
Pietro Abate's avatar
Pietro Abate committed
376
377
      Format.fprintf ppf "<string_utf8:%i-%i,%S,%a>" 
      (Utf8.get_idx i) (Utf8.get_idx j) (Utf8.get_str s) print q
378
    | Concat (x,y) ->
Pietro Abate's avatar
Pietro Abate committed
379
      Format.fprintf ppf "<concat:%a;%a>" print x print y
380
    | Abstract ("float",o) ->
Pietro Abate's avatar
Pietro Abate committed
381
      Format.fprintf ppf "%f" (Obj.magic o : float)
Pietro Abate's avatar
Pietro Abate committed
382
383
384
385
386
    | Abstract ("cdata",o) ->
        let s = Utf8.get_str (Obj.magic o : Utf8.t) in
        Format.fprintf ppf "'%s'" s
        (* Format.fprintf ppf "%s" (Utf8.get_str (Obj.magic o :
            * Encodings.Utf8.t)) *)
387
    | Abstract (s,_) ->
Pietro Abate's avatar
Pietro Abate committed
388
      Format.fprintf ppf "<abstract=%s>" s
389
    | Absent -> 	
Pietro Abate's avatar
Pietro Abate committed
390
391
      Format.fprintf ppf "<[absent]>"
and pp_quoted_str ppf = function
Pietro Abate's avatar
WIP    
Pietro Abate committed
392
  | Pair (Char c, q,_) -> 
393
      Chars.V.print_in_string ppf c; 
Pietro Abate's avatar
Pietro Abate committed
394
      pp_quoted_str ppf q
395
  | String_latin1 (i,j,s, q) ->
396
      for k = i to j - 1 do
Pietro Abate's avatar
Pietro Abate committed
397
      Chars.V.print_in_string ppf (Chars.V.mk_char s.[k])
398
      done;
Pietro Abate's avatar
Pietro Abate committed
399
      pp_quoted_str ppf q
400
  | String_utf8 (i,j,s, q) ->
401
402
(*      Format.fprintf ppf "UTF8:{"; *)
      let rec aux i =
Pietro Abate's avatar
Pietro Abate committed
403
404
405
406
407
      if Utf8.equal_index i j then q
      else 
        let (c,i) =Utf8.next s i in
        Chars.V.print_in_string ppf (Chars.V.mk_int c);
        aux i
408
409
410
      in
      let q = aux i in
(*      Format.fprintf ppf "}"; *)
Pietro Abate's avatar
Pietro Abate committed
411
      pp_quoted_str ppf q
412
  | q -> q
Pietro Abate's avatar
Pietro Abate committed
413
and pp_seq ppf = function
414
  | (Pair(Char _, _,_)|String_latin1 (_,_,_,_)|String_utf8 (_,_,_,_)) as s ->
415
      Format.fprintf ppf "'";
Pietro Abate's avatar
Pietro Abate committed
416
      let q = pp_quoted_str ppf s in
417
      Format.fprintf ppf "'@ ";
Pietro Abate's avatar
Pietro Abate committed
418
      pp_seq ppf q
419
  | Pair (x,y,_) -> 
420
      Format.fprintf ppf "@[%a@]@ " print x;
Pietro Abate's avatar
Pietro Abate committed
421
      pp_seq ppf y
422
423
  | _ -> ()

Pietro Abate's avatar
Pietro Abate committed
424
and pp_xml ppf tag attr content =
425
426
  if is_seq content then
    Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]"
Pietro Abate's avatar
Pietro Abate committed
427
428
429
      pp_tag tag
      pp_attr attr
      pp_seq content
430
431
  else
    Format.fprintf ppf "@[<hv2><%a%a>@ %a@]"
Pietro Abate's avatar
Pietro Abate committed
432
433
      pp_tag tag
      pp_attr attr
434
      print content
Pietro Abate's avatar
Pietro Abate committed
435
and pp_tag ppf = function
436
  | Atom tag -> Atoms.V.print ppf tag
437
  | tag -> Format.fprintf ppf "(%a)" print tag
Pietro Abate's avatar
Pietro Abate committed
438
439
and pp_attr ppf = function
  | Record (attr,_) -> pp_record ppf (Imap.elements attr)
440
  | attr -> Format.fprintf ppf "(%a)" print attr
Pietro Abate's avatar
Pietro Abate committed
441
and pp_record ppf = function
442
  | [] -> ()
Pietro Abate's avatar
Pietro Abate committed
443
444
  | f :: rem -> Format.fprintf ppf "@ %a" pp_field f; pp_record ppf rem
and pp_field ppf (l,v) = 
445
  Format.fprintf ppf "%a=%a" Label.print_attr (Label.from_int l) print v  
446

447
448
let dump_xml ppf v =
  let rec aux ppf = function
449
    | Pair (x, y,sigma) ->
450
451
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<pair>@,%a@,%a@,</pair>@]" aux x aux y
452
    | Xml (x, y, z,sigma) | XmlNs (x,y,z,_,sigma) ->
453
454
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<xml>@,%a@,%a@,%a@,</xml>@]" aux x aux y aux z
455
    | Record (x,sigma) ->
456
457
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<record>@,%a@,</record>@]"
Pietro Abate's avatar
Pietro Abate committed
458
          (fun ppf x -> pp_record ppf (Imap.elements x)) x
459
460
    | Atom a ->
        Format.fprintf ppf "@[<hv1>";
461
        Format.fprintf ppf "<atom>@,%a@,</atom>@]"
462
463
464
465
466
467
468
469
470
471
472
473
          (fun ppf x -> Atoms.V.print ppf x) a
    | Integer i ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<integer>@,%a@,</integer>@]"
          (fun ppf x -> Intervals.V.print ppf x) i
    | Char c ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<char>@,%a@,</char>@]"
          (fun ppf x -> Chars.V.print ppf x) c
    | Abstraction _ ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<abstraction />@]"
474
475
    | Abstract (s,_) ->
	Format.fprintf ppf "<abstract>%s</abstract>" s
476
477
    | String_latin1 (_, _, s, v) ->
        Format.fprintf ppf "@[<hv1>";
478
        Format.fprintf ppf "<string_latin1>@,%s@,</string_latin1>@," s;
479
480
481
482
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<follow>@,%a@,</follow>@]</string_latin1>@]" aux v
    | String_utf8 (_, _, s, v) ->
        Format.fprintf ppf "@[<hv1>";
483
484
        Format.fprintf ppf "<string_utf8>@,%s@,</string_utf8>@,"
          (Utf8.get_str s);
485
486
487
488
489
490
491
492
493
494
495
496
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<follow>@,%a@,</follow>@]</string_utf8>@]" aux v
    | Concat (x, y) ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<concat>@,%a@,%a@,</concat>@]" aux x aux y
    | Absent ->
        Format.fprintf ppf "@[<hv1>";
        Format.fprintf ppf "<absent />@]"
  in
  Format.fprintf ppf "@[<hv1>";
  Format.fprintf ppf "<value>@,%a@,</value>@]" aux v

497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
(*
let rec compare_sigma x y = 
  if (x == y) then 0
  else 
    match x,y with
    |Comp(sx1,sx2),Comp(sy1,xy2) ->

    | List(sl1), List(sl2) ->
        if List.for_all2 (fun v1 v2 ->
          Types.Tallying.E.comparea v1 v2 ) sl1 sl2 = 0 then 0
        else (List.length sl1) - (List.length sl2)
    | Sel(t1,if1,s1), Sel(t2,if2,s2) ->

    | _, _ -> Pervasives.compare x y
*)

Pietro Abate's avatar
WIP    
Pietro Abate committed
513
(* XXX here I don't compare sigmas !!! *)
514
515
516
517
let rec compare x y =
  if (x == y) then 0
  else
    match (x,y) with
518
      | Pair (x1,x2,sigma1), Pair (y1,y2,sigma2) ->
519
	  let c = compare x1 y1 in if c <> 0 then c 
Pietro Abate's avatar
WIP    
Pietro Abate committed
520
	  else compare x2 y2
521
522
      | (Xml (x1,x2,x3,sigmax1) | XmlNs (x1,x2,x3,_,sigmax1)), 
	  (Xml (y1,y2,y3,sigmay2) | XmlNs(y1,y2,y3,_,sigmay2)) ->
523
	  let c = compare x1 y1 in if c <> 0 then c 
524
	  else let c = compare x2 y2 in if c <> 0 then c 
525
	  else compare x3 y3
526
      | Record (rx,sigma1), Record (ry,sigma2) -> Imap.compare compare rx ry
527
528
529
      | Atom x, Atom y -> Atoms.V.compare x y
      | Integer x, Integer y -> Intervals.V.compare x y
      | Char x, Char y -> Chars.V.compare x y
530
531
      | Abstraction (_,_,_), _
      | _, Abstraction (_,_,_) ->
532
	  raise (CDuceExn (string_latin1 "comparing functional values"))
533
534
      | Abstract (s1,v1), Abstract (s2,v2) ->
	  let c = Types.Abstract.T.compare s1 s2 in if c <> 0 then c 
Pietro Abate's avatar
Pietro Abate committed
535
536
537
538
539
540
	  else begin
              match s1 with
              |"float" -> Pervasives.compare (Obj.magic v1 : float) (Obj.magic v2 : float)
              |"cdata" -> Pervasives.compare (Obj.magic v1 : Encodings.Utf8.t) (Obj.magic v2 : Encodings.Utf8.t)
              |_ -> raise (CDuceExn (string_latin1 "comparing abstract values"))
          end
541
542
543
544
      | Absent,_ | _,Absent ->
	  Format.fprintf Format.std_formatter
	    "ERR: Compare %a %a@." print x print y;
	  assert false
545
546
      | Concat (_,_) as x, y -> eval_lazy_concat x; compare x y
      | x, (Concat (_,_) as y) -> eval_lazy_concat y; compare x y
547
      | String_latin1 (ix,jx,sx,qx), String_latin1 (iy,jy,sy,qy) ->
548
549
550
551
552
553
554
	  if (sx == sy) && (ix = iy) && (jx = jy) then compare qx qy
	  else
	    (* Note: we would like to compare first jx-ix and jy-iy,
	       but this is not compatible with the equivalence of values *)
	    let rec aux ix iy =
	      if (ix = jx) then
		if (iy = jy) then compare qx qy
555
		else compare qx (normalize_string_latin1 iy jy sy qy)
556
	      else
557
		if (iy = jy) then compare (normalize_string_latin1 ix jx sx qx) qy
558
559
560
561
562
563
564
		else
		  let c1 = String.unsafe_get sx ix
		  and c2 = String.unsafe_get sy iy in
		  if c1 < c2 then -1 else
		    if c1 > c2 then 1 else aux (ix + 1) (iy + 1)
	    in
	    aux ix iy
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
      | String_utf8 (ix,jx,sx,qx), String_utf8 (iy,jy,sy,qy) ->
	  if (sx == sy) && (Utf8.equal_index ix iy) && (Utf8.equal_index jx jy) then compare qx qy
	  else
	    let rec aux ix iy =
	      if (Utf8.equal_index ix jx) then
		if (Utf8.equal_index iy jy) then compare qx qy
		else compare qx (normalize_string_utf8 iy jy sy qy)
	      else
		if (Utf8.equal_index iy jy) then compare (normalize_string_utf8 ix jx sx qx) qy
		else
		  let (c1,ix) = Utf8.next sx ix in
		  let (c2,iy) = Utf8.next sy iy in
		  if c1 < c2 then -1 else
		    if c1 > c2 then 1 else aux ix iy
	    in
	    aux ix iy
      | String_latin1 (i,j,s,q), _ -> compare (normalize_string_latin1 i j s q) y
      | _, String_latin1 (i,j,s,q) -> compare x (normalize_string_latin1 i j s q)
      | String_utf8 (i,j,s,q), _   -> compare (normalize_string_utf8 i j s q) y
      | _, String_utf8 (i,j,s,q)   -> compare x (normalize_string_utf8 i j s q)
585

586
587
588
      | Pair (_,_,_), _ -> -1 | _, Pair(_,_,_) -> 1
      | (Xml (_,_,_,_) | XmlNs (_,_,_,_,_)),_ -> -1 
      | _, (Xml(_,_,_,_) | XmlNs(_,_,_,_,_)) -> 1
589
590
591
      | Record _,_ -> -1 | _, Record _ -> 1
      | Atom _,_ -> -1 | _, Atom _ -> 1
      | Integer _,_ -> -1 | _, Integer _ -> 1
592
      | Abstract _, _ -> -1 | _, Abstract _ -> 1
593

594
let rec hash = function
595
  | Pair (x1,x2,sigma) ->
596
      1 + hash x1 * 257 + hash x2 * 17 
597
  | (Xml (x1,x2,x3,sigma) | XmlNs (x1,x2,x3,_,sigma)) ->
598
      2 + hash x1 * 65537 + hash x2 * 257 + hash x3 * 17
599
  | Record (rx,sigma) -> 
600
      3 + 17 * Imap.hash hash rx
601
602
603
604
605
606
  | Atom x -> 
      4 + 17 * Atoms.V.hash x
  | Integer x -> 
      5 + 17 * Intervals.V.hash x
  | Char x -> 
      6 + 17 * Chars.V.hash x
607
  | Abstraction _ -> 7
608
  | Abstract _ -> 8
609
610
611
612
613
  | Absent -> assert false
  | Concat _ as x -> eval_lazy_concat x; hash x
  | String_latin1 (i,j,s,q) -> hash (normalize_string_latin1 i j s q)
  | String_utf8 (i,j,s,q) ->hash (normalize_string_utf8 i j s q)

614
615
616
let iter_xml pcdata_callback other_callback =
  let rec aux = function
    | v when compare v nil = 0 -> ()
617
    | Pair (Char c, tl,sigma) ->
618
        pcdata_callback (U.mk_char (Chars.V.to_int c));
619
        aux tl
620
621
    | String_latin1 (i,j,s,tl) ->
        pcdata_callback (U.mk_latin1 (String.sub s i j));
622
        aux tl
623
    | String_utf8 (i,j,s,tl) ->
624
625
        pcdata_callback (U.mk (U.get_substr s i j));
        aux tl
626
    | Pair (hd, tl,sigma) ->
627
628
        other_callback hd;
        aux tl
629
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
630
    | v -> raise (Invalid_argument "Value.iter_xml")
631
632
  in
  function
633
    | Xml (_,_,cont,sigma) | XmlNs (_,_,cont,_,sigma) -> aux cont
634
    | _ -> raise (Invalid_argument "Value.iter_xml")
635

636
(*
637
638
639
640
641
642
let map_xml map_pcdata map_other =
  let patch_string_utf8 cont = function
    | String_utf8 (i, j, u, v) when compare v nil = 0 ->
        String_utf8 (i, j, u, cont)
    | _ -> assert false
  in
643
644
645
646
647
  let rec aux v =
    match v with
    | Pair (Char _, _) | String_latin1 _ | String_utf8 _ ->
        let (u, rest) = get_string_utf8 v in
        patch_string_utf8 (aux rest) (string_utf8 (map_pcdata u))
648
649
650
651
652
653
654
655
    | Pair (hd, tl) -> Pair (map_other hd, aux tl)
    | Concat (_,_) as v -> eval_lazy_concat v; aux v
    | v when compare v nil = 0 -> v
    | v -> raise (Invalid_argument "Value.map_xml")
  in
  function
    | Xml (tag,attrs,cont) -> Xml (tag, attrs, aux cont)
    | _ -> raise (Invalid_argument "Value.map_xml")
656
*)
657
658
659

let tagged_tuple tag vl =
  let ct = sequence vl in
660
  let at = Record (Imap.empty,Mono) in
661
  let tag = Atom (Atoms.V.mk_ascii tag) in
662
  Xml (tag, at, ct,Mono)
663

664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
  (** set of values *)

type tmp = t
module OrderedValue =
  struct
    type t = tmp
    let compare = compare
  end
module ValueSet = Set.Make(OrderedValue)

let ( |<| ) x y = compare x y < 0
let ( |>| ) x y = compare x y > 0
let ( |<=| ) x y = let c = compare x y in c < 0 || c = 0
let ( |>=| ) x y = let c = compare x y in c > 0 || c = 0
let ( |=| ) x y = compare x y = 0
679
let equal = ( |=| )
680
let ( |<>| ) x y = compare x y <> 0
681

682
683
684
685
686
687
(*
let rec concat l1 l2 = match l1 with
  | Pair (x,y) -> Pair (x, concat y l2)
  | String_latin1 (s,i,j,q) -> String_latin1 (s,i,j, concat q l2)
  | String_utf8 (s,i,j,q) -> String_utf8 (s,i,j, concat q l2)
  | q -> l2
688

689
690
691
let rec flatten = function
  | Pair (x,y) -> concat x (flatten y)
  | q -> q
692

693
*)
694
695


696
let () = dump_forward := dump_xml
697
698
699

let get_pair v =
  match normalize v with
700
    | Pair (x,y,_) -> (x,y)
701
702
703
704
705
706
    | _ -> assert false

(* TODO: tail-rec version of get_sequence *)

let rec get_sequence v =
  match normalize v with
707
    | Pair (x,y,_) -> x :: (get_sequence y)
708
709
710
711
    | _ -> []

let rec get_sequence_rev accu v =
  match normalize v with
712
    | Pair (x,y,_) -> get_sequence_rev (x::accu) y
713
714
715
716
    | _ -> accu

let get_sequence_rev v = get_sequence_rev [] v

717
718
let rec fold_sequence f accu v =
  match normalize v with
719
    | Pair (x,y,_) -> fold_sequence f (f accu x) y
720
721
    | _ -> accu

722
723
724
725
726
727
let atom_ascii s =
  Atom (Atoms.V.mk_ascii s)

let get_variant = function
  | Atom a -> Atoms.V.get_ascii a, None
  | v -> match normalize v with
728
      | Pair (Atom a,x,sigma) -> Atoms.V.get_ascii a, Some x
729
730
731
      | _ -> assert false

let label_ascii s =
732
  Label.mk_ascii s
733

734
let record (l : (label * t) list) =
735
  Record (Imap.create (Array.of_list (Obj.magic l)),Mono)
736

737
738
739
let record_ascii l =
  record (List.map (fun (l,v) -> (label_ascii l, v)) l)

740
741
742

let get_field v l =
  match v with
743
    | Record (fields,sigma) -> Imap.find fields (Upool.int l)
744
    | _ -> raise Not_found
745

746
747
let get_field_ascii v l = get_field v (label_ascii l)

748
749
750
751
752
753
754
let abstract a v =
  Abstract (a,Obj.repr v)

let get_abstract = function
  | Abstract (_,v) -> Obj.magic v
  | _ -> assert false
  
755
756
let get_label = Upool.int (label_ascii "get")
let set_label = Upool.int (label_ascii "set")
757
let mk_rf ~get ~set =
758
  Imap.create [| get_label, get; set_label, set |]
759

760
761
let mk_ref t v =
  let r = ref v in
762
763
764
  let get = Abstraction (Some [Sequence.nil_type, t], (fun _ -> !r),Mono)
  and set = Abstraction (Some [t, Sequence.nil_type], (fun x -> r := x; nil),Mono) in
  Record (mk_rf ~get ~set,Mono)
765
766
767


let mk_ext_ref t get set =
768
769
  let get = Abstraction (
    (match t with Some t -> Some [Sequence.nil_type, t] | None -> None),
770
    (fun _ -> get ()),Mono)
771
772
  and set = Abstraction (
    (match t with Some t -> Some [t, Sequence.nil_type] | None -> None), 
773
    (fun v -> set v; nil),Mono)
774
  in
775
  Record (mk_rf ~get ~set,Mono)
776
  
777
778
779
780
781
782
783
let ocaml2cduce_int i =
  Integer (Intervals.V.from_int i)

let cduce2ocaml_int = function
  | Integer i -> Intervals.V.get_int i
  | _ -> assert false

784
785
786
787
788
789
790
791
792
793
794
795
796
797
let ocaml2cduce_int32 i =
  Integer (Intervals.V.from_int32 i)

let cduce2ocaml_int32 = function
  | Integer i -> Intervals.V.to_int32 i
  | _ -> assert false

let ocaml2cduce_int64 i =
  Integer (Intervals.V.from_int64 i)

let cduce2ocaml_int64 = function
  | Integer i -> Intervals.V.to_int64 i
  | _ -> assert false

798
let ocaml2cduce_string s = string_latin1 (String.copy s)
799

800
let cduce2ocaml_string = get_string_latin1 (* Result is already fresh *)
801

802
let ocaml2cduce_string_utf8 s = string_utf8 (U.mk (String.copy (U.get_str s)))
803

804
let cduce2ocaml_string_utf8 s = fst (get_string_utf8 s) (* Result is already fresh *)
805

806
807
808
let ocaml2cduce_char c =
  Char (Chars.V.mk_char c)

809
810
811
let ocaml2cduce_wchar c =
  Char (Chars.V.mk_int c)

812
813
814
let cduce2ocaml_char = function
  | Char c -> Chars.V.to_char c 
  | _ -> assert false
815

816
817
818
819
820
821
let ocaml2cduce_bigint i =
  Integer (Intervals.V.from_bigint i)

let cduce2ocaml_bigint = function
  | Integer i -> Intervals.V.get_bigint i
  | _ -> assert false
822

823
824
let ocaml2cduce_atom q = Atom q
let cduce2ocaml_atom = function Atom a -> a | _ -> assert false
825
826
827
828

let print_utf8 v =
  print_string (U.get_str v);
  flush stdout
829

830
831

let float n =
832
  Abstract ("float", Obj.repr n)
833

Pietro Abate's avatar
Pietro Abate committed
834
835
836
let cdata n =
  Abstract ("cdata", Obj.repr n)

837
838
let cduce2ocaml_option f v =
  match normalize v with
839
    | Pair (x,y,sigma) -> Some (f x)
840
841
842
    | _ -> None

let ocaml2cduce_option f = function
843
  | Some x -> Pair (f x, nil,Mono)
844
845
  | None -> nil

846
847
let add v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.add x y)
848
  | (Record (r1,sigma1), Record (r2,sigma2)) -> Record (Imap.merge r1 r2,Mono) (* XXX *)
849
850
  | _ -> assert false

851
let merge v1 v2 = match (v1,v2) with
852
  | (Record (r1,sigma1), Record (r2,sigma2)) -> Record (Imap.merge r1 r2,Mono) (* XXX *)
853
854
  | _ -> assert false

855
856
857
858
let sub v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.sub x y)
  | _ -> assert false

859
860
861
862
863
864
865
866
let mul v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.mult x y)
  | _ -> assert false

let div v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.div x y)
  | _ -> assert false

867
868
869
870
let modulo v1 v2 = match (v1,v2) with
  | (Integer x, Integer y) -> Integer (Intervals.V.modulo x y)
  | _ -> assert false

871
872
let pair v1 v2 = Pair (v1,v2,Mono)
let xml v1 v2 v3 = Xml (v1,v2,v3,Mono)
873
874
875
876
877

let mk_record labels fields =
  let l = ref [] in
  assert (Array.length labels == Array.length fields);
  for i = 0 to Array.length labels - 1 do
878
    l := (labels.(i),fields.(i)) :: !l;
879
  done;
880
  record !l
881
  
882
883
884
885
(* TODO: optimize cases
     - (f x = [])
     - all chars copied or deleted *)

886
let rec transform_aux f accu = function
887
  | Pair (x,y,sigma) -> let accu = concat accu (f x) in transform_aux f accu y
888
889
890
891
  | Atom _ -> accu
  | v -> transform_aux f accu (normalize v)

let transform f v = transform_aux f nil v
892

893
let rec xtransform_aux f accu = function
894
  | Pair (x,y,sigma) -> 
895
896
897
      let accu = match f x with
	| Absent ->
	    let x = match x with
898
	      | Xml (tag, attr, child,sigma) -> 
899
		  let child = xtransform_aux f nil child in
900
901
		  Xml (tag, attr, child,sigma)
	      | XmlNs (tag, attr, child, ns,sigma) ->