chars.ml 3.55 KB
Newer Older
1 2
module V = struct
  include Custom.Int
3

4
let max_char = 0x10FFFF
5

6 7
let check i = assert(i >= 0); assert(i <= max_char)

8 9 10 11
let mk_int c = 
  if (c < 0) || (c > max_char) then 
    failwith "Chars.mk_int: code point out of bound";
  c
12

13 14 15 16
let mk_char c =
  Char.code c
    
let to_int c = c
17

18 19 20
let to_char c =
  if (c > 255) then failwith "Chars.to_char: code-point > 255";
  Char.chr c
21

22
let print_in_string ppf = function
23 24 25
  | 10 -> Format.fprintf ppf "\\n"
  | 13 -> Format.fprintf ppf "\\r"
  | 9  -> Format.fprintf ppf "\\t"
26 27
  | 39 -> Format.fprintf ppf "\\'"
  | 34 -> Format.fprintf ppf "\\\""
28 29 30 31 32
  | c ->
      if (c < 32) || ((c >= 128) && (c < 192)) || (c > 255)
      then Format.fprintf ppf "\\%i;" c
      else Format.fprintf ppf "%c" (Char.chr c)

33 34 35 36 37 38
let print ppf c =
  Format.fprintf ppf "'%a'" print_in_string c
end
open V
      
include Custom.List(Custom.Pair(V)(V))
39

40 41 42 43
let rec check = function
  | [] -> ()
  | (a,b)::((c,d)::_ as tl) -> assert (a <= b); assert (b < c - 1); check tl
  | [(a,b)] -> assert (a <= b)  
44

45

46
let from_int c = 
47
  if (c < 0) || (c > V.max_char) then 
48 49 50 51 52 53
    failwith "Chars.from_int: code point out of bound";
  c

let to_int c = c

let empty = []
54
let any = [0,V.max_char]
55

Pietro Abate's avatar
Pietro Abate committed
56 57 58 59
(* this is to have a uniform signature of all basic types *)
type elem = V.t
let full = any

60 61 62
let char_class a b = if a<=b then [a,b] else empty

let atom a = [a,a]
63 64 65 66 67 68 69 70 71 72 73

let rec add l ((a,b) as i) = match l with
  | [] -> 
      [i]
  | ((a1,_) :: _) as l when (b < a1 - 1) -> 
      i::l
  | ((a1,b1) as i' :: l') when (a > b1 + 1) -> 
      i'::(add l' i)
  | (a1,b1) :: l' -> 
      add l' (min a a1, max b b1)

74

75 76 77 78 79 80 81 82 83 84 85
let rec neg' start l = match l with
  | [] -> [start,max_char]
  | [ (a,b) ] when b = max_char -> [start,a-1]
  | (a,b) :: l' -> (start, a-1) :: (neg' (b+1) l')

let neg = function
  | (0,b) :: l -> if b = max_char then []  else neg' (b+1) l
  | l -> neg' 0 l

let cup i1 i2 = List.fold_left add i1 i2

86 87 88
let mk_classes c =
  List.fold_left (fun accu (i,j) -> cup accu (char_class i j)) empty c

89 90 91 92 93 94 95
(* TODO: optimize this ? *)
let cap i1 i2 = neg (cup (neg i1) (neg i2))

let diff i1 i2 = neg (cup (neg i1) i2)

let is_empty i = i = []

96 97 98 99 100 101 102 103 104
let rec disjoint (a : t) b =
  match (a,b) with
    | [],_ | _,[] -> true
    | (xa,ya)::a', (xb,yb)::b' ->
	if xa = xb then false
	else
	  if xa < xb then (ya < xb) && (disjoint a' b)
	  else (yb < xa) && (disjoint a b')

105 106 107 108 109
let contains n = List.exists (fun (a,b) -> (n>=a) && (n<=b))

let sample = function
  | (i,j) :: _ -> i
  | _ -> raise Not_found
110

111 112 113 114 115
let single = function
  | [ (i,j) ] when i = j -> i
  | [] -> raise Not_found
  | _ -> raise Exit

116 117 118
let is_char = function
  | [(i,j) ] when i = j -> Some i
  | _ -> None
119 120 121 122 123 124

let print =
  List.map 
    (fun (a,b) ->
       if a = b 
       then fun ppf -> 
125
	 V.print ppf a
126
       else fun ppf -> 
127
	 if a = 0 && b = max_char then Format.fprintf ppf "Char" else
128
	 Format.fprintf ppf "%a--%a" V.print a V.print b
129
    )
130

131 132 133 134 135 136
let dump ppf t =
  match print t with
    | [] -> Format.fprintf ppf "()"
    | hd::tl -> hd ppf; List.iter (fun x -> Format.fprintf ppf "|"; x ppf) tl
  

137
type 'a map = (int * 'a) list
138 139 140

let map_map f l = List.map (fun (i,x) -> (i, f x)) l

141 142 143 144
(* Optimize lookup:
   - decision tree
   - merge adjacent segment with same result
*)
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161

let mk_map l =
  let m = 
    List.fold_left 
      (fun accu (i,x) -> 
	 List.fold_left (fun accu (a,b) -> (b,x)::accu) accu i) [] l in
  let m = 
    List.sort 
      (fun (b1,x1) (b2,x2) -> 
	 if (b1 : int) < b2 then -1 else if b1 = b2 then 0 else 1)
      m in
  m

let rec get_map c = function
  | [_,x] -> x
  | (b,x)::rem -> if (c : int) <= b then x else get_map c rem
  | [] -> assert false