Commit cf45f6ac authored by Pietro Abate's avatar Pietro Abate

[r2002-11-06 10:10:54 by cvscast] Empty log message

Original author: cvscast
Date: 2002-11-06 10:11:15+00:00
parent 792c7bed
......@@ -5,7 +5,7 @@ CLEAN_DIRS = $(DIRS) tools tests
# Objects to build
MISC = misc/pool.cmo
MISC = misc/pool.cmo misc/encodings.cmo
PARSER = parser/lexer.cmo parser/location.cmo parser/ast.cmo parser/parser.cmo
......
misc/encodings.cmo: misc/encodings.cmi
misc/encodings.cmx: misc/encodings.cmi
misc/pool.cmo: misc/pool.cmi
misc/pool.cmx: misc/pool.cmi
parser/ast.cmo: parser/location.cmi types/patterns.cmi types/types.cmi
......
(** A character code, represented as a plain integer. *)
type uchar = int
(** Common interface of the character encodings defined in this file.
    All positions are byte indexes into the string. *)
module type T =
sig
(** [get s i] is the code of the character stored at index [i] of [s]. *)
val get: string -> int -> uchar
(** [next s i] is meant to give the position of the character following
    the one that starts at index [i] (this is what [Iso88591.next]
    returns) -- TODO confirm every implementation follows this contract. *)
val next: string -> int -> int
(** [put s i c] stores character [c] into [s] at index [i] and returns an
    index; in [Iso88591] this is the index just past the written byte. *)
val put: string -> int -> uchar -> int
(** [bytes c] presumably is the number of bytes used to encode [c]
    ([Iso88591.bytes] always answers 1) -- TODO confirm. *)
val bytes: uchar -> int
end
(** ISO-8859-1 codec: every character occupies exactly one byte and its
    code is the value of that byte. *)
module Iso88591 =
struct
  (** [get s i] is the code (0..255) of the byte at index [i] of [s].
      @raise Invalid_argument if [i] is out of bounds. *)
  let get s i = Char.code s.[i]

  (** [next s i] is the index of the character following the one at [i];
      the string itself is not inspected. *)
  let next _s i = succ i

  (** [put s i c] overwrites, in place, the byte at index [i] of [s] with
      character [c] and returns the index just past it.
      @raise Invalid_argument if [c] is outside 0..255 (from [Char.chr])
      or if [i] is out of bounds. *)
  let put s i c =
    (* Store the character code [c]; the index [i] only selects the slot. *)
    s.[i] <- Char.chr c;
    succ i

  (** [bytes c] is always 1: one byte per character. *)
  let bytes _c = 1
end
(** UTF-8 codec. Only decoding ([get], [next]) is available; [put] and
    [bytes] are not implemented yet and always raise [Failure]. *)
module Utf8 =
struct
  (* TODO: handle 5,6 bytes chars; report malformed UTF-8 *)

  (** [get s i] decodes the character whose lead byte is at index [i] of
      [s]. Sequences of 1 to 4 bytes are supported. Continuation bytes are
      not validated: their marker is removed by subtracting 128.
      @raise Failure if the byte at [i] is not a 1..4 byte lead byte.
      @raise Invalid_argument if the sequence runs past the end of [s]. *)
  let get s i =
    match s.[i] with
    | '\000'..'\127' as c ->
        Char.code c
    | '\192'..'\223' as c ->
        (* 2-byte lead 110xxxxx: marker is 192. *)
        ((Char.code c - 192) lsl 6) lor
        (Char.code s.[i+1] - 128)
    | '\224'..'\239' as c ->
        (* 3-byte lead 1110xxxx: marker is 224, not 192. *)
        ((Char.code c - 224) lsl 12) lor
        ((Char.code s.[i+1] - 128) lsl 6) lor
        (Char.code s.[i+2] - 128)
    | '\240'..'\247' as c ->
        (* 4-byte lead 11110xxx: marker is 240; 248 starts a 5-byte form. *)
        ((Char.code c - 240) lsl 18) lor
        ((Char.code s.[i+1] - 128) lsl 12) lor
        ((Char.code s.[i+2] - 128) lsl 6) lor
        (Char.code s.[i+3] - 128)
    | _ -> failwith "Malformed UTF-8 bufffer"

  (* Length in bytes of the sequence introduced by each possible lead byte.
     Bytes that are not a 2..4 byte lead keep the default width of 1. *)
  let width = Array.make 256 1
  let () =
    for i = 192 to 223 do width.(i) <- 2 done;
    for i = 224 to 239 do width.(i) <- 3 done;
    for i = 240 to 247 do width.(i) <- 4 done

  (** [next s i] is the index of the character following the one whose
      lead byte is at index [i], i.e. [i] plus that character's width.
      @raise Invalid_argument if [i] is out of bounds. *)
  let next s i =
    (* [Char.code] is always within 0..255, so the unchecked read is safe. *)
    i + Array.unsafe_get width (Char.code s.[i])

  (** Not implemented: always raises [Failure]. *)
  let put _s _i _c =
    failwith "Encodings.Utf8.put: not yet implemented"

  (** Not implemented: always raises [Failure]. *)
  let bytes _c =
    failwith "Encodings.Utf8.bytes: not yet implemented"
end
(** A character code, represented as a plain integer. *)
type uchar = int
(** Common interface of the character encodings exported by this module.
    All positions are byte indexes into the string. *)
module type T =
sig
(** [get s i] is the code of the character stored at index [i] of [s]. *)
val get: string -> int -> uchar
(** [next s i] is meant to give the position of the character following
    the one that starts at index [i] (this is what [Iso88591.next]
    returns) -- TODO confirm every implementation follows this contract. *)
val next: string -> int -> int
(** [put s i c] stores character [c] into [s] at index [i] and returns an
    index; in [Iso88591] this is the index just past the written byte. *)
val put: string -> int -> uchar -> int
(** [bytes c] presumably is the number of bytes used to encode [c]
    ([Iso88591.bytes] always answers 1) -- TODO confirm. *)
val bytes: uchar -> int
end
(** Single-byte encoding: each character is one byte whose value is the
    character code. *)
module Iso88591 : T
(** UTF-8 encoding. In the accompanying implementation only [get] and
    [next] are usable; [put] and [bytes] raise [Failure] ("not yet
    implemented"). *)
module Utf8 : T
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment