Commit ae97d8df authored by Kim Nguyễn
Browse files

Change to utf-8 by default again.

parent e6e927c3
(** Text encodings distinguished by this module. *)
type encoding = Ascii | Latin1 | Utf8
(** Encoding used as the fallback when no other encoding is supplied. *)
let default_encoding = Utf8
let str_encoding = function
| Ascii -> "ascii"
| Latin1 -> "latin-1"
......@@ -142,7 +144,7 @@ let rec token enc lexbuf =
Cduce_loc.push_source (`File path);
try
let cs = Stream.of_channel ic in
let newenc = ref Latin1 in
let newenc = ref default_encoding in
(* or ref !enc ? *)
let newlb = mk_lexbuf newenc cs in
let past = pre_prog (token newenc newlb) in
......@@ -173,7 +175,7 @@ let get_loc lexbuf =
(loc1.Lexing.pos_cnum, loc2.Lexing.pos_cnum)
let protect_parser ?global_enc do_sync gram stream =
let enc = match global_enc with Some e -> e | None -> ref Latin1 in
let enc = match global_enc with Some e -> e | None -> ref default_encoding in
let b = mk_lexbuf enc stream in
try
let f = token enc b in
......@@ -201,7 +203,7 @@ let protect_parser ?global_enc do_sync gram stream =
let prog = protect_parser false pre_prog
let top_phrases =
protect_parser ~global_enc:(ref Latin1) true (for_sedlex Parser.top_phrases)
protect_parser ~global_enc:(ref default_encoding) true (for_sedlex Parser.top_phrases)
let protect_exn f g =
try
......
......@@ -12,7 +12,7 @@ it reads the code point in utf-8 and writes it as
iso8859-1 if <= 255, otherwise calls subst.
*)
let convert ~(in_enc : [ `Enc_utf8 ]) ~(out_enc : [ `Enc_iso88591 ])
let convert ~(in_enc : [ `Enc_utf8 ]) ~(out_enc : [ `Enc_iso88591|`Enc_ascii ])
~(subst : int -> string) ~(range_pos:int) ~(range_len:int) s =
let buff = Buffer.create (range_len lsl 2) in
let open Encodings in
......@@ -22,7 +22,7 @@ let rec loop idx =
if idx >= ulen then Buffer.contents buff else
let code_point, nidx = Utf8.next in_s idx in
let () =
if code_point > 255 then Buffer.add_string buff (subst code_point)
if code_point > 127 then Buffer.add_string buff (subst code_point)
else
Buffer.add_char buff (Char.unsafe_chr code_point)
in loop nidx
......@@ -48,9 +48,9 @@ let write_data_string ~to_enc buf s =
if len > 0 then
match to_enc with
| `Enc_utf8 -> buf (String.sub s i len)
| `Enc_iso88591 as to_enc ->
| `Enc_iso88591 ->
let s' =
convert ~in_enc:`Enc_utf8 ~out_enc:to_enc
convert ~in_enc:`Enc_utf8 ~out_enc:`Enc_ascii
~subst:(fun n -> "&#" ^ string_of_int n ^ ";")
~range_pos:i ~range_len:len s
in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment