Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
cduce
cduce
Commits
ae97d8df
Commit
ae97d8df
authored
Apr 01, 2021
by
Kim Nguyễn
Browse files
Change to utf-8 by default again.
parent
e6e927c3
Changes
2
Hide whitespace changes
Inline
Side-by-side
lang/parser/parse.ml
View file @
ae97d8df
type
encoding
=
Ascii
|
Latin1
|
Utf8
let
default_encoding
=
Utf8
let
str_encoding
=
function
|
Ascii
->
"ascii"
|
Latin1
->
"latin-1"
...
...
@@ -142,7 +144,7 @@ let rec token enc lexbuf =
Cduce_loc
.
push_source
(
`File
path
);
try
let
cs
=
Stream
.
of_channel
ic
in
let
newenc
=
ref
Latin1
in
let
newenc
=
ref
default_encoding
in
(* or ref !enc ? *)
let
newlb
=
mk_lexbuf
newenc
cs
in
let
past
=
pre_prog
(
token
newenc
newlb
)
in
...
...
@@ -173,7 +175,7 @@ let get_loc lexbuf =
(
loc1
.
Lexing
.
pos_cnum
,
loc2
.
Lexing
.
pos_cnum
)
let
protect_parser
?
global_enc
do_sync
gram
stream
=
let
enc
=
match
global_enc
with
Some
e
->
e
|
None
->
ref
Latin1
in
let
enc
=
match
global_enc
with
Some
e
->
e
|
None
->
ref
default_encoding
in
let
b
=
mk_lexbuf
enc
stream
in
try
let
f
=
token
enc
b
in
...
...
@@ -201,7 +203,7 @@ let protect_parser ?global_enc do_sync gram stream =
let
prog
=
protect_parser
false
pre_prog
let
top_phrases
=
protect_parser
~
global_enc
:
(
ref
Latin1
)
true
(
for_sedlex
Parser
.
top_phrases
)
protect_parser
~
global_enc
:
(
ref
default_encoding
)
true
(
for_sedlex
Parser
.
top_phrases
)
let
protect_exn
f
g
=
try
...
...
lang/runtime/print_xml.ml
View file @
ae97d8df
...
...
@@ -12,7 +12,7 @@ it reads the code point in utf-8 and writes it as
iso8859-1 if <= 255, otherwise calls subst.
*)
let
convert
~
(
in_enc
:
[
`Enc_utf8
])
~
(
out_enc
:
[
`Enc_iso88591
])
let
convert
~
(
in_enc
:
[
`Enc_utf8
])
~
(
out_enc
:
[
`Enc_iso88591
|
`Enc_ascii
])
~
(
subst
:
int
->
string
)
~
(
range_pos
:
int
)
~
(
range_len
:
int
)
s
=
let
buff
=
Buffer
.
create
(
range_len
lsl
2
)
in
let
open
Encodings
in
...
...
@@ -22,7 +22,7 @@ let rec loop idx =
if
idx
>=
ulen
then
Buffer
.
contents
buff
else
let
code_point
,
nidx
=
Utf8
.
next
in_s
idx
in
let
()
=
if
code_point
>
255
then
Buffer
.
add_string
buff
(
subst
code_point
)
if
code_point
>
127
then
Buffer
.
add_string
buff
(
subst
code_point
)
else
Buffer
.
add_char
buff
(
Char
.
unsafe_chr
code_point
)
in
loop
nidx
...
...
@@ -48,9 +48,9 @@ let write_data_string ~to_enc buf s =
if
len
>
0
then
match
to_enc
with
|
`Enc_utf8
->
buf
(
String
.
sub
s
i
len
)
|
`Enc_iso88591
as
to_enc
->
|
`Enc_iso88591
->
let
s'
=
convert
~
in_enc
:
`Enc_utf8
~
out_enc
:
to_enc
convert
~
in_enc
:
`Enc_utf8
~
out_enc
:
`Enc_ascii
~
subst
:
(
fun
n
->
"&#"
^
string_of_int
n
^
";"
)
~
range_pos
:
i
~
range_len
:
len
s
in
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment