Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
cduce
cduce
Commits
9ccc9725
Commit
9ccc9725
authored
Jul 10, 2007
by
Pietro Abate
Browse files
[r2003-05-10 14:44:29 by cvscast] Start Unicode support. Remove more generic comparisons
Original author: cvscast Date: 2003-05-10 14:44:30+00:00
parent
f89f50ae
Changes
16
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
9ccc9725
...
...
@@ -42,7 +42,7 @@ XWEBIFACE = $(WEBIFACE:.cmo=.cmx)
DEBUG
=
-g
PACKAGES
=
pxp-engine,pxp-lex-iso88591,wlexing,camlp4,num,cgi
PACKAGES
=
pxp-engine,pxp-lex-iso88591,
pxp-wlex-utf8,
wlexing,camlp4,num,cgi
OCAMLCP
=
ocamlc
OCAMLC
=
ocamlfind
$(OCAMLCP)
-package
$(PACKAGES)
OCAMLOPT
=
ocamlfind ocamlopt
-package
$(PACKAGES)
...
...
depend
View file @
9ccc9725
...
...
@@ -11,13 +11,15 @@ parser/ast.cmx: types/ident.cmx parser/location.cmx types/types.cmx
parser/location.cmo: parser/location.cmi
parser/location.cmx: parser/location.cmi
parser/parser.cmo: parser/ast.cmo types/atoms.cmi types/builtin.cmo \
types/chars.cmi types/ident.cmo types/intervals.cmi parser/location.cmi \
types/sequence.cmi types/types.cmi parser/wlexer.cmo parser/parser.cmi
types/chars.cmi misc/encodings.cmi types/ident.cmo types/intervals.cmi \
parser/location.cmi types/sequence.cmi types/types.cmi parser/wlexer.cmo \
parser/parser.cmi
parser/parser.cmx: parser/ast.cmx types/atoms.cmx types/builtin.cmx \
types/chars.cmx types/ident.cmx types/intervals.cmx parser/location.cmx \
types/sequence.cmx types/types.cmx parser/wlexer.cmx parser/parser.cmi
parser/wlexer.cmo: parser/location.cmi
parser/wlexer.cmx: parser/location.cmx
types/chars.cmx misc/encodings.cmx types/ident.cmx types/intervals.cmx \
parser/location.cmx types/sequence.cmx types/types.cmx parser/wlexer.cmx \
parser/parser.cmi
parser/wlexer.cmo: misc/encodings.cmi parser/location.cmi
parser/wlexer.cmx: misc/encodings.cmx parser/location.cmx
typing/typed.cmo: types/ident.cmo parser/location.cmi types/patterns.cmi \
types/types.cmi
typing/typed.cmx: types/ident.cmx parser/location.cmx types/patterns.cmx \
...
...
@@ -80,18 +82,18 @@ runtime/print_xml.cmo: types/atoms.cmi types/chars.cmi types/ident.cmo \
types/sequence.cmi runtime/value.cmi
runtime/print_xml.cmx: types/atoms.cmx types/chars.cmx types/ident.cmx \
types/sequence.cmx runtime/value.cmx
runtime/run_dispatch.cmo: types/atoms.cmi types/chars.cmi
types/ident
.cm
o
\
types/patterns.cmi types/types.cmi runtime/value.cmi \
runtime/run_dispatch.cmo: types/atoms.cmi types/chars.cmi
misc/encodings
.cm
i
\
types/ident.cmo
types/patterns.cmi types/types.cmi runtime/value.cmi \
runtime/run_dispatch.cmi
runtime/run_dispatch.cmx: types/atoms.cmx types/chars.cmx
types/ident
.cmx \
types/patterns.cmx types/types.cmx runtime/value.cmx \
runtime/run_dispatch.cmx: types/atoms.cmx types/chars.cmx
misc/encodings
.cmx \
types/ident.cmx
types/patterns.cmx types/types.cmx runtime/value.cmx \
runtime/run_dispatch.cmi
runtime/value.cmo: types/atoms.cmi types/builtin.cmo types/chars.cmi \
types/ident.cmo types/intervals.cmi types/sequence.cmi
types/types.cmi
\
runtime/value.cmi
misc/encodings.cmi
types/ident.cmo types/intervals.cmi types/sequence.cmi \
types/types.cmi
runtime/value.cmi
runtime/value.cmx: types/atoms.cmx types/builtin.cmx types/chars.cmx \
types/ident.cmx types/intervals.cmx types/sequence.cmx
types/types.cmx
\
runtime/value.cmi
misc/encodings.cmx
types/ident.cmx types/intervals.cmx types/sequence.cmx \
types/types.cmx
runtime/value.cmi
driver/cduce.cmo: parser/ast.cmo types/builtin.cmo runtime/eval.cmi \
types/ident.cmo parser/location.cmi parser/parser.cmi types/patterns.cmi \
misc/state.cmi typing/typed.cmo typing/typer.cmi types/types.cmi \
...
...
@@ -123,5 +125,5 @@ types/types.cmi: types/atoms.cmi types/chars.cmi types/ident.cmo \
runtime/eval.cmi: types/ident.cmo typing/typed.cmo runtime/value.cmi
runtime/load_xml.cmi: runtime/value.cmi
runtime/run_dispatch.cmi: types/patterns.cmi runtime/value.cmi
runtime/value.cmi: types/atoms.cmi types/chars.cmi
types/ident
.cm
o
\
types/intervals.cmi types/types.cmi
runtime/value.cmi: types/atoms.cmi types/chars.cmi
misc/encodings
.cm
i
\
types/ident.cmo
types/intervals.cmi types/types.cmi
misc/encodings.ml
View file @
9ccc9725
(* Character code, represented as a plain integer. *)
type uchar = int

(* Common interface of the string encodings defined in this file.
   All positions are integer offsets into the underlying string. *)
module type T = sig
  (* [get s i] is the code of the character found at offset [i] of [s]. *)
  val get : string -> int -> uchar

  (* [next s i] is the offset of the character that follows the one at
     offset [i]. *)
  val next : string -> int -> int

  (* [put s i c] writes the character [c] into [s] at offset [i] and
     returns an offset (presumably the one just past the written
     character -- TODO confirm for every implementation). *)
  val put : string -> int -> uchar -> int

  (* [bytes c] is the number of bytes taken by the encoding of [c]. *)
  val bytes : uchar -> int
end
(* ISO-8859-1 codec: every character is stored in exactly one byte, so a
   character code is simply the value of that byte. *)
module Iso88591 = struct
  (* [get s i] is the code of the byte at offset [i] of [s]. *)
  let get s i = Char.code s.[i]

  (* [next s i] is the offset after [i]; characters are one byte wide. *)
  let next s i = succ i

  (* [put s i c] stores the character of code [c] at offset [i] of [s] and
     returns the offset just past it.  The byte written is derived from
     the character [c], not from the offset [i].
     [Char.chr] raises [Invalid_argument] when [c] is outside 0..255. *)
  let put s i c =
    s.[i] <- Char.chr c;
    succ i

  (* [bytes c] is always 1 in this encoding. *)
  let bytes c = 1
end
module
Utf8
=
struct
(* A [ustring] is an ordinary string and a [uindex] an ordinary integer
   offset into it; the helpers below are the conversions and bounds. *)
type ustring = string
type uindex = int

(* Offset of the beginning of any string: always 0. *)
let start_index _s = 0

(* Offset just past the last byte, i.e. the byte length of the string. *)
let end_index s = String.length s

(* Offsets are compared with physical equality. *)
let equal_index i j = i == j

(* [mk] and [get_str] are identities between [string] and [ustring]. *)
let mk str = str
let get_str ustr = ustr

(* [get_idx] is the identity between [uindex] and [int]. *)
let get_idx idx = idx
(* TODO: handle 5,6 bytes chars; report malformed UTF-8 *)
let
get
s
i
=
match
s
.
[
i
]
with
...
...
@@ -39,18 +31,71 @@ struct
((
Char
.
code
s
.
[
i
+
3
]
-
128
))
|
_
->
failwith
"Malformed UTF-8 bufffer"
(* [next s i] decodes the UTF-8 sequence that starts at byte offset [i] of
   [s] and returns the pair (decoded code, offset of the next sequence).

   The marker bits removed from the lead byte depend on the sequence
   length: 192 (110xxxxx) for two bytes, 224 (1110xxxx) for three bytes and
   240 (11110xxx) for four bytes.  Subtracting 192 in the three- and
   four-byte cases would leave marker bits inside the result.

   Lead bytes 0xF8 and above (5/6-byte forms) and stray continuation bytes
   are rejected with [Failure].  Continuation bytes themselves are not
   validated.  String indexing raises [Invalid_argument] when the sequence
   is truncated by the end of [s]. *)
let next s i =
  (* Payload of the continuation byte located at offset [k]. *)
  let cont k = Char.code s.[k] - 128 in
  match s.[i] with
  | '\000'..'\127' as c ->
      (Char.code c, i + 1)
  | '\192'..'\223' as c ->
      (((Char.code c - 192) lsl 6) lor (cont (i + 1)), i + 2)
  | '\224'..'\239' as c ->
      (((Char.code c - 224) lsl 12)
       lor ((cont (i + 1)) lsl 6)
       lor (cont (i + 2)),
       i + 3)
  | '\240'..'\247' as c ->
      (((Char.code c - 240) lsl 18)
       lor ((cont (i + 1)) lsl 12)
       lor ((cont (i + 2)) lsl 6)
       lor (cont (i + 3)),
       i + 4)
  | _ ->
      failwith "Malformed UTF-8 bufffer"
(* [advance s i] is the offset of the UTF-8 sequence following the one
   whose lead byte sits at offset [i] of [s].  Only the lead byte is
   inspected; the sequence width is 1 to 4 bytes.

   Bytes that cannot start a 1..4 byte sequence (continuation bytes
   0x80..0xBF and lead bytes 0xF8 and above, which announce the unsupported
   5/6-byte forms) raise [Failure]. *)
let advance s i =
  match s.[i] with
  | '\000'..'\127' -> i + 1
  | '\192'..'\223' -> i + 2
  | '\224'..'\239' -> i + 3
  | '\240'..'\247' -> i + 4
  | _ -> failwith "Malformed UTF-8 bufffer"
(*
let width = Array.create 256 1
let () =
for i = 192 to 223 do width.(i) <- 2 done;
for i = 224 to 249 do width.(i) <- 3 done;
for i = 240 to 248 do width.(i) <- 4 done
let
next
s
i
=
let
len
s i =
Array.unsafe_get width (Char.code s.[i])
*)
(* [store b p] appends the UTF-8 encoding of the code [p] to the buffer
   [b], using 1 to 4 bytes depending on the magnitude of [p].

   Raises [Failure "Encodings.Utf8.store"] for surrogates
   (0xd800..0xdfff), for 0xfffe and 0xffff, and for codes above 0x10ffff.
   A negative [p] makes [Char.chr] raise [Invalid_argument]. *)
let store b p =
  (* Adapted from Netstring's netconversion.ml/write_utf8 *)
  let add byte = Buffer.add_char b (Char.chr byte) in
  if p <= 127 then
    add p
  else if p <= 0x7ff then begin
    add (0xc0 lor (p lsr 6));
    add (0x80 lor (p land 0x3f))
  end
  else if p <= 0xffff then begin
    (* Refuse writing surrogate pairs, and fffe, ffff *)
    if (p >= 0xd800 && p < 0xe000) || p >= 0xfffe then
      failwith "Encodings.Utf8.store";
    add (0xe0 lor (p lsr 12));
    add (0x80 lor ((p lsr 6) land 0x3f));
    add (0x80 lor (p land 0x3f))
  end
  else if p <= 0x10ffff then begin
    add (0xf0 lor (p lsr 18));
    add (0x80 lor ((p lsr 12) land 0x3f));
    add (0x80 lor ((p lsr 6) land 0x3f));
    add (0x80 lor (p land 0x3f))
  end
  else
    (* Higher code points are not possible in XML: *)
    failwith "Encodings.Utf8.store"
(* Placeholder: always raises [Failure]. *)
let put _s _i _c =
  failwith "Encodings.Utf8.put: not yet implemented"

(* [copy b s i j] appends to [b] the bytes of [s] from offset [i]
   (inclusive) up to offset [j] (exclusive). *)
let copy b s i j =
  let len = j - i in
  Buffer.add_substring b s i len

(* Placeholder: always raises [Failure]. *)
let bytes _c =
  failwith "Encodings.Utf8.bytes: not yet implemented"

(* [get_substr s i j] is the substring of [s] from offset [i] (inclusive)
   up to offset [j] (exclusive). *)
let get_substr s i j =
  let len = j - i in
  String.sub s i len
end
misc/encodings.mli
View file @
9ccc9725
type
uchar
=
int
module
type
T
=
module
Utf8
:
sig
val
get
:
string
->
int
->
uchar
val
next
:
string
->
int
->
int
type
ustring
type
uindex
val
put
:
string
->
int
->
uchar
->
int
val
bytes
:
uchar
->
int
end
val
end_index
:
ustring
->
uindex
val
start_index
:
ustring
->
uindex
val
equal_index
:
uindex
->
uindex
->
bool
val
mk
:
string
->
ustring
val
get_str
:
ustring
->
string
val
get_idx
:
uindex
->
int
val
get
:
ustring
->
uindex
->
uchar
val
advance
:
ustring
->
uindex
->
uindex
val
next
:
ustring
->
uindex
->
uchar
*
uindex
module
Iso88591
:
T
module
Utf8
:
T
val
store
:
Buffer
.
t
->
uchar
->
unit
val
copy
:
Buffer
.
t
->
ustring
->
uindex
->
uindex
->
unit
val
get_substr
:
ustring
->
uindex
->
uindex
->
string
end
parser/parser.ml
View file @
9ccc9725
...
...
@@ -36,12 +36,16 @@ let string_regexp = Star (Elem char)
let
cst_nil
=
mknoloc
(
Cst
(
Types
.
Atom
Sequence
.
nil_atom
))
let
seq_of_string
pos
s
=
let
s
=
Encodings
.
Utf8
.
mk
s
in
(* What about locations when input file is not Utf8 ? *)
let
(
pos
,_
)
=
pos
in
let
rec
aux
accu
i
=
if
(
i
=
0
)
then
accu
else
aux
(((
pos
+
i
,
pos
+
i
+
1
)
,
s
.
[
i
-
1
])
::
accu
)
(
i
-
1
)
in
aux
[]
(
String
.
length
s
)
let
rec
aux
pos
i
j
=
if
Encodings
.
Utf8
.
equal_index
i
j
then
[]
else
let
(
c
,
i
)
=
Encodings
.
Utf8
.
next
s
i
in
((
pos
,
pos
+
1
)
,
c
)
::
(
aux
(
pos
+
1
)
i
j
)
in
aux
pos
(
Encodings
.
Utf8
.
start_index
s
)
(
Encodings
.
Utf8
.
end_index
s
)
exception
Error
of
string
let
error
(
i
,
j
)
s
=
Location
.
raise_loc
i
j
(
Error
s
)
...
...
@@ -50,14 +54,14 @@ let make_record loc r =
LabelMap
.
from_list
(
fun
_
_
->
error
loc
"Duplicated record field"
)
r
let
parse_char
loc
s
=
(* TODO: Unicode *)
if
String
.
length
s
<>
1
then
error
loc
"Character litteral must have length 1"
;
s
.
[
0
]
let
s
=
seq_of_string
loc
s
in
match
s
with
|
[
_
,
c
]
->
c
|
_
->
error
loc
"Character litteral must have length 1"
let
char_list
pos
s
=
let
s
=
seq_of_string
pos
s
in
List
.
map
(
fun
(
loc
,
c
)
->
mk
loc
(
Cst
(
Types
.
Char
(
Chars
.
mk_
char
c
))))
s
List
.
map
(
fun
(
loc
,
c
)
->
mk
loc
(
Cst
(
Types
.
Char
(
Chars
.
mk_
int
c
))))
s
let
include_stack
=
ref
[]
...
...
@@ -285,14 +289,14 @@ EXTEND
Elem
(
mk
loc
(
Constant
((
ident
a
,
c
))))
|
UIDENT
"PCDATA"
->
string_regexp
|
i
=
STRING1
;
"--"
;
j
=
STRING1
->
let
i
=
Chars
.
mk_
char
(
parse_char
loc
i
)
and
j
=
Chars
.
mk_
char
(
parse_char
loc
j
)
in
let
i
=
Chars
.
mk_
int
(
parse_char
loc
i
)
and
j
=
Chars
.
mk_
int
(
parse_char
loc
j
)
in
Elem
(
mk
loc
(
Internal
(
Types
.
char
(
Chars
.
char_class
i
j
))))
|
s
=
STRING1
->
let
s
=
seq_of_string
loc
s
in
List
.
fold_right
(
fun
(
loc
,
c
)
accu
->
let
c
=
Chars
.
mk_
char
c
in
let
c
=
Chars
.
mk_
int
c
in
let
c
=
Chars
.
atom
c
in
Seq
(
Elem
(
mk
loc
(
Internal
(
Types
.
char
c
)))
,
accu
))
s
...
...
@@ -356,7 +360,7 @@ EXTEND
mk
loc
(
Internal
(
Types
.
char
(
Chars
.
atom
(
Chars
.
mk_
char
c
)))))
s
in
(
Chars
.
mk_
int
c
)))))
s
in
let
s
=
s
@
[
mk
loc
(
Internal
(
Sequence
.
nil_type
))]
in
multi_prod
loc
s
]
...
...
@@ -375,8 +379,7 @@ EXTEND
char
:
[
[
c
=
STRING1
->
Chars
.
mk_char
(
parse_char
loc
c
)
|
"!"
;
i
=
INT
->
Chars
.
mk_int
(
int_of_string
i
)
]
[
c
=
STRING1
->
Chars
.
mk_int
(
parse_char
loc
c
)
]
];
...
...
parser/wlexer.ml
View file @
9ccc9725
...
...
@@ -76,28 +76,34 @@ let nb_classes = 34
exception
Unterminated_string_in_comment
(* Buffer for string literals *)
(* Buffer for string literals
: always encoded in Utf8
*)
let
string_buff
=
Buffer
.
create
1024
let
store_char
=
Buffer
.
add_char
string_buff
let
store_ascii
=
Buffer
.
add_char
string_buff
let
store_char
=
Buffer
.
add_string
string_buff
let
store_code
=
Encodings
.
Utf8
.
store
string_buff
let
get_stored_string
()
=
let
s
=
Buffer
.
contents
string_buff
in
Buffer
.
clear
string_buff
;
s
let
store_special
=
function
|
'
n'
->
store_
char
'\n'
|
'
r'
->
store_
char
'\r'
|
'
t'
->
store_
char
'\t'
|
'
n'
->
store_
ascii
'\n'
|
'
r'
->
store_
ascii
'\r'
|
'
t'
->
store_
ascii
'\t'
|
c
->
raise
(
Illegal_character
'\\'
)
(* Start offset of the string literal currently being lexed. *)
let string_start_pos = ref 0

(* Start offsets of the comments currently open, innermost first. *)
let comment_start_pos = ref ([] : int list)
(* [char_for_decimal_code s] drops the first character of [s], reads the
   remainder as an integer and returns the character with that code.
   The code must be below 256 (checked by an assertion). *)
let char_for_decimal_code s =
  let digits = String.sub s 1 (String.length s - 1) in
  let code = int_of_string digits in
  (* TODO: handle Unicode *)
  assert (code < 256);
  Char.chr code
(* [numeric_char s] reads as an integer the text of [s] stripped of its
   first and last characters. *)
let numeric_char s =
  let inner_len = String.length s - 2 in
  let digits = String.sub s 1 inner_len in
  int_of_string digits
(* [hexa_char s] reads every character of [s] except the last one as a
   hexadecimal digit, most significant first, and returns the resulting
   number.

   Letters [a]-[f] and [A]-[F] count as 10..15.  Any other character keeps
   the original arithmetic (code minus the code of '0'); the lexer rule is
   presumably expected to only let hexadecimal digits through -- TODO
   confirm against wlexer.mll. *)
let hexa_char s =
  let digit = function
    | 'a'..'f' as c -> Char.code c - Char.code 'a' + 10
    | 'A'..'F' as c -> Char.code c - Char.code 'A' + 10
    | c -> Char.code c - Char.code '0'
  in
  (* The last character of [s] is not part of the number. *)
  let stop = String.length s - 1 in
  let rec aux i accu =
    if i = stop then accu
    else aux (succ i) (accu * 16 + digit s.[i])
  in
  aux 0 0
let
rec
tag_of_tag
s
i
=
match
s
.
[
i
]
with
...
...
@@ -106,125 +112,125 @@ let nb_classes = 34
let
lex_tables
=
{
Lexing
.
lex_base
=
"
\000\000\012\000\01
0
\000\0
18
\000\25
1
\255\
250\255\016
\000\255\255
\
\25
3
\255\
005\00
0\25
4
\255\
027\000\013
\000\252\255\25
1
\255\
000\000
\
\00
6
\000\
253\255
\25
5
\255\247\255\246\255\02
1
\000\047\000\
060\000
\
\0
28
\000\0
67
\000\02
6
\000\250\255\0
33
\000\02
4
\000\04
0\000\053
\000
\
\0
11
\000\01
8
\000\03
9
\000\0
37
\000\249\255\248\255\07
7\000\080
\000
\
\084\000\097\000\0
55
\000\101\000\114\000\118\000\131\000\
135\000
\
\148\000\07
1
\000
"
;
"
\000\000\012\000\01
8
\000\0
04
\000\25
4
\255\
005\000\017
\000\255\255
\
\25
1
\255\
25
0\25
5
\255\
255\016\000\253\255\020
\000\252\255\25
2
\255
\
\251\255
\00
7
\000\
007\000
\25
3
\255\247\255\246\255\0
3
2\000\047\000
\
\0
51\000\030
\000\0
35
\000\0
5
2\000\250\255\0
55
\000\02
7
\000\04
4
\000
\
\0
57
\000\0
4
1\000\0
4
3\000\0
54\000\052
\000\249\255\248\255\07
2
\000
\
\080\000
\084\000\097\000\0
72
\000\101\000\114\000\118\000\131\000
\
\135\000
\148\000\07
7
\000
"
;
Lexing
.
lex_backtrk
=
"
\255\255\255\255\255\255\255\255\255\255\
255\255\005\000
\255\255
\
\255\255\
003\000
\255\255\005\000\
003\000
\255\255\255\255\
004\000
\
\004\000\
255\255
\255\255\255\255\255\255\000\000\001\000\
002\000
\
\003\000\005\000\005\000\255\255\005\000\005\000\005\000\
005\000
\
\005\000\005\000\005\000\005\000\255\255\255\255\255\255\
004\000
\
\255\255\004\000\003\000\002\000\255\255\002\000\001\000\
255\255
\
\001\000\000\000
"
;
"
\255\255\255\255\255\255\255\255\255\255\
001\000\255\255
\255\255
\
\255\255\
255\255
\255\255\005\000\
255\255\255
\255\255\255\255\
255
\
\255\255
\004\000\
004\000
\255\255\255\255\255\255\000\000\001\000
\
\002\000
\003\000\005\000\005\000\255\255\005\000\005\000\005\000
\
\005\000\005\000\005\000\005\000\
005\000\
255\255\255\255\255\255
\
\004\000
\255\255\004\000\003\000\002\000\255\255\002\000\001\000
\
\255\255
\001\000\000\000
"
;
Lexing
.
lex_default
=
"
\02
7
\000\01
4
\000\00
5
\000\00
5
\000\000\000\
000\000
\255\255\000\000
\
\000\000\
255\255
\000\000\255\255\255\255\000\000\000\000\
255\255
\
\255\255\000\000\000\000\000\000\
000\000\255\255\
255\255\255\255
\
\255\255\255\255\255\255\
000\000\255\255
\255\255\255\255\255\255
\
\255\255\255\255\255\255\255\255\000\000\000\000\255\255\
255\255
\
"
\02
8
\000\01
6
\000\00
9
\000\00
4
\000\000\000\
255\255
\255\255\000\000
\
\000\000\
000\000
\000\000\255\255\
000\000\
255\255\000\000\000\000
\
\000\000\255\255
\255\255\000\000\000\000\000\000\255\255\255\255
\
\255\255\255\255\255\255\
255\255\000\000
\255\255\255\255\255\255
\
\255\255\255\255\255\255\255\255\
255\255\
000\000\000\000\255\255
\
\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255
\
\255\255\255\255
"
;
\255\255\255\255
\255\255
"
;
Lexing
.
lex_trans
=
"
\
019\000\
020\000\02
0
\000\021\000\022\000\023\000\024\000\02
2
\000
\
\02
5
\000\02
6
\000\00
4
\000\00
9
\000\01
3\000\028
\000\029\000\030\000
\
\03
1
\000\03
2
\000\00
4
\000\0
12
\000\0
08
\000\03
3
\000\0
09
\000\0
18
\000
\
\0
49
\000\03
4
\000\01
0
\000\03
5
\000\03
2
\000\01
5
\000\03
6
\000\00
8
\000
\
\03
6
\000\0
12
\000\04
2
\000\0
0
5\000\0
05
\000\01
6
\000\0
05
\000\00
5
\000
\
\00
7
\000\0
11
\000\01
7
\000\00
5
\000\01
7
\000\00
5
\000\00
5
\000\0
1
0\000
\
\010\000\0
06
\000\00
7
\000\04
6
\000\04
6
\000\04
6
\000\04
6
\000\0
05
\000
\
\0
37
\000\0
10
\000\0
10
\000\00
5
\000\04
7
\000\0
42
\000\00
5
\000\04
6
\000
\
\04
3
\000\0
43
\000\0
43
\000\04
3
\000\00
5
\000\00
5
\000\0
38
\000\03
9
\000
\
\0
3
9\000\0
44
\000\0
4
9\000\0
05
\000\04
3
\000\00
5
\000\0
00
\000\04
6
\000
\
\0
38\000\039
\000\0
39
\000\000\000\0
39
\000\0
39
\000\0
39
\000\0
39
\000
\
\04
1
\000\04
1
\000\04
1
\000\04
1
\000\0
43
\000\04
0
\000\000\000\000\000
\
\0
39
\000\000\000\000\000\000\000\04
1
\000\04
1
\000\04
1
\000\04
1
\000
\
\04
1
\000\04
3
\000\04
3
\000\04
3
\000\04
3
\000\000\000\04
0
\000\000\000
\
\0
39
\000\04
1
\000\04
4
\000\000\000\04
1
\000\04
3
\000\04
5
\000\04
5
\000
\
\04
5
\000\04
5
\000\04
5
\000\04
5
\000\04
5
\000\04
5
\000\000\000\000\000
\
\000\000\04
1
\000\04
5
\000\04
4
\000\000\000\04
3
\000\04
5
\000\04
6
\000
\
\04
6
\000\04
6
\000\04
6
\000\04
8
\000\04
8
\000\04
8
\000\04
8
\000\000\000
\
\04
7
\000\000\000\04
5
\000\04
6
\000\000\000\000\000\04
5
\000\04
8
\000
\
\04
8
\000\04
8
\000\04
8
\000\04
8
\000\000\000\000\000\000\000\000\000
\
\000\000\04
7
\000\000\000\04
6
\000\04
8
\000\000\000\000\000\04
8
\000
\
"
\020\000\02
1
\000\021\000\022\000\023\000\024\000\02
5\000\023
\000
\
\02
6
\000\02
7
\000\00
5
\000\00
6
\000\01
5
\000\029\000\030\000\
031\000
\
\03
2
\000\03
3
\000\00
8
\000\0
07
\000\0
12
\000\03
4
\000\0
13
\000\0
06
\000
\
\0
07
\000\03
5
\000\01
3
\000\03
6
\000\03
3
\000\01
7
\000\03
7
\000\00
7
\000
\
\03
7
\000\0
04
\000\0
1
4\000\05
0
\000\0
43
\000\01
8
\000\0
39
\000\0
4
0\000
\
\0
4
0\000\0
09
\000\01
9
\000\00
9
\000\01
9
\000\00
9
\000\00
4
\000\00
4
\000
\
\010\000\0
11
\000\0
1
0\000\04
7
\000\04
7
\000\04
7
\000\04
7
\000\0
44
\000
\
\0
44
\000\0
44
\000\0
44
\000\00
9
\000\04
8
\000\0
09
\000\00
9
\000\04
7
\000
\
\04
5
\000\0
09
\000\0
09
\000\04
4
\000\00
9
\000\00
9
\000\0
09
\000\03
8
\000
\
\0
0
9\000\0
09
\000\0
0
9\000\0
39
\000\04
0
\000\0
4
0\000\0
43
\000\04
7
\000
\
\0
50\000
\000\0
00
\000\000\
044\
000\0
40
\000\0
40
\000\0
40
\000\0
40
\000
\
\04
2
\000\04
2
\000\04
2
\000\04
2
\000\0
00
\000\04
1
\000\000\000\000\000
\
\0
40
\000\000\000\000\000\000\000\04
2
\000\04
2
\000\04
2
\000\04
2
\000
\
\04
2
\000\04
4
\000\04
4
\000\04
4
\000\04
4
\000\000\000\04
1
\000\000\000
\
\0
40
\000\04
2
\000\04
5
\000\000\000\04
2
\000\04
4
\000\04
6
\000\04
6
\000
\
\04
6
\000\04
6
\000\04
6
\000\04
6
\000\04
6
\000\04
6
\000\000\000\000\000
\
\000\000\04
2
\000\04
6
\000\04
5
\000\000\000\04
4
\000\04
6
\000\04
7
\000
\
\04
7
\000\04
7
\000\04
7
\000\04
9
\000\04
9
\000\04
9
\000\04
9
\000\000\000
\
\04
8
\000\000\000\04
6
\000\04
7
\000\000\000\000\000\04
6
\000\04
9
\000
\
\04
9
\000\04
9
\000\04
9
\000\04
9
\000\000\000\000\000\000\000\000\000
\
\000\000\04
8
\000\000\000\04
7
\000\04
9
\000\000\000\000\000\04
9
\000
\
\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000
\
\000\000\000\000\000\000\000\000\04
8
\000\000\000
"
;
\000\000\000\000\000\000\000\000\04
9
\000\000\000
"
;
Lexing
.
lex_check
=
"
\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000
\
\000\000\000\000\00
2
\000\00
9
\000\001\000\000\000\000\000\000\000
\
\000\000\000\000\00
3
\000\0
12
\000\0
06
\000\000\000\0
06
\000\0
1
6\000
\
\0
2
1\000\000\000\01
5
\000\000\000\000\000\001\000\000\000\0
11
\000
\
\000\000\01
1
\000\0
24
\000\02
6
\000\02
6
\000\001\000\02
9
\000\0
3
2\000
\
\0
0
2\000\00
2
\000\001\000\02
8
\000\001\000\0
33
\000\0
28
\000\0
06
\000
\
\00
6
\000\00
3
\000\00
3
\000\02
2
\000\02
2
\000\02
2
\000\02
2
\000\0
30
\000
\
\0
3
4\000\0
11
\000\0
11
\000\03
5
\000\02
2
\000\0
4
2\000\0
31
\000\02
2
\000
\
\02
3
\000\02
3
\000\0
2
3\000\02
3
\000\0
30
\000\03
1
\000\0
25
\000\0
2
5\000
\
\0
25
\000\0
2
3\000\0
49
\000\0
25
\000\0
2
3\000\0
25
\000\
255\255
\02
2
\000
\
\0
38\000\038\000\038\000
\255\255\0
39
\000\0
39
\000\0
39
\000\0
39
\000
\
\04
0
\000\04
0
\000\04
0
\000\04
0
\000\
023\000\039
\000\255\255\255\255
\
\0
39
\000\255\255\255\255\255\255\04
0
\000\04
1
\000\04
1
\000\04
1
\000
\
\04
1
\000\04
3
\000\04
3
\000\04
3
\000\04
3
\000\255\255\04
1
\000\255\255
\
\0
39
\000\04
1
\000\04
3
\000\255\255\04
0\000\043
\000\044\000\04
4
\000
\
\04
4
\000\04
4
\000\04
5
\000\04
5
\000\04
5
\000\04
5
\000\255\255\255\255
\
\255\255\04
1
\000\04
4
\000\04
5
\000\255\255\04
3
\000\04
5
\000\04
6
\000
\
\04
6
\000\04
6
\000\04
6
\000\04
7
\000\04
7
\000\04
7
\000\04
7
\000\255\255
\
\04
6
\000\255\255\04
4
\000\04
6
\000\255\255\255\255\04
5
\000\04
7
\000
\
\04
8
\000\04
8
\000\04
8
\000\04
8
\000\255\255\255\255\255\255\255\255
\
\255\255\04
8
\000\255\255\04
6
\000\04
8
\000\255\255\255\255\04
7
\000
\
\000\000\000\000\00
3
\000\00
5
\000\001\000\000\000\000\000\000\000
\
\000\000\000\000\00
2
\000\0
05
\000\0
11
\000\000\000\0
11
\000\0
0
6\000
\
\01
8
\000\000\000\01
3
\000\000\000\000\000\001\000\000\000\0
06
\000
\
\000\000\01
7
\000\0
13
\000\02
2
\000\02
5
\000\001\000\02
6
\000\02
6
\000
\
\02
6
\000\0
3
0\000\001\000\02
6
\000\001\000\0
26
\000\0
11
\000\0
11
\000
\
\00
2
\000\00
2
\000\00
2
\000\02
3
\000\02
3
\000\02
3
\000\02
3
\000\0
24
\000
\
\0
2
4\000\0
24
\000\0
24
\000\03
1
\000\02
3
\000\02
7
\000\0
27
\000\02
3
\000
\
\02
4
\000\02
9
\000\03
2
\000\02
4
\000\0
29
\000\03
3
\000\0
34
\000\0
3
5\000
\
\0
31
\000\03
2
\000\0
36
\000\0
39
\000\03
9
\000\0
39
\000\
043\000
\02
3
\000
\
\0
50\000\255\255
\255\255\0
24
\000\0
40\000\040
\000\0
40
\000\0
40
\000
\
\04
1
\000\04
1
\000\04
1
\000\04
1
\000\
255\255\040
\000\255\255\255\255
\
\0
40
\000\255\255\255\255\255\255\04
1
\000\04
2
\000\04
2
\000\04
2
\000
\
\04
2
\000\04
4
\000\04
4
\000\04
4
\000\04
4
\000\255\255\04
2
\000\255\255
\
\0
40
\000\04
2
\000\04
4
\000\255\255\04
1
\000\044\000\04
5\000\045
\000
\
\04
5
\000\04
5
\000\04
6
\000\04
6
\000\04
6
\000\04
6
\000\255\255\255\255
\
\255\255\04
2
\000\04
5
\000\04
6
\000\255\255\04
4
\000\04
6
\000\04
7
\000
\
\04
7
\000\04
7
\000\04
7
\000\04
8
\000\04
8
\000\04
8
\000\04
8
\000\255\255
\
\04
7
\000\255\255\04
5
\000\04
7
\000\255\255\255\255\04
6
\000\04
8
\000
\
\04
9
\000\04
9
\000\04
9
\000\04
9
\000\255\255\255\255\255\255\255\255
\
\255\255\04
9
\000\255\255\04
7
\000\04
9
\000\255\255\255\255\04
8
\000
\
\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255
\
\255\255\255\255\255\255\255\255\04
8
\000\255\255
"
\255\255\255\255\255\255\255\255\04
9
\000\255\255
"
}
let
rec
token
engine
lexbuf
=
match
engine
lex_tables
0
lexbuf
with
0
->
(
#
5
7
"parser/wlexer.mll"
#
6
5
"parser/wlexer.mll"
token
engine
lexbuf
)
|
1
->
(
#
58
"parser/wlexer.mll"
#
66
"parser/wlexer.mll"
let
s
=
Lexing
.
lexeme
lexbuf
in
if
Hashtbl
.
mem
keywords
s
then
""
,
s
else
"LIDENT"
,
s
)
|
2
->
(
#
62
"parser/wlexer.mll"
#
70
"parser/wlexer.mll"
"UIDENT"
,
Lexing
.
lexeme
lexbuf
)
|
3
->
(
#
63
"parser/wlexer.mll"
#
71
"parser/wlexer.mll"
"INT"
,
Lexing
.
lexeme
lexbuf
)
|
4
->
(
#
64
"parser/wlexer.mll"
#
72
"parser/wlexer.mll"
let
s
=
Lexing
.
lexeme
lexbuf
in
"TAG"
,
tag_of_tag
s
1
)
|
5
->
(
#
72
"parser/wlexer.mll"
#
80
"parser/wlexer.mll"
""
,
Lexing
.
lexeme
lexbuf
)
|
6
->
(
#
75
"parser/wlexer.mll"
#
83
"parser/wlexer.mll"
let
string_start
=
Lexing
.
lexeme_start
lexbuf
in
string_start_pos
:=
string_start
;
let
double_quote
=
Lexing
.
lexeme_char
lexbuf
0
=
'
"' in
if double_quote then string2 engine lexbuf else string1
engine lexbuf;
string (Lexing.lexeme lexbuf)
engine lexbuf;
lexbuf.Lexing.lex_start_pos <-
string_start - lexbuf.Lexing.lex_abs_pos;
(if double_quote then "
STRING2
" else "
STRING1
"),
(get_stored_string()) )
| 7 -> (
#
85
"
parser
/
wlexer
.
mll
"
#
93
"
parser
/
wlexer
.
mll
"
comment_start_pos := [Lexing.lexeme_start lexbuf];
comment engine lexbuf;
token engine lexbuf )
| 8 -> (
# 9
0
"
parser
/
wlexer
.
mll
"
# 9
8
"
parser
/
wlexer
.
mll
"
"
EOI
","" )
| 9 -> (
#
92
"
parser
/
wlexer
.
mll
"
#
100
"
parser
/
wlexer
.
mll
"
error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
(Illegal_character ((Lexing.lexeme lexbuf).[0])) )
...
...
@@ -233,90 +239,77 @@ let rec token engine lexbuf =
and comment engine lexbuf =
match engine lex_tables 1 lexbuf with
0 -> (
#
98
"
parser
/
wlexer
.
mll
"
#
106
"
parser
/
wlexer
.
mll
"
comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
comment engine lexbuf;
)
| 1 -> (
# 10
2
"
parser
/
wlexer
.
mll
"
# 1
1
0 "
parser
/
wlexer
.
mll
"
comment_start_pos := List.tl !comment_start_pos;
if !comment_start_pos <> [] then comment engine lexbuf;
)
| 2 -> (
# 1
06
"
parser
/
wlexer
.
mll
"
# 1
14
"
parser
/
wlexer
.
mll
"
string_start_pos := Lexing.lexeme_start lexbuf;
let string =
if Lexing.lexeme_char lexbuf 0 = '"
'
then
string2
else
string1
in
(
try
string
engine
lexbuf
let ender = Lexing.lexeme lexbuf in
(try string ender engine lexbuf
with Location.Location (_,Unterminated_string) ->
let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_string_in_comment);
Buffer.clear string_buff;
comment engine lexbuf )
| 3 -> (
#
1
16
"parser/wlexer.mll"
# 1
23
"
parser
/
wlexer
.
mll
"
let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_comment
)
| 4 -> (
#
12
0
"parser/wlexer.mll"
# 12
7
"
parser
/
wlexer
.
mll
"
comment engine lexbuf )
| _ -> failwith "
lexing
:
empty
token
[
comment
]
"
and
string
2
engine
lexbuf
=
and string
ender
engine lexbuf =
match engine lex_tables 2 lexbuf with
0 -> (
#
124
"parser/wlexer.mll"
()
)
# 131 "
parser
/
wlexer
.
mll
"
let c = Lexing.lexeme lexbuf in
if c = ender then ()
else (store_char (Lexing.lexeme lexbuf); string ender engine lexbuf)
)
| 1 -> (
#
1
26
"parser/wlexer.mll"
store_
char
(
Lexing
.
lexeme_char
lexbuf
1
);
string
2
engine
lexbuf
)
# 1
37
"
parser
/
wlexer
.
mll
"
store_
ascii
(Lexing.lexeme_char lexbuf 1);
string
ender
engine lexbuf )
| 2 -> (
#
1
28
"parser/wlexer.mll"
# 1
39
"
parser
/
wlexer
.
mll
"
store_special
(
Lexing
.
lexeme_char
lexbuf
1
);
string1
engine
lexbuf
)
let c = Lexing.lexeme_char lexbuf 1 in