Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
cduce
cduce
Commits
deb43e17
Commit
deb43e17
authored
Apr 02, 2021
by
Kim Nguyễn
Browse files
Use menhir incremental API and a custom driver to give much better parsing error messages.
parent
89a9232d
Changes
9
Hide whitespace changes
Inline
Side-by-side
cduce.opam
View file @
deb43e17
...
...
@@ -32,6 +32,7 @@ depends: [
"ocaml" {>= "4.07.0"}
"dune" {build & >= "2.4"}
"menhir" {build & >= "20181026"}
"menhirLib" { >= "20181026" }
"cduce-types"
"odoc" {with-doc}
"sedlex" {>= "2.0"}
...
...
cduce.opam.template
View file @
deb43e17
...
...
@@ -2,6 +2,7 @@ depends: [
"ocaml" {>= "4.07.0"}
"dune" {build & >= "2.4"}
"menhir" {build & >= "20181026"}
"menhirLib" { >= "20181026" }
"cduce-types"
"odoc" {with-doc}
"sedlex" {>= "2.0"}
...
...
lang/dune
View file @
deb43e17
...
...
@@ -3,7 +3,7 @@
(library
(name cduce_core)
(public_name cduce.lib.core)
(libraries cduce-types sedlex)
(libraries cduce-types sedlex
menhirLib
)
(flags
(-open Cduce_types))
(modules
...
...
lang/parser/dune
View file @
deb43e17
(menhir
(modules parser)
(infer false))
\ No newline at end of file
(flags --table --inspection)
(infer true))
lang/parser/parse.ml
View file @
deb43e17
...
...
@@ -21,18 +21,6 @@ let invalid_byte s e =
(* Taken from Menhir/Lib/Convert.ml*)
let
for_sedlex
parser
lexer
=
let
lexbuf
:
Lexing
.
lexbuf
=
Lexing
.
from_string
""
in
let
lexer
(
lexbuf
:
Lexing
.
lexbuf
)
=
let
token
,
startp
,
endp
=
lexer
()
in
lexbuf
.
Lexing
.
lex_start_p
<-
startp
;
lexbuf
.
Lexing
.
lex_curr_p
<-
endp
;
token
in
parser
lexer
lexbuf
(**)
let
mk_lexbuf
enc
cs
=
(* Workaround the sedlex functions buffer 512 characters before
propagating them, making it unusable with interactive input *)
...
...
@@ -53,10 +41,11 @@ let mk_lexbuf enc cs =
invalid_byte
(
Bytes
.
sub_string
uchars
0
(
i
+
1
))
Utf8
in
let
lexbuf
=
let
closed
=
ref
false
in
Sedlexing
.
create
(
fun
arr
pos
_num
->
if
!
closed
then
raise
End_of_file
;
try
let
next
cs
=
Stream
.
next
cs
in
Bytes
.
set
uchars
0
'\000'
;
Bytes
.
set
uchars
1
'\000'
;
Bytes
.
set
uchars
2
'\000'
;
...
...
@@ -85,7 +74,9 @@ let mk_lexbuf enc cs =
in
arr
.
(
pos
)
<-
codepoint
;
1
with
Stream
.
Failure
->
0
)
with
Stream
.
Failure
when
not
!
closed
->
closed
:=
true
;
0
)
in
Sedlexing
.
set_position
lexbuf
Lexing
.{
pos_fname
=
""
;
pos_lnum
=
1
;
pos_bol
=
0
;
pos_cnum
=
0
};
...
...
@@ -98,8 +89,6 @@ let mk_lexbuf enc cs =
let
include_stack
=
ref
[]
let
pre_prog
=
for_sedlex
Parser
.
prog
let
close_in
ic
=
try
close_in
ic
with
_
->
()
let
exit_include
ic
=
...
...
@@ -161,6 +150,69 @@ let rec token enc lexbuf =
in
f
and
incremental
parser
token
=
let
open
Parser
.
MenhirInterpreter
in
let
init
=
parser
Lexing
.{
dummy_pos
with
pos_lnum
=
1
;
pos_bol
=
0
;
pos_cnum
=
0
}
in
let
last_token
=
ref
(
Parser
.
EOI
,
Lexing
.
dummy_pos
,
Lexing
.
dummy_pos
)
in
let
last_checkpoint
=
ref
init
in
let
par_stack
=
ref
[]
in
let
rec
loop
checkpoint
=
match
checkpoint
with
|
InputNeeded
_
->
last_checkpoint
:=
checkpoint
;
last_token
:=
token
()
;
begin
match
(
!
last_token
,
!
par_stack
)
with
|
((
LP
|
LSB
|
LCB
)
,
_
,
_
)
,
_
->
par_stack
:=
!
last_token
::
!
par_stack
|
(((
RP
|
RSB
|
RCB
)
as
b
)
,
_
,
_
)
,
(
t
,
_
,
_
)
::
rest
when
t
=
b
->
par_stack
:=
rest
|
_
->
()
(* will yield an error*)
end
;
loop
(
offer
checkpoint
!
last_token
)
|
Shifting
_
|
AboutToReduce
_
->
loop
(
resume
checkpoint
)
|
Accepted
v
->
v
|
Rejected
->
raise
Parser
.
Error
|
HandlingError
env
->
let
last_token
,
last_spos
,
last_epos
=
!
last_token
in
let
has_open
,
candidates
=
List
.
fold_left
(
fun
(
cp
,
acc
)
(
tok
,
stok
)
->
match
(
tok
,
!
par_stack
,
acceptable
!
last_checkpoint
tok
last_spos
)
with
|
RP
,
(
LP
,
_
,
_
)
::
_
,
true
|
RCB
,
(
LCB
,
_
,
_
)
::
_
,
true
->
(
true
,
stok
::
acc
)
|
RSB
,
(
LSB
,
_
,
_
)
::
_
,
true
->
(
true
,
stok
::
acc
)
|
_
,
_
,
true
->
(
cp
,
stok
::
acc
)
|
_
,
_
,
false
->
(
cp
,
acc
)
|
exception
_
->
(
cp
,
acc
))
(
false
,
[]
)
Parse_util
.
all_tokens
in
let
i
,
j
=
(
last_spos
.
Lexing
.
pos_cnum
,
last_epos
.
Lexing
.
pos_cnum
)
in
let
msg
=
Format
.
asprintf
"invalid token ``%s''"
(
Parse_util
.
string_of_token
last_token
)
^
if
has_open
then
let
op
,
i
,
j
=
List
.
hd
!
par_stack
in
let
i
,
j
=
(
i
.
Lexing
.
pos_cnum
,
i
.
Lexing
.
pos_cnum
)
in
Format
.
asprintf
"@
\n
%aThe opening parenthesis ``%s'' might be unmatched"
(
fun
fmt
l
->
Cduce_loc
.
print_loc
fmt
(
l
,
`Full
))
(
Cduce_loc
.
loc_of_pos
(
i
,
j
))
(
Parse_util
.
string_of_token
op
)
else
Format
.
asprintf
"%a"
Parse_util
.
expect_message
candidates
in
Cduce_loc
.
raise_loc
i
j
(
Ast
.
Parsing_error
msg
)
in
loop
init
and
pre_prog
lb
=
incremental
Parser
.
Incremental
.
prog
lb
let
rec
sync
f
=
match
!
last_tok
with
|
Parser
.
EOI
|
Parser
.
SEMISEMI
->
()
...
...
@@ -175,7 +227,9 @@ let get_loc lexbuf =
(
loc1
.
Lexing
.
pos_cnum
,
loc2
.
Lexing
.
pos_cnum
)
let
protect_parser
?
global_enc
do_sync
gram
stream
=
let
enc
=
match
global_enc
with
Some
e
->
e
|
None
->
ref
default_encoding
in
let
enc
=
match
global_enc
with
Some
e
->
e
|
None
->
ref
default_encoding
in
let
b
=
mk_lexbuf
enc
stream
in
try
let
f
=
token
enc
b
in
...
...
@@ -203,7 +257,8 @@ let protect_parser ?global_enc do_sync gram stream =
let
prog
=
protect_parser
false
pre_prog
let
top_phrases
=
protect_parser
~
global_enc
:
(
ref
default_encoding
)
true
(
for_sedlex
Parser
.
top_phrases
)
protect_parser
~
global_enc
:
(
ref
default_encoding
)
true
(
incremental
Parser
.
Incremental
.
top_phrases
)
let
protect_exn
f
g
=
try
...
...
@@ -216,4 +271,17 @@ let protect_exn f g =
let
sync
()
=
()
let
()
=
Printexc
.
record_backtrace
true
let
dump_tokens
fmt
cs
=
let
enc
=
ref
default_encoding
in
let
b
=
mk_lexbuf
enc
cs
in
let
f
=
token
enc
b
in
let
f
()
=
let
t
,
_
,
_
=
f
()
in
t
in
let
token
=
ref
(
f
()
)
in
while
!
token
!=
Parser
.
EOI
do
Format
.
fprintf
fmt
"%a "
(
Parse_util
.
pp_token
~
content
:
false
)
!
token
;
token
:=
f
()
done
;
Format
.
fprintf
fmt
"%a
\n
%!"
(
Parse_util
.
pp_token
~
content
:
false
)
!
token
lang/parser/parse.mli
View file @
deb43e17
...
...
@@ -3,3 +3,5 @@ val top_phrases : char Stream.t -> Ast.pmodule_item list
val
prog
:
char
Stream
.
t
->
Ast
.
pmodule_item
list
val
sync
:
unit
->
unit
val
dump_tokens
:
Format
.
formatter
->
char
Stream
.
t
->
unit
\ No newline at end of file
lang/parser/parse_util.ml
0 → 100644
View file @
deb43e17
let
pp_token
?
(
content
=
false
)
fmt
t
=
let
pp
=
Format
.
fprintf
fmt
in
let
open
Parser
in
match
t
with
|
AMP
->
pp
"%s"
"AMP"
|
AMPAMP
->
pp
"%s"
"AMPAMP"
|
AND
->
pp
"%s"
"AND"
|
ANY_IN_NS
s
->
pp
"%s"
"ANY_IN_NS"
;
if
content
then
pp
"(
\"
%s
\"
)"
s
|
AT
->
pp
"%s"
"AT"
|
BANG
->
pp
"%s"
"BANG"
|
BANGEQ
->
pp
"%s"
"BANGEQ"
|
BAR
->
pp
"%s"
"BAR"
|
BARBAR
->
pp
"%s"
"BARBAR"
|
BQUOTE
->
pp
"%s"
"BQUOTE"
|
COLCOL
->
pp
"%s"
"COLCOL"
|
COLEQ
->
pp
"%s"
"COLEQ"
|
COLON
->
pp
"%s"
"COLON"
|
COMMA
->
pp
"%s"
"COMMA"
|
DEBUG
->
pp
"%s"
"DEBUG"
|
DIV
->
pp
"%s"
"DIV"
|
DOT
->
pp
"%s"
"DOT"
|
DOTDOT
->
pp
"%s"
"DOTDOT"
|
ELSE
->
pp
"%s"
"ELSE"
|
EOI
->
pp
"%s"
"EOI"
|
EQ
->
pp
"%s"
"EQ"
|
EQQMARK
->
pp
"%s"
"EQQMARK"
|
FROM
->
pp
"%s"
"FROM"
|
FUN
->
pp
"%s"
"FUN"
|
GT
->
pp
"%s"
"GT"
|
GTEQ
->
pp
"%s"
"GTEQ"
|
GTGT
->
pp
"%s"
"GTGT"
|
HASH_ASCII
->
pp
"%s"
"HASH_ASCII"
|
HASH_DIRECTIVE
s
->
pp
"%s"
"HASH_DIRECTIVE"
;
if
content
then
pp
"(
\"
%s
\"
)"
s
|
HASH_DUMP_VALUE
->
pp
"%s"
"HASH_DUMP_VALUE"
|
HASH_LATIN1
->
pp
"%s"
"HASH_LATIN1"
|
HASH_PRINT_TYPE
->
pp
"%s"
"HASH_PRINT_TYPE"
|
HASH_UTF8
->
pp
"%s"
"HASH_UTF8"
|
IDENT
s
->
pp
"%s"
"IDENT"
;
if
content
then
pp
"(
\"
%s
\"
)"
s
|
IF
->
pp
"%s"
"IF"
|
IN
->
pp
"%s"
"IN"
|
INCLUDE
->
pp
"%s"
"INCLUDE"
|
INT
s
->
pp
"%s"
"INT"
;
if
content
then
pp
"(
\"
%s
\"
)"
s
|
LCB
->
pp
"%s"
"LCB"
|
LET
->
pp
"%s"
"LET"
|
LP
->
pp
"%s"
"LP"
|
LSB
->
pp
"%s"
"LSB"
|
LT
->
pp
"%s"
"LT"
|
LTEQ
->
pp
"%s"
"LTEQ"
|
LTLT
->
pp
"%s"
"LTLT"
|
MAP
->
pp
"%s"
"MAP"
|
MATCH
->
pp
"%s"
"MATCH"
|
MINUS
->
pp
"%s"
"MINUS"
|
MINUSGT
->
pp
"%s"
"MINUSGT"
|
MINUSMINUS
->
pp
"%s"
"MINUSMINUS"
|
MOD
->
pp
"%s"
"MOD"
|
NAMESPACE
->
pp
"%s"
"NAMESPACE"
|
OFF
->
pp
"%s"
"OFF"
|
ON
->
pp
"%s"
"ON"
|
OPEN
->
pp
"%s"
"OPEN"
|
OR
->
pp
"%s"
"OR"
|
PLUS
->
pp
"%s"
"PLUS"
|
PLUSQMARK
->
pp
"%s"
"PLUSQMARK"
|
QMARK
->
pp
"%s"
"QMARK"
|
QMARKQMARK
->
pp
"%s"
"QMARKQMARK"
|
RCB
->
pp
"%s"
"RCB"
|
REF
->
pp
"%s"
"REF"
|
RESOLVED_INCLUDE
_
->
pp
"%s"
"RESOLVED_INCLUDE"
;
if
content
then
pp
"%s"
"([ ... ])"
|
RP
->
pp
"%s"
"RP"
|
RSB
->
pp
"%s"
"RSB"
|
SCHEMA
->
pp
"%s"
"SCHEMA"
|
SELECT
->
pp
"%s"
"SELECT"
|
SEMI
->
pp
"%s"
"SEMI"
|
SEMISEMI
->
pp
"%s"
"SEMISEMI"
|
SETMINUS
->
pp
"%s"
"SETMINUS"
|
SLASH
->
pp
"%s"
"SLASH"
|
SLASHAT
->
pp
"%s"
"SLASHAT"
|
SLASHSLASH
->
pp
"%s"
"SLASHSLASH"
|
STAR
->
pp
"%s"
"STAR"
|
STARQMARK
->
pp
"%s"
"STARQMARK"
|
STARSTAR
->
pp
"%s"
"STARSTAR"
|
STRING1
s
->
pp
"%s"
"STRING1"
;
if
content
then
pp
"(
\"
%s
\"
)"
s
|
STRING2
s
->
pp
"%s"
"STRING2"
;
if
content
then
pp
"(
\"
%s
\"
)"
s
|
THEN
->
pp
"%s"
"THEN"
|
TRANSFORM
->
pp
"%s"
"TRANSFORM"
|
TRY
->
pp
"%s"
"TRY"
|
TYPE
->
pp
"%s"
"TYPE"
|
UNDERSCORE
->
pp
"%s"
"UNDERSCORE"
|
USING
->
pp
"%s"
"USING"
|
VALIDATE
->
pp
"%s"
"VALIDATE"
|
WHERE
->
pp
"%s"
"WHERE"
|
WITH
->
pp
"%s"
"WITH"
|
XTRANSFORM
->
pp
"%s"
"XTRANSFORM"
let
all_tokens
=
Parser
.
[
(
AMP
,
"&"
);
(
AMPAMP
,
"&&"
);
(
AND
,
"and"
);
(
ANY_IN_NS
"ns"
,
".:*"
);
(
AT
,
"@"
);
(
BANG
,
"!"
);
(
BANGEQ
,
"!="
);
(
BAR
,
"|"
);
(
BARBAR
,
"||"
);
(
BQUOTE
,
"`"
);
(
COLCOL
,
"::"
);
(
COLEQ
,
":="
);
(
COLON
,
":"
);
(
COMMA
,
","
);
(
DEBUG
,
"debug"
);
(
DIV
,
"div"
);
(
DOT
,
"."
);
(
DOTDOT
,
".."
);
(
ELSE
,
"else"
);
(
EOI
,
"the end of input"
);
(
EQ
,
"="
);
(
EQQMARK
,
"=?"
);
(
FROM
,
"from"
);
(
FUN
,
"fun"
);
(
GT
,
">"
);
(
GTEQ
,
">="
);
(
GTGT
,
">>"
);
(
HASH_ASCII
,
"#ascii"
);
(
HASH_DIRECTIVE
"#quiet"
,
"#quiet"
);
(
HASH_DUMP_VALUE
,
"#dump_value"
);
(
HASH_LATIN1
,
"#latin1"
);
(
HASH_PRINT_TYPE
,
"#print_type"
);
(
HASH_UTF8
,
"#utf8"
);
(
IDENT
"x"
,
"a variable"
);
(
IF
,
"if"
);
(
IN
,
"in"
);
(
INCLUDE
,
"include"
);
(
INT
"42"
,
"an integer"
);
(
LCB
,
"{"
);
(
LET
,
"let"
);
(
LP
,
"("
);
(
LSB
,
"["
);
(
LT
,
"<"
);
(
LTEQ
,
"<="
);
(
LTLT
,
"<<"
);
(
MAP
,
"map"
);
(
MATCH
,
"match"
);
(
MINUS
,
"-"
);
(
MINUSGT
,
"->"
);
(
MINUSMINUS
,
"--"
);
(
MOD
,
"mod"
);
(
NAMESPACE
,
"namespace"
);
(
OFF
,
"off"
);
(
ON
,
"on"
);
(
OPEN
,
"open"
);
(
OR
,
"or"
);
(
PLUS
,
"+"
);
(
PLUSQMARK
,
"+?"
);
(
QMARK
,
"?"
);
(
QMARKQMARK
,
"??"
);
(
RCB
,
"]"
);
(
REF
,
"ref"
);
(* (this is a pseudo token, don't consider it a candidate)
(RESOLVED_INCLUDE [], "");
*)
(
RP
,
")"
);
(
RSB
,
"]"
);
(
SCHEMA
,
"schema"
);
(
SELECT
,
"select"
);
(
SEMI
,
";"
);
(
SEMISEMI
,
";;"
);
(
SETMINUS
,
"
\\
"
);
(
SLASH
,
"/"
);
(
SLASHAT
,
"/@"
);
(
SLASHSLASH
,
"//"
);
(
STAR
,
"*"
);
(
STARQMARK
,
"?"
);
(
STARSTAR
,
"**"
);
(
STRING1
"hello"
,
"a character literal"
);
(
STRING2
"hello"
,
"a string literal"
);
(
THEN
,
"then"
);
(
TRANSFORM
,
"transform"
);
(
TRY
,
"try"
);
(
TYPE
,
"type"
);
(
UNDERSCORE
,
"_"
);
(
USING
,
"using"
);
(
VALIDATE
,
"validate"
);
(
WHERE
,
"where"
);
(
WITH
,
"with"
);
(
XTRANSFORM
,
"xtransform"
);
]
let
escape_string
s
=
let
b
=
Buffer
.
create
(
String
.
length
s
)
in
let
rec
loop
idx
end_
us
=
if
idx
=
end_
then
Buffer
.
contents
b
else
let
cp
,
nidx
=
Encodings
.
Utf8
.
next
us
idx
in
let
()
=
match
cp
with
|
10
->
Buffer
.
add_string
b
"
\\
n"
|
9
->
Buffer
.
add_string
b
"
\\
t"
|
13
->
Buffer
.
add_string
b
"
\\
r"
|
_
when
cp
<
32
||
cp
>
127
->
Buffer
.
add_char
b
'\\'
;
Buffer
.
add_string
b
(
string_of_int
cp
)
|
_
->
Buffer
.
add_char
b
(
Char
.
unsafe_chr
cp
)
in
loop
nidx
end_
us
in
let
us
=
Encodings
.
Utf8
.
mk
s
in
loop
(
Encodings
.
Utf8
.
start_index
us
)
(
Encodings
.
Utf8
.
end_index
us
)
us
let
string_of_token
tok
=
let
open
Parser
in
match
tok
with
|
ANY_IN_NS
n
->
n
^
":*"
|
HASH_DIRECTIVE
n
->
"n"
|
INT
i
->
i
|
STRING1
s
->
"'"
^
escape_string
s
^
"'"
|
STRING2
s
->
"
\"
"
^
escape_string
s
^
"
\"
"
|
RESOLVED_INCLUDE
_
->
""
|
_
->
List
.
assoc
tok
all_tokens
let
text_of_token
tok
=
try
List
.
assoc
tok
all_tokens
with
Not_found
->
""
let
expect_message
fmt
l
=
let
l
=
List
.
sort
compare
l
in
match
l
with
|
[]
->
()
|
[
t
]
->
Format
.
fprintf
fmt
"@
\n
Expecting token ``%s''"
t
|
f
::
rest
->
let
rec
loop
l
=
match
l
with
|
[]
->
()
|
[
t
]
->
Format
.
fprintf
fmt
" or ``%s''"
t
|
t
::
ll
->
Format
.
fprintf
fmt
", ``%s''"
t
;
loop
ll
in
Format
.
fprintf
fmt
"@
\n
Expecting token ``%s''"
f
;
loop
rest
lang/parser/parser.mly
View file @
deb43e17
...
...
@@ -57,7 +57,7 @@ let seq_of_string s =
let
parse_char
loc
s
=
match
seq_of_string
s
with
|
[
c
]
->
c
|
_
->
parsing_error
loc
(
Format
.
sprintf
"
I
nvalid character litteral '%s'"
s
)
|
_
->
parsing_error
loc
(
Format
.
sprintf
"
i
nvalid character litteral '%s'"
s
)
let
mk_rec_field
loc
lab
def
=
let
o
,
x
,
y
=
...
...
@@ -186,7 +186,11 @@ let id_dummy = U.mk "$$$"
%
start
<
Ast
.
pprog
>
prog
%
start
<
Ast
.
pprog
>
top_phrases
%
type
<
Ast
.
ppat
>
pat
%
type
<
Ast
.
ppat
>
pat
%
start
<
Ast
.
ppat
>
parse_pat
%
start
<
Ast
.
pexpr
>
parse_expr
%
start
<
Ast
.
pmodule_item
>
parse_pmodule_item
%%
/*
Macros
*/
...
...
@@ -201,6 +205,16 @@ x=X { mk $sloc x }
;
/*
Toplevel
definitions
*/
parse_pat
:
p
=
pat
EOI
{
p
}
;
parse_expr
:
e
=
expr
EOI
{
e
}
;
parse_pmodule_item
:
pi
=
loc
(
prog_item_
)
EOI
{
pi
}
;
top_phrases
:
...
...
@@ -265,7 +279,7 @@ match n with
|
"sample"
,
[
t
]
->
`Sample
t
|
"subtype"
,
[
t1
;
t2
]
->
`Subtype
(
t1
,
t2
)
|
"single"
,
[
t
]
->
`Single
t
|
_
->
parsing_error
$
loc
(
d
)
(
Format
.
sprintf
"
I
nvalid debug directive %s"
d
)
|
_
->
parsing_error
$
loc
(
d
)
(
Format
.
sprintf
"
i
nvalid debug directive %s"
d
)
in
Directive
(
`Debug
dir
)
}
|
d
=
HASH_DIRECTIVE
{
...
...
@@ -277,7 +291,7 @@ match n with
|
"#reinit_ns"
->
`Reinit_ns
|
"#help"
->
`Help
|
"#builtins"
->
`Builtins
|
_
->
parsing_error
$
loc
(
d
)
(
Format
.
sprintf
"
I
nvalid toplevel directive %s"
d
)
|
_
->
parsing_error
$
loc
(
d
)
(
Format
.
sprintf
"
i
nvalid toplevel directive %s"
d
)
in
Directive
dir
}
|
HASH_PRINT_TYPE
t
=
pat
{
Directive
(
`Print_type
t
)
}
...
...
@@ -348,7 +362,7 @@ constr_pat:
let
i
=
V
.
mk
i
and
j
=
V
.
mk
j
in
mk
$
sloc
(
Internal
(
Types
.
interval
(
bounded
i
j
)))
|
None
,
None
->
parsing_error
$
sloc
(
Format
.
sprintf
"
I
nvalid interval *--*"
)
(
Format
.
sprintf
"
i
nvalid interval *--*"
)
}
|
i
=
INT
{
let
open
Intervals
in
...
...
@@ -536,13 +550,13 @@ regexp_and:
match
x
,
y
with
Elem
x
,
Elem
y
->
Elem
(
mk
$
sloc
(
And
(
x
,
y
)))
|
_
->
parsing_error
$
sloc
"
C
onjunction not allowed in regular expression"
"
c
onjunction not allowed in regular expression"
}
|
x
=
regexp_and
SETMINUS
y
=
regexp_acc
{
match
x
,
y
with
Elem
x
,
Elem
y
->
Elem
(
mk
$
sloc
(
Diff
(
x
,
y
)))
|
_
->
parsing_error
$
sloc
"
D
ifference not allowed in regular expression"
"
d
ifference not allowed in regular expression"
}
|
r
=
regexp_acc
{
r
}
...
...
@@ -570,16 +584,16 @@ regexp_simple:
try
let
i
=
int_of_string
i
in
if
(
i
>
1024
)
then
raise
Exit
else
i
with
Failure
_
|
Exit
->
parsing_error
$
loc
(
i
)
"
R
epetition number too large"
with
Failure
_
|
Exit
->
parsing_error
$
loc
(
i
)
"
r
epetition number too large"
in
if
i
<=
0
then
parsing_error
$
sloc
"
R
epetition number must be a positive integer"
;
if
i
<=
0
then
parsing_error
$
sloc
"
r
epetition number must be a positive integer"
;
aux
i
Epsilon
}
|
"("
x
=
separated_nonempty_list
(
","
,
regexp
)
")"
{
match
x
with
[
x
]
->
x
|
_
->
let
x
=
List
.
map
(
function
Elem
x
->
x
|
_
->
parsing_error
$
sloc
"
M
ixing regular expressions and products"
)
x
parsing_error
$
sloc
"
m
ixing regular expressions and products
is not allowed
"
)
x
in
Elem
(
multi_prod
$
sloc
x
)
}
...
...
@@ -632,7 +646,7 @@ namespace_binding:
|
_
,
(
`Path
_
as
path
)
->
`Prefix
(
ident
name
,
path
)
|
"on"
,
`Empty
->
`Keep
true
|
"off"
,
`Empty
->
`Keep
false
|
_
->
parsing_error
$
sloc
"
I
nvalid namespace specification"
|
_
->
parsing_error
$
sloc
"
i
nvalid namespace specification"
}
;
...
...
lang/parser/sedlexer.ml
View file @
deb43e17
...
...
@@ -11,7 +11,7 @@ let string_buff = Buffer.create 1024
let
store_lexeme
lexbuf
=
let
s
=
L
.
lexeme
lexbuf
in
for
i
=
0
to
Array
.
length
s
-
1
do
Encodings
.
Utf8
.
store
string_buff
(
Uchar
.
to_int
s
.
(
i
))
;
Encodings
.
Utf8
.
store
string_buff
(
Uchar
.
to_int
s
.
(
i
))
done
let
store_ascii
=
Buffer
.
add_char
string_buff
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment