Commit 36b83c45 authored by Kim Nguyễn's avatar Kim Nguyễn

Fix the handling of polymorphic variables in the lexer. The solution

to use two lexers (depending on whether we are between square brackets
or not) is too brittle: it crudely tries to parse
 ``( [whitespace] 'a  [whitespace] )'' as a variable, to force the user
to write the variable between parentheses. However, this does not scale
to types with two arguments (say [ t ('a, 'b) ]).

We use a simpler heuristic (with look ahead)

(1) try to see if the regular expression

' (anything but ', \n)* '(anything but the first letter of an identifier)

can be found. If so, we put the lexeme back in the buffer and parse it as
a string.

(2) if (1) failed, try to parse it as a variable

(3) if (2) failed, try to parse it again as a string. We are
guaranteed to fail here, but it means we have a malformed string, so we
parse it as a string to get a proper error message.

The only thing this does not cover are cases like
type t = [ 'abcd'Int ]
which was tokenized before as [, 'abcd', Int, ]
and is now tokenized as [, 'abcd, 'Int, ]
It does not seem to be a problem in practice though (since in the code
I have seen thus far, people were at least putting a space).
It is easy to emit a warning in this case, suggesting that the user add
a whitespace to get the old behaviour back.
parent 77556e8b
......@@ -325,7 +325,6 @@ let catch_exn ppf_err exn =
Format.fprintf ppf_err "@."
let parse rule input =
Ulexer.toplevel := !toplevel;
try Parser.localize_exn (fun () -> rule input)
with e -> Parser.sync (); raise e
......@@ -408,4 +407,3 @@ let () =
| [ (None,v) ] -> v
| _ -> Value.failwith' "eval: the string must evaluate to a single value"
)
......@@ -110,8 +110,6 @@ let is_fun_decl =
(fun strm ->
match Stream.npeek 3 strm with
| [ KEYWORD "fun", _; IDENT _, _; KEYWORD "(", _ ]
| [ KEYWORD "fun", _; IDENTPAR _, _; _ ]
| [ IDENTPAR _, _; _ ; _ ]
| [ IDENT _, _; KEYWORD "(", _; _ ] -> ()
| _ -> raise Stream.Failure
)
......@@ -159,12 +157,12 @@ EXTEND Gram
args = OPT [ "("; l = LIST1 [ v = PVAR -> U.mk (clean_pvar v) ] SEP ","; ")" -> l ];
"="; t = pat ->
[ mk _loc (TypeDecl (x, opt_to_list args ,t)) ]
| "using"; name = IDENT; "="; cu = [ x = IDENT -> x | x = STRING -> x ] ->
| "using"; name = IDENT; "="; cu = [ x = IDENT -> x | x = STRING2 -> x ] ->
[ mk _loc (Using (U.mk name, U.mk cu)) ]
| "open"; ids = LIST1 ident_or_keyword SEP "." ->
let ids = List.map (fun x -> ident x) ids in
[ mk _loc (Open ids) ]
| "schema"; name = IDENT; "="; uri = STRING ->
| "schema"; name = IDENT; "="; uri = STRING2 ->
protect_op "schema";
[ mk _loc (SchemaDecl (U.mk name, uri)) ]
| n = namespace_binding ->
......@@ -194,7 +192,7 @@ EXTEND Gram
| "#"; IDENT "help" -> [ mk _loc (Directive (`Help None)) ]
| "#"; IDENT "help"; "debug" -> [ mk _loc (Directive (`Help (Some "debug"))) ]
| "#"; IDENT "builtins" -> [ mk _loc (Directive `Builtins) ]
| "include"; s = STRING ->
| "include"; s = STRING2 ->
protect_op "File inclusion";
let s = Cduce_loc.resolve_filename s in
(* avoid looping; should issue an error ? *)
......@@ -392,7 +390,7 @@ EXTEND Gram
a = expr_attrib_spec; ">"; c = expr ->
exp _loc (Xml (t, Pair (a,c)))
| "{"; r = expr_record_spec; "}" -> r
| s = STRING ->
| s = STRING2 ->
let s = U.mk s in
exp _loc (String (U.start_index s, U.end_index s, s, cst_nil))
| a = IDENT -> exp _loc (Var (ident a))
......@@ -414,7 +412,7 @@ EXTEND Gram
];
seq_elem: [
[ x = STRING2 ->
[ x = STRING1 ->
let s = U.mk x in
`String (_loc, U.start_index s, U.end_index s, s)
| e = expr LEVEL "no_appl" -> `Elems (_loc,e)
......@@ -434,7 +432,7 @@ EXTEND Gram
];
ns_expr: [
[ uri = STRING -> `Uri (Ns.Uri.mk (ident uri))
[ uri = STRING2 -> `Uri (Ns.Uri.mk (ident uri))
| ids = LIST1 ident_or_keyword SEP "." ->
let ids = List.map (fun x -> ident x) ids in
`Path ids ]
......@@ -600,11 +598,11 @@ EXTEND Gram
Elem (mk _loc (Constant ((ident a,c))))
| "/"; p = pat LEVEL "simple" -> Guard p
| IDENT "PCDATA" -> string_regexp
| i = STRING2; "--"; j = STRING2 ->
| i = STRING1; "--"; j = STRING1 ->
let i = Chars.V.mk_int (parse_char _loc i)
and j = Chars.V.mk_int (parse_char _loc j) in
Elem (mk _loc (Internal (Types.char (Chars.char_class i j))))
| s = STRING2 ->
| s = STRING1 ->
List.fold_right
(fun c accu ->
let c = Chars.V.mk_int c in
......@@ -693,7 +691,7 @@ EXTEND Gram
| "("; t = pat; ")" -> t ];
a = attrib_spec; ">"; c = pat ->
mk _loc (XmlT (t, multi_prod _loc [a;c]))
| s = STRING ->
| s = STRING2 ->
let s =
List.map
(fun c ->
......@@ -731,10 +729,9 @@ EXTEND Gram
]
];
char: [
[ c = CHAR -> Chars.V.mk_int (parse_char _loc c)
| c = STRING2 -> Chars.V.mk_int (parse_char _loc c) ]
char:
[
[ c = STRING1 -> Chars.V.mk_int (parse_char _loc c) ]
];
......
This diff is collapsed.
......@@ -5,17 +5,13 @@ type token =
| IDENT of string
| ANY_IN_NS of string
| INT of string
| CHAR of string
| STRING of string
| STRING1 of string
| STRING2 of string
| PVAR of string
| IDENTPAR of string
| EOI
exception Error of int * int * string
val toplevel : bool ref
module Loc : Loc with type t = int * int
module Token : Token with module Loc = Loc and type t = token
module Error : Error
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment