Commit f481f019 authored by Julien Lopez's avatar Julien Lopez

Fix everything. Last problem: strings with single quotes and type variables

cannot yet coexist inside brackets:
	type t = ['dt' | 'a]
is supposed to be a valid type, but for now the new syntax isn't working inside
brackets, so to write this type use:
	type t = ['dt' | `$a]
parent 3f812175
......@@ -410,17 +410,8 @@ EXTEND Gram
]
];
any: [
[ x = KEYWORD -> x
| x = IDENT -> x
| x = ANY_IN_NS -> x
| x = INT -> x
| x = STRING -> x ]
];
seq_elem: [
[ `SQUOTE; x = LIST0 any; `SQUOTE ->
let x = List.fold_left (fun x y -> x ^ y) "" x in
[ x = STRING2 ->
let s = U.mk x in
`String (_loc, U.start_index s, U.end_index s, s)
| e = expr LEVEL "no_appl" -> `Elems (_loc,e)
......@@ -575,15 +566,11 @@ EXTEND Gram
Elem (mk _loc (Constant ((ident a,c))))
| "/"; p = pat LEVEL "simple" -> Guard p
| IDENT "PCDATA" -> string_regexp
| `SQUOTE; i = LIST0 any; `SQUOTE; "--";
`SQUOTE; j = LIST0 any; `SQUOTE ->
let i = List.fold_left (fun x y -> x ^ y) "" i in
let j = List.fold_left (fun x y -> x ^ y) "" j in
| i = STRING2; "--"; j = STRING2 ->
let i = Chars.V.mk_int (parse_char _loc i)
and j = Chars.V.mk_int (parse_char _loc j) in
Elem (mk _loc (Internal (Types.char (Chars.char_class i j))))
| `SQUOTE; s = LIST0 any; `SQUOTE ->
let s = List.fold_left (fun x y -> x ^ y) "" s in
| s = STRING2 ->
List.fold_right
(fun c accu ->
let c = Chars.V.mk_int c in
......@@ -701,8 +688,7 @@ EXTEND Gram
char:
[
[ `SQUOTE; c = LIST0 any; `SQUOTE ->
let c = List.fold_left (fun x y -> x ^ y) "" c in
[ c = CHAR ->
Chars.V.mk_int (parse_char _loc c) ]
];
......
......@@ -48,7 +48,9 @@ type token =
| IDENT of string
| ANY_IN_NS of string
| INT of string
| CHAR of string
| STRING of string
| STRING2 of string
| SQUOTE
| EOI
......@@ -66,7 +68,9 @@ module Token = struct
| IDENT s -> sf "IDENT %S" s
| INT s -> sf "INT %s" s
| SQUOTE -> sf "'"
| CHAR s -> sf "CHAR \'%s\'" s
| STRING s -> sf "STRING \"%s\"" s
| STRING2 s -> sf "STRING \'%s\'" s
(* here it's not %S since the string is already escaped *)
| ANY_IN_NS s -> sf "ANY_IN_NS %S" s
| EOI -> sf "EOI"
......@@ -80,7 +84,7 @@ module Token = struct
let extract_string =
function
| KEYWORD s | IDENT s | INT s | STRING s |
| KEYWORD s | IDENT s | INT s | CHAR s | STRING s | STRING2 s |
ANY_IN_NS s -> s
| tok ->
invalid_arg ("Cannot extract a string from this token: "^
......@@ -186,6 +190,7 @@ let illegal lexbuf =
"Illegal character"
(* Flag set to true by the lexer rules while a comment is being scanned
   and reset to false once the comment rule returns. *)
let in_comment = ref false
(* Counter incremented by the opening square bracket rule and decremented
   by the closing one; the token wrapper defined after the lexer rules
   reads it to choose between token and token2. *)
let in_brackets = ref 0
(* Pair a token with the location that L.loc reports for lexbuf. *)
let return lexbuf tok = (tok, L.loc lexbuf)
(* Pair a token with an explicitly supplied location (i, j). *)
let return_loc i j tok = (tok, (i,j))
......@@ -202,17 +207,25 @@ let rec token = lexer
return lexbuf (ANY_IN_NS "")
| '-'? ['0'-'9']+ ->
return lexbuf (INT (L.utf8_lexeme lexbuf))
| [ "<>=.,:;+-*/@&{}[]()|?`!$" ]
| [ "<>=.,:;+-*/@&{}()|?`!$" ]
| "->" | "::" | ";;" | "--" | "//" | "/@" | ":=" | "\\" | "++"
| "<=" | ">=" | "<<" | ">>" | "||" | "&&" | "**" | "_"
| ".."
| ["?+*"] "?" | "#" ->
return lexbuf (KEYWORD (L.utf8_lexeme lexbuf))
| "[" -> in_brackets := !in_brackets + 1;
return lexbuf (KEYWORD (L.utf8_lexeme lexbuf))
| "]" -> in_brackets := !in_brackets - 1;
return lexbuf (KEYWORD (L.utf8_lexeme lexbuf))
| '"' ->
let start = L.lexeme_start lexbuf in
string (L.lexeme_start lexbuf) '"' lexbuf;
let s = get_stored_string () in
return_loc start (L.lexeme_end lexbuf) (STRING s)
| "'" "\\"? _ "'" ->
let s = L.utf8_lexeme lexbuf in
let s = String.make 1 (s.[String.length s - 2]) in
return lexbuf (CHAR s)
| "'" -> return lexbuf SQUOTE
| "(*" ->
in_comment := true;
......@@ -229,6 +242,67 @@ let rec token = lexer
| _ ->
illegal lexbuf
(* Alternate rule set, selected by the token wrapper whenever the
   in_brackets counter is non-zero.  Every rule yields a (token, location)
   pair built by return or return_loc.  In this rule set a single-quoted
   run of characters is lexed as one STRING2 token. *)
and token2 = lexer
(* Blanks are skipped by restarting the rule set. *)
| xml_blank+ -> token2 lexbuf
(* A qname match is emitted as IDENT carrying the whole lexeme. *)
| qname ->
let s = L.utf8_lexeme lexbuf in
return lexbuf (IDENT s)
(* ncname followed by colon-star: the payload is the lexeme without its
   last two characters. *)
| ncname ":*" ->
let s = L.utf8_sub_lexeme lexbuf 0 (L.lexeme_length lexbuf - 2) in
return lexbuf (ANY_IN_NS s)
(* Dot-colon-star yields ANY_IN_NS with an empty payload. *)
| ".:*" ->
return lexbuf (ANY_IN_NS "")
(* Optional minus sign followed by decimal digits; the payload is the
   lexeme text, not a converted integer. *)
| '-'? ['0'-'9']+ ->
return lexbuf (INT (L.utf8_lexeme lexbuf))
(* Punctuation and operators are all emitted as KEYWORD with the lexeme
   as payload.  Square brackets are absent from the character class
   because they have dedicated rules below. *)
| [ "<>=.,:;+-*/@&{}()|?`!$" ]
| "->" | "::" | ";;" | "--" | "//" | "/@" | ":=" | "\\" | "++"
| "<=" | ">=" | "<<" | ">>" | "||" | "&&" | "**" | "_"
| ".."
| ["?+*"] "?" | "#" ->
return lexbuf (KEYWORD (L.utf8_lexeme lexbuf))
(* Square brackets update the in_brackets counter before being emitted
   as KEYWORD.
   NOTE(review): the decrement has no lower bound, so an unmatched closing
   bracket drives the counter negative -- confirm this is acceptable. *)
| "[" -> in_brackets := !in_brackets + 1;
return lexbuf (KEYWORD (L.utf8_lexeme lexbuf))
| "]" -> in_brackets := !in_brackets - 1;
return lexbuf (KEYWORD (L.utf8_lexeme lexbuf))
(* Double-quoted string: the string rule consumes input up to the closing
   delimiter, get_stored_string then supplies the payload.  The location
   runs from the opening quote to the end of the last lexeme read. *)
| '"' ->
let start = L.lexeme_start lexbuf in
string (L.lexeme_start lexbuf) '"' lexbuf;
let s = get_stored_string () in
return_loc start (L.lexeme_end lexbuf) (STRING s)
(* Single-quoted run of one or more non-quote characters: the surrounding
   quotes are stripped and the body goes through unescape.  The pattern
   excludes quotes from the body, so a backslash cannot protect a quote
   here; a body ending in a backslash reaches the error branch of
   unescape. *)
| "'" [^ '\'']+ "'" ->
let s = L.utf8_lexeme lexbuf in
let s = String.sub s 1 (String.length s - 2) in
let s = unescape s (L.lexeme_start lexbuf) in
return lexbuf (STRING2 s)
(* A quote that did not start a match of the rule above. *)
| "'" -> return lexbuf SQUOTE
(* Paren-star comment: in_comment is raised while the comment rule runs,
   then lexing resumes with this rule set. *)
| "(*" ->
in_comment := true;
comment (L.lexeme_start lexbuf) lexbuf;
in_comment := false;
token2 lexbuf
(* Slash-star comment: same protocol, handled by tcomment. *)
| "/*" ->
in_comment := true;
tcomment (L.lexeme_start lexbuf) lexbuf;
in_comment := false;
token2 lexbuf
| eof ->
return lexbuf EOI
(* Anything else is handed to illegal. *)
| _ ->
illegal lexbuf
(* [unescape s start] returns [s] with backslash escapes resolved: each
   backslash is dropped and the character that follows it is kept as is.
   All other characters are copied unchanged; the empty string maps to
   the empty string.

   [start] is only used for error reporting: when [s] ends with a lone
   backslash, the result is whatever
   [error start (start + 1)] produces for the Unterminated string message.

   The output is accumulated in a Buffer and the input is walked by index,
   so the work is linear in the length of [s] instead of rebuilding both
   the accumulator and the remaining input at every character. *)
and unescape s start =
  let len = String.length s in
  let buf = Buffer.create len in
  let rec loop i =
    if i >= len then Buffer.contents buf
    else if s.[i] <> '\\' then begin
      (* Ordinary character: copy it through. *)
      Buffer.add_char buf s.[i];
      loop (i + 1)
    end
    else if i + 1 < len then begin
      (* Escaped character: keep it, skip the backslash. *)
      Buffer.add_char buf s.[i + 1];
      loop (i + 2)
    end
    else
      (* Backslash with nothing after it. *)
      error start (start + 1) "Unterminated string"
  in
  loop 0
and comment start = lexer
| "(*" ->
comment (L.lexeme_start lexbuf) lexbuf;
......@@ -274,6 +348,8 @@ and string start endchar = lexer
| eof -> error start (start+1) "Unterminated string"
| _ -> store_lexeme lexbuf; string start endchar lexbuf
(* Shadows the recursive token rule with a dispatcher: the default rule
   set is used when the in_brackets counter is zero, token2 otherwise. *)
let token lexbuf =
  match !in_brackets with
  | 0 -> token lexbuf
  | _ -> token2 lexbuf
let lexbuf = ref None
let last_tok = ref (KEYWORD "DUMMY")
......
......@@ -5,7 +5,9 @@ type token =
| IDENT of string
| ANY_IN_NS of string
| INT of string
| CHAR of string
| STRING of string
| STRING2 of string
| SQUOTE
| EOI
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment