Commit b9bc28b8 authored by Kim Nguyễn's avatar Kim Nguyễn
Browse files

Fix handling of included files and remove old work around to better fit with menhir/sedlex.

parent 2b4ade16
......@@ -239,7 +239,6 @@ let debug ppf tenv cenv = function
let flush_ppf ppf = Format.fprintf ppf "@."
let directive ppf tenv cenv = function
| `Ignore -> ()
| `Debug d ->
debug ppf tenv cenv d
| `Quit ->
......
......@@ -38,7 +38,6 @@ and toplevel_directive =
| `Verbose
| `Silent
| `Builtins
| `Ignore
]
......@@ -139,4 +138,4 @@ let if_then_else cond e1 e2 = Match (cond, [pat_true,e1; pat_false,e2])
let logical_and e1 e2 = if_then_else e1 e2 cst_false
let logical_or e1 e2 = if_then_else e1 cst_true e2
let logical_not e = if_then_else e cst_false cst_true
\ No newline at end of file
let logical_not e = if_then_else e cst_false cst_true
......@@ -152,6 +152,7 @@ let id_dummy = U.mk "$$$"
%token <string> STRING2
%token <string> INT
%token <string> HASH_DIRECTIVE
%token <Ast.pprog> RESOLVED_INCLUDE
%token EOI
/* Priorities */
......@@ -189,6 +190,7 @@ let id_dummy = U.mk "$$$"
%inline loc(X):
x=X { mk $sloc x }
;
%inline iloption(X):
{ [] }
| x = X { [x] }
......@@ -203,7 +205,7 @@ e = expr EOI { e }
top_phrases:
| e = multi_expr ";;" { [ mk $sloc (EvalStatement e) ] }
| p = list(prog_item) ";;" { p }
| p = list(prog_item) ";;" { List.concat p }
;
prog:
......@@ -219,27 +221,33 @@ prog:
prog_items:
";;" e = opt_prog_expr { e }
| p = prog_item { [ p ] }
| p = prog_item { p }
;
prog_item:
l = loc(prog_item_) { l }
| item = loc(prog_item_) { [ item ] }
| "include" items = RESOLVED_INCLUDE { items }
| "include" _s = STRING2 { [] }
;
%inline prog_item_:
| l = let_binding {
let f, p, e = l in
if f then FunDecl e else LetDecl (p, e)
| l = let_binding { let f, p, e = l in
if f then FunDecl e else LetDecl (p, e)
}
| n = namespace_binding {
match n with
| `Prefix (name,ns) -> Namespace (name, ns)
| `Keep b -> KeepNs b
| `Prefix (name,ns) -> Namespace (name, ns)
| `Keep b -> KeepNs b
}
| "type" x = ident_or_keyword "=" t = pat {
let id = lop $loc(x), ident x in TypeDecl (id, t) }
let id = lop $loc(x), ident x in TypeDecl (id, t) }
| "using" name = IDENT "=" cu = ident_or_string2 {
Using (U.mk name, U.mk cu) }
Using (U.mk name, U.mk cu)
}
| "open" ids = separated_nonempty_list(".", ident_or_keyword) {
Open (List.map ident ids)
}
......@@ -247,7 +255,6 @@ match n with
Cduce_loc.protect_op "schema";
SchemaDecl (U.mk name, uri)
}
| "include" _s = STRING2 { Directive (`Ignore) }
| "debug" d = IDENT "(" l = nonempty_list (pat) ")" {
let dir = match d, l with
"filter", [t; p] -> `Filter(t, p)
......
......@@ -62,10 +62,6 @@ let illegal lexbuf =
let in_comment = ref false
let return lexbuf tok = (tok, L.loc lexbuf)
let return_loc i j tok = (tok, (i, j))
let ident_or_keyword =
let l =
[
......@@ -108,70 +104,66 @@ let ident_or_keyword =
let rec token lexbuf =
match%sedlex lexbuf with
| Plus xml_blank -> token lexbuf
| qname ->
let s = L.Utf8.lexeme lexbuf in
return lexbuf (ident_or_keyword s)
| "_" -> return lexbuf (UNDERSCORE)
| "#print_type" -> return lexbuf HASH_PRINT_TYPE
| "#dump_value" -> return lexbuf HASH_DUMP_VALUE
| "#", qname -> return lexbuf
(HASH_DIRECTIVE (L.Utf8.lexeme lexbuf))
| qname -> ident_or_keyword (L.Utf8.lexeme lexbuf)
| "_" -> UNDERSCORE
| "#print_type" -> HASH_PRINT_TYPE
| "#dump_value" -> HASH_DUMP_VALUE
| "#", qname -> HASH_DIRECTIVE (L.Utf8.lexeme lexbuf)
| ncname, ":*" ->
let s = L.Utf8.sub_lexeme lexbuf 0 (L.lexeme_length lexbuf - 2) in
return lexbuf (ANY_IN_NS s)
| ".:*" -> return lexbuf (ANY_IN_NS "")
| Opt '-', Plus '0' .. '9' -> return lexbuf (INT (L.Utf8.lexeme lexbuf))
| "(" -> return lexbuf LP
| ")" -> return lexbuf RP
| "[" -> return lexbuf LSB
| "]" -> return lexbuf RSB
| "<" -> return lexbuf LT
| ">" -> return lexbuf GT
| "{" -> return lexbuf LCB
| "}" -> return lexbuf RCB
| ":" -> return lexbuf COLON
| "," -> return lexbuf COMMA
| "?" -> return lexbuf QMARK
| "=" -> return lexbuf EQ
| "+" -> return lexbuf PLUS
| "-" -> return lexbuf MINUS
| "@" -> return lexbuf AT
| "|" -> return lexbuf BAR
| "." -> return lexbuf DOT
| "`" -> return lexbuf BQUOTE
| "!" -> return lexbuf BANG
| "\\" -> return lexbuf SETMINUS
| "*" -> return lexbuf STAR
| "&" -> return lexbuf AMP
| "/" -> return lexbuf SLASH
| ";" -> return lexbuf SEMI
| ":=" -> return lexbuf COLEQ
| "->" -> return lexbuf MINUSGT
| "<=" -> return lexbuf LTEQ
| "<<" -> return lexbuf LTLT
| ">>" -> return lexbuf GTGT
| ">=" -> return lexbuf GTEQ
| "!=" -> return lexbuf BANGEQ
| "&&" -> return lexbuf AMPAMP
| "**" -> return lexbuf STARSTAR
| "/@" -> return lexbuf SLASHAT
| "//" -> return lexbuf SLASHSLASH
| "::" -> return lexbuf COLCOL
| ".." -> return lexbuf DOTDOT
| "--" -> return lexbuf MINUSMINUS
| "??" -> return lexbuf QMARKQMARK
| "+?" -> return lexbuf PLUSQMARK
| "*?" -> return lexbuf STARQMARK
| "=?" -> return lexbuf EQQMARK
| "||" -> return lexbuf BARBAR
| ";;" -> return lexbuf SEMISEMI
ANY_IN_NS s
| ".:*" -> ANY_IN_NS ""
| Opt '-', Plus '0' .. '9' -> INT (L.Utf8.lexeme lexbuf)
| "(" -> LP
| ")" -> RP
| "[" -> LSB
| "]" -> RSB
| "<" -> LT
| ">" -> GT
| "{" -> LCB
| "}" -> RCB
| ":" -> COLON
| "," -> COMMA
| "?" -> QMARK
| "=" -> EQ
| "+" -> PLUS
| "-" -> MINUS
| "@" -> AT
| "|" -> BAR
| "." -> DOT
| "`" -> BQUOTE
| "!" -> BANG
| "\\" -> SETMINUS
| "*" -> STAR
| "&" -> AMP
| "/" -> SLASH
| ";" -> SEMI
| ":=" -> COLEQ
| "->" -> MINUSGT
| "<=" -> LTEQ
| "<<" -> LTLT
| ">>" -> GTGT
| ">=" -> GTEQ
| "!=" -> BANGEQ
| "&&" -> AMPAMP
| "**" -> STARSTAR
| "/@" -> SLASHAT
| "//" -> SLASHSLASH
| "::" -> COLCOL
| ".." -> DOTDOT
| "--" -> MINUSMINUS
| "??" -> QMARKQMARK
| "+?" -> PLUSQMARK
| "*?" -> STARQMARK
| "=?" -> EQQMARK
| "||" -> BARBAR
| ";;" -> SEMISEMI
| '"' | "'" ->
let start = L.lexeme_start lexbuf in
let double_quote = L.Latin1.lexeme_char lexbuf 0 = '"' in
string (L.lexeme_start lexbuf) double_quote lexbuf;
let s = get_stored_string () in
return_loc start (L.lexeme_end lexbuf)
(if double_quote then STRING2 s else STRING1 s)
if double_quote then STRING2 s else STRING1 s
| "(*" ->
in_comment := true;
comment (L.lexeme_start lexbuf) lexbuf;
......@@ -182,7 +174,7 @@ let rec token lexbuf =
tcomment (L.lexeme_start lexbuf) lexbuf;
in_comment := false;
token lexbuf
| eof -> return lexbuf EOI
| eof -> EOI
| any -> illegal lexbuf
| _ -> assert false
......@@ -240,111 +232,15 @@ and string start double lexbuf =
string start double lexbuf
| _ -> assert false
let last_tok = ref (IDENT "$$$DUMMY")
let rec sync lb =
match !last_tok with
| SEMISEMI | EOI -> ()
| _ ->
last_tok := fst (token lb);
sync lb
let raise_clean e =
clear_buff ();
in_comment := false;
(* reinit encoding ? *)
raise e
let token lexbuf =
try
token lexbuf
with e ->
clear_buff ();
in_comment := false;
(* reinit encoding ? *)
raise e
let eat_shebang lexbuf =
match%sedlex lexbuf with Opt ("#!", Star (Compl '\n'), "\n") -> () | _ -> ()
let create_lb cs =
(* Workaround the sedlex functions buffer 512 characters before
propagating them, making it unusable with interactive input *)
let uchars = Bytes.make 4 '\000' in
L.create (fun arr pos _num ->
let module U = Encodings.Utf8 in
try
let next cs = Stream.next cs in
Bytes.set uchars 0 '\000';
Bytes.set uchars 1 '\000';
Bytes.set uchars 2 '\000';
Bytes.set uchars 3 '\000';
let c0 = next cs in
let () =
match c0 with
| '\x00' .. '\x7f' -> Bytes.set uchars 0 c0
| '\xc0' .. '\xdf' ->
Bytes.set uchars 0 c0;
Bytes.set uchars 1 (next cs)
| '\xe0' .. '\xef' ->
Bytes.set uchars 0 c0;
Bytes.set uchars 1 (next cs);
Bytes.set uchars 2 (next cs)
| '\xf0' .. '\xf7' ->
Bytes.set uchars 0 c0;
Bytes.set uchars 1 (next cs);
Bytes.set uchars 2 (next cs);
Bytes.set uchars 3 (next cs)
| _ -> raise L.MalFormed
in
let us = U.mk (Bytes.unsafe_to_string uchars) in
let uc = U.get us (U.start_index us) in
arr.(pos) <- Uchar.unsafe_of_int uc;
1
with Stream.Failure -> 0)
let include_stack = ref []
let mk () _ cs =
let lb = create_lb cs in
eat_shebang lb;
include_stack := (lb, ("", stdin)) :: !include_stack;
let rec next _ =
let rec loop () =
match token (fst (List.hd !include_stack)), !include_stack with
(Cparser.EOI, _), (buf, (_, ic)) :: (_ :: _ as tl) ->
close_in ic;
Cduce_loc.pop_source ();
include_stack := tl;
loop ()
| x,_ -> x
| exception Sedlexing.MalFormed ->
raise_clean
(Error
( Sedlexing.lexeme_end lb,
Sedlexing.lexeme_end lb,
"Unexpected character" ))
| exception Sedlexing.InvalidCodepoint i ->
raise_clean
(Error
( Sedlexing.lexeme_end lb,
Sedlexing.lexeme_end lb,
"Code point invalid for the current encoding" ))
| exception e -> raise_clean e
in
let tok, loc = loop () in
let () =
match !last_tok, tok with
Cparser.INCLUDE, Cparser.STRING2 s ->
Cduce_loc.protect_op "File inclusion";
let s = Cduce_loc.resolve_filename s in
(* avoid looping; should issue an error ? *)
(* it is possible to have looping with x/../x/../x/.. ....
Need to canonicalize filename *)
if not (List.exists (fun (_ ,(name,_)) -> s = name) !include_stack) then begin
Cduce_loc.push_source (`File s);
let ic = open_in s in
let cstream = Stream.of_channel ic in
let buff = create_lb cstream in
eat_shebang buff;
include_stack := (buff, (s, ic)) :: !include_stack;
end
| _ -> ()
in
last_tok := tok;
Some (tok, loc)
in
Stream.from next
......@@ -18,35 +18,113 @@ let for_sedlex parser =
(**)
let mk_lexbuf cs =
(* Workaround the sedlex functions buffer 512 characters before
propagating them, making it unusable with interactive input *)
let uchars = Bytes.make 4 '\000' in
let lexbuf =
Sedlexing.create (fun arr pos _num ->
let module U = Encodings.Utf8 in
try
let next cs = Stream.next cs in
Bytes.set uchars 0 '\000';
Bytes.set uchars 1 '\000';
Bytes.set uchars 2 '\000';
Bytes.set uchars 3 '\000';
let c0 = next cs in
let () =
match c0 with
| '\x00' .. '\x7f' -> Bytes.set uchars 0 c0
| '\xc0' .. '\xdf' ->
Bytes.set uchars 0 c0;
Bytes.set uchars 1 (next cs)
| '\xe0' .. '\xef' ->
Bytes.set uchars 0 c0;
Bytes.set uchars 1 (next cs);
Bytes.set uchars 2 (next cs)
| '\xf0' .. '\xf7' ->
Bytes.set uchars 0 c0;
Bytes.set uchars 1 (next cs);
Bytes.set uchars 2 (next cs);
Bytes.set uchars 3 (next cs)
| _ -> raise Sedlexing.MalFormed
in
let us = U.mk (Bytes.unsafe_to_string uchars) in
let uc = U.get us (U.start_index us) in
arr.(pos) <- Uchar.unsafe_of_int uc;
1
with Stream.Failure -> 0)
in
Sedlexing.set_position lexbuf
Lexing.{ pos_fname = "";
pos_lnum = 1;
pos_bol = 0;
pos_cnum = 0 };
Csedlexer.eat_shebang lexbuf;
lexbuf
let protect_exn f g =
try let x = f () in g (); x
with e -> g (); raise e
let stream_of ic =
Stream.from (fun _ ->
match input_char ic with
c -> Some c
| exception End_of_file -> close_in ic; None
| exception e -> close_in ic; raise e
)
let include_stack = ref []
let pre_prog = for_sedlex Cparser.prog
let rec token lexbuf =
let f = Sedlexing.with_tokenizer Csedlexer.token lexbuf in
let last_tok = ref Cparser.EOI in
let f () =
let tok, p1, p2 = f () in
let tok =
match !last_tok, tok with
| Cparser.INCLUDE, Cparser.STRING2 path ->
Cduce_loc.protect_op "File inclusion";
let path = Cduce_loc.resolve_filename path in
if List.mem path !include_stack then tok
else begin
include_stack := path :: !include_stack;
Cduce_loc.push_source (`File path);
let ic = open_in path in (* TODO Handle error *)
let cs = stream_of ic in
let newlb = mk_lexbuf cs in
let past = pre_prog (token newlb) in
Cduce_loc.pop_source ();
include_stack := List.tl !include_stack;
Cparser.RESOLVED_INCLUDE past
end
| _ -> tok
in
last_tok := tok;
(tok, p1, p2)
in f
let lexbuf_of stream =
let token_stream = Csedlexer.mk () () stream in
let buff = Lexing.from_string "" in
buff,
function (_ : Lexing.lexbuf) ->
let t, (i, j) = Stream.next token_stream in
let pos1 = { Lexing.dummy_pos with pos_cnum = i } in
let pos2 = { Lexing.dummy_pos with pos_cnum = j } in
buff.lex_start_p <- pos1;
buff.lex_curr_p <- pos2;
t
let parser gram stream =
let b, f = lexbuf_of stream in
let protect_parser gram stream =
let b = mk_lexbuf stream in
try
gram f b
gram (token b)
with
Cparser.Error ->
let loc1, loc2 = b.Lexing.lex_start_p.pos_cnum, b.Lexing.lex_curr_p.pos_cnum in
Cduce_loc.raise_loc loc1 loc2 (Error "")
let loc1, loc2 = Sedlexing.lexing_positions b in
Cduce_loc.raise_loc loc1.Lexing.pos_cnum loc2.Lexing.pos_cnum (Error "")
let prog cs = protect_parser pre_prog cs
let top_phrases cs = protect_parser (for_sedlex Cparser.top_phrases) cs
let protect_exn f g =
try let x = f () in g (); x
with e -> g (); raise e
let prog = parser Cparser.prog
let top_phrases = parser Cparser.top_phrases
let sync () = ()
let localize_exn f = (* TODO Location *)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment