Commit a53cec51 authored by Pietro Abate

[r2003-05-18 14:42:51 by cvscast] Clean-up

Original author: cvscast
Date: 2003-05-18 14:44:17+00:00
parent 1b66fe0a
@@ -8,14 +8,14 @@ CLEAN_DIRS = $(DIRS) tools tests
 MISC = misc/state.cmo misc/pool.cmo misc/encodings.cmo misc/bool.cmo \
   misc/pretty.cmo
-PARSER = parser/lexer.cmo parser/location.cmo \
+PARSER = parser/location.cmo \
   parser/wlexer.cmo \
   parser/ast.cmo parser/parser.cmo
 TYPING = typing/typed.cmo typing/typer.cmo
 TYPES = \
-  types/sortedList.cmo types/sortedMap.cmo types/boolean.cmo \
+  types/sortedList.cmo types/boolean.cmo \
   types/ident.cmo \
   types/intervals.cmo types/chars.cmo types/atoms.cmo \
   types/normal.cmo \
......
@@ -12,7 +12,7 @@ let rec is_abstraction = function
 let print_norm ppf d =
   Location.protect ppf
-    (fun ppf -> Types.Print.print_descr ppf ((*Types.normalize*) d))
+    (fun ppf -> Types.Print.print ppf ((*Types.normalize*) d))
 let print_value ppf v =
   Location.protect ppf (fun ppf -> Value.print ppf v)
@@ -120,7 +120,7 @@ let debug ppf = function
     Format.fprintf ppf "[DEBUG:accept]@\n";
     let p = Typer.pat p in
     let t = Patterns.accept p in
-    Format.fprintf ppf " %a@\n" Types.Print.print t
+    Format.fprintf ppf " %a@\n" Types.Print.print (Types.descr t)
 | `Compile (t,pl) ->
     Format.fprintf ppf "[DEBUG:compile]@\n";
     let t = Typer.typ t
......
@@ -67,4 +67,5 @@ let main () =
-let () = main (); Types.print_stat ppf_err
+let () = main ()
(* Modified from Camlp4 Plexer module *)
open Stdpp
open Token
let no_quotations = ref false
(* The string buffering machinery *)
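(* [store len c] appends [c] to the shared buffer [buff], doubling the buffer
   when it is full, and returns the new length; [mstore] appends a whole
   string and [get_buff len] extracts the first [len] characters accumulated
   so far.  Every scanner below threads this length value explicitly. *)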
let buff = ref (String.create 80)
let store len x =
if len >= String.length !buff then
buff := !buff ^ String.create (String.length !buff);
!buff.[len] <- x;
succ len
let mstore len s =
let rec add_rec len i =
if i == String.length s then len else add_rec (store len s.[i]) (succ i)
in
add_rec len 0
let get_buff len = String.sub !buff 0 len
(* The lexer *)
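(* The scanners below are written with camlp4 stream parsers.  [ident],
   [ident2] and [ident3] accumulate identifier characters, operator
   characters, or a mix of both; the numeric scanners ([base_number],
   [number], ...) return an ("INT" | "FLOAT", lexeme) pair built from the
   buffer. *)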
let rec ident len =
parser
[< ''A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
'\248'..'\255' | '0'..'9' | '_' | '\'' as c;
s >] ->
ident (store len c) s
| [< >] -> len
and ident2 len =
parser
[< ''!' | '?' | '~' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' |
'%' | '.' | ':' | '<' | '>' | '|' | '$' as c;
s >] ->
ident2 (store len c) s
| [< >] -> len
and ident3 len =
parser
[< ''0'..'9' | 'A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
'\248'..'\255' | '_' | '!' | '%' | '&' | '*' | '+' | '-' | '.' | '/' |
':' | '<' | '=' | '>' | '?' | '@' | '^' | '|' | '~' | '\'' | '$' as c;
s >] ->
ident3 (store len c) s
| [< >] -> len
and base_number len =
parser
[< ''o' | 'O'; s >] -> octal_digits (store len 'o') s
| [< ''x' | 'X'; s >] -> hexa_digits (store len 'x') s
| [< ''b' | 'B'; s >] -> binary_digits (store len 'b') s
| [< a = number len >] -> a
and octal_digits len =
parser
[< ''0'..'7' as d; s >] -> octal_digits (store len d) s
| [< >] -> "INT", get_buff len
and hexa_digits len =
parser
[< ''0'..'9' | 'a'..'f' | 'A'..'F' as d; s >] ->
hexa_digits (store len d) s
| [< >] -> "INT", get_buff len
and binary_digits len =
parser
[< ''0'..'1' as d; s >] -> binary_digits (store len d) s
| [< >] -> "INT", get_buff len
and number len =
parser
[< ''0'..'9' as c; s >] -> number (store len c) s
| [< ''.'; s >] -> decimal_part (store len '.') s
| [< ''e' | 'E'; s >] -> exponent_part (store len 'E') s
| [< >] -> "INT", get_buff len
and decimal_part len =
parser
[< ''0'..'9' as c; s >] -> decimal_part (store len c) s
| [< ''e' | 'E'; s >] -> exponent_part (store len 'E') s
| [< >] -> "FLOAT", get_buff len
and exponent_part len =
parser
[< ''+' | '-' as c; s >] -> end_exponent_part (store len c) s
| [< a = end_exponent_part len >] -> a
and end_exponent_part len =
parser
[< ''0'..'9' as c; s >] -> end_exponent_part (store len c) s
| [< >] -> "FLOAT", get_buff len
let rec skip_spaces =
parser
[< '' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s >] -> skip_spaces s
| [< >] -> ()
let error_on_unknown_keywords = ref false
let err loc msg = raise_with_loc loc (Token.Error msg)
let next_token_fun dfa find_kwd =
let keyword_or_error loc s =
try ("", find_kwd s), loc with
Not_found ->
if !error_on_unknown_keywords then err loc ("illegal token: " ^ s)
else ("", s), loc
in
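(* [keyword_or_error] classifies a lexeme: if it is in the keyword table it
   becomes a ("", kwd) token, otherwise it is either reported as illegal
   (when [error_on_unknown_keywords] is set) or passed through as ("", s).
   [next_token] below dispatches on the first character and returns a
   ((constructor, value), (start, stop)) pair of the token and its span. *)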
let rec next_token =
parser bp
[< '' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s >] -> next_token s
| [< ''('; s >] -> left_paren bp s
| [< ''#'; s >] -> spaces_tabs s; linenum bp s
| [< ''A'..'Z' | '\192'..'\214' | '\216'..'\222' as c; s >] ->
let id = get_buff (ident (store 0 c) s) in
let loc = bp, Stream.count s in
(try "", find_kwd id with
Not_found -> "UIDENT", id),
loc
| [< ''a'..'z' | '\223'..'\246' | '\248'..'\255' | '_' as c; s >] ->
let id = get_buff (ident (store 0 c) s) in
let loc = bp, Stream.count s in
(try "", find_kwd id with
Not_found -> "LIDENT", id),
loc
| [< ''1'..'9' as c; s >] ->
let tok = number (store 0 c) s in
let loc = bp, Stream.count s in tok, loc
| [< ''0'; s >] ->
let tok = base_number (store 0 '0') s in
let loc = bp, Stream.count s in tok, loc
| [< ''\''; s >] ->
(* begin match Stream.npeek 2 s with
[_; '\''] | ['\\'; _] -> *)
let tok = "CHAR", get_buff (char bp 0 s) in
let loc = bp, Stream.count s in tok, loc
(* | _ -> keyword_or_error (bp, Stream.count s) "'"
end *)
| [< ''\"'; s >] ->
let tok = "STRING", get_buff (string bp 0 s) in
let loc = bp, Stream.count s in tok, loc
| [< ''$'; s >] ->
let tok = dollar bp 0 s in let loc = bp, Stream.count s in tok, loc
| [< ''!' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' | '%' as c;
s >] ->
let id = get_buff (ident2 (store 0 c) s) in
keyword_or_error (bp, Stream.count s) id
| [< ''~' as c;
a =
parser
[< ''a'..'z' as c; len = ident (store 0 c) >] ep ->
("TILDEIDENT", get_buff len), (bp, ep)
| [< s >] ->
let id = get_buff (ident2 (store 0 c) s) in
keyword_or_error (bp, Stream.count s) id >] ->
a
| [< ''?' as c;
a =
parser
[< ''a'..'z' as c; len = ident (store 0 c) >] ep ->
("QUESTIONIDENT", get_buff len), (bp, ep)
| [< s >] ->
let id = get_buff (ident2 (store 0 c) s) in
keyword_or_error (bp, Stream.count s) id >] ->
a
| [< ''<'; s >] -> less bp s
| [< '':' as c1;
len =
parser
[< '']' | ':' | '=' | '>' as c2 >] -> store (store 0 c1) c2
| [< >] -> store 0 c1 >] ep ->
let id = get_buff len in keyword_or_error (bp, ep) id
| [< ''>' | '|' as c1;
len =
parser
[< '']' | '}' as c2 >] -> store (store 0 c1) c2
| [< a = ident2 (store 0 c1) >] -> a >] ep ->
let id = get_buff len in keyword_or_error (bp, ep) id
| [< ''[' | '{' as c1; s >] ->
let len =
match Stream.npeek 2 s with
['<'; '<' | ':'] -> store 0 c1
| _ ->
match s with parser
[< ''|' | '<' | ':' as c2 >] -> store (store 0 c1) c2
| [< >] -> store 0 c1
in
let ep = Stream.count s in
let id = get_buff len in keyword_or_error (bp, ep) id
| [< ''.';
id =
parser
[< ''.' >] -> ".."
| [< >] -> "." >] ep ->
keyword_or_error (bp, ep) id
| [< '';';
id =
parser
[< '';' >] -> ";;"
| [< >] -> ";" >] ep ->
keyword_or_error (bp, ep) id
| [< ''\\'; s >] ep -> ("LIDENT", get_buff (ident3 0 s)), (bp, ep)
| [< 'c >] ep -> keyword_or_error (bp, ep) (String.make 1 c)
| [< _ = Stream.empty >] -> ("EOI", ""), (bp, succ bp)
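(* [less] handles '<': when quotations are enabled (the default, see
   [no_quotations]), << ... >> and <:name< ... >> are scanned by [quotation]
   and yield QUOTATION tokens whose value is "name:body" (the name may be
   empty); otherwise '<' simply starts an ordinary operator token. *)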
and less bp strm =
if !no_quotations then
match strm with parser
[< len = ident2 (store 0 '<') >] ep ->
let id = get_buff len in keyword_or_error (bp, ep) id
else
match strm with parser
[< ''<'; len = quotation bp 0 >] ep ->
("QUOTATION", ":" ^ get_buff len), (bp, ep)
| [< '':';
i =
(parser
[< len = ident 0 >] -> get_buff len);
''<' ?? "character '<' expected"; len = quotation bp 0 >] ep ->
("QUOTATION", i ^ ":" ^ get_buff len), (bp, ep)
| [< len = ident2 (store 0 '<') >] ep ->
let id = get_buff len in keyword_or_error (bp, ep) id
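(* [string] and [char] accumulate the body of a literal, keeping backslash
   escapes verbatim, and raise a located error if the literal is not
   terminated. *)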
and string bp len =
parser
[< ''\"' >] -> len
| [< ''\\'; 'c; s >] -> string bp (store (store len '\\') c) s
| [< 'c; s >] -> string bp (store len c) s
| [< >] ep -> err (bp, ep) "string not terminated"
and char bp len =
parser
[< ''\''; s >] -> if len = 0 then char bp (store len '\'') s else len
| [< ''\\'; 'c; s >] -> char bp (store (store len '\\') c) s
| [< 'c; s >] -> char bp (store len c) s
| [< >] ep -> err (bp, ep) "char not terminated"
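(* '$' introduces antiquotations when [dfa] (the value of
   [dollar_for_antiquotation]) is true: $name:...$ yields ("ANTIQUOT",
   "name:..."), $digits:...$ yields a "LOCATE" token, and a bare $...$ an
   anonymous antiquotation.  When [dfa] is false, '$' starts an ordinary
   operator-like token. *)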
and dollar bp len =
parser
[< ''$' >] -> "ANTIQUOT", ":" ^ get_buff len
| [< ''a'..'z' | 'A'..'Z' as c; s >] -> antiquot bp (store len c) s
| [< ''0'..'9' as c; s >] -> maybe_locate bp (store len c) s
| [< '':'; s >] ->
let k = get_buff len in
"ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s
| [< ''\\'; 'c; s >] ->
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
| [< s >] ->
if dfa then
match s with parser
[< 'c >] ->
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
else "", get_buff (ident2 (store 0 '$') s)
and maybe_locate bp len =
parser
[< ''$' >] -> "ANTIQUOT", ":" ^ get_buff len
| [< ''0'..'9' as c; s >] -> maybe_locate bp (store len c) s
| [< '':'; s >] ->
"LOCATE", get_buff len ^ ":" ^ locate_or_antiquot_rest bp 0 s
| [< ''\\'; 'c; s >] ->
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
| [< 'c; s >] ->
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
and antiquot bp len =
parser
[< ''$' >] -> "ANTIQUOT", ":" ^ get_buff len
| [< ''a'..'z' | 'A'..'Z' | '0'..'9' as c; s >] ->
antiquot bp (store len c) s
| [< '':'; s >] ->
let k = get_buff len in
"ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s
| [< ''\\'; 'c; s >] ->
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
| [< 'c; s >] ->
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
and locate_or_antiquot_rest bp len =
parser
[< ''$' >] -> get_buff len
| [< ''\\'; 'c; s >] -> locate_or_antiquot_rest bp (store len c) s
| [< 'c; s >] -> locate_or_antiquot_rest bp (store len c) s
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
and quotation bp len =
parser
[< ''>'; s >] -> maybe_end_quotation bp len s
| [< ''<'; s >] ->
quotation bp (maybe_nested_quotation bp (store len '<') strm__) s
| [< ''\\';
len =
(parser
[< ''>' | '<' | '\\' as c >] -> store len c
| [< >] -> store len '\\');
s >] ->
quotation bp len s
| [< 'c; s >] -> quotation bp (store len c) s
| [< >] ep -> err (bp, ep) "quotation not terminated"
and maybe_nested_quotation bp len =
parser
[< ''<'; s >] -> mstore (quotation bp (store len '<') s) ">>"
| [< '':'; len = ident (store len ':');
a =
parser
[< ''<'; s >] -> mstore (quotation bp (store len '<') s) ">>"
| [< >] -> len >] ->
a
| [< >] -> len
and maybe_end_quotation bp len =
parser
[< ''>' >] -> len
| [< a = quotation bp (store len '>') >] -> a
and left_paren bp =
parser
[< ''*'; _ = comment bp; a = next_token >] -> a
| [< >] ep -> keyword_or_error (bp, ep) "("
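(* [comment] and its helpers skip OCaml comments: [left_paren_in_comment]
   handles nesting (one recursive call for the inner comment, one to resume
   the outer one), and string and character literals occurring inside a
   comment are scanned so that a closing delimiter appearing inside them is
   ignored. *)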
and comment bp =
parser
[< ''('; s >] -> left_paren_in_comment bp s
| [< ''*'; s >] -> star_in_comment bp s
| [< ''\"'; _ = string bp 0; s >] -> comment bp s
| [< ''\''; s >] -> quote_in_comment bp s
| [< 'c; s >] -> comment bp s
| [< >] ep -> err (bp, ep) "comment not terminated"
and quote_in_comment bp =
parser
[< ''\''; s >] -> comment bp s
| [< ''\\'; s >] -> quote_antislash_in_comment bp 0 s
| [< '_; s >] -> quote_any_in_comment bp s
| [< a = comment bp >] -> a
and quote_any_in_comment bp =
parser
[< ''\''; s >] -> comment bp s
| [< a = comment bp >] -> a
and quote_antislash_in_comment bp len =
parser
[< ''\''; s >] -> comment bp s
| [< ''\\' | '\"' | 'n' | 't' | 'b' | 'r'; s >] ->
quote_any_in_comment bp s
| [< ''0'..'9'; s >] -> quote_antislash_digit_in_comment bp s
| [< a = comment bp >] -> a
and quote_antislash_digit_in_comment bp =
parser
[< ''0'..'9'; s >] -> quote_antislash_digit2_in_comment bp s
| [< a = comment bp >] -> a
and quote_antislash_digit2_in_comment bp =
parser
[< ''0'..'9'; s >] -> quote_any_in_comment bp s
| [< a = comment bp >] -> a
and left_paren_in_comment bp =
parser
[< ''*'; s >] -> comment bp s; comment bp s
| [< a = comment bp >] -> a
and star_in_comment bp =
parser
[< '')' >] -> ()
| [< a = comment bp >] -> a
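(* [linenum] recognizes cpp-style line directives: after '#', a digit
   sequence, optional blanks and a double quote, the rest of the line is
   discarded and lexing resumes with [next_token]; a lone '#' falls back to
   [keyword_or_error]. *)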
and linenum bp =
parser
[< ''0'..'9'; _ = digits; _ = spaces_tabs; ''\"'; _ = any_to_nl; s >] ->
next_token s
| [< >] -> keyword_or_error (bp, bp + 1) "#"
and spaces_tabs =
parser
[< '' ' | '\t'; s >] -> spaces_tabs s
| [< >] -> ()
and digits =
parser
[< ''0'..'9'; s >] -> digits s
| [< >] -> ()
and any_to_nl =
parser
[< ''\013' | '\010' >] -> ()
| [< '_; s >] -> any_to_nl s
| [< >] -> ()
in
fun cstrm ->
try next_token cstrm with
Stream.Error str -> err (Stream.count cstrm, Stream.count cstrm + 1) str
let dollar_for_antiquotation = ref true
let func kwd_table =
let find = Hashtbl.find kwd_table in
let dfa = !dollar_for_antiquotation in
Token.lexer_func_of_parser (next_token_fun dfa find)
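(* [func] freezes the current value of [dollar_for_antiquotation] and turns
   [next_token_fun] into a Camlp4 lexing function.  As in the original Camlp4
   Plexer, it is presumably packaged, together with [using_token],
   [removing_token], [text] and the colon helpers below, into the Token lexer
   record consumed by the grammar; that plumbing is outside this excerpt. *)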
let rec check_keyword_stream =
parser
[< _ = check; _ = Stream.empty >] -> true
and check =
parser
[< ''A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
'\248'..'\255';
s >] ->
check_ident s
| [< ''!' | '?' | '~' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' |
'%' | '.';
s >] ->
check_ident2 s
| [< ''<'; s >] ->
begin match Stream.npeek 1 s with
[':' | '<'] -> ()
| _ -> check_ident2 s
end
| [< '':';
_ =
parser
[< '']' | ':' | '=' | '>' >] -> ()
| [< >] -> () >] ep ->
()
| [< ''>' | '|';
_ =
parser
[< '']' | '}' >] -> ()
| [< a = check_ident2 >] -> a >] ->
()
| [< ''[' | '{'; s >] ->
begin match Stream.npeek 2 s with
['<'; '<' | ':'] -> ()
| _ ->
match s with parser
[< ''|' | '<' | ':' >] -> () | [< >] -> ()
end
| [< '';';
_ =
parser
[< '';' >] -> ()
| [< >] -> () >] ->
()
| [< '_ >] -> ()
and check_ident =
parser
[< ''A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
'\248'..'\255' | '0'..'9' | '_' | '\'';
s >] ->
check_ident s
| [< >] -> ()
and check_ident2 =
parser
[< ''!' | '?' | '~' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' |
'%' | '.' | ':' | '<' | '>' | '|';
s >] ->
check_ident2 s
| [< >] -> ()
let check_keyword s =
try check_keyword_stream (Stream.of_string s) with
_ -> false
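(* [check_keyword] accepts a candidate keyword only if the whole string lexes
   as a single token under the rules above (an identifier, an operator-like
   token or one of the recognized punctuation forms); any lexing error makes
   it return false. *)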
let error_no_respect_rules p_con p_prm =
raise
(Token.Error
("the token " ^
(if p_con = "" then "\"" ^ p_prm ^ "\""
else if p_prm = "" then p_con
else p_con ^ " \"" ^ p_prm ^ "\"") ^
" does not respect Plexer rules"))
let error_ident_and_keyword p_con p_prm =
raise
(Token.Error
("the token \"" ^ p_prm ^ "\" is used as " ^ p_con ^
" and as keyword"))
let using_token kwd_table ident_table (p_con, p_prm) =
match p_con with
"" ->
if not (Hashtbl.mem kwd_table p_prm) then
if check_keyword p_prm then
if Hashtbl.mem ident_table p_prm then
error_ident_and_keyword (Hashtbl.find ident_table p_prm) p_prm
else Hashtbl.add kwd_table p_prm p_prm
else error_no_respect_rules p_con p_prm
| "LIDENT" ->
if p_prm = "" then ()
else
begin match p_prm.[0] with
'A'..'Z' -> error_no_respect_rules p_con p_prm
| _ ->
if Hashtbl.mem kwd_table p_prm then
error_ident_and_keyword p_con p_prm
else Hashtbl.add ident_table p_prm p_con
end
| "UIDENT" ->
if p_prm = "" then ()
else
begin match p_prm.[0] with
'a'..'z' -> error_no_respect_rules p_con p_prm
| _ ->
if Hashtbl.mem kwd_table p_prm then
error_ident_and_keyword p_con p_prm
else Hashtbl.add ident_table p_prm p_con
end
| "TILDEIDENT" | "QUESTIONIDENT" | "INT" | "FLOAT" | "CHAR" | "STRING" |
"QUOTATION" | "ANTIQUOT" | "LOCATE" | "EOI" ->
()
| _ ->
raise
(Token.Error
("the constructor \"" ^ p_con ^ "\" is not recognized by Plexer"))
let removing_token kwd_table ident_table (p_con, p_prm) =
match p_con with
"" -> Hashtbl.remove kwd_table p_prm
| "LIDENT" | "UIDENT" ->
if p_prm <> "" then Hashtbl.remove ident_table p_prm
| _ -> ()
let text =
function
"", t -> "'" ^ t ^ "'"
| "LIDENT", "" -> "lowercase identifier"
| "LIDENT", t -> "'" ^ t ^ "'"
| "UIDENT", "" -> "uppercase identifier"
| "UIDENT", t -> "'" ^ t ^ "'"
| "INT", "" -> "integer"
| "INT", s -> "'" ^ s ^ "'"
| "FLOAT", "" -> "float"
| "STRING", "" -> "string"
| "CHAR", "" -> "char"
| "QUOTATION", "" -> "quotation"
| "ANTIQUOT", k -> "antiquot \"" ^ k ^ "\""
| "LOCATE", "" -> "locate"
| "EOI", "" -> "end of input"
| con, "" -> con
| con, prm -> con ^ " \"" ^ prm ^ "\""
let eq_before_colon p e =
let rec loop i =
if i == String.length e then
failwith "Internal error in Plexer: incorrect ANTIQUOT"
else if i == String.length p then e.[i] == ':'
else if p.[i] == e.[i] then loop (i + 1)
else false
in
loop 0
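(* [eq_before_colon] and [after_colon] split an ANTIQUOT payload of the form
   "name:body": the former checks that the prefix before the colon equals its
   first argument, the latter returns the part after the first colon. *)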
let after_colon e =
try
let i = String.index e ':' in
String.sub e (i + 1) (String.length e - i - 1)
with
Not_found