Commit 2b21d637 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2002-11-07 00:58:53 by cvscast] Empty log message

Original author: cvscast
Date: 2002-11-07 00:58:54+00:00
parent ccd7e93c
......@@ -7,7 +7,9 @@ CLEAN_DIRS = $(DIRS) tools tests
MISC = misc/pool.cmo misc/encodings.cmo
PARSER = parser/lexer.cmo parser/location.cmo parser/ast.cmo parser/parser.cmo
PARSER = parser/lexer.cmo parser/location.cmo \
parser/wlexer.cmo \
parser/ast.cmo parser/parser.cmo
TYPING = typing/typed.cmo typing/typer.cmo
......@@ -36,7 +38,7 @@ XDRIVER = $(DRIVER:.cmo=.cmx)
DEBUG = -g
PACKAGES = pxp-engine,pxp-lex-iso88591,camlp4,num
PACKAGES = pxp-engine,pxp-lex-iso88591,wlexing,camlp4,num
OCAMLCP = ocamlc
OCAMLC = ocamlfind $(OCAMLCP) -package $(PACKAGES)
OCAMLOPT = ocamlfind ocamlopt -package $(PACKAGES)
......@@ -75,6 +77,9 @@ compute_depend:
@echo "Computing dependencies ..."
ocamldep $(INCLUDES) $(SYNTAX_PARSER) $(DEPEND) > depend
parser/wlexer.ml: parser/wlexer.mll
wlex parser/wlexer.mll
run_top: all.cma
ledit ocaml $(INCLUDES) `ocamlfind use pxp` all.cma
......@@ -82,6 +87,7 @@ clean:
for i in $(CLEAN_DIRS); do \
(cd $$i; rm -f *.cmi *.cmo *.cma *.cmx *.o *~); \
done
rm -f parser/wlexer.ml
rm -f *.cmi *.cmo *.cma *.cmx *.a *.cmxa *.o *~
rm -f cduce cduce.opt ocamlprof.dump
rm -f dtd2cduce pool
......
......@@ -7,11 +7,13 @@ parser/ast.cmx: parser/location.cmx types/patterns.cmx types/types.cmx
parser/location.cmo: parser/location.cmi
parser/location.cmx: parser/location.cmi
parser/parser.cmo: parser/ast.cmo types/atoms.cmi types/chars.cmi \
types/intervals.cmi parser/lexer.cmo parser/location.cmi \
types/sequence.cmi types/types.cmi parser/parser.cmi
types/intervals.cmi parser/location.cmi types/sequence.cmi \
types/types.cmi parser/wlexer.cmo parser/parser.cmi
parser/parser.cmx: parser/ast.cmx types/atoms.cmx types/chars.cmx \
types/intervals.cmx parser/lexer.cmx parser/location.cmx \
types/sequence.cmx types/types.cmx parser/parser.cmi
types/intervals.cmx parser/location.cmx types/sequence.cmx \
types/types.cmx parser/wlexer.cmx parser/parser.cmi
parser/wlexer.cmo: parser/location.cmi
parser/wlexer.cmx: parser/location.cmx
typing/typed.cmo: parser/location.cmi types/patterns.cmi types/sortedMap.cmi \
types/types.cmi
typing/typed.cmx: parser/location.cmx types/patterns.cmx types/sortedMap.cmx \
......@@ -82,10 +84,10 @@ runtime/value.cmx: types/chars.cmx types/sequence.cmx types/sortedMap.cmx \
types/types.cmx runtime/value.cmi
driver/cduce.cmo: parser/ast.cmo types/builtin.cmo runtime/eval.cmi \
parser/location.cmi parser/parser.cmi types/patterns.cmi typing/typer.cmi \
types/types.cmi runtime/value.cmi
types/types.cmi runtime/value.cmi parser/wlexer.cmo
driver/cduce.cmx: parser/ast.cmx types/builtin.cmx runtime/eval.cmx \
parser/location.cmx parser/parser.cmx types/patterns.cmx typing/typer.cmx \
types/types.cmx runtime/value.cmx
types/types.cmx runtime/value.cmx parser/wlexer.cmx
parser/parser.cmi: parser/ast.cmo
typing/typer.cmi: parser/ast.cmo typing/typed.cmo types/types.cmi
types/boolean.cmi: types/sortedList.cmi
......
......@@ -21,6 +21,7 @@ let ppf = Format.std_formatter
let prog () =
try Parser.prog input
with
| Stdpp.Exc_located (_, (Location _ as e)) -> raise e
| Stdpp.Exc_located (loc, e) -> raise (Location (loc, e))
let print_norm ppf d =
......@@ -60,6 +61,16 @@ let rec print_exn ppf = function
Types.Sample.print (Types.Sample.get t)
| Typer.UnboundId x ->
Format.fprintf ppf "Unbound identifier %s@\n" x
| Wlexer.Illegal_character c ->
Format.fprintf ppf "Illegal character (%s)@\n" (Char.escaped c)
| Wlexer.Unterminated_comment ->
Format.fprintf ppf "Comment not terminated@\n"
| Wlexer.Unterminated_string ->
Format.fprintf ppf "String literal not terminated@\n"
| Wlexer.Unterminated_string_in_comment ->
Format.fprintf ppf "This comment contains an unterminated string literal@\n"
| Parser.Error s ->
Format.fprintf ppf "Parsing error: %s@\n" s
| exn ->
Format.fprintf ppf "%s@\n" (Printexc.to_string exn)
......
......@@ -2,7 +2,12 @@ open Location
open Ast
(* let () = Grammar.error_verbose := true *)
let gram = Grammar.gcreate (Lexer.gmake ())
let gram = Grammar.gcreate (Wlexer.lexer Wlexer.token Wlexer.latin1_engine)
let prog = Grammar.Entry.create gram "prog"
let expr = Grammar.Entry.create gram "expression"
let pat = Grammar.Entry.create gram "type/pattern expression"
......@@ -32,9 +37,17 @@ let seq_of_string pos s =
else aux (((pos+i,pos+i+1),s.[i-1])::accu) (i-1) in
aux [] (String.length s)
exception Error of string
let error loc s = raise (Location (loc, Error s))
let parse_char loc s =
(* TODO: Unicode *)
if String.length s <> 1 then
error loc "Character litteral must have length 1";
s.[0]
let char_list pos s =
let s = seq_of_string pos (Token.eval_string s) in
let s = seq_of_string pos s in
List.map (fun (loc,c) -> mk loc (Cst (Types.Char (Chars.Unichar.from_char c)))) s
......@@ -49,8 +62,8 @@ EXTEND
[ (p,e) = let_binding -> LetDecl (p,e)
| (p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
EvalStatement (mk loc (Match (e1,[p,e2])))
| "type"; x = UIDENT; "="; t = pat -> TypeDecl (x,t)
| "debug"; d = debug_directive -> Debug d
| LIDENT "type"; x = UIDENT; "="; t = pat -> TypeDecl (x,t)
| LIDENT "debug"; d = debug_directive -> Debug d
] |
[ e = expr -> EvalStatement e
]
......@@ -72,7 +85,7 @@ EXTEND
(mk noloc (Capture "x"),
mk noloc (Op ("raise",[mk noloc (Var "x")]))) in
mk loc (Try (e,b@[default]))
| LIDENT "map"; e = SELF; "with"; b = branches -> mk loc (Map (e,b))
| "map"; e = SELF; "with"; b = branches -> mk loc (Map (e,b))
| "transform"; e = SELF; "with"; b = branches ->
mk noloc (Op ("flatten", [mk loc (Map (e,b))]))
| "fun"; (f,a,b) = fun_decl ->
......@@ -113,10 +126,13 @@ EXTEND
let e = match e with Some e -> e | None -> cst_nil in
let l = List.flatten l in
tuple loc (l @ [e])
| "<"; t = expr_tag_spec; a = expr_attrib_spec; ">"; c = expr ->
| t = [ a = TAG ->
mk loc (Cst (Types.Atom (Types.AtomPool.mk a)))
| "<"; e = expr LEVEL "no_appl" -> e ];
a = expr_attrib_spec; ">"; c = expr ->
tuple loc [t;a;c]
| "{"; r = [ expr_record_spec | -> mk loc (RecordLitt []) ]; "}" -> r
| s = STRING ->
| s = STRING2 ->
tuple loc (char_list loc s @ [cst_nil])
| "!"; t = pat -> mk loc (DebugTyper t)
| a = LIDENT -> mk loc (Var a)
......@@ -125,7 +141,7 @@ EXTEND
];
seq_elem: [
[ x = CHAR -> char_list loc x
[ x = STRING1 -> char_list loc x
| e = expr LEVEL "no_appl" -> [e]
]
];
......@@ -160,7 +176,7 @@ EXTEND
];
branches: [
[ OPT "|"; l = LIST1 branch SEP "|" ; OPT "end" -> l ]
[ OPT "|"; l = LIST1 branch SEP "|" -> l ]
];
branch: [
......@@ -180,12 +196,12 @@ EXTEND
| x = regexp; "??" -> Alt (Epsilon, x) ]
| [ "("; x = regexp; ")" -> x
| UIDENT "PCDATA" -> string_regexp
| i = CHAR ; "--"; j = CHAR ->
let i = Chars.Unichar.from_char (Token.eval_char i)
and j = Chars.Unichar.from_char (Token.eval_char j) in
| i = STRING1; "--"; j = STRING1 ->
let i = Chars.Unichar.from_char (parse_char loc i)
and j = Chars.Unichar.from_char (parse_char loc j) in
Elem (mk loc (Internal (Types.char (Chars.char_class i j))))
| s = CHAR ->
let s = seq_of_string loc (Token.eval_string s) in
| s = STRING1 ->
let s = seq_of_string loc s in
List.fold_right
(fun (loc,c) accu ->
let c = Chars.Unichar.from_char c in
......@@ -219,10 +235,10 @@ EXTEND
| i = INT ->
let i = Big_int.big_int_of_string i in
mk loc (Internal (Types.interval (Intervals.atom i)))
| "*--"; j = INT ->
| "*"; "--"; j = INT ->
let j = Big_int.big_int_of_string j in
mk loc (Internal (Types.interval (Intervals.left j)))
| i = INT; "--*" ->
| i = INT; "--"; "*" ->
let i = Big_int.big_int_of_string i in
mk loc (Internal (Types.interval (Intervals.right i)))
| i = char ->
......@@ -235,10 +251,17 @@ EXTEND
q = [ ";"; q = pat -> q
| -> mk noloc (Internal (Sequence.nil_type)) ];
"]" -> mk loc (Regexp (r,q))
| "<"; t = tag_spec; a = attrib_spec; ">"; c = pat ->
| t = [
[ "<"; LIDENT "_" -> mk loc (Internal (Types.atom (Atoms.any)))
| a = TAG ->
mk loc
(Internal (Types.atom (Atoms.atom (Types.AtomPool.mk a)))) ]
| [ "<"; t = pat -> t ]
];
a = attrib_spec; ">"; c = pat ->
multi_prod loc [t;a;c]
| s = STRING ->
let s = seq_of_string loc (Token.eval_string s) in
| s = STRING2 ->
let s = seq_of_string loc s in
let s = List.map
(fun (loc,c) ->
mk loc (Internal
......@@ -252,8 +275,8 @@ EXTEND
];
record_spec:
[ [ r = LIST0 [ l = [LIDENT | UIDENT];
o = ["=?" -> true | "=" -> false];
[ [ r = LIST0 [ l = [LIDENT | UIDENT]; "=";
o = [ "?" -> true | -> false];
x = pat ->
mk loc (Record (Types.LabelPool.mk l,o,x))
] SEP ";" ->
......@@ -264,7 +287,7 @@ EXTEND
char:
[
[ c = CHAR -> Chars.Unichar.from_char (Token.eval_char c)
[ c = STRING1 -> Chars.Unichar.from_char (parse_char loc c)
| "!"; i = INT -> Chars.Unichar.from_int (int_of_string i) ]
];
......@@ -276,13 +299,6 @@ EXTEND
| c = char -> Types.Char c ]
];
tag_spec:
[
[ LIDENT "_" -> mk loc (Internal (Types.atom (Atoms.any))) ]
| [ a = [LIDENT | UIDENT] ->
mk loc (Internal (Types.atom (Atoms.atom (Types.AtomPool.mk a)))) ]
| [ t = pat -> t ]
];
attrib_spec:
[ [ r = record_spec -> r | "("; t = pat; ")" -> t ] ];
......@@ -295,13 +311,6 @@ EXTEND
mk loc (RecordLitt r)
] ];
expr_tag_spec:
[
[ a = [LIDENT | UIDENT] ->
mk loc (Cst (Types.Atom (Types.AtomPool.mk a))) ]
| [ e = expr LEVEL "no_appl" -> e ]
];
expr_attrib_spec:
[ [ r = expr_record_spec -> r ]
| [ e = expr LEVEL "no_appl" -> e
......@@ -316,8 +325,8 @@ EXTEND GLOBAL: pat pat';
END
let pat = Grammar.Entry.parse pat
let expr = Grammar.Entry.parse expr
let prog = Grammar.Entry.parse prog
and expr = Grammar.Entry.parse expr
and prog = Grammar.Entry.parse prog
module From_string = struct
let pat s = Grammar.Entry.parse pat' (Stream.of_string s)
......
exception Error of string
val expr : char Stream.t -> Ast.pexpr
val pat : char Stream.t -> Ast.ppat
val prog : char Stream.t -> Ast.pmodule_item list
......
(* File to be processed by wlex, not ocamllex ! *)
(* Loosely inspired from OCaml lexer.mll *)
classes
encoding_error
xml_char
blank
lowercase uppercase ascii_digit
"_<>=.,:;+-*/@&{}[]()|?`\"\\\'!"
{
let keywords = Hashtbl.create 17
let error i j exn = raise (Location.Location ((i,j),exn))
exception Illegal_character of char
exception Unterminated_comment
exception Unterminated_string
exception Unterminated_string_in_comment
(* Buffer for string literals *)
let string_buff = Buffer.create 1024
let store_char = Buffer.add_char string_buff
let get_stored_string () =
let s = Buffer.contents string_buff in
Buffer.clear string_buff;
s
let string_start_pos = ref 0;;
let comment_start_pos : int list ref = ref [];;
let char_for_decimal_code s =
let s = String.sub s 1 (String.length s - 1) in
let c = int_of_string s in
assert ( c < 256 ); (* TODO: handle Unicode *)
Char.chr c
}
let identchar = lowercase | uppercase | ascii_digit | '_'
rule token = parse
blank+ { token engine lexbuf }
| (lowercase | '_') identchar* {
let s = Lexing.lexeme lexbuf in
if Hashtbl.mem keywords s then "",s else "LIDENT",s
}
| uppercase identchar* { "UIDENT",Lexing.lexeme lexbuf }
| ascii_digit+ { "INT",Lexing.lexeme lexbuf }
| "<" blank* (lowercase | uppercase) identchar* {
let s = Lexing.lexeme lexbuf in
"TAG", String.sub s 1 (String.length s - 1)
}
| [ "<>=.,:;+-*/@&{}[]()|?`!" ]
| "->" | "::" | ";;" | "--" | ":="
| ["?+*"] "?"
{ "",Lexing.lexeme lexbuf }
| '"' | "'"
{ let string_start = Lexing.lexeme_start lexbuf in
string_start_pos := string_start;
let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
if double_quote then string2 engine lexbuf else string1 engine lexbuf;
lexbuf.Lexing.lex_start_pos <-
string_start - lexbuf.Lexing.lex_abs_pos;
(if double_quote then "STRING2" else "STRING1"),
(get_stored_string()) }
| "(*"
{ comment_start_pos := [Lexing.lexeme_start lexbuf];
comment engine lexbuf;
token engine lexbuf }
| eof
{ "EOI","" }
| _
{ error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
(Illegal_character ((Lexing.lexeme lexbuf).[0])) }
and comment = parse
"(*"
{ comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
comment engine lexbuf;
}
| "*)"
{ comment_start_pos := List.tl !comment_start_pos;
if !comment_start_pos <> [] then comment engine lexbuf;
}
| '"' | "'"
{ string_start_pos := Lexing.lexeme_start lexbuf;
let string =
if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
(try string engine lexbuf
with Location.Location (_,Unterminated_string) ->
let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_string_in_comment);
Buffer.clear string_buff;
comment engine lexbuf }
| eof
{ let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_comment
}
| _
{ comment engine lexbuf }
and string2 = parse
'"'
{ () }
| '\\' ['\\' '"']
{ store_char (Lexing.lexeme_char lexbuf 1);
string2 engine lexbuf }
| '\\' ascii_digit+
{ store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
string2 engine lexbuf }
| eof
{ error !string_start_pos (!string_start_pos+1) Unterminated_string }
| _
{ store_char (Lexing.lexeme_char lexbuf 0);
(* TODO: Unicode *)
string2 engine lexbuf }
and string1 = parse
"'"
{ () }
| '\\' ['\\' '\'']
{ store_char (Lexing.lexeme_char lexbuf 1);
string1 engine lexbuf }
| '\\' ascii_digit+
{ store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
string1 engine lexbuf }
| eof
{ error !string_start_pos (!string_start_pos+1) Unterminated_string }
| _
{ store_char (Lexing.lexeme_char lexbuf 0);
string1 engine lexbuf }
{
let lexer_func_of_wlex lexfun lexengine cs =
let lb =
Lexing.from_function
(fun s n ->
try s.[0] <- Stream.next cs; 1 with Stream.Failure -> 0)
in
let next () =
let tok = lexfun lexengine lb in
let loc = (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
(tok, loc)
in
Token.make_stream_and_location next
let register_kw (s1,s2) =
if s1 = "" then
match s2.[0] with
| 'a' .. 'z' when not (Hashtbl.mem keywords s2) ->
Hashtbl.add keywords s2 ()
| _ -> ()
let lexer lexfun lexengine =
{
Token.tok_func = lexer_func_of_wlex lexfun lexengine;
Token.tok_using = register_kw;
Token.tok_removing = (fun _ -> ());
Token.tok_match = Token.default_match;
Token.tok_text = Token.lexer_text
}
let classes =
let c i = (i,i) in
let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
[ (ascii_digit, [i '0' '9']);
(lowercase, [i 'a' 'z']);
(uppercase, [i 'A' 'Z']);
(blank, [c 8; c 9; c 10; c 13; c 32]);
]
let table =
assert(nb_classes <= 256);
let v = String.make 256 (Char.chr encoding_error) in
let fill_int c (i, j) = String.fill v i (j-i+1) c in
let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
let fill_char (ch, cl) = v.[ch] <- Char.chr cl in
List.iter fill_class classes;
List.iter fill_char one_char_classes;
v
let utf8_engine = Lex_engines.engine_tiny_utf8 table
(fun c ->
if c>=0x10000 && c < 0x11000 then xml_char
else encoding_error)
let latin1_engine = Lex_engines.engine_tiny_8bit table
}
type Company = <company>[ Worker* ];;
type Worker = <worker>[Surname Name Salary];;
type Surname = <surname>[String];;
type Name = <name>[String];;
type Surname = <surname>String;;
type Name = <name>String;;
type Salary = <salary>[Int];;
type PlusQueMoi = <salary>[5000--10000000];;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment