Commit b1877273 authored by Pietro Abate's avatar Pietro Abate

[r2003-06-17 20:39:07 by cvscast] Don't autodetect non-terminal stdint (prevent using ledit); add

--stdin option -- Alain

Original author: cvscast
Date: 2003-06-17 20:39:08+00:00
parent 8d168760
......@@ -107,6 +107,10 @@ Alain 2003-03-16
Decompilation of regexp
Detection of defined names
Alain 2003-06-17
Fait, il y a qq temps déjà
======================================================================
Zack 2003-05-08
......@@ -126,6 +130,13 @@ Alain 2003-05-15
load_xml charge représentation UTF-8; adapter print_xml
noms de tags et identificateurs en UTF-8 en interne
Alain 2003-06-17
Fait, il y a qq temps déjà
Il reste:
- donner la possibilité d'utiliser des identificateurs/tags
Unicode dans un source CDuce en Latin1
- source en UTF8 (ou autre)
======================================================================
......@@ -145,9 +156,14 @@ Alain 2003-05-19
Noeud spécial dans l'AST pour les sequences et les chaines
(meilleurs messages d'erreurs + meilleure implémentation des chaines).
Alain 2003-06-17
Fait
======================================================================
Alain 2003-05-29
let fun f (...) ===> let f (...)
Alain 2003-06-17
Fait, il y a qq temps déjà
......@@ -32,6 +32,8 @@ let specs =
" specify persistency file for loading and saving";
"--quiet", Arg.Set Cduce.quiet,
" suppress normal output (typing, results)";
"--stdin", Arg.Unit (fun () -> src := "" :: !src),
" read CDuce script on standard input";
"-v", Arg.Unit version,
" print CDuce version";
"--version", Arg.Unit version,
......@@ -72,17 +74,14 @@ let outflush s =
let toploop () =
Cduce.toplevel := true;
let tcio =
try Unix.tcgetattr Unix.stdin
let restore =
try
let tcio = Unix.tcgetattr Unix.stdin in
Unix.tcsetattr
Unix.stdin Unix.TCSADRAIN { tcio with Unix.c_vquit = '\004' };
fun () -> Unix.tcsetattr Unix.stdin Unix.TCSADRAIN tcio
with Unix.Unix_error (_,_,_) ->
(* The input is not a terminal *)
Location.push_source `Stream;
let input = Stream.of_channel stdin in
let ok = Cduce.script ppf ppf_err input in
if not ok then exit 1 else exit 0
in
let restore () =
Unix.tcsetattr Unix.stdin Unix.TCSADRAIN tcio
fun () -> ()
in
let quit () =
outflush "\n";
......@@ -90,8 +89,6 @@ let toploop () =
exit 0
in
Format.fprintf ppf " CDuce version %s\n@." <:symbol<cduce_version>>;
Unix.tcsetattr Unix.stdin Unix.TCSADRAIN
{ tcio with Unix.c_vquit = '\004' };
Sys.set_signal Sys.sigquit (Sys.Signal_handle (fun _ -> quit ()));
Sys.catch_break true;
Cduce.toplevel := true;
......@@ -131,7 +128,14 @@ let do_file s =
close_in chan;
if not ok then exit 1
let do_stdin () =
Location.push_source `Stream;
let input = Stream.of_channel stdin in
let ok = Cduce.script ppf ppf_err input in
if not ok then exit 1
let run s =
if s = "" then do_stdin () else do_file s
let main () =
(match !load_dump with
......@@ -153,7 +157,7 @@ let main () =
);
(match !src with
| [] -> toploop ()
| l -> List.iter do_file l);
| l -> List.iter run l);
(match !save_dump with
| Some f ->
Format.fprintf ppf "Saving state ...@\n";
......
......@@ -50,31 +50,13 @@ let string_regexp = Star (Elem char)
let cst_nil = Cst (Types.Atom Sequence.nil_atom)
let seq_of_string pos s =
let s = Encodings.Utf8.mk s in
let (pos,_) = pos in
let rec aux pos i j =
if Encodings.Utf8.equal_index i j then []
else
let (len,i) = Encodings.Utf8.next s i in
let (c,i) = Encodings.Utf8.next s i in
((pos,pos+len),c) :: (aux (pos + len) i j)
in
aux (pos+1) (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s)
let get_string s =
let seq_of_string s =
let s = Encodings.Utf8.mk s in
let b = Buffer.create 32 in
let rec aux i j =
if Encodings.Utf8.equal_index i j then ()
else
let (len,i) = Encodings.Utf8.next s i in
let (c,i) = Encodings.Utf8.next s i in
Encodings.Utf8.store b c;
aux i j
if Encodings.Utf8.equal_index i j then []
else let (c,i) = Encodings.Utf8.next s i in c :: (aux i j)
in
aux (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s);
Buffer.contents b
aux (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s)
exception Error of string
let error (i,j) s = Location.raise_loc i j (Error s)
......@@ -83,15 +65,10 @@ let make_record loc r =
LabelMap.from_list (fun _ _ -> error loc "Duplicated record field") r
let parse_char loc s =
let s = seq_of_string loc s in
match s with
| [ loc,c ] -> c
match seq_of_string s with
| [ c ] -> c
| _ -> error loc "Character litteral must have length 1"
let char_list loc s =
let s = seq_of_string loc s in
List.map (fun (loc,c) -> exp loc (Cst (Types.Char (Chars.mk_int c)))) s
let include_stack = ref []
let protect_exn f g =
......@@ -130,14 +107,13 @@ EXTEND
| "type"; x = LIDENT -> error loc "Type identifiers must be capitalized"
| "schema"; name = UIDENT; "="; uri = STRING2 ->
protect_op "schema";
let schema_doc = Schema_xml.pxp_tree_of (get_string uri) in
let schema_doc = Schema_xml.pxp_tree_of uri in
let schema = Schema_parser.parse_schema schema_doc in
[ mk loc (SchemaDecl (name, schema))]
| "debug"; d = debug_directive -> [ mk loc (Debug d) ]
| DIRECTIVE "#quit" -> [ mk loc (Directive `Quit) ]
| DIRECTIVE "#env" -> [ mk loc (Directive `Env) ]
| "include"; s = STRING2 ->
let s = get_string s in
let s =
if Filename.is_relative s
then Filename.concat (Location.current_dir ()) s
......@@ -290,9 +266,8 @@ EXTEND
exp loc (Xml (t, Pair (a,c)))
| "{"; r = [ expr_record_spec | -> exp loc (RecordLitt LabelMap.empty) ]; "}" -> r
| s = STRING2 ->
let s = U.mk (get_string s) in
let s = U.mk s in
exp loc (String (U.start_index s, U.end_index s, s, cst_nil))
(* exp loc (tuple (char_list loc s @ [cst_nil])) *)
| a = LIDENT -> exp loc (Var (ident a))
]
......@@ -300,9 +275,8 @@ EXTEND
seq_elem: [
[ x = STRING1 ->
let s = U.mk (get_string x) in
let s = U.mk x in
`String (loc, U.start_index s, U.end_index s, s)
(* `Elems (char_list loc x) *)
| e = expr LEVEL "no_appl" -> `Elems [e]
| "!"; e = expr LEVEL "no_appl" -> `Explode e
]
......@@ -386,13 +360,12 @@ EXTEND
and j = Chars.mk_int (parse_char loc j) in
Elem (mk loc (Internal (Types.char (Chars.char_class i j))))
| s = STRING1 ->
let s = seq_of_string loc s in
List.fold_right
(fun (loc,c) accu ->
(fun c accu ->
let c = Chars.mk_int c in
let c = Chars.atom c in
Seq (Elem (mk loc (Internal (Types.char c))), accu))
s
Seq (Elem (mknoloc (Internal (Types.char c))), accu))
(seq_of_string s)
Epsilon
| e = pat LEVEL "simple" -> Elem e
]
......@@ -458,14 +431,15 @@ EXTEND
a = attrib_spec; ">"; c = pat ->
mk loc (XmlT (t, multi_prod loc [a;c]))
| s = STRING2 ->
let s = seq_of_string loc s in
let s = List.map
(fun (loc,c) ->
mk loc (Internal
(Types.char
(Chars.atom
(Chars.mk_int c))))) s in
let s = s @ [mk loc (Internal (Sequence.nil_type))] in
let s =
List.map
(fun c ->
mknoloc (Internal
(Types.char
(Chars.atom
(Chars.mk_int c)))))
(seq_of_string s) in
let s = s @ [mknoloc (Internal (Sequence.nil_type))] in
multi_prod loc s
]
......
......@@ -85,17 +85,10 @@ let nb_classes = 40
exception Unterminated_string_in_comment
(* Buffer for string literals (always encoded in UTF8).
Each character is encoded in two consecutives code point;
the first one gives the number of bytes in the input document;
the second one gives the Unicode representation *)
(* Buffer for string literals (always encoded in UTF8). *)
let string_buff = Buffer.create 1024
let store_len ?(add=0) lexbuf =
let l = add + (Lexing.lexeme_end lexbuf) - (Lexing.lexeme_start lexbuf) in
Encodings.Utf8.store string_buff l
let store_ascii = Buffer.add_char string_buff
let store_char = Buffer.add_string string_buff
let store_code = Encodings.Utf8.store string_buff
......@@ -112,7 +105,7 @@ let nb_classes = 40
let string_start_pos = ref 0;;
let comment_start_pos : int list ref = ref [];;
let numeric_char s =
let decimal_char s =
int_of_string (String.sub s 1 (String.length s - 2))
......@@ -129,11 +122,6 @@ let nb_classes = 40
in
aux 0 0
let rec tag_of_tag s i =
match s.[i] with
| '\008' | '\009' | '\010' | '\013' | '\032' -> tag_of_tag s (i+1)
| _ -> String.sub s i (String.length s - i)
let lex_tables = {
Lexing.lex_base =
"\000\000\023\000\011\000\015\000\254\255\042\000\046\000\255\255\
......@@ -205,10 +193,10 @@ let lex_tables = {
let rec token engine lexbuf =
match engine lex_tables 0 lexbuf with
0 -> (
# 90 "parser/wlexer.mll"
# 78 "parser/wlexer.mll"
token engine lexbuf )
| 1 -> (
# 92 "parser/wlexer.mll"
# 80 "parser/wlexer.mll"
let s = Lexing.lexeme lexbuf in
if (s.[0] >= 'A') && (s.[0] <= 'Z')
......@@ -216,16 +204,16 @@ let rec token engine lexbuf =
else if Hashtbl.mem keywords s then "",s else "LIDENT",s
)
| 2 -> (
# 99 "parser/wlexer.mll"
# 87 "parser/wlexer.mll"
"INT",Lexing.lexeme lexbuf )
| 3 -> (
# 104 "parser/wlexer.mll"
# 92 "parser/wlexer.mll"
"",Lexing.lexeme lexbuf )
| 4 -> (
# 105 "parser/wlexer.mll"
# 93 "parser/wlexer.mll"
"DIRECTIVE",Lexing.lexeme lexbuf )
| 5 -> (
# 107 "parser/wlexer.mll"
# 95 "parser/wlexer.mll"
let string_start = Lexing.lexeme_start lexbuf in
string_start_pos := string_start;
let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
......@@ -235,17 +223,17 @@ let rec token engine lexbuf =
(if double_quote then "STRING2" else "STRING1"),
(get_stored_string()) )
| 6 -> (
# 117 "parser/wlexer.mll"
# 105 "parser/wlexer.mll"
comment_start_pos := [Lexing.lexeme_start lexbuf];
in_comment := true;
comment engine lexbuf;
in_comment := false;
token engine lexbuf )
| 7 -> (
# 124 "parser/wlexer.mll"
# 112 "parser/wlexer.mll"
"EOI","" )
| 8 -> (
# 126 "parser/wlexer.mll"
# 114 "parser/wlexer.mll"
error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
(Illegal_character ((Lexing.lexeme lexbuf).[0])) )
......@@ -254,17 +242,17 @@ let rec token engine lexbuf =
and comment engine lexbuf =
match engine lex_tables 1 lexbuf with
0 -> (
# 132 "parser/wlexer.mll"
# 120 "parser/wlexer.mll"
comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
comment engine lexbuf;
)
| 1 -> (
# 136 "parser/wlexer.mll"
# 124 "parser/wlexer.mll"
comment_start_pos := List.tl !comment_start_pos;
if !comment_start_pos <> [] then comment engine lexbuf;
)
| 2 -> (
# 140 "parser/wlexer.mll"
# 128 "parser/wlexer.mll"
string_start_pos := Lexing.lexeme_start lexbuf;
Buffer.clear string_buff;
let ender = Lexing.lexeme lexbuf in
......@@ -275,53 +263,49 @@ and comment engine lexbuf =
Buffer.clear string_buff;
comment engine lexbuf )
| 3 -> (
# 150 "parser/wlexer.mll"
# 138 "parser/wlexer.mll"
let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_comment
)
| 4 -> (
# 154 "parser/wlexer.mll"
# 142 "parser/wlexer.mll"
comment engine lexbuf )
| _ -> failwith "lexing: empty token [comment]"
and string ender engine lexbuf =
match engine lex_tables 2 lexbuf with
0 -> (
# 158 "parser/wlexer.mll"
# 146 "parser/wlexer.mll"
let c = Lexing.lexeme lexbuf in
if c = ender then ()
else (store_len lexbuf;
store_char (Lexing.lexeme lexbuf);
else (store_char (Lexing.lexeme lexbuf);
string ender engine lexbuf) )
| 1 -> (
# 164 "parser/wlexer.mll"
store_len lexbuf;
store_ascii (Lexing.lexeme_char lexbuf 1);
# 151 "parser/wlexer.mll"
store_ascii (Lexing.lexeme_char lexbuf 1);
string ender engine lexbuf )
| 2 -> (
# 168 "parser/wlexer.mll"
# 154 "parser/wlexer.mll"
let c = Lexing.lexeme_char lexbuf 1 in
if c = 'x'
then parse_hexa_char engine lexbuf
else (store_len lexbuf; store_special c);
else store_special c;
string ender engine lexbuf )
| 3 -> (
# 174 "parser/wlexer.mll"
store_len lexbuf;
store_code (numeric_char (Lexing.lexeme lexbuf));
# 160 "parser/wlexer.mll"
store_code (decimal_char (Lexing.lexeme lexbuf));
string ender engine lexbuf )
| 4 -> (
# 178 "parser/wlexer.mll"
# 163 "parser/wlexer.mll"
error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
(Illegal_character '\\') )
| 5 -> (
# 182 "parser/wlexer.mll"
# 167 "parser/wlexer.mll"
error !string_start_pos (!string_start_pos+1) Unterminated_string )
| 6 -> (
# 184 "parser/wlexer.mll"
store_len lexbuf;
store_code (Char.code (Lexing.lexeme_char lexbuf 0));
# 169 "parser/wlexer.mll"
store_code (Char.code (Lexing.lexeme_char lexbuf 0));
(* Adapt when source is UTF8 *)
string ender engine lexbuf )
| _ -> failwith "lexing: empty token [string ender]"
......@@ -329,11 +313,10 @@ and string ender engine lexbuf =
and parse_hexa_char engine lexbuf =
match engine lex_tables 3 lexbuf with
0 -> (
# 191 "parser/wlexer.mll"
store_len ~add:2 lexbuf;
store_code (hexa_char (Lexing.lexeme lexbuf)) )
# 175 "parser/wlexer.mll"
store_code (hexa_char (Lexing.lexeme lexbuf)) )
| 1 -> (
# 194 "parser/wlexer.mll"
# 177 "parser/wlexer.mll"
error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
(Illegal_character '\\') )
......@@ -341,7 +324,7 @@ and parse_hexa_char engine lexbuf =
;;
# 200 "parser/wlexer.mll"
# 183 "parser/wlexer.mll"
let delta_loc = ref 0
......
......@@ -26,17 +26,10 @@ classes
exception Unterminated_string_in_comment
(* Buffer for string literals (always encoded in UTF8).
Each character is encoded in two consecutives code point;
the first one gives the number of bytes in the input document;
the second one gives the Unicode representation *)
(* Buffer for string literals (always encoded in UTF8). *)
let string_buff = Buffer.create 1024
let store_len ?(add=0) lexbuf =
let l = add + (Lexing.lexeme_end lexbuf) - (Lexing.lexeme_start lexbuf) in
Encodings.Utf8.store string_buff l
let store_ascii = Buffer.add_char string_buff
let store_char = Buffer.add_string string_buff
let store_code = Encodings.Utf8.store string_buff
......@@ -53,7 +46,7 @@ classes
let string_start_pos = ref 0;;
let comment_start_pos : int list ref = ref [];;
let numeric_char s =
let decimal_char s =
int_of_string (String.sub s 1 (String.length s - 2))
......@@ -70,11 +63,6 @@ classes
in
aux 0 0
let rec tag_of_tag s i =
match s.[i] with
| '\008' | '\009' | '\010' | '\013' | '\032' -> tag_of_tag s (i+1)
| _ -> String.sub s i (String.length s - i)
}
let letter = lowercase | uppercase | unicode_base_char | unicode_ideographic
......@@ -157,22 +145,19 @@ and string ender = parse
| '"' | "'"
{ let c = Lexing.lexeme lexbuf in
if c = ender then ()
else (store_len lexbuf;
store_char (Lexing.lexeme lexbuf);
else (store_char (Lexing.lexeme lexbuf);
string ender engine lexbuf) }
| '\\' ['\\' '"' '\'']
{ store_len lexbuf;
store_ascii (Lexing.lexeme_char lexbuf 1);
{ store_ascii (Lexing.lexeme_char lexbuf 1);
string ender engine lexbuf }
| '\\' lowercase
{ let c = Lexing.lexeme_char lexbuf 1 in
if c = 'x'
then parse_hexa_char engine lexbuf
else (store_len lexbuf; store_special c);
else store_special c;
string ender engine lexbuf }
| '\\' ascii_digit+ ';'
{ store_len lexbuf;
store_code (numeric_char (Lexing.lexeme lexbuf));
{ store_code (decimal_char (Lexing.lexeme lexbuf));
string ender engine lexbuf }
| '\\'
{ error
......@@ -181,15 +166,13 @@ and string ender = parse
| eof
{ error !string_start_pos (!string_start_pos+1) Unterminated_string }
| _
{ store_len lexbuf;
store_code (Char.code (Lexing.lexeme_char lexbuf 0));
{ store_code (Char.code (Lexing.lexeme_char lexbuf 0));
(* Adapt when source is UTF8 *)
string ender engine lexbuf }
and parse_hexa_char = parse
| (ascii_digit|lowercase)+ ';'
{ store_len ~add:2 lexbuf;
store_code (hexa_char (Lexing.lexeme lexbuf)) }
{ store_code (hexa_char (Lexing.lexeme lexbuf)) }
| _
{ error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
......
......@@ -49,7 +49,9 @@ to choose the parser for XML files (available only when CDuce
is built with expat support; default is <code>--expat</code>).</li>
<li>All the other arguments on the command line are considered CDuce
scripts, which are executed successively.</li>
scripts, which are executed successively. The option
<code>--stdin</code> forces the interpreter to consider the standard
input as a input script.</li>
</ul>
......
type Person = FPerson | MPerson;;
type FPerson = <person gender = "F">[ Name Children ];;
type MPerson = <person gender = "M">[ Name Children ];;
type Children = <children>[ Person* ];;
type Name = <name>[ PCDATA ];;
type Person = FPerson | MPerson
type FPerson = <person gender = "F">[ Name Children ]
type MPerson = <person gender = "M">[ Name Children ]
type Children = <children>[ Person* ]
type Name = <name>[ PCDATA ]
type Man = <man name=String>[ Sons Daughters ];;
type Woman = <woman name=String>[ Sons Daughters ];;
type Sons = <sons>[ Man* ];;
type Daughters = <daughters>[ Woman* ];;
type Man = <man name=String>[ Sons Daughters ]
type Woman = <woman name=String>[ Sons Daughters ]
type Sons = <sons>[ Man* ]
type Daughters = <daughters>[ Woman* ]
let fun split ({{MPerson -> Man ; FPerson -> Woman}})
<person gender=g>[ <name>n <children>[(mc::MPerson | fc::FPerson)*] ] ->
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment