Commit 82b65a6f authored by Pietro Abate's avatar Pietro Abate

[r2003-05-20 13:27:25 by cvscast] Unicode support

Original author: cvscast
Date: 2003-05-20 13:27:27+00:00
parent ae28fd0f
......@@ -5,7 +5,7 @@ OCAMLOPT = ocamlfind ocamlopt -inline 25 -package $(PACKAGES)
# extra options:
# -p (profiling)
PACKAGES = "pxp-engine pxp-lex-iso88591 wlexing camlp4 num,cgi"
PACKAGES = "pxp-engine pxp-lex-iso88591 pxp-wlex-utf8 wlexing camlp4 num,cgi"
DISTRIB = $(DIRS) tools web depend INSTALL LICENSE README Makefile
......
......@@ -34,8 +34,10 @@ typing/typer.cmx: parser/ast.cmx types/atoms.cmx types/builtin.cmx \
types/ident.cmx types/intervals.cmx parser/location.cmx \
types/patterns.cmx types/sequence.cmx misc/state.cmx typing/typed.cmx \
types/types.cmx typing/typer.cmi
types/atoms.cmo: misc/pool.cmi types/sortedList.cmi types/atoms.cmi
types/atoms.cmx: misc/pool.cmx types/sortedList.cmx types/atoms.cmi
types/atoms.cmo: misc/encodings.cmi misc/pool.cmi types/sortedList.cmi \
types/atoms.cmi
types/atoms.cmx: misc/encodings.cmx misc/pool.cmx types/sortedList.cmx \
types/atoms.cmi
types/boolean.cmo: types/sortedList.cmi types/boolean.cmi
types/boolean.cmx: types/sortedList.cmx types/boolean.cmi
types/builtin.cmo: types/atoms.cmi types/chars.cmi types/sequence.cmi \
......@@ -44,8 +46,8 @@ types/builtin.cmx: types/atoms.cmx types/chars.cmx types/sequence.cmx \
types/types.cmx
types/chars.cmo: types/chars.cmi
types/chars.cmx: types/chars.cmi
types/ident.cmo: misc/pool.cmi types/sortedList.cmi
types/ident.cmx: misc/pool.cmx types/sortedList.cmx
types/ident.cmo: misc/encodings.cmi misc/pool.cmi types/sortedList.cmi
types/ident.cmx: misc/encodings.cmx misc/pool.cmx types/sortedList.cmx
types/intervals.cmo: types/intervals.cmi
types/intervals.cmx: types/intervals.cmi
types/normal.cmo: types/normal.cmi
......@@ -59,27 +61,27 @@ types/sequence.cmx: types/atoms.cmx types/types.cmx types/sequence.cmi
types/sortedList.cmo: types/sortedList.cmi
types/sortedList.cmx: types/sortedList.cmi
types/types.cmo: types/atoms.cmi misc/bool.cmi types/chars.cmi \
types/ident.cmo types/intervals.cmi types/normal.cmi misc/pretty.cmi \
types/sortedList.cmi misc/state.cmi types/types.cmi
misc/encodings.cmi types/ident.cmo types/intervals.cmi types/normal.cmi \
misc/pretty.cmi types/sortedList.cmi misc/state.cmi types/types.cmi
types/types.cmx: types/atoms.cmx misc/bool.cmx types/chars.cmx \
types/ident.cmx types/intervals.cmx types/normal.cmx misc/pretty.cmx \
types/sortedList.cmx misc/state.cmx types/types.cmi
misc/encodings.cmx types/ident.cmx types/intervals.cmx types/normal.cmx \
misc/pretty.cmx types/sortedList.cmx misc/state.cmx types/types.cmi
runtime/eval.cmo: types/atoms.cmi types/ident.cmo types/intervals.cmi \
runtime/load_xml.cmi parser/location.cmi runtime/print_xml.cmo \
runtime/run_dispatch.cmi misc/state.cmi typing/typed.cmo \
runtime/value.cmi runtime/eval.cmi
runtime/load_xml.cmi parser/location.cmi types/patterns.cmi \
runtime/print_xml.cmo runtime/run_dispatch.cmi misc/state.cmi \
typing/typed.cmo types/types.cmi runtime/value.cmi runtime/eval.cmi
runtime/eval.cmx: types/atoms.cmx types/ident.cmx types/intervals.cmx \
runtime/load_xml.cmx parser/location.cmx runtime/print_xml.cmx \
runtime/run_dispatch.cmx misc/state.cmx typing/typed.cmx \
runtime/value.cmx runtime/eval.cmi
runtime/load_xml.cmo: types/atoms.cmi types/ident.cmo parser/location.cmi \
runtime/value.cmi runtime/load_xml.cmi
runtime/load_xml.cmx: types/atoms.cmx types/ident.cmx parser/location.cmx \
runtime/value.cmx runtime/load_xml.cmi
runtime/print_xml.cmo: types/atoms.cmi types/chars.cmi misc/encodings.cmi \
types/ident.cmo types/sequence.cmi runtime/value.cmi
runtime/print_xml.cmx: types/atoms.cmx types/chars.cmx misc/encodings.cmx \
types/ident.cmx types/sequence.cmx runtime/value.cmx
runtime/load_xml.cmx parser/location.cmx types/patterns.cmx \
runtime/print_xml.cmx runtime/run_dispatch.cmx misc/state.cmx \
typing/typed.cmx types/types.cmx runtime/value.cmx runtime/eval.cmi
runtime/load_xml.cmo: types/atoms.cmi misc/encodings.cmi types/ident.cmo \
parser/location.cmi runtime/value.cmi runtime/load_xml.cmi
runtime/load_xml.cmx: types/atoms.cmx misc/encodings.cmx types/ident.cmx \
parser/location.cmx runtime/value.cmx runtime/load_xml.cmi
runtime/print_xml.cmo: types/atoms.cmi misc/encodings.cmi types/ident.cmo \
types/sequence.cmi runtime/value.cmi
runtime/print_xml.cmx: types/atoms.cmx misc/encodings.cmx types/ident.cmx \
types/sequence.cmx runtime/value.cmx
runtime/run_dispatch.cmo: types/atoms.cmi types/chars.cmi misc/encodings.cmi \
types/ident.cmo types/patterns.cmi types/types.cmi runtime/value.cmi \
runtime/run_dispatch.cmi
......@@ -100,10 +102,10 @@ driver/cduce.cmx: parser/ast.cmx types/builtin.cmx runtime/eval.cmx \
types/ident.cmx parser/location.cmx parser/parser.cmx types/patterns.cmx \
misc/state.cmx typing/typed.cmx typing/typer.cmx types/types.cmx \
runtime/value.cmx parser/wlexer.cmx driver/cduce.cmi
driver/run.cmo: driver/cduce.cmi parser/location.cmi misc/state.cmi \
parser/wlexer.cmo
driver/run.cmx: driver/cduce.cmx parser/location.cmx misc/state.cmx \
parser/wlexer.cmx
driver/run.cmo: driver/cduce.cmi parser/location.cmi types/sequence.cmi \
misc/state.cmi runtime/value.cmi parser/wlexer.cmo
driver/run.cmx: driver/cduce.cmx parser/location.cmx types/sequence.cmx \
misc/state.cmx runtime/value.cmx parser/wlexer.cmx
driver/webiface.cmo: driver/cduce.cmi driver/examples.cmo parser/location.cmi \
misc/state.cmi
driver/webiface.cmx: driver/cduce.cmx driver/examples.cmx parser/location.cmx \
......@@ -111,6 +113,7 @@ driver/webiface.cmx: driver/cduce.cmx driver/examples.cmx parser/location.cmx \
parser/parser.cmi: parser/ast.cmo
typing/typer.cmi: parser/ast.cmo types/ident.cmo typing/typed.cmo \
types/types.cmi
types/atoms.cmi: misc/encodings.cmi
types/boolean.cmi: types/sortedList.cmi
types/patterns.cmi: types/atoms.cmi types/chars.cmi types/ident.cmo \
types/types.cmi
......@@ -122,3 +125,4 @@ runtime/load_xml.cmi: runtime/value.cmi
runtime/run_dispatch.cmi: types/patterns.cmi runtime/value.cmi
runtime/value.cmi: types/atoms.cmi types/chars.cmi misc/encodings.cmi \
types/ident.cmo types/intervals.cmi types/types.cmi
driver/cduce.cmi: types/types.cmi runtime/value.cmi
......@@ -8,7 +8,6 @@ let quiet = ref false
let typing_env = State.ref "Cduce.typing_env" Typer.Env.empty
let enter_global_value x v t =
let x = Ident.ident x in
Eval.enter_global x v;
typing_env := Typer.Env.add x t !typing_env
......@@ -31,8 +30,8 @@ let dump_env ppf =
Eval.Env.iter
(fun x v ->
let t = Typer.Env.find x !typing_env in
Format.fprintf ppf "@[|- %s : %a@ => %a@]@\n"
(Id.value x)
Format.fprintf ppf "@[|- %a : %a@ => %a@]@\n"
U.print (Id.value x)
print_norm t
print_value v
)
......@@ -48,10 +47,11 @@ let rec print_exn ppf = function
Format.fprintf ppf "Uncaught CDuce exception: @[%a@]@\n"
print_value v
| Eval.MultipleDeclaration v ->
Format.fprintf ppf "Multiple declaration for global value %s@\n" v
Format.fprintf ppf "Multiple declaration for global value %a@\n"
U.print (Id.value v)
| Typer.WrongLabel (t,l) ->
Format.fprintf ppf "Wrong record selection: the label %s@\n"
(LabelPool.value l);
Format.fprintf ppf "Wrong record selection: the label %a@\n"
U.print (LabelPool.value l);
Format.fprintf ppf "applied to an expression of type:@\n%a@\n"
print_norm t
| Typer.ShouldHave (t,msg) ->
......@@ -80,7 +80,7 @@ let rec print_exn ppf = function
Format.fprintf ppf "Sample value:@\n%a@\n"
Types.Sample.print (Types.Sample.get t)
| Typer.UnboundId x ->
Format.fprintf ppf "Unbound identifier %s@\n" x
Format.fprintf ppf "Unbound identifier %a@\n" U.print (Id.value x)
| Wlexer.Illegal_character c ->
Format.fprintf ppf "Illegal character (%s)@\n" (Char.escaped c)
| Wlexer.Unterminated_comment ->
......@@ -112,7 +112,7 @@ let debug ppf = function
and p = Typer.pat p in
let f = Patterns.filter (Types.descr t) p in
List.iter (fun (x,t) ->
Format.fprintf ppf " %s:%a@\n" (Id.value x)
Format.fprintf ppf " %a:%a@\n" U.print (Id.value x)
print_norm (Types.descr t)) f
| `Compile2 (t,pl) ->
Format.fprintf ppf "[DEBUG:compile2]@\n";
......@@ -152,7 +152,7 @@ let run ppf ppf_err input =
List.iter (fun (x,t) ->
typing_env := Typer.Env.add x t !typing_env;
if not !quiet then
Format.fprintf ppf "|- %s : %a@\n@." (Id.value x) print_norm t)
Format.fprintf ppf "|- %a : %a@\n@." U.print (Id.value x) print_norm t)
in
let type_decl decl =
......@@ -166,7 +166,7 @@ let run ppf ppf_err input =
(fun (x,v) ->
Eval.enter_global x v;
if not !quiet then
Format.fprintf ppf "=> %s : @[%a@]@\n@." (Id.value x) print_value v
Format.fprintf ppf "=> %a : @[%a@]@\n@." U.print (Id.value x) print_value v
) bindings
in
......
......@@ -4,7 +4,7 @@ val quiet: bool ref
val print_exn: Format.formatter -> exn -> unit
val enter_global_value : string -> Value.t -> Types.descr -> unit
val enter_global_value : Ident.id -> Value.t -> Types.descr -> unit
val run : Format.formatter -> Format.formatter -> char Stream.t -> bool
(* Returns true if everything is ok (no error) *)
......
open Ident
let () = State.close ();;
let dump = ref None
......@@ -61,7 +63,7 @@ let main () =
let l = List.rev_map Value.string_latin1 !args in
let l = Value.sequence l in
let t = Sequence.star Sequence.string in
Cduce.enter_global_value "argv" l t
Cduce.enter_global_value (ident (U.mk "argv")) l t
);
(match !src with
| [] ->
......
......@@ -2,17 +2,76 @@ type uchar = int
module Utf8 =
struct
type ustring = string
type t = string
type uindex = int
let hash = Hashtbl.hash
let equal (x : t) y = x = y
(* TODO: handle UTF-8 viewport *)
let to_string s =
Netconversion.recode_string
~subst:(fun i -> Printf.sprintf "\\%i;" i)
~out_enc:`Enc_iso88591
~in_enc:`Enc_utf8 s
let print ppf s =
Format.fprintf ppf "%s" (to_string s)
let start_index s = 0
let end_index s = String.length s
let equal_index = (==)
let mk s = s
let mk_latin1 s = Netconversion.recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s
let get_str s = s
let get_idx i = i
(* TODO: handle 5,6 bytes chars; report malformed UTF-8 *)
let rec check s i j =
(i = j) ||
(i < j) &&
match s.[i] with
| '\000'..'\127' -> check s (i+1) j
| '\128'..'\223' as c->
(i+1 < j) &&
let n1 = Char.code c
and n2 = Char.code s.[i+1] in
(n2 >= 128) && (n2 <= 191) &&
(((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) >= 128) &&
check s (i+2) j
| '\224'..'\239' as c ->
(i+2 < j) &&
let n1 = Char.code c
and n2 = Char.code s.[i+1]
and n3 = Char.code s.[i+2] in
(n2 >= 128) && (n2 <= 191) &&
(n3 >= 128) && (n3 <= 191) &&
let p = ((n1 land 0b1111) lsl 12) lor
((n2 land 0b111111) lsl 6) lor
(n3 land 0b111111) in
(p >= 0x800) &&
((p < 0xd800) || (p >= 0xe000)) &&
((p < 0xfffe) || (p > 0xffff)) &&
check s (i+3) j
| '\240'..'\247' as c ->
(i+3 < j) &&
let n1 = Char.code c
and n2 = Char.code s.[i+1]
and n3 = Char.code s.[i+2]
and n4 = Char.code s.[i+3] in
(n2 >= 128) && (n2 <= 191) &&
(n3 >= 128) && (n3 <= 191) &&
(n4 >= 128) && (n4 <= 191) &&
let p = ((n1 land 0b111) lsl 18) lor
((n2 land 0b111111) lsl 12) lor
((n3 land 0b111111) lsl 6) lor
(n4 land 0b111111)
in
(p >= 0x10000) && (p < 0x110000) &&
check s (i+4) j
| _ -> false
let check s = check s 0 (String.length s)
let get s i =
match s.[i] with
| '\000'..'\127' as c ->
......
......@@ -2,21 +2,30 @@ type uchar = int
module Utf8 :
sig
type ustring
type t
type uindex
val end_index: ustring -> uindex
val start_index: ustring -> uindex
val hash: t -> int
val equal: t -> t -> bool
val check: string -> bool
val to_string: t -> string
val print: Format.formatter -> t -> unit
val end_index: t -> uindex
val start_index: t -> uindex
val equal_index: uindex -> uindex -> bool
val mk: string -> ustring
val get_str: ustring -> string
val mk: string -> t
val mk_latin1: string -> t
val get_str: t -> string
val get_idx: uindex -> int
val get: ustring -> uindex -> uchar
val advance: ustring -> uindex -> uindex
val next: ustring -> uindex -> uchar * uindex
val get: t -> uindex -> uchar
val advance: t -> uindex -> uindex
val next: t -> uindex -> uchar * uindex
val store: Buffer.t -> uchar -> unit
val copy: Buffer.t -> ustring -> uindex -> uindex -> unit
val get_substr: ustring -> uindex -> uindex -> string
val copy: Buffer.t -> t -> uindex -> uindex -> unit
val get_substr: t -> uindex -> uindex -> string
end
......@@ -42,7 +42,7 @@ and pexpr =
(* Data destructors *)
| Op of string * pexpr list
| Match of pexpr * branches
| Map of pexpr * branches
| Map of bool * pexpr * branches
| Xtrans of pexpr * branches
| Dot of pexpr* label
| RemoveField of pexpr * label
......@@ -50,8 +50,6 @@ and pexpr =
(* Exceptions *)
| Try of pexpr * branches
| MatchFail (* internal usage *)
and abstr = {
fun_name : id option;
fun_iface : (ppat * ppat) list;
......
......@@ -9,6 +9,10 @@ let () = Grammar.error_verbose := true
let gram = Grammar.gcreate (Wlexer.lexer Wlexer.token Wlexer.latin1_engine)
let parse_ident = Encodings.Utf8.mk_latin1
let label s = LabelPool.mk (parse_ident s)
let prog = Grammar.Entry.create gram "prog"
let expr = Grammar.Entry.create gram "expression"
let pat = Grammar.Entry.create gram "type/pattern expression"
......@@ -141,19 +145,19 @@ EXTEND
"top" RIGHTA
[ "match"; e = SELF; "with"; b = branches -> exp loc (Match (e,b))
| "try"; e = SELF; "with"; b = branches ->
let id = ident (U.mk "x") in
let default =
mknoloc (Capture (ident "x")),
Op ("raise",[Var (ident "x")]) in
mknoloc (Capture id),
Op ("raise",[Var id]) in
exp loc (Try (e,b@[default]))
| "map"; e = SELF; "with"; b = branches -> exp loc (Map (e,b))
| "map"; e = SELF; "with"; b = branches -> exp loc (Map (false,e,b))
| "xtransform"; e = SELF; "with"; b = branches -> exp loc (Xtrans (e,b))
| "if"; e = SELF; "then"; e1 = SELF; "else"; e2 = SELF ->
let p1 = mk loc (Internal (Builtin.true_type))
and p2 = mk loc (Internal (Builtin.false_type)) in
exp loc (Match (e, [p1,e1; p2,e2]))
| "transform"; e = SELF; "with"; b = branches ->
let default = mknoloc (Capture (ident "x")), cst_nil in
exp loc (Op ("flatten", [Map (e,b@[default])]))
exp loc (Op ("flatten", [Map (true,e,b)]))
| "fun"; (f,a,b) = fun_decl ->
exp loc (Abstraction { fun_name = f; fun_iface = a; fun_body = b })
| (p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
......@@ -176,7 +180,7 @@ EXTEND
[ e1 = expr; op = ["+" | "-" | "@" ]; e2 = expr ->
exp loc (Op (op,[e1;e2]))
| e = expr; "\\"; l = [LIDENT | UIDENT | keyword ] ->
exp loc (RemoveField (e,LabelPool.mk l))
exp loc (RemoveField (e, label l))
]
|
[ e1 = expr; op = ["*"]; e2 = expr -> exp loc (Op (op,[e1;e2]))
......@@ -185,23 +189,24 @@ EXTEND
let tag = mk loc (Internal (Types.atom (Atoms.any))) in
let att = mk loc (Internal Types.Record.any) in
let any = mk loc (Internal (Types.any)) in
let re = Star(Alt(SeqCapture(ident "x",Elem p), Elem any)) in
let id = ident (U.mk "x") in
let re = Star(Alt(SeqCapture(id,Elem p), Elem any)) in
let ct = mk loc (Regexp (re,any)) in
let p = mk loc (XmlT (tag, multi_prod loc [att;ct])) in
let b = (p, Var (ident "x")) in
exp loc (Op ("flatten", [Map (e,[b])]))
let b = (p, Var id) in
exp loc (Op ("flatten", [Map (false,e,[b])]))
]
|
[ e = expr; "."; l = [LIDENT | UIDENT | keyword ] ->
exp loc (Dot (e,LabelPool.mk l))
exp loc (Dot (e, label l))
]
|
[ op = [ LIDENT "flatten"
| LIDENT "load_xml"
| LIDENT "load_file"
| LIDENT "load_file" | LIDENT "load_file_utf8"
| LIDENT "load_html"
| LIDENT "print_xml"
| LIDENT "print_xml" | LIDENT "print_xml_utf8"
| LIDENT "print"
| LIDENT "raise"
| LIDENT "int_of"
......@@ -209,7 +214,7 @@ EXTEND
| LIDENT "atom_of"
];
e = expr -> exp loc (Op (op,[e]))
| op = [ LIDENT "dump_to_file" ];
| op = [ LIDENT "dump_to_file" | LIDENT "dump_to_file_utf8" ];
e1 = expr LEVEL "no_appl"; e2 = expr -> exp loc (Op (op, [e1;e2]))
| e1 = SELF; LIDENT "div"; e2 = expr -> exp loc (Op ("/", [e1;e2]))
| e1 = SELF; LIDENT "mod"; e2 = expr -> exp loc (Op ("mod", [e1;e2]))
......@@ -231,13 +236,14 @@ EXTEND
exp loc l
| "<"; t = [ "("; e = expr; ")" -> e
| a = [ LIDENT | UIDENT | keyword ] ->
exp loc (Cst (Types.Atom (Atoms.mk a))) ];
let a = parse_ident a in
exp loc (Cst (Types.Atom (Atoms.mk a))) ];
a = expr_attrib_spec; ">"; c = expr ->
exp loc (Xml (t, Pair (a,c)))
| "{"; r = [ expr_record_spec | -> exp loc (RecordLitt LabelMap.empty) ]; "}" -> r
| s = STRING2 ->
exp loc (tuple (char_list loc s @ [cst_nil]))
| a = LIDENT -> exp loc (Var (ident a))
| a = LIDENT -> exp loc (Var (ident (parse_ident a)))
]
];
......@@ -266,7 +272,7 @@ EXTEND
fun_decl: [
(* need an hack to do this, because both productions would
match [ OPT LIDENT; "("; pat ] .... *)
[ f = OPT [ x = LIDENT -> ident x]; "("; p1 = pat LEVEL "no_arrow";
[ f = OPT [ x = LIDENT -> ident (parse_ident x)]; "("; p1 = pat LEVEL "no_arrow";
res = [ "->"; p2 = pat;
a = [ ";"; a = LIST0 arrow SEP ";" -> a | -> [] ];
")"; b = branches -> `Classic (p2,a,b)
......@@ -308,7 +314,7 @@ EXTEND
| _ -> Alt (x,y)
]
| [ x = regexp; y = regexp -> Seq (x,y) ]
| [ a = LIDENT; "::"; x = regexp -> SeqCapture (ident a,x) ]
| [ a = LIDENT; "::"; x = regexp -> SeqCapture (ident (parse_ident a),x) ]
| [ x = regexp; "*" -> Star x
| x = regexp; "*?" -> WeakStar x
| x = regexp; "+" -> Seq (x, Star x)
......@@ -317,7 +323,7 @@ EXTEND
| x = regexp; "??" -> Alt (Epsilon, x) ]
| [ "("; x = regexp; ")" -> x
| "("; a = LIDENT; ":="; c = const; ")" ->
Elem (mk loc (Constant ((ident a,c))))
Elem (mk loc (Constant ((ident (parse_ident a),c))))
| UIDENT "PCDATA" -> string_regexp
| i = STRING1; "--"; j = STRING1 ->
let i = Chars.mk_int (parse_char loc i)
......@@ -350,9 +356,9 @@ EXTEND
[ "{"; r = record_spec; "}" -> mk loc (Record (true,r))
| "{|"; r = record_spec; "|}" -> mk loc (Record (false,r))
| LIDENT "_" -> mk loc (Internal Types.any)
| a = LIDENT -> mk loc (Capture (ident a))
| a = LIDENT -> mk loc (Capture (ident (parse_ident a)))
| "("; a = LIDENT; ":="; c = const; ")" ->
mk loc (Constant (ident a,c))
mk loc (Constant (ident (parse_ident a),c))
| a = UIDENT -> mk loc (PatVar a)
| i = INT ; "--"; j = INT ->
let i = Intervals.mk i
......@@ -379,7 +385,7 @@ EXTEND
"]" -> mk loc (Regexp (r,q))
| "<"; t =
[ x = [ LIDENT | UIDENT | keyword ] ->
let a = if x = "_" then Atoms.any else Atoms.atom (Atoms.mk x) in
let a = if x = "_" then Atoms.any else Atoms.atom (Atoms.mk (parse_ident x)) in
mk loc (Internal (Types.atom a))
| "("; t = pat; ")" -> t ];
a = attrib_spec; ">"; c = pat ->
......@@ -403,7 +409,7 @@ EXTEND
o = [ "?" -> true | -> false];
x = pat ->
let x = if o then mk loc (Optional x) else x in
(LabelPool.mk l, x)
(label l, x)
] SEP ";" ->
make_record loc r
] ];
......@@ -417,7 +423,7 @@ EXTEND
const:
[
[ i = INT -> Types.Integer (Intervals.mk i)
| "`"; a = [LIDENT | UIDENT | keyword ] -> Types.Atom (Atoms.mk a)
| "`"; a = [LIDENT | UIDENT | keyword ] -> Types.Atom (Atoms.mk (parse_ident a))
| c = char -> Types.Char c ]
];
......@@ -432,7 +438,7 @@ EXTEND
expr_record_spec:
[ [ r = LIST1
[ l = [LIDENT | UIDENT | keyword ]; "="; x = expr ->
(LabelPool.mk l,x) ]
(label l,x) ]
SEP ";" ->
exp loc (RecordLitt (make_record loc r))
] ];
......
......@@ -2,7 +2,7 @@ open Value
open Run_dispatch
open Ident
exception MultipleDeclaration of string
exception MultipleDeclaration of id
module Env = Map.Make (Ident.Id)
type env = t Env.t
......@@ -10,21 +10,22 @@ let global_env = State.ref "Eval.global_env" Env.empty
let enter_global x v =
if Env.mem x !global_env then
raise (MultipleDeclaration (Id.value x));
raise (MultipleDeclaration x);
global_env := Env.add x v !global_env
let exn_int_of = CDuceExn (Pair (
Atom (Atoms.mk "Invalid_argument"),
Atom (Atoms.mk_ascii "Invalid_argument"),
string_latin1 "int_of"))
let exn_load_file_utf8 = CDuceExn (Pair (
Atom (Atoms.mk_ascii "load_file_utf8"),
string_latin1 "File is not a valid UTF-8 stream"))
(* Evaluation of expressions *)
exception EMatchFail
let rec eval env e0 =
match e0.Typed.exp_descr with
| Typed.Forget (e,_) -> eval env e
......@@ -57,12 +58,13 @@ let rec eval env e0 =
| Typed.Xml (e1,e2) -> Xml (eval env e1, eval env e2)
| Typed.Cst c -> const c
| Typed.Match (arg,brs) -> eval_branches env brs (eval env arg)
| Typed.Map (arg,brs) -> eval_map env brs (eval env arg)
| Typed.Map (false,arg,brs) -> eval_map env brs (eval env arg)
| Typed.Map (true,_,_) -> assert false
| Typed.Xtrans (arg,brs) -> eval_xtrans env brs (eval env arg)
| Typed.Op ("raise", [e]) -> raise (CDuceExn (eval env e))
| Typed.Try (arg,brs) ->
(try eval env arg with CDuceExn v -> eval_branches env brs v)
| Typed.Op ("flatten", [{Typed.exp_descr=Typed.Map (arg,brs)}]) ->
| Typed.Op ("flatten", [{Typed.exp_descr=Typed.Map (true,arg,brs)}]) ->
eval_transform env brs (eval env arg)
| Typed.Op ("flatten", [e]) -> eval_flatten (eval env e)
| Typed.Op ("@", [e1; e2]) -> eval_concat (eval env e1) (eval env e2)
......@@ -73,14 +75,18 @@ let rec eval env e0 =
| Typed.Op ("mod", [e1; e2]) -> eval_mod (eval env e1) (eval env e2)
| Typed.Op ("load_xml", [e]) -> eval_load_xml (eval env e)
| Typed.Op ("load_html", [e]) -> eval_load_html (eval env e)
| Typed.Op ("load_file", [e]) -> eval_load_file (eval env e)
| Typed.Op ("load_file", [e]) -> eval_load_file ~utf8:false (eval env e)
| Typed.Op ("load_file_utf8", [e]) -> eval_load_file ~utf8:true (eval env e)
| Typed.Op ("print_xml", [e]) -> eval_print_xml (eval env e)
| Typed.Op ("print_xml_utf8", [e]) -> eval_print_xml_utf8 (eval env e)
| Typed.Op ("print", [e]) -> eval_print (eval env e)
| Typed.Op ("int_of", [e]) -> eval_int_of (eval env e)
| Typed.Op ("atom_of", [e]) -> eval_atom_of (eval env e)
| Typed.Op ("string_of", [e]) -> eval_string_of (eval env e)
| Typed.Op ("dump_to_file", [e1; e2]) ->
eval_dump_to_file (eval env e1) (eval env e2)
| Typed.Op ("dump_to_file_utf8", [e1; e2]) ->
eval_dump_to_file_utf8 (eval env e1) (eval env e2)
| Typed.Op ("=",[e1; e2]) -> eval_equal (eval env e1) (eval env e2)
| Typed.Op ("<",[e1; e2]) -> eval_lt (eval env e1) (eval env e2)
| Typed.Op ("<=",[e1; e2]) -> eval_lte (eval env e1) (eval env e2)
......@@ -88,7 +94,6 @@ let rec eval env e0 =
| Typed.Op (">=",[e1; e2]) -> eval_gte (eval env e1) (eval env e2)
| Typed.Dot (e, l) -> eval_dot l (eval env e)
| Typed.RemoveField (e, l) -> eval_remove_field l (eval env e)
| Typed.MatchFail -> raise EMatchFail
| Typed.Op (o,_) -> failwith ("Unknown operator " ^ o)
......@@ -102,12 +107,14 @@ and eval_branches' env_ref brs arg =
and eval_branches env brs arg =
let (disp, rhs) = Typed.dispatcher brs in
let (code, bindings) = run_dispatcher disp arg in
let (bind, e) = rhs.(code) in
let env =
List.fold_left (fun env (x,i) ->
if (i == -1) then Env.add x arg env
else Env.add x bindings.(i) env) env (IdMap.get bind) in
eval env e
match rhs.(code) with
| Patterns.Compile.Match (bind,e) ->
let env =
List.fold_left (fun env (x,i) ->
if (i == -1) then Env.add x arg env
else Env.add x bindings.(i) env) env (IdMap.get bind) in
eval env e
| Patterns.Compile.Fail -> Value.Absent
and eval_let_decl env l =
let v = eval env l.Typed.let_body in
......@@ -119,7 +126,8 @@ and eval_map env brs = function
| Pair (x,y) ->
let x = eval_branches env brs x in
Pair (x, eval_map env brs y)
| String_latin1 (_,_,_,_) | String_utf8 (_,_,_,_) as v -> eval_map env brs (normalize v)
| String_latin1 (_,_,_,_) | String_utf8 (_,_,_,_) as v ->
eval_map env brs (normalize v)
| q -> q
and eval_flatten = function
......@@ -128,29 +136,37 @@ and eval_flatten = function
and eval_transform env brs = function
| Pair (x,y) ->
let x = eval_branches env brs x in
let x = match eval_branches env brs x with Value.Absent -> Value.nil | x -> x in
eval_concat x (eval_transform env brs y)
| String_latin1 (_,_,_,_) | String_utf8 (_,_,_,_) as v -> eval_transform env brs (normalize v)