Commit edc7480e authored by Pietro Abate's avatar Pietro Abate

[r2003-10-02 20:04:31 by cvscast] Cleaning + new semantics for default values in regexps

Original author: cvscast
Date: 2003-10-02 20:04:32+00:00
parent 9386048d
0.2.0
* Code upgraded to Ocaml 3.07+beta2
* Major cleaning in progress
* Using ulex instead of wlex
* Using ulex instead of wlex. Sources can now be in Utf8.
* Allow structured constants in default value patterns (p := c)
* Default value patterns in regexps don't consume an element
* --compile option
* Bug fixes in the parser. Tuples are now ok in regexps.
* Lazy implementation of @
0.1.1
* Various bug fixes (expat might now work)
......
......@@ -98,7 +98,7 @@ let compile_let_decl env decl =
let decl = { let_pat = pat; let_expr = compile env false (decl.Typed.let_body) } in
let names = IdSet.get (Patterns.fv pat) in
let env = enter_globals env names in
(names, env, decl)
(env, decl)
let compile_rec_funs env funs =
......@@ -111,4 +111,4 @@ let compile_rec_funs env funs =
let names = List.map fun_name funs in
let env = enter_globals env names in
let exprs = List.map (compile_abstr env) (List.map fun_a funs) in
(names, env, exprs)
(env, exprs)
open Ident
type env
val empty : env
val enter_global : env -> id -> env
val enter_globals : env -> id list -> env
val find : id -> env -> Lambda.var_loc
val compile : env -> bool -> Typed.texpr -> Lambda.expr
val compile_let_decl : env -> Typed.let_decl -> env * Lambda.let_decl
val compile_rec_funs : env -> Typed.texpr list -> env * Lambda.expr list
......@@ -159,9 +159,9 @@ runtime/eval.cmx: misc/q_symbol.cmo types/builtin_defs.cmx types/ident.cmx compi
schema/schema_xml.cmx types/sequence.cmx typing/typed.cmx \
typing/typer.cmx types/types.cmx runtime/value.cmx runtime/eval.cmi
compile/compile.cmo: misc/q_symbol.cmo types/ident.cmo compile/lambda.cmo types/patterns.cmi \
typing/typed.cmo types/types.cmi
typing/typed.cmo types/types.cmi compile/compile.cmi
compile/compile.cmx: misc/q_symbol.cmo types/ident.cmx compile/lambda.cmx types/patterns.cmx \
typing/typed.cmx types/types.cmx
typing/typed.cmx types/types.cmx compile/compile.cmi
compile/operators.cmo: misc/q_symbol.cmo misc/custom.cmo runtime/eval.cmi compile/lambda.cmo \
parser/location.cmi misc/pool.cmi typing/typer.cmi types/types.cmi \
runtime/value.cmi compile/operators.cmi
......@@ -178,7 +178,7 @@ types/builtin.cmx: misc/q_symbol.cmo types/atoms.cmx types/builtin_defs.cmx type
parser/location.cmx misc/ns.cmx compile/operators.cmx \
runtime/print_xml.cmx types/sequence.cmx typing/typer.cmx types/types.cmx \
runtime/value.cmx types/builtin.cmi
driver/cduce.cmo: misc/q_symbol.cmo parser/ast.cmo types/builtin.cmi compile/compile.cmo \
driver/cduce.cmo: misc/q_symbol.cmo parser/ast.cmo types/builtin.cmi compile/compile.cmi \
runtime/eval.cmi runtime/explain.cmi types/ident.cmo parser/location.cmi \
misc/ns.cmi parser/parser.cmi types/patterns.cmi types/sample.cmi \
misc/state.cmi typing/typed.cmo typing/typer.cmi types/types.cmi \
......@@ -231,6 +231,7 @@ runtime/run_dispatch.cmi: misc/q_symbol.cmo types/patterns.cmi runtime/value.cmi
runtime/explain.cmi: misc/q_symbol.cmo types/types.cmi runtime/value.cmi
runtime/eval.cmi: misc/q_symbol.cmo types/ident.cmo compile/lambda.cmo typing/typed.cmo \
runtime/value.cmi
compile/compile.cmi: misc/q_symbol.cmo types/ident.cmo compile/lambda.cmo typing/typed.cmo
compile/operators.cmi: misc/q_symbol.cmo misc/custom.cmo parser/location.cmi misc/serialize.cmi \
typing/typer.cmi types/types.cmi runtime/value.cmi
types/builtin.cmi: misc/q_symbol.cmo typing/typer.cmi
......
......@@ -5,7 +5,7 @@ let quiet = ref false
let toplevel = ref false
let typing_env = State.ref "Cduce.typing_env" Builtin.env
let eval_env = State.ref "Cduce.eval_env" Env.empty
let eval_env = State.ref "Cduce.eval_env" Eval.empty
let compile_env = State.ref "Cduce.compile_env" Compile.empty
let do_compile = ref false
......@@ -13,7 +13,7 @@ let do_compile = ref false
let get_global_value v =
if !do_compile
then Eval.L.eval_var (Compile.find v !compile_env)
else Env.find v !eval_env
else Eval.find_value v !eval_env
let get_global_type v =
Typer.find_value v !typing_env
......@@ -23,7 +23,7 @@ let enter_global_value x v t =
if !do_compile
then (compile_env := Compile.enter_global !compile_env x; Eval.L.push v)
else eval_env := Env.add x v !eval_env
else eval_env := Eval.enter_value x v !eval_env
let rec is_abstraction = function
| Ast.Abstraction _ -> true
......@@ -44,20 +44,18 @@ let print_protect ppf s =
let print_value ppf v =
Location.protect ppf (fun ppf -> Value.print ppf v)
let dump_value ppf x t v =
Format.fprintf ppf "@[val %a : @[%a = %a@]@]@."
U.print (Id.value x) print_norm t print_value v
let dump_env ppf =
Format.fprintf ppf "Types:%a@." Typer.dump_types !typing_env;
Format.fprintf ppf "Namespace prefixes:@\n%a" Typer.dump_ns !typing_env;
Format.fprintf ppf "Namespace prefixes used for pretty-printing:@.%t"
Ns.InternalPrinter.dump;
Format.fprintf ppf "Values:@.";
Env.iter
(fun x v ->
let t = Typer.find_value x !typing_env in
Format.fprintf ppf "@[val %a : @[%a = %a@]@]@."
U.print (Id.value x) print_norm t print_value v
)
!eval_env;
Eval.L.dump ppf
Typer.iter_values !typing_env
(fun x t -> dump_value ppf x t (get_global_value x))
let rec print_exn ppf = function
| Location (loc, w, exn) ->
......@@ -114,47 +112,65 @@ let rec print_exn ppf = function
Format.fprintf ppf "%a@." print_protect (Printexc.to_string exn)
let insert_bindings ppf =
List.iter2
(fun (x,t) (y,v) ->
assert (x = y);
typing_env := Typer.enter_value x t !typing_env;
eval_env := Env.add x v !eval_env;
if not !quiet then
Format.fprintf ppf "val %a : @[@[%a@] =@ @[%a@]@]@."
U.print (Id.value x) print_norm t print_value v)
let display ppf =
List.iter
(fun x ->
let t = get_global_type x in
let v = get_global_value x in
if not !quiet then
Format.fprintf ppf "val %a : @[@[%a@] =@ @[%a@]@]@."
U.print (Id.value x) print_norm t print_value v)
let display ppf l =
if not !quiet then
List.iter
(fun (x,t) -> dump_value ppf x t (get_global_value x))
l
let eval ppf e =
let (fv,e) = Typer.expr !typing_env e in
let e = Typer.expr !typing_env e in
let t = Typer.type_check !typing_env e Types.any true in
Typer.report_unused_branches ();
if not !quiet then
Location.dump_loc ppf (e.Typed.exp_loc,`Full);
let v =
if !do_compile then
let e = Compile.compile !compile_env false e in
Eval.L.eval e
else
Eval.eval !eval_env e
in
if not !quiet then
Format.fprintf ppf "- : @[@[%a@] =@ @[%a@]@]@."
print_norm t print_value v;
v
if !do_compile then
let e = Compile.compile !compile_env false e in
let v = Eval.L.eval e in
if not !quiet then
Format.fprintf ppf "- : @[@[%a@] =@ @[%a@]@]@."
print_norm t print_value v;
v
else
let v = Eval.eval !eval_env e in
if not !quiet then
Format.fprintf ppf "- : @[@[%a@] =@ @[%a@]@]@."
print_norm t print_value v;
v
let let_decl ppf p e =
let decl = Typer.let_decl !typing_env p e in
let typs = Typer.type_let_decl !typing_env decl in
Typer.report_unused_branches ();
let () =
if !do_compile then
let (env,decl) = Compile.compile_let_decl !compile_env decl in
Eval.L.eval_let_decl decl;
compile_env := env
else
eval_env := Eval.eval_let_decl !eval_env decl
in
typing_env := Typer.enter_values typs !typing_env;
display ppf typs
let let_funs ppf funs =
let funs = List.map (Typer.expr !typing_env) funs in
let typs = Typer.type_rec_funs !typing_env funs in
Typer.report_unused_branches ();
let () =
if !do_compile then
let (env,funs) = Compile.compile_rec_funs !compile_env funs in
Eval.L.eval_rec_funs funs;
compile_env := env;
else
eval_env := Eval.eval_rec_funs !eval_env funs
in
typing_env := Typer.enter_values typs !typing_env;
display ppf typs
let debug ppf = function
| `Subtype (t1,t2) ->
......@@ -198,27 +214,10 @@ let debug ppf = function
| None ->
Format.fprintf ppf "Explanation: value has given type@.")
let rec collect_funs ppf accu = function
| { descr = Ast.FunDecl e } :: rest ->
let (_,e) = Typer.expr !typing_env e in
collect_funs ppf (e::accu) rest
| rest ->
let typs = Typer.type_rec_funs !typing_env accu in
Typer.report_unused_branches ();
if !do_compile then
let (names,env,funs) = Compile.compile_rec_funs !compile_env accu in
Eval.L.eval_rec_funs funs;
typing_env := Typer.enter_values typs !typing_env;
compile_env := env;
display ppf names
else (
let vals = Eval.eval_rec_funs !eval_env accu in
insert_bindings ppf typs vals);
rest
let rec collect_funs ppf accu = function
| { descr = Ast.FunDecl e } :: rest -> collect_funs ppf (e::accu) rest
| rest -> let_funs ppf accu; rest
let rec collect_types ppf accu = function
| { descr = Ast.TypeDecl (x,t) } :: rest ->
......@@ -244,20 +243,7 @@ let rec phrases ppf phs = match phs with
ignore (eval ppf e);
phrases ppf rest
| { descr = Ast.LetDecl (p,e) } :: rest ->
let decl = Typer.let_decl !typing_env p e in
let typs = Typer.type_let_decl !typing_env decl in
Typer.report_unused_branches ();
if !do_compile then
let (names,env,decl) = Compile.compile_let_decl !compile_env decl in
Eval.L.eval_let_decl decl;
typing_env := Typer.enter_values typs !typing_env;
compile_env := env;
display ppf names
else
(let vals = Eval.eval_let_decl !eval_env decl in
insert_bindings ppf typs vals);
let_decl ppf p e;
phrases ppf rest
| { descr = Ast.Debug l } :: rest ->
debug ppf l;
......
......@@ -398,13 +398,17 @@ EXTEND
| x = regexp; "?" -> Alt (x, Epsilon)
| x = regexp; "??" -> Alt (Epsilon, x) ]
| [ "("; x = LIST1 regexp SEP ","; ")" ->
let x =
List.map
(function
| Elem x -> x
| _ -> error loc "Mixing regular expressions and products")
x in
Elem (multi_prod loc x)
(match x with
| [ x ] -> x
| _ ->
let x =
List.map
(function
| Elem x -> x
| _ -> error loc
"Mixing regular expressions and products")
x in
Elem (multi_prod loc x))
| "("; a = IDENT; ":="; c = expr; ")" ->
Elem (mk loc (Constant ((ident a,c))))
| IDENT "PCDATA" -> string_regexp
......
......@@ -5,9 +5,17 @@ open Ident
exception MultipleDeclaration of id
type env = t Env.t
let empty = Env.empty
let eval_unary_op = ref (fun _ -> assert false)
let eval_binary_op = ref (fun _ _ -> assert false)
let enter_value = Env.add
let enter_values l env =
List.fold_left (fun env (x,v) -> Env.add x v env) env l
let find_value = Env.find
(* To write tail-recursive map-like iteration *)
let make_accu () = Pair(nil,Absent)
......@@ -105,8 +113,12 @@ and eval_let_decl env l =
let v = eval env l.Typed.let_body in
let (disp,bind) = Typed.dispatcher_let_decl l in
let (_,bindings) = run_dispatcher disp v in
List.map
(fun (x,i) -> (x, if (i == -1) then v else bindings.(i)))
List.fold_left
(fun env (x,i) ->
let v = if (i == -1) then v else bindings.(i) in
enter_value x v env
)
env
(IdMap.get bind)
and eval_rec_funs env l =
......@@ -122,7 +134,8 @@ and eval_rec_funs env l =
List.fold_left
(fun env (f, _ ,s) -> Env.add f (Delayed s) env)
env slots in
List.map (fun (f, e, s) -> s := eval env' e; (f, !s)) slots
List.iter (fun (_, e, s) -> s := eval env' e) slots;
env'
and eval_map env brs v =
map (eval_map_aux env brs) v
......@@ -135,6 +148,9 @@ and eval_map_aux env brs acc = function
eval_map_aux env brs acc' y
| String_latin1 (_,_,_,_) | String_utf8 (_,_,_,_) as v ->
eval_map_aux env brs acc (normalize v)
| Concat (x,y) ->
let acc = eval_map_aux env brs acc x in
eval_map_aux env brs acc y
| _ -> acc
and eval_transform env brs v =
......@@ -150,6 +166,9 @@ and eval_transform_aux env brs acc = function
if Types.Char.is_empty (brs.Typed.br_accept)
then eval_transform_aux env brs acc q
else eval_transform_aux env brs acc (normalize v)
| Concat (x,y) ->
let acc = eval_transform_aux env brs acc x in
eval_transform_aux env brs acc y
| _ -> acc
......@@ -171,6 +190,9 @@ and eval_xtrans_aux env brs acc = function
set_cdr acc acc';
eval_xtrans_aux env brs acc' q
else eval_xtrans_aux env brs acc (normalize v)
| Concat (x,y) ->
let acc = eval_xtrans_aux env brs acc x in
eval_xtrans_aux env brs acc y
| Pair (x,y) ->
let acc =
match eval_branches env brs x with
......@@ -425,6 +447,9 @@ and eval_map_aux env brs acc = function
eval_map_aux env brs acc' y
| Value.String_latin1 (_,_,_,_) | Value.String_utf8 (_,_,_,_) as v ->
eval_map_aux env brs acc (normalize v)
| Value.Concat (x,y) ->
let acc = eval_map_aux env brs acc x in
eval_map_aux env brs acc y
| _ -> acc
and eval_transform env brs v =
......@@ -439,6 +464,9 @@ and eval_transform_aux env brs acc = function
if not brs.brs_accept_chars
then eval_transform_aux env brs acc v
else eval_transform_aux env brs acc (normalize v)
| Value.Concat (x,y) ->
let acc = eval_transform_aux env brs acc x in
eval_transform_aux env brs acc y
| _ -> acc
......@@ -460,6 +488,9 @@ and eval_xtrans_aux env brs acc = function
set_cdr acc acc';
eval_xtrans_aux env brs acc' q
else eval_xtrans_aux env brs acc (normalize v)
| Value.Concat (x,y) ->
let acc = eval_xtrans_aux env brs acc x in
eval_xtrans_aux env brs acc y
| Value.Pair (x,y) ->
let acc =
match eval_branches env brs x with
......
......@@ -2,11 +2,17 @@ open Value
open Ident
exception MultipleDeclaration of id
type env = t Env.t
type env
val empty: env
val enter_value: id -> t -> env -> env
val enter_values: (id * t) list -> env -> env
val find_value: id -> env -> t
val eval: env -> Typed.texpr -> t
val eval_let_decl: env -> Typed.let_decl -> (id * t) list
val eval_rec_funs: env -> Typed.texpr list -> (id * t) list
val eval_let_decl: env -> Typed.let_decl -> env
val eval_rec_funs: env -> Typed.texpr list -> env
val eval_unary_op: (int -> (t -> t)) ref
......
......@@ -69,6 +69,7 @@ let string_of_xml ~utf8 ns_table v =
| String_utf8 (_,_,_,q)
| String_latin1 (_,_,_,q) -> register_content q
| Pair (x, q) -> register_elt x; register_content q
| Concat (x,y) -> register_content x; register_content y
| _ -> ()
in
register_elt v;
......
......@@ -169,6 +169,7 @@ and run_disp_kind actions v =
actions.basic
| Absent ->
run_disp_basic v (fun t -> Types.Record.has_absent t) actions.basic
| Concat (_,_) as v -> run_disp_kind actions (Value.normalize v)
| Delayed _ -> assert false
......
......@@ -12,6 +12,7 @@ type t =
| Abstraction2 of t array * (Types.t * Types.t) list * Lambda.branches
| String_latin1 of int * int * string * t
| String_utf8 of Utf8.uindex * Utf8.uindex * Utf8.t * t
| Concat of t * t
| Absent
| Delayed of t ref
......@@ -26,6 +27,12 @@ let vtrue = Atom (Atoms.V.mk_ascii "true")
let vfalse = Atom (Atoms.V.mk_ascii "false")
let vbool x = if x then vtrue else vfalse
(* TODO: namespaces for the two following functions *)
let vrecord l =
let l = List.map (fun (l,v) -> LabelPool.mk (Ns.empty, U.mk l), v) l in
......@@ -42,15 +49,11 @@ let rec sequence = function
| [] -> nil
| h::t -> Pair (h, sequence t)
let rec concat l1 l2 = match l1 with
| Pair (x,y) -> Pair (x, concat y l2)
| String_latin1 (s,i,j,q) -> String_latin1 (s,i,j, concat q l2)
| String_utf8 (s,i,j,q) -> String_utf8 (s,i,j, concat q l2)
| q -> l2
let concat v1 v2 =
match (v1,v2) with
| (Atom _, v) | (v, Atom _) -> v
| (v1,v2) -> Concat (v1,v2)
let rec flatten = function
| Pair (x,y) -> concat x (flatten y)
| q -> q
let rec const = function
| Types.Integer i -> Integer i
......@@ -63,6 +66,89 @@ let rec const = function
| Types.String (i,j,s,c) -> String_utf8 (i,j,s, const c)
let normalize_string_latin1 i j s q =
if i = j then q else
Pair (Char (Chars.V.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q))
let normalize_string_utf8 i j s q =
if Utf8.equal_index i j then q
else
let (c,i) = Utf8.next s i in
Pair (Char (Chars.V.mk_int c), String_utf8 (i,j,s,q))
(***** The dirty things **********)
type pair = { dummy : t; mutable pair_tl : t }
type str = { dummy1 : t; dummy2 : t; dummy3 : t; mutable str_tl : t }
(* Could optimize this function by changing the order of the fields
in String_latin1, String_utf8 *)
let set_cdr cell tl =
match cell with
| Pair (_,_) -> (Obj.magic cell).pair_tl <- tl
| String_latin1 (_,_,_,_)
| String_utf8(_,_,_,_)-> (Obj.magic cell).str_tl <- tl
| _ -> assert false
let rec append_cdr cell tl =
match tl with
| Concat (x,y) ->
append_cdr (append_cdr cell x) y
| Pair (x,tl) ->
let cell' = Pair (x,Absent) in
set_cdr cell cell';
append_cdr cell' tl
| String_latin1 (s,i,j,tl) ->
let cell' = String_latin1 (s,i,j,Absent) in
set_cdr cell cell';
append_cdr cell' tl
| String_utf8 (s,i,j,tl) ->
let cell' = String_utf8 (s,i,j,Absent) in
set_cdr cell cell';
append_cdr cell' tl
| _ -> cell
let rec flatten = function
| Pair (x,y) -> concat x (flatten y)
| Concat (x,y) -> concat (flatten x) (flatten y)
| q -> q
let eval_lazy_concat v =
let accu = Obj.magic (Pair (nil,Absent)) in
let rec aux accu = function
| Concat (x,y) -> aux (append_cdr accu x) y
| v -> set_cdr accu v
in
aux accu v;
let nv = match snd accu with
| Pair (_,_) as nv -> nv
| String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
| String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
| _ -> assert false in
let v = Obj.repr v in
let nv = Obj.repr nv in
Obj.set_tag v (Obj.tag nv);
Obj.set_field v 0 (Obj.field nv 0);
Obj.set_field v 1 (Obj.field nv 1)
(******************************)
let normalize = function
| String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
| String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
| Concat (_,_) as v -> eval_lazy_concat v; v
| v -> assert false
let buf = Buffer.create 100
let rec add_buf_utf8_to_latin1 src i j =
......@@ -83,6 +169,7 @@ let get_string_latin1 e =
| Pair (Char x,y) -> Buffer.add_char buf (Chars.V.to_char x); aux y
| String_latin1 (i,j,src,y) -> Buffer.add_substring buf src i (j - i); aux y
| String_utf8 (i,j,src,y) -> add_buf_utf8_to_latin1 src i j; aux y
| Concat (_,_) as v -> eval_lazy_concat v; aux v
| _ -> () in
aux e;
let s = Buffer.contents buf in
......@@ -94,6 +181,7 @@ let get_string_utf8 e =
| Pair (Char x,y) -> Utf8.store buf (Chars.V.to_int x); aux y
| String_latin1 (i,j,src,y) -> add_buf_latin1_to_utf8 src i j; aux y
| String_utf8 (i,j,src,y) -> Utf8.copy buf src i j; aux y
| Concat (_,_) as v -> eval_lazy_concat v; aux v
| q -> q in
let q = aux e in
let s = Buffer.contents buf in
......@@ -108,16 +196,14 @@ let rec is_seq = function
| Pair (_, y) when is_seq y -> true
| Atom a when a = Sequence.nil_atom -> true
| String_latin1 (_,_,_,y) | String_utf8 (_,_,_,y) when is_seq y -> true
| _ -> false
let is_xml = function
| Pair (Atom _, Pair (Record _, s)) when is_seq s -> true
| Concat (_,_) as v -> eval_lazy_concat v; is_seq v
| _ -> false
let rec is_str = function
| Pair (Char _, y) -> is_str y
| Atom a when a = Sequence.nil_atom -> true
| String_latin1 (_,_,_,q) | String_utf8(_,_,_,q) -> is_str q
| Concat (_,_) as v -> eval_lazy_concat v; is_str v
| _ -> false
let rec print ppf v =
......@@ -140,6 +226,8 @@ let rec print ppf v =
| String_utf8 (i,j,s,q) ->
Format.fprintf ppf "<string_utf8:%i-%i,%S,%a>"
(Utf8.get_idx i) (Utf8.get_idx j) (Utf8.get_str s) print q
| Concat (x,y) ->
Format.fprintf ppf "<concat:%a;%a>" print x print y
| Absent ->
Format.fprintf ppf "<[absent]>"
| Delayed x ->
......@@ -203,21 +291,6 @@ and print_record ppf = function
and print_field ppf (l,v) =
Format.fprintf ppf "%a=%a" Label.print (LabelPool.value l) print v
let normalize_string_latin1 i j s q =
if i = j then q else
Pair (Char (Chars.V.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q))
let normalize_string_utf8 i j s q =
if Utf8.equal_index i j then q
else
let (c,i) = Utf8.next s i in
Pair (Char (Chars.V.mk_int c), String_utf8 (i,j,s,q))
let normalize = function
| String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
| String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
| v -> assert false
let rec compare x y =
if (x == y) then 0
else
......@@ -241,6 +314,8 @@ let rec compare x y =
raise (CDuceExn (string_latin1 "comparing functional values"))
| Absent,_ | _,Absent
| Delayed _, _ | _, Delayed _ -> assert false
| Concat (_,_) as x, y -> eval_lazy_concat x; compare x y
| x, (Concat (_,_) as y) -> eval_lazy_concat y; compare x y