Commit 07bc7bc1 authored by Kim Nguyễn's avatar Kim Nguyễn

Implement the syntax ``t (t1, …, tn)'' for type instantiation, without

requiring an abscence of whitespace between ``t'' and ``(''.  Outside
of regular expression contexts, ``t (t1, …, tn)'' is parsed with a
higher precedence than & and \, to allow one to write
``t (Int) & Bool'' without extra parentheses (i.e.
``(t (Int)) & Bool'').  Inside a regular expression, type
instantiation and sequencing become ambiguous, and there is no way to
distinguish syntactically: ``[ Int (Int, Int) ]'' from
``[ t (Int, Int) ]''. The former should resolve to a sequence while
the latter only makes sense as an instantiation (if ``t'' is a
parametric type). Both are treated as element sequencing and
disambiguated during identifier resolution (more precisely during the
"derecurse" phase, before typechecking).

Note that due to the lower precedence of sequencing w.r.t to other
regular expression constructs, a type ``[ t (Int)* ]'' will be parsed
correctly, but yield an error message saying that t is not fully
intantiated. One has to write ``[ (t (Int))* ]'' which is similar to
function applications for expressions.

Finally, we also re-order sequencing after typing to always group a
potential type instantiation with its argument, i.e. we transform
sequences such as
``[ t1 t2 t3 ... tn ]'' (which are parsed as
``[ (((t1 t2) t3) ... tn) ]'' because sequence concatenation is
left-associative) into ``[ ... (ti tj) ... ]'' if ``ti'' is an
identifier and ``tj'' is of the form ``(s1,...,sk)''. This is sound
because concatenation of regular expression is associative (and the
original sequence would fail, anyway).
parent bd1de27e
......@@ -26,7 +26,7 @@ and debug_directive =
[ `Filter of ppat * ppat
| `Sample of ppat
| `Accept of ppat
| `Compile of ppat * ppat list
| `Compile of ppat * ppat list
| `Subtype of ppat * ppat
| `Single of ppat
| `Typed of pexpr
......@@ -45,14 +45,14 @@ and toplevel_directive =
| `Builtins
]
and pexpr =
and pexpr =
| LocatedExpr of loc * pexpr
(* CDuce is a Lambda-calculus ... *)
| Var of U.t
| Apply of pexpr * pexpr
| Abstraction of abstr
(* Data constructors *)
| Const of Types.Const.t
| Integer of Intervals.V.t
......@@ -62,7 +62,7 @@ and pexpr =
| Xml of pexpr * pexpr
| RecordLitt of (label * pexpr) list
| String of U.uindex * U.uindex * U.t * pexpr
(* Data destructors *)
| Match of pexpr * branches
| Map of pexpr * branches
......@@ -79,7 +79,7 @@ and pexpr =
(* Other *)
| NamespaceIn of U.t * ns_expr * pexpr
| KeepNsIn of bool * pexpr
| Forget of pexpr * ppat
| Forget of pexpr * ppat
| Check of pexpr * ppat
| Ref of pexpr * ppat
......@@ -88,15 +88,15 @@ and pexpr =
and label = U.t
and abstr = {
fun_name : lident option;
and abstr = {
fun_name : lident option;
fun_iface : (ppat * ppat) list;
fun_body : branches
}
and branches = (ppat * pexpr) list
(* A common syntactic class for patterns and types *)
(* A common syntactic class for patterns and types *)
and ppat = ppat' located
and ppat' =
......@@ -117,6 +117,7 @@ and ppat' =
| Regexp of regexp
| Concat of ppat * ppat
| Merge of ppat * ppat
| Group of ppat
and regexp =
| Epsilon
......@@ -127,11 +128,24 @@ and regexp =
| Star of regexp
| WeakStar of regexp
| SeqCapture of lident * regexp
| Arg of regexp
let pat_true = mknoloc (Internal Builtin_defs.true_type)
let pat_false = mknoloc (Internal Builtin_defs.false_type)
let pat_false = mknoloc (Internal Builtin_defs.false_type)
let cst_true = Const (Types.Atom Builtin_defs.true_atom)
let cst_false = Const (Types.Atom Builtin_defs.false_atom)
let cst_nil = Const Sequence.nil_cst
let cst_nil = Const Sequence.nil_cst
let pat_nil = mknoloc (Internal (Sequence.nil_type))
let rec prod_to_list p =
match p.descr with
Prod(p1, p2) -> p1 :: (prod_to_list p2)
| _ -> [ p ]
let re_seq e1 e2 =
match e1, e2 with
Epsilon, _ -> e2
| _, Epsilon -> e1
| _ -> Seq (e1, e2)
......@@ -5,9 +5,7 @@ open Ident
open Printf
open Ulexer
(*
let () = Grammar.error_verbose := true
*)
(* let () = Camlp4_config.verbose := true *)
let tloc (i,j) = (i,j)
let nopos = (-1,-1)
......@@ -134,6 +132,11 @@ let get_ref e = Apply (Dot (e, U.mk "get"), cst_nil)
let let_in e1 p e2 = Match (e1, [p,e2])
let seq e1 e2 = let_in e1 pat_nil e2
let concat e1 e2 = apply_op2_noloc "@" e1 e2
let opt_to_list = function None -> [] | Some l -> l
let clean_pvar x =
match x.[0] with
| '(' -> String.sub x 2 (String.length x - 2)
| _ -> x
EXTEND Gram
GLOBAL: top_phrases prog expr pat regexp keyword;
......@@ -152,10 +155,10 @@ EXTEND Gram
[ mk _loc (LetDecl (p,e)) ]
| (_,p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
[ mk _loc (EvalStatement (exp _loc (let_in e1 p e2))) ]
| "type"; x = IDENTPAR; pargs = LIST1 [ x = PVAR -> ident x ] SEP ","; ")"; "="; t = pat ->
[ mk _loc (TypeDecl ((lop _loc,ident x),pargs,t)) ]
| "type"; x = located_ident; "="; t = pat ->
[ mk _loc (TypeDecl (x,[],t)) ]
| "type"; x = located_ident;
args = OPT [ "("; l = LIST1 [ v = PVAR -> U.mk (clean_pvar v) ] SEP ","; ")" -> l ];
"="; t = pat ->
[ mk _loc (TypeDecl (x, opt_to_list args ,t)) ]
| "using"; name = IDENT; "="; cu = [ x = IDENT -> x | x = STRING -> x ] ->
[ mk _loc (Using (U.mk name, U.mk cu)) ]
| "open"; ids = LIST1 ident_or_keyword SEP "." ->
......@@ -355,11 +358,7 @@ EXTEND Gram
exp _loc body
]
| [
a = IDENTPAR; l = LIST1 expr SEP ","; ")" ->
let e1 = exp _loc (Var (ident a)) in
let e2 = exp _loc (tuple l) in
exp _loc (Apply (e1,e2))
| e1 = SELF; IDENT "div"; e2 = expr -> apply_op2 _loc "/" e1 e2
e1 = SELF; IDENT "div"; e2 = expr -> apply_op2 _loc "/" e1 e2
| e1 = SELF; IDENT "mod"; e2 = expr -> apply_op2 _loc "mod" e1 e2
| e1 = SELF; e2 = expr -> exp _loc (Apply (e1,e2))
]
......@@ -521,11 +520,10 @@ EXTEND Gram
fun_decl: [
[ f = OPT located_ident; "("; (a,b) = fun_decl_after_lparen -> (f,a,b)
| f = IDENTPAR; (a,b) = fun_decl_after_lparen -> (Some (lop _loc,ident f),a,b)
]
];
arrow: [
arrow: [
[ t1 = pat LEVEL "no_arrow"; "->"; t2 = pat -> (t1,t2)]
];
......@@ -541,15 +539,26 @@ EXTEND Gram
[ x = regexp; "|"; y = regexp ->
match (x,y) with
| Elem x, Elem y -> Elem (mk _loc (Or (x,y)))
| _ -> Alt (x,y)
| _ -> Alt (x, y)
]
| [ x = regexp; y = regexp -> Seq (x,y) ]
| [ x = regexp; "&"; y = regexp ->
match (x,y) with
| Elem x, Elem y -> Elem (mk _loc (And (x,y)))
| _ -> error _loc "Conjunction not allowed in regular expression"
| [ x = regexp; y = regexp -> re_seq x y ]
| [ x = regexp ; op = [ "&" -> "&" | "\\" -> "\\" ] ; y = regexp ->
let rec unseq e = match e with
Elem _ -> e
| Seq(Elem { descr = PatVar (ids, []); loc}, e0) ->
let ue0 = unseq e0 in begin
match ue0 with
| Elem p -> Elem {loc; descr = PatVar(ids, prod_to_list p) }
| _ -> e
end
| _ -> e
in
match unseq x, unseq y, op with
| Elem x, Elem y, "&" -> Elem (mk _loc (And (x,y)))
| Elem x, Elem y, _ -> Elem (mk _loc (Diff (x,y)))
| _ -> error _loc ( op ^ " not allowed in regular expression")
]
| [ a = IDENT; "::"; x = regexp -> SeqCapture ((lop _loc,ident a),x) ]
| "capture" [ a = IDENT; "::"; x = regexp -> SeqCapture ((lop _loc,ident a),x) ]
| [ x = regexp; "*" -> Star x
| x = regexp; "*?" -> WeakStar x
| x = regexp; "+" -> Seq (x, Star x)
......@@ -559,7 +568,7 @@ EXTEND Gram
| x = regexp; "**"; i = INT ->
let rec aux i accu =
if (i = 0) then accu
else aux (pred i) (Seq (x, accu))
else aux (pred i) (re_seq x accu)
in
let i =
try
......@@ -573,17 +582,19 @@ EXTEND Gram
aux i Epsilon
]
| [ "("; x = LIST1 regexp SEP ","; ")" ->
(match x with
| [ x ] -> x
| _ ->
let x =
List.map
(function
| Elem x -> x
| _ -> error _loc
"Mixing regular expressions and products")
x in
Elem (multi_prod _loc x))
(match x with
| [ Elem x ] -> Arg (Elem x)
| [ x ] -> x
| _ ->
let x =
List.map
(function
| Elem x -> x
| _ -> error _loc
"Mixing regular expressions and products")
x
in
Arg (Elem (multi_prod _loc x)))
| "("; a = IDENT; ":="; c = expr; ")" ->
Elem (mk _loc (Constant ((ident a,c))))
| "/"; p = pat LEVEL "simple" -> Guard p
......@@ -597,10 +608,14 @@ EXTEND Gram
(fun c accu ->
let c = Chars.V.mk_int c in
let c = Chars.atom c in
Seq (Elem (mknoloc (Internal (Types.char c))), accu))
re_seq (Elem (mknoloc (Internal (Types.char c)))) accu)
(seq_of_string s)
Epsilon ]
| [ e = pat LEVEL "simple" -> Elem e ]
| [ e = pat LEVEL "basic" ->
match e.descr with
Group p -> Arg (Elem p)
| _ -> Elem e
]
];
schema_ref: [
......@@ -620,10 +635,18 @@ EXTEND Gram
| "no_arrow" [ x = pat; "|"; y = pat -> mk _loc (Or (x,y)) ]
| "simple" [ x = pat; "&"; y = pat -> mk _loc (And (x,y))
| x = pat; "\\"; y = pat -> mk _loc (Diff (x,y)) ]
| "var" [ x = PVAR ->
mk _loc (Internal (Types.var (Var.mk (ident_aux x)))) ]
|
[ "{"; r = record_spec; "}" -> r
| "instance" [ ids = LIST1 ident_or_keyword SEP ".";
args = OPT [ "("; l = LIST1 pat SEP ","; ")" -> l ] ->
mk _loc (PatVar (List.map ident ids, opt_to_list args)) ]
| "basic" [
x = PVAR ->
let nx = clean_pvar x in
let res = mk _loc (Internal (Types.var (Var.mk (ident_aux nx)))) in
if x == nx then res else
mk _loc (Group (res))
| ids = LIST1 ident_or_keyword SEP "." ->
mk _loc (PatVar (List.map ident ids, []))
| "{"; r = record_spec; "}" -> r
| "ref"; p = pat ->
let get_fun = mk _loc (Arrow (pat_nil, p))
and set_fun = mk _loc (Arrow (p, pat_nil))in
......@@ -635,10 +658,6 @@ EXTEND Gram
mk _loc (Constant (ident a,c))
| "!"; a = IDENT ->
mk _loc (Internal (Types.abstract (Types.Abstracts.atom a)))
| id = IDENTPAR; pargs = LIST1 pat SEP ","; ")" ->
mk _loc (PatVar ([ident id],pargs))
| ids = LIST1 ident_or_keyword SEP "." ->
mk _loc (PatVar (List.map ident ids,[]))
| i = INT ; "--"; j = INT ->
let i = Intervals.V.mk i
and j = Intervals.V.mk j in
......@@ -657,17 +676,17 @@ EXTEND Gram
| i = char ; "--"; j = char ->
mk _loc (Internal (Types.char (Chars.char_class i j)))
| "`"; c = tag_type -> c
| "("; l = LIST1 pat SEP ","; ")" -> multi_prod _loc l
| "("; l = LIST1 pat SEP ","; ")" -> mk _loc (Group (multi_prod _loc l))
| "["; r = [ r = regexp -> r | -> Epsilon ];
q = [ ";"; q = pat -> Some q | -> None ];
"]" ->
let r = match q with
| Some q ->
let any = mk _loc (Internal (Types.any)) in
Seq(r,Seq(Guard q, Star (Elem any)))
| None -> r
in
mk _loc (Regexp r)
"]" ->
let r = match q with
| Some q ->
let any = mk _loc (Internal (Types.any)) in
Seq(r,Seq(Guard q, Star (Elem any)))
| None -> r
in
mk _loc (Regexp r)
| "<"; t =
[ x = tag_type -> x
| "("; t = pat; ")" -> t ];
......
......@@ -205,11 +205,6 @@ let return_loc i j tok = (tok, (i,j))
let rec token = lexer
| xml_blank+ -> token lexbuf
(* TODO: Fix bug #40 *)
| qname "(" ->
let s = L.utf8_sub_lexeme lexbuf 0 (L.lexeme_length lexbuf - 1) in
return lexbuf (IDENTPAR s)
(* END #40 *)
| qname ->
let s = L.utf8_lexeme lexbuf in
return lexbuf (IDENT s)
......@@ -266,9 +261,6 @@ let rec token = lexer
and token2 = lexer
| xml_blank+ -> token2 lexbuf
| qname "(" ->
let s = L.utf8_sub_lexeme lexbuf 0 (L.lexeme_length lexbuf - 1) in
return lexbuf (IDENTPAR s)
| qname ->
let s = L.utf8_lexeme lexbuf in
return lexbuf (IDENT s)
......@@ -314,7 +306,7 @@ and token2 = lexer
(try String.index s '\t' with _ -> len))
(try String.index s ')' with _ -> len) in
let s = String.sub s 0 idend in
return lexbuf (PVAR s)
return lexbuf (PVAR ("()"^s)) (* UGLY hack to not loose the "("; ")" tokens. *)
| "'" ncname ->
let s = L.utf8_lexeme lexbuf in
let s = String.sub s 1 (String.length s - 1) in
......@@ -336,9 +328,6 @@ and token2 = lexer
and token2toplevel = lexer
| xml_blank+ -> token2toplevel lexbuf
| qname "(" ->
let s = L.utf8_sub_lexeme lexbuf 0 (L.lexeme_length lexbuf - 1) in
return lexbuf (IDENTPAR s)
| qname ->
let s = L.utf8_lexeme lexbuf in
return lexbuf (IDENT s)
......@@ -384,7 +373,7 @@ and token2toplevel = lexer
(try String.index s '\t' with _ -> len))
(try String.index s ')' with _ -> len) in
let s = String.sub s 0 idend in
return lexbuf (PVAR s)
return lexbuf (PVAR ("()" ^ s)) (* UGLY hack to not loose the "("; ")" tokens. *)
| "'" ncname ->
let s = L.utf8_lexeme lexbuf in
let s = String.sub s 1 (String.length s - 1) in
......
......@@ -318,7 +318,7 @@ module IType = struct
type penv = {
penv_tenv : t;
penv_derec : node Env.t;
penv_derec : (node * U.t list) Env.t;
penv_var : (string, Var.var) Hashtbl.t;
}
......@@ -341,6 +341,51 @@ module IType = struct
all_delayed := [];
List.iter check_one_delayed l
let seq_to_list e =
let rec loop e acc =
match e with
| Seq(e1, e2) -> loop e1 (loop e2 acc)
| _ -> e :: acc
in
loop e []
let list_to_seq l =
let rec loop l acc =
match l with
[] -> acc
| ((Elem { descr = PatVar(_, []); _} ) as var) ::
((Arg _) as arg) :: rest -> loop rest (re_seq acc (re_seq var arg))
| (Arg r) :: rest | r :: rest -> loop rest (re_seq acc r)
in
loop l Epsilon
let rec clean_re e =
match e with
| Seq(_,_) -> let l = seq_to_list e in
let l =
List.map (function Arg e -> Arg (clean_re e) | e -> clean_re e) l
in
list_to_seq l
| Alt(e1, e2) -> Alt (clean_re e1, clean_re e2)
| Star e0 -> Star (clean_re e0)
| WeakStar e0 -> WeakStar (clean_re e0)
| SeqCapture (i, e0) -> SeqCapture (i, clean_re e0)
| Arg e0 -> clean_re e0
| _ -> e
let rec print_re fmt e =
match e with
Epsilon -> Format.fprintf fmt "Epsilon"
| Elem _ -> Format.fprintf fmt "Elem"
| Guard _ -> Format.fprintf fmt "Guard"
| Seq (e1, e2) -> Format.fprintf fmt "Seq(%a, %a)" print_re e1 print_re e2
| Alt (e1, e2) -> Format.fprintf fmt "Alt(%a, %a)" print_re e1 print_re e2
| Star (e0) -> Format.fprintf fmt "Star(%a)" print_re e0
| WeakStar (e0) -> Format.fprintf fmt "WeakStar(%a)" print_re e0
| SeqCapture (_, e0) -> Format.fprintf fmt "SeqCapture(_, %a)" print_re e0
| Arg (e0) -> Format.fprintf fmt "Arg(%a)" print_re e0
(* Ast -> symbolic type *)
let rec derecurs env p =
match p.descr with
......@@ -364,26 +409,50 @@ module IType = struct
| Constant (x,c) ->
mk_constant (ident env.penv_tenv p.loc x) (const env.penv_tenv p.loc c)
| Cst c -> mk_type (Types.constant (const env.penv_tenv p.loc c))
| Regexp r -> rexp (derecurs_regexp env r)
| Regexp r -> rexp (derecurs_regexp env (clean_re r))
| Concat (p1,p2) -> mk_concat (derecurs env p1) (derecurs env p2)
| Merge (p1,p2) -> mk_merge (derecurs env p1) (derecurs env p2)
| Group p -> derecurs env p
and derecurs_regexp env = function
| Epsilon -> mk_epsilon
| Elem p -> mk_elem (derecurs env p)
| Guard p -> mk_guard (derecurs env p)
| Seq (p1,p2) -> mk_seq (derecurs_regexp env p1) (derecurs_regexp env p2)
| Seq (p1,p2) -> (* we need to disambiguate between sequence concatenation in regexp
and type instantiation *)
begin
match p1, p2 with
Elem { loc; descr = PatVar ((id :: rest as ids), []) }, Arg (Elem t2) ->
let nargs =
try
if rest == [] then (* local identifier *)
let id = ident env.penv_tenv loc id in
try List.length (snd (Env.find id env.penv_derec))
with Not_found ->
Array.length (snd (find_local_type env.penv_tenv loc id))
else
Array.length (snd (find_global_type env.penv_tenv loc ids))
with Not_found -> 0
in
if nargs != 0 then (* instantiation *)
mk_elem (derecurs env { loc; descr = PatVar(ids, prod_to_list t2) })
else
mk_seq (derecurs_regexp env p1) (derecurs_regexp env p2)
| _ ->
mk_seq (derecurs_regexp env p1) (derecurs_regexp env p2)
end
| Alt (p1,p2) -> mk_alt (derecurs_regexp env p1) (derecurs_regexp env p2)
| Star p -> mk_star (derecurs_regexp env p)
| WeakStar p -> mk_weakstar (derecurs_regexp env p)
| SeqCapture ((loc,x),p) -> mk_seqcapt (ident env.penv_tenv loc x) (derecurs_regexp env p)
| Arg r -> derecurs_regexp env r
and derecurs_var env loc ids =
match ids with
| ([v],a) ->
let v = ident env.penv_tenv loc v in
begin
try Env.find v env.penv_derec
try fst (Env.find v env.penv_derec)
with Not_found ->
try
let (t,pargs) = find_local_type env.penv_tenv loc v in
......@@ -416,19 +485,22 @@ module IType = struct
and derecurs_def env b =
let seen = ref IdSet.empty in
let b =
List.map (fun ((loc,v),_,p) ->
List.map (fun ((loc,v),args,p) ->
let v = ident env.penv_tenv loc v in
if IdSet.mem !seen v then
raise_loc_generic loc
("Multiple definitions for the type identifer " ^
(Ident.to_string v));
seen := IdSet.add v !seen;
(v,p,delayed loc)
(v,args,p,delayed loc)
) b
in
let n = List.fold_left (fun env (v,p,s) -> Env.add v s env) env.penv_derec b in
let n = List.fold_left
(fun env (v,a,p,s) -> Env.add v (s,a) env)
env.penv_derec b
in
let env = { env with penv_derec = n } in
List.iter (fun (v,p,s) -> link s (derecurs env p)) b;
List.iter (fun (v,_, p,s) -> link s (derecurs env p)) b;
(env, b)
let derec penv p =
......@@ -458,7 +530,7 @@ module IType = struct
with Patterns.Error s -> raise_loc_generic loc s
in
let b =
List.map2 (fun ((loc,v),pl,p) (v',_,d) ->
List.map2 (fun ((loc,v),pl,p) (v',_,_, d) ->
let t_rhs = aux loc d in
if (loc <> noloc) && (Types.is_empty t_rhs) then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment