open Location open Ast open Ident (* let () = Grammar.error_verbose := true *) let gram = Grammar.gcreate (Wlexer.lexer Wlexer.token Wlexer.latin1_engine) let prog = Grammar.Entry.create gram "prog" let expr = Grammar.Entry.create gram "expression" let pat = Grammar.Entry.create gram "type/pattern expression" let regexp = Grammar.Entry.create gram "type/pattern regexp" let const = Grammar.Entry.create gram "scalar constant" let rec multi_prod loc = function | [ x ] -> x | x :: l -> mk loc (Prod (x, multi_prod loc l)) | [] -> assert false let rec tuple loc = function | [ x ] -> x | x :: l -> mk loc (Pair (x, tuple loc l)) | [] -> assert false let tuple_queue = List.fold_right (fun x q -> mk_loc x.loc (Pair (x, q))) let char = mknoloc (Internal (Types.char Chars.any)) let string_regexp = Star (Elem char) let cst_nil = mknoloc (Cst (Types.Atom Sequence.nil_atom)) let seq_of_string pos s = let (pos,_) = pos in let rec aux accu i = if (i = 0) then accu else aux (((pos+i,pos+i+1),s.[i-1])::accu) (i-1) in aux [] (String.length s) exception Error of string let error (i,j) s = Location.raise_loc i j (Error s) let make_record loc r = LabelMap.from_list (fun _ _ -> error loc "Duplicated record field") r let parse_char loc s = (* TODO: Unicode *) if String.length s <> 1 then error loc "Character litteral must have length 1"; s.[0] let char_list pos s = let s = seq_of_string pos s in List.map (fun (loc,c) -> mk loc (Cst (Types.Char (Chars.mk_char c)))) s let include_stack = ref [] EXTEND GLOBAL: prog expr pat regexp const; prog: [ [ l = LIST0 [ p = phrase; ";;" -> p ]; EOI -> List.flatten l ] ]; phrase: [ [ (p,e) = let_binding -> [ mk loc (LetDecl (p,e)) ] | (p,e1) = let_binding; "in"; e2 = expr LEVEL "top"-> [ mk loc (EvalStatement (mk loc (Match (e1,[p,e2])))) ] | LIDENT "type"; x = UIDENT; "="; t = pat -> [ mk loc (TypeDecl (x,t)) ] | LIDENT "debug"; d = debug_directive -> [ mk loc (Debug d) ] | LIDENT "include"; s = STRING2 -> protect_op "File inclusion"; (* avoid looping; should issue an error ? *) if List.mem s !include_stack then [] else ( include_stack := s :: !include_stack; let chan = open_in s in Location.push_source (`File s); let input = Stream.of_channel chan in let l = Grammar.Entry.parse prog input in close_in chan; Location.pop_source (); include_stack := List.tl !include_stack; l ) ] | [ e = expr -> [ mk loc (EvalStatement e) ] ] ]; debug_directive: [ [ LIDENT "filter"; t = pat; p = pat -> `Filter(t,p) | LIDENT "accept"; p = pat -> `Accept p | LIDENT "compile"; t = pat; p = LIST1 pat -> `Compile (t,p) | LIDENT "normal_record"; t = pat -> `Normal_record t | LIDENT "compile2"; t = pat; p = LIST1 pat -> `Compile2 (t,p) | LIDENT "subtype"; t1 = pat; t2 = pat -> `Subtype (t1,t2) ] ]; expr: [ "top" RIGHTA [ "match"; e = SELF; "with"; b = branches -> mk loc (Match (e,b)) | "try"; e = SELF; "with"; b = branches -> let default = (mknoloc (Capture (ident "x")), mknoloc (Op ("raise",[mknoloc (Var (ident "x"))]))) in mk loc (Try (e,b@[default])) | "map"; e = SELF; "with"; b = branches -> mk loc (Map (e,b)) | "if"; e = SELF; "then"; e1 = SELF; "else"; e2 = SELF -> let p1 = mk loc (Internal (Builtin.true_type)) and p2 = mk loc (Internal (Builtin.false_type)) in mk loc (Match (e, [p1,e1; p2,e2])) | "transform"; e = SELF; "with"; b = branches -> let default = mknoloc (Capture (ident "x")), cst_nil in mk loc (Op ("flatten", [mk loc (Map (e,b@[default]))])) | "fun"; (f,a,b) = fun_decl -> mk loc (Abstraction { fun_name = f; fun_iface = a; fun_body = b }) | (p,e1) = let_binding; "in"; e2 = expr LEVEL "top"-> mk loc (Match (e1,[p,e2])) | e = expr; ":"; p = pat -> mk loc (Forget (e,p)) ] | [ e1 = expr; op = ["=" | "<=" | "<<" | ">>" | ">=" ]; e2 = expr -> let op = match op with | "<<" -> "<" | ">>" -> ">" | s -> s in mk loc (Op (op,[e1;e2])) ] | [ e1 = expr; op = ["+" | "-" | "@" ]; e2 = expr -> mk loc (Op (op,[e1;e2])) | e = expr; "\\"; l = [LIDENT | UIDENT] -> mk loc (RemoveField (e,LabelPool.mk l)) ] | [ e1 = expr; op = ["*"]; e2 = expr -> mk loc (Op (op,[e1;e2])) | e = expr; op = "/"; p = pat -> let tag = mk loc (Internal (Types.atom (Atoms.any))) in let att = mk loc (Internal Types.Record.any) in let any = mk loc (Internal (Types.any)) in let re = Star(Alt(SeqCapture(ident "x",Elem p), Elem any)) in let ct = mk loc (Regexp (re,any)) in let p = mk loc (XmlT (tag, multi_prod loc [att;ct])) in let b = (p, mk loc (Var (ident "x"))) in mk loc (Op ("flatten", [mk loc (Map (e,[b]))])) ] | [ e = expr; "."; l = [LIDENT | UIDENT] -> mk loc (Dot (e,LabelPool.mk l)) ] | [ op = [ LIDENT "flatten" | LIDENT "load_xml" | LIDENT "load_html" | LIDENT "print_xml" | LIDENT "print" | LIDENT "raise" | LIDENT "int_of" | LIDENT "string_of" ]; e = expr -> mk loc (Op (op,[e])) | op = [ LIDENT "dump_to_file" ]; e1 = expr LEVEL "no_appl"; e2 = expr -> mk loc (Op (op, [e1;e2])) | e1 = SELF; LIDENT "div"; e2 = expr -> mk loc (Op ("/", [e1;e2])) | e1 = SELF; LIDENT "mod"; e2 = expr -> mk loc (Op ("mod", [e1;e2])) | e1 = SELF; e2 = expr -> mk loc (Apply (e1,e2)) ] | "no_appl" [ c = const -> mk loc (Cst c) | "("; l = LIST1 expr SEP ","; ")" -> tuple loc l | "["; l = LIST0 seq_elem; e = OPT [ ";"; e = expr -> e ]; "]" -> let e = match e with Some e -> e | None -> cst_nil in List.fold_right (fun x q -> match x with | `Elems l -> tuple_queue l q | `Explode x -> mk_loc x.loc (Op ("@",[x;q])) ) l e | t = [ a = TAG -> mk loc (Cst (Types.Atom (Atoms.mk a))) | "<"; e = expr LEVEL "no_appl" -> e ]; a = expr_attrib_spec; ">"; c = expr -> mk loc (Xml (t, mk loc (Pair (a,c)))) | "{"; r = [ expr_record_spec | -> mk loc (RecordLitt LabelMap.empty) ]; "}" -> r | s = STRING2 -> tuple loc (char_list loc s @ [cst_nil]) | a = LIDENT -> mk loc (Var (ident a)) ] ]; seq_elem: [ [ x = STRING1 -> `Elems (char_list loc x) | e = expr LEVEL "no_appl" -> `Elems [e] | "!"; e = expr LEVEL "no_appl" -> `Explode e ] ]; let_binding: [ [ "let"; p = pat; "="; e = expr -> (p,e) | "let"; p = pat; ":"; t = pat; "="; e = expr -> (p, mknoloc (Forget (e,t))) | "let"; "fun"; (f,a,b) = fun_decl -> let p = match f with | Some x -> mk loc (Capture x) | _ -> failwith "Function name mandatory in let fun declarations" in let abst = { fun_name = f; fun_iface = a; fun_body = b } in let e = mk loc (Abstraction abst) in (p,e); ] ]; fun_decl: [ (* need an hack to do this, because both productions would match [ OPT LIDENT; "("; pat ] .... *) [ f = OPT [ x = LIDENT -> ident x]; "("; p1 = pat LEVEL "no_arrow"; res = [ "->"; p2 = pat; a = [ ";"; a = LIST0 arrow SEP ";" -> a | -> [] ]; ")"; b = branches -> `Classic (p2,a,b) | ":"; targ1 = pat; args = LIST0 [ ","; arg = pat; ":"; targ = pat -> (arg,targ) ]; ")"; ":"; tres = pat ; "="; body = expr -> `Compact (targ1,args,tres,body) ] -> match res with | `Classic (p2,a,b) -> f,(p1,p2)::a,b | `Compact (targ1,args,tres,body) -> let args = (p1,targ1) :: args in let targ = multi_prod nopos (List.map snd args) in let arg = multi_prod nopos (List.map fst args) in let b = [arg, body] in let a = [targ,tres] in (f,a,b) ] ]; arrow: [ [ t1 = pat LEVEL "no_arrow"; "->"; t2 = pat -> (t1,t2)] ]; branches: [ [ OPT "|"; l = LIST1 branch SEP "|" -> l ] ]; branch: [ [ p = pat LEVEL "no_arrow"; "->"; e = expr -> (p,e) ] ]; regexp: [ [ x = regexp; "|"; y = regexp -> match (x,y) with | Elem x, Elem y -> Elem (mk loc (Or (x,y))) | _ -> Alt (x,y) ] | [ x = regexp; y = regexp -> Seq (x,y) ] | [ a = LIDENT; "::"; x = regexp -> SeqCapture (ident a,x) ] | [ x = regexp; "*" -> Star x | x = regexp; "*?" -> WeakStar x | x = regexp; "+" -> Seq (x, Star x) | x = regexp; "+?" -> Seq (x, WeakStar x) | x = regexp; "?" -> Alt (x, Epsilon) | x = regexp; "??" -> Alt (Epsilon, x) ] | [ "("; x = regexp; ")" -> x | "("; a = LIDENT; ":="; c = const; ")" -> Elem (mk loc (Constant ((ident a,c)))) | UIDENT "PCDATA" -> string_regexp | i = STRING1; "--"; j = STRING1 -> let i = Chars.mk_char (parse_char loc i) and j = Chars.mk_char (parse_char loc j) in Elem (mk loc (Internal (Types.char (Chars.char_class i j)))) | s = STRING1 -> let s = seq_of_string loc s in List.fold_right (fun (loc,c) accu -> let c = Chars.mk_char c in let c = Chars.atom c in Seq (Elem (mk loc (Internal (Types.char c))), accu)) s Epsilon | e = pat LEVEL "simple" -> Elem e ] ]; pat: [ [ x = pat; "where"; b = LIST1 [ a = UIDENT; "="; y = pat -> (a,y)] SEP "and" -> mk loc (Recurs (x,b)) ] | RIGHTA [ x = pat; "->"; y = pat -> mk loc (Arrow (x,y)) ] | "no_arrow" [ x = pat; "|"; y = pat -> mk loc (Or (x,y)) ] | "simple" [ x = pat; "&"; y = pat -> mk loc (And (x,y)) | x = pat; "\\"; y = pat -> mk loc (Diff (x,y)) ] | [ "{"; r = record_spec; "}" -> mk loc (Record (true,r)) | "{|"; r = record_spec; "|}" -> mk loc (Record (false,r)) | LIDENT "_" -> mk loc (Internal Types.any) | a = LIDENT -> mk loc (Capture (ident a)) | "("; a = LIDENT; ":="; c = const; ")" -> mk loc (Constant (ident a,c)) | a = UIDENT -> mk loc (PatVar a) | i = INT ; "--"; j = INT -> let i = Intervals.mk i and j = Intervals.mk j in mk loc (Internal (Types.interval (Intervals.bounded i j))) | i = INT -> let i = Intervals.mk i in mk loc (Internal (Types.interval (Intervals.atom i))) | "*"; "--"; j = INT -> let j = Intervals.mk j in mk loc (Internal (Types.interval (Intervals.left j))) | i = INT; "--"; "*" -> let i = Intervals.mk i in mk loc (Internal (Types.interval (Intervals.right i))) | i = char -> mk loc (Internal (Types.char (Chars.char_class i i))) | i = char ; "--"; j = char -> mk loc (Internal (Types.char (Chars.char_class i j))) | c = const -> mk loc (Internal (Types.constant c)) | "("; l = LIST1 pat SEP ","; ")" -> multi_prod loc l | "["; r = [ r = regexp -> r | -> Epsilon ]; q = [ ";"; q = pat -> q | -> mknoloc (Internal (Sequence.nil_type)) ]; "]" -> mk loc (Regexp (r,q)) | t = [ [ "<"; LIDENT "_" -> mk loc (Internal (Types.atom (Atoms.any))) | a = TAG -> mk loc (Internal (Types.atom (Atoms.atom (Atoms.mk a)))) ] | [ "<"; t = pat -> t ] ]; a = attrib_spec; ">"; c = pat -> mk loc (XmlT (t, multi_prod loc [a;c])) | s = STRING2 -> let s = seq_of_string loc s in let s = List.map (fun (loc,c) -> mk loc (Internal (Types.char (Chars.atom (Chars.mk_char c))))) s in let s = s @ [mk loc (Internal (Sequence.nil_type))] in multi_prod loc s ] ]; record_spec: [ [ r = LIST0 [ l = [LIDENT | UIDENT]; "="; o = [ "?" -> true | -> false]; x = pat -> let x = if o then mk loc (Optional x) else x in (LabelPool.mk l, x) ] SEP ";" -> make_record loc r ] ]; char: [ [ c = STRING1 -> Chars.mk_char (parse_char loc c) | "!"; i = INT -> Chars.mk_int (int_of_string i) ] ]; const: [ [ i = INT -> Types.Integer (Intervals.mk i) | "`"; a = [LIDENT | UIDENT] -> Types.Atom (Atoms.mk a) | c = char -> Types.Char c ] ]; attrib_spec: [ [ r = record_spec -> mk loc (Record (true,r)) | "("; t = pat; ")" -> t | "{"; r = record_spec; "}" -> mk loc (Record (true,r)) | "{|"; r = record_spec; "|}" -> mk loc (Record (false,r)) ] ]; expr_record_spec: [ [ r = LIST1 [ l = [LIDENT | UIDENT]; "="; x = expr -> (LabelPool.mk l,x) ] SEP ";" -> mk loc (RecordLitt (make_record loc r)) ] ]; expr_attrib_spec: [ [ r = expr_record_spec -> r ] | [ e = expr LEVEL "no_appl" -> e | -> mk loc (RecordLitt (LabelMap.empty)) ] ]; END let pat' = Grammar.Entry.create gram "type/pattern expression" EXTEND GLOBAL: pat pat'; pat': [ [ p = pat; EOI -> p ] ]; END let pat = Grammar.Entry.parse pat and expr = Grammar.Entry.parse expr and prog = Grammar.Entry.parse prog module From_string = struct let pat s = Grammar.Entry.parse pat' (Stream.of_string s) let expr s = expr (Stream.of_string s) end