Commit 7466ea22 authored by Pietro Abate's avatar Pietro Abate

[r2003-09-23 19:41:35 by cvscast] Constantes structurees + suite nettoyage

Original author: cvscast
Date: 2003-09-23 19:41:36+00:00
parent 2b3c3aab
0.2.0
* Rewriting to use ocaml 3.07
* Code upgraded to Ocaml 3.07+beta2
* Major cleaning in progress
* Using ulex instead of wlex
* Allow structured constants in default value patterns (p := c)
0.1.1
* Various bug fixes (expat might now work)
......
include Makefile.distrib
NATIVE=false
# We put this rule here to avoid re-building wlexer.ml on
# user installation (wlex may not be available)
#parser/wlexer.ml: parser/wlexer.mll
# wlex parser/wlexer.mll
include Makefile.distrib
# For development
pull: tools/pull.$(EXTENSION)
$(LINK) -o $@ $^
PREPRO = $(SYNTAX) pr_o.cmo
PREPRO = camlp4o -I `ocamlfind query ulex` pa_ulex.cma pr_o.cmo $(SYNTAX) -sep "\n"
profile:
profile: misc/q_symbol.cmo
rm -Rf prepro
mkdir prepro
for i in $(DIRS); do \
......@@ -26,9 +21,11 @@ profile:
fi; \
done; \
done
#cp parser/wlexer.mll prepro/parser/
cp Makefile depend prepro/
(cd prepro; $(MAKE) cduce PROFILE=true SYNTAX_PARSER= NATIVE=false)
cp Makefile.distrib Makefile Makefile.conf prepro/
(cd prepro; \
touch depend; \
$(MAKE) compute_depend PROFILE=true SYNTAX_PARSER= ; \
$(MAKE) cduce PROFILE=true SYNTAX_PARSER= NATIVE=false)
# Site-specific installation
......
# build CDuce using OCaml native code compiler
NATIVE = true
ifeq ($(NATIVE), false)
else
NATIVE = true
endif
# profiling support
PROFILE = false
......
include Makefile.conf
VERSION = 0.2.0
VERSION = 0.1.2
PACKAGES = pxp-engine pxp-lex-iso88591 ulex camlp4 num cgi pcre netstring
ifeq ($(PXP_WLEX), true)
......@@ -8,10 +8,7 @@ else
PACKAGES += pxp-lex-utf8
endif
ULEX_PATH = `ocamlfind query ulex`
SYNTAX = camlp4o -I misc/ pa_extend.cmo \
q_symbol.cmo \
$(shell ocamlfind query ulex)/pa_ulex.cma \
SYNTAX = -I misc/ pa_extend.cmo q_symbol.cmo \
-symbol cduce_version=\"$(VERSION)\" \
-symbol build_date=\"$(shell date +%Y-%m-%d)\" \
-symbol session_dir=\"$(SESSION_DIR)\"
......@@ -27,14 +24,16 @@ ifeq ($(EXPAT), true)
SYNTAX += -symbol EXPAT=
endif
SYNTAX_PARSER = -pp '$(SYNTAX)'
SYNTAX_PARSER = -syntax camlp4o $(SYNTAX:%=-ppopt %)
CAMLC_P = ocamlc
DEPEND_OCAMLDEP = misc/q_symbol.cmo
ifeq ($(PROFILE), true)
CAMLOPT_P = ocamlopt -p
ifeq ($(NATIVE), false)
CAMLC_P = ocamlcp -p a
SYNTAX_PARSER =
DEPEND_OCAMLDEP =
endif
else
CAMLOPT_P = ocamlopt -inline 25
......@@ -67,7 +66,7 @@ uninstall:
# Source directories
DIRS = misc parser schema typing types runtime driver
DIRS = misc parser schema typing types runtime driver module
CLEAN_DIRS = $(DIRS) tools tests
# Objects to build
......@@ -124,14 +123,11 @@ validate: $(OBJECTS:.cmo=.$(EXTENSION)) tools/validate.ml
$(LINK) $(INCLUDES) -o $@ $^
.PHONY: compute_depend
compute_depend: misc/q_symbol.cmo
compute_depend: $(DEPEND_OCAMLDEP)
@echo "Computing dependencies ..."
ocamldep $(INCLUDES) $(SYNTAX_PARSER) $(DEPEND) | \
ocamlfind ocamldep -package "$(PACKAGES)" $(INCLUDES) $(SYNTAX_PARSER) $(DEPEND) | \
sed -e "s|: |: misc/q_symbol.cmo |" > depend
#parser/wlexer.ml: parser/wlexer.mll
# wlex parser/wlexer.mll
clean:
for i in $(CLEAN_DIRS); do \
(cd $$i; rm -f *.cmi *.cmo *.cma *.cmx *.o *~); \
......@@ -151,7 +147,7 @@ misc/q_symbol.cmo: misc/q_symbol.ml
$(CAMLC) -c -pp 'camlp4o pa_extend.cmo q_MLast.cmo' $<
.ml.cmo:
$(CAMLC) -c $(SYNTAX_PARSER) $(INCLUDES) $<
$(CAMLC) -c $(INCLUDES) $(SYNTAX_PARSER) $<
.ml.cmx:
$(CAMLOPT) -c $(SYNTAX_PARSER) $(INCLUDES) $<
......
......@@ -57,7 +57,7 @@ Beppe 2003-03-02
Add an operator random(n)
é
======================================================================
......@@ -147,6 +147,9 @@ Alain 2003-06-17
Unicode dans un source CDuce en Latin1
- source en UTF8 (ou autre)
Alain 2003-09-23
Avec le passage à ulex, on gagne la possibilité de changer
l'encoding du source
======================================================================
......@@ -159,6 +162,9 @@ Alain 2003-05-15
Etudier les problèmes avec print_xml (ex: XHTML a besoin
d'avoir un prefixe bien défini...)
Alain 2003-09-23
Fait il y a longtemps.
======================================================================
Alain 2003-05-19
......
......@@ -90,10 +90,10 @@ parser/location.cmo: misc/q_symbol.cmo parser/location.cmi
parser/location.cmx: misc/q_symbol.cmo parser/location.cmi
parser/ulexer.cmo: misc/q_symbol.cmo parser/ulexer.cmi
parser/ulexer.cmx: misc/q_symbol.cmo parser/ulexer.cmi
parser/ast.cmo: misc/q_symbol.cmo types/ident.cmo parser/location.cmi misc/ns.cmi \
schema/schema_types.cmi types/types.cmi
parser/ast.cmx: misc/q_symbol.cmo types/ident.cmx parser/location.cmx misc/ns.cmx \
schema/schema_types.cmx types/types.cmx
parser/ast.cmo: misc/q_symbol.cmo types/chars.cmi types/ident.cmo types/intervals.cmi \
parser/location.cmi misc/ns.cmi schema/schema_types.cmi types/types.cmi
parser/ast.cmx: misc/q_symbol.cmo types/chars.cmx types/ident.cmx types/intervals.cmx \
parser/location.cmx misc/ns.cmx schema/schema_types.cmx types/types.cmx
parser/parser.cmo: misc/q_symbol.cmo parser/ast.cmo types/atoms.cmi types/chars.cmi \
misc/encodings.cmi types/ident.cmo types/intervals.cmi \
parser/location.cmi misc/ns.cmi schema/schema_parser.cmi \
......
......@@ -120,14 +120,6 @@ let do_file s =
let chan = open_in s in
Location.push_source (`File s);
let input = Stream.of_channel chan in
if Stream.npeek 2 input = ['#';'!'] then
(
let rec count n =
match Stream.next input with
| '\n' -> n
| _ -> count (n + 1) in
Ulexer.set_delta_loc (count 1)
);
let ok = Cduce.script ppf ppf_err input in
close_in chan;
if not ok then exit 1
......
......@@ -9,6 +9,7 @@ struct
let equal (x : t) y = x = y
let compare (x : t) y = compare x y
(* TODO: handle UTF-8 viewport *)
let to_string s =
Netconversion.recode_string
......
......@@ -31,10 +31,6 @@ and toplevel_directive =
]
and pconst =
| Const_internal of Types.const
| Const_atom of U.t
and pexpr =
| LocatedExpr of loc * pexpr
......@@ -44,8 +40,10 @@ and pexpr =
| Abstraction of abstr
(* Data constructors *)
| Cst of pconst
| Integer of Intervals.V.t
| Char of Chars.V.t
| Pair of pexpr * pexpr
| Atom of U.t
| Xml of pexpr * pexpr
| RecordLitt of (label * pexpr) list
| String of U.uindex * U.uindex * U.t * pexpr
......@@ -87,7 +85,7 @@ and ppat' =
| PatVar of U.t
| SchemaVar of (* type/pattern schema variable *)
schema_item_kind * string * string
| AtomT of U.t
| Cst of pexpr
| NsT of U.t
| Recurs of ppat * (U.t * ppat) list
| Internal of Types.descr
......@@ -99,7 +97,7 @@ and ppat' =
| Arrow of ppat * ppat
| Optional of ppat
| Record of bool * (label * ppat) list
| Constant of id * pconst
| Constant of id * pexpr
| Regexp of regexp * ppat
(* regular expression, continuation: [ re ; cont ], e.g: [ re ; nil ] *)
......@@ -112,4 +110,4 @@ and regexp =
| WeakStar of regexp
| SeqCapture of id * regexp
open Printf
(* TODO: handle encodings of the input for pretty printing
fragments of code *)
type source = [ `None | `File of string | `Stream | `String of string ]
type loc = source * int * int
type precise = [ `Full | `Char of int ]
......@@ -38,18 +41,34 @@ let nopos = (-1,-1)
let viewport = ref `Text
let set_viewport v = viewport := v
(* Note: this is incorrect. Directives #utf8,... should
not be recognized inside comments and strings !
The clean solution is probably to have the real lexer
count the lines. *)
let get_line_number src i =
let enc = ref Ulexing.Latin1 in
let ic = open_in_bin src in
let rec aux pos line start =
if (pos >= i)
then (line,i - start)
else
match input_char ic with
| '\r' when pos = start -> aux (pos + 1) line (pos + 1)
| '\r' | '\n' -> aux (pos + 1) (line + 1) (pos + 1)
| _ -> aux (pos + 1) line start
let lb = Ulexing.from_var_enc_channel enc ic in
let rec count line start = lexer
| '\n' | "\n\r" | '\r' ->
aux (line + 1) (Ulexing.lexeme_end lb)
| "#utf8" ->
enc := Ulexing.Utf8;
aux line start
| "#ascii" ->
enc := Ulexing.Ascii;
aux line start
| "#latin1" ->
enc := Ulexing.Latin1;
aux line start
| _ ->
aux line start
and aux line start =
if (Ulexing.lexeme_start lb >= i) then (line, i - start)
else count line start lb
in
let r = aux 0 1 0 in
let r = aux 1 0 in
close_in ic;
r
......
......@@ -29,7 +29,6 @@ let top_phrases = Grammar.Entry.create gram "toplevel phrases"
let expr = Grammar.Entry.create gram "expression"
let pat = Grammar.Entry.create gram "type/pattern expression"
let regexp = Grammar.Entry.create gram "type/pattern regexp"
let const = Grammar.Entry.create gram "scalar constant"
let exp pos e = LocatedExpr (loc_of_pos pos,e)
......@@ -50,7 +49,7 @@ let tuple_queue =
let char = mknoloc (Internal (Types.char Chars.any))
let string_regexp = Star (Elem char)
let cst_nil = Cst (Const_internal (Types.Atom Sequence.nil_atom))
let cst_nil = Atom (U.mk "nil")
let pat_nil = mknoloc (Internal (Sequence.nil_type))
let seq_of_string s =
......@@ -83,7 +82,7 @@ let is_fun_decl =
)
EXTEND
GLOBAL: top_phrases prog expr pat regexp const;
GLOBAL: top_phrases prog expr pat regexp;
top_phrases: [
[ l = LIST0 phrase; ";;" -> List.flatten l ]
......@@ -265,8 +264,8 @@ EXTEND
]
| "no_appl"
[ c = const -> exp loc (Cst c)
| "("; l = LIST1 expr SEP ","; ")" -> exp loc (tuple l)
[
"("; l = LIST1 expr SEP ","; ")" -> exp loc (tuple l)
| "["; l = LIST0 seq_elem; e = OPT [ ";"; e = expr -> e ];
loc_end = ["]" -> loc] ->
let e = match e with Some e -> e | None -> cst_nil in
......@@ -282,10 +281,9 @@ EXTEND
in
exp loc l
| "<"; t = [ "("; e = expr; ")" -> e
| a = tag -> exp loc (Cst a)
| a = tag -> exp loc a
];
a = expr_attrib_spec; ">"; c = expr ->
(* let t = Pair (cst_nil, t) in *)
exp loc (Xml (t, Pair (a,c)))
| "{"; r = [ expr_record_spec | -> exp loc (RecordLitt []) ]; "}" -> r
| s = STRING2 ->
......@@ -294,16 +292,18 @@ EXTEND
| a = IDENT -> exp loc (Var (ident a))
| "!"; e = expr ->
exp loc (Apply (Dot (e, U.mk "get"), cst_nil))
| i = INT -> exp loc (Integer (Intervals.V.mk i))
| "`"; a = tag -> a
| c = char -> exp loc (Char c)
]
];
tag: [ [ a = [ IDENT | keyword ] ->
Const_atom (parse_ident a) ] ];
tag: [ [ a = [ IDENT | keyword ] -> exp loc (Atom (parse_ident a)) ] ];
tag_type: [
[ IDENT "_" -> mk loc (Internal (Types.atom Atoms.any))
| a = [ IDENT | keyword ] -> mk loc (AtomT (parse_ident a))
| a = [ IDENT | keyword ] -> mk loc (Cst (Atom (parse_ident a)))
| t = ANY_IN_NS -> mk loc (NsT (parse_ident t))
]
];
......@@ -399,7 +399,7 @@ EXTEND
| x = regexp; "?" -> Alt (x, Epsilon)
| x = regexp; "??" -> Alt (Epsilon, x) ]
| [ "("; x = regexp; ")" -> x
| "("; a = IDENT; ":="; c = const; ")" ->
| "("; a = IDENT; ":="; c = expr; ")" ->
Elem (mk loc (Constant ((ident a,c))))
| IDENT "PCDATA" -> string_regexp
| i = STRING1; "--"; j = STRING1 ->
......@@ -435,7 +435,7 @@ EXTEND
let fields = [ label "get", get_fun; label "set", set_fun ] in
mk loc (Record (false, fields))
| IDENT "_" -> mk loc (Internal Types.any)
| "("; a = IDENT; ":="; c = const; ")" ->
| "("; a = IDENT; ":="; c = expr; ")" ->
mk loc (Constant (ident a,c))
| schema = IDENT; "#"; typ = [ IDENT | keyword ];
k = OPT [ "as"; k = [ "element" | "type" | "attribute" ] -> k ] ->
......@@ -468,11 +468,6 @@ EXTEND
| i = char ; "--"; j = char ->
mk loc (Internal (Types.char (Chars.char_class i j)))
| "`"; c = tag_type -> c
| c = const ->
(match c with
| Const_atom l -> mk loc (AtomT l)
| Const_internal c -> mk loc (Internal (Types.constant c))
)
| "("; l = LIST1 pat SEP ","; ")" -> multi_prod loc l
| "["; r = [ r = regexp -> r | -> Epsilon ];
q = [ ";"; q = pat -> q
......@@ -514,14 +509,6 @@ EXTEND
];
const:
[
[ i = INT -> Const_internal (Types.Integer (Intervals.V.mk i))
| "`"; a = tag -> a
| c = char -> Const_internal (Types.Char c) ]
];
attrib_spec:
[ [ r = LIST0 [ l = [IDENT | keyword ]; "=";
o = [ "?" -> true | -> false];
......@@ -568,7 +555,7 @@ let sync () =
match !Ulexer.last_tok with
| ("",";;") | ("EOI","") -> ()
| _ ->
Ulexer.last_tok := Ulexer.token lb;
Ulexer.last_tok := fst (Ulexer.token lb);
aux ()
in
aux ()
......@@ -57,38 +57,42 @@ let illegal lexbuf =
let in_comment = ref false
let return lexbuf tok = (tok, L.loc lexbuf)
let return_loc i j tok = (tok, (i,j))
let rec token = lexer
| xml_blank+ -> token lexbuf
| qname ->
let s = L.utf8_lexeme lexbuf in
if Hashtbl.mem keywords s then "",s else "IDENT",s
return lexbuf (if Hashtbl.mem keywords s then "",s else "IDENT",s)
| ncname ":*" ->
let s = L.utf8_sub_lexeme lexbuf 0 (L.lexeme_length lexbuf - 2) in
"ANY_IN_NS", s
return lexbuf ("ANY_IN_NS", s)
| ".:*" ->
"ANY_IN_NS", ""
return lexbuf ("ANY_IN_NS", "")
| '-'? ['0'-'9']+ ->
"INT", L.utf8_lexeme lexbuf
return lexbuf ("INT", L.utf8_lexeme lexbuf)
| [ "<>=.,:;+-*/@&{}[]()|?`!" ]
| "->" | "::" | ";;" | "--" | ":=" | "\\" | "++"
| "{|" | "|}" | "<=" | ">=" | "<<" | ">>"
| ["?+*"] "?" | "#" ->
"", L.utf8_lexeme lexbuf
return lexbuf ("", L.utf8_lexeme lexbuf)
| "#" ncname ->
"DIRECTIVE", L.utf8_lexeme lexbuf
return lexbuf ("DIRECTIVE", L.utf8_lexeme lexbuf)
| '"' | "'" ->
let start = L.lexeme_start lexbuf in
let double_quote = L.latin1_lexeme_char lexbuf 0 = '"' in
string (L.lexeme_start lexbuf) double_quote lexbuf;
(if double_quote then "STRING2" else "STRING1"),
(get_stored_string())
return_loc start (L.lexeme_end lexbuf)
((if double_quote then "STRING2" else "STRING1"),
(get_stored_string()))
| "(*" ->
in_comment := true;
comment (L.lexeme_start lexbuf) lexbuf;
in_comment := false;
token lexbuf
| eof ->
"EOI",""
return lexbuf ("EOI","")
| _ ->
illegal lexbuf
......@@ -137,15 +141,12 @@ and string start double = lexer
string start double lexbuf
let delta_loc = ref 0
let set_delta_loc dl = delta_loc := dl
let lexbuf = ref None
let last_tok = ref ("","")
let tok_func cs =
let dl = !delta_loc in
delta_loc := 0;
let lb = L.from_var_enc_stream enc cs in
(lexer ("#!" [^ '\n']* "\n")? -> ()) lb;
lexbuf := Some lb;
let next () =
let tok =
......@@ -157,10 +158,8 @@ let tok_func cs =
| Ulexing.InvalidCodepoint i ->
raise (Error (Ulexing.lexeme_end lb, Ulexing.lexeme_end lb,
"Code point invalid for the current encoding")) in
(* TODO: translate Error exn with offset dl ? *)
let loc = (L.lexeme_start lb + dl, L.lexeme_end lb + dl) in
last_tok := tok;
(tok, loc)
last_tok := fst tok;
tok
in
Token.make_stream_and_location next
......@@ -189,7 +188,7 @@ let dump_file f =
let lexbuf = L.from_var_enc_channel enc ic in
(try
while true do
let (a,b) = token lexbuf in
let ((a,b),_) = token lexbuf in
Printf.printf "%s: \"%s\"\n" a b;
if a = "EOI" then exit 0
done
......
exception Error of int * int * string
val token: Ulexing.lexbuf -> string * string
val token: Ulexing.lexbuf -> (string * string) * (int * int)
val lex: (string * string) Token.glexer
val in_comment: bool ref
val set_delta_loc: int -> unit
val lexbuf: Ulexing.lexbuf option ref
val enc: Ulexing.enc ref
val last_tok: (string * string) ref
......
......@@ -51,10 +51,16 @@ let rec flatten = function
| Pair (x,y) -> concat x (flatten y)
| q -> q
let const = function
let rec const = function
| Types.Integer i -> Integer i
| Types.Atom a -> Atom a
| Types.Char c -> Char c
| Types.Pair (x,y) -> Pair (const x, const y)
| Types.Xml (x, Types.Pair (y, z)) -> Xml (const x, const y, const z)
| Types.Xml (_,_) -> assert false
| Types.Record x -> Record (LabelMap.map const x)
| Types.String (i,j,s,c) -> String_utf8 (i,j,s, const c)
let buf = Buffer.create 100
......
......@@ -13,10 +13,15 @@ let compare = 1
type const =
| Integer of Intervals.V.t
| Atom of Atoms.V.t
| Atom of Atoms.V.t
| Char of Chars.V.t
| Pair of const * const
| Xml of const * const
| Record of const label_map
| String of U.uindex * U.uindex * U.t * const
let compare_const c1 c2 =
let rec compare_const c1 c2 =
match (c1,c2) with
| Integer x, Integer y -> Intervals.V.compare x y
| Integer _, _ -> -1
......@@ -25,11 +30,38 @@ let compare_const c1 c2 =
| Atom _, _ -> -1
| _, Atom _ -> 1
| Char x, Char y -> Chars.V.compare x y
let hash_const = function
| Integer x -> Intervals.V.hash x
| Atom x -> Atoms.V.hash x
| Char x -> Chars.V.hash x
| Char _, _ -> -1
| _, Char _ -> 1
| Pair (x1,x2), Pair (y1,y2) ->
let c = compare_const x1 y1 in
if c <> 0 then c else compare_const x2 y2
| Pair (_,_), _ -> -1
| _, Pair (_,_) -> 1
| Xml (x1,x2), Xml (y1,y2) ->
let c = compare_const x1 y1 in
if c <> 0 then c else compare_const x2 y2
| Xml (_,_), _ -> -1
| _, Xml (_,_) -> 1
| Record x, Record y ->
LabelMap.compare compare_const x y
| Record _, _ -> -1
| _, Record _ -> 1
| String (i1,j1,s1,r1), String (i2,j2,s2,r2) ->
let c = Pervasives.compare i1 i2 in if c <> 0 then c
else let c = Pervasives.compare j1 j2 in if c <> 0 then c
else let c = U.compare s1 s2 in if c <> 0 then c (* Should compare
only the substring *)
else compare_const r1 r2
let rec hash_const = function
| Integer x -> 1 + 17 * (Intervals.V.hash x)
| Atom x -> 2 + 17 * (Atoms.V.hash x)
| Char x -> 3 + 17 * (Chars.V.hash x)
| Pair (x,y) -> 4 + 17 * (hash_const x) + 257 * (hash_const y)
| Xml (x,y) -> 5 + 17 * (hash_const x) + 257 * (hash_const y)
| Record x -> 6 + 17 * (LabelMap.hash hash_const x)
| String (i,j,s,r) -> 7 + 17 * (U.hash s) + 257 * hash_const r
(* Note: improve hash for String *)
let equal_const c1 c2 = compare_const c1 c2 = 0
......@@ -216,10 +248,6 @@ let record' (x : bool * node Ident.label_map) =
{ empty with record = BoolRec.atom x }
let atom a = { empty with atoms = a }
let char c = { empty with chars = c }
let constant = function
| Integer i -> interval (Intervals.atom i)
| Atom a -> atom (Atoms.atom a)
| Char c -> char (Chars.atom c)
let cup x y =
if x == y then x else {
......@@ -294,7 +322,19 @@ let internalize n = n
let id n = n.Node.id
let rec constant = function
| Integer i -> interval (Intervals.atom i)
| Atom a -> atom (Atoms.atom a)
| Char c -> char (Chars.atom c)
| Pair (x,y) -> times (const_node x) (const_node y)
| Xml (x,y) -> times (const_node x) (const_node y)
| Record x -> record' (false ,LabelMap.map const_node x)
| String (i,j,s,c) ->
if U.equal_index i j then constant c
else
let (ch,i') = U.next s i in
constant (Pair (Char (Chars.V.mk_int ch), String (i',j,s,c)))
and const_node c = cons (constant c)
let neg x = diff any x
......@@ -972,10 +1012,25 @@ end
module Print =
struct
let print_const ppf = function
let rec print_const ppf = function
| Integer i -> Intervals.V.print ppf i
| Atom a -> Atoms.V.print_quote ppf a
| Char c -> Chars.V.print ppf c
| Pair (x,y) -> Format.fprintf ppf "(%a,%a)" print_const x print_const y
| Xml (x,y) -> Format.fprintf ppf "XML(%a,%a)" print_const x print_const y
| Record r ->
Format.fprintf ppf "Record{";
List.iter
(fun (l,c) ->