Commit c3a0a16d authored by Pietro Abate's avatar Pietro Abate

[r2003-06-08 08:33:52 by cvscast] Clean load_xml

Original author: cvscast
Date: 2003-06-08 08:34:36+00:00
parent 5ffaf389
......@@ -102,3 +102,32 @@ PXP_WLEX=false: use ocamllex for parsing UTF-8 XML files
E.g.:
make cduce NATIVE=false
You can also modify Makefile.conf to set values for these choices.
------------------------------------------------------------------------------
Support for the expat parser
------------------------------------------------------------------------------
This release includes experimental support for the expat XML
parser, using the OCaml wrapper written by Maas-Maarten Zeeman, and
included in the expat/ subdirectory for convenience (see
expat/README).
If you have the expat C library installed, you can build
the OCaml wrapper:
cd expat
make all (* may need to modify Makefile.conf *)
make install (* may require root privileges *)
cd ..
Then set EXPAT=true in Makefile.conf, and rebuild CDuce (make clean;
make all).
The interpreter is now using expat for loading XML files.
You can still use PXP by providing the --pxp switch on the command
line.
Note: the current wrapper for expat does not support inclusion
of external entities. Moreover I encounter random segfaults...
# build CDuce using OCaml native code compiler
NATIVE = true
# profiling support
PROFILE = false
# use wlex lexers for parsing XML files with PXP
PXP_WLEX = false
# include support for expat
EXPAT = false
# Customize the following variables to match the settings
# of your local web server
WEB_PREFIX = /var/www
CGI_DIR = $(WEB_PREFIX)/cgi-bin
CDUCE_HTML_DIR = $(WEB_PREFIX)/html
NATIVE = true
PROFILE = false
PXP_WLEX = false
EXPAT = false
include Makefile.conf
VERSION = 0.0.91
PACKAGES = -package "pxp-engine pxp-lex-iso88591 wlexing camlp4 num cgi"
ifeq ($(PXP_WLEX), true)
......@@ -22,11 +20,10 @@ else
endif
ifeq ($(EXPAT), true)
PACKAGES += -package expat
SYNTAX += -symbol EXPAT=
endif
VERSION = 0.0.91
SYNTAX_PARSER = -pp '$(SYNTAX)'
CAMLC_P = ocamlc
......@@ -47,15 +44,9 @@ CAMLOPT = ocamlfind $(CAMLOPT_P) $(OPT) $(PACKAGES)
ifeq ($(NATIVE), true)
EXTENSION = cmx
LINK = $(CAMLOPT) -linkpkg gramlib.cmxa
ifeq ($(EXPAT), true)
LINK += mlexpat.cmxa
endif
else
EXTENSION = cmo
LINK = $(CAMLC) -custom -linkpkg gramlib.cma mlexpat.cma
ifeq ($(EXPAT), true)
LINK += mlexpat.cma
endif
endif
all: cduce dtd2cduce local_website
......@@ -92,10 +83,6 @@ DTD2CDUCE = tools/dtd2cduce.cmo
DEPEND = $(DIRS:=/*.ml) $(DIRS:=/*.mli)
INCLUDES = $(DIRS:%=-I %)
ifeq ($(EXPAT), true)
INCLUDES += -I expat
endif
cduce: $(CDUCE:.cmo=.$(EXTENSION))
$(LINK) $(INCLUDES) -o $@ $^
......@@ -132,11 +119,6 @@ clean:
misc/q_symbol.cmo: misc/q_symbol.ml
$(CAMLC) -c -pp 'camlp4o pa_extend.cmo q_MLast.cmo' $<
.PHONY: expat
expat:
(cd expat; make)
make cduce EXPAT=true
.ml.cmo:
$(CAMLC) -c $(SYNTAX_PARSER) $(INCLUDES) $<
......@@ -156,13 +138,8 @@ driver/examples.ml: cduce web/examples/build.cd web/examples/examples.xml
web/files: cduce web/site.cd
(cd web; ../cduce --quiet site.cd --arg -php site.xml)
#
# Customize the following variables to match the settings
# of your local web server
#
WEB_PREFIX = /var/www
CGI_DIR = $(WEB_PREFIX)/cgi-bin
CDUCE_HTML_DIR = $(WEB_PREFIX)/html
install_web_local:web/files webiface
cp web/www/*.php web/cduce.css $(CDUCE_HTML_DIR)/
......
......@@ -33,24 +33,7 @@ See the INSTALL file for Installation instructions.
For performance reasons, it is advised to build it using OCaml native code
compiler (by default in the Makefile).
Usage:
cduce [options] [CDuce files] --arg [arguments for the CDuce program]
The arguments following the --arg are passed to the CDuce program
in the argv variable (having type [ String* ], which means: sequence
of character strings).
The option --quiet suppresses normal output (typing, results). It is
normally used when the CDuce interpreter is used in the context
of batch processing.
The option --dump followed by a filename allows persistence between
several invocations of the interpreter: the current environment
(defined types and values) is saved to the file when the interpreter
terminates and it is restored if the interpreter is started again with
the same option. Note that only the arguments after -- on the first
invocation in a session are passed to the CDuce program.
See the man page or HTML manual for usage.
When no CDuce file is given on the command line, the interpreter
behaves as a toplevel. Phrases are interpreted when the user types
......
......@@ -20,6 +20,10 @@ let langs =
(fun script xml ->
sp "%s --quiet %s --arg %s" cduce_cmd script xml);
"CDuce+expat", cduce,
(fun script xml ->
sp "%s --expat --quiet %s --arg %s" cduce_cmd script xml);
"XDuce", xduce,
(fun script xml ->
sp "%s %s %s" xduce_cmd script xml);
......@@ -75,7 +79,7 @@ let () =
(fun (lang, scripts, cmd) ->
List.iter
(fun script ->
pr "%6s[%20s] " lang script;
pr "%20s[%20s] " lang script;
time (cmd script fn)
) scripts
)
......
......@@ -2,7 +2,9 @@ open Ident
let () = State.close ();;
let dump = ref None
let load_dump = ref None
let save_dump = ref None
let src = ref []
let args = ref []
......@@ -10,31 +12,48 @@ let version () =
Printf.eprintf "CDuce, version %s\n" <:symbol<cduce_version>>;
Printf.eprintf "built on %s\n" <:symbol<build_date>>;
Printf.eprintf "using OCaml %s compiler\n" <:symbol<ocaml_compiler>>;
Printf.eprintf "support for expat:%b\n" (Load_xml.expat_support);
exit 0
(* Print the licensing notice on stderr and terminate the process with
   exit code 0.  Used as the action of the --license command-line switch.

   The message text is kept flush-left on purpose: the final line break
   inside the literal is not escaped, so any indentation placed before the
   closing quote would become part of the printed output. *)
let license () =
  Printf.eprintf "\n\
The CDuce interpreter is distributed under the terms of the Q Public \n\
License version 1.0 (included in the sources). The Choice of Law section\n\
has been modified from the original Q Public.\n\n
";
  exit 0
let specs =
[ "--dump", Arg.String (fun s -> dump := Some s),
" specify filename for persistency";
[ "--load", Arg.String (fun s -> load_dump := Some s),
" load persistency file before running CDuce program";
"--save", Arg.String (fun s -> save_dump := Some s),
" save persistency file after running CDuce program";
"--dump", Arg.String (fun s -> save_dump := Some s; load_dump := Some s),
" specify persistency file for loading and saving";
"--quiet", Arg.Set Cduce.quiet,
" suppress normal output (typing, results)";
"--expat", Arg.Unit (fun () -> Load_xml.use_parser := `Expat),
" use expat instead of PXP to parse XML documents";
"-v", Arg.Unit version,
" print CDuce version";
"--version", Arg.Unit version,
"print CDuce version";
"--license", Arg.Unit (fun () ->
Printf.eprintf "\n\
The CDuce interpreter is distributed under the terms of the Q Public \n\
License version 1.0 (included in the sources). The Choice of Law section\n\
been modified from the original Q Public.\n\n
"; exit 0),
"print CDuce license";
"print CDuce version";
"--license", Arg.Unit license,
"print CDuce license";
"--arg", Arg.Rest (fun s -> args := s :: !args),
" the arguments that follow are passed to the CDuce program (in argv)";
" following arguments are passed to the CDuce program (in argv)";
]
(* Extend the base option list with the XML parser selection switches.
   --expat and --pxp are only offered when Load_xml.expat_support is true;
   otherwise the previously defined option list is returned unchanged.
   Each switch stores the chosen parser in Load_xml.use_parser. *)
let specs =
  if Load_xml.expat_support then
    ("--expat", Arg.Unit (fun () -> Load_xml.use_parser := `Expat),
     " use expat parser (default)") ::
    ("--pxp", Arg.Unit (fun () -> Load_xml.use_parser := `Pxp),
     " use PXP parser") ::
    specs
  else
    specs
let () =
Arg.parse specs (fun s -> src := s :: !src)
"\nUsage:\ncduce [OPTIONS ...] [FILE ...] [--arg argument ...]\n\nOptions:"
......@@ -115,7 +134,7 @@ let do_file s =
let main () =
(match !dump with
(match !load_dump with
| Some f ->
(try
Format.fprintf ppf "Restoring state: ";
......@@ -135,7 +154,7 @@ let main () =
(match !src with
| [] -> toploop ()
| l -> List.iter do_file l);
(match !dump with
(match !save_dump with
| Some f ->
Format.fprintf ppf "Saving state ...@\n";
let s = State.get () in
......
name="expat"
version="0.0.3"
description="Expat XML parser"
requires=""
archive(byte)="expat.cma"
archive(native)="expat.cmxa"
linkopts = ""
EXPAT_LIB=-L/usr/lib -lexpat
EXPAT_INC=-I /usr/include
-include Makefile.conf
libmlexpat.a: expat_stubs.o expat.cmo expat.cmx
ocamlmklib -verbose -o mlexpat $^ $(EXPAT_LIB)
OCAMLMAKEFILE = OCamlMakefile
expat.cmo: expat.cmi expat.ml
ocamlc -c expat.ml
SOURCES = expat_stubs.c expat.mli expat.ml
CFLAGS = -O2 -fPIC -DPIC -DFULL_UNROLL -O2
CLIBS = expat
RESULT = expat
expat.cmx: expat.cmi expat.ml
ocamlopt -c expat.ml
all: byte-code-library native-code-library
expat.cmi: expat.mli
ocamlc -c expat.mli
install: libinstall
uninstall: libuninstall
expat_stubs.o: expat_stubs.c
ocamlc -c $(EXPAT_INC) expat_stubs.c
clean: nobackup clean-doc
clean:
rm -f *.o *.a *.so *.cmo *.cmx *.cmi *.cma *.cmxa
-include $(OCAMLMAKEFILE)
# export STATIC = yes
# export INCDIRS := /usr/include
# export LIBDIRS := /usr/lib
This diff is collapsed.
This directory contains code written by Maas-Maarten Zeeman
and included here for convenience.
and included here for convenience:
Home page: http://home.wanadoo.nl/maas/ocaml/
To build support for expat, do (in CDuce root directory):
make clean; make expat
Now, when the CDuce interpreter is run with the option --expat,
it will use expat instead of PXP for load_xml.
Note: the current wrapper for expat does not support inclusion
of external entities. Moreover I encounter random segfaults...
The build process has been adapted from the PCRE-OCAML wrapper:
http://www.ai.univie.ac.at/~markus/home/ocaml_sources.html
......@@ -5,7 +5,7 @@
(* LICENCE for details. *)
(***********************************************************************)
(* $Id: expat.mli,v 1.2 2003/06/03 20:45:50 cvscast Exp $ *)
(* $Id: expat.mli,v 1.3 2003/06/08 08:33:54 cvscast Exp $ *)
(** The Ocaml Expat library provides an interface to the Expat XML Parser.
......@@ -82,7 +82,7 @@ val reset_processing_instruction_handler : expat_parser -> unit
val set_comment_handler : expat_parser -> (string -> unit) -> unit
val reset_comment_handler : expat_parser -> unit
(** {6 CData Section handler setting and resetting *)
(** {6 CData Section handler setting and resetting} *)
val set_start_cdata_handler : expat_parser -> (unit -> unit) -> unit
val reset_start_cdata_handler : expat_parser -> unit
......@@ -90,7 +90,7 @@ val reset_start_cdata_handler : expat_parser -> unit
val set_end_cdata_handler : expat_parser -> (unit -> unit) -> unit
val reset_end_cdata_handler : expat_parser -> unit
(** {6 Default Handler setting and resetting *)
(** {6 Default Handler setting and resetting} *)
val set_default_handler : expat_parser -> (string -> unit) -> unit
val reset_default_handler : expat_parser -> unit
......
(* Loading XML documents *)
let use_parser = ref `Pxp
ifdef EXPAT then
let expat_support = true
else
let expat_support = false
let use_parser = ref (if expat_support then `Expat else `Pxp)
open Pxp_yacc
open Pxp_lexer_types
......@@ -14,21 +19,21 @@ type buf =
mutable pos : int;
mutable length : int }
let create n = { buffer = String.create n; pos = 0; length = n }
let txt = { buffer = String.create 1024; pos = 0; length = 1024 }
let resize b n =
let new_len = b.length * 2 + n in
let resize n =
let new_len = txt.length * 2 + n in
let new_buf = String.create new_len in
String.unsafe_blit b.buffer 0 new_buf 0 b.pos;
b.buffer <- new_buf;
b.length <- new_len
String.unsafe_blit txt.buffer 0 new_buf 0 txt.pos;
txt.buffer <- new_buf;
txt.length <- new_len
let add_string b s =
let add_string s =
let len = String.length s in
let new_pos = b.pos + len in
if new_pos > b.length then resize b len;
String.unsafe_blit s 0 b.buffer b.pos len;
b.pos <- new_pos
let new_pos = txt.pos + len in
if new_pos > txt.length then resize len;
String.unsafe_blit s 0 txt.buffer txt.pos len;
txt.pos <- new_pos
let rec only_ws s i =
(i = 0) ||
......@@ -59,8 +64,6 @@ type token =
| String of string
let stack = ref []
let txt = create 1024
let rec create_elt accu = function
| String s :: st -> create_elt (string s accu) st
......@@ -69,97 +72,98 @@ let rec create_elt accu = function
| [] -> assert false
let buflen = 1000
let buf = String.create buflen
(* Handler for an opening tag [name] with attributes [att].
   If the text accumulated so far in [txt] passes the [only_ws] check
   (presumably: it is whitespace only -- confirm against only_ws) it is
   dropped; otherwise it is pushed on [stack] as a [String] token.  In both
   cases the text buffer is then emptied and a [Start] token for the new
   element is pushed. *)
let start_element_handler name att =
  let pending = txt.pos in
  if not (only_ws txt.buffer pending) then begin
    let text = String.sub txt.buffer 0 pending in
    stack := String text :: !stack
  end;
  (* The reset is unconditional: it is not part of the [if] above. *)
  txt.pos <- 0;
  stack := Start (name, att) :: !stack
(* Handler for a closing tag (its argument is ignored).
   Builds the initial accumulator from the text currently held in [txt]:
   [nil] when the [only_ws] check succeeds, otherwise that text prepended
   to [nil] with [string].  The text buffer is then emptied and
   [create_elt] is run on the accumulator and the current stack contents. *)
let end_element_handler _ =
  let pending = txt.pos in
  let tail =
    match only_ws txt.buffer pending with
    | true -> nil
    | false -> string (String.sub txt.buffer 0 pending) nil
  in
  txt.pos <- 0;
  create_elt tail !stack
ifdef EXPAT then
let load_expat s =
let load_expat =
let buflen = 1024 in
let buf = String.create buflen in
fun s ->
let ic =
try open_in s
with exn ->
let msg =
Printf.sprintf "load_xml, file \"%s\": %s" s (Printexc.to_string exn)
in
raise (Location.Generic msg)
in
let p = Expat.parser_create "" in
Expat.set_start_element_handler p
(fun name att ->
if not (only_ws txt.buffer txt.pos) then
stack := String (String.sub txt.buffer 0 txt.pos) :: !stack;
txt.pos <- 0;
stack := Start (name,att) :: !stack);
Expat.set_end_element_handler p
(fun _ ->
let accu =
if only_ws txt.buffer txt.pos
then nil
else string (String.sub txt.buffer 0 txt.pos) nil in
txt.pos <- 0;
create_elt accu !stack);
Expat.set_character_data_handler p (add_string txt);
(* Gc.full_major (); *)
(* Gc.compact (); *)
let ic = open_in s in
Expat.set_start_element_handler p start_element_handler;
Expat.set_end_element_handler p end_element_handler;
Expat.set_character_data_handler p add_string;
let rec loop () =
let n = input ic buf 0 buflen in
if (n > 0) then
(*(Expat.parse p (String.sub buf 0 n); loop ()) *)
(Expat.parse_sub p buf 0 n; loop ())
if (n > 0) then (Expat.parse_sub p buf 0 n; loop ())
in
try
loop();
Expat.final p;
close_in ic;
match !stack with
| [ Element x ] -> stack := []; x
| _ -> assert false
with
Expat.Expat_error e ->
failwith ("Expat ("^s^"):"^Expat.xml_error_to_string e)
close_in ic;
let line = Expat.get_current_line_number p
and col = Expat.get_current_column_number p in
let msg =
Printf.sprintf
"load_xml, file \"%s\", at line %i, column %i: %s"
s
(Expat.get_current_line_number p)
(Expat.get_current_column_number p)
(Expat.xml_error_to_string e)
in
raise (Location.Generic msg)
else
let load_expat s =
failwith "Expat support not included"
let handle_event = function
| E_start_tag (name,att,_) ->
if not (only_ws txt.buffer txt.pos) then
stack := String (String.sub txt.buffer 0 txt.pos) :: !stack;
txt.pos <- 0;
stack := Start (name,att) :: !stack
| E_char_data data ->
add_string txt data
| E_end_tag (_,_) ->
let accu =
if only_ws txt.buffer txt.pos
then nil
else string (String.sub txt.buffer 0 txt.pos) nil in
txt.pos <- 0;
create_elt accu !stack
(* Dispatch a PXP parsing event to the parser-independent handlers:
   start tags and end tags go to the element handlers, character data is
   appended to the text buffer, and every other event is ignored. *)
let pxp_handle_event event =
  match event with
  | E_start_tag (tag, attributes, _) -> start_element_handler tag attributes
  | E_end_tag (_, _) -> end_element_handler ()
  | E_char_data text -> add_string text
  | _ -> ()
let load_pxp s =
let config = { default_config with
(* warner = new warner; *)
encoding = `Enc_utf8;
store_element_positions = false;
drop_ignorable_whitespace = true
}
in
let mgr = create_entity_manager config (from_file s) in
process_entity config (`Entry_document[]) mgr handle_event;
match !stack with
| [ Element x ] -> stack := []; x
| _ -> assert false
let pxp_config =
{ default_config with
(* warner = new warner; *)
encoding = `Enc_utf8;
store_element_positions = false;
drop_ignorable_whitespace = true
}
let load_xml_aux s =
match !use_parser with
| `Expat -> load_expat s
| `Pxp -> load_pxp s
(* Parse the file [s] with PXP, feeding every parsing event to
   [pxp_handle_event].  Returns unit: this function itself yields no value
   and only drives the event handler.
   Any exception raised while opening or parsing is converted to
   [Location.Generic] carrying the text from [Pxp_types.string_of_exn]. *)
let load_pxp s =
try
let mgr = create_entity_manager pxp_config (from_file s) in
process_entity pxp_config (`Entry_document[]) mgr pxp_handle_event;
with exn ->
raise (Location.Generic (Pxp_types.string_of_exn exn))
let load_xml s =
Location.protect_op "load_xml";
try load_xml_aux s
with exn ->
raise
(Location.Generic (Pxp_types.string_of_exn exn))
try
(match !use_parser with
| `Expat -> load_expat s
| `Pxp -> load_pxp s);
match !stack with
| [ Element x ] -> stack := []; x
| _ -> assert false
with e -> stack := []; txt.pos <-0; raise e
let load_html s =
......
val use_parser: [ `Expat | `Pxp ] ref
val expat_support : bool
val load_xml: string -> Value.t
......
......@@ -28,16 +28,25 @@ String* ]</code>, which means sequence of character strings). </li>
results). This option is normally used when the CDuce interpreter is used in
the context of batch processing. </li>
<li> The option <code>--dump %%filename%%</code> allows persistence
between several invocations of the interpreter: the current
environment (defined types and values) is saved to the file when the
interpreter terminates and it is restored if the interpreter is
started again with the same option and file name. Note that only the arguments after
<li> The options <code>--save %%filename%%</code> and
<code>--load %%filename%%</code> allow persistence
between several invocations of the interpreter, by saving and
restoring the current environment (defined types and values) to a
file. Note that only the arguments after
<code>--arg</code> on the first invocation in a session are passed to the
CDuce program(s). </li>
CDuce program(s).
</li>
<li> The option <code>--dump %%filename%%</code> is equivalent
to <code>--load %%filename%% --save %%filename%%</code>. </li>
<li> The options <code>-v</code> and <code>--version</code> make the
interpreter print its version number and exit immediately.
</li>
<li> The options <code>-v</code> and <code>--version</code> make the interpreter print its
version number and exit immediately.</li>
<li> The options <code>--pxp</code> and <code>--expat</code> select
the parser for XML files (available only when CDuce
is built with expat support; default is <code>--expat</code>).</li>
<li>All the other arguments on the command line are considered CDuce
scripts, which are executed successively.</li>
......@@ -64,11 +73,11 @@ type S = <b>[ T ]
]]></sample>
</li>
<li>Function declarations <code>let fun %%f%% %%...%%</code>.
<li>Function declarations <code>let %%f%% %%...%%</code>.
Adjacent function declarations are mutually recursive, e.g.:
<sample><![CDATA[
let fun f (x : Int) : Int = g x
let fun g (x : Int) : Int = x + 1
let f (x : Int) : Int = g x
let g (x : Int) : Int = x + 1
]]></sample>
</li>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment