Commit e4614310 authored by Pietro Abate's avatar Pietro Abate

[r2003-06-12 13:15:56 by cvscast] Merging schema branch

Original author: cvscast
Date: 2003-06-12 13:16:00+00:00
parent 2da3d5a2
Makefile.conf
cduce
cduce.opt
dtd2cduce
webiface.opt
webiface
validate
evaluator
cdo2ml
mlcduce_wrapper
*.cmi
*.cmo
*.cmx
*.cma
*.cmxa
META
configure.log
include Makefile.conf
VERSION = 0.0.91
PACKAGES = -package "pxp-engine pxp-lex-iso88591 wlexing camlp4 num cgi"
PACKAGES = -package "pxp-engine pxp-lex-iso88591 wlexing camlp4 num cgi pcre netstring"
ifeq ($(PXP_WLEX), true)
PACKAGES += -package pxp-wlex-utf8
else
......@@ -32,7 +32,7 @@ ifeq ($(PROFILE), true)
ifeq ($(NATIVE), false)
CAMLC_P = ocamlcp -p a
SYNTAX_PARSER =
endif
endif
else
CAMLOPT_P = ocamlopt -inline 25
endif
......@@ -46,14 +46,15 @@ ifeq ($(NATIVE), true)
LINK = $(CAMLOPT) -linkpkg gramlib.cmxa
else
EXTENSION = cmo
LINK = $(CAMLC) -custom -linkpkg gramlib.cma mlexpat.cma
LINK = $(CAMLC) -custom -linkpkg gramlib.cma
endif
all: cduce dtd2cduce local_website
# all: cduce dtd2cduce local_website
all: cduce dtd2cduce local_website validate
# Source directories
DIRS = misc parser typing types runtime driver
DIRS = misc parser schema typing types runtime driver
CLEAN_DIRS = $(DIRS) tools tests
# Objects to build
......@@ -65,13 +66,18 @@ OBJECTS = \
types/sortedList.cmo types/boolean.cmo types/ident.cmo \
types/intervals.cmo types/chars.cmo types/atoms.cmo types/normal.cmo \
types/types.cmo types/patterns.cmo types/sequence.cmo \
types/sample.cmo \
types/sample.cmo types/builtin_defs.cmo \
\
runtime/value.cmo \
\
schema/schema_types.cmo schema/schema_xml.cmo schema/schema_builtin.cmo \
schema/schema_validator.cmo schema/schema_parser.cmo \
\
parser/location.cmo parser/wlexer.cmo parser/ast.cmo parser/parser.cmo \
\
typing/typed.cmo typing/typer.cmo \
\
runtime/value.cmo runtime/load_xml.cmo runtime/run_dispatch.cmo \
runtime/load_xml.cmo runtime/run_dispatch.cmo \
runtime/print_xml.cmo runtime/eval.cmo \
\
types/builtin.cmo driver/cduce.cmo
......@@ -98,6 +104,9 @@ webiface: $(WEBIFACE:.cmo=.$(EXTENSION))
dtd2cduce: $(DTD2CDUCE:.cmo=.$(EXTENSION))
$(LINK) $(INCLUDES) -o $@ $^
validate: $(OBJECTS:.cmo=.$(EXTENSION)) tools/validate.ml
$(LINK) $(INCLUDES) -o $@ $^
.PHONY: compute_depend
compute_depend: misc/q_symbol.cmo
@echo "Computing dependencies ..."
......@@ -115,7 +124,7 @@ clean:
rm -f `find . -name "*~"`
rm -f *.cmi *.cmo *.cma *.cmx *.a *.cmxa *.o *~
rm -f cduce ocamlprof.dump
rm -f dtd2cduce pool webiface
rm -f dtd2cduce pool webiface validate
rm -Rf prepro package
rm -f web/www/*.php web/www/*.html web/*~
......
This diff is collapsed.
......@@ -165,6 +165,9 @@ let rec phrases ppf phs = match phs with
phrases ppf (collect_funs ppf [] phs)
| { descr = Ast.TypeDecl (_,_) } :: _ ->
phrases ppf (collect_types ppf [] phs)
| { descr = Ast.SchemaDecl (name, schema) } :: rest ->
Typer.register_schema name schema;
phrases ppf rest
| { descr = Ast.EvalStatement e } :: rest ->
let (fv,e) = Typer.expr e in
let t = Typer.type_check !typing_env e Types.any true in
......
......@@ -5,7 +5,7 @@
# For updates see:
# http://www.oefai.at/~markus/ocaml_sources
#
# $Id: OCamlMakefile,v 1.1 2003/06/08 08:33:53 cvscast Exp $
# $Id: OCamlMakefile,v 1.2 2003/06/12 13:15:56 cvscast Exp $
#
###########################################################################
......
......@@ -5,7 +5,7 @@
(* LICENCE for details. *)
(***********************************************************************)
(* $Id: expat.ml,v 1.2 2003/06/03 20:45:50 cvscast Exp $ *)
(* $Id: expat.ml,v 1.3 2003/06/12 13:15:57 cvscast Exp $ *)
type expat_parser
......
......@@ -5,7 +5,7 @@
(* LICENCE for details. *)
(***********************************************************************)
(* $Id: expat.mli,v 1.3 2003/06/08 08:33:54 cvscast Exp $ *)
(* $Id: expat.mli,v 1.4 2003/06/12 13:15:57 cvscast Exp $ *)
(** The Ocaml Expat library provides an interface to the Expat XML Parser.
......
......@@ -5,7 +5,7 @@
/* LICENCE for details. */
/***********************************************************************/
/* $Id: expat_stubs.c,v 1.3 2003/06/03 20:45:50 cvscast Exp $ */
/* $Id: expat_stubs.c,v 1.4 2003/06/12 13:15:57 cvscast Exp $ */
/* Stub code to interface Ocaml with Expat */
......
......@@ -3,11 +3,14 @@
open Location
open Ident
type schema_item_kind = [ `Type | `Element | `Attribute | `Any ]
type pprog = pmodule_item list
and pmodule_item = pmodule_item' located
and pmodule_item' =
| TypeDecl of string * ppat
| SchemaDecl of string * Schema_types.schema (* name, schema *)
| LetDecl of ppat * pexpr
| FunDecl of pexpr
| EvalStatement of pexpr
......@@ -48,7 +51,8 @@ and pexpr =
| Map of pexpr * branches
| Transform of pexpr * branches
| Xtrans of pexpr * branches
| Dot of pexpr* label
| Validate of pexpr * string * string (* exp, schema name, element name *)
| Dot of pexpr * label
| RemoveField of pexpr * label
(* Exceptions *)
......@@ -67,6 +71,8 @@ and branches = (ppat * pexpr) list
and ppat = ppat' located
and ppat' =
| PatVar of string
| SchemaVar of (* type/pattern schema variable *)
schema_item_kind * string * string
| Recurs of ppat * (string * ppat) list
| Internal of Types.descr
| Or of ppat * ppat
......@@ -80,6 +86,7 @@ and ppat' =
| Capture of id
| Constant of id * Types.const
| Regexp of regexp * ppat
(* regular expression, continuation: [ re ; cont ], e.g: [ re ; nil ] *)
and regexp =
| Epsilon
......@@ -89,3 +96,16 @@ and regexp =
| Star of regexp
| WeakStar of regexp
| SeqCapture of id * regexp
open Printf
(*
let rec string_of_regexp = function
| Epsilon -> "e"
| Elem _ -> "ELEM"
| Seq (re1, re2) -> sprintf "(%s),(%s)" (string_of_regexp re1) (string_of_regexp re2)
| Alt (re1, re2) -> sprintf "(%s)|(%s)" (string_of_regexp re1) (string_of_regexp re2)
| Star re -> sprintf "(%s)*" (string_of_regexp re)
| WeakStar _ -> assert false
| SeqCapture _ -> assert false
*)
open Location
open Ast
open Ident
open Printf
(*
let () = Grammar.error_verbose := true
......@@ -91,7 +92,6 @@ let char_list loc s =
let s = seq_of_string loc s in
List.map (fun (loc,c) -> exp loc (Cst (Types.Char (Chars.mk_int c)))) s
let include_stack = ref []
let protect_exn f g =
......@@ -107,6 +107,7 @@ let is_fun_decl =
| _ -> raise Stream.Failure
)
let dot_RE = Pcre.regexp "\\."
EXTEND
GLOBAL: top_phrases prog expr pat regexp const;
......@@ -127,6 +128,10 @@ EXTEND
[ mk loc (EvalStatement (exp loc (Match (e1,[p,e2])))) ]
| "type"; x = UIDENT; "="; t = pat -> [ mk loc (TypeDecl (x,t)) ]
| "type"; x = LIDENT -> error loc "Type identifiers must be capitalized"
| "schema"; name = UIDENT; "="; uri = STRING2 ->
let schema_doc = Schema_xml.pxp_tree_of (get_string uri) in
let schema = Schema_parser.parse_schema schema_doc in
[ mk loc (SchemaDecl (name, schema))]
| "debug"; d = debug_directive -> [ mk loc (Debug d) ]
| DIRECTIVE "#quit" -> [ mk loc (Directive `Quit) ]
| DIRECTIVE "#env" -> [ mk loc (Directive `Env) ]
......@@ -176,7 +181,7 @@ EXTEND
| "if" | "then" | "else"
| "transform" | "fun" | "in"
| "let" | "type" | "debug" | "include"
| "and"
| "and" | "validate" | "schema"
]
-> a
]
......@@ -198,6 +203,9 @@ EXTEND
exp loc (Match (e, [p1,e1; p2,e2]))
| "transform"; e = SELF; "with"; b = branches ->
exp loc (Transform (e,b))
| "validate"; e = SELF; "with"; schema = UIDENT; "#";
typ = [ UIDENT | LIDENT | keyword ] ->
exp loc (Validate (e, schema, typ))
| "fun"; (f,a,b) = fun_decl ->
exp loc (Abstraction { fun_name = f; fun_iface = a; fun_body = b })
| (_,p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
......@@ -400,6 +408,17 @@ EXTEND
| a = LIDENT -> mk loc (Capture (ident a))
| "("; a = LIDENT; ":="; c = const; ")" ->
mk loc (Constant (ident a,c))
| schema = UIDENT; "#"; typ = [ UIDENT | LIDENT | keyword ];
k = OPT [ "as"; k = [ "element" | "type" | "attribute" ] -> k ] ->
let kind =
match k with
| None -> `Any
| Some "element" -> `Element
| Some "type" -> `Type
| Some "attribute" -> `Attribute
| _ -> assert false
in
mk loc (SchemaVar (kind, schema, typ))
| a = UIDENT -> mk loc (PatVar a)
| i = INT ; "--"; j = INT ->
let i = Intervals.mk i
......
......@@ -100,7 +100,7 @@ rule token = parse
| [ "<>=.,:;+-*/@&{}[]()|?`!" ]
| "->" | "::" | ";;" | "--" | ":=" | "\\" | "++"
| "{|" | "|}" | "<=" | ">=" | "<<" | ">>"
| ["?+*"] "?"
| ["?+*"] "?" | "#"
{ "",Lexing.lexeme lexbuf }
| "#" lowercase+ { "DIRECTIVE",Lexing.lexeme lexbuf }
| '"' | "'"
......
......@@ -16,6 +16,7 @@ let get_accu = function
(* Evaluation of expressions *)
let rec eval env e0 = match e0.Typed.exp_descr with
| Typed.Forget (e,_) -> eval env e
| Typed.Var s -> (match Env.find s env with Value.Delayed x -> !x | x -> x)
......@@ -35,7 +36,10 @@ let rec eval env e0 = match e0.Typed.exp_descr with
| Typed.RemoveField (e, l) -> eval_remove_field l (eval env e)
| Typed.UnaryOp (o,e) -> o.Typed.un_op_eval (eval env e)
| Typed.BinaryOp (o,e1,e2) -> o.Typed.bin_op_eval (eval env e1) (eval env e2)
| Typed.Validate (e, schema, name) ->
let validator = Typer.get_schema_validator (schema, name) in
Schema_validator.validate ~validator
(Schema_xml.pxp_stream_of_value (eval env e))
and eval_try env arg brs =
try eval env arg
......@@ -62,7 +66,6 @@ and eval_abstraction env a =
self := a;
a
and eval_apply f arg = match f with
| Abstraction (_,clos) -> clos arg
| _ -> assert false
......
......@@ -25,6 +25,17 @@ let vtrue = Atom (Atoms.mk_ascii "true")
let vfalse = Atom (Atoms.mk_ascii "false")
let vbool x = if x then vtrue else vfalse
let vrecord l =
let l = List.map (fun (l,v) -> LabelPool.mk (U.mk l), v) l in
Record (LabelMap.from_list (fun _ _ -> assert false) l)
let get_fields = function
| Record map ->
LabelMap.mapi_to_list
(fun k v -> Utf8.to_string (LabelPool.value k), v)
map
| _ -> assert false
let rec sequence = function
| [] -> nil
| h::t -> Pair (h, sequence t)
......@@ -81,6 +92,9 @@ let get_string_utf8 e =
Buffer.clear buf;
(Utf8.mk s, q)
let get_int = function
| Integer i when Intervals.is_int i -> Intervals.get_int i
| _ -> raise (Invalid_argument "Value.get_int")
let rec is_seq = function
| Pair (_, y) when is_seq y -> true
......@@ -155,10 +169,16 @@ and print_seq ppf = function
| _ -> ()
and print_xml ppf tag attr content =
Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]"
print_tag tag
print_attr attr
print_seq content
if is_seq content then
Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]"
print_tag tag
print_attr attr
print_seq content
else
Format.fprintf ppf "@[<hv2><%a%a>@ %a@]"
print_tag tag
print_attr attr
print content
and print_tag ppf = function
| Atom tag -> Utf8.print ppf (Atoms.value tag)
| tag -> Format.fprintf ppf "(%a)" print tag
......@@ -174,6 +194,13 @@ and print_record ppf = function
and print_field ppf (l,v) =
Format.fprintf ppf "%a=%a" U.print (LabelPool.value l) print v
let explode_rev s =
let rec aux acc = function
| v when v = nil -> acc
| Pair (v, seq) -> aux (v::acc) seq
| v -> [v]
in
aux [] s
let normalize_string_latin1 i j s q =
if i = j then q else
......
......@@ -36,7 +36,11 @@ val vtrue : t
val vfalse : t
val vbool : bool -> t
(** @return a Record value from an associative list of fields *)
val vrecord : (string * t) list -> t
val sequence : t list -> t
val explode_rev : t -> t list (* tail recursive *)
val concat : t -> t -> t
val flatten : t -> t
......@@ -44,4 +48,9 @@ val get_string_latin1 : t -> string
val get_string_utf8 : t -> U.t * t
val is_str : t -> bool
val get_int : t -> int
(** @return an associative list of fields from a Record value *)
val get_fields : t -> (string * t) list
val compare : t -> t -> int
......@@ -6,11 +6,12 @@ LOG="$ROOT/regtest.log"
if [ -f "$ROOT/$1.xsd" ]; then
$VALIDATE $ROOT/$1.xsd $ROOT/$1.xml
else
> $LOG
touch $LOG
date | tee $LOG
for f in $ROOT/*.xsd; do
echo -n "$(basename $f) ... " | tee -a $LOG
if ($VALIDATE $f $(echo "$f" | sed 's/xsd$/xml/') &> /dev/null); then
echo -n "`basename $f` ... " | tee -a $LOG
if ($VALIDATE $f `echo "$f" | sed 's/xsd$/xml/'` > /dev/null 2>&1);
then
echo "OK" | tee -a $LOG
else
echo "FAILURE" | tee -a $LOG
......
......@@ -4,7 +4,6 @@ let debug = false ;;
open Printf ;;
open Pxp_yacc ;;
open Schema_types ;;
open Schema_validator ;;
exception Stop ;; (* internal *)
......
(* some experiments with hand construction of CDuce types *)
let xml =
Types.xml'
(Types.atom (Atoms.atom (Atoms.mk_ascii "xml")))
Types.empty_closed_record
Types.Int.any
;;
let choice =
Types.choice_of_list [ Builtin_defs.bool; Builtin_defs.string; Builtin_defs.int ]
;;
let seq =
Sequence.seq_of_list [ Builtin_defs.bool; Builtin_defs.string; Builtin_defs.int ]
;;
let reco = (* closed record with two fields *)
Types.rec_of_list ~opened:false ["foo", Builtin_defs.int; "bar", Builtin_defs.bool]
;;
let opt_reco = (* closed record with two required and one optional fields *)
Types.rec_of_list' ~opened:false [
false, "foo", Builtin_defs.int;
false, "bar", Builtin_defs.bool;
true, "baz", Builtin_defs.string;
]
;;
let concat = (* TODO ... ask Alain: how to concatenate two star types? *)
let int_star = (Sequence.star Builtin_defs.bool) in
let bool_star = (Sequence.star Builtin_defs.int) in
(* Sequence.concat int_star bool_star *)
Sequence.flatten (Sequence.seq_of_list [ int_star; bool_star ])
;;
let rex =
let elem = Ast.Elem (Location.mknoloc (Ast.Internal xml)) in
let nil = Location.mknoloc (Ast.Internal Sequence.nil_type) in
let rex =
(Ast.Seq ((Ast.Seq (Ast.Star elem, elem)), (Ast.Seq (elem, Ast.Star elem))))
in
let ast_rex = Location.mknoloc (Ast.Regexp (rex, nil)) in
print_endline (Ast.string_of_regexp rex);
Types.descr (Typer.typ ast_rex)
(* Typer.typ' (Typer.real_compile (Typer.derecurs Typer.TypeEnv.empty ast_rex)) *)
;;
let string = Sequence.star Types.Char.any ;;
Types.Print.print Format.std_formatter rex;
Format.fprintf Format.std_formatter "\n"
(* XML Schema validator *)
open Printf ;;
open Pxp_document ;;
open Schema_types ;;
let die_usage () =
prerr_endline
"Usage: validate <schema_document> [ <instance_document> ... ]";
exit 1
;;
let schema_file = try Sys.argv.(1) with Invalid_argument _ -> die_usage () ;;
let main () =
let schema_doc = Schema_xml.pxp_tree_of schema_file in
let schema = Schema_parser.parse_schema schema_doc in
for i = 2 to Array.length Sys.argv - 1 do
let instance_stream = Schema_xml.pxp_stream_of_file Sys.argv.(i) in
let first_element_name =
let rec aux s =
match Stream.peek s with
| Some (Pxp_yacc.E_start_tag (name, _, _)) -> name
| _ -> Stream.junk s; aux s
in
aux instance_stream
in
(try
let first_element_decl =
(try
List.find (fun (name,_,_) -> name = first_element_name)
schema.elt_decls
with Not_found ->
raise (XSI_validation_error (sprintf "No declaration found in schema \
for element '%s'" first_element_name)))
in
let validator =
Schema_validator.validator_of_elt_decl first_element_decl
in
let value = Schema_validator.validate ~validator instance_stream in
Value.print Format.std_formatter value
with XSI_validation_error msg ->
print_endline (sprintf "Validation error on '%s': %s" Sys.argv.(i) msg);
flush stdout)
done
;;
main () ;;
(* PROVE VARIE *)
(*
Types.Print.print Format.std_formatter Builtin_defs.pos_int; Format.fprintf Format.std_formatter "\n";
Types.Print.print Format.std_formatter Builtin_defs.non_neg_int; Format.fprintf Format.std_formatter "\n";
Types.Print.print Format.std_formatter Builtin_defs.neg_int; Format.fprintf Format.std_formatter "\n";
Types.Print.print Format.std_formatter Builtin_defs.non_pos_int; Format.fprintf Format.std_formatter "\n";
*)
let pos_intstr =
Sequence.plus (Types.char (Chars.char_class
(Chars.mk_char '0')
(Chars.mk_char '9')
)
)
let neg_intstr =
Types.times
(Types.cons (Types.char (Chars.atom (Chars.mk_char '-'))))
(Types.cons pos_intstr)
let intstr = Types.cup pos_intstr neg_intstr (* [ '-'? '0'--'9'+ ] *)
let true_atom = Atoms.mk_ascii "true"
let false_atom = Atoms.mk_ascii "false"
let true_type = Types.atom (Atoms.atom true_atom)
let false_type = Types.atom (Atoms.atom false_atom)
let bool = Types.cup true_type false_type
let nil = Sequence.nil_type
let string = Sequence.string
let any = Types.any
let int = Types.Int.any
let atom = Types.atom Atoms.any
let char_latin1 = Types.char (Chars.mk_classes [ (0,255) ])
let string_latin1 = Sequence.star char_latin1
open Builtin_defs
(* Types *)
......
(*
No values exported.
Are you looking for builtin types? Then look at types/builtin_defs.mli
*)
let pos_int = Types.interval (Intervals.right (Intervals.mk "1"))
let non_neg_int = Types.interval (Intervals.right (Intervals.mk "0"))
let neg_int = Types.interval (Intervals.left (Intervals.mk "-1"))
let non_pos_int = Types.interval (Intervals.left (Intervals.mk "0"))
let pos_intstr =
Sequence.plus (Types.char (Chars.char_class
(Chars.mk_char '0')
(Chars.mk_char '9')
)
)
let neg_intstr =
Types.times
(Types.cons (Types.char (Chars.atom (Chars.mk_char '-'))))
(Types.cons pos_intstr)
let intstr = Types.cup pos_intstr neg_intstr (* [ '-'? '0'--'9'+ ] *)
let true_atom = Atoms.mk_ascii "true"
let false_atom = Atoms.mk_ascii "false"
let true_type = Types.atom (Atoms.atom true_atom)
let false_type = Types.atom (Atoms.atom false_atom)
let bool = Types.cup true_type false_type
let nil = Sequence.nil_type
let string = Sequence.string
let any = Types.any
let int = Types.Int.any
let atom = Types.atom Atoms.any
let char_latin1 = Types.char (Chars.mk_classes [ (0,255) ])
let string_latin1 = Sequence.star char_latin1
(** 1 .. Inf *)
val pos_int : Types.descr
(** 0 .. Inf *)
val non_neg_int : Types.descr
(** -Inf .. -1 *)
val neg_int : Types.descr
(** -Inf .. 0 *)
val non_pos_int : Types.descr
val intstr : Types.descr
val pos_intstr : Types.descr
val neg_intstr : Types.descr
val true_atom : Atoms.v
val false_atom : Atoms.v
val true_type : Types.descr
val false_type : Types.descr
val any : Types.descr
val atom : Types.descr
val nil : Types.descr
val bool : Types.descr
val int : Types.descr
val string : Types.descr
val char_latin1 : Types.descr
val string_latin1 : Types.descr
......@@ -10,6 +10,8 @@ let vcompare = compare_big_int
let vhash x = hash_bigint (Obj.magic x)
(* num_digits_big_int x *) (* improve this *)
let mk = big_int_of_string
let get_int = int_of_big_int
let is_int = is_int_big_int
let vadd = add_big_int
let vmult = mult_big_int
let vsub = sub_big_int
......
......@@ -2,6 +2,9 @@ type v
val print_v : Format.formatter -> v -> unit
val mk: string -> v
val is_int: v -> bool
val get_int: v -> int