Commit f5c914c3 authored by Pietro Abate's avatar Pietro Abate

[r2003-11-24 16:03:12 by szach] use Utf8 everywhere in schema support

Original author: szach
Date: 2003-11-24 16:03:14+00:00
parent 1608b0ee
......@@ -26,7 +26,7 @@ type expr =
| Transform of expr * branches
| Xtrans of expr * branches
| Try of expr * branches
| Validate of expr * schema_component_kind * string * string
| Validate of expr * schema_component_kind * U.t * U.t
| RemoveField of expr * label
| Dot of expr * label
| UnaryOp of int * expr
......
......@@ -69,7 +69,7 @@ let dump_env ppf =
Format.fprintf ppf "Namespace prefixes used for pretty-printing:@.%t"
Ns.InternalPrinter.dump;
Format.fprintf ppf "Schemas: %s@."
(String.concat " " (Typer.get_schema_names ()));
(String.concat " " (List.map U.get_str (Typer.get_schema_names ())));
Format.fprintf ppf "Values:@.";
Typer.iter_values !typing_env
(fun x t -> dump_value ppf x t (get_global_value x))
......
......@@ -8,7 +8,7 @@ type pprog = pmodule_item list
and pmodule_item = pmodule_item' located
and pmodule_item' =
| TypeDecl of id * ppat
| SchemaDecl of string * Schema_types.schema (* name, schema *)
| SchemaDecl of U.t * Schema_types.schema (* name, schema *)
| LetDecl of ppat * pexpr
| FunDecl of pexpr
| Namespace of U.t * Ns.t
......@@ -30,9 +30,9 @@ and toplevel_directive =
| `Reinit_ns
| `Help
| `Dump of pexpr
| `Print_schema of string
| `Print_schema_type of Schema_types.component_kind * string * string
| `Print_type of string
| `Print_schema of U.t
| `Print_schema_type of Schema_types.component_kind * U.t * U.t
| `Print_type of U.t
]
......@@ -59,7 +59,7 @@ and pexpr =
| Map of pexpr * branches
| Transform of pexpr * branches
| Xtrans of pexpr * branches
| Validate of pexpr * Schema_types.component_kind * string * string
| Validate of pexpr * Schema_types.component_kind * U.t * U.t
(* exp, schema component kind, schema name, element name *)
| Dot of pexpr * label
| RemoveField of pexpr * label
......@@ -91,7 +91,7 @@ and ppat = ppat' located
and ppat' =
| PatVar of U.t
| SchemaVar of (* type/pattern schema variable *)
Schema_types.component_kind * string * string (* kind, schema, name *)
Schema_types.component_kind * U.t * U.t (* kind, schema, name *)
| Cst of pexpr
| NsT of U.t
| Recurs of ppat * (id * ppat) list
......
......@@ -114,7 +114,7 @@ EXTEND
| "schema"; name = IDENT; "="; uri = STRING2 ->
protect_op "schema";
let schema = Schema_parser.schema_of_file uri in
[ mk loc (SchemaDecl (name, schema)) ]
[ mk loc (SchemaDecl (U.mk name, schema)) ]
| (name,ns) = namespace_binding ->
[ mk loc (Namespace (name, ns)) ]
| (name,ns) = namespace_binding; "in"; e2 = expr LEVEL "top" ->
......@@ -127,7 +127,7 @@ EXTEND
| DIRECTIVE "#quit" -> [ mk loc (Directive `Quit) ]
| DIRECTIVE "#env" -> [ mk loc (Directive `Env) ]
| DIRECTIVE "#print_schema"; name = IDENT ->
[ mk loc (Directive (`Print_schema name)) ]
[ mk loc (Directive (`Print_schema (U.mk name))) ]
| DIRECTIVE "#print_type"; name = IDENT;
schema_part = OPT [
"#"; typ = [ IDENT | keyword ];
......@@ -135,9 +135,10 @@ EXTEND
(kind, typ)
] ->
(match schema_part with
| None -> [ mk loc (Directive (`Print_type name)) ]
| None -> [ mk loc (Directive (`Print_type (U.mk name))) ]
| Some (kind, typ) ->
[ mk loc (Directive (`Print_schema_type (kind, name, typ))) ])
[ mk loc
(Directive (`Print_schema_type (kind, U.mk name, U.mk typ))) ])
| DIRECTIVE "#dump_value"; e = expr -> [ mk loc (Directive (`Dump e)) ]
| DIRECTIVE "#reinit_ns" -> [ mk loc (Directive `Reinit_ns) ]
| DIRECTIVE "#help" -> [ mk loc (Directive `Help) ]
......@@ -483,7 +484,7 @@ EXTEND
schema_ref: [
[ schema = IDENT; "#"; typ = [ IDENT | keyword ];
kind = OPT [ "as"; k = schema_kind -> k] ->
(kind, schema, typ)
(kind, U.mk schema, U.mk typ)
]
];
......@@ -508,7 +509,7 @@ EXTEND
mk loc (Constant (ident a,c))
| schema = IDENT; "#"; typ = [ IDENT | keyword ];
kind = OPT [ "as"; k = schema_kind -> k] ->
mk loc (SchemaVar (kind, schema, typ))
mk loc (SchemaVar (kind, U.mk schema, U.mk typ))
| a = IDENT ->
mk loc (PatVar (U.mk a))
| i = INT ; "--"; j = INT ->
......
This diff is collapsed.
open Encodings
(** all schema simple type names used in this API are prefixed with
* Schema_xml.xsd_prefix *)
exception Schema_builtin_error of string
exception Schema_builtin_error of Utf8.t
val is_builtin: string -> bool
val get_builtin: string -> Schema_types.simple_type_definition
val is_builtin: Utf8.t -> bool
val get_builtin: Utf8.t -> Schema_types.simple_type_definition
val iter_builtin: (Schema_types.simple_type_definition -> unit) -> unit
val cd_type_of_builtin: string -> Types.descr
val cd_type_of_builtin: Utf8.t -> Types.descr
(** @raise Schema_builtin_error [name] in case of validation error, where
(** validate_builtin <builtin> <string>
* validates <string> against builtin type <builtin> and return the resulting
* cduce value
* @raise Schema_builtin_error [name] in case of validation error, where
* [name] is the name of a schema built in type prefixed with
* Schema_xml.xsd_prefix *)
val validate_builtin: string -> string -> Value.t
val validate_builtin: Utf8.t -> Utf8.t -> Value.t
open Printf
open Encodings
open Encodings.Utf8.Pcre
open Schema_types
let no_facets = {
......@@ -29,7 +31,7 @@ let name_of_complex_type_definition = function
| _, Some name, _, _, _, _ -> name
| _ -> raise (Invalid_argument "anonymous complex type definition")
let name_of_type_definition = function
| AnyType -> "xsd:anyType"
| AnyType -> Encodings.Utf8.mk "xsd:anyType"
| Simple st -> name_of_simple_type_definition st
| Complex ct -> name_of_complex_type_definition ct
let name_of_attribute_declaration (name, _, _) = name
......@@ -63,26 +65,24 @@ let iter_attribute_groups schema f = List.iter f schema.attribute_groups
let iter_model_groups schema f = List.iter f schema.model_groups
exception XSD_validation_error of string
exception XSI_validation_error of validation_context * string
let regexp' s = Pcre.regexp ~flags:[`UTF8] s
exception XSI_validation_error of string
let rec normalize_white_space =
let ws_RE = regexp' "[\t\r\n]" in
let spaces_RE = regexp' "[ ]+" in
let margins_RE = regexp' "^ (.*) $" in
let ws_RE = pcre_regexp "[\t\r\n]" in
let spaces_RE = pcre_regexp "[ ]+" in
let margins_RE = pcre_regexp "^ (.*) $" in
fun handling s ->
match handling with
| `Preserve -> s
| `Replace -> Pcre.replace ~rex:ws_RE ~templ:" " s
| `Replace -> pcre_replace ~rex:ws_RE ~templ:(Utf8.mk " ") s
| `Collapse ->
let s' =
Pcre.replace ~rex:spaces_RE ~templ:" "
pcre_replace ~rex:spaces_RE ~templ:(Utf8.mk " ")
(normalize_white_space `Replace s)
in
Pcre.replace ~rex:margins_RE ~templ:"$1" s'
pcre_replace ~rex:margins_RE ~templ:(Utf8.mk "$1") s'
let anySimpleType = Primitive "xsd:anySimpleType"
let anySimpleType = Primitive (Encodings.Utf8.mk "xsd:anySimpleType")
let anyType = AnyType
let get_interval facets =
......@@ -113,22 +113,27 @@ let get_interval facets =
| None, None -> Intervals.any
let print_simple_type fmt = function
| Primitive name -> Format.fprintf fmt "%s" name
| Derived (Some name, _, _, _) -> Format.fprintf fmt "%s'" name
| Primitive name -> Format.fprintf fmt "%a" Encodings.Utf8.dump name
| Derived (Some name, _, _, _) ->
Format.fprintf fmt "%a'" Encodings.Utf8.dump name
| Derived (None, _, _, _) -> Format.fprintf fmt "unnamed'"
let print_complex_type fmt = function
| (id, Some name, _, _, _, _) -> Format.fprintf fmt "%d:%s" id name
| (id, Some name, _, _, _, _) ->
Format.fprintf fmt "%d:%a" id Encodings.Utf8.dump name
| (id, None, _, _, _, _) -> Format.fprintf fmt "%d:unnamed'" id
let print_type fmt = function
| AnyType -> Format.fprintf fmt "xsd:anyType"
| Simple t -> Format.fprintf fmt "S:%a" print_simple_type t
| Complex t -> Format.fprintf fmt "C:%a" print_complex_type t
let print_attribute fmt (name, t, _) =
Format.fprintf fmt "@@%s:%a" name print_simple_type t
let print_element fmt (id, name, _, _) = Format.fprintf fmt "E:%d:<%s>" id name
Format.fprintf fmt "@@%a:%a" Utf8.dump name print_simple_type t
let print_element fmt (id, name, _, _) =
Format.fprintf fmt "E:%d:<%a>" id Utf8.dump name
let print_attributes fmt = List.iter (Format.fprintf fmt "%a" print_attribute)
let print_attribute_group fmt (name, _) = Format.fprintf fmt "{agroup:%s}" name
let print_model_group fmt (name, _) = Format.fprintf fmt "{mgroup:%s}" name
let print_attribute_group fmt (name, _) =
Format.fprintf fmt "{agroup:%a}" Utf8.dump name
let print_model_group fmt (name, _) =
Format.fprintf fmt "{mgroup:%a}" Utf8.dump name
let print_schema fmt schema =
let defined_types = (* filter out built-in types *)
List.filter
......@@ -277,4 +282,85 @@ let string_of_component_kind (kind: component_kind) =
| Some `Model_group -> "model group"
| None -> "component"
(** Events *)
type to_be_visited =
| Fully of Value.t (* xml values still to be visited *)
| Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *)
| Other of Encodings.Utf8.t (* other values *)
| Backlog of event (* old events not yet delivered *)
let stream_of_value v =
let stack = ref [Fully v] in
let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of
what is still to be visited *)
match !stack with
| (Fully ((Value.Xml (Value.Atom atom, attrs, _)) as v)) :: tl ->
stack := (Half v) :: tl;
let children = ref [] in (* TODO inefficient *)
let push v s = (s := v :: !s) in
Value.iter_xml
(fun pcdata -> push (Other pcdata) children)
(fun v ->
match v with
| (Value.Xml (_, _, _)) as v -> push (Fully v) children
| v -> raise (Invalid_argument "Schema_events.stream_of_value"))
v;
stack := (List.rev !children) @ !stack;
List.iter (* push attributes as events on the stack *)
(fun (qname, v) ->
push (Backlog (E_attribute (qname, fst (Value.get_string_utf8 v))))
stack)
(Value.get_fields attrs);
Some (E_start_tag (Atoms.V.value atom))
| (Half (Value.Xml (Value.Atom atom, _, _))) :: tl ->
stack := tl;
Some (E_end_tag (Atoms.V.value atom))
| (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ ->
failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value"
| (Backlog ev) :: tl -> (* consume backlog *)
stack := tl;
Some ev
| (Other v) :: tl ->
stack := tl;
Some (E_char_data v)
| [] -> None
| _ -> assert false
in
Stream.from f
let string_of_event = function
| E_start_tag qname -> sprintf "<%s>" (Ns.QName.to_string qname)
| E_end_tag qname -> sprintf "</%s>" (Ns.QName.to_string qname)
| E_attribute (qname, value) ->
sprintf "@%s=%s" (Ns.QName.to_string qname) (Utf8.to_string value)
| E_char_data value -> Utf8.to_string value
(*
let test v =
let s = stream_of_value v in
let rec aux () =
(match Stream.peek s with
| None -> ()
| Some (E_start_tag qname) ->
Ns.QName.print Format.std_formatter qname
| Some (E_end_tag qname) ->
Format.fprintf Format.std_formatter "/";
Ns.QName.print Format.std_formatter qname
| Some (E_attribute (qname, value)) ->
Format.fprintf Format.std_formatter "@@";
Ns.QName.print Format.std_formatter qname;
Format.fprintf Format.std_formatter " ";
Encodings.Utf8.print Format.std_formatter value
| Some (E_char_data value) ->
Encodings.Utf8.print Format.std_formatter value);
Format.fprintf Format.std_formatter "\n";
(match Stream.peek s with
| None -> ()
| _ ->
Stream.junk s;
aux ())
in
aux ()
*)
(** Schema common functionalities depending only on Schema_types *)
open Encodings
open Schema_types
(** {2 Exceptions} *)
exception XSD_validation_error of string
exception XSI_validation_error of validation_context * string
exception XSI_validation_error of string
(** {2 XSD printer *)
......@@ -22,15 +23,15 @@ val print_complex_type : Format.formatter -> complex_type_definition -> unit
(** {2 Deconstruction functions} *)
val name_of_element_declaration : element_declaration -> string
val name_of_type_definition : type_definition -> string
val name_of_simple_type_definition : simple_type_definition -> string
val name_of_complex_type_definition : complex_type_definition -> string
val name_of_attribute_declaration : attribute_declaration -> string
val name_of_attribute_use : attribute_use -> string
val name_of_attribute_group_definition : attribute_group_definition -> string
val name_of_model_group_definition : model_group_definition -> string
val name_of_particle : particle -> string
val name_of_element_declaration : element_declaration -> Utf8.t
val name_of_type_definition : type_definition -> Utf8.t
val name_of_simple_type_definition : simple_type_definition -> Utf8.t
val name_of_complex_type_definition : complex_type_definition -> Utf8.t
val name_of_attribute_declaration : attribute_declaration -> Utf8.t
val name_of_attribute_use : attribute_use -> Utf8.t
val name_of_attribute_group_definition : attribute_group_definition -> Utf8.t
val name_of_model_group_definition : model_group_definition -> Utf8.t
val name_of_particle : particle -> Utf8.t
val string_of_component_kind : component_kind -> string
......@@ -40,13 +41,13 @@ val simple_type_of_type : type_definition -> simple_type_definition
val complex_type_of_type : type_definition -> complex_type_definition
val content_type_of_type : type_definition -> content_type
val get_type: string -> schema -> type_definition
val get_attribute: string -> schema -> attribute_declaration
val get_element: string -> schema -> element_declaration
val get_attribute_group: string -> schema -> attribute_group_definition
val get_model_group: string -> schema -> model_group_definition
val get_type: Utf8.t -> schema -> type_definition
val get_attribute: Utf8.t -> schema -> attribute_declaration
val get_element: Utf8.t -> schema -> element_declaration
val get_attribute_group: Utf8.t -> schema -> attribute_group_definition
val get_model_group: Utf8.t -> schema -> model_group_definition
val get_component: component_kind -> string -> schema -> component
val get_component: component_kind -> Utf8.t -> schema -> component
val iter_types: schema -> (type_definition -> unit) -> unit
val iter_attributes: schema -> (attribute_declaration -> unit) -> unit
......@@ -61,7 +62,7 @@ val merge_facets: facets -> facets -> facets
(** restrict base new_facets new_name
* Implements simple type derivition by restriction *)
val restrict: simple_type_definition -> facets -> string option ->
val restrict: simple_type_definition -> facets -> Utf8.t option ->
simple_type_definition
(** {2 Miscellaneous} *)
......@@ -77,5 +78,10 @@ val anyType: type_definition
val get_interval: facets -> Intervals.t
(** perform white space normalization according to XML recommendation *)
val normalize_white_space: white_space_handling -> string -> string
val normalize_white_space: white_space_handling -> Utf8.t -> Utf8.t
(** event interface on top of CDuce values *)
val stream_of_value: Value.t -> event Stream.t
val string_of_event: event -> string
This diff is collapsed.
......@@ -11,6 +11,8 @@
PSVI Post Schema Validation Infoset
*)
open Encodings
(** {2 XSD representation} *)
type xs_nonNegativeInteger = Intervals.V.t (* = Big_int.big_int *)
......@@ -38,9 +40,9 @@ type facets = {
}
type simple_type_definition =
| Primitive of string
| Primitive of Utf8.t
| Derived of
string option * (* name *)
Utf8.t option * (* name *)
variety *
facets *
simple_type_definition (* base *)
......@@ -52,7 +54,7 @@ and variety =
| Union of simple_type_definition list
type attribute_declaration =
string * (* name *)
Utf8.t * (* name *)
simple_type_definition * (* type *)
value_constraint option
......@@ -84,13 +86,13 @@ and particle =
and element_declaration =
int * (* unique id *)
string * (* name *)
Utf8.t * (* name *)
type_definition ref * (* type *)
value_constraint option
and complex_type_definition =
int * (* unique id *)
string option * (* name *)
Utf8.t option * (* name *)
type_definition * (* base *)
derivation_type *
attribute_use list *
......@@ -102,11 +104,11 @@ and type_definition =
| Complex of complex_type_definition
type model_group_definition =
string * (* name *)
Utf8.t * (* name *)
model_group
type attribute_group_definition =
string * (* name *)
Utf8.t * (* name *)
attribute_use list
type schema = {
......@@ -118,6 +120,14 @@ type schema = {
model_groups: model_group_definition list;
}
(** {2 Events} see Schema_events module *)
type event =
| E_start_tag of Ns.qname
| E_end_tag of Ns.qname
| E_attribute of Ns.qname * Encodings.Utf8.t (* qualified name, value *)
| E_char_data of Encodings.Utf8.t
(** {2 Misc} *)
(* kind of a schema component *)
......@@ -131,8 +141,3 @@ type component =
| Attribute_group of attribute_group_definition
| Model_group of model_group_definition
type validation_context = {
path: int list;
mixed: bool;
}
This diff is collapsed.
......@@ -5,12 +5,13 @@ open Schema_types
(** {2 per schema component kind validators} *)
(** CDuce domain: any values
*
* - when type_definition is AnyType, validate_type is the identity function
* (CDuce domain: any values)
* - when type_definition is a simple type definition, validate_type act as
* validate_simple_type below (CDuce domain: strings)
* - when type_definition is a complex type_definition, validate_type check
* that a given XML value has the given type (CDuce domain: XML values)
* that a given XML value has the given type ignoring tag name (CDuce domain:
* XML values)
*)
val validate_type : type_definition -> Value.t -> Value.t
......
......@@ -4,29 +4,25 @@ open Pxp_ev_parser
open Pxp_tree_parser
open Pxp_types
open Encodings
open Encodings.Utf8.Pcre
type pxp_node =
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node
type pxp_document =
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.document
let regexp' s = Pcre.regexp ~flags:[`UTF8] s
let xsd_RE = regexp' "^xsd:"
let namespace_split name = (* Pxp_aux.namespace_split *)
try
let n = String.index name ':' in (* may raise Not_found *)
let prefix = String.sub name 0 n in
let localname = String.sub name (n+1) (String.length name - n - 1)
in
(prefix, localname)
with
Not_found -> ("", name)
let has_xsd_prefix s = Pcre.pmatch ~rex:xsd_RE s
let xsd_namespace = "http://www.w3.org/2001/XMLSchema"
let xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance"
let xsd_prefix = "xsd"
let xsi_prefix = "xsi"
let xsd_RE = pcre_regexp "^xsd:"
let has_xsd_prefix s = Pcre.pmatch ~rex:xsd_RE (Utf8.get_str s)
let xsd_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema"
let xsi_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema-instance"
let xsd_prefix = Utf8.mk "xsd"
let xsi_prefix = Utf8.mk "xsi"
let add_xsd_prefix =
let prefix = Utf8.concat xsd_prefix (Utf8.mk ":") in
fun s -> Utf8.concat prefix s
let schema_ns_prefixes =
[ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ]
......@@ -34,7 +30,9 @@ let schema_ns_prefixes =
let spec = default_namespace_spec
let new_xsd_config () =
let ns_manager = new Pxp_dtd.namespace_manager in
List.iter (fun (p, ns) -> ns_manager#add_namespace p ns) schema_ns_prefixes;
List.iter
(fun (p, ns) -> ns_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns))
schema_ns_prefixes;
{ default_namespace_config with
Pxp_types.enable_namespace_processing = Some ns_manager
}
......@@ -158,11 +156,11 @@ module Pxp_helpers =
exception PxpHelpers of exn
let _raise e = raise (PxpHelpers e)
let space_RE = regexp' " "
let space_RE = pcre_regexp " "
let _tag_name (n: pxp_node) =
match n#node_type with
| T_element tag -> tag
| T_element tag -> Utf8.mk tag
| _ -> raise Not_found
let _has_attribute name (n: pxp_node) =
......@@ -174,7 +172,7 @@ module Pxp_helpers =
let _attribute name (n: pxp_node) =
match n#attribute name with
| Value v -> v
| Value v -> Utf8.mk v
| _ -> raise Not_found
let _has_element e (n: pxp_node) =
......@@ -208,6 +206,6 @@ module Pxp_helpers =
(** export Ns.t version of defined namespaces *)
let xsd_namespace = Ns.mk_ascii xsd_namespace
let xsi_namespace = Ns.mk_ascii xsi_namespace
let xsd_namespace = Ns.mk xsd_namespace
let xsi_namespace = Ns.mk xsi_namespace
open Encodings
type pxp_node =
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node
type pxp_document =
......@@ -22,7 +24,7 @@ module Pxp_helpers:
sig
exception PxpHelpers of exn
val _tag_name : pxp_node -> string
val _tag_name : pxp_node -> Utf8.t
val _has_attribute : string -> pxp_node -> bool
......@@ -32,7 +34,7 @@ module Pxp_helpers:
(* test if exists a child element which name is one of the given *)
val _has_element': string list -> pxp_node -> bool
val _attribute : string -> pxp_node -> string
val _attribute : string -> pxp_node -> Utf8.t
(* find the first child element which have the given name *)
val _element : string -> pxp_node -> pxp_node
......@@ -49,13 +51,16 @@ module Pxp_helpers:
(* namespace handling *)
val has_xsd_prefix : string -> bool
val schema_ns_prefixes: (string * string) list
open Encodings
val has_xsd_prefix : Utf8.t -> bool
val schema_ns_prefixes: (Utf8.t * Utf8.t) list
val xsd_namespace : Ns.t
val xsi_namespace : Ns.t
val xsd_prefix : string
val xsi_prefix : string
val namespace_split : string -> string * string
val xsd_prefix : Utf8.t
val xsi_prefix : Utf8.t
val add_xsd_prefix : Utf8.t -> Utf8.t
(* misc stuff *)
......
......@@ -79,7 +79,7 @@ let _ =
| XSD_validation_error msg ->
prerr_endline ("Error validating schema document:\n" ^ msg);
exit 2
| XSI_validation_error (context,msg) ->
| XSI_validation_error msg ->
prerr_endline ("Error validating instance document:\n" ^ msg);
exit 3
| Pxp_types.At _ as exc ->
......
......@@ -208,9 +208,8 @@ let find_schema_descr' k s n =
try
find_schema_descr k s n
with Not_found ->
raise (Error