Commit 7fbd2315 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2005-03-04 14:35:48 by afrisch] doc

Original author: afrisch
Date: 2005-03-04 14:35:49+00:00
parent a0171f82
......@@ -109,7 +109,7 @@ let rec unfold seen constrs ty =
let loop = unfold seen constrs in
slot.def <-
(match ty.desc with
| Tarrow (l,t1,t2,_) -> Arrow (l, loop t1, loop t2)
| Tarrow (l,t1,t2,_) -> let t1 = loop t1 in let t2 = loop t2 in Arrow (l, t1,t2)
| Ttuple tyl -> Tuple (List.map loop tyl)
| Tvariant rd ->
let fields =
......
......@@ -5,17 +5,8 @@
exceptions are available here. See Schema_common.
*)
(**
Glossary:
XSD XML Schema Document
PSV Post Schema Validation
PSVI Post Schema Validation Infoset
*)
open Encodings
(** {2 XSD representation} *)
type derivation_type = [ `Extension | `Restriction ]
type white_space_handling = [ `Preserve | `Replace | `Collapse ]
......@@ -138,8 +129,6 @@ type schema = {
model_groups: model_group_definition list;
}
(** {2 Events} see Schema_events module *)
type event =
| E_start_tag of Ns.qname
| E_end_tag of Ns.qname
......
......@@ -409,7 +409,7 @@ and validate_content_type ctx content_type =
| CT_empty ->
Value.nil
| CT_simple st_def ->
Value.sequence [ validate_simple_type_wrapper ctx st_def ]
validate_simple_type_wrapper ctx st_def
| CT_model (particle, mixed) ->
let mixold = ctx.ctx_mixed in
let ctx = subctx mixed ctx in
......
......@@ -2,7 +2,7 @@
(*
XML Schema validator
Usage: validate <schema_document> [ <instance_document> ... ]
Usage: validate <schema_document> [<instance_document> ...]
Exit codes:
0 validation ok
......@@ -32,7 +32,6 @@ let main () =
in
debug_print "Parsing schema document ...";
let schema = Schema_parser.schema_of_uri schema_file in
print_schema Format.std_formatter schema
(*
for i = 2 to Array.length Sys.argv - 1 do
let instance_stream = Schema_xml.pxp_stream_of_file Sys.argv.(i) in
......@@ -67,6 +66,7 @@ let main () =
flush stdout)
done
*)
()
let _ =
try
......
......@@ -1844,15 +1844,15 @@ let register_schema schema_name uri schema =
env := Env.add (Ident.ident name) (cd_type, v def) !env
) lst
in
defs "type" name_of_type_definition type_def validate_type schema.types;
defs "attribute" (fun a -> a.attr_name) att_decl
(fun _ -> assert false) schema.attributes;
defs "element" (fun e -> e.elt_name) elt_decl
validate_element schema.elements;
(* defs "attribute" (fun a -> a.attr_name) att_decl
(fun _ _ -> assert false) schema.attributes; *)
defs "attribute group" (fun ag -> ag.ag_name) attr_group
validate_attribute_group schema.attribute_groups;
defs "model group" (fun mg -> mg.mg_name) model_group
validate_model_group schema.model_groups;
defs "type" name_of_type_definition type_def validate_type schema.types;
defs "element" (fun e -> e.elt_name) elt_decl
validate_element schema.elements;
!env
let real_load_schema schema_name uri =
......
......@@ -204,22 +204,22 @@ more details).
</p>
<p>
In a CDuce module, you can write <code>external "M.f"</code>
In a CDuce module, you can write <code>M.f</code>
to denote the result of translating the OCaml value <code>M.f</code>
to CDuce. Actually, you can simply write <code>M.f</code>.
to CDuce.
</p>
<p>
If the value you want to use has a polymorphic type, you can make
the translation work by explicitly instantiating its type
variables with CDuce types. The syntax is <code>external { "M.f" t1
variables with CDuce types. The syntax is <code>M.f with { t1
... tn }</code> where the <code>ti</code> are CDuce types. The type
variables are listed in the order they appear in a left-to-right
reading of the OCaml type. Example:
</p>
<sample>
let listmap = external { "List.map" Int String }
let listmap = List.map with { Int String }
</sample>
<p>
......@@ -354,6 +354,27 @@ Here is the protocol to compile a single CDuce module:
</box>
<box title="Calling OCaml from the toplevel" link="topl">
<p>
The tool <code>cduce_mktop</code> creates custom versions of the CDuce
toplevel with built-in support for some OCaml modules / functions.
</p>
<sample>
cduce_mktop [target] [primitive file]
</sample>
<p>
The first argument is the file name of the resulting toplevel.
The second points to a file whose contents specify a set of built-in
OCaml values to be embedded in the toplevel. Each line must either
be a qualified value (like <code>List.map</code>) or
the name of an OCaml unit (like <code>List</code>).
</p>
</box>
<box title="Examples" link="examples">
<section title="Getting the value of an environment variable">
......
......@@ -16,7 +16,7 @@
href="http://www.w3.org/TR/xmlschema-0/">Primer</a>, <a
href="http://www.w3.org/TR/xmlschema-1/">Structures</a>, <a
href="http://www.w3.org/TR/xmlschema-2/">Datatypes</a>). Using this CDuce
feature is possible to manipulate XML documents whose leaves are typed
feature it is possible to manipulate XML documents whose leaves are typed
values like integers, dates, binary data, and so on.
</p>
<p>
......@@ -53,7 +53,7 @@
<box title="XML Schema components (micro) introduction" link="primer">
<p>
An XML Schema document could define five different kinds of component, each
An XML Schema document can define four different kinds of components; each
of them can be imported in CDuce and used as a CDuce type:
</p>
<ul>
......@@ -69,12 +69,14 @@
element. An XML Schema complex type is strictly more expressive than a DTD
element declaration.
</li>
<!--
<li>
<b>Attribute declaration</b><br />
An attribute declaration links an attribute name to a simple type.
Optionally it can constrain the set of possible values for the attribute
mandating a fixed value or providing a default value.
</li>
-->
<li>
<b>Element declarations</b>
An element declaration links an element name to a complex type.
......@@ -94,6 +96,10 @@
XML Schema components.
</li>
</ul>
<p>
Attribute declarations currently don't produce any CDuce type
and can't be used for validation themselves.
</p>
</box>
<box title="XML Schema components import" link="import">
......@@ -105,38 +111,28 @@
</p>
<sample>
# {{schema Mails = "tests/schema/mails.xsd"}};;
Registering schema type: Mails # attachmentType
Registering schema type: Mails # mimeTopLevelType
Registering schema type: Mails # mailType
Registering schema type: Mails # envelopeType
Registering schema type: Mails # mailsType
Registering schema type: Mails # bodyType
Registering schema attribute: Mails # name
Registering schema element: Mails # Date
Registering schema element: Mails # mails
Registering schema element: Mails # header
Registering schema attribute group: Mails # mimeTypeAttributes
Registering schema model group: Mails # attachmentContent
Registering schema type: attachmentType
Registering schema type: mimeTopLevelType
Registering schema type: mailsType
Registering schema type: mailType
Registering schema type: bodyType
Registering schema type: envelopeType
Registering schema element: header
Registering schema element: Date
Registering schema element: mails
Registering schema attribute group: mimeTypeAttributes
Registering schema model group: attachmentContent
</sample>
<p>
The above declaration will (try to) import all schema components included in
the schema document <local href="manual_schema_samples">mails.xsd</local>
as CDuce types. You can reference them using the
<code>#</code> (sharp) operator.
dot operator, e.g. <code>Mails.mails</code>.
</p>
<p>
XML Schema permits ambiguity in components name, this implies that you can
have both an element declaration and an attribute declaration having the
same name in a single schema document. In case of no ambiguity you can
reference CDuce types corresponding to schema components just using the name
with the following syntax:<br /> <tt>schema_ref ::= </tt>
<code>&lt;schema_name&gt; # &lt;component_name&gt;</code><br />
Otherwise you can specify the kind of schema component as follows:<br />
<tt>|</tt> <code>&lt;schema_name&gt; # &lt;component_name&gt; as
&lt;component_kind&gt;</code><br /> where component kind is one of:<br />
<tt>component_kind ::= </tt>
<code>element | type | attribute | attribute_group | model_group</code>
<br />
XML Schema permits ambiguity in component names. CDuce chooses
to resolve references to Schema components in this order:
elements, types, model groups, attribute groups.
</p>
<p>
The result of a schema component reference is an ordinary CDuce type which
......@@ -144,16 +140,9 @@ Registering schema model group: Mails # attachmentContent
</p>
<sample>
let is_valid_mail (Any -> Bool)
| {{Mails # mailType}} -> `true
| {{Mails.mailType}} -> `true
| _ -> `false
</sample>
<p>
<em>
Please note the spaces which surround the sharp character, they are
needed, otherwise <code>#mailType</code> will be considered by the lexer
as a(n unexistent) directive.
</em>
</p>
</box>
<box>
<p>
......@@ -174,40 +163,14 @@ let is_valid_mail (Any -> Bool)
The toplevel directive <code>#env</code> supports schemas: it lists the
currently defined schemas.
</p>
<sample>
# #env;;
Types: Empty Any Int Char Byte Atom Pair Arrow Record String Latin1 Bool
Namespace prefixes:
=>""
xml=>"http://www.w3.org/XML/1998/namespace"
Namespace prefixes used for pretty-printing:
{{Schemas: Mails}}
Values:
val argv : [ String* ] = ""
</sample>
<p>
The toplevel directive <code>#print_type</code> supports schemas too, it can
be used to print types corresponding to schema components with the usual
sharp syntax.
be used to print types corresponding to schema components.
</p>
<sample>
# #print_type {{Mails # bodyType}};;
[ Char ]
# #print_type {{Mails.bodyType}};;
[ Char* ]
</sample>
<p>
The toplevel directive <code>#print_schema</code> is not really user
friendly (because it shows some representation internals), but can be used
to show the various schema components contained in a given schema.
</p>
<sample><![CDATA[
# #print_schema Mails;;
Types: C:10:attachmentType S:mimeTopLevelType' C:12:mailType C:4:envelopeType
C:14:mailsType S:bodyType'
Attributes: @name:xsd:string
Elements: E:18:<Date> E:15:<mails> E:17:<header>
Attribute groups: {agroup:mimeTypeAttributes}
Model groups: {mgroup:attachmentContent}
]]></sample>
<p>
For more information have a look at the manual section about <local
href="manual_interpreter">toplevel directives</local>.
......@@ -269,10 +232,17 @@ Model groups: {mgroup:attachmentContent}
<code>NMTOKENS</code>, <code>IDREFS</code>, <code>ENTITIES</code>
</td>
<td>
<code>String</code> list (i.e. Kleene star of a <code>String</code>
type)
<code>[String*]</code>
</td>
</tr>
<tr>
<td>
<code>decimal</code>, <code>float</code>, <code>double</code>
</td>
<td>
<code>Float</code>
</td>
</tr>
<tr>
<td>
(<b>Not properly supported</b>)<br /> <code>decimal</code>,
......@@ -318,7 +288,7 @@ Model groups: {mgroup:attachmentContent}
<tt>dateTime</tt> Schema type.
</p>
<sample><![CDATA[
# #print_type Mails # envelopeType;;
# #print_type Mails.envelopeType;;
<(Any) {| |}>[
<From {| |}>String
<To {| |}>String
......@@ -332,17 +302,6 @@ Model groups: {mgroup:attachmentContent}
]
]]></sample>
</li>
<li>
<p>
XML Schema <b>attribute declarations</b> are converted to closed record
types with exactly one required field corresponding to the declared
attribute.
</p>
<sample>
# #print_type Mails # name;;
{| {{name = String}} |}
</sample>
</li>
<li>
<p>
XML Schema <b>element declarations</b> can bind an XML element either
......@@ -379,7 +338,7 @@ Model groups: {mgroup:attachmentContent}
will be translated to the following CDuce type:
</p>
<sample><![CDATA[
# #print_type Mails # header;;
# #print_type Mails.header;;
<header {| name = String |}>String
]]></sample>
<p>
......@@ -397,13 +356,18 @@ Model groups: {mgroup:attachmentContent}
timezone =? { positive = Bool; hour = Int; minute = Int }
}
]]></sample>
<p>XML Schema wildcards (<tt>xsd:any</tt>)
and nullable elements (<tt>xsi:nil</tt>) are supported.</p>
</li>
<li>
<p>
XML Schema <b>attribute group definitions</b> are mapped to record types
containing one field for each attribute declaration contained in the
group. <tt>use</tt> constraints are respected: optional attributes are
mapped to optional fields, required attributes to required fields.
mapped to optional fields, required attributes to required
fields. XML Schema attribute wildcards are partly supported;
they simply produce open record types instead of closed ones,
but the actual constraints of the wildcards are discarded.
</p>
<p>
The following XML Schema attribute group declaration:
......@@ -418,7 +382,7 @@ Model groups: {mgroup:attachmentContent}
will thus be mapped to the following CDuce type:
</p>
<sample>
# #print_type Mails # mimeTypeAttributes;;
# #print_type Mails.mimeTypeAttributes;;
{| type = [
'image' | 'text' | 'application' | 'audio' | 'message' | 'multipart' | 'video'
];
......@@ -436,11 +400,12 @@ Model groups: {mgroup:attachmentContent}
<tt>all</tt> constraints, also known as <em>interleaving
constraints</em>, can't be expressed in the CDuce type system without
causing an explosion in type size. Thus, content models of this kind are normalized
and considered, in the type system, as sequence types.
and considered, in the type system, as sequence types (the
validator will reorder the actual XML documents).
</p>
<p>
For a similar reason, <tt>mixed</tt> content models aren't supported by
CDuce too.
<b>Mixed content models</b> are supported.
</p>
<p>
As an example, the following XML Schema model group definition:
......@@ -461,9 +426,9 @@ Model groups: {mgroup:attachmentContent}
will be mapped to the following CDuce type:
</p>
<sample><![CDATA[
# #print_type Mails # attachmentContent;;
# #print_type Mails.attachmentContent;;
[ X1 <content {| |}>String | X1 ] where
X1 = <mimetype {| type = [ ... ]; subtype = String |}>[ ]
X1 = <mimetype S.mimeTypeAttributes>[ ]
]]></sample>
</li>
</ul>
......@@ -523,15 +488,10 @@ X1 = <mimetype {| type = [ ... ]; subtype = String |}>[ ]
</p>
<sample><![CDATA[
# let xml = <Date>"2003-10-15T15:44:01Z" in
validate xml with Mails # Date;;
- : <Date {| |}>{
positive = Bool;
year = Int; month = Int; day = Int;
hour = Int; minute = Int; second = Int;
timezone =? { positive = Bool; hour = Int; minute = Int }
}
=
validate xml with Mails.Date;;
- : S.Date =
<Date> {
time_kind=`dateTime;
positive=`true;
year=2003; month=10; day=15;
hour=15; minute=44; second=1;
......@@ -565,35 +525,26 @@ val xml : Any = <ignored_tag From="fake@microsoft.com">[
<Subject>[ 'I desperately need XML Schema support in CDuce' ]
<header name="Reply-To">[ 'bill@microsoft.com' ]
]
# validate xml with Mails # envelopeType;;
- : <(Any) {| From = String |}>[
<From {| |}>String <To {| |}>String
<Date {| |}>{
positive = Bool;
year = Int; month = Int; day = Int;
hour = Int; minute = Int; second = Int;
timezone =? { positive = Bool; hour = Int; minute = Int }
}
<Subject {| |}>String
<header {| name = String |}>[ String ]* ]
=
# validate xml with Mails.envelopeType;;
- : S.envelopeType =
<ignored_tag From="fake@microsoft.com">[
<From>[ 'user@unknown.domain.org' ]
<To>[ 'user@cduce.org' ]
<Date> {
positive=`true;
time_kind=`dateTime;
positive=`true;
year=2003; month=10; day=15;
hour=15; minute=44; second=1;
timezone={ positive=`true; hour=0; minute=0 }
}
<Subject>[ 'I desperately need XML Schema support in CDuce' ]
<header name="Reply-To">[ "bill@microsoft.com" ]
<header name="Reply-To">[ 'bill@microsoft.com' ]
]
]]></sample>
</li>
<li>
<p>
Similarly you can want to validate against a <b>model group</b>. In this
Similarly you may want to validate against a <b>model group</b>. In this
case you can validate CDuce's sequences against model groups. Given
sequences will be considered as content of XML elements.
</p>
......@@ -612,14 +563,8 @@ val xml : Any = <ignored_tag From="fake@microsoft.com">[
<mimetype type="application"; subtype="msword">[ ]
<content>[ '\n ### removed by spamoracle ###\n ' ]
]
# validate content with Mails # attachmentContent;;
- : [ X1 <content {| |}>String | X1 ] where
X1 = <mimetype {|
type = [
'image' | 'text' | 'application' | 'audio' | 'message' | 'multipart' | 'video'
];
subtype = String |}>[ ]
=
# validate content with Mails.attachmentContent;;
- : Mails.attachmentContent =
[ <mimetype type="application"; subtype="msword">[ ]
<content>[ '\n ### removed by spamoracle ###\n ' ]
]
......@@ -673,7 +618,7 @@ val xml : Any = <ignored_tag From="fake@microsoft.com">[
val record :
{| type = [ 'image' ]; subtype = [ 'png' ] |} =
{ type="image"; subtype="png" }
# validate record with Mails # mimeTypeAttributes ;;
# validate record with Mails.mimeTypeAttributes ;;
- : {| type = [ 'image' | 'text' | ... ]; subtype = String |} =
{ type="image"; subtype="png" }
]]></sample>
......@@ -683,7 +628,9 @@ val xml : Any = <ignored_tag From="fake@microsoft.com">[
<box title="XML Schema instances output" link="print_xml">
<p>
<b>TODO</b>
It is possible to use the normal <tt>print_xml</tt>
and <tt>print_xml_utf8</tt> built-in functions to print
values resulting from XML Schema validation.
</p>
</box>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment