Commit 0a448f99 authored by Pietro Abate's avatar Pietro Abate

[r2005-01-14 06:32:35 by afrisch] Empty log message

Original author: afrisch
Date: 2005-01-14 06:32:36+00:00
parent ce360bef
Since 0.2.1
- interval arithmetic for the * operator
- warning for potential division by 0
- Pattern guard /p in regexps (to match the current tail)
- Back to the old semantics for default value patterns in regexps
(the non-capturing semantics is obtained with /(x:=c))
- bug fixes in configure/Makefile for Cygwin
- bug fix for the compilation of complex patterns with records
- new syntax { l = p else p' }
- fixed a little bit support for XML Schema, but still largely broken
- better compilation of sequence capture variables
- punning in record/attribute expressions and values ({ x; y } -> {x=x;y=y})
- removed the warning "no caml interface"
- add "system" and "exit" built-in functions
- (e :? t) raises an exception when e doesn't have type t;
the exception is an explanation of why this is not the case.
- apps-cduce-cvs GODI package is updatable
- /* ... */ comments
- fix --stdin
0.2.2
- Language:
* Runtime type-check: (e :? t) raises an exception when e doesn't
have type t; the exception is an explanation of why this is not the case.
(The actual message may change in the future.)
* Better return type for load_xml, defined as:
AnyXml=<(Atom) (Record)>[ (AnyXml|Char)* ]
* New style for comments: /* ... */
Single and double quotes within these comments are not handled
specially. Moreover, these comments cannot be nested.
The new style should be used for textual comments (which can include
the single quote); the old style is better for ignoring pieces of code.
* Pattern guard /p in regexps to match the current tail. This can be used
for setting "markers" without capturing anything, e.g.:
[ (/(x:=1) ...) | (/(x:=2) ...) ]
Back to the old semantics for default value patterns in regexps
(they do capture an element).
* New syntax {...; l = p else p'; ... }. p' is applied
to the whole matched record when p does not match (or when
the field is missing). Equivalent to:
{ ...; l = p; ...} | ({...;...} & p')
* Punning in record/attribute expressions and patterns.
{ x; y } is a shorthand for {x=x;y=y}.
* New syntax for R**n in regular expressions, equivalent
to R...R (n times), where n > 0.
* Interval arithmetic for the * operator.
* Warning for potential division by 0.
* New "system", "exit", "getenv" built-in functions.
- Toplevel, interpreter, compiler:
* New #silent, #verbose directives.
* New --script option.
* Removed the warning "no caml interface".
- Compilation:
* Better compilation of sequence capture variables.
Now, [ ... x::Any* ] is as efficient as [ ... ; x ]. Can also be written
[ ... /x _* ]. The [ ... ; ... ] syntax is no longer necessary
and might be deprecated.
- Bug fixes, including:
* Bug fixes in configure/Makefile for Cygwin.
* Bug fix for the compilation of complex patterns with records.
* Slightly improved support for XML Schema, which is still largely broken.
* Fix --stdin.
- Other:
* apps-cduce-cvs GODI package is updatable.
0.2.1
......
......@@ -450,7 +450,23 @@ EXTEND
| x = regexp; "+" -> Seq (x, Star x)
| x = regexp; "+?" -> Seq (x, WeakStar x)
| x = regexp; "?" -> Alt (x, Epsilon)
| x = regexp; "??" -> Alt (Epsilon, x) ]
| x = regexp; "??" -> Alt (Epsilon, x)
| x = regexp; "**"; i = INT ->
(* Repetition R ** n: expands to the concatenation of n copies of R. *)
(* [aux i accu] prepends [i] copies of [x] to [accu] using [Seq]. *)
let rec aux i accu =
if (i = 0) then accu
else aux (pred i) (Seq (x, accu))
in
(* Convert the text of the INT token to an integer. [int_of_string] raises
[Failure] when the literal cannot be represented, and [Exit] is raised
here for counts above 1024; both are reported as the same error. *)
let i =
try
let i = int_of_string i in
if (i > 1024) then raise Exit else i
(* We cannot handle types that huge... *)
with Failure _ | Exit -> error loc "Repetition number too large"
in
(* A count of zero (or less) is rejected: n must be strictly positive.
NOTE(review): this relies on [error] not returning normally (presumably
it raises) - confirm against its definition. *)
if (i <= 0) then
error loc "Repetition number must be a positive integer";
aux i Epsilon
]
| [ "("; x = LIST1 regexp SEP ","; ")" ->
(match x with
| [ x ] -> x
......
......@@ -74,7 +74,7 @@ let rec token = lexer
return lexbuf ("INT", L.utf8_lexeme lexbuf)
| [ "<>=.,:;+-*/@&{}[]()|?`!" ]
| "->" | "::" | ";;" | "--" | "//" | "/@" | ":=" | "\\" | "++"
| "{|" | "|}" | "<=" | ">=" | "<<" | ">>" | "||" | "&&"
| "{|" | "|}" | "<=" | ">=" | "<<" | ">>" | "||" | "&&" | "**"
| ["?+*"] "?" | "#" ->
return lexbuf ("", L.utf8_lexeme lexbuf)
| "#" ncname ->
......
......@@ -74,3 +74,18 @@ let () = register_fun "system" string_latin1 system_out
(* Built-in [exit]: registered with argument type [byte_int] and result type
   [Types.empty]. It converts its argument to an OCaml integer and ends the
   process with that exit code. *)
let () =
  let terminate code =
    Location.protect_op "exit";
    exit (Value.cduce2ocaml_int code)
  in
  register_fun "exit" byte_int Types.empty terminate
(* CDuce-level exception whose payload is the atom [Not_found]. *)
let exn_not_found =
  let not_found_atom = Atoms.V.mk_ascii "Not_found" in
  Value.CDuceExn (Value.Atom not_found_atom)
(* Built-in [getenv]: takes and returns a [string_latin1] value. It looks the
   variable up with [Sys.getenv] and turns the OCaml [Not_found] exception
   into the CDuce exception [exn_not_found]. *)
let () =
  let lookup arg =
    Location.protect_op "getenv";
    let name = Value.get_string_latin1 arg in
    (* The conversion of the result stays inside the [try], as before. *)
    try Value.string_latin1 (Sys.getenv name)
    with Not_found -> raise exn_not_found
  in
  register_fun "getenv" string_latin1 string_latin1 lookup;;
(* Built-in [argv]: takes [nil] and returns the current contents of the
   reference [Builtin.argv], read each time the function is called.
   NOTE(review): the manual gives the result type as a sequence of String,
   while the type registered here is a sequence of [string_latin1] -
   confirm which one is intended. *)
let () =
  let result_type = Sequence.star string_latin1 in
  register_fun "argv" nil result_type
    (fun _ ->
       Location.protect_op "argv";
       !Builtin.argv);;
......@@ -19,6 +19,7 @@ let types =
"Latin1", string_latin1;
"Bool", bool;
"Float", float;
"AnyXml", any_xml;
]
let env =
......@@ -83,9 +84,6 @@ let exn_int_of =
Value.Atom (Atoms.V.mk_ascii "Invalid_argument"),
Value.string_latin1 "int_of"))
let exn_not_found =
Value.CDuceExn (Value.Atom (Atoms.V.mk_ascii "Not_found"))
let eval_load_file ~utf8 e =
Location.protect_op "load_file";
let fn = Value.get_string_latin1 e in
......@@ -150,7 +148,7 @@ register_fun "string_of"
);;
register_fun "load_xml"
string_latin1 any
string_latin1 any_xml
(fun v -> Load_xml.load_xml (Value.get_string_latin1 v));;
register_fun "load_html"
......@@ -165,19 +163,9 @@ register_fun "load_file"
string_latin1 string_latin1
(eval_load_file ~utf8:false);;
register_fun "getenv" string_latin1 string_latin1
(fun e ->
Location.protect_op "getenv";
let var = Value.get_string_latin1 e in
try Value.string_latin1 (Sys.getenv var)
with Not_found -> raise exn_not_found);;
let argv = ref Value.Absent;;
register_fun "argv" nil (Sequence.star string_latin1)
(fun e ->
Location.protect_op "argv";
!argv);;
register_fun "print_xml"
......
......@@ -70,3 +70,17 @@ let float_abs =
let float =
Types.abstract (Types.Abstract.atom float_abs)
(* Type of XML elements, used as the result type of the [load_xml] built-in.
The changelog and manual describe it as:
AnyXml = <(Atom) (Record)>[ (AnyXml|Char)* ]
The type is recursive, so its nodes are allocated first with [Types.make]
and tied afterwards with [Types.define]. *)
let any_xml =
let elt = Types.make () in
(* Node for the content sequence; it is referenced below before being defined. *)
let seq = Types.make () in
(* An element: a tag of type [atom], paired with an attribute record and the
content sequence [seq].
NOTE(review): [Types.record' (true,LabelMap.empty)] is presumably the open
record type with no constrained field, i.e. any record - confirm. *)
let elt_d = Types.xml
(Types.cons atom)
(Types.cons (Types.times
(Types.cons (Types.record' (true,LabelMap.empty)))
seq)) in
(* One item of the content: either an element or a character. *)
let elt_char_d = Types.cup elt_d char in
(* The content itself: either [nil], or one item followed by more content. *)
let seq_d = Types.cup nil (Types.times (Types.cons elt_char_d) seq) in
(* NOTE(review): [elt] is defined here but is not referenced by [seq_d],
which embeds [elt_d] through a fresh [Types.cons] node instead. *)
Types.define elt elt_d;
Types.define seq seq_d;
elt_d
......@@ -41,3 +41,5 @@ val ref_type: Types.Node.t -> Types.t
val float: Types.t
val float_abs: Types.Abstract.abs
(** Type of XML elements, bound to the built-in type name [AnyXml] and used
    as the result type of [load_xml]. Described in the manual as
    AnyXml = <(Atom) (Record)>[ (AnyXml|Char)* ]. *)
val any_xml : Types.t
......@@ -462,19 +462,15 @@ of the mismatch is raised.
<section title="Loading XML documents">
<p>
The <code>load_xml</code> operator parse an XML document on the local
file system; the argument gives the filename:</p>
The <code>load_xml: Latin1 -> AnyXml</code> built-in function parses
an XML document on the local
file system. The argument is the filename.
The result type <code>AnyXml</code> is defined as:
</p>
<sample><![CDATA[
load_xml %%e%%
type AnyXml = <(Atom) (Record)>[ (AnyXml|Char)* ]
]]></sample>
<p>
The argument to <code>load_xml</code> is a Latin1 string (the type
system will issue a warning if the argument is of type
<code>String</code> but not <code>Latin1</code>, and an
exception might be raised at runtime).
</p>
<p>
If support for netclient or curl is available, it is also
possible to fetch an XML file from a URL, e.g.:
......@@ -483,19 +479,20 @@ is always supported: the string following the scheme is parsed as it is.
</p>
<p>
There is also a <code>load_html</code> operator to parse in a
permissive way HTML documents. The result has type <code>[Any*]</code>.
There is also a <code>load_html: Latin1 -> [Any*]</code> built-in
function that parses HTML documents
in a permissive way.
</p>
</section>
<section title="Pretty-printing XML documents">
<p>
Two operators can be used to produce a string from an XML document:
Two built-in functions can be used to produce a string from an XML document:
</p>
<sample><![CDATA[
print_xml %%e%%
print_xml_utf8 %%e%%
print_xml: Any -> Latin1
print_xml_utf8: Any -> String
]]></sample>
<p>
They fail if the argument is not an XML document (this isn't checked
......@@ -573,9 +570,9 @@ which are not matched and are not XML elements are copied verbatim.
<section title="Pretty-printing a value">
<p>
The operator <code>string_of</code> converts any value to a string,
The built-in function <code>string_of: Any -> Latin1</code>
converts any value to a string,
using the same pretty-printing function as the CDuce interpreter itself.
The result has type <code>Latin1</code>.
</p>
</section>
......@@ -607,17 +604,9 @@ cannot be proved to be of type <code>[ '-'? '0'--'9'+ ]</code>.
<section title="Displaying a string">
<p>
To print a string to standard output, you can use the construction:
</p>
<sample><![CDATA[
print %%e%%
]]></sample>
<p>
The string will be printed assuming the terminal accepts
ISO-8859-1 encoded characters (or standard output is
an ISO-8859-1 stream). The operator fails if the string
cannot be encoded in ISO-8859-1. Otherwise, it returns <code>`nil</code>.
A warning is issued if the argument is not provably of type <code>Latin1</code>.
To print a string to standard output, you can use one of the built-in
functions <code>print: Latin1 -> []</code> or
<code>print_utf8: String -> []</code>.
</p>
</section>
......@@ -625,17 +614,16 @@ A warning is issued if the argument is not provably of type <code>Latin1</code>.
<section title="Loading files">
<p>
There are two operators available to load a file into a CDuce string:
There are two built-in functions available to load a file into a CDuce string:
</p>
<sample><![CDATA[
load_file %%e%%
load_file_utf8 %%e%%
load_file: Latin1 -> Latin1
load_file_utf8: Latin1 -> String
]]></sample>
<p>
The first one loads an ISO-8859-1 encoded file (resulting type:
<code>Latin1</code>),
The first one loads an ISO-8859-1 encoded file,
whereas the second
one loads a UTF-8 encoded file (resulting type: <code>String</code>).
one loads a UTF-8 encoded file.
</p>
<p>
If the support for netclient or curl is available, it is also
......@@ -682,11 +670,26 @@ Latin1 -> {| stdout = Latin1; stderr = Latin1;
</section>
<section title="Running external commands">
<section title="Terminating the program">
<p>
The predefined function <code>exit: 0--255 -> Empty</code> terminates
the current process. The argument is the exit code.
</p>
</section>
<section title="Accessing the environment">
<p>
The built-in function <code>getenv: Latin1 -> Latin1</code>
queries the system environment for an environment variable.
If the argument does not refer to an existing variable,
the function raises the exception <code>`Not_found</code>.
</p>
</section>
<section title="Command line arguments">
<p>
The predefined function <code>exit</code> terminates
the current process. Its type is <code>0--255 -> Empty</code>.
The argument is the exit code.
The built-in function <code>argv: [] -> [ String* ]</code> returns
the sequence of command line arguments given to the current program.
</p>
</section>
</box>
......
......@@ -241,7 +241,8 @@ must be contained in a single adjacent sequence of phrases
<p>
You can quit the toplevel with the toplevel directive
<code>#quit</code> but also with either <code>Ctrl-C</code> or
<code>Ctrl-D</code>.
<code>Ctrl-D</code>. Another option is to use the built-in
<code>exit</code>.
</p>
<p>
......@@ -263,6 +264,12 @@ for parsing (as defined by the user) and
for pretty-printing (as computed by CDuce itself).
</p>
<p>
The two toplevel directives <code>#silent</code> and
<code>#verbose</code> can be used to turn down and up toplevel
outputs (results of typing and evaluation).
</p>
<p>
The toplevel directive <code>#reinit_ns</code> reinitializes the
table of prefix-to-namespace bindings used for pretty-printing
......
......@@ -323,13 +323,18 @@ the integer itself.</li>
Grouping <code>(%%R%%)</code>. E.g.: <code>[ x::(Int Int) y ]</code>.
</li>
<li>
Tail predicate <code>/p</code>. The type/pattern <code>p</code>
Tail predicate <code>/%%p%%</code>. The type/pattern <code>%%p%%</code>
applies to the current tail of the sequence (the subsequence
starting at the current position). E.g.:
<code>[ (Int /(x:=1) | /(x:=2)) _* ]</code> will bind
<code>x</code> to <code>1</code> if the sequence starts
with an integer and <code>2</code> otherwise.
</li>
<li>
Repetition <code>%%R%% ** %%n%%</code> where <code>%%n%%</code>
is a positive integer constant, which is just a shorthand
for the concatenation of <code>%%n%%</code> copies of <code>%%R%%</code>.
</li>
</ul>
<p>
......
......@@ -48,7 +48,7 @@ _" character, starting by a capitalized letter or underscore.</li>
</ul>
</box>
<box title="Operators" link="op">
<box title="Operators, built-in functions" link="op">
<ul>
<li>Infix:
<br/> <code>@</code> : concatenation of sequences
......@@ -59,19 +59,26 @@ _" character, starting by a capitalized letter or underscore.</li>
<br/> <code>not</code>: Bool -> Bool
</li>
<li>Prefix:
<br/><code>load_xml</code> : Latin1 -> Any,
<br/><code>load_xml</code> : Latin1 -> AnyXml,
<br/><code>load_html</code> : Latin1 -> [ Any* ],
<br/><code>load_file</code> : Latin1 -> Latin1,
<br/><code>load_file_utf8</code> : Latin1 -> String,
<br/><code>argv</code> : [] -> [ String* ],
<br/><code>dump_to_file</code> : Latin1 -> String -> [],
<br/><code>dump_to_file_utf8</code> : Latin1 -> String -> [],
<br/><code>print_xml</code> : Any -> Latin1,
<br/><code>print_xml_utf8</code> : Any -> String,
<br/><code>print</code> : Latin1 -> [],
<br/><code>int_of</code> : String -> Integer,
<br/><code>print_utf8</code> : String -> [],
<br/><code>int_of</code> : String -> Int,
<br/><code>string_of</code> : Any -> Latin1,
<br/><code>atom_of</code> : String -> Atom
<br/><code>atom_of</code> : String -> Atom,
<br/><code>system</code> : Latin1 -> {| stdout = Latin1; stderr = Latin1;
status = (`exited,Int) | (`stopped,Int) | (`signaled,Int)
|},
<br/><code>exit</code> : 0--255 -> Empty,
<br/><code>getenv</code> : Latin1 -> Latin1,
<br/><code>argv</code> : [] -> [ String* ],
<br/><code>raise</code> : Any -> Empty
</li>
</ul>
</box>
......@@ -169,7 +176,7 @@ field <code>l</code> is not present)</li>
transformation is recursively applied to the sequence of children of the unmatched
element; as for transform, each branch returns a sequence
and all the resulting sequences are concatenated together. </li>
<li>Operators: <code>load_xml : Latin1 -> Any; print_xml : Any -> Latin1</code>
<li>Operators: <code>load_xml : Latin1 -> AnyXml; print_xml : Any -> Latin1</code>
</li>
</ul>
</box>
......
......@@ -140,12 +140,14 @@ match sitemap with
find_local_link (h . children,l))
| [] -> raise `Not_found
let local_link (sitemap : Tree, l : String, txt : String) : H:Inline =
let local_link (sitemap : Tree, l : String, txt : String) : [H:Inline?] =
try
let h = find_local_link ([sitemap],l) in
let txt = if txt = "" then h . title else txt in
<a href=(h . url)>txt
with `Not_found -> raise [ 'Local link not found: ' !l ]
[ <a href=(h . url)>txt ]
with `Not_found ->
print [ 'Warning. Local link not found: ' !(string_of l) '\n' ];
[]
let compute_sitemap ((Page|External) -> Tree)
| <page name=name>[ <title>title (c::(Page|External) | _)* ] & p ->
......@@ -319,8 +321,7 @@ match page with
let text (t : [InlineText*]) : H:Inlines =
transform t with
| <code>x -> [ <b>[ <tt>(highlight x) ] ]
| <local href=l>txt ->
[ (local_link (sitemap,l,txt)) ]
| <local href=l>txt -> local_link (sitemap,l,txt)
| <(tag & (`b|`i|`tt|`em)) (attr)>x -> [ <(tag) (attr)>(text x) ]
| <footnote nocount=_>_ ->
let n = string_of !footnote_counter in
......@@ -409,7 +410,8 @@ match page with
| <site-toc>[] ->
[ <ul>[ (display_sitemap sitemap) ] ]
| <local-links href=s>[] ->
ul (map (split_comma s) with x -> <li>[ (local_link(sitemap,x,"")) ])
ul (transform (split_comma s) with x ->
match local_link(sitemap,x,"") with [] -> [] | x -> [<li>x])
| <two-columns>[ <left>x <right>y ] ->
[ <table width="100%">[
<tr>[
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment