Commit fee8fb26 authored by Pietro Abate's avatar Pietro Abate

[r2005-03-09 13:33:51 by afrisch] string:// --> string: Resolution of external...

[r2005-03-09 13:33:51 by afrisch] string:// --> string: Resolution of external entities in PXP, expat

Original author: afrisch
Date: 2005-03-09 13:36:17+00:00
parent 27cd3cfe
......@@ -28,6 +28,8 @@ Since 0.2.2
* Keywords are now allowed as type names
* Concatenation @ allowed in types
* Record concatenation + allowed in types
* Changed "string://" URL-pseudo schema to "string:"
* Better resolution of external entities for PXP and expat
- Tools:
* A new tool cduce_mktop produces customized CDuce toplevels with embedded
......
......@@ -11,4 +11,4 @@ let () =
Config.register
"curl"
"Load external URLs with curl"
(fun () -> Url.load_url := load_url)
(fun () -> Url.url_loader := load_url)
......@@ -30,4 +30,4 @@ let () =
Config.register
"netclient"
"Load external URLs with netclient"
(fun () -> Url.load_url := load_url)
(fun () -> Url.url_loader := load_url)
type url = Filename of string | Url of string
let start_with s p =
let l = String.length p in
let n = String.length s in
......@@ -18,12 +16,12 @@ let no_load_url s =
in
raise (Location.Generic msg)
let load_url = ref no_load_url
let url_loader = ref no_load_url
type kind = File of string | Uri of string | String of string
let kind s =
match start_with s "string://" with
match start_with s "string:" with
| None -> if is_url s then Uri s else File s
| Some s -> String s
......@@ -48,22 +46,15 @@ let local s1 s2 =
s2 in
Neturl.string_of_url (Neturl.ensure_absolute_url ~base:url1 url2)
let process s =
match kind s with
| File s -> Location.protect_op "loading file"; Filename s
| Uri s -> Location.protect_op "fetching external URI"; Url (!load_url s)
| String s -> Url s
let load_file s =
let ic = open_in s in
let b = Buffer.create 10240 in
let buf = String.create 1024 in
let rec loop () =
let n = input ic buf 0 1024 in
if (n > 0) then (Buffer.add_substring b buf 0 n; loop ())
in
loop ();
let load_file fn =
let ic = open_in fn in
let len = in_channel_length ic in
let s = String.create len in
really_input ic s 0 len;
close_in ic;
Buffer.contents b
s
let load_url s =
match start_with s "string:" with
| None -> if is_url s then !url_loader s else load_file s
| Some s -> s
type url = Filename of string | Url of string
val is_url: string -> bool
val process: string -> url
val local: string -> string -> string
val load_url: (string -> string) ref
val url_loader: (string -> string) ref
val load_file: string -> string
val load_url: string -> string
......@@ -20,26 +20,24 @@ let load_from_file p s =
close_in ic
with exn -> close_in ic; raise exn
let load_expat s =
let rec load_expat s =
let p = Expat.parser_create "" in
Expat.set_start_element_handler p Load_xml.start_element_handler;
Expat.set_end_element_handler p Load_xml.end_element_handler;
Expat.set_character_data_handler p Load_xml.text_handler;
let u = Url.process s in
Expat.set_external_entity_ref_handler p
(fun _ _ sys _ -> load_expat (Url.local s sys));
ignore (Expat.set_param_entity_parsing p Expat.ALWAYS);
try
match u with
| Url.Url s -> Expat.parse p s
| Url.Filename s -> load_from_file p s
if Url.is_url s then Expat.parse p (Url.load_url s)
else load_from_file p s
with Expat.Expat_error e ->
let line = Expat.get_current_line_number p
and col = Expat.get_current_column_number p in
let src = match u with
| Url.Url s -> ""
| Url.Filename s -> Printf.sprintf " file \"%s\"," s in
let msg =
Printf.sprintf
"load_xml,%s at line %i, column %i: %s"
src
s
(Expat.get_current_line_number p)
(Expat.get_current_column_number p)
(Expat.xml_error_to_string e)
......
......@@ -43,7 +43,7 @@ let channel_of_id rid =
in
let enc,ch =
if Url.is_url url
then of_string (!Url.load_url url)
then of_string (Url.load_url url)
else of_file url
in
ch, enc, Some { rid with rid_system = Some url }
......
......@@ -115,14 +115,15 @@ let load_html s =
Location.protect_op "load_html";
let parse src = Nethtml.parse_document ~dtd:Nethtml.relaxed_html40_dtd src in
let doc =
match Url.process s with
| Url.Filename s ->
let ic = open_in s in
let doc = parse (Lexing.from_channel ic) in
close_in ic;
doc
| Url.Url s ->
parse (Lexing.from_string s)
if Url.is_url s then
parse (Lexing.from_string (Url.load_url s))
else
let ic = open_in s in
let doc =
try parse (Lexing.from_channel ic)
with exn -> close_in ic; raise exn in
close_in ic;
doc
in
let doc = Nethtml.decode ~subst:(fun _ -> "???") doc in
let doc = Nethtml.map_list
......
......@@ -92,17 +92,7 @@ let exn_float_of =
let eval_load_file ~utf8 e =
Location.protect_op "load_file";
let fn = Value.get_string_latin1 e in
let s = match Url.process fn with
| Url.Filename fn ->
let ic = open_in fn in
let len = in_channel_length ic in
let s = String.create len in
really_input ic s 0 len;
close_in ic;
s
| Url.Url txt ->
txt
in
let s = Url.load_url fn in
if utf8 then
match U.mk_check s with
| Some s -> Value.string_utf8 s
......@@ -154,11 +144,11 @@ register_fun "string_of"
register_fun "load_xml"
string_latin1 any_xml
(fun v -> Load_xml.load_xml (Value.get_string_latin1 v));;
(fun v -> Location.protect_op "load_xml"; Load_xml.load_xml (Value.get_string_latin1 v));;
register_fun "load_html"
string_latin1 Sequence.any
(fun v -> Load_xml.load_html (Value.get_string_latin1 v));;
(fun v -> Location.protect_op "load_html"; Load_xml.load_html (Value.get_string_latin1 v));;
register_fun "load_file_utf8"
string_latin1 string
......
......@@ -482,7 +482,7 @@ type AnyXml = <(Atom) (Record)>[ (AnyXml|Char)* ]
<p>
If the support for netclient or curl is available, it is also
possible to fetch an XML file from an URL, e.g.:
<code>load_xml "http://..."</code>. A special scheme <code>string://</code>
<code>load_xml "http://..."</code>. A special scheme <code>string:</code>
is always supported: the string following the scheme is parsed as it is.
</p>
......
......@@ -276,15 +276,15 @@ type <code>ParentBook</code>) then it performs the assignment (the variable
The command <code>load_xml "parents.xml"</code> is just an abbreviated form for
<code>load_xml "{{file://}}parents.xml"</code>. If CDuce is compiled with
netclient or curl support, then it is also possible to use other URI schemes such as
http:// or ftp://. A special scheme string:// is always supported: the string
http:// or ftp://. A special scheme string: is always supported: the string
following the scheme is parsed as it is.
<footnote>
All these schemes are available for <code>load_html</code> and <code>load_file</code> as well.
</footnote>
So, for instance, <code>load_xml
"string://%%exp%%"</code>
"string:%%exp%%"</code>
parses litteral XML code <code>%%exp%%</code> (it corresponds to XQuery's <code>{ %%exp%% }</code>), while <code>load_xml
("string://" @ x)</code> parses the XML code associated to the string variable <code>x</code>. Thus the following definition of <code>x</code>
("string:" @ x)</code> parses the XML code associated to the string variable <code>x</code>. Thus the following definition of <code>x</code>
</p>
<sample><![CDATA[
let x : Any = <person>[ <name>"Alice" <children>[] ]
......@@ -293,7 +293,7 @@ let x : Any = <person>[ <name>"Alice" <children>[] ]
is completely equivalent to this one
</p>
<sample><![CDATA[
let x = load_xml "string://<person><name>Alice</name> <children/></person>"
let x = load_xml "string:<person><name>Alice</name> <children/></person>"
]]></sample>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment