Commit 15187188 authored by Pietro Abate's avatar Pietro Abate

[r2003-11-30 14:27:07 by beppe] add url support for load_xml, load_html and in schema declarations

Original author: beppe
Date: 2003-11-30 14:27:08+00:00
parent 38c7348d
include Makefile.conf
VERSION = 0.2.0-pre2
PACKAGES = pxp-engine pxp-lex-iso88591 ulex camlp4 num cgi pcre netstring
PACKAGES = pxp-engine pxp-lex-iso88591 ulex camlp4 num cgi pcre netstring curl
ifeq ($(PXP_WLEX), true)
PACKAGES += pxp-wlex-utf8
else
......@@ -97,7 +97,7 @@ OBJECTS = \
misc/stats.cmo \
misc/serialize.cmo misc/custom.cmo \
misc/state.cmo misc/pool.cmo misc/encodings.cmo misc/bool.cmo \
misc/pretty.cmo misc/ns.cmo misc/inttbl.cmo \
misc/pretty.cmo misc/ns.cmo misc/inttbl.cmo misc/url.cmo\
\
types/sortedList.cmo types/boolean.cmo types/ident.cmo \
types/intervals.cmo types/chars.cmo types/atoms.cmo \
......@@ -139,7 +139,7 @@ DTD2CDUCE = tools/dtd2cduce.cmo
ALL_OBJECTS = $(OBJECTS) $(CQL_OBJECTS) \
driver/run.cmo driver/examples.cmo driver/webiface.cmo \
tools/dtd2cduce.cmo tools/validate.cmo
ALL_INTERFACES = schema/schema_types.mli
ALL_INTERFACES = schema/schema_types.mli
DEPEND = $(ALL_OBJECTS:.cmo=.ml) $(ALL_OBJECTS:.cmo=.mli) $(ALL_INTERFACES)
......
......@@ -22,6 +22,8 @@ misc/ns.cmx: misc/q_symbol.cmo misc/custom.cmx misc/encodings.cmx misc/pool.cmx
misc/serialize.cmx misc/state.cmx misc/ns.cmi
misc/inttbl.cmo: misc/q_symbol.cmo misc/inttbl.cmi
misc/inttbl.cmx: misc/q_symbol.cmo misc/inttbl.cmi
misc/url.cmo: misc/q_symbol.cmo misc/url.cmi
misc/url.cmx: misc/q_symbol.cmo misc/url.cmi
types/sortedList.cmo: misc/q_symbol.cmo misc/custom.cmo misc/serialize.cmi types/sortedList.cmi
types/sortedList.cmx: misc/q_symbol.cmo misc/custom.cmx misc/serialize.cmx types/sortedList.cmi
types/boolean.cmo: misc/q_symbol.cmo misc/custom.cmo types/sortedList.cmi types/boolean.cmi
......@@ -145,9 +147,11 @@ compile/lambda.cmo: misc/q_symbol.cmo types/ident.cmo types/patterns.cmi misc/se
compile/lambda.cmx: misc/q_symbol.cmo types/ident.cmx types/patterns.cmx misc/serialize.cmx \
types/types.cmx
runtime/load_xml.cmo: misc/q_symbol.cmo types/atoms.cmi misc/encodings.cmi types/ident.cmo \
parser/location.cmi misc/ns.cmi runtime/value.cmi runtime/load_xml.cmi
parser/location.cmi misc/ns.cmi misc/url.cmi runtime/value.cmi \
runtime/load_xml.cmi
runtime/load_xml.cmx: misc/q_symbol.cmo types/atoms.cmx misc/encodings.cmx types/ident.cmx \
parser/location.cmx misc/ns.cmx runtime/value.cmx runtime/load_xml.cmi
parser/location.cmx misc/ns.cmx misc/url.cmx runtime/value.cmx \
runtime/load_xml.cmi
runtime/run_dispatch.cmo: misc/q_symbol.cmo types/atoms.cmi types/chars.cmi misc/encodings.cmi \
types/ident.cmo types/patterns.cmi types/types.cmi runtime/value.cmi \
runtime/run_dispatch.cmi
......
open Curl
type url = Filename of string | Url of string
let process_url s =
try
let _ = Neturl.extract_url_scheme s in
let buff = ref "" in
let connection = init () in
setopt connection (CURLOPT_URL s);
setopt connection (CURLOPT_WRITEFUNCTION (fun s -> buff:=s));
perform connection;
Url (!buff);
with Neturl.Malformed_URL -> Filename s
type url = Filename of string | Url of string
val process_url: string -> url
......@@ -112,7 +112,9 @@ EXTEND
[ mk loc (Using (U.mk name, Types.CompUnit.mk (U.mk cu))) ]
| "schema"; name = IDENT; "="; uri = STRING2 ->
protect_op "schema";
let schema = Schema_parser.schema_of_file uri in
let schema = match Url.process_url uri with
| Url.Filename s -> Schema_parser.schema_of_file s
| Url.Url s -> Schema_parser.schema_of_string s in
[ mk loc (SchemaDecl (U.mk name, schema)) ]
| (name,ns) = namespace_binding ->
[ mk loc (Namespace (name, ns)) ]
......
......@@ -13,6 +13,11 @@ open Pxp_types
open Value
open Ident
open Encodings
open List
open Url
type buf =
{ mutable buffer : string;
......@@ -21,17 +26,17 @@ type buf =
let txt = { buffer = String.create 1024; pos = 0; length = 1024 }
let resize n =
let resize txt n =
let new_len = txt.length * 2 + n in
let new_buf = String.create new_len in
String.unsafe_blit txt.buffer 0 new_buf 0 txt.pos;
txt.buffer <- new_buf;
txt.length <- new_len
let add_string s =
let add_string txt s =
let len = String.length s in
let new_pos = txt.pos + len in
if new_pos > txt.length then resize len;
if new_pos > txt.length then resize txt len;
String.unsafe_blit s 0 txt.buffer txt.pos len;
txt.pos <- new_pos
......@@ -109,7 +114,7 @@ let load_expat =
let p = Expat.parser_create "" in
Expat.set_start_element_handler p start_element_handler;
Expat.set_end_element_handler p end_element_handler;
Expat.set_character_data_handler p add_string;
Expat.set_character_data_handler p add_string txt;
let rec loop () =
let n = input ic buf 0 buflen in
if (n > 0) then (Expat.parse_sub p buf 0 n; loop ())
......@@ -139,7 +144,7 @@ let load_expat s =
let pxp_handle_event = function
| E_start_tag (name,att,_) -> start_element_handler name att
| E_char_data data -> add_string data
| E_char_data data -> add_string txt data
| E_end_tag (_,_) -> end_element_handler ()
| _ -> ()
......@@ -153,17 +158,26 @@ let pxp_config =
let load_pxp s =
try
let mgr = create_entity_manager pxp_config (from_file s) in
let mgr = create_entity_manager pxp_config
(match s with
| Url(s) -> from_string s
| Filename(s) -> from_file s ) in
process_entity pxp_config (`Entry_document[`Extend_dtd_fully]) mgr pxp_handle_event;
with exn ->
raise (Location.Generic (Pxp_types.string_of_exn exn))
(* Recognize if it is a valid url. If it is connect and bufferize
in a string otherwise consider it as filename *)
let load_xml s =
Location.protect_op "load_xml";
try
(match !use_parser with
| `Expat -> load_expat s
| `Pxp -> load_pxp s);
| `Pxp -> load_pxp (process_url s));
match !stack with
| [ Element x ] -> stack := []; x
| _ -> assert false
......@@ -184,11 +198,18 @@ let load_html s =
in
Location.protect_op "load_html";
let ic = open_in s in
let doc = Nethtml.parse_document
~dtd:Nethtml.relaxed_html40_dtd
(Lexing.from_channel ic) in
let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
close_in ic;
val_of_docs doc
match process_url s with
| Filename(s) -> (
let ic = open_in s in
let doc = Nethtml.parse_document
~dtd:Nethtml.relaxed_html40_dtd
(Lexing.from_channel ic) in
let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
close_in ic;
val_of_docs doc)
| Url s -> (
let doc = Nethtml.parse_document
~dtd:Nethtml.relaxed_html40_dtd
(Lexing.from_string s) in
let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
val_of_docs doc)
......@@ -824,3 +824,6 @@ let parse_schema source =
let schema_of_file fname = parse_schema (Pxp_types.from_file fname)
let schema_of_string s = parse_schema (Pxp_types.from_string s)
......@@ -13,3 +13,6 @@ val schema_of_node: pxp_node -> schema
(** shortand for "parse_schema (Pxp_types.from_file <fname>)" *)
val schema_of_file: string -> schema
(** shortand for "parse_schema (Pxp_types.from_string <fname>)" *)
val schema_of_string: string -> schema
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment