Commit d5c0b6f0 authored by Pietro Abate's avatar Pietro Abate

[r2003-05-20 22:17:46 by cvscast] Empty log message

Original author: cvscast
Date: 2003-05-20 22:17:47+00:00
parent 999cf81c
DEBUG = -g
OCAMLCP = ocamlc
OCAMLC = ocamlfind $(OCAMLCP) -package $(PACKAGES)
OCAMLOPT = ocamlfind ocamlopt -inline 25 -package $(PACKAGES)
OCAMLOPT = ocamlfind ocamlopt -p -inline 25 -package $(PACKAGES)
# extra options:
# -p (profiling)
......
......@@ -22,6 +22,7 @@ struct
let equal_index = (==)
let mk s = s
let mk_latin1 s = Netconversion.recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s
let mk_idx i = i
let get_str s = s
let get_idx i = i
......
......@@ -18,6 +18,7 @@ sig
val equal_index: uindex -> uindex -> bool
val mk: string -> t
val mk_latin1: string -> t
val mk_idx: int -> uindex
val get_str: t -> string
val get_idx: uindex -> int
......
......@@ -9,13 +9,32 @@ open Value
open Ident
open Encodings
let is_ws s =
let rec check i =
(i < 0) ||
(match (String.unsafe_get s i) with
| ' ' | '\t' | '\n' | '\r' -> check (i - 1)
| _ -> false) in
check (String.length s - 1)
type buf =
{ mutable buffer : string;
mutable pos : int;
mutable length : int }
let create n = { buffer = String.create n; pos = 0; length = n }
let resize b n =
let new_len = b.length * 2 + n in
let new_buf = String.create new_len in
String.unsafe_blit b.buffer 0 new_buf 0 b.pos;
b.buffer <- new_buf;
b.length <- new_len
let add_string b s =
let len = String.length s in
let new_pos = b.pos + len in
if new_pos > b.length then resize b len;
String.unsafe_blit s 0 b.buffer b.pos len;
b.pos <- new_pos
let rec only_ws s i =
(i = 0) ||
(let i = pred i in match (String.unsafe_get s i) with
| ' ' | '\t' | '\n' | '\r' -> only_ws s i
| _ -> false)
let string s q =
......@@ -51,7 +70,7 @@ let load_xml_aux s =
| Some x -> curr := x
| None -> () in
let txt = Buffer.create 1024 in
let txt = create 1024 in
let rec parse_elt name att =
let elt = elem name att (parse_seq ()) in
......@@ -61,29 +80,35 @@ let load_xml_aux s =
elt
and dump_txt q =
let data = Buffer.contents txt in
Buffer.clear txt;
if (is_ws data) then q () else string data (q ())
and parse_seq () =
match !curr with
| E_start_tag (name,att,_) ->
get ();
let data = Buffer.contents txt in
Buffer.clear txt;
let e1 = parse_elt name att in
let rest = parse_seq () in
let q = Pair (e1,rest) in
if (is_ws data) then q else string data q
if only_ws txt.buffer txt.pos then
let () = txt.pos <- 0 in
let e1 = parse_elt name att in
let rest = parse_seq () in
Pair (e1,rest)
else
let s = String.sub txt.buffer 0 txt.pos in
let () = txt.pos <- 0 in
let e1 = parse_elt name att in
let rest = parse_seq () in
let q = Pair (e1,rest) in
string s q
| E_char_data data ->
get();
Buffer.add_string txt data;
add_string txt data;
parse_seq ()
| E_end_tag (_,_) ->
let data = Buffer.contents txt in
Buffer.clear txt;
if (is_ws data) then nil else string data nil
if only_ws txt.buffer txt.pos then (txt.pos <- 0; nil)
else
let s = String.sub txt.buffer 0 txt.pos in
txt.pos <- 0;
string s nil
| _ -> failwith "Expect start_tag, char_data, or end_tag"
and parse_doc () =
......@@ -106,7 +131,7 @@ let load_xml s =
let load_html s =
let rec val_of_doc q = function
| Nethtml.Data data ->
if (is_ws data) then q else string data q
if (only_ws data (String.length data)) then q else string data q
| Nethtml.Element (tag, att, child) ->
Pair (elem tag att (val_of_docs child), q)
and val_of_docs = function
......@@ -122,3 +147,4 @@ let load_html s =
let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
close_in ic;
val_of_docs doc
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment