Commit 779babe6 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2003-05-20 22:17:46 by cvscast] Empty log message

Original author: cvscast
Date: 2003-05-20 22:17:47+00:00
parent 149fc662
DEBUG = -g DEBUG = -g
OCAMLCP = ocamlc OCAMLCP = ocamlc
OCAMLC = ocamlfind $(OCAMLCP) -package $(PACKAGES) OCAMLC = ocamlfind $(OCAMLCP) -package $(PACKAGES)
OCAMLOPT = ocamlfind ocamlopt -inline 25 -package $(PACKAGES) OCAMLOPT = ocamlfind ocamlopt -p -inline 25 -package $(PACKAGES)
# extra options: # extra options:
# -p (profiling) # -p (profiling)
......
...@@ -22,6 +22,7 @@ struct ...@@ -22,6 +22,7 @@ struct
let equal_index = (==) let equal_index = (==)
let mk s = s let mk s = s
let mk_latin1 s = Netconversion.recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s let mk_latin1 s = Netconversion.recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s
let mk_idx i = i
let get_str s = s let get_str s = s
let get_idx i = i let get_idx i = i
......
...@@ -18,6 +18,7 @@ sig ...@@ -18,6 +18,7 @@ sig
val equal_index: uindex -> uindex -> bool val equal_index: uindex -> uindex -> bool
val mk: string -> t val mk: string -> t
val mk_latin1: string -> t val mk_latin1: string -> t
val mk_idx: int -> uindex
val get_str: t -> string val get_str: t -> string
val get_idx: uindex -> int val get_idx: uindex -> int
......
...@@ -9,13 +9,32 @@ open Value ...@@ -9,13 +9,32 @@ open Value
open Ident open Ident
open Encodings open Encodings
let is_ws s = type buf =
let rec check i = { mutable buffer : string;
(i < 0) || mutable pos : int;
(match (String.unsafe_get s i) with mutable length : int }
| ' ' | '\t' | '\n' | '\r' -> check (i - 1)
| _ -> false) in let create n = { buffer = String.create n; pos = 0; length = n }
check (String.length s - 1)
let resize b n =
let new_len = b.length * 2 + n in
let new_buf = String.create new_len in
String.unsafe_blit b.buffer 0 new_buf 0 b.pos;
b.buffer <- new_buf;
b.length <- new_len
let add_string b s =
let len = String.length s in
let new_pos = b.pos + len in
if new_pos > b.length then resize b len;
String.unsafe_blit s 0 b.buffer b.pos len;
b.pos <- new_pos
let rec only_ws s i =
(i = 0) ||
(let i = pred i in match (String.unsafe_get s i) with
| ' ' | '\t' | '\n' | '\r' -> only_ws s i
| _ -> false)
let string s q = let string s q =
...@@ -51,7 +70,7 @@ let load_xml_aux s = ...@@ -51,7 +70,7 @@ let load_xml_aux s =
| Some x -> curr := x | Some x -> curr := x
| None -> () in | None -> () in
let txt = Buffer.create 1024 in let txt = create 1024 in
let rec parse_elt name att = let rec parse_elt name att =
let elt = elem name att (parse_seq ()) in let elt = elem name att (parse_seq ()) in
...@@ -61,29 +80,35 @@ let load_xml_aux s = ...@@ -61,29 +80,35 @@ let load_xml_aux s =
elt elt
and dump_txt q =
let data = Buffer.contents txt in
Buffer.clear txt;
if (is_ws data) then q () else string data (q ())
and parse_seq () = and parse_seq () =
match !curr with match !curr with
| E_start_tag (name,att,_) -> | E_start_tag (name,att,_) ->
get (); get ();
let data = Buffer.contents txt in if only_ws txt.buffer txt.pos then
Buffer.clear txt; let () = txt.pos <- 0 in
let e1 = parse_elt name att in let e1 = parse_elt name att in
let rest = parse_seq () in let rest = parse_seq () in
let q = Pair (e1,rest) in Pair (e1,rest)
if (is_ws data) then q else string data q else
let s = String.sub txt.buffer 0 txt.pos in
let () = txt.pos <- 0 in
let e1 = parse_elt name att in
let rest = parse_seq () in
let q = Pair (e1,rest) in
string s q
| E_char_data data -> | E_char_data data ->
get(); get();
Buffer.add_string txt data; add_string txt data;
parse_seq () parse_seq ()
| E_end_tag (_,_) -> | E_end_tag (_,_) ->
let data = Buffer.contents txt in if only_ws txt.buffer txt.pos then (txt.pos <- 0; nil)
Buffer.clear txt; else
if (is_ws data) then nil else string data nil let s = String.sub txt.buffer 0 txt.pos in
txt.pos <- 0;
string s nil
| _ -> failwith "Expect start_tag, char_data, or end_tag" | _ -> failwith "Expect start_tag, char_data, or end_tag"
and parse_doc () = and parse_doc () =
...@@ -106,7 +131,7 @@ let load_xml s = ...@@ -106,7 +131,7 @@ let load_xml s =
let load_html s = let load_html s =
let rec val_of_doc q = function let rec val_of_doc q = function
| Nethtml.Data data -> | Nethtml.Data data ->
if (is_ws data) then q else string data q if (only_ws data (String.length data)) then q else string data q
| Nethtml.Element (tag, att, child) -> | Nethtml.Element (tag, att, child) ->
Pair (elem tag att (val_of_docs child), q) Pair (elem tag att (val_of_docs child), q)
and val_of_docs = function and val_of_docs = function
...@@ -122,3 +147,4 @@ let load_html s = ...@@ -122,3 +147,4 @@ let load_html s =
let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
close_in ic; close_in ic;
val_of_docs doc val_of_docs doc
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment