load_xml.ml 2.18 KB
Newer Older
1
2
3
4
5
6
7
8
9
(* Loading XML documents *)

(*TODO: close the file ! *)

open Pxp_yacc
open Pxp_lexer_types
open Pxp_types
open Value

10
11
12
13
14
15
16
17
18
let is_ws s =
  let rec check i = 
    (i < 0) || 
    (match s.[i] with
       | ' ' | '\t' | '\n' | '\r' -> check (i - 1) 
       | _ -> false) in
  check (String.length s - 1)


19
let string s q =
20
  String (0,String.length s,s,q)
21
22
23
24
25
26
27
28
29
30
31
32
33

let run s =
  let config = { default_config with 
		   store_element_positions = false;
		   drop_ignorable_whitespace = true
	       }
  in
  let mgr = create_entity_manager config (from_file s) in
  let next_event = 
    create_pull_parser config (`Entry_document[]) mgr in
  let curr = ref E_end_of_stream in
  let get () =
    match next_event () with 
34
35
      | Some (E_error exn) -> failwith (Pxp_types.string_of_exn exn)
      | Some E_end_of_stream -> failwith "Unexpected end of XML stream"
36
37
38
      | Some x -> curr := x 
      | None -> () in

39
40
  let txt = Buffer.create 1024 in

41
  let rec parse_elt name att =
42
    let att = List.map (fun (l,v) -> Types.LabelPool.mk l, string v nil) att in
43
    let att = SortedMap.from_list (fun _ _ -> assert false) att in
44
    let child = parse_seq () in
45
    
46
    let elt = Xml 
47
		(Atom (Types.AtomPool.mk name),
48
49
50
51
52
53
54
		 Pair (Record att, child)
		) in
    (match !curr with
       | E_end_tag (_,_) -> get ()
       | _ -> failwith "Expect end_tag");
    elt
      
55
56
57
58
59

  and dump_txt q =
    let data = Buffer.contents txt in
    Buffer.clear txt;
    if (is_ws data) then q () else string data (q ())
60
      
61
  and parse_seq () =
62
    match !curr with
63
      | E_start_tag (name,att,_) ->
64
	  get ();
65
66
67
68
69
	  dump_txt (fun () ->
		      let e1 = parse_elt name att in
		      let rest = parse_seq () in
		      Pair (e1, rest)
		   )
70
      | E_char_data data ->
71
72
73
	  get();
	  Buffer.add_string txt data;
	  parse_seq ()
74
      | E_end_tag (_,_) ->
75
	  dump_txt (fun () -> nil)
76
77
78
79
80
81
82
83
      | _ -> failwith "Expect start_tag, char_data, or end_tag"
	  
  and parse_doc () =
    match !curr with
      | E_start_tag (name,att,_) ->  get (); parse_elt name att
      | _ -> get (); parse_doc ()
  in
  get ();
84
85
  parse_doc ()

86
87

let run s =
88
  Location.protect_op "load_xml";
89
90
91
92
93
  try run s 
  with exn ->
    raise
      (Location.Generic (Pxp_types.string_of_exn exn))