load_xml.ml 2.01 KB
Newer Older
1
2
3
4
5
6
7
8
9
(* Loading XML documents *)

(*TODO: close the file ! *)

open Pxp_yacc
open Pxp_lexer_types
open Pxp_types
open Value

10
11
12
13
14
15
16
17
18
let is_ws s =
  let rec check i = 
    (i < 0) || 
    (match s.[i] with
       | ' ' | '\t' | '\n' | '\r' -> check (i - 1) 
       | _ -> false) in
  check (String.length s - 1)


19
let string s q =
20
  String (0,String.length s,s,q)
21
22
23
24
25
26
27
28
29
30
31
32
33

let run s =
  let config = { default_config with 
		   store_element_positions = false;
		   drop_ignorable_whitespace = true
	       }
  in
  let mgr = create_entity_manager config (from_file s) in
  let next_event = 
    create_pull_parser config (`Entry_document[]) mgr in
  let curr = ref E_end_of_stream in
  let get () =
    match next_event () with 
34
35
      | Some (E_error exn) -> failwith (Pxp_types.string_of_exn exn)
      | Some E_end_of_stream -> failwith "Unexpected end of XML stream"
36
37
38
39
      | Some x -> curr := x 
      | None -> () in

  let rec parse_elt name att =
40
    let att = List.map (fun (l,v) -> Types.LabelPool.mk l, string v nil) att in
41
    let att = SortedMap.from_list (fun _ _ -> assert false) att in
42
    let child = parse_seq true in
43
    
44
    let elt = Xml 
45
		(Atom (Types.AtomPool.mk name),
46
47
48
49
50
51
52
53
		 Pair (Record att, child)
		) in
    (match !curr with
       | E_end_tag (_,_) -> get ()
       | _ -> failwith "Expect end_tag");
    elt
      
      
54
  and parse_seq dropws =
55
56
57
58
    match !curr with
      | E_start_tag (name,att,_) -> 
	  get ();
	  let e1 = parse_elt name att in
59
	  let rest = parse_seq true in
60
61
62
	  Pair (e1, rest)
      | E_char_data data ->
	  get ();
63
64
65
	  if dropws && (is_ws data) 
	  then parse_seq true 
	  else string data (parse_seq false)
66
67
68
69
70
71
72
73
74
75
      | E_end_tag (_,_) ->
	  nil
      | _ -> failwith "Expect start_tag, char_data, or end_tag"
	  
  and parse_doc () =
    match !curr with
      | E_start_tag (name,att,_) ->  get (); parse_elt name att
      | _ -> get (); parse_doc ()
  in
  get ();
76
77
  parse_doc ()

78
79

let run s =
80
  Location.protect_op "load_xml";
81
82
83
84
85
  try run s 
  with exn ->
    raise
      (Location.Generic (Pxp_types.string_of_exn exn))