load_xml.ml 1.96 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
(* Loading XML documents *)

(*TODO: close the file ! *)

open Pxp_yacc
open Pxp_lexer_types
open Pxp_types
open Value

let string s q =
  let rec check_ws i = (i < 0) || 
		       (match s.[i] with
			  | ' ' | '\t' | '\n' | '\r' -> check_ws (i - 1) 
			  | _ -> false) in
  if check_ws (String.length s - 1) then q 
  else String (0,String.length s,s,q)

let run s =
  let config = { default_config with 
		   store_element_positions = false;
		   drop_ignorable_whitespace = true
	       }
  in
  let mgr = create_entity_manager config (from_file s) in
  let next_event = 
    create_pull_parser config (`Entry_document[]) mgr in
  let curr = ref E_end_of_stream in
  let get () =
    match next_event () with 
30
31
      | Some (E_error exn) -> failwith (Pxp_types.string_of_exn exn)
      | Some E_end_of_stream -> failwith "Unexpected end of XML stream"
32
33
34
35
      | Some x -> curr := x 
      | None -> () in

  let rec parse_elt name att =
36
    let att = List.map (fun (l,v) -> Types.LabelPool.mk l, string v nil) att in
37
38
39
    let att = SortedMap.from_list (fun _ _ -> assert false) att in
    let child = parse_seq () in
    
40
    let elt = Xml 
41
		(Atom (Types.AtomPool.mk name),
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
		 Pair (Record att, child)
		) in
    (match !curr with
       | E_end_tag (_,_) -> get ()
       | _ -> failwith "Expect end_tag");
    elt
      
      
  and parse_seq () =
    match !curr with
      | E_start_tag (name,att,_) -> 
	  get ();
	  let e1 = parse_elt name att in
	  let rest = parse_seq () in
	  Pair (e1, rest)
      | E_char_data data ->
	  get ();
	  let rest = parse_seq () in
	  string data rest
      | E_end_tag (_,_) ->
	  nil
      | _ -> failwith "Expect start_tag, char_data, or end_tag"
	  
  and parse_doc () =
    match !curr with
      | E_start_tag (name,att,_) ->  get (); parse_elt name att
      | _ -> get (); parse_doc ()
  in
  get ();
71
72
  parse_doc ()

73
74

let run s =
75
  Location.protect_op "load_xml";
76
77
78
79
80
  try run s 
  with exn ->
    raise
      (Location.Generic (Pxp_types.string_of_exn exn))