load_xml.ml 1.67 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
(* Loading XML documents *)

(*TODO: close the file ! *)

open Pxp_yacc
open Pxp_lexer_types
open Pxp_types
open Value

let string s q =
  let rec check_ws i = (i < 0) || 
		       (match s.[i] with
			  | ' ' | '\t' | '\n' | '\r' -> check_ws (i - 1) 
			  | _ -> false) in
  if check_ws (String.length s - 1) then q 
  else String (0,String.length s,s,q)

let run s =
  let config = { default_config with 
		   store_element_positions = false;
		   drop_ignorable_whitespace = true
	       }
  in
  let mgr = create_entity_manager config (from_file s) in
  let next_event = 
    create_pull_parser config (`Entry_document[]) mgr in
  let curr = ref E_end_of_stream in
  let get () =
    match next_event () with 
      | Some x -> curr := x 
      | None -> () in

  let rec parse_elt name att =
    let att = List.map (fun (l,v) -> Types.label l, string v nil) att in
    let att = SortedMap.from_list (fun _ _ -> assert false) att in
    let child = parse_seq () in
    
    let elt = Pair 
		(Atom (Types.mk_atom name),
		 Pair (Record att, child)
		) in
    (match !curr with
       | E_end_tag (_,_) -> get ()
       | _ -> failwith "Expect end_tag");
    elt
      
      
  and parse_seq () =
    match !curr with
      | E_start_tag (name,att,_) -> 
	  get ();
	  let e1 = parse_elt name att in
	  let rest = parse_seq () in
	  Pair (e1, rest)
      | E_char_data data ->
	  get ();
	  let rest = parse_seq () in
	  string data rest
      | E_end_tag (_,_) ->
	  nil
      | _ -> failwith "Expect start_tag, char_data, or end_tag"
	  
  and parse_doc () =
    match !curr with
      | E_start_tag (name,att,_) ->  get (); parse_elt name att
      | _ -> get (); parse_doc ()
  in
  get ();
69
70
  parse_doc ()