(* print_xml.ml *)
(* Print XML documents *)

(* The write_*_string functions are inspired by Pxp_aux.ml *)

open Netconversion

(* [write_markup_string ~to_enc buf s] hands the markup string [s] to the
   output function [buf].  [s] is taken to be UTF-8; it is passed through
   unchanged when [to_enc] is UTF-8 and recoded to [to_enc] otherwise.

   @raise Failure if [s] contains a code point that [to_enc] cannot
   represent.  Unlike character data, markup is not rewritten with
   character references here, so the conversion fails instead. *)
let write_markup_string ~to_enc buf s =
  let recoded =
    match to_enc with
    | `Enc_utf8 -> s
    | _ ->
        convert
          ~in_enc:`Enc_utf8
          ~out_enc:to_enc
          ~subst:(fun n ->
            failwith ("Cannot represent code point " ^ string_of_int n))
          s
  in
  buf recoded

(* [write_data_string ~to_enc buf s] writes the UTF-8 character data [s]
   through the output function [buf], escaping what XML requires:

   - the characters &, <, >, double quote and % are replaced by the
     references &amp; &lt; &gt; &quot; and &#37; respectively;
   - every other run of characters is emitted as is when [to_enc] is UTF-8,
     and recoded to [to_enc] otherwise, any code point that [to_enc] cannot
     represent being replaced by a decimal character reference [&#n;].

   Empty runs are never passed to [buf]. *)
let write_data_string ~to_enc buf s =
  (* Emit the slice of [s] starting at [pos] and spanning [len] bytes. *)
  let flush pos len =
    if len > 0 then begin
      match to_enc with
      | `Enc_utf8 -> buf (String.sub s pos len)
      | _ ->
          buf
            (convert
               ~in_enc:`Enc_utf8
               ~out_enc:to_enc
               ~subst:(fun n -> "&#" ^ string_of_int n ^ ";")
               ~range_pos:pos ~range_len:len s)
    end
  in
  (* Replacement text for the characters that must not appear verbatim. *)
  let entity_of = function
    | '&' -> Some "&amp;"
    | '<' -> Some "&lt;"
    | '>' -> Some "&gt;"
    | '"' -> Some "&quot;"
    | '%' -> Some "&#37;"  (* reserved in DTDs *)
    | _ -> None
  in
  let total = String.length s in
  (* [start] is the beginning of the pending unescaped run, [k] the scan
     position. *)
  let rec scan start k =
    if k >= total then flush start (total - start)
    else
      match entity_of s.[k] with
      | Some entity ->
          flush start (k - start);
          buf entity;
          scan (k + 1) (k + 1)
      | None -> scan start (k + 1)
  in
  scan 0 0


(*************)

open Value
open Ident
module U = Encodings.Utf8

(* Exception raised by the printers in this file when a value cannot be
   printed as XML: a CDuce exception carrying the pair made of the atom
   Invalid_argument and the Latin-1 string print_xml. *)
let exn_print_xml =
  CDuceExn
    (Pair
       (Atom (Atoms.V.mk_ascii "Invalid_argument"),
        string_latin1 "print_xml"))

(* UTF-8 constants used when printing schema values: the separator written
   between the items of a sequence, and the two boolean literals. *)
let blank = U.mk " "
let true_literal = U.mk "true"
let false_literal = U.mk "false"

  (* [schema_value ?recurs ~wds v] writes a textual form of the value [v]
   * through the data writer [wds].  Handled values: floats stored as
   * [Abstract ("float", _)], records (printed by
   * [Schema_builtin.string_of_time_type]), integers, the two boolean
   * values, strings, and, only when [recurs] is true (the default),
   * sequences of such values, which are delegated to [schema_values].
   *
   * @raise exn_print_xml in case of failure. Rationale: schema printing is
   * the last attempt to print a value, others have already failed *)
let rec schema_value ?(recurs=true) ~wds v =
  match v with
  | Abstract ("float", f) ->
      wds (U.mk (string_of_float (Obj.magic f : float)))
  | Record _ ->
      (try wds (Schema_builtin.string_of_time_type (Value.get_fields v))
       with Schema_builtin.Error _ -> raise exn_print_xml)
  | Integer i -> wds (U.mk (Intervals.V.to_string i))
  | _ when Value.equal v Value.vtrue -> wds true_literal
  | _ when Value.equal v Value.vfalse -> wds false_literal
  | Pair _ ->
      (* Sequences are only accepted at the outermost level. *)
      if recurs then schema_values ~wds v else raise exn_print_xml
  | String_utf8 _ | String_latin1 _ -> wds (fst (get_string_utf8 v))
  | _ -> raise exn_print_xml

(* [schema_values ~wds v] writes the items of the sequence [v], separated by
   single blanks.  Each item is printed with [~recurs:false], so a nested
   sequence is rejected.
   @raise exn_print_xml if [v] is not a pair, or if an item cannot be
   printed. *)
and schema_values ~wds v =
  match v with
  | Pair (item, rest) ->
      schema_value ~recurs:false ~wds item;
      (match rest with
       | Atom a when a = Sequence.nil_atom -> ()
       | _ ->
           wds blank;
           schema_values ~wds rest)
  | _ -> raise exn_print_xml

(* [to_buf ~utf8 buffer ns_table v] serializes the XML element [v] by calling
   the output function [buffer] on successive chunks of text.

   - [utf8] selects the output encoding: UTF-8 when true, ISO-8859-1
     otherwise.
   - [buffer] receives the output strings, in document order.
   - [ns_table] is used to build the namespace printer that yields the
     printed form of tag and attribute names.

   The work is done in two passes.  The first one walks the whole value and
   registers every tag and attribute name with the namespace printer; the
   second one prints the document.  The namespace declarations given by the
   printer are written on the root element only.

   @raise exn_print_xml if [v] is not an [Xml] or [XmlNs] element of the
   expected shape, or if an attribute or a piece of content cannot be
   printed.
   @raise Failure if a piece of markup cannot be represented in the output
   encoding (see [write_markup_string]). *)
let to_buf ~utf8 buffer ns_table v =
  let to_enc = if utf8 then `Enc_utf8 else `Enc_iso88591 in

  let printer = Ns.Printer.printer ns_table in

  (* [wms] writes markup, [wds] writes escaped character data. *)
  let wms = write_markup_string ~to_enc buffer
  and wds s = write_data_string ~to_enc buffer (U.get_str s)
  in

  (* Writes one attribute, preceded by a space. *)
  let write_att (n, v) =
    wms (" " ^ Ns.Printer.attr printer (Label.value n) ^ "=\"");
    wds v;
    wms "\""
  in

  (* Writes one namespace declaration; an empty prefix declares the default
     namespace. *)
  let write_xmlns (pr, ns) =
    let pr = U.get_str pr in
    if pr = "" then wms " xmlns"
    else (wms " xmlns:"; wms pr);
    wms "=\"";
    wds (Ns.Uri.value ns);
    wms "\""
  in

  (* Writes the opening part shared by start tags and empty-element tags,
     then the delimiter [closing]. *)
  let open_tag q xmlns attrs closing =
    wms ("<" ^ Ns.Printer.tag printer (Atoms.V.value q));
    List.iter write_xmlns xmlns;
    List.iter write_att attrs;
    wms closing
  in

  let element_start q xmlns attrs = open_tag q xmlns attrs ">"
  and empty_element q xmlns attrs = open_tag q xmlns attrs "/>"
  and element_end q =
    wms ("</" ^ Ns.Printer.tag printer (Atoms.V.value q) ^ ">")
  and document_start () =
(*    wms ("<?xml version='1.0' encoding='" ^
	 Netconversion.string_of_encoding to_enc ^
	 "'?>\n") *)
    ()
  in

  (* First pass: register all the names used in the document. *)
  let rec register_elt = function
    | Xml (Atom q, Record attrs, content)
    | XmlNs (Atom q, Record attrs, content, _) ->
        Imap.iter
          (fun n _ ->
             Ns.Printer.register_qname printer
               (Label.value (Label.from_int n)))
          attrs;
        Ns.Printer.register_qname printer (Atoms.V.value q);
        register_content content
    | _ -> ()
  and register_content = function
    | String_utf8 (_, _, _, q)
    | String_latin1 (_, _, _, q) -> register_content q
    | Pair (x, q) -> register_elt x; register_content q
    | Concat (x, y) -> register_content x; register_content y
    | _ -> ()
  in
  register_elt v;

  (* Turns the attribute stored under key [n] into a (label, text) pair.
     String values must be plain strings (nothing after the string part);
     any other value goes through [schema_value]. *)
  let attr_binding n v =
    if is_str v then begin
      match get_string_utf8 v with
      | (s, Atom a) when a = Sequence.nil_atom -> (Label.from_int n, s)
      | _ -> raise exn_print_xml
    end else begin
      let buf = Buffer.create 20 in
      let wds s = Buffer.add_string buf (U.get_str s) in
      schema_value ~wds v;
      (Label.from_int n, U.mk (Buffer.contents buf))
    end
  in

  (* Second pass: print. *)
  let rec print_elt xmlns = function
    | Xml (Atom tag, Record attrs, content)
    | XmlNs (Atom tag, Record attrs, content, _) ->
        let attrs = Imap.map_elements attr_binding attrs in
        (match content with
         | Atom a when a = Sequence.nil_atom ->
             empty_element tag xmlns attrs
         | _ ->
             element_start tag xmlns attrs;
             print_content content;
             element_end tag)
    | _ -> raise exn_print_xml
  and print_content v =
    (* Leading text first, then whatever follows it. *)
    let (s, q) = get_string_utf8 v in
    wds s;
    match q with
    | Pair ((Xml _ | XmlNs _) as x, q) ->
        (* Nested elements carry no namespace declarations. *)
        print_elt [] x;
        print_content q
    | Atom a when a = Sequence.nil_atom -> ()
    | v -> schema_value ~wds v
  in
  document_start ();
  print_elt (Ns.Printer.prefixes printer) v

(* [print_xml ~utf8 ns_table s] serializes the XML element [s] with [to_buf]
   into an in-memory buffer and returns the result as a CDuce string value:
   a UTF-8 string when [utf8] is true, a Latin-1 string otherwise.
   Exceptions raised by [to_buf] are not caught. *)
let print_xml ~utf8 ns_table s =
  let out = Buffer.create 32 in
  to_buf ~utf8 (Buffer.add_string out) ns_table s;
  let text = Buffer.contents out in
  if utf8 then string_utf8 (U.mk text) else string_latin1 text

(* [dump_xml ~utf8 ns_table s] serializes the XML element [s] with [to_buf],
   writing the output with [print_string], and returns [Value.nil]. *)
let dump_xml ~utf8 ns_table s =
  let () = to_buf ~utf8 print_string ns_table s in
  Value.nil