print_xml.ml 6.59 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
(* Print XML documents *)

(* The write_*_function are inspired from Pxp_aux.ml *)

open Netconversion

let write_markup_string ~to_enc buf s =
  let s' = if to_enc = `Enc_utf8 then s
  else convert
    ~in_enc:`Enc_utf8
    ~out_enc:to_enc
    ~subst:(fun n -> 
	      failwith ("Cannot represent code point " ^ string_of_int n))
    s
  in
16
  buf s'
17 18 19 20 21

let write_data_string ~to_enc buf s =
  let write_part i len =
    if (len > 0) then
      if to_enc = `Enc_utf8 
22
      then buf (String.sub s i len)
23 24 25 26 27 28 29 30
      else
	let s' = 
	  convert
            ~in_enc:`Enc_utf8
            ~out_enc:to_enc
            ~subst:(fun n -> "&#" ^ string_of_int n ^ ";")
	    ~range_pos:i ~range_len:len s
	in
31
	buf s'
32 33 34 35 36 37 38
  in
  let i = ref 0 in
  for k = 0 to String.length s - 1 do
    match s.[k] with
      | ('&' | '<' | '>' | '"' | '%') as c ->
          write_part !i (k - !i);
          begin match c with
39 40 41 42 43
              '&' -> buf "&amp;"
            | '<' -> buf "&lt;"
            | '>' -> buf "&gt;"
            | '"' -> buf "&quot;"
            | '%' -> buf "&#37;"  (* reserved in DTDs *)
44 45 46 47 48 49 50 51 52 53
            | _   -> assert false
          end;
          i := k+1
      | _ -> ()
  done;
  write_part !i (String.length s - !i)


(*************)

54 55

open Value
56
open Ident
57
module U = Encodings.Utf8
58
module H = Hashtbl.Make(Ns.Uri)
59

60
let exn_print_xml = CDuceExn (Pair (
61
				Atom (Atoms.V.mk_ascii "Invalid_argument"),
62
				string_latin1 "print_xml"))
63

64 65 66 67 68 69
let blank = U.mk " "
let true_literal = U.mk "true"
let false_literal = U.mk "false"

  (* @raise exn_print_xml in case of failure. Rationale: schema printing is
   * the last attempt to print a value, others have already failed *)
Pietro Abate's avatar
Pietro Abate committed
70 71 72 73 74 75 76
let rec schema_value ?(recurs=true) ~wds ~wcs v = match v with
  | Abstract ("float",o) ->
      wds (U.mk (string_of_float (Obj.magic o : float)))
  | Abstract ("cdata",o) ->
      wcs (U.mk "<![CDATA[");
      wcs (U.mk (U.get_str (Obj.magic o : U.t)));
      wcs (U.mk "]]>")
77 78 79
  | Record _ as v ->
      (try
        wds (Schema_builtin.string_of_time_type (Value.get_fields v))
80
      with Schema_builtin.Error _ -> raise exn_print_xml)
81 82 83
  | Integer i -> wds (U.mk (Intervals.V.to_string i))
  | v when Value.equal v Value.vtrue -> wds true_literal
  | v when Value.equal v Value.vfalse -> wds false_literal
Pietro Abate's avatar
Pietro Abate committed
84
  | Pair _ as v when recurs -> schema_values ~wds ~wcs v
85
  | String_utf8 _ | String_latin1 _ as v -> wds (fst (get_string_utf8 v))
86 87
  | _ -> raise exn_print_xml

Pietro Abate's avatar
Pietro Abate committed
88
and schema_values ~wds ~wcs v =
89 90
  match v with
  | Pair (hd, Atom a) when a = Sequence.nil_atom ->
Pietro Abate's avatar
Pietro Abate committed
91
      schema_value ~recurs:false ~wds ~wcs hd
92
  | Pair (hd, tl) ->
Pietro Abate's avatar
Pietro Abate committed
93
      schema_value ~recurs:false ~wds ~wcs hd;
94
      wds blank;
Pietro Abate's avatar
Pietro Abate committed
95
      schema_values ~wds ~wcs tl
96 97
  | _ -> raise exn_print_xml

98
let to_buf ~utf8 buffer ns_table v subst = 
99
  let to_enc = if utf8 then `Enc_utf8 else `Enc_iso88591 in
100

101
  let printer = Ns.Printer.printer ns_table in
102

103 104
  let wms = write_markup_string ~to_enc buffer
  and wds s = write_data_string ~to_enc buffer (U.get_str s)
Pietro Abate's avatar
Pietro Abate committed
105
  and wcs s = buffer (U.get_str s) in
106
  let write_att (n,v) =
107
    wms (" " ^ (Ns.Printer.attr printer (Label.value n)) ^ "=\""); wds v; wms "\"" in
108 109 110 111 112
  let write_xmlns (pr,ns) =
    let pr = U.get_str pr in
    if pr = "" then wms " xmlns"
    else (wms " xmlns:"; wms pr);
    wms "=\"";
113
    wds (Ns.Uri.value ns);
114
    wms "\"" in
115

116 117
  let element_start q xmlns attrs = 
    wms ("<" ^ (Ns.Printer.tag printer (Atoms.V.value q))); 
118 119 120
    List.iter write_xmlns xmlns;
    List.iter write_att attrs; 
    wms ">"
121 122
  and empty_element q xmlns attrs = 
    wms ("<" ^ (Ns.Printer.tag printer (Atoms.V.value q))); 
123 124 125
    List.iter write_xmlns xmlns;
    List.iter write_att attrs; 
    wms "/>"
126 127
  and element_end q = 
    wms ("</" ^ (Ns.Printer.tag printer (Atoms.V.value q)) ^ ">")
128
  and document_start () = 
129
(*    wms ("<?xml version='1.0' encoding='" ^
130
	 Netconversion.string_of_encoding to_enc ^
131
	 "'?>\n") *)
132
    ()
133
  in
134

135
  let rec register_elt = function
136 137
    | Xml (Atom q, Record attrs, content) 
    | XmlNs (Atom q, Record attrs, content, _) ->
138
	Imap.iter
139 140
	  (fun n _ -> Ns.Printer.register_qname printer 
	     (Label.value (Label.from_int n)))
141
	  attrs;
142
	Ns.Printer.register_qname printer (Atoms.V.value q);
143 144 145 146 147 148
	register_content content
    | _ -> ()
  and register_content = function
    | String_utf8 (_,_,_,q)
    | String_latin1 (_,_,_,q) -> register_content q
    | Pair (x, q) -> register_elt x; register_content q
149
    | Concat (x,y) -> register_content x; register_content y
150 151 152 153 154
    | _ -> () 
  in
  register_elt v;

  let rec print_elt xmlns = function
155 156
    | Xml (Atom tag, Record attrs, content)
    | XmlNs (Atom tag, Record attrs, content, _) ->
157
	let attrs = Imap.map_elements
158
		      (fun n v -> 
159 160 161 162
                         if is_str v then begin
                           let (s,q) = get_string_utf8 v in
                           match q with
                             | Atom a when a = Sequence.nil_atom -> 
163
                                 (Label.from_int n), s
164 165 166 167
                             | _ -> raise exn_print_xml
                         end else begin
                           let buf = Buffer.create 20 in
                           let wds s = Buffer.add_string buf (U.get_str s) in
Pietro Abate's avatar
Pietro Abate committed
168
                           schema_value ~wds ~wcs:wds v;
169
                           (Label.from_int n, U.mk (Buffer.contents buf))
170
                         end
171
		      ) attrs in
172
	(match content with
173
	  | Atom a when a = Sequence.nil_atom -> empty_element tag xmlns attrs
174
	  | _ ->
175
	      element_start tag xmlns attrs;
176 177
	      print_content content;
	      element_end tag)
178
    | _ -> raise exn_print_xml
179 180 181 182
  and print_content v =
    let (s,q) = get_string_utf8 v in
    wds s;
    match q with
183
      | Pair ((Xml _ | XmlNs _) as x, q) -> print_elt [] x; print_content q
184
      | Atom a when a = Sequence.nil_atom -> ()
Pietro Abate's avatar
Pietro Abate committed
185
      | v -> schema_value ~wds ~wcs v
186
  in
187 188 189 190 191 192
  let uri_subst prefixes replace =
    let h = H.create 10 in
    List.iter (fun (k,v) -> H.replace h k v) replace;
    List.map (fun (pr,ns) -> if H.mem h ns then (pr,H.find h ns) else
	(pr,ns)) prefixes
  in
193
  document_start ();
194 195 196
  match subst with
      [] -> print_elt (Ns.Printer.prefixes printer) v
    | _ -> print_elt (uri_subst (Ns.Printer.prefixes printer) subst) v
197

198
let print_xml ~utf8 ns_table s =
199
  let buf = Buffer.create 32 in
200 201 202 203 204 205 206
  to_buf ~utf8 (Buffer.add_string buf) ns_table s [];
  let s = Buffer.contents buf in
  if utf8 then string_utf8 (U.mk s) else string_latin1 s

let print_xml_subst ~utf8 ns_table s subst =
  let buf = Buffer.create 32 in
  to_buf ~utf8 (Buffer.add_string buf) ns_table s subst;
207
  let s = Buffer.contents buf in
208
  if utf8 then string_utf8 (U.mk s) else string_latin1 s
209
 
210
let dump_xml  ~utf8 ns_table s =
211
  to_buf ~utf8 print_string ns_table s [];
212
  Value.nil