dtd2cduce.ml 3.24 KB
Newer Older
1
2
3
4
5
6
(* TODO: 
    - attributes ! 
    - clever factorizations of content model and attribute specifs
         (e.g.  type XHTML_inlien = [ ( Char | ... ) ])
    - better pretty-printing
*)
7
open Printf
8
9
10
11
open Pxp_yacc
open Pxp_lexer_types
open Pxp_types

12
13
14
let mixed_table : ('a,unit) Hashtbl.t = Hashtbl.create 127
let regexp_table : ('a,unit) Hashtbl.t = Hashtbl.create 127

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
let import_dtd ppf name filename =
  let rec regexp ppf = function
    | Optional re -> Format.fprintf ppf "%a?" regexp re
    | Repeated re -> Format.fprintf ppf "%a*" regexp re
    | Repeated1 re -> Format.fprintf ppf "%a+" regexp re
    | Seq (re1 :: res) -> 
	Format.fprintf ppf "(@[%a" regexp re1;
	List.iter (fun re -> Format.fprintf ppf "@ %a" regexp re) res;
	Format.fprintf ppf "@])"
    | Alt (re1 :: res) -> 
	Format.fprintf ppf "(@[%a" regexp re1;
	List.iter (fun re -> Format.fprintf ppf "@ | %a" regexp re) res;
	Format.fprintf ppf "@])"
    | Child s -> Format.fprintf ppf "%s" (name s)
    | _ -> assert false
  in
  let content ppf = function
    | Unspecified | Any -> Format.fprintf ppf "Any*"
    | Empty -> Format.fprintf ppf ""
    | Mixed l -> 
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
	(try 
	   Hashtbl.find mixed_table l;
	   Format.fprintf ppf "MIXED:CACHED!"
	 with Not_found ->
(*	   Hashtbl.add mixed_table l (); *)
	   let l = List.map 
		     (function 
			| MPCDATA -> "Char"
			| MChild s -> name s) l in
	   Format.fprintf ppf "( %s )*" (String.concat " | " l))
    | Regexp r -> 
	(try 
	   Hashtbl.find regexp_table r;
	   Format.fprintf ppf "REGEXP:CACHED!"
	 with Not_found ->
(*	   Hashtbl.add regexp_table r ();*)
	   regexp ppf r
	)
53
  in    
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  let att_type ppf = function
    | A_enum l -> 
	Format.fprintf ppf "(";
	ignore 
	  (List.fold_left 
	     (fun first s -> 
		if not first then Format.fprintf ppf " | ";
		Format.fprintf ppf "\"%s\"" s; false) true l);
	Format.fprintf ppf ")"
    | _ -> Format.fprintf ppf "String"
  in
  let attrib ppf e =
    ignore 
      (List.fold_left
	 (fun first a ->
	    let (at,ad) = e # attribute a in
	    match ad with
	      | D_fixed _ -> first
	      | _ ->
		Format.fprintf ppf "%s%s=%s%a"
		  (if first then "" else "; ")
		  a
		  (if ad = D_required then "" else "?")
		  att_type at;
		  false
	 )
	 true (e # attribute_names)
      )
  in
83
  let elt ppf e = 
84
    Format.fprintf ppf "type @[<2>%s =@ @[<3><%s {|%a|}>[@ @[%a@]@ ]@]@];;@\n" 
85
86
      (name (e # name))
      (e # name)
87
      attrib e
88
89
90
91
92
      content (e # content_model)
  in      
  let handle = function
    | E_start_doc(_,_,dtd) ->
	List.iter (fun x -> elt ppf (dtd # element x)) (dtd # element_names);
93
94
95
96
	exit 0
    | E_error e ->
	printf "Error: %s\n" (Pxp_types.string_of_exn e);
	exit 2
97
98
    | _ -> ()
  in
99
100
101
102
103
104
105
106
107
108
109
110


  let config = default_config in
  let mgr = create_entity_manager config (from_file filename) in
  let next_event = 
    create_pull_parser config (`Entry_document [`Extend_dtd_fully]) mgr in
  let event = ref (Some E_end_of_stream) in

  Format.fprintf ppf 
    "(* This type has been automatically generated from %s by dtd2cduce *)@\n"
    filename;

111
112
  let rec loop () =
    match next_event () with
113
      | None -> failwith "End of file reached before DTD definition"
114
115
116
117
118
119
120
121
122
      | Some e -> handle e; loop ()
  in
  loop ()
 
	  

let () =
  let name s = Sys.argv.(1) ^ s in
  import_dtd Format.std_formatter name Sys.argv.(2)