dtd2cduce.ml 2.19 KB
Newer Older
1
2
3
4
5
6
(* TODO: 
    - attributes ! 
    - clever factorizations of content model and attribute specifs
         (e.g.  type XHTML_inlien = [ ( Char | ... ) ])
    - better pretty-printing
*)
7
open Printf
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
open Pxp_yacc
open Pxp_lexer_types
open Pxp_types

let import_dtd ppf name filename =
  let rec regexp ppf = function
    | Optional re -> Format.fprintf ppf "%a?" regexp re
    | Repeated re -> Format.fprintf ppf "%a*" regexp re
    | Repeated1 re -> Format.fprintf ppf "%a+" regexp re
    | Seq (re1 :: res) -> 
	Format.fprintf ppf "(@[%a" regexp re1;
	List.iter (fun re -> Format.fprintf ppf "@ %a" regexp re) res;
	Format.fprintf ppf "@])"
    | Alt (re1 :: res) -> 
	Format.fprintf ppf "(@[%a" regexp re1;
	List.iter (fun re -> Format.fprintf ppf "@ | %a" regexp re) res;
	Format.fprintf ppf "@])"
    | Child s -> Format.fprintf ppf "%s" (name s)
    | _ -> assert false
  in
  let content ppf = function
    | Unspecified | Any -> Format.fprintf ppf "Any*"
    | Empty -> Format.fprintf ppf ""
    | Mixed l -> 
	let l = List.map 
		  (function 
		     | MPCDATA -> "Char"
		     | MChild s -> name s) l in
36
	Format.fprintf ppf "( %s )*" (String.concat " | " l)
37
38
39
    | Regexp r -> regexp ppf r
  in    
  let elt ppf e = 
40
    Format.fprintf ppf "type @[<2>%s =@ @[<3><%s>[@ @[%a@]@ ]@]@];;@\n" 
41
42
43
44
45
46
47
      (name (e # name))
      (e # name)
      content (e # content_model)
  in      
  let handle = function
    | E_start_doc(_,_,dtd) ->
	List.iter (fun x -> elt ppf (dtd # element x)) (dtd # element_names);
48
49
50
51
	exit 0
    | E_error e ->
	printf "Error: %s\n" (Pxp_types.string_of_exn e);
	exit 2
52
53
    | _ -> ()
  in
54
55
56
57
58
59
60
61
62
63
64
65


  let config = default_config in
  let mgr = create_entity_manager config (from_file filename) in
  let next_event = 
    create_pull_parser config (`Entry_document [`Extend_dtd_fully]) mgr in
  let event = ref (Some E_end_of_stream) in

  Format.fprintf ppf 
    "(* This type has been automatically generated from %s by dtd2cduce *)@\n"
    filename;

66
67
  let rec loop () =
    match next_event () with
68
      | None -> failwith "End of file reached before DTD definition"
69
70
71
72
73
74
75
76
77
      | Some e -> handle e; loop ()
  in
  loop ()
 
	  

let () =
  let name s = Sys.argv.(1) ^ s in
  import_dtd Format.std_formatter name Sys.argv.(2)