(* TODO: - attributes ! - clever factorizations of content model and attribute specifs (e.g. type XHTML_inlien = [ ( Char | ... ) ]) - better pretty-printing *) open Printf open Pxp_yacc open Pxp_lexer_types open Pxp_types let mixed_table : ('a,unit) Hashtbl.t = Hashtbl.create 127 let regexp_table : ('a,unit) Hashtbl.t = Hashtbl.create 127 let import_dtd ppf name filename = let rec regexp ppf = function | Optional re -> Format.fprintf ppf "%a?" regexp re | Repeated re -> Format.fprintf ppf "%a*" regexp re | Repeated1 re -> Format.fprintf ppf "%a+" regexp re | Seq (re1 :: res) -> Format.fprintf ppf "(@[%a" regexp re1; List.iter (fun re -> Format.fprintf ppf "@ %a" regexp re) res; Format.fprintf ppf "@])" | Alt (re1 :: res) -> Format.fprintf ppf "(@[%a" regexp re1; List.iter (fun re -> Format.fprintf ppf "@ | %a" regexp re) res; Format.fprintf ppf "@])" | Child s -> Format.fprintf ppf "%s" (name s) | _ -> assert false in let content ppf = function | Unspecified | Any -> Format.fprintf ppf "Any*" | Empty -> Format.fprintf ppf "" | Mixed l -> (try Hashtbl.find mixed_table l; Format.fprintf ppf "MIXED:CACHED!" with Not_found -> (* Hashtbl.add mixed_table l (); *) let l = List.map (function | MPCDATA -> "Char" | MChild s -> name s) l in Format.fprintf ppf "( %s )*" (String.concat " | " l)) | Regexp r -> (try Hashtbl.find regexp_table r; Format.fprintf ppf "REGEXP:CACHED!" with Not_found -> (* Hashtbl.add regexp_table r ();*) regexp ppf r ) in let att_type ppf = function | A_enum l -> Format.fprintf ppf "("; ignore (List.fold_left (fun first s -> if not first then Format.fprintf ppf " | "; Format.fprintf ppf "\"%s\"" s; false) true l); Format.fprintf ppf ")" | _ -> Format.fprintf ppf "String" in let attrib ppf e = ignore (List.fold_left (fun first a -> let (at,ad) = e # attribute a in match ad with | D_fixed _ -> first | _ -> Format.fprintf ppf "%s%s=%s%a" (if first then "" else "; ") a (if ad = D_required then "" else "?") att_type at; false ) true (e # attribute_names) ) in let elt ppf e = Format.fprintf ppf "type @[<2>%s =@ @[<3><%s {|%a|}>[@ @[%a@]@ ]@]@];;@\n" (name (e # name)) (e # name) attrib e content (e # content_model) in let handle = function | E_start_doc(_,_,dtd) -> List.iter (fun x -> elt ppf (dtd # element x)) (dtd # element_names); exit 0 | E_error e -> printf "Error: %s\n" (Pxp_types.string_of_exn e); exit 2 | _ -> () in let config = default_config in let mgr = create_entity_manager config (from_file filename) in let next_event = create_pull_parser config (`Entry_document [`Extend_dtd_fully]) mgr in let event = ref (Some E_end_of_stream) in Format.fprintf ppf "(* This type has been automatically generated from %s by dtd2cduce *)@\n" filename; let rec loop () = match next_event () with | None -> failwith "End of file reached before DTD definition" | Some e -> handle e; loop () in loop () let () = let name s = Sys.argv.(1) ^ s in import_dtd Format.std_formatter name Sys.argv.(2)