open Printf open Encodings open Schema_pcre open Schema_types let xsd = Schema_xml.xsd let no_facets = { length = None; minLength = None; maxLength = None; (* pattern = []; *) enumeration = None; whiteSpace = `Collapse, true; maxInclusive = None; maxExclusive = None; minInclusive = None; minExclusive = None; (* totalDigits = None; fractionDigits = None; *) } (** naive implementation: doesn't follow XML Schema constraints on facets * merging. Here all new facets override old ones *) let merge_facets old_facets new_facets = let maxInclusive, maxExclusive = match new_facets.maxInclusive, new_facets.maxExclusive with | None, None -> old_facets.maxInclusive, old_facets.maxExclusive | Some _, Some _ -> assert false | v -> v in let minInclusive, minExclusive = match new_facets.minInclusive, new_facets.minExclusive with | None, None -> old_facets.minInclusive, old_facets.minExclusive | Some _, Some _ -> assert false | v -> v in { old_facets with length = (match new_facets.length with | None -> old_facets.length | v -> v); minLength = (match new_facets.minLength with | None -> old_facets.minLength | v -> v); maxLength = (match new_facets.maxLength with | None -> old_facets.maxLength | v -> v); enumeration = (match new_facets.enumeration with | None -> old_facets.enumeration | v -> v); whiteSpace = new_facets.whiteSpace; maxInclusive = maxInclusive; maxExclusive = maxExclusive; minInclusive = minInclusive; minExclusive = minExclusive; } let rec facets_of_simple_type_definition st = st.st_facets let rec variety_of_simple_type_definition st = st.st_variety (* let get_simple_type c = match Lazy.force c with | Simple c -> c | AnyType -> Primitive (xsd,Utf8.mk "anySimpleType") | _ -> assert false *) (* let rec normalize_simple_type = function | Derived (name, Restrict, new_facets, base) -> (match normalize_simple_type (get_simple_type base) with | Derived (_,variety,old_facets,base) -> Derived (name,variety,merge_facets old_facets new_facets,base) | Primitive _ as st -> let b = lazy (Simple st) in Derived (name,Atomic b,new_facets,b)) | st -> st *) let name_of_element_declaration elt = elt.elt_name let name_of_simple_type_definition = function | { st_name = Some name } -> name | _ -> raise (Invalid_argument "anonymous simple type definition") let name_of_complex_type_definition = function | { ct_name = Some name } -> name | _ -> raise (Invalid_argument "anonymous complex type definition") let name_of_type_definition = function | AnyType -> (xsd, Utf8.mk "anyType") | Simple st -> name_of_simple_type_definition st | Complex ct -> name_of_complex_type_definition ct let name_of_attribute_declaration a = a.attr_name let name_of_attribute_use { attr_decl = { attr_name = name } } = name let name_of_attribute_group_definition ag = ag.ag_name let name_of_model_group_definition mg = mg.mg_name let name_of_particle = function | { part_term = Elt e } -> name_of_element_declaration e | _ -> assert false let simple_type_of_type = function | Simple s -> s | _ -> raise (Invalid_argument "simple_type_of_type") let complex_type_of_type = function | Complex c -> c | _ -> raise (Invalid_argument "complex_type_of_type") let content_type_of_type = function | AnyType -> assert false | Complex { ct_content = ct } -> ct | Simple st -> CT_simple st let iter_types schema f = List.iter f schema.types let iter_attributes schema f = List.iter f schema.attributes let iter_elements schema f = List.iter f schema.elements let iter_attribute_groups schema f = List.iter f schema.attribute_groups let iter_model_groups schema f = List.iter f schema.model_groups exception XSD_validation_error of string exception XSI_validation_error of string let rec normalize_white_space = let ws_RE = pcre_regexp "[\t\r\n]" in let spaces_RE = pcre_regexp "[ ]+" in let margins_RE = pcre_regexp "^ (.*) $" in fun handling s -> match handling with | `Preserve -> s | `Replace -> pcre_replace ~rex:ws_RE ~templ:(Utf8.mk " ") s | `Collapse -> let s' = pcre_replace ~rex:spaces_RE ~templ:(Utf8.mk " ") (normalize_white_space `Replace s) in pcre_replace ~rex:margins_RE ~templ:(Utf8.mk "$1") s' (* let anySimpleType = Primitive (xsd, Utf8.mk "anySimpleType") *) let anyType = AnyType let first_of_particle p = p.part_first let nullable p = p.part_nullable let first_of_wildcard_constraint = function | WAny -> Atoms.any | WNot ns -> Atoms.diff Atoms.any (Atoms.any_in_ns ns) | WOne l -> List.fold_left (fun acc ns -> Atoms.cup acc (Atoms.any_in_ns ns)) Atoms.empty l let first_of_model_group = function | All particles | Choice particles -> List.fold_left (fun acc p -> Atoms.cup acc (first_of_particle p)) Atoms.empty particles | Sequence particles -> let rec aux = function | hd::tl when nullable hd -> Atoms.cup (first_of_particle hd) (aux tl) | hd::tl -> first_of_particle hd | [] -> Atoms.empty in aux particles let nullable_of_model_group = function | All particles | Sequence particles -> List.for_all nullable particles | Choice particles -> List.exists nullable particles let get_interval facets = (* ASSUMPTION: * not (facets.minInclusive = Some _ && facets.minExclusive = Some _) * not (facets.maxInclusive = Some _ && facets.maxExclusive = Some _) * Value.t is an integer! (no other intervals are actually supported * by the CDuce type system) *) let getint f = Value.get_integer f in let min = match facets.minInclusive, facets.minExclusive with | Some (i, _), None -> Some (getint i) | None, Some (i, _) -> Some (Intervals.V.succ (getint i)) | None, None -> None | _ -> assert false in let max = match facets.maxInclusive, facets.maxExclusive with | Some (i, _), None -> Some (getint i) | None, Some (i, _) -> Some (Intervals.V.pred (getint i)) | None, None -> None | _ -> assert false in match min, max with | Some min, Some max -> Intervals.bounded min max | Some min, None -> Intervals.right min | None, Some max -> Intervals.left max | None, None -> Intervals.any let print_simple_type fmt = function | { st_name = Some name } -> Format.fprintf fmt "%a" Ns.QName.print name | _ -> Format.fprintf fmt "unnamed" let print_complex_type fmt = function | { ct_uid = id; ct_name = Some name } -> Format.fprintf fmt "%d:%a" id Ns.QName.print name | { ct_uid = id } -> Format.fprintf fmt "%d:unnamed'" id let print_type fmt = function | AnyType -> Format.fprintf fmt "xsd:anyType" | Simple t -> Format.fprintf fmt "S:%a" print_simple_type t | Complex t -> Format.fprintf fmt "C:%a" print_complex_type t let print_attribute fmt { attr_name = name; attr_typdef = t } = Format.fprintf fmt "@@%a:%a" Ns.QName.print name print_simple_type t let print_element fmt { elt_uid = id; elt_name = name } = Format.fprintf fmt "E:%d:<%a>" id Ns.QName.print name let print_attributes fmt = List.iter (Format.fprintf fmt "%a" print_attribute) let print_attribute_group fmt ag = Format.fprintf fmt "{agroup:%a}" Ns.QName.print ag.ag_name let print_model_group_def fmt mg = Format.fprintf fmt "{mgroup:%a}" Ns.QName.print mg.mg_name let print_schema fmt schema = let defined_types = (* filter out built-in types *) List.filter (fun t -> let (ns,_) = name_of_type_definition t in not (Ns.equal ns xsd)) schema.types in if defined_types <> [] then begin Format.fprintf fmt "Types: "; List.iter (fun c -> print_type fmt c; Format.fprintf fmt " ") defined_types; Format.fprintf fmt "\n" end; if schema.attributes <> [] then begin Format.fprintf fmt "Attributes: "; List.iter (fun c -> print_attribute fmt c; Format.fprintf fmt " ") schema.attributes; Format.fprintf fmt "\n" end; if schema.elements <> [] then begin Format.fprintf fmt "Elements: "; List.iter (fun c -> print_element fmt c; Format.fprintf fmt " ") schema.elements; Format.fprintf fmt "\n" end; if schema.attribute_groups <> [] then begin Format.fprintf fmt "Attribute groups: "; List.iter (fun c -> print_attribute_group fmt c; Format.fprintf fmt " ") schema.attribute_groups; Format.fprintf fmt "\n" end; if schema.model_groups <> [] then begin Format.fprintf fmt "Model groups: "; List.iter (fun c -> print_model_group_def fmt c; Format.fprintf fmt " ") schema.model_groups; Format.fprintf fmt "\n" end let get_qual name table get_name = List.find (fun x -> try Ns.QName.equal (get_name x) name with Invalid_argument _ -> false) table let get_type name schema = get_qual name schema.types name_of_type_definition let get_attribute name schema = get_qual name schema.attributes name_of_attribute_declaration let get_element name schema = get_qual name schema.elements name_of_element_declaration let get_attribute_group name schema = get_qual name schema.attribute_groups name_of_attribute_group_definition let get_model_group name schema = get_qual name schema.model_groups name_of_model_group_definition (* policy for unqualified schema component resolution. The order should * be consistent with Typer.find_schema_descr *) let get_component kind name schema = let rec tries = function | [] -> raise Not_found | hd :: tl -> (try hd () with Not_found -> tries tl) in let elt () = Element (get_element name schema) in let typ () = Type (get_type name schema) in let att () = Attribute (get_attribute name schema) in let att_group () = Attribute_group (get_attribute_group name schema) in let mod_group () = Model_group (get_model_group name schema) in match kind with | Some `Element -> elt () | Some `Type -> typ () | Some `Attribute -> att () | Some `Attribute_group -> att_group () | Some `Model_group -> mod_group () | None -> tries [ elt; typ; att; att_group; mod_group ] let string_of_component_kind (kind: component_kind) = match kind with | Some `Type -> "type" | Some `Element -> "element" | Some `Attribute -> "attribute" | Some `Attribute_group -> "attribute group" | Some `Model_group -> "model group" | None -> "component" (** Events *) type to_be_visited = | Fully of Value.t (* xml values still to be visited *) | Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *) | Other of Encodings.Utf8.t (* other values *) | Backlog of event (* old events not yet delivered *) let stream_of_value v = let stack = ref [Fully v] in let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of what has still to be visited *) match !stack with | (Fully ((Value.Xml (Value.Atom atom, attrs, _)) as v)) :: tl -> stack := (Half v) :: tl; let children = ref [] in (* TODO inefficient *) let push v s = (s := v :: !s) in Value.iter_xml (fun pcdata -> push (Other pcdata) children) (fun v -> match v with | (Value.Xml (_, _, _)) as v -> push (Fully v) children | v -> raise (Invalid_argument "Schema_events.stream_of_value")) v; stack := (List.rev !children) @ !stack; List.iter (* push attributes as events on the stack *) (fun (qname, v) -> push (Backlog (E_attribute (qname, fst (Value.get_string_utf8 v)))) stack) (Value.get_fields attrs); Some (E_start_tag (Atoms.V.value atom)) | (Half (Value.Xml (Value.Atom atom, _, _))) :: tl -> stack := tl; Some (E_end_tag (Atoms.V.value atom)) | (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ -> failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value" | (Backlog ev) :: tl -> (* consume backlog *) stack := tl; Some ev | (Other v) :: tl -> stack := tl; Some (E_char_data v) | [] -> None | _ -> failwith "Non XML element" in Stream.from f let string_of_event = function | E_start_tag qname -> sprintf "<%s>" (Ns.QName.to_string qname) | E_end_tag qname -> sprintf "" (Ns.QName.to_string qname) | E_attribute (qname, value) -> sprintf "@%s=%s" (Ns.QName.to_string qname) (Utf8.to_string value) | E_char_data value -> Utf8.to_string value (* let test v = let s = stream_of_value v in let rec aux () = (match Stream.peek s with | None -> () | Some (E_start_tag qname) -> Ns.QName.print Format.std_formatter qname | Some (E_end_tag qname) -> Format.fprintf Format.std_formatter "/"; Ns.QName.print Format.std_formatter qname | Some (E_attribute (qname, value)) -> Format.fprintf Format.std_formatter "@@"; Ns.QName.print Format.std_formatter qname; Format.fprintf Format.std_formatter " "; Encodings.Utf8.print Format.std_formatter value | Some (E_char_data value) -> Encodings.Utf8.print Format.std_formatter value); Format.fprintf Format.std_formatter "\n"; (match Stream.peek s with | None -> () | _ -> Stream.junk s; aux ()) in aux () *) let rec print_model_group ppf = function | All pl -> Format.fprintf ppf "All(%a)" print_particle_list pl | Choice pl -> Format.fprintf ppf "Choice(%a)" print_particle_list pl | Sequence pl -> Format.fprintf ppf "Sequence(%a)" print_particle_list pl and print_particle_list ppf = function | [] -> () | [p] -> print_particle ppf p | hd::tl -> Format.fprintf ppf "%a;%a" print_particle hd print_particle_list tl and print_particle ppf p = print_term ppf p.part_term and print_term ppf = function | Elt e -> Format.fprintf ppf "E%i" e.elt_uid | Model m -> print_model_group ppf m | Wildcard _ -> Format.fprintf ppf "Wildcard" let simple_restrict name base new_facets = { st_name = name; st_variety = base.st_variety; st_facets = merge_facets base.st_facets new_facets; st_base = Some base } let simple_list name item = { st_name = name; st_variety = List item; st_facets = no_facets; st_base = None } let simple_union name members = { st_name = name; st_variety = Union members; st_facets = no_facets; st_base = None } let xsi_nil_qname = (Schema_xml.xsi,Utf8.mk "nil") let xsi_nil_atom = Atoms.V.of_qname xsi_nil_qname let xsi_nil_type = Types.atom (Atoms.atom xsi_nil_atom) let xsi_nil_label = Ident.LabelPool.mk xsi_nil_qname let merge_attribute_uses l = List.fold_left (fun (l,a) (l',a') -> (l @ l', a || a')) ([],false) l