open Printf open Encodings open Schema_pcre open Schema_types let no_facets = { length = None; minLength = None; maxLength = None; (* pattern = []; *) enumeration = None; whiteSpace = `Collapse, true; maxInclusive = None; maxExclusive = None; minInclusive = None; minExclusive = None; (* totalDigits = None; fractionDigits = None; *) } (** naive implementation: doesn't follow XML Schema constraints on facets * merging. Here all new facets override old ones *) let merge_facets old_facets new_facets = let maxInclusive, maxExclusive = match new_facets.maxInclusive, new_facets.maxExclusive with | None, None -> old_facets.maxInclusive, old_facets.maxExclusive | Some _, Some _ -> assert false | v -> v in let minInclusive, minExclusive = match new_facets.minInclusive, new_facets.minExclusive with | None, None -> old_facets.minInclusive, old_facets.minExclusive | Some _, Some _ -> assert false | v -> v in { old_facets with length = (match new_facets.length with | None -> old_facets.length | v -> v); minLength = (match new_facets.minLength with | None -> old_facets.minLength | v -> v); maxLength = (match new_facets.maxLength with | None -> old_facets.maxLength | v -> v); enumeration = (match new_facets.enumeration with | None -> old_facets.enumeration | v -> v); whiteSpace = new_facets.whiteSpace; maxInclusive = maxInclusive; maxExclusive = maxExclusive; minInclusive = minInclusive; minExclusive = minExclusive; } let rec facets_of_simple_type_definition = function | Primitive _ -> no_facets | Derived (_, _, facets, _) -> facets let rec variety_of_simple_type_definition = function | (Primitive name) as st -> Atomic (ref (Simple st)) | Derived (_, variety, _, _) -> variety let get_simple_type = function | { contents = Simple c } -> c | { contents = AnyType } -> Primitive (Utf8.mk "xsd:anySimpleType") | _ -> assert false let rec normalize_simple_type = function | Derived (name, Restrict, new_facets, base) -> (match normalize_simple_type (get_simple_type base) with | Derived (_,variety,old_facets,base) -> Derived (name,variety,merge_facets old_facets new_facets,base) | Primitive _ as st -> let b = ref (Simple st) in Derived (name,Atomic b,new_facets,b)) | st -> st let name_of_element_declaration elt = elt.elt_name let name_of_simple_type_definition = function | Primitive name -> name | Derived (Some name, _, _, _) -> name | _ -> raise (Invalid_argument "anonymous simple type definition") let name_of_complex_type_definition = function | { ct_name = Some name } -> name | _ -> raise (Invalid_argument "anonymous complex type definition") let name_of_type_definition = function | AnyType -> Encodings.Utf8.mk "xsd:anyType" | Simple st -> name_of_simple_type_definition st | Complex ct -> name_of_complex_type_definition ct let name_of_attribute_declaration a = a.attr_name let name_of_attribute_use { attr_decl = { attr_name = name } } = name let name_of_attribute_group_definition ag = ag.ag_name let name_of_model_group_definition mg = mg.mg_name let name_of_particle = function | (_, _, Elt elt_decl_ref, _) -> name_of_element_declaration !elt_decl_ref | _ -> assert false let variety_of_simple_type_definition = function | (Primitive name) as st -> Atomic (ref (Simple st)) | Derived (_, variety, _, _) -> variety let simple_type_of_type = function | Simple s -> s | _ -> raise (Invalid_argument "simple_type_of_type") let complex_type_of_type = function | Complex c -> c | _ -> raise (Invalid_argument "complex_type_of_type") let content_type_of_type = function | AnyType -> assert false | Complex { ct_content = ct } -> ct | Simple st -> CT_simple (ref (Simple st)) let iter_types schema f = List.iter f schema.types let iter_attributes schema f = List.iter f schema.attributes let iter_elements schema f = List.iter f schema.elements let iter_attribute_groups schema f = List.iter f schema.attribute_groups let iter_model_groups schema f = List.iter f schema.model_groups exception XSD_validation_error of string exception XSI_validation_error of string let rec normalize_white_space = let ws_RE = pcre_regexp "[\t\r\n]" in let spaces_RE = pcre_regexp "[ ]+" in let margins_RE = pcre_regexp "^ (.*) $" in fun handling s -> match handling with | `Preserve -> s | `Replace -> pcre_replace ~rex:ws_RE ~templ:(Utf8.mk " ") s | `Collapse -> let s' = pcre_replace ~rex:spaces_RE ~templ:(Utf8.mk " ") (normalize_white_space `Replace s) in pcre_replace ~rex:margins_RE ~templ:(Utf8.mk "$1") s' let anySimpleType = Primitive (Encodings.Utf8.mk "xsd:anySimpleType") let anyType = AnyType let first_of_particle (_, _, _, first) = first let nullable p = List.mem None (first_of_particle p) let first_of_model_group = function | All particles | Choice particles -> List.concat (List.map first_of_particle particles) | Sequence particles -> let rec aux = function | hd :: tl when nullable hd -> (first_of_particle hd) @ (aux tl) | hd :: tl -> first_of_particle hd | [] -> [] in aux particles let rec is_in_first tag = function | [] -> false | Some tag' :: rest when Utf8.equal tag' tag -> true | _ :: rest -> is_in_first tag rest let get_interval facets = (* ASSUMPTION: * not (facets.minInclusive = Some _ && facets.minExclusive = Some _) * not (facets.maxInclusive = Some _ && facets.maxExclusive = Some _) * Value.t is an integer! (no other intervals are actually supported * by the CDuce type system) *) let getint f = Value.get_integer (Lazy.force f) in let min = match facets.minInclusive, facets.minExclusive with | Some (i, _), None -> Some (getint i) | None, Some (i, _) -> Some (Intervals.V.succ (getint i)) | None, None -> None | _ -> assert false in let max = match facets.maxInclusive, facets.maxExclusive with | Some (i, _), None -> Some (getint i) | None, Some (i, _) -> Some (Intervals.V.pred (getint i)) | None, None -> None | _ -> assert false in match min, max with | Some min, Some max -> Intervals.bounded min max | Some min, None -> Intervals.right min | None, Some max -> Intervals.left max | None, None -> Intervals.any let print_simple_type fmt = function | Primitive name -> Format.fprintf fmt "%a" Encodings.Utf8.dump name | Derived (Some name, _, _, _) -> Format.fprintf fmt "%a'" Encodings.Utf8.dump name | Derived (None, _, _, _) -> Format.fprintf fmt "unnamed'" let print_complex_type fmt = function | { ct_uid = id; ct_name = Some name } -> Format.fprintf fmt "%d:%a" id Encodings.Utf8.dump name | { ct_uid = id } -> Format.fprintf fmt "%d:unnamed'" id let print_type fmt = function | AnyType -> Format.fprintf fmt "xsd:anyType" | Simple t -> Format.fprintf fmt "S:%a" print_simple_type t | Complex t -> Format.fprintf fmt "C:%a" print_complex_type t let print_attribute fmt { attr_name = name; attr_typdef = t } = Format.fprintf fmt "@@%a:%a" Utf8.dump name print_simple_type (get_simple_type t) let print_element fmt { elt_uid = id; elt_name = name } = Format.fprintf fmt "E:%d:<%a>" id Utf8.dump name let print_attributes fmt = List.iter (Format.fprintf fmt "%a" print_attribute) let print_attribute_group fmt ag = Format.fprintf fmt "{agroup:%a}" Utf8.dump ag.ag_name let print_model_group fmt mg = Format.fprintf fmt "{mgroup:%a}" Utf8.dump mg.mg_name let print_schema fmt schema = let defined_types = (* filter out built-in types *) List.filter (fun def -> not (Schema_xml.has_xsd_prefix (name_of_type_definition def))) schema.types in if defined_types <> [] then begin Format.fprintf fmt "Types: "; List.iter (fun c -> print_type fmt c; Format.fprintf fmt " ") defined_types; Format.fprintf fmt "\n" end; if schema.attributes <> [] then begin Format.fprintf fmt "Attributes: "; List.iter (fun c -> print_attribute fmt c; Format.fprintf fmt " ") schema.attributes; Format.fprintf fmt "\n" end; if schema.elements <> [] then begin Format.fprintf fmt "Elements: "; List.iter (fun c -> print_element fmt c; Format.fprintf fmt " ") schema.elements; Format.fprintf fmt "\n" end; if schema.attribute_groups <> [] then begin Format.fprintf fmt "Attribute groups: "; List.iter (fun c -> print_attribute_group fmt c; Format.fprintf fmt " ") schema.attribute_groups; Format.fprintf fmt "\n" end; if schema.model_groups <> [] then begin Format.fprintf fmt "Model groups: "; List.iter (fun c -> print_model_group fmt c; Format.fprintf fmt " ") schema.model_groups; Format.fprintf fmt "\n" end let get_type name schema = List.find (fun x -> try name_of_type_definition x = name with Invalid_argument _ -> false) schema.types let get_attribute name schema = List.find (fun x -> try name_of_attribute_declaration x = name with Invalid_argument _ -> false) schema.attributes let get_element name schema = List.find (fun x -> try name_of_element_declaration x = name with Invalid_argument _ -> false) schema.elements let get_attribute_group name schema = List.find (fun x -> try name_of_attribute_group_definition x = name with Invalid_argument _ -> false) schema.attribute_groups let get_model_group name schema = List.find (fun x -> try name_of_model_group_definition x = name with Invalid_argument _ -> false) schema.model_groups (* policy for unqualified schema component resolution. The order should * be consistent with Typer.find_schema_descr *) let get_component kind name schema = let rec tries = function | [] -> raise Not_found | hd :: tl -> (try hd () with Not_found -> tries tl) in let elt () = Element (get_element name schema) in let typ () = Type (get_type name schema) in let att () = Attribute (get_attribute name schema) in let att_group () = Attribute_group (get_attribute_group name schema) in let mod_group () = Model_group (get_model_group name schema) in match kind with | Some `Element -> elt () | Some `Type -> typ () | Some `Attribute -> att () | Some `Attribute_group -> att_group () | Some `Model_group -> mod_group () | None -> tries [ elt; typ; att; att_group; mod_group ] let string_of_component_kind (kind: component_kind) = match kind with | Some `Type -> "type" | Some `Element -> "element" | Some `Attribute -> "attribute" | Some `Attribute_group -> "attribute group" | Some `Model_group -> "model group" | None -> "component" (** Events *) type to_be_visited = | Fully of Value.t (* xml values still to be visited *) | Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *) | Other of Encodings.Utf8.t (* other values *) | Backlog of event (* old events not yet delivered *) let stream_of_value v = let stack = ref [Fully v] in let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of what has still to be visited *) match !stack with | (Fully ((Value.Xml (Value.Atom atom, attrs, _)) as v)) :: tl -> stack := (Half v) :: tl; let children = ref [] in (* TODO inefficient *) let push v s = (s := v :: !s) in Value.iter_xml (fun pcdata -> push (Other pcdata) children) (fun v -> match v with | (Value.Xml (_, _, _)) as v -> push (Fully v) children | v -> raise (Invalid_argument "Schema_events.stream_of_value")) v; stack := (List.rev !children) @ !stack; List.iter (* push attributes as events on the stack *) (fun (qname, v) -> push (Backlog (E_attribute (qname, fst (Value.get_string_utf8 v)))) stack) (Value.get_fields attrs); Some (E_start_tag (Atoms.V.value atom)) | (Half (Value.Xml (Value.Atom atom, _, _))) :: tl -> stack := tl; Some (E_end_tag (Atoms.V.value atom)) | (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ -> failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value" | (Backlog ev) :: tl -> (* consume backlog *) stack := tl; Some ev | (Other v) :: tl -> stack := tl; Some (E_char_data v) | [] -> None | _ -> failwith "Non XML element" in Stream.from f let string_of_event = function | E_start_tag qname -> sprintf "<%s>" (Ns.QName.to_string qname) | E_end_tag qname -> sprintf "" (Ns.QName.to_string qname) | E_attribute (qname, value) -> sprintf "@%s=%s" (Ns.QName.to_string qname) (Utf8.to_string value) | E_char_data value -> Utf8.to_string value (* let test v = let s = stream_of_value v in let rec aux () = (match Stream.peek s with | None -> () | Some (E_start_tag qname) -> Ns.QName.print Format.std_formatter qname | Some (E_end_tag qname) -> Format.fprintf Format.std_formatter "/"; Ns.QName.print Format.std_formatter qname | Some (E_attribute (qname, value)) -> Format.fprintf Format.std_formatter "@@"; Ns.QName.print Format.std_formatter qname; Format.fprintf Format.std_formatter " "; Encodings.Utf8.print Format.std_formatter value | Some (E_char_data value) -> Encodings.Utf8.print Format.std_formatter value); Format.fprintf Format.std_formatter "\n"; (match Stream.peek s with | None -> () | _ -> Stream.junk s; aux ()) in aux () *)