open Printf ;; open Schema_types ;; (* TODO dates: boundary checks (e.g. 95/26/2003) *) (* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation *) (* auxiliary stuff *) let char_of_hex = let int_of_hex_char = function | '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 | '6' -> 6 | '7' -> 7 | '8' -> 8 | '9' -> 9 | 'a' | 'A' -> 10 | 'b' | 'B' -> 11 | 'c' | 'C' -> 12 | 'd' | 'D' -> 13 | 'e' | 'E' -> 14 | 'f' | 'F' -> 15 | _ -> assert false in (* most significative, least significative *) fun ms ls -> Char.unsafe_chr (int_of_hex_char ms * 16 + int_of_hex_char ls) ;; let strip_parens s = Pcre.replace ~pat:"[()]" s ;; let add_limits s = "^" ^ s ^ "$" ;; let simple_type_error ~typ ~value = raise (XSI_validation_error (sprintf "'%s' isn't a valid xsd:%s" value typ)) ;; (* regular expressions used to validate built-in types *) let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))" ;; let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})" ;; let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})" ;; let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw ;; let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw ;; let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw ;; let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw ;; let gMonth_RE_raw = "--(\\d{2})--(%s)?" ;; let positive_field = false, "positive", Builtin_defs.bool ;; let year_field = false, "year", Builtin_defs.int ;; let month_field = false, "month", Builtin_defs.int ;; let day_field = false, "day", Builtin_defs.int ;; let hour_field = false, "hour", Builtin_defs.int ;; let minute_field = false, "minute", Builtin_defs.int ;; let second_field = false, "second", Builtin_defs.int ;; (* TODO this should be a decimal *) (* some cduce types corresponding to schema ones *) (* TODO the constraint that at least one part should be present isn't easily expressible with CDuce types *) let duration_type = Types.rec_of_list' [ positive_field; true, "year", Builtin_defs.int; true, "month", Builtin_defs.int; true, "day", Builtin_defs.int; true, "hour", Builtin_defs.int; true, "minute", Builtin_defs.int; true, "second", Builtin_defs.int; (* TODO this should be a decimal *) ] ;; let timezone_type = Types.rec_of_list' [ false, "positive", Builtin_defs.bool; hour_field; minute_field ] ;; let timezone_type_fields = [ true, "timezone", timezone_type ] ;; let time_type_fields = [ hour_field; minute_field; second_field ] ;; let date_type_fields = [ year_field; month_field; day_field ] ;; let time_type = Types.rec_of_list' (time_type_fields @ timezone_type_fields) ;; let date_type = Types.rec_of_list' (positive_field :: date_type_fields) ;; let dateTime_type = Types.rec_of_list' (positive_field :: (date_type_fields @ time_type_fields @ timezone_type_fields)) ;; let gYearMonth_type = Types.rec_of_list' [ positive_field; year_field; month_field ] ;; let gYear_type = Types.rec_of_list' [ positive_field; year_field ] ;; let gMonthDay_type = Types.rec_of_list' [ month_field; day_field ] ;; let gDay_type = Types.rec_of_list' [ day_field ] ;; let gMonth_type = Types.rec_of_list' [ month_field ] ;; let nonPositiveInteger_type = Builtin_defs.non_pos_int ;; let negativeInteger_type = Builtin_defs.neg_int ;; let nonNegativeInteger_type = Builtin_defs.non_neg_int ;; let positiveInteger_type = Builtin_defs.pos_int ;; (* validation functions: string -> Value.t *) let validate_string = Value.string_latin1 ;; let validate_integer s = try Value.Integer (Intervals.mk s) with Failure _ -> simple_type_error ~typ:"integer" ~value:s ;; let validate_interval interval type_name s = let integer = try Intervals.mk s with Failure _ -> simple_type_error ~typ:type_name ~value:s in if Intervals.contains integer interval then Value.Integer integer else simple_type_error ~typ:type_name ~value:s ;; let validate_nonPositiveInteger = validate_interval (Intervals.left (Intervals.mk "0")) "nonPositiveInteger" ;; let validate_negativeInteger = validate_interval (Intervals.left (Intervals.mk "-1")) "negativeInteger" ;; let validate_nonNegativeInteger = validate_interval (Intervals.right (Intervals.mk "0")) "nonNegativeInteger" ;; let validate_positiveInteger = validate_interval (Intervals.right (Intervals.mk "1")) "positiveInteger" ;; let validate_bool = function | "true" | "1" -> Value.vtrue | "false" | "0" -> Value.vfalse | v -> simple_type_error ~typ:"boolean" ~value:v ;; let parse_sign = function "+" | "" -> Value.vtrue | _ -> Value.vfalse ;; let parse_date = let rex = Pcre.regexp (add_limits date_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"date" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in [ "year", validate_integer subs.(1); "month", validate_integer subs.(2); "day", validate_integer subs.(3) ] ;; let parse_time = let rex = Pcre.regexp (add_limits time_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"time" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in [ "hour", validate_integer subs.(1); "minute", validate_integer subs.(2); "second", validate_integer subs.(3) ] ;; let parse_timezone = let rex = Pcre.regexp (add_limits timezone_RE_raw) in fun s -> let abort () = raise (XSI_validation_error (sprintf "'%s' isn't a valid timezone" s)) in let subs = try Pcre.extract ~rex s with Not_found -> abort () in match subs.(1) with | "Z" -> ["positive", Value.vtrue; "hour", validate_integer "0"; "minute", validate_integer "0"] | _ -> ["positive", parse_sign subs.(3); "hour", validate_integer subs.(4); "minute", validate_integer subs.(5)] ;; (* parse a timezone from a string, if it's empty return the empty list, otherwise return a list containing a pair <"timezone", timezone value> *) let parse_timezone' = function | "" -> [] | v -> [ "timezone", Value.vrecord (parse_timezone v) ] ;; let validate_duration = let rex = Pcre.regexp "^([+-])?P((\\d+)Y)?((\\d+)M)?((\\d+)D)?(T((\\d+)H)?((\\d+)M)?((\\d+)S)?)?$" in fun s -> let abort () = simple_type_error ~typ:"duration" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = ["positive", parse_sign subs.(1) ] @ (match subs.(3) with "" -> [] | v -> ["year", validate_integer v]) @ (match subs.(5) with "" -> [] | v -> ["month", validate_integer v]) @ (match subs.(7) with "" -> [] | v -> ["day", validate_integer v]) @ (match subs.(10) with "" -> [] | v -> ["hour", validate_integer v]) @ (match subs.(12) with "" -> [] | v -> ["minute", validate_integer v]) @ (match subs.(14) with "" -> [] | v -> ["second", validate_integer v]) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_dateTime = let rex = Pcre.regexp (sprintf "^([+-])?(%s)T(%s)(%s)?$" (strip_parens date_RE_raw) (strip_parens time_RE_raw) (strip_parens timezone_RE_raw)) in fun s -> let abort () = simple_type_error ~typ:"dateTime" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = [ "positive", parse_sign subs.(1) ] @ parse_date subs.(2) @ parse_time subs.(3) @ parse_timezone' subs.(4) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_gYearMonth = let rex = Pcre.regexp (add_limits gYearMonth_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"gYearMonth" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = [ "positive", parse_sign subs.(1); "year", validate_integer subs.(2); "month", validate_integer subs.(3) ] @ parse_timezone' subs.(4) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_gYear = let rex = Pcre.regexp (add_limits gYear_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"gYear" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = [ "positive", parse_sign subs.(1); "year", validate_integer subs.(2); ] @ parse_timezone' subs.(3) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_gMonthDay = let rex = Pcre.regexp (add_limits gMonthDay_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"gMonthDay" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = [ "month", validate_integer subs.(1); "day", validate_integer subs.(2); ] @ parse_timezone' subs.(3) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_gDay = let rex = Pcre.regexp (add_limits gDay_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"gDay" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = ("day", validate_integer subs.(1)) :: (parse_timezone' subs.(2)) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_gMonth = let rex = Pcre.regexp (add_limits gMonth_RE_raw) in fun s -> let abort () = simple_type_error ~typ:"gMonth" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = ("month", validate_integer subs.(1)) :: (parse_timezone' subs.(2)) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_time = let rex = Pcre.regexp (sprintf "^(%s)(%s)?$" (strip_parens time_RE_raw) (strip_parens timezone_RE_raw)) in fun s -> let abort () = simple_type_error ~typ:"time" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = parse_time subs.(1) @ (match subs.(2) with | "" -> [] | v -> [ "timezone", Value.vrecord (parse_timezone v) ]) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_date = let rex = Pcre.regexp (sprintf "^(-)?(%s)(%s)?$" (strip_parens date_RE_raw) (strip_parens timezone_RE_raw)) in fun s -> let abort () = simple_type_error ~typ:"date" ~value:s in let subs = try Pcre.extract ~rex s with Not_found -> abort () in try let fields = [ "positive", parse_sign subs.(1) ] @ parse_date subs.(2) @ (match subs.(3) with | "" -> [] | v -> [ "timezone", Value.vrecord (parse_timezone v) ]) in Value.vrecord fields with XSI_validation_error _ -> abort () ;; let validate_hexBinary s = let len = String.length s in if len mod 2 <> 0 then simple_type_error ~typ:"hexBinary" ~value:s; let res = String.create (len / 2) in let rec aux idx = if idx < len then begin String.unsafe_set res (idx / 2) (char_of_hex (String.unsafe_get s idx) (String.unsafe_get s (idx + 1))); aux (idx + 2) end in aux 0; validate_string res ;; (* TODO test base64Binary simple type! *) let validate_base64Binary s = validate_string (Netencoding.Base64.decode s) ;; let validate_anyURI s = try validate_string (Neturl.string_of_url (Neturl.url_of_string Neturl.ip_url_syntax s)) with Neturl.Malformed_URL -> simple_type_error ~typ:"anyURI" ~value:s let builtins = [ "xsd:string", (Builtin_defs.string, validate_string); "xsd:integer", (Builtin_defs.int, validate_integer); "xsd:nonPositiveInteger", (nonPositiveInteger_type, validate_nonPositiveInteger); "xsd:negativeInteger", (negativeInteger_type, validate_negativeInteger); "xsd:nonNegativeInteger", (nonNegativeInteger_type, validate_nonNegativeInteger); "xsd:positiveInteger", (positiveInteger_type, validate_positiveInteger); "xsd:boolean", (Builtin_defs.bool, validate_bool); "xsd:hexBinary", (Builtin_defs.string, validate_hexBinary); "xsd:base64Binary", (Builtin_defs.string, validate_base64Binary); "xsd:anyURI", (Builtin_defs.string, validate_anyURI); (* TODO anyType: is this useful? *) "xsd:anyType", (Types.any, (fun _ -> assert false)); (* TODO anySimpleType: is ok as a string? *) "xsd:anySimpleType", (Builtin_defs.string, validate_string); "xsd:duration", (duration_type, validate_duration); "xsd:dateTime", (dateTime_type, validate_dateTime); "xsd:time", (time_type, validate_time); "xsd:date", (date_type, validate_date); "xsd:gYearMonth", (gYearMonth_type, validate_gYearMonth); "xsd:gYear", (gYear_type, validate_gYear); "xsd:gMonthDay", (gMonthDay_type, validate_gMonthDay); "xsd:gDay", (gDay_type, validate_gDay); "xsd:gMonth", (gMonth_type, validate_gMonth); ] ;; (* module's interface implementation *) let names = List.sort compare (List.map fst builtins) ;; let len = List.length builtins ;; let cd_types = Hashtbl.create len ;; let validators = Hashtbl.create len ;; let __validate_fun_of_builtin = Hashtbl.find validators ;; let cd_type_of_builtin = Hashtbl.find cd_types ;; let fill () = List.iter (fun (name, (typ, validator)) -> Hashtbl.add cd_types name typ; Hashtbl.add validators name validator) builtins ;; fill () ;;