schema_builtin.ml 24.9 KB
Newer Older
1

2
open Printf
3

4
open Encodings
5
open Schema_pcre
6
open Schema_common
7
open Schema_types
8
9
10
11
12

(* TODO dates: boundary checks (e.g. 95/26/2003) *)
(* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation
*)

13
14
  (** {2 Aux/Misc stuff} *)

15
let add_xsd_prefix s = Utf8.mk ("xsd:"^s)
16

17
18
19
20
let unsupported =
  List.map (fun s -> add_xsd_prefix s)
    [ "decimal"; "float"; "double"; "NOTATION"; "QName" ]

21
22
let is_empty s = Utf8.equal s (Utf8.mk "")

23
24
25
26
27
28
29
30
31
32
33
34
let zero = Intervals.V.zero
let one = (Intervals.V.succ Intervals.V.zero)
let minus_one = (Intervals.V.pred Intervals.V.zero)
let long_l = (Intervals.V.mk "-9223372036854775808")
let long_r = (Intervals.V.mk "9223372036854775807")
let int_l = (Intervals.V.mk "-2147483648")
let int_r = (Intervals.V.mk "2147483647")
let short_l = (Intervals.V.mk "-32768")
let short_r = (Intervals.V.mk "32767")
let byte_l = (Intervals.V.mk "-128")
let byte_r = (Intervals.V.mk "127")

35
let xml_S_RE = pcre_regexp "[ \\t\\r\\n]+"
36
  (* split a string at XML recommendation "S" production boundaries *)
37
38
let split_xml_S s = pcre_split ~rex:xml_S_RE s
let norm_RE = pcre_regexp "[\\t\\r\\n]"
39
40
41
42
43
44
45
46
47
48
49

let char_of_hex =
  let int_of_hex_char = function
    | '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 | '6' -> 6
    | '7' -> 7 | '8' -> 8 | '9' -> 9 | 'a' | 'A' -> 10 | 'b' | 'B' -> 11
    | 'c' | 'C' -> 12 | 'd' | 'D' -> 13 | 'e' | 'E' -> 14 | 'f' | 'F' -> 15
    | _ -> assert false
  in
    (* most significative, least significative *)
  fun ms ls -> Char.unsafe_chr (int_of_hex_char ms * 16 + int_of_hex_char ls)

50
51
let strip_parens s = Pcre.replace ~pat:"[()]" s
let add_limits s = "^" ^ s ^ "$"
52

53
54
exception Schema_builtin_error of Utf8.t
let simple_type_error name = raise (Schema_builtin_error (add_xsd_prefix name))
55

56
let qualify s = (Ns.empty, Encodings.Utf8.mk s)
57

58
59
  (* regular expressions used to validate built-in types *)

60
61
62
63
64
65
66
67
68
69
let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))"
let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})"
let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})"

let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw
let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw
let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw
let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw
let gMonth_RE_raw = "--(\\d{2})--(%s)?"

70
71
  (** {2 CDuce types} *)

72
73
74
75
76
77
78
let positive_field = false, qualify "positive", Builtin_defs.bool
let year_field = false, qualify "year", Builtin_defs.int
let month_field = false, qualify "month", Builtin_defs.int
let day_field = false, qualify "day", Builtin_defs.int
let hour_field = false, qualify "hour", Builtin_defs.int
let minute_field = false, qualify "minute", Builtin_defs.int
let second_field = false, qualify "second", Builtin_defs.int
79
80
81
  (* TODO this should be a decimal *)
let time_type_fields = [ hour_field; minute_field; second_field ]
let date_type_fields = [ year_field; month_field; day_field ]
82

83
let time_kind_field = false, qualify "time_kind", Builtin_defs.time_kind
84
let time_kind kind = (qualify "time_kind", Value.Atom (Atoms.V.mk_ascii kind))
85

86
87
88
  (* TODO the constraint that at least one part should be present isn't easily
  expressible with CDuce types *)
let duration_type = Types.rec_of_list' [
89
  time_kind_field;
90
  positive_field;
91
92
93
94
95
96
  true, qualify "year", Builtin_defs.int;
  true, qualify "month", Builtin_defs.int;
  true, qualify "day", Builtin_defs.int;
  true, qualify "hour", Builtin_defs.int;
  true, qualify "minute", Builtin_defs.int;
  true, qualify "second", Builtin_defs.int; (* TODO this should be a decimal *)
97
]
98
let timezone_type = Types.rec_of_list' [
99
  positive_field;
100
  hour_field; minute_field
101
]
102
let timezone_type_fields = [ true, qualify "timezone", timezone_type ]
103
104
let time_type = Types.rec_of_list' (time_kind_field :: time_type_fields @ timezone_type_fields)
let date_type = Types.rec_of_list' (time_kind_field :: positive_field :: date_type_fields)
105
let dateTime_type =
106
  Types.rec_of_list' (time_kind_field :: positive_field ::
107
108
    (date_type_fields @ time_type_fields @ timezone_type_fields))
let gYearMonth_type = Types.rec_of_list' [
109
  positive_field; time_kind_field; year_field; month_field
110
]
111
112
113
114
let gYear_type = Types.rec_of_list' [ time_kind_field; positive_field; year_field ]
let gMonthDay_type = Types.rec_of_list' [ time_kind_field; month_field; day_field ]
let gDay_type = Types.rec_of_list' [ time_kind_field; day_field ]
let gMonth_type = Types.rec_of_list' [ time_kind_field; month_field ]
115

116
117
118
119
let nonPositiveInteger_type = Builtin_defs.non_pos_int
let negativeInteger_type = Builtin_defs.neg_int
let nonNegativeInteger_type = Builtin_defs.non_neg_int
let positiveInteger_type = Builtin_defs.pos_int
120
121
122
123
let long_type = Builtin_defs.long_int
let int_type = Builtin_defs.int_int
let short_type = Builtin_defs.short_int
let byte_type = Builtin_defs.byte_int
124

125
126
127
let string_list_type = Sequence.star Builtin_defs.string

  (** {2 Validation functions (string -> Value.t)} *)
128

129
130
131
132
133
let parse_sign s =
  if Utf8.equal s (Utf8.mk "+") || Utf8.equal s (Utf8.mk "") then
    Value.vtrue
  else
    Value.vfalse
134
135
136

let validate_integer s =
  try
137
    Value.Integer (Intervals.V.mk (Utf8.get_str s))
138
139
140
  with Failure _ -> simple_type_error "integer"

let strip_decimal_RE = Pcre.regexp "\\..*$"
141
142
143
144

let parse_date =
  let rex = Pcre.regexp (add_limits date_RE_raw) in
  fun s ->
145
  let abort () = simple_type_error "date" in
146
147
148
149
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
  [ qualify "year", validate_integer subs.(1);
    qualify "month", validate_integer subs.(2);
    qualify "day", validate_integer subs.(3) ]
150
151
152
153

let parse_time =
  let rex = Pcre.regexp (add_limits time_RE_raw) in
  fun s ->
154
  let abort () = simple_type_error "time" in
155
156
157
158
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
  [ qualify "hour", validate_integer subs.(1);
    qualify "minute", validate_integer subs.(2);
    qualify "second", validate_integer subs.(3) ]
159
160
161
162

let parse_timezone =
  let rex = Pcre.regexp (add_limits timezone_RE_raw) in
  fun s ->
163
164
165
166
167
168
169
170
171
172
  let abort () = simple_type_error "timezone" in
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
  if Utf8.equal subs.(1) (Utf8.mk "Z") then
    [qualify "positive", Value.vtrue;
     qualify "hour", validate_integer (Utf8.mk "0");
     qualify "minute", validate_integer (Utf8.mk "0")]
  else
    [qualify "positive", parse_sign subs.(3);
     qualify "hour", validate_integer subs.(4);
     qualify "minute", validate_integer subs.(5)]
173
174
  (* parse a timezone from a string, if it's empty return the empty list,
  otherwise return a list containing a pair <"timezone", timezone value> *)
175
176
177
178
179
let parse_timezone' s =
  if is_empty s then
    []
  else
    [ qualify "timezone", Value.vrecord (parse_timezone s) ]
180

181
let validate_string s = Value.string_utf8 s
182
183
184
185
186
187
188
189
190
191
let validate_normalizedString s =
  validate_string (normalize_white_space `Replace s)
let validate_token s =
  validate_string (normalize_white_space `Collapse s)
let validate_token_list s =
  Value.sequence (List.map validate_token (split_xml_S s))

let validate_interval interval type_name s =
  let integer =
    try
192
      Intervals.V.mk (Utf8.get_str s)
193
    with Failure _ -> simple_type_error type_name
194
195
196
197
  in
  if Intervals.contains integer interval then
    Value.Integer integer
  else
198
    simple_type_error type_name
199
200
201
202
203
204
205
206
let validate_nonPositiveInteger =
  validate_interval (Intervals.left Intervals.V.zero) "nonPositiveInteger"
let validate_negativeInteger =
  validate_interval (Intervals.left Intervals.V.minus_one) "negativeInteger"
let validate_nonNegativeInteger =
  validate_interval (Intervals.right Intervals.V.zero) "nonNegativeInteger"
let validate_positiveInteger =
  validate_interval (Intervals.right Intervals.V.one) "positiveInteger"
207
208
209
210
211
let validate_long = validate_interval (Intervals.bounded long_l long_r) "long"
let validate_int = validate_interval (Intervals.bounded int_l int_r) "int"
let validate_short =
  validate_interval (Intervals.bounded short_l short_r) "short"
let validate_byte = validate_interval (Intervals.bounded byte_l byte_r) "byte"
212

213
214
215
216
217
218
219
let validate_bool s =
  if Utf8.equal s (Utf8.mk "true") || Utf8.equal s (Utf8.mk "1") then
    Value.vtrue
  else if Utf8.equal s (Utf8.mk "false") || Utf8.equal s (Utf8.mk "0") then
    Value.vfalse
  else
    simple_type_error "boolean"
220

221
let validate_duration =
222
  let rex = pcre_regexp
223
224
225
  "^([+-])?P((\\d+)Y)?((\\d+)M)?((\\d+)D)?(T((\\d+)H)?((\\d+)M)?((\\d+)S)?)?$"
  in
  fun s ->
226
  let abort () = simple_type_error "duration" in
227
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
228
229
  try
    let fields =
230
      time_kind "duration" ::
231
232
233
234
235
236
237
238
239
240
241
242
243
      [qualify "positive", parse_sign subs.(1) ] @
      (if is_empty subs.(3) then []
      else [qualify "year", validate_integer subs.(3)]) @
      (if is_empty subs.(5) then []
      else [qualify "month", validate_integer subs.(5)]) @
      (if is_empty subs.(7) then []
      else [qualify "day", validate_integer subs.(7)]) @
      (if is_empty subs.(10) then []
      else [qualify "hour", validate_integer subs.(10)]) @
      (if is_empty subs.(12) then []
      else [qualify "minute", validate_integer subs.(12)]) @
      (if is_empty subs.(14) then []
      else [qualify "second", validate_integer subs.(14)])
244
245
    in
    Value.vrecord fields
246
  with Schema_builtin_error _ -> abort ()
247
248
249
250
251
252
253

let validate_dateTime =
  let rex = Pcre.regexp (sprintf "^([+-])?(%s)T(%s)(%s)?$"
    (strip_parens date_RE_raw) (strip_parens time_RE_raw)
    (strip_parens timezone_RE_raw))
  in
  fun s ->
254
  let abort () = simple_type_error "dateTime" in
255
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
256
257
  try
    let fields =
258
      time_kind "dateTime" ::
259
      [ qualify "positive", parse_sign subs.(1) ] @
260
261
262
263
264
      parse_date subs.(2) @
      parse_time subs.(3) @
      parse_timezone' subs.(4)
    in
    Value.vrecord fields
265
  with Schema_builtin_error _ -> abort ()
266
267
268
269

let validate_gYearMonth =
  let rex = Pcre.regexp (add_limits gYearMonth_RE_raw) in
  fun s ->
270
    let abort () = simple_type_error "gYearMonth" in
271
    let subs = try pcre_extract ~rex s with Not_found -> abort () in
272
273
    try
      let fields = [
274
        time_kind "gYearMonth";
275
276
277
        qualify "positive", parse_sign subs.(1);
        qualify "year", validate_integer subs.(2);
        qualify "month", validate_integer subs.(3)
278
279
280
      ] @ parse_timezone' subs.(4)
      in
      Value.vrecord fields
281
    with Schema_builtin_error _ -> abort ()
282
283
284
285

let validate_gYear =
  let rex = Pcre.regexp (add_limits gYear_RE_raw) in
  fun s ->
286
    let abort () = simple_type_error "gYear" in
287
    let subs = try pcre_extract ~rex s with Not_found -> abort () in
288
289
    try
      let fields = [
290
        time_kind "gYear";
291
292
        qualify "positive", parse_sign subs.(1);
        qualify "year", validate_integer subs.(2);
293
294
295
      ] @ parse_timezone' subs.(3)
      in
      Value.vrecord fields
296
    with Schema_builtin_error _ -> abort ()
297
298
299
300

let validate_gMonthDay =
  let rex = Pcre.regexp (add_limits gMonthDay_RE_raw) in
  fun s ->
301
    let abort () = simple_type_error "gMonthDay" in
302
    let subs = try pcre_extract ~rex s with Not_found -> abort () in
303
304
    try
      let fields = [
305
        time_kind "gMonthDay";
306
307
        qualify "month", validate_integer subs.(1);
        qualify "day", validate_integer subs.(2);
308
309
310
      ] @ parse_timezone' subs.(3)
      in
      Value.vrecord fields
311
    with Schema_builtin_error _ -> abort ()
312
313
314
315

let validate_gDay =
  let rex = Pcre.regexp (add_limits gDay_RE_raw) in
  fun s ->
316
    let abort () = simple_type_error "gDay" in
317
    let subs = try pcre_extract ~rex s with Not_found -> abort () in
318
319
    try
      let fields =
320
        time_kind "gDay" ::
321
        (qualify "day", validate_integer subs.(1)) ::
322
        (parse_timezone' subs.(2))
323
324
      in
      Value.vrecord fields
325
    with Schema_builtin_error _ -> abort ()
326
327
328
329

let validate_gMonth =
  let rex = Pcre.regexp (add_limits gMonth_RE_raw) in
  fun s ->
330
    let abort () = simple_type_error "gMonth" in
331
    let subs = try pcre_extract ~rex s with Not_found -> abort () in
332
333
    try
      let fields =
334
        time_kind "gMonth" ::
335
        (qualify "month", validate_integer subs.(1)) ::
336
        (parse_timezone' subs.(2))
337
338
      in
      Value.vrecord fields
339
    with Schema_builtin_error _ -> abort ()
340
341
342
343
344
345

let validate_time =
  let rex = Pcre.regexp (sprintf "^(%s)(%s)?$" (strip_parens time_RE_raw)
    (strip_parens timezone_RE_raw))
  in
  fun s ->
346
  let abort () = simple_type_error "time" in
347
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
348
349
  try
    let fields =
350
      time_kind "time" ::
351
      parse_time subs.(1) @
352
353
      (if is_empty subs.(2) then []
      else [ qualify "timezone", Value.vrecord (parse_timezone subs.(2)) ])
354
355
    in
    Value.vrecord fields
356
  with Schema_builtin_error _ -> abort ()
357
358
359
360
361
362

let validate_date =
  let rex = Pcre.regexp (sprintf "^(-)?(%s)(%s)?$" (strip_parens date_RE_raw)
    (strip_parens timezone_RE_raw))
  in
  fun s ->
363
  let abort () = simple_type_error "date" in
364
  let subs = try pcre_extract ~rex s with Not_found -> abort () in
365
366
  try
    let fields =
367
      time_kind "date" ::
368
      [ qualify "positive", parse_sign subs.(1) ] @
369
      parse_date subs.(2) @
370
371
      (if is_empty subs.(3) then []
      else [ qualify "timezone", Value.vrecord (parse_timezone subs.(3)) ])
372
373
    in
    Value.vrecord fields
374
  with Schema_builtin_error _ -> abort ()
375
376

let validate_hexBinary s =
377
  let s = Utf8.get_str s in
378
379
  let len = String.length s in
  if len mod 2 <> 0 then
380
    simple_type_error "hexBinary";
381
382
383
384
385
386
387
388
389
  let res = String.create (len / 2) in
  let rec aux idx =
    if idx < len then begin
      String.unsafe_set res (idx / 2)
        (char_of_hex (String.unsafe_get s idx) (String.unsafe_get s (idx + 1)));
      aux (idx + 2)
    end
  in
  aux 0;
390
  validate_string (Utf8.mk res)
391

392
393
394
let validate_base64Binary s =
  let s = Utf8.get_str s in
  validate_string (Utf8.mk (Netencoding.Base64.decode s))
395
396

let validate_anyURI s =
397
  let s = Utf8.get_str s in
398
  try
399
400
    validate_string (Utf8.mk (Neturl.string_of_url (Neturl.url_of_string
      Neturl.ip_url_syntax s)))
401
  with Neturl.Malformed_URL -> simple_type_error "anyURI"
402

403
404
405
  (** {2 API backend} *)

let builtins = Hashtbl.create 50
406
let reg name spec = Hashtbl.add builtins (add_xsd_prefix name) spec
407
let alias alias name =
408
  let (alias, name) = (add_xsd_prefix alias, add_xsd_prefix name) in
409
410
411
412
413
414
415
416
417
418
  Hashtbl.add builtins alias
    (let (st_def, descr, validator) = Hashtbl.find builtins name in
    let new_def =
      match st_def with
      | Primitive _ -> Primitive alias
      | Derived (_, variety, facets, base) ->
          Derived (Some alias, variety, facets, base)
    in
    (new_def, descr, validator))
let restrict' name basename new_facets =
419
  let (name, basename) = (add_xsd_prefix name, add_xsd_prefix basename) in
420
421
422
423
424
  let (base, _, _) = Hashtbl.find builtins basename in
  let variety = variety_of_simple_type_definition base in
  let facets =
    merge_facets (facets_of_simple_type_definition base) new_facets
  in
425
  Derived (Some name, variety, facets, ref (Simple base))
426
let list' name itemname =
427
  let (name, itemname) = (add_xsd_prefix name, add_xsd_prefix itemname) in
428
  let (base, _, _) = Hashtbl.find builtins itemname in
429
  let base = ref (Simple base) in
430
431
432
  Derived (Some name, List base, no_facets, base)

let fill () = (* fill "builtins" hashtbl *)
433
  let primitive name = Primitive (add_xsd_prefix name) in
434
435
436

  (* primitive builtins *)

437
438
  reg "anySimpleType"
    (primitive "anySimpleType", Builtin_defs.string, validate_string);
439
  alias "anyType" "anySimpleType";  (* TODO BUG HERE *)
440
441
  reg "string"
    (primitive "string", Builtin_defs.string, validate_string);
442
443
444
445
446
447
448
449

    (* TODO following types not yet supported (see "unsupported" above) *)
  alias "decimal" "string";
  alias "float" "string";
  alias "double" "string";
  alias "NOTATION" "string";
  alias "QName" "string";

450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
  reg "boolean"
    (primitive "boolean", Builtin_defs.bool, validate_bool);
  reg "hexBinary"
    (primitive "hexBinary", Builtin_defs.string, validate_hexBinary);
  reg "base64Binary"
    (primitive "base64Binary", Builtin_defs.string, validate_base64Binary);
  reg "anyURI"
    (primitive "anyURI", Builtin_defs.string, validate_anyURI);
  reg "duration"
    (primitive "duration", duration_type, validate_duration);
  reg "dateTime"
    (primitive "dateTime", dateTime_type, validate_dateTime);
  reg "time"
    (primitive "time", time_type, validate_time);
  reg "date"
    (primitive "date", date_type, validate_date);
  reg "gYearMonth"
    (primitive "gYearMonth", gYearMonth_type, validate_gYearMonth);
  reg "gYear"
    (primitive "gYear", gYear_type, validate_gYear);
  reg "gMonthDay"
    (primitive "gMonthDay", gMonthDay_type, validate_gMonthDay);
  reg "gDay"
    (primitive "gDay", gDay_type, validate_gDay);
  reg "gMonth"
    (primitive "gMonth", gMonth_type, validate_gMonth);
476
477
478

  (* derived builtins *)

479
480
  reg "integer"
    (restrict' "integer" "decimal" no_facets, (* fake restriction *)
481
    Builtin_defs.int, validate_integer);
482
483
  reg "nonPositiveInteger"
    (restrict' "nonPositiveInteger" "integer"
484
      { no_facets with maxInclusive = Some (lazy (Value.Integer zero), false) },
485
    nonPositiveInteger_type, validate_nonPositiveInteger);
486
487
  reg "negativeInteger"
    (restrict' "negativeInteger" "nonPositiveInteger"
488
      { no_facets with maxInclusive = Some (lazy (Value.Integer minus_one), false) },
489
    negativeInteger_type, validate_negativeInteger);
490
491
  reg "nonNegativeInteger"
    (restrict' "nonNegativeInteger" "integer"
492
      { no_facets with minInclusive = Some (lazy (Value.Integer zero), false) },
493
    nonNegativeInteger_type, validate_nonNegativeInteger);
494
495
  reg "positiveInteger"
    (restrict' "positiveInteger" "nonNegativeInteger"
496
      { no_facets with minInclusive = Some (lazy (Value.Integer one), false) },
497
    positiveInteger_type, validate_positiveInteger);
498
499
  reg "long"
    (restrict' "long" "integer"
500
      { no_facets with
501
502
          minInclusive = Some (lazy (Value.Integer long_l), false);
          maxInclusive = Some (lazy (Value.Integer long_r), false)},
503
    long_type, validate_long);
504
505
  reg "int"
    (restrict' "int" "long"
506
      { no_facets with
507
508
          minInclusive = Some (lazy (Value.Integer int_l), false);
          maxInclusive = Some (lazy (Value.Integer int_r), false)},
509
    int_type, validate_int);
510
511
  reg "short"
    (restrict' "short" "int"
512
      { no_facets with
513
514
          minInclusive = Some (lazy (Value.Integer short_l), false);
          maxInclusive = Some (lazy (Value.Integer short_r), false)},
515
    short_type, validate_short);
516
517
  reg "byte"
    (restrict' "byte" "short"
518
      { no_facets with
519
520
          minInclusive = Some (lazy (Value.Integer byte_l), false);
          maxInclusive = Some (lazy (Value.Integer byte_r), false)},
521
    byte_type, validate_short);
522
523
  reg "normalizedString"
    (restrict' "normalizedString" "string"
524
525
      { no_facets with whiteSpace = `Replace, false },
    Builtin_defs.string, validate_normalizedString);
526
527
  reg "token"
    (restrict' "token" "normalizedString"
528
529
      { no_facets with whiteSpace = `Collapse, false },
    Builtin_defs.string, validate_token);
530
531
532
533
534
535
536
537
538
  alias "language" "token";
  alias "Name" "token";
  alias "NMTOKEN" "token";
  alias "NCName" "token";
  alias "ID" "token";
  alias "IDREF" "token";
  alias "ENTITY" "token";
  reg "NMTOKENS"
    (list' "NMTOKENS" "token",
539
    string_list_type, validate_token_list);
540
541
  alias "IDREFS" "NMTOKENS";
  alias "ENTITIES" "NMTOKENS"
542
543
544

let _ = try fill () with Not_found -> assert false

545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
  (** {2 Printing} *)

open Big_int

type kind =
  Duration | DateTime | Time | Date | GYearMonth | GYear | GMonthDay | GDay |
  GMonth
type timezone = bool * Intervals.V.t * Intervals.V.t
  (* positive, hour, minute *)
type time_value = {
  kind: kind option; positive: bool option; year: Intervals.V.t option;
  month: Intervals.V.t option; day: Intervals.V.t option;
  hour: Intervals.V.t option; minute: Intervals.V.t option;
  second: Intervals.V.t option; timezone: timezone option
}
let null_value = {
  kind = None; positive = None; year = None; month = None; day = None;
  hour = None; minute = None; second = None; timezone = None
}

let string_of_time_type fields =
  let fail () = raise (Schema_builtin_error (Utf8.mk "")) in
  let parse_int = function Value.Integer i -> i | _ -> fail () in
  let parse_timezone v =
    let fields =
      try
        Value.get_fields v
      with Invalid_argument _ -> fail ()
    in
    let (positive, hour, minute) = (ref true, ref zero, ref zero) in
    List.iter
      (fun ((ns, name), value) ->
        if ns <> Ns.empty then fail ();
        (match Utf8.get_str name with
        | "positive" -> positive :=  (Value.equal value Value.vtrue)
        | "hour" -> hour := parse_int value
        | "minute" -> minute := parse_int value
        | _ -> fail ()))
      fields;
    !positive, !hour, !minute
  in
  let parse_time_kind = function
    | Value.Atom a ->
        (match Utf8.get_str (snd (Atoms.V.value a)) with
        | "duration" -> Duration | "dateTime" -> DateTime | "time" -> Time
        | "date" -> Date | "gYearMonth" -> GYearMonth | "gYear" -> GYear
        | "gMonthDay" -> GMonthDay | "gDay" -> GDay | "gMonth" -> GMonth
        | _ -> fail ())
    | _ -> fail ()
  in
  let parse_positive = function
    | v when Value.equal v Value.vfalse -> false
    | _ -> true
  in
  let string_of_positive v =
    match v.positive with Some false -> "-" | _ -> ""
  in
  let string_of_year v =
    match v.year with None -> fail () | Some i -> Intervals.V.to_string i
  in
  let string_of_month v =
    match v.month with None -> fail () | Some i -> Intervals.V.to_string i
  in
  let string_of_day v =
    match v.day with None -> fail () | Some i -> Intervals.V.to_string i
  in
  let string_of_hour v =
    match v.hour with None -> fail () | Some i -> Intervals.V.to_string i
  in
  let string_of_minute v =
    match v.minute with None -> fail () | Some i -> Intervals.V.to_string i
  in
  let string_of_second v =
    match v.second with None -> fail () | Some i -> Intervals.V.to_string i
  in
  let string_of_date v =
    sprintf "%s-%s-%s" (string_of_year v) (string_of_month v) (string_of_day v)
  in
  let string_of_timezone v =
    match v.timezone with
    | Some (positive, hour, minute) ->
        sprintf "Z%s%s:%s" (if not positive then "-" else "")
          (Intervals.V.to_string hour) (Intervals.V.to_string minute)
    | None -> ""
  in
  let string_of_time v =
    sprintf "%s:%s:%s" (string_of_hour v) (string_of_minute v)
      (string_of_second v)
  in
  let v =
    List.fold_left
      (fun acc ((ns, name), value) ->
        if ns <> Ns.empty then fail ();
        (match Utf8.get_str name with
        | "year" -> { acc with year = Some (parse_int value) }
        | "month" -> { acc with month = Some (parse_int value) }
        | "day" -> { acc with day = Some (parse_int value) }
        | "hour" -> { acc with hour = Some (parse_int value) }
        | "minute" -> { acc with minute = Some (parse_int value) }
        | "second" -> { acc with second = Some (parse_int value) }
        | "timezone" -> { acc with timezone = Some (parse_timezone value) }
        | "time_kind" -> { acc with kind = Some (parse_time_kind value) }
        | "positive" -> { acc with positive = Some (parse_positive value) }
        | s -> assert false))
      null_value fields
  in
  let s =
    match v.kind with
    | None -> fail ()
    | Some Duration ->
        sprintf "%sP%s%s%s%s"
          (string_of_positive v)
          (match v.year with Some v -> Intervals.V.to_string v ^ "Y" | _ -> "")
          (match v.month with Some v -> Intervals.V.to_string v ^ "M" | _ -> "")
          (match v.day with Some v -> Intervals.V.to_string v ^ "D" | _ -> "")
          (if v.hour = None && v.minute = None && v.second = None then
            ""
          else
            "T" ^
            (match v.hour with Some v -> Intervals.V.to_string v ^ "H" | _ ->
              "") ^
            (match v.minute with Some v -> Intervals.V.to_string v ^ "M" | _ ->
              "") ^
            (match v.second with Some v -> Intervals.V.to_string v ^ "S" | _ ->
              ""))
    | Some DateTime ->
        sprintf "%s%sT%s%s" (string_of_positive v) (string_of_date v)
          (string_of_time v) (string_of_timezone v)
    | Some Time ->
        sprintf "%s%s%s" (string_of_positive v) (string_of_time v)
          (string_of_timezone v)
    | Some Date ->
        sprintf "%s%s%s" (string_of_positive v) (string_of_date v)
          (string_of_timezone v)
    | Some GYearMonth ->
        sprintf "%s%s-%s%s" (string_of_positive v) (string_of_year v)
          (string_of_month v) (string_of_timezone v)
    | Some GYear ->
        sprintf "%s%s%s" (string_of_positive v) (string_of_year v)
          (string_of_timezone v)
    | Some GMonthDay ->
        sprintf "--%s%s%s" (string_of_month v) (string_of_day v)
          (string_of_timezone v)
    | Some GDay ->
        sprintf "---%s%s" (string_of_day v) (string_of_timezone v)
    | Some GMonth ->
        sprintf "--%s--%s" (string_of_month v) (string_of_timezone v)
  in
  Utf8.mk s

695
696
697
698
699
700
701
702
703
704
705
706
707
  (** {2 API} *)

let is_builtin = Hashtbl.mem builtins
let iter_builtin f =
  Hashtbl.iter (fun _ (type_def, _, _) -> f type_def) builtins

let lookup name = Hashtbl.find builtins name

let fst (x,_,_) = x
let snd (_,y,_) = y
let trd (_,_,z) = z

let get_builtin name          = fst (lookup name)
708
let cd_type_of_builtin name   = snd (lookup name)
709
let validate_builtin name     = trd (lookup name)
710