parser.mly 24 KB
Newer Older
1
%{
2

3
4
5
open Ast
open Ident

Kim Nguyễn's avatar
Kim Nguyễn committed
6
let tloc (i,j) = (i.Lexing.pos_cnum, j.Lexing.pos_cnum)
7
let nopos = (-1,-1)
Kim Nguyễn's avatar
Kim Nguyễn committed
8
let lnopos = Lexing.dummy_pos, Lexing.dummy_pos
9

10
11
12
(* Abort parsing with [Ast.Parsing_error msg], reported at the source span
   [pos] (a pair of Lexing positions, converted to offsets by [tloc]).
   This function never returns normally. *)
let parsing_error pos msg =
  let start_ofs, end_ofs = tloc pos in
  let err = Ast.Parsing_error msg in
  Cduce_loc.raise_loc start_ofs end_ofs err
13
14

let mk loc x = Cduce_loc.mk_located (tloc loc) x
Kim Nguyễn's avatar
Kim Nguyễn committed
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
(* Wrap [x] as a located node that carries no source location. *)
let mknoloc x = Cduce_loc.mknoloc x
(* Turn a pair of Lexing positions into a Cduce_loc location, going through
   the character offsets computed by [tloc]. *)
let lop pos = Cduce_loc.loc_of_pos (tloc pos)
(* Attach the location of the span [pos] to the expression [e]. *)
let exp pos e = LocatedExpr (lop pos, e)
(* The dummy location exported by Cduce_loc. *)
let noloc = Cduce_loc.noloc 


(* Unescape an identifier as written in the source: every backslash must be
   followed by a dot and the pair is replaced by a single dot; all other
   characters are copied unchanged.  A backslash followed by anything other
   than a dot trips the assertion. *)
let ident s =
  let len = String.length s in
  let out = Buffer.create len in
  let rec copy_from pos =
    if pos = len then Buffer.contents out
    else if s.[pos] = '\\' then begin
      assert (s.[pos + 1] = '.');
      Buffer.add_char out '.';
      copy_from (pos + 2)
    end
    else begin
      Buffer.add_char out s.[pos];
      copy_from (pos + 1)
    end
  in
  copy_from 0

(* Both functions unescape [s] with the string-level [ident] defined just
   above and wrap the result with [U.mk].  The second definition shadows
   the string-level [ident]: from this line on, [ident] returns the [U.mk]
   value, so the order of these two lines matters. *)
let label s = U.mk (ident s)
let ident s = U.mk (ident s)

(* Fold a non-empty list of patterns into right-nested products:
   [p1; p2; p3] becomes Prod (p1, Prod (p2, p3)), every Prod node being
   located at [loc].  A singleton list is returned unchanged.  The grammar
   never passes an empty list, hence the assertion. *)
let rec multi_prod loc pats =
  match pats with
  | [] -> assert false
  | [ last ] -> last
  | first :: rest -> mk loc (Prod (first, multi_prod loc rest))

(* Expression counterpart of [multi_prod]: fold a non-empty list of
   expressions into right-nested pairs, [e1; e2; e3] becoming
   Pair (e1, Pair (e2, e3)).  A singleton list is returned unchanged and an
   empty list is a programming error. *)
let rec tuple exprs =
  match exprs with
  | [] -> assert false
  | [ only ] -> only
  | head :: rest -> Pair (head, tuple rest)

(* Unlocated pattern built from [Types.char Chars.any], i.e. the character
   type over the whole character set. *)
let char = mknoloc (Internal (Types.char Chars.any))
(* Regular expression matching any sequence of such characters; it is what
   the PCDATA identifier expands to inside regexps. *)
let string_regexp = Star (Elem char)

(* Walk the string [s] with the Encodings.Utf8 cursor API and return, in
   order, every value produced by [Utf8.next] (presumably the code points
   of [s] -- confirm against Encodings). *)
let seq_of_string s =
  let open Encodings in
  let text = Utf8.mk s in
  let stop = Utf8.end_index text in
  let rec collect acc pos =
    if Utf8.equal_index pos stop then List.rev acc
    else
      let code, next_pos = Utf8.next text pos in
      collect (code :: acc) next_pos
  in
  collect [] (Utf8.start_index text)


(* Decode the contents [s] of a character literal.  The literal must decode
   to exactly one element of [seq_of_string s], which is returned; an empty
   or multi-character literal is reported as a parsing error at [loc].
   NOTE(review): the message below spells the word literal with a double t;
   it is reproduced unchanged because it is user-visible output. *)
let parse_char loc s =
  match seq_of_string s with
  | [ code ] -> code
  | [] | _ :: _ :: _ ->
    parsing_error loc (Format.sprintf "Invalid character litteral '%s'" s)
Kim Nguyễn's avatar
Kim Nguyễn committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

(* Build one record-pattern field named [lab].
   [def] is the optional field specification as produced by field_pat: a
   triple (optional flag, pattern, optional else-pattern).  When it is
   absent the field is punned: it binds a pattern variable that has the
   same name as the label.  An optional field has its pattern wrapped in
   [Optional], located at [loc]. *)
let mk_rec_field loc lab def =
  let is_optional, field_pattern, else_pattern =
    match def with
    | Some spec -> spec
    | None -> (false, mknoloc (PatVar [ ident lab ]), None)
  in
  let field_pattern =
    if is_optional then mk loc (Optional field_pattern) else field_pattern
  in
  (label lab, (field_pattern, else_pattern))


(* Tell whether an expression is the bare identifier "not", looking through
   any number of location wrappers.  app_expr uses it to turn an
   application of that identifier into a logical negation. *)
let rec is_not e =
  match e with
  | LocatedExpr (_, inner) -> is_not inner
  | Var id -> U.to_string id = "not"
  | _ -> false

(* Binary operators are encoded as curried applications of the variable
   named [op]: the result is Apply (Apply (Var op, e1), e2). *)
let apply_op2_noloc op e1 e2 = Apply (Apply (Var (ident op), e1), e2)
(* Same as [apply_op2_noloc], with the result located at the span [loc]. *)
let apply_op2 loc op e1 e2 = exp loc (apply_op2_noloc op e1 e2)

(* Small constructors for derived expression forms.  None of them attaches
   a source location; callers wrap the result with [exp] when needed. *)

(* Assignment: call the "set" field of [e1] on [e2]. *)
let set_ref e1 e2 = Apply (Dot (e1, U.mk "set"), e2)
(* Dereference: call the "get" field of [e] on [cst_nil]. *)
let get_ref e = Apply (Dot (e, U.mk "get"), cst_nil)
(* Local binding, encoded as a match with the single branch [p -> e2]. *)
let let_in e1 p e2 =  Match (e1, [p,e2])
(* Sequencing: bind the result of [e1] to [pat_nil], then evaluate [e2]. *)
let seq e1 e2 = let_in e1 pat_nil e2
(* Concatenation, encoded as an application of the "@" operator. *)
let concat e1 e2 = apply_op2_noloc "@" e1 e2

let id_dummy = U.mk "$$$"
87
88
89
90


%}
/* Keywords */
Kim Nguyễn's avatar
Kim Nguyễn committed
91
92
%token HASH_PRINT_TYPE "#print_type"
%token HASH_DUMP_VALUE "#dump_value"
93
%token HASH_ASCII HASH_LATIN1 HASH_UTF8
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
%token AND   "and"
%token DEBUG "debug"
%token DIV   "div"
%token ELSE  "else"
%token FROM  "from"
%token FUN   "fun"
%token IF    "if"
%token IN    "in"
%token INCLUDE "include"
%token LET     "let"
%token MAP     "map"
%token MATCH   "match"
%token MOD     "mod"
%token NAMESPACE "namespace"
%token OFF       "off"
%token ON        "on"
%token OPEN      "open"
%token OR        "or"
%token REF       "ref"
%token SCHEMA    "schema"
%token SELECT    "select"
%token THEN      "then"
%token TRANSFORM "transform"
%token TRY       "try"
%token TYPE      "type"
%token USING     "using"
%token VALIDATE  "validate"
%token WHERE     "where"
%token WITH      "with"
%token XTRANSFORM "xtransform"


/* Operators */
%token COLEQ ":="
%token MINUSGT  "->"
%token EQ "=" LTEQ "<=" LTLT "<<" GTGT ">>" GTEQ ">=" BANGEQ "!="
%token PLUS "+" MINUS "-" AT "@"
%token BARBAR "||" BAR "|"
Kim Nguyễn's avatar
Kim Nguyễn committed
132
%token SETMINUS
133
134
135
136
137
138
139
140
141
142
143
144
145
%token STAR "*"
%token AMPAMP "&&" AMP "&"
%token STARSTAR "**"
%token SLASH "/"
%token SLASHAT "/@"
%token SLASHSLASH "//"
%token DOT "."
%token BQUOTE "`"
%token BANG "!"
%token COLCOL "::"
%token DOTDOT ".."
%token MINUSMINUS "--"
%token QMARKQMARK "??" PLUSQMARK "+?" STARQMARK "*?"
146
147
%token EQQMARK "=?"
%token UNDERSCORE "_"
148
149
150

/* Separators */
%token LP "(" RP ")" LSB "[" RSB "]" LT "<" GT ">" LCB "{" RCB "}"
Kim Nguyễn's avatar
Kim Nguyễn committed
151
%token COLON ":" SEMI ";" SEMISEMI ";;" COMMA "," QMARK "?"
152
153
154
155
156
157
158

/* Terminals */
%token <string> IDENT
%token <string> ANY_IN_NS
%token <string> STRING1
%token <string> STRING2
%token <string> INT
Kim Nguyễn's avatar
Kim Nguyễn committed
159
%token <string> HASH_DIRECTIVE
160
%token <Ast.pprog> RESOLVED_INCLUDE
161
162
163
164
165
%token EOI

/* Priorities */
%nonassoc "in"
%nonassoc "with"
166
167
%nonassoc "|"
%nonassoc below_SEMI
168
169
170
%nonassoc ";"
%nonassoc "let" "namespace"
%nonassoc ";;" 
171
172
173
%right ":="
%nonassoc "ref"
%right "from" "where" "and"
Kim Nguyễn's avatar
Kim Nguyễn committed
174
175
%nonassoc "then"
%nonassoc "else"
176
177
178
179
180
%left "or" "||"
%left "&&"
%left "=" "<<" ">>" "<=" ">=" "!="
%left "+" "-" "@"
%left "*" "div" "mod"
Kim Nguyễn's avatar
Kim Nguyễn committed
181
%left SETMINUS "//" "/@" "/"
182
%nonassoc ":"
Kim Nguyễn's avatar
Kim Nguyễn committed
183
%nonassoc "!" unary_op
184
185
186
%left "."
%nonassoc "," ")"

Kim Nguyễn's avatar
Kim Nguyễn committed
187
188
189
%start <Ast.pprog> prog
%start <Ast.pprog> top_phrases
%type <Ast.ppat> pat 
190
191
%%

Kim Nguyễn's avatar
Kim Nguyễn committed
192
193
194
195
196
/* Macros */

/* loc(X) parses an X and wraps its semantic value with mk, located at the
   span covered by X. */
%inline loc(X):
x=X           { mk $sloc x }
;
197

198
199
200
201
/* Inlined optional X returned as a list: the empty list when X is absent,
   a singleton list holding its value otherwise. */
%inline iloption(X):
              { [] }
| x = X       { [x] }
;
Kim Nguyễn's avatar
Kim Nguyễn committed
202
203
204
205

/*  Toplevel definitions    */


206
top_phrases:
207
| e = multi_expr ";;" { [ mk $sloc (EvalStatement e) ] }
208
| p = list(prog_item) ";;" { List.concat p }
209
210
211
;

prog:
212
| e = opt_prog_expr l = prog_items* EOI { e @ List.concat l }
213
214
;

215
216
217
218
219
%inline opt_prog_expr:
 l = iloption(multi_expr) { match l with
      [] -> []
      | e :: _ -> [ mk $sloc (EvalStatement e) ]
   }
220
221
;

222
223
prog_items:
";;" e = opt_prog_expr { e }
224
| p = prog_item { p  }
225
226
;

227
prog_item:
228
| item = loc(prog_item_) { [item] }
229
| "include" items = RESOLVED_INCLUDE { items } 
230
231
232
| HASH_ASCII | HASH_LATIN1 | HASH_UTF8
| "include" STRING2 { [] }

233
234
235
;

%inline prog_item_:
236
237
| l = let_binding {   let f, p, e = l in 
                      if f then FunDecl e  else  LetDecl (p, e)
Kim Nguyễn's avatar
Kim Nguyễn committed
238
 }
239

Kim Nguyễn's avatar
Kim Nguyễn committed
240
| n = namespace_binding { 
241
match n with
242
243
    | `Prefix (name,ns) ->  Namespace (name, ns) 
    | `Keep b ->  KeepNs b 
244
}
245

246
| "type" x = ident_or_keyword "=" t = pat { 
247
248
                let id = lop $loc(x), ident x in TypeDecl (id, t)  }

249
| "using" name = IDENT "=" cu = ident_or_string2 { 
250
251
252
                       Using (U.mk name, U.mk cu) 
                    }

Kim Nguyễn's avatar
Kim Nguyễn committed
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
| "open" ids = separated_nonempty_list(".", ident_or_keyword) { 
 Open (List.map ident ids)
 }
| "schema" name = IDENT "=" uri = STRING2 { 
    Cduce_loc.protect_op "schema";
    SchemaDecl (U.mk name, uri)
 }
| "debug" d = IDENT "(" l = nonempty_list (pat) ")" { 
  let dir = match d, l with
  "filter", [t; p] -> `Filter(t, p)
  | "accept", [p] -> `Accept p
  | "compile", t ::( _ :: _ as p) -> `Compile (t, p)
  | "sample", [t] -> `Sample t
  | "subtype", [t1; t2] -> `Subtype (t1, t2)
  | "single", [t] -> `Single t
268
  | _ -> parsing_error $loc(d) (Format.sprintf "Invalid debug directive %s" d) 
Kim Nguyễn's avatar
Kim Nguyễn committed
269
270
271
272
  in Directive (`Debug dir)
 }
/* Argument-less toplevel directives.  The token carries the directive text
   with its leading hash sign; an unknown name is a parsing error reported
   at the position of the token. */
| d = HASH_DIRECTIVE {
  let dir = match d with
    | "#verbose" -> `Verbose
    (* The second spelling is the misspelled name this rule used to
       require; it is still accepted so existing scripts keep working. *)
    | "#silent" | "#slient" -> `Silent
    | "#quit" -> `Quit
    | "#env" -> `Env
    | "#reinit_ns" -> `Reinit_ns
    | "#help" -> `Help
    | "#builtins" -> `Builtins
    | _ -> parsing_error $loc(d) (Format.sprintf "Invalid toplevel directive %s" d)
  in
  Directive dir
}
| HASH_PRINT_TYPE t = pat { Directive(`Print_type t) }
284
| HASH_DUMP_VALUE e = expr { Directive (`Dump e) }
285
286
287
;

%inline ident_or_string2:
Kim Nguyễn's avatar
Kim Nguyễn committed
288
| s = IDENT { s }
289
290
291
292
293
| s = STRING2 { s }
;


pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
294
295
| x = arrow_pat "where" l = and_pat_list { mk $sloc (Recurs(x, List.rev l)) }
| x = arrow_pat { x }
296
;
Kim Nguyễn's avatar
Kim Nguyễn committed
297

298
and_pat_list:
Kim Nguyễn's avatar
Kim Nguyễn committed
299
300
301
| id = located_ident "=" p = pat { [ (fst id, snd id, p) ] }
| l = and_pat_list "and" id = located_ident "=" p = pat 
        { (fst id, snd id, p)::l }
302
303
304
;

arrow_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
305
306
307
308
| x = or_pat "->" y = arrow_pat  { mk $sloc (Arrow(x, y)) }
| x = or_pat "@" y = arrow_pat { mk $sloc (Concat(x, y)) }
| x = or_pat "+" y = arrow_pat { mk $sloc (Merge(x, y)) }
| x = or_pat { x }
309
310
311
;

or_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
312
313
| x = or_pat "|" y = and_pat { mk $sloc (Or(x, y)) }
| x = and_pat { x }
314
315
316
;

and_pat:
317
| x = and_pat "&" y = var_pat { mk $sloc (And(x, y)) }
Kim Nguyễn's avatar
Kim Nguyễn committed
318
| x = and_pat SETMINUS y = var_pat { mk $sloc (Diff(x, y)) }
Kim Nguyễn's avatar
Kim Nguyễn committed
319
| x = var_pat { x }
320
321
322
;

var_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
323
324
325
326
327
328
329
330
331
| id = ident_or_keyword_no_ref_no_where ids = e_list
| id = ident_or_keyword_no_ref_no_where "." ids = separated_nonempty_list(".", ident_or_keyword) 
      { let iids = List.map ident (id::ids) in
        mk $sloc (PatVar iids) 
      }
| x = constr_pat { x }
;
%inline e_list:
| { [] }
332
333
;
constr_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
| "(" a = IDENT ":=" c = expr ")" { mk $sloc (Constant (ident a,c))}
| "(" l = separated_nonempty_list (",", pat) ")" {multi_prod $sloc l}
| i = char { mk $sloc (Internal (Types.char (Chars.mk_classes [i, i]))) }
| i = char "--" j = char { mk $sloc (Internal (Types.char (Chars.mk_classes [i, j]))) }
| i = int_or_star "--" j = int_or_star { 
  let open Intervals in
  match i, j with
    None, Some j -> 
      let j = V.mk j in
          mk $sloc (Internal (Types.interval (left j)))
    | Some i, None ->
     let i = V.mk i in
         mk $sloc (Internal (Types.interval (right i)))
    | Some i, Some j ->
     let i = V.mk i and j = V.mk j in
         mk $sloc (Internal (Types.interval (bounded i j)))
350
351
    | None, None -> parsing_error $sloc 
        (Format.sprintf "Invalid interval *--*")
Kim Nguyễn's avatar
Kim Nguyễn committed
352
353
354
355
356
357
358
 }
| i = INT { 
      let open Intervals in
      let i =  V.mk i  in
      mk $sloc (Internal (Types.interval (atom i)))
  }
| s = simple_pat { s }
359
360
361
;

simple_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
| "{" r = record_spec "}" { r }
| "ref" p = constr_pat { 
    let get_fun = mk $sloc (Arrow (pat_nil, p))
    and set_fun = mk $sloc (Arrow (p, pat_nil)) in
    let fields =
      [ label "get", (get_fun, None);
        label "set", (set_fun, None) ]
    in
      mk $sloc (Record (false, fields))
 }
| "!" a = IDENT {	mk $sloc (Internal Types.(abstract (Abstract.atom a)))}
| "`" t = tag_type { t }
| "[" r = regexp? q = option(";" q = pat { q }) "]" { 
      let r = match r with None -> Epsilon | Some r -> r in
      let r = 
        match q with 
        Some q -> let any = mk $sloc (Internal Types.any) in
                  Seq (r, Seq(Guard q, Star (Elem any)))
      | None -> r
      in mk $sloc (Regexp r)
 }
383
| "<" t = tag_type_or_pat a = attrib_spec">" c = var_pat { 
Kim Nguyễn's avatar
Kim Nguyễn committed
384
385
      mk $sloc (XmlT (t, multi_prod $sloc [a;c]))
 }
386
| "_" {  mk $sloc (Internal Types.any) }
Kim Nguyễn's avatar
Kim Nguyễn committed
387
388
389
390
391
392
393
394
395
396
| s = STRING2 { 
    let s = List.map 
      (fun c -> 
        mknoloc (Internal (Types.char Chars.(atom (V.mk_int c)))
        )) (seq_of_string s)
    in
    let s = s @ [ mknoloc (Internal (Sequence.nil_type))]
    in
    multi_prod $sloc s
 }
397
398
399
;

located_ident:
Kim Nguyễn's avatar
Kim Nguyễn committed
400
| i = IDENT { (lop $sloc ,ident i) }
401
402
403
;

char:
Kim Nguyễn's avatar
Kim Nguyễn committed
404
| c = STRING1  { parse_char $sloc c }
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
;

/* Every keyword token except else, ref and where, each returned as its own
   spelling.  The rules below add the three missing keywords back
   selectively, so that keywords can be used as identifiers wherever this
   does not clash with the surrounding syntax. */
%inline keyword_no_else_no_ref_no_where:
| "and" { "and" }
| "debug" { "debug" }
| "div" { "div" }
| "from" { "from" }
| "fun" { "fun" }
| "if" { "if" }
| "in" { "in" }
| "include" { "include" }
| "let" { "let" }
| "map" { "map" }
| "match" { "match" }
| "mod" { "mod" }
| "namespace" { "namespace" }
| "off" { "off" }
| "on" { "on" }
| "open" { "open" }
| "or" { "or" }
| "schema" { "schema" }
| "select" { "select" }
| "then" { "then" }
| "transform" { "transform" }
| "try" { "try" }
| "type" { "type" }
| "using" { "using" }
| "validate" { "validate" }
| "with" { "with" }
| "xtransform" { "xtransform" }
;

/* Any keyword at all, returned as its spelling. */
keyword:
k = keyword_no_else_no_ref_no_where { k }
| "ref" { "ref" }
| "else" { "else" }
| "where" { "where" }
;

/* An identifier or any keyword other than else.  Used for record field
   labels in other_rec_spec; a leading else label is handled separately by
   record_spec_fields. */
ident_or_keyword_no_else:
| s = keyword_no_else_no_ref_no_where { s }
| id = IDENT { id }
| "ref"  { "ref" }
| "where" { "where" }
;

/* An identifier or any keyword other than ref and where.  Used where a
   pattern variable may start (var_pat, regexp_simple): there ref
   introduces a reference pattern (simple_pat) and where introduces
   recursive definitions (pat). */
ident_or_keyword_no_ref_no_where:
| s =  keyword_no_else_no_ref_no_where { s }
| id = IDENT { id }
| "else"  { "else" }
;

/* An identifier or any keyword, returned as a plain string. */
ident_or_keyword:
| s = IDENT { s }
| k = keyword { k }
;

462
463


464
465
466
467
468
469
/* One bound of an integer interval pattern: Some applied to the integer
   literal text, or None for a star, which constr_pat treats as an open
   bound. */
int_or_star:
| i = INT   { Some i }
| "*"      { None }
;

tag_type:
470
471
472
473
474
475
476
l = loc (tag_type_) { l }
;

%inline tag_type_:
| "_"  { Internal (Types.atom (Atoms.any)) }
| a = ident_or_keyword { Cst (Atom (ident a)) }
| t = ANY_IN_NS { NsT (ident t) }
477
478
479
;

tag_type_or_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
480
481
| t = tag_type { t }
| "(" p = pat ")" { p }
482
483
484
;

attrib_spec:
Kim Nguyễn's avatar
Kim Nguyễn committed
485
486
| r = record_spec { r }
| "(" t = pat ")" { t }
487
488
489
;

record_spec:
Kim Nguyễn's avatar
Kim Nguyễn committed
490
491
| r = record_spec_fields op = boption("..") 
        { mk $sloc (Record(op, r)) }
492
493
494
;

record_spec_fields:
Kim Nguyễn's avatar
Kim Nguyễn committed
495
| "else" f = option(field_pat) ";"? l=other_rec_spec { 
496
    (mk_rec_field $sloc "else" f):: l 
Kim Nguyễn's avatar
Kim Nguyễn committed
497
 }
498
| l = other_rec_spec { l }
499
500
;
other_rec_spec: 
Kim Nguyễn's avatar
Kim Nguyễn committed
501
|               { [ ] }
502
| lab = ident_or_keyword_no_else f = option(field_pat) ";"? fields = other_rec_spec 
Kim Nguyễn's avatar
Kim Nguyễn committed
503
504
505
{ 
  (mk_rec_field $sloc lab f) :: fields
}
506
507
508
;

%inline field_eq:
Kim Nguyễn's avatar
Kim Nguyễn committed
509
510
 "="  { false }
|"=?" { true }
511
512
513
;

field_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
514
515
| e = field_eq x = arrow_pat  { (e, x, None) }
| e = field_eq x = arrow_pat "else" y = arrow_pat { (e, x, Some y) }
516
517
518
519
520
;



regexp:
Kim Nguyễn's avatar
Kim Nguyễn committed
521
522
523
524
525
526
| x = regexp "|" y = regexp_concat { 
      match x, y with
    |Elem x, Elem y -> Elem (mk $sloc (Or (x, y)))
    | _ -> Alt (x, y)
}
| r = regexp_concat { r }
527
528
529
;

regexp_concat:
Kim Nguyễn's avatar
Kim Nguyễn committed
530
531
| x = regexp_concat y = regexp_and { Seq (x, y) }
| r = regexp_and { r }
532
533
534
;

regexp_and:
Kim Nguyễn's avatar
Kim Nguyễn committed
535
536
537
| x = regexp_and "&" y = regexp_acc { 
  match x, y with
    Elem x, Elem y -> Elem (mk $sloc (And (x, y)))
538
539
  | _ -> parsing_error $sloc
          "Conjunction not allowed in regular expression"
Kim Nguyễn's avatar
Kim Nguyễn committed
540
 }
Kim Nguyễn's avatar
Kim Nguyễn committed
541
 | x = regexp_and SETMINUS y = regexp_acc { 
542
543
  match x, y with
    Elem x, Elem y -> Elem (mk $sloc (Diff (x, y)))
544
545
  | _ -> parsing_error $sloc
          "Difference not allowed in regular expression"
546
547
 }

Kim Nguyễn's avatar
Kim Nguyễn committed
548
| r = regexp_acc { r }
549
550
551
;

regexp_acc:
Kim Nguyễn's avatar
Kim Nguyễn committed
552
553
554
555
| a = IDENT "::" x = regexp_simple { 
  SeqCapture (lop $sloc, ident a, x)
 }
| x = regexp_simple { x }
556
557
558
;

regexp_simple:
Kim Nguyễn's avatar
Kim Nguyễn committed
559
560
561
562
563
564
565
566
567
568
569
570
571
572
| x = regexp_simple "*" { Star x }
| x = regexp_simple "*?" { WeakStar x }
| x = regexp_simple "+" { Seq (x, Star x) }
| x = regexp_simple "+?" { Seq (x, WeakStar x) }
| x = regexp_simple "?" { Alt (x, Epsilon) }
| x = regexp_simple "??" { Alt (Epsilon, x) }
| x = regexp_simple "**" i = INT { 
  let rec aux i accu =
	  if (i = 0) then accu else aux (pred i) (Seq (x, accu))
	in
	let i =
	  try
	    let i = int_of_string i in
	    if (i > 1024) then raise Exit else i  
573
    with Failure _ | Exit -> parsing_error $loc(i) "Repetition number too large"
Kim Nguyễn's avatar
Kim Nguyễn committed
574
  in
575
  if i <= 0 then parsing_error $sloc "Repetition number must be a positive integer";
Kim Nguyễn's avatar
Kim Nguyễn committed
576
577
578
579
580
  aux i Epsilon
 }
| "(" x = separated_nonempty_list(",", regexp) ")" { 
    match x with
      [ x ] -> x
581
582
      | _ -> let x = List.map (function Elem x -> x | _ ->
                parsing_error $sloc "Mixing regular expressions and products") x
Kim Nguyễn's avatar
Kim Nguyễn committed
583
584
585
586
             in 
             Elem (multi_prod $sloc x)
 }
| "(" a = IDENT ":=" c = expr ")" { Elem (mk $sloc (Constant (ident a, c))) }
587
| "/" p = var_pat { Guard p }
Kim Nguyễn's avatar
Kim Nguyễn committed
588
589
590
591
592
593
594
| i = char "--" j = char { 
  let open Chars in
  let i = V.mk_int i
	and j = V.mk_int j in
   Elem (mk $sloc (Internal (Types.char (char_class i j))))
 }
| s = STRING1 { 
595
596
597
598
599
600
601
602
603
604
605
606
  match seq_of_string s with
  [ c ] -> Elem (mk $sloc (Internal (Types.char (Chars.mk_classes [c, c]))))
  | l ->
    List.fold_right
      (fun c accu ->
        let c = Chars.V.mk_int c in
        let c = Chars.atom c in
        Seq (Elem (mknoloc (Internal (Types.char c))), accu))
      l
  	  Epsilon
 }
| i = INT {
Kim Nguyễn's avatar
Kim Nguyễn committed
607
608
609
610
611
612
613
614
615
616
617
618
619
620
 let open Intervals in
      let i =  V.mk i  in
      Elem (mk $sloc (Internal (Types.interval (atom i))))
}
| id = ident_or_keyword_no_ref_no_where { 
  match id with
    "PCDATA" -> string_regexp
  | _ -> Elem (mk $sloc (PatVar [ident id]))
 }
| id = ident_or_keyword_no_ref_no_where "." ids = separated_nonempty_list(".", ident_or_keyword) { 
  let iids = List.map ident (id::ids) in
        Elem (mk $sloc (PatVar iids)) 
 }
| p = simple_pat { Elem p }
621
622
623
624
;


namespace_binding:
Kim Nguyễn's avatar
Kim Nguyễn committed
625
626
627
628
629
630
631
632
633
634
  "namespace" uri = STRING2 {`Prefix(U.mk "", `Uri (Ns.Uri.mk (ident uri))) }
| "namespace" name = ident_or_keyword rem = namespace_binding_rem { 
  match name, rem with
  | _, `Idents ids -> 
  let ids = List.map (fun x -> ident x) (name :: ids) in
   `Prefix(U.mk "", `Path ids)
  | _, (`Uri _ as uri) ->  `Prefix(ident name, uri)
  | _, (`Path _ as path) ->`Prefix(ident name, path)
  | "on", `Empty -> `Keep true
  | "off", `Empty -> `Keep false
635
  | _ -> parsing_error $sloc "Invalid namespace specification"
Kim Nguyễn's avatar
Kim Nguyễn committed
636
 }
637
;
Kim Nguyễn's avatar
Kim Nguyễn committed
638

639
namespace_binding_rem:
Kim Nguyễn's avatar
Kim Nguyễn committed
640
641
642
643
644
645
646
"." idents = separated_nonempty_list(".", ident_or_keyword) { `Idents idents  }
| "=" uri = STRING2 { `Uri (Ns.Uri.mk (ident uri))  }
| "=" ids = separated_nonempty_list(".", ident_or_keyword) { 
    let ids = List.map (fun x -> ident x) ids in
    `Path ids
  }
|  {`Empty}
647
648
649
;

let_binding:
Kim Nguyễn's avatar
Kim Nguyễn committed
650
651
652
653
654
655
656
657
| "let" "fun" f = located_ident "(" fd = fun_decl_after_lpar 
| "let" f = located_ident "(" fd = fun_decl_after_lpar { 
    let p = mk $sloc (PatVar [ snd f ]) in
    let fun_iface, fun_body = fd in
    let abst = { fun_name = Some f; fun_iface; fun_body  } in
    let e = exp $sloc (Abstraction abst) in
    (true, p, e)  
 }
658
659
| "let" p = ident_or_let_pat "=" e = multi_expr { (false, p, e) }
| "let" p = ident_or_let_pat ":" check=boption("?") t = pat "=" e = multi_expr { 
Kim Nguyễn's avatar
Kim Nguyễn committed
660
661
  (false, p, if check then Check(e, t) else Forget (e, t))
 } 
662
663
664
;

%inline fun_decl_after_lpar:
Kim Nguyễn's avatar
Kim Nguyễn committed
665
666
667
668
669
670
671
  x = or_pat "->" y = or_pat 
  other_arrows = list (";" p1 = or_pat "->" p2 = or_pat {(p1,p2)}) 
  ")"
  b = branches { 
      (x, y) :: other_arrows, b
   }
| x = or_pat ":" t = pat args = loption(p = pair(",", 
672
673
                      separated_nonempty_list(",", x = pat ":" t = pat { (x, t)})) { snd p})
  ")"
Kim Nguyễn's avatar
Kim Nguyễn committed
674
  others = list(delimited("(",separated_nonempty_list(",", separated_pair(pat,":", pat)) ,")"))
675
 ":" tres = pat "=" body = multi_expr { 
Kim Nguyễn's avatar
Kim Nguyễn committed
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
   let mkfun args =
	       multi_prod lnopos (List.map snd args),
	       multi_prod lnopos (List.map fst args)
	 in
	 let tres, body = List.fold_right
		 (fun args (tres,body) ->
		    let (targ,arg) = mkfun args in
		    let e = Abstraction
			      { fun_name = None; fun_iface = [targ,tres];
				    fun_body = [arg,body] }
        in
		    let t = mknoloc (Arrow (targ,tres)) in
		    (t,e)
		 )
		 others (tres, body)
    in
	  let (targ,arg) = mkfun ((x,t) :: args) in
	  [(targ,tres)],[(arg,body)]
	  
  }
696
697
698
699

;

ident_or_let_pat:
Kim Nguyễn's avatar
Kim Nguyễn committed
700
701
| id = located_ident { mk $sloc (PatVar [ (snd id) ]) }
| p = constr_pat { p }
702
| p1 = ident_or_let_pat "&" p2 =constr_pat { mk $sloc (And(p1, p2)) }
703
704
;

Kim Nguyễn's avatar
Kim Nguyễn committed
705
706
707
708
709
/* Non-empty list of branches separated by vertical bars.  The rule is
   left-recursive and conses each new branch in front, so the list it
   returns is in reverse source order; the inlined branches rule puts it
   back in order. */
branches_:
| b = branch {  [ b ] }
| bl = branches_ "|" b = branch { b :: bl }
;
%inline branches:
710
"|"? b = branches_ { List.rev b }
711
712
713
;

branch:
714
715
716
717
718
719
 p = or_pat "->" e = multi_expr { (p, e) }
;

multi_expr:
e = expr %prec below_SEMI { e }
| e1 = expr SEMI e2 = multi_expr { exp $sloc (seq e1 e2) }
720
721
722
;

expr:
723
724
725
726
727
728
| "match" e = multi_expr "with" b = branches { exp $sloc (Match (e, b)) }
| "try" e = multi_expr "with" b = branches { exp $sloc (Try (e, b)) }
| "map" e = multi_expr "with" b = branches { exp $sloc (Map (e, b)) }
| "transform" e = multi_expr "with" b = branches { exp $sloc (Transform (e, b)) }
| "xtransform" e = multi_expr "with" b = branches { exp $sloc (Xtrans (e, b)) }
| "validate" e = multi_expr "with" r = schema_ref { 
Kim Nguyễn's avatar
Kim Nguyễn committed
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
  exp $sloc (Validate (e, [fst r; snd r]))
 }
| "select" e = expr "from" l = from_list { exp $sloc (SelectFW (e, l, [])) }
| "select" e = expr "from" l = from_list w = where_condition { exp $sloc (SelectFW (e, l, w)) }
| "fun" f = located_ident? "(" fd = fun_decl_after_lpar {
   let fun_iface, fun_body = fd in
    let abst = { fun_name = f; fun_iface; fun_body  } in
    exp $sloc (Abstraction abst)
  }
| "if" e = expr "then" e1 = expr "else" e2 = expr { 
    exp $sloc (if_then_else e e1 e2)
  }
| "if" e = expr "then" e1 = expr { 
    exp $sloc (if_then_else e e1 cst_nil)
  }
744
| l = let_binding "in" e2 = multi_expr { 
Kim Nguyễn's avatar
Kim Nguyễn committed
745
746
  let _, p, e1 = l in exp $sloc (let_in e1 p e2)
 }
747
| n = namespace_binding "in" e2 = multi_expr { 
Kim Nguyễn's avatar
Kim Nguyễn committed
748
749
750
751
752
753
754
755
  match n with
  `Prefix (name, ns) ->
    exp $sloc (NamespaceIn (name, ns, e2))
  | `Keep f -> exp $sloc (KeepNsIn (f, e2))
 }
| e = expr ":" check=boption("?") p = var_pat { 
  exp $sloc (if check then Check(e, p) else Forget (e, p))
 }
756
(*| e1 = expr ";" e2 = expr { exp $sloc (seq e1 e2)}*)
Kim Nguyễn's avatar
Kim Nguyễn committed
757
758
759
760
761
762
763
| "ref" p = pat e = expr { exp $sloc (Ref (e, p))}
| e1 = expr ":=" e2 = expr { exp $sloc (set_ref e1 e2) }
| e1 = expr op = binop e2 = expr { match op with
  | "||" -> exp $sloc (logical_or e1 e2)
  | "&&" -> exp $sloc (logical_and e1 e2)
  |  _ -> apply_op2 $sloc op e1 e2
 }
Kim Nguyễn's avatar
Kim Nguyễn committed
764
| e = expr SETMINUS l = ident_or_keyword { exp $sloc (RemoveField(e, label l)) }
Kim Nguyễn's avatar
Kim Nguyễn committed
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
| e = expr "/" p = var_pat { 
  let tag = mk $sloc (Internal (Types.atom (Atoms.any))) in
	let att = mk $sloc (Internal Types.Record.any) in
	let any = mk $sloc (Internal Types.any) in
	let re = Star(Alt(SeqCapture(noloc,id_dummy,Elem p), Elem any)) in
	let ct = mk $sloc (Regexp re) in
        let p = mk $sloc (XmlT (tag, multi_prod $sloc [att;ct])) in
	exp $sloc (Transform (e,[p, Var id_dummy]))
 }
| e = expr "/@" a = ident_or_keyword { 
  let tag = mk $sloc (Internal (Types.atom Atoms.any)) in
  let any = mk $sloc (Internal Types.any) in
  let att = mk $sloc (Record
			    (true, [(label a,
				     (mk $sloc (PatVar [id_dummy]),
				      None))])) in
  let p = mk $sloc (XmlT (tag, multi_prod $sloc [att;any])) in
  let t = (p, Pair (Var id_dummy,cst_nil)) in
      exp $sloc (Transform (e,[t]))

 }
/* Descendant selection: an expression e, the double-slash operator, then a
   pattern p.  The construct is desugared on the spot.  Writing stk for the
   variable named by the first binding below, dummy for id_dummy and nil
   for cst_nil, the action builds the AST of (roughly, in concrete syntax):

     let stk = ref [ p* ] nil in
     let fun f (x : [ Any* ]) : [ Any* ] =
       xtransform x with
         (dummy & p) & <_ ..>[ y::Any* ] -> stk := !stk @ (dummy, nil); f y
     in
     let _ = f e in
     !stk

   All synthesized patterns are unlocated, except the product inside the
   XML pattern, the name of f and the final expression, which use the span
   of the whole production. */
| e = expr "//" p = var_pat { 
  let stk = U.mk "$stack" in
  let y = U.mk "y" in
  let x = U.mk "x" in
  let f = U.mk "f" in
  (* Append the matched element to the contents of the accumulator. *)
  let assign =
    set_ref
      (Var stk)
      (concat (get_ref (Var stk)) (Pair (Var id_dummy,cst_nil))) in
  (* XML element with any tag and any attributes, whose whole content is
     captured in y. *)
  let tag = mknoloc (Internal (Types.atom (Atoms.any))) in
  let att = mknoloc (Internal Types.Record.any) in
  let any = mknoloc (Internal Types.any) in
  let re = (SeqCapture(noloc,y,Star(Elem(any)))) in
  let ct = mknoloc (Regexp re) in
  let children = mknoloc (XmlT (tag, multi_prod $sloc [att;ct])) in
  (* Branch pattern: bind the element to id_dummy, require it to match p,
     and require it to be an XML element. *)
  let capt = mknoloc (And (mknoloc (And (mknoloc (PatVar [id_dummy]),p)),children)) in
  (* Branch body: record the element, then recurse on its content. *)
  let assign = seq assign ( (Apply(Var(f) , Var(y) ) ) ) in
  let xt = Xtrans ((Var x),[capt,assign]) in
  (* The accumulator: a reference initialised with cst_nil. *)
  let rf = Ref (cst_nil, mknoloc (Regexp (Star(Elem p)))) in
        let targ = mknoloc (Regexp(Star(Elem(any)))) in
  let tres = targ in
  let arg = mknoloc(PatVar [x]) in
        let abst = {fun_name = Some (lop $sloc,ident "f") ; fun_iface = [(targ, tres)] ;fun_body = [(arg,xt)] } in
  (* Bind the accumulator and f, run f on e for its side effects only, and
     return the contents of the accumulator. *)
  let body =
    let_in rf (mknoloc (PatVar [stk]))
    (let_in ((Abstraction abst)) (mknoloc (PatVar[ident "f"]))
            (let_in ((Apply(Var(f) , e) )  ) (mknoloc (Internal Types.any)) (get_ref (Var stk))))
  in
	exp $sloc body
 }
| "-" e = expr %prec unary_op { 
    apply_op2 $sloc "-" (Integer (Intervals.V.mk "0")) e
 }
| e = app_expr { e } /* includes not */
820
;
Kim Nguyễn's avatar
Kim Nguyễn committed
821
822


823
824
825
826
827
828
829
830
831
%inline binop:
| "=" { "=" }
| "<=" { "<=" }
| ">=" { ">=" }
| "!=" { "!=" }
| "+" { "+" }
| "-" { "-" }
| "*" { "*" }
| "@" { "@" }
Kim Nguyễn's avatar
Kim Nguyễn committed
832
833
| "||" { "||" }
| "or" { "||" }
834
| "&&" { "&&" }
835
836
837
838
839
840
/* The four operators below use a different internal name */
| "div" { "/" }
| "mod" { "%" }
| "<<" { "<" }
| ">>" { ">" }

841
842
843
;

schema_ref:
Kim Nguyễn's avatar
Kim Nguyễn committed
844
s = IDENT "." typ = ident_or_keyword { (U.mk s, ident typ) }
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
;

/* FROM clauses of a select expression: a non-empty, comma-separated list
   of (pattern, expression) pairs.

   from_list_rev is left-recursive and conses each new clause in front, so
   it yields the clauses in reverse source order.  from_list restores the
   source order before the list reaches SelectFW, exactly as branches does
   for branches_ and where_condition does for and_expr_list.  Because
   from_list is inlined, the productions that use it are the same as when
   they referred to the left-recursive rule directly.
   NOTE(review): this assumes the consumers of SelectFW expect the clauses
   in source order, so that a clause can refer to the variables bound by
   the clauses written before it -- confirm against the typer. */
from_list_rev:
p = pat "in" e = expr { [ (p, e) ] }
| l = from_list_rev "," p = pat "in" e = expr { (p, e) :: l }
;

%inline from_list:
l = from_list_rev { List.rev l }
;

/* WHERE part of a select expression: the conditions joined by the and
   keyword, returned in source order (and_expr_list builds them reversed). */
where_condition:
| "where" l = and_expr_list { List.rev l }
;
/* Non-empty list of expressions separated by the and keyword, accumulated
   in reverse source order.  The single-expression case is given the
   precedence of the and token. */
and_expr_list:
| e = expr %prec AND { [ e ] }
| l = and_expr_list "and" e = expr { e :: l }
;

app_expr:
861
| e1=app_expr e2=no_seq_expr {
Kim Nguyễn's avatar
Kim Nguyễn committed
862
863
864
865
  if is_not e1 then exp $sloc (logical_not e2)
  else  exp $sloc (Apply (e1, e2)) 
  }
| e = no_seq_expr { e }
866
867
868
;

no_seq_expr:
Kim Nguyễn's avatar
Kim Nguyễn committed
869
870
| "!" e = no_seq_expr { exp $sloc (get_ref e) }
| c = char { exp $sloc (Char (Chars.V.mk_int c)) }
871
872
873
874
| e = simple_expr %prec BANG { e }
;

simple_expr:
Kim Nguyễn's avatar
Kim Nguyễn committed
875
876
| e = simple_expr "." l = ident_or_keyword { exp $sloc (Dot (e, label l)) }
| "(" e = expr  l = with_annot ")" { exp $sloc (TyArgs (e, l)) }
877
| "(" l = separated_nonempty_list(",", multi_expr) ")" { exp $sloc (tuple l) }
Kim Nguyễn's avatar
Kim Nguyễn committed
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
| "[" l = list(seq_elem) tl = option (";" e = expr { e })
   _le = "]" { 
  let loc_end = $loc(_le) in
  let e = match tl with Some e -> e | None -> cst_nil in
  let e = exp loc_end e in
  let loc_end = snd loc_end in
  let l = List.fold_right (fun x q ->
    match x with
      `String (loc, i, j, s) -> exp loc (String (i, j, s, q))
      |`Elems ((loc_s, _), x) -> exp (loc_s, loc_end) (Pair (x, q))
      |`Explode x -> concat x q
    ) l e 
  in
  exp $sloc l

 }
| "<" te = tag_expr ae = attrib_expr ">" e = no_seq_expr { 
    exp $sloc (Xml (te, Pair(ae, e)))
 }
| "{" r = expr_record_spec "}" { r }
| s = STRING2 { 
    let s = U.mk s in
	  exp $sloc (String (U.start_index s,
                       U.end_index s, s, cst_nil))
}
| v = IDENT { exp $sloc (Var (ident v)) }
| "`" t = ident_or_keyword { exp $sloc (Atom (ident t)) }
| i = INT { exp $sloc (Integer (Intervals.V.mk i)) }
;

with_annot:
| "with" "{" l= list(pat) "}" { l }
910
911
912
913
;


seq_elem:
Kim Nguyễn's avatar
Kim Nguyễn committed
914
915
916
917
918
| s = STRING1 {
  let s = U.mk s in
	`String ($sloc, U.start_index s, U.end_index s, s) }
| e = simple_expr { `Elems ($sloc, e) }
| "!" e = simple_expr { `Explode e }
919
920
;
tag_expr:
Kim Nguyễn's avatar
Kim Nguyễn committed
921
922
| tag = ident_or_keyword { exp $sloc (Atom (ident tag)) }
| "(" e = expr ")" { exp $sloc e }
923
924
925
;
attrib_expr:
| e = expr_record_spec { e }
Kim Nguyễn's avatar
Kim Nguyễn committed
926
| "(" e = expr ")" { exp $sloc e }
927
928
;
expr_record_spec:
929
| fields = list (l = ident_or_keyword e = option ("=" e = no_seq_expr {e} )";"?{
Kim Nguyễn's avatar
Kim Nguyễn committed
930
931
932
933
934
935
936
937
    label l, (
      match e with
        Some e -> e
      | None -> Var (ident l))
  
  }) { 
  exp $sloc (RecordLitt fields)
   }
938
939
940
941
942
;



%%