(* wlexer.ml -- generated lexer; the "# N" line directives further down
   refer to its source, parser/wlexer.mll. *)
(* Character-class identifiers: small integers naming the classes that
   [one_char_classes], [classes] and [table] (later in this file) assign
   to code points. *)

(* General classes. *)
let eof = 0
let encoding_error = 1  (* initial value of every entry of [table] *)
let xml_char = 2
let blank = 3
let lowercase = 4
let uppercase = 5
let ascii_digit = 6

(* One class per punctuation character.  The suffix of each name is the
   hexadecimal code of the character, as paired up in [one_char_classes]. *)
let char_23 = 7   (* '#' *)
let char_5f = 8   (* '_' *)
let char_3c = 9   (* '<' *)
let char_3e = 10  (* '>' *)
let char_3d = 11  (* '=' *)
let char_2e = 12  (* '.' *)
let char_2c = 13  (* ',' *)
let char_3a = 14  (* ':' *)
let char_3b = 15  (* ';' *)
let char_2b = 16  (* '+' *)
let char_2d = 17  (* '-' *)
let char_2a = 18  (* asterisk *)
let char_2f = 19  (* '/' *)
let char_40 = 20  (* '@' *)
let char_26 = 21  (* '&' *)
let char_7b = 22  (* '{' *)
let char_7d = 23  (* '}' *)
let char_5b = 24  (* '[' *)
let char_5d = 25  (* ']' *)
let char_28 = 26  (* opening parenthesis *)
let char_29 = 27  (* closing parenthesis *)
let char_7c = 28  (* '|' *)
let char_3f = 29  (* '?' *)
let char_60 = 30  (* backquote *)
let char_22 = 31  (* double quote *)
let char_5c = 32  (* backslash *)
let char_27 = 33  (* single quote *)
let char_21 = 34  (* '!' *)

(* Unicode classes; their code-point ranges are listed in [classes]. *)
let unicode_base_char = 35
let unicode_ideographic = 36
let unicode_combining_char = 37
let unicode_digit = 38
let unicode_extender = 39

(* Association list from a character code to the identifier of the class
   holding only that character.  The characters are written as literals;
   the resulting codes and their order are those of the original table
   (0x23 -> 7, 0x5f -> 8, ..., 0x21 -> 34). *)
let one_char_classes =
  List.map (fun (ch, cl) -> (Char.code ch, cl)) [
    ('#', 7);
    ('_', 8);
    ('<', 9);
    ('>', 10);
    ('=', 11);
    ('.', 12);
    (',', 13);
    (':', 14);
    (';', 15);
    ('+', 16);
    ('-', 17);
    ('*', 18);
    ('/', 19);
    ('@', 20);
    ('&', 21);
    ('{', 22);
    ('}', 23);
    ('[', 24);
    (']', 25);
    ('(', 26);
    (')', 27);
    ('|', 28);
    ('?', 29);
    ('`', 30);
    ('"', 31);
    ('\\', 32);
    ('\'', 33);
    ('!', 34);
  ]

(* Number of character classes; the identifiers defined above run from
   0 to 39. *)
let nb_classes = 40

# 17 "parser/wlexer.mll"

  (* Set of registered keywords: filled by [register_kw], consulted by
     [token] to tell keywords from identifiers. *)
  let keywords = Hashtbl.create 17

  (* Set to true by [token] for the duration of a comment. *)
  let in_comment = ref false

  (* Shorthand for [Location.raise_loc]; called below as
     [error start_offset end_offset exn]. *)
  let error = Location.raise_loc

  (* Lexical errors reported through [error]. *)
  exception Illegal_character of char
  exception Unterminated_comment
  exception Unterminated_string
  exception Unterminated_string_in_comment

  (* Buffer for string literals (always encoded in UTF8). *)
  let string_buff = Buffer.create 1024

  (* [store_ascii c] appends the single character [c] to the buffer. *)
  let store_ascii = Buffer.add_char string_buff

  (* [store_char s] appends the string [s] to the buffer unchanged. *)
  let store_char  = Buffer.add_string string_buff

  (* [store_code n] appends the code point [n] through
     [Encodings.Utf8.store] (presumably as its UTF-8 byte sequence --
     confirm in [Encodings]). *)
  let store_code  = Encodings.Utf8.store string_buff

  (* Returns everything accumulated so far and empties the buffer. *)
  let get_stored_string () =
    let contents = Buffer.contents string_buff in
    Buffer.clear string_buff;
    contents

  (* [store_special c] stores the control character named by the escape
     letter [c]: n is newline, r is carriage return, t is tab.
     Raises [Illegal_character] (carrying a backslash) for any other
     letter. *)
  let store_special = function
    | 'n' -> store_ascii '\n'
    | 'r' -> store_ascii '\r'
    | 't' -> store_ascii '\t'
    | _ -> raise (Illegal_character '\\')


  (* Offset at which the string literal currently being read started;
     used to locate [Unterminated_string]. *)
  let string_start_pos = ref 0;;

  (* Start offsets of the comments currently open; [comment] pushes and
     pops at the head of the list. *)
  let comment_start_pos : int list ref = ref [];;

  (* [decimal_char s] drops the first and the last character of [s] and
     reads what remains as an integer.  It is applied to the lexeme of a
     decimal character escape (presumably a backslash, digits, then a
     semicolon).
     Raises [Failure] (from [int_of_string]) if the middle part is not a
     valid integer, and [Invalid_argument] if [s] has fewer than two
     characters. *)
  let decimal_char s =
    let digits = String.sub s 1 (String.length s - 2) in
    int_of_string digits


  (* [hexa_digit c] is the value (0-15) of the hexadecimal digit [c].
     Both lowercase and uppercase letters are accepted.
     Raises [Failure] on any other character. *)
  let hexa_digit = function
    | '0'..'9' as c -> (Char.code c) - (Char.code '0')
    | 'a'..'f' as c -> (Char.code c) - (Char.code 'a') + 10
    | 'A'..'F' as c -> (Char.code c) - (Char.code 'A') + 10
    | _ -> failwith "Invalid hexadecimal digit" (* TODO: error loc *)

  (* [hexa_char s] reads every character of [s] except the last one as a
     hexadecimal digit and returns the number they spell, most
     significant digit first.  The last character is skipped (presumably
     the terminating semicolon of the escape).  An empty [s] yields 0.
     Raises [Failure] (from [hexa_digit]) on a non-hexadecimal character. *)
  let hexa_char s =
    let last = String.length s - 1 in
    let rec aux i accu =
      (* [>=] rather than [=] so that an empty string stops at once
         instead of indexing out of bounds. *)
      if i >= last then accu
      else aux (succ i) (accu * 16 + hexa_digit s.[i])
    in
    aux 0 0

# 127 "parser/wlexer.ml"
(* Generated automaton tables; do not edit by hand.  They are handed to
   the lexing engine by [token], [comment], [string] and
   [parse_hexa_char], which start it in states 0, 27, 33 and 37
   respectively.  Each entry is two bytes, so [lex_base], [lex_backtrk]
   and [lex_default] describe 40 states and [lex_trans]/[lex_check]
   hold 168 entries each. *)
let __ocaml_lex_tables = {
  Lexing.lex_base = 
   "\000\000\246\255\245\255\008\000\036\000\007\000\250\255\005\000\
    \005\000\015\000\013\000\002\000\000\000\034\000\036\000\005\000\
    \033\000\047\000\248\255\036\000\247\255\052\000\041\000\252\255\
    \062\000\088\000\253\255\077\000\251\255\043\000\036\000\254\255\
    \255\255\081\000\249\255\098\000\070\000\111\000\103\000\128\000\
    ";
  Lexing.lex_backtrk = 
   "\255\255\255\255\255\255\000\000\001\000\004\000\255\255\005\000\
    \005\000\005\000\005\000\005\000\005\000\005\000\005\000\005\000\
    \005\000\005\000\255\255\005\000\255\255\006\000\255\255\255\255\
    \255\255\001\000\255\255\255\255\255\255\004\000\004\000\255\255\
    \255\255\255\255\255\255\004\000\255\255\255\255\001\000\255\255\
    ";
  Lexing.lex_default = 
   "\006\000\000\000\000\000\255\255\255\255\255\255\000\000\255\255\
    \255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
    \255\255\255\255\000\000\255\255\000\000\255\255\255\255\000\000\
    \255\255\255\255\000\000\028\000\000\000\255\255\255\255\000\000\
    \000\000\034\000\000\000\255\255\255\255\031\000\255\255\255\255\
    ";
  Lexing.lex_trans = 
   "\001\000\002\000\002\000\003\000\004\000\004\000\005\000\017\000\
    \004\000\013\000\014\000\003\000\007\000\005\000\010\000\008\000\
    \016\000\009\000\015\000\022\000\006\000\005\000\011\000\006\000\
    \006\000\006\000\019\000\006\000\012\000\015\000\006\000\018\000\
    \006\000\018\000\006\000\004\000\004\000\002\000\002\000\002\000\
    \004\000\004\000\004\000\006\000\004\000\006\000\006\000\006\000\
    \004\000\006\000\024\000\021\000\021\000\004\000\020\000\021\000\
    \021\000\021\000\021\000\023\000\021\000\032\000\006\000\031\000\
    \021\000\000\000\025\000\025\000\000\000\021\000\025\000\004\000\
    \004\000\004\000\004\000\004\000\036\000\023\000\000\000\000\000\
    \026\000\006\000\021\000\021\000\000\000\023\000\000\000\021\000\
    \021\000\021\000\021\000\021\000\025\000\025\000\025\000\030\000\
    \025\000\025\000\025\000\000\000\025\000\000\000\026\000\029\000\
    \036\000\025\000\000\000\039\000\026\000\039\000\026\000\255\255\
    \032\000\035\000\032\000\038\000\000\000\038\000\032\000\000\000\
    \000\000\000\000\000\000\025\000\025\000\025\000\025\000\025\000\
    \000\000\031\000\031\000\031\000\039\000\000\000\039\000\000\000\
    \000\000\000\000\000\000\000\000\000\000\000\000\000\000\032\000\
    \000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
    \000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
    \000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
    ";
  Lexing.lex_check = 
   "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
    \000\000\000\000\000\000\003\000\000\000\005\000\000\000\000\000\
    \000\000\000\000\000\000\007\000\008\000\009\000\000\000\012\000\
    \010\000\009\000\000\000\010\000\000\000\000\000\011\000\000\000\
    \009\000\000\000\015\000\000\000\000\000\000\000\000\000\000\000\
    \004\000\004\000\004\000\013\000\004\000\013\000\014\000\014\000\
    \004\000\016\000\004\000\017\000\017\000\004\000\019\000\017\000\
    \021\000\021\000\021\000\022\000\021\000\029\000\016\000\030\000\
    \021\000\255\255\024\000\024\000\255\255\021\000\024\000\004\000\
    \004\000\004\000\004\000\004\000\036\000\027\000\255\255\255\255\
    \024\000\033\000\017\000\017\000\255\255\036\000\255\255\021\000\
    \021\000\021\000\021\000\021\000\025\000\025\000\025\000\027\000\
    \025\000\024\000\024\000\255\255\025\000\255\255\035\000\027\000\
    \035\000\025\000\255\255\038\000\027\000\038\000\027\000\037\000\
    \033\000\033\000\033\000\037\000\255\255\037\000\038\000\255\255\
    \255\255\255\255\255\255\025\000\025\000\025\000\025\000\025\000\
    \255\255\035\000\035\000\035\000\039\000\255\255\039\000\255\255\
    \255\255\255\255\255\255\255\255\255\255\255\255\255\255\039\000\
    \255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
    \255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
    \255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
    ";
  Lexing.lex_base_code = 
   "";
  Lexing.lex_backtrk_code = 
   "";
  Lexing.lex_default_code = 
   "";
  Lexing.lex_trans_code = 
   "";
  Lexing.lex_check_code = 
   "";
  Lexing.lex_code = 
   "";
}

(* [token engine lexbuf] reads the next token and returns it as a
   (kind, text) pair.  [engine] runs the automaton described by
   [__ocaml_lex_tables] (entry state 0) and returns the number of the
   rule that matched:
     0      nothing to report: read the next token instead
     1      a word: kind "" if it is a registered keyword, else "IDENT"
     2, 3   "ANY_IN_NS" (rule 2 drops the last two characters)
     4      "INT"
     5      kind "", the lexeme itself
     6      "DIRECTIVE"
     7      a string literal: "STRING2" if it opens with a double quote,
            "STRING1" otherwise; the text is the decoded content
     8      a comment: skipped, then the next token is read
     9      "EOI"
     10     illegal character, reported through [error]
   Raises [Failure] if the engine returns any other rule number. *)
let rec token engine lexbuf = 
  match engine __ocaml_lex_tables 0 lexbuf with
  | 0 ->
# 78 "parser/wlexer.mll"
              ( token engine lexbuf )
# 215 "parser/wlexer.ml"

  | 1 ->
# 80 "parser/wlexer.mll"
      ( 
        let s = Lexing.lexeme lexbuf in
        if Hashtbl.mem keywords s then "",s else "IDENT",s
      )
# 223 "parser/wlexer.ml"

  | 2 ->
# 85 "parser/wlexer.mll"
      ( 
        let s = Lexing.lexeme lexbuf in
        let s = String.sub s 0 (String.length s - 2) in
        "ANY_IN_NS", s
      )
# 232 "parser/wlexer.ml"

  | 3 ->
# 90 "parser/wlexer.mll"
          ( "ANY_IN_NS", "" )
# 237 "parser/wlexer.ml"

  | 4 ->
# 92 "parser/wlexer.mll"
    ( "INT",Lexing.lexeme lexbuf )
# 242 "parser/wlexer.ml"

  | 5 ->
# 97 "parser/wlexer.mll"
 ( "",Lexing.lexeme lexbuf )
# 247 "parser/wlexer.ml"

  | 6 ->
# 98 "parser/wlexer.mll"
               ( "DIRECTIVE",Lexing.lexeme lexbuf )
# 252 "parser/wlexer.ml"

  | 7 ->
# 100 "parser/wlexer.mll"
      ( let string_start = Lexing.lexeme_start lexbuf in
        string_start_pos := string_start;
        let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
        string (Lexing.lexeme lexbuf) engine lexbuf;
        lexbuf.Lexing.lex_start_pos <-
          string_start - lexbuf.Lexing.lex_abs_pos;
        (if double_quote then "STRING2" else "STRING1"), 
        (get_stored_string()) )
# 264 "parser/wlexer.ml"

  | 8 ->
# 110 "parser/wlexer.mll"
      ( comment_start_pos := [Lexing.lexeme_start lexbuf];
        in_comment := true;
        comment engine lexbuf;
        in_comment := false;
        token engine lexbuf )
# 273 "parser/wlexer.ml"

  | 9 ->
# 117 "parser/wlexer.mll"
      ( "EOI","" )
# 278 "parser/wlexer.ml"

  | 10 ->
# 119 "parser/wlexer.mll"
      ( error 
          (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
          (Illegal_character ((Lexing.lexeme lexbuf).[0])) )
# 285 "parser/wlexer.ml"

  | _ -> failwith "lexing: empty token [token]"

(* [comment engine lexbuf] skips the rest of a comment (automaton entry
   state 27), using [comment_start_pos] as a stack of the comments that
   are currently open.  Rules:
     0   push the current position and keep going
     1   pop one position; keep going while the stack is not empty
     2   a string delimiter: skip the string with [string], turning
         [Unterminated_string] into [Unterminated_string_in_comment]
         located at the head of [comment_start_pos]
     3   report [Unterminated_comment] at the head of [comment_start_pos]
     4   anything else: keep going
   Raises [Failure] if the engine returns any other rule number. *)
and comment engine lexbuf = 
  match engine __ocaml_lex_tables 27 lexbuf with
  | 0 ->
# 125 "parser/wlexer.mll"
      ( comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
        comment engine lexbuf;
      )
# 296 "parser/wlexer.ml"

  | 1 ->
# 129 "parser/wlexer.mll"
      ( comment_start_pos := List.tl !comment_start_pos;
        if !comment_start_pos <> [] then comment engine lexbuf;
      )
# 303 "parser/wlexer.ml"

  | 2 ->
# 133 "parser/wlexer.mll"
      ( string_start_pos := Lexing.lexeme_start lexbuf;
        Buffer.clear string_buff;
        let ender = Lexing.lexeme lexbuf in
        (try string ender engine lexbuf
         with Location.Location (_,_,Unterminated_string) ->
           let st = List.hd !comment_start_pos in
           error st (st+2) Unterminated_string_in_comment);
        Buffer.clear string_buff;
        comment engine lexbuf )
# 316 "parser/wlexer.ml"

  | 3 ->
# 143 "parser/wlexer.mll"
      ( let st = List.hd !comment_start_pos in
        error st (st+2) Unterminated_comment
      )
# 323 "parser/wlexer.ml"

  | 4 ->
# 147 "parser/wlexer.mll"
      ( comment engine lexbuf )
# 328 "parser/wlexer.ml"

  | _ -> failwith "lexing: empty token [comment]"

(* [string ender engine lexbuf] reads the body of a string literal
   (automaton entry state 33) into [string_buff] until the delimiter
   [ender] is met again.  Rules:
     0   a delimiter: stop if it equals [ender], otherwise store it
     1   two-character escape: store its second character
     2   escape letter: x starts a hexadecimal escape
         ([parse_hexa_char]), any other letter goes to [store_special]
     3   decimal escape: store the code point it denotes
     4   report [Illegal_character] (backslash) at the lexeme
     5   report [Unterminated_string] at [string_start_pos]
     6   any other character: store its code
   Raises [Failure] if the engine returns any other rule number. *)
and string ender engine lexbuf = 
  match engine __ocaml_lex_tables 33 lexbuf with
  | 0 ->
# 151 "parser/wlexer.mll"
      ( let c = Lexing.lexeme lexbuf in
        if c = ender then ()
        else (store_char (Lexing.lexeme lexbuf); 
              string ender engine lexbuf) )
# 340 "parser/wlexer.ml"

  | 1 ->
# 156 "parser/wlexer.mll"
      ( store_ascii (Lexing.lexeme_char lexbuf 1);
        string ender engine lexbuf )
# 346 "parser/wlexer.ml"

  | 2 ->
# 159 "parser/wlexer.mll"
      ( let c = Lexing.lexeme_char lexbuf 1 in
        if c = 'x' 
        then parse_hexa_char engine lexbuf 
        else store_special c;
        string ender engine lexbuf )
# 355 "parser/wlexer.ml"

  | 3 ->
# 165 "parser/wlexer.mll"
      ( store_code (decimal_char (Lexing.lexeme lexbuf));
        string ender engine lexbuf )
# 361 "parser/wlexer.ml"

  | 4 ->
# 168 "parser/wlexer.mll"
      ( error 
          (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
          (Illegal_character '\\') )
# 368 "parser/wlexer.ml"

  | 5 ->
# 172 "parser/wlexer.mll"
      ( error !string_start_pos (!string_start_pos+1) Unterminated_string )
# 373 "parser/wlexer.ml"

  | 6 ->
# 174 "parser/wlexer.mll"
      ( store_code (Char.code (Lexing.lexeme_char lexbuf 0));  
        (* Adapt when source is UTF8 *)
        string ender engine lexbuf )
# 380 "parser/wlexer.ml"

  | _ -> failwith "lexing: empty token [string]"

(* [parse_hexa_char engine lexbuf] reads the remainder of a hexadecimal
   character escape (automaton entry state 37).  Rule 0 stores the code
   point computed by [hexa_char] from the lexeme; rule 1 reports
   [Illegal_character] (backslash) at the lexeme.
   Raises [Failure] if the engine returns any other rule number. *)
and parse_hexa_char engine lexbuf = 
  match engine __ocaml_lex_tables 37 lexbuf with
  | 0 ->
# 180 "parser/wlexer.mll"
      ( store_code (hexa_char (Lexing.lexeme lexbuf)) )
# 389 "parser/wlexer.ml"

  | 1 ->
# 182 "parser/wlexer.mll"
      ( error 
          (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
          (Illegal_character '\\') )
# 396 "parser/wlexer.ml"

  | _ -> failwith "lexing: empty token [parse_hexa_char]"

;;

# 188 "parser/wlexer.mll"

  (* Offset added to the locations of the tokens of the next stream
     built by [lexer_func_of_wlex], which resets it to 0 once read. *)
  let delta_loc = ref 0
  let set_delta_loc dl = delta_loc := dl

(* For synchronization on errors in the toplevel ... *)
(* Issue: file inclusion *)
  (* Lexing buffer of the most recently created token stream. *)
  let lexbuf = ref None
  (* Last token produced by any stream. *)
  let last_tok = ref ("","")

  (* Absolute offset of the first character of the current lexeme:
     the buffer's absolute position plus the lexeme's start within it. *)
  let lexeme_start lexbuf = 
    lexbuf.Lexing.lex_abs_pos + lexbuf.Lexing.lex_start_pos

  (* Absolute offset just past the current lexeme: the buffer's absolute
     position plus the current read position within it. *)
  let lexeme_end lexbuf = 
    lexbuf.Lexing.lex_abs_pos + lexbuf.Lexing.lex_curr_pos

  (* [lexer_func_of_wlex lexfun lexengine cs] builds a token stream over
     the character stream [cs].  Each token is obtained with
     [lexfun lexengine] and paired with its (start, end) location,
     shifted by the value [delta_loc] had when the stream was created;
     [delta_loc] is then reset to 0.  The lexing buffer is recorded in
     [lexbuf] and every token produced in [last_tok].  The result is
     whatever [Token.make_stream_and_location] returns. *)
  let lexer_func_of_wlex lexfun lexengine cs =
    let dl = !delta_loc in
    delta_loc := 0;
    (* Refill function: hands the lexer one character per call and
       signals end of input with 0.
       NOTE(review): writes into the refill buffer with string mutation;
       compilers with immutable strings need [Bytes.set] here. *)
    let refill s _n =
      try s.[0] <- Stream.next cs; 1
      with Stream.Failure -> 0
    in
    let lb = Lexing.from_function refill in
    lexbuf := Some lb;
    let next () =
      let tok = lexfun lexengine lb in
      let loc = (lexeme_start lb + dl, lexeme_end lb + dl) in
      last_tok := tok;
      (tok, loc)
    in
    Token.make_stream_and_location next

  (* [register_kw (s1, s2)] records [s2] in [keywords] when [s1] is
     empty, [s2] starts with a lowercase ASCII letter and it is not
     registered yet.  Every other pair, including an empty [s2], is
     ignored. *)
  let register_kw (s1,s2) =
    (* The [s2 <> ""] test keeps [s2.[0]] from raising on an empty text. *)
    if s1 = "" && s2 <> "" then 
      match s2.[0] with 
        | 'a' .. 'z' when not (Hashtbl.mem keywords s2) -> 
            Hashtbl.add keywords s2 ()      
        | _ -> ()


  (* [lexer lexfun lexengine] builds the [Token] lexer record: tokens
     come from [lexer_func_of_wlex lexfun lexengine], tokens used by a
     grammar are passed to [register_kw], removing a token does nothing,
     and matching and text use the defaults provided by [Token]. *)
  let lexer lexfun lexengine =
    { 
      Token.tok_func = lexer_func_of_wlex lexfun lexengine; 
      Token.tok_using = register_kw;
      Token.tok_removing = (fun _ -> ()); 
      Token.tok_match = Token.default_match;
      Token.tok_text = Token.lexer_text;
      Token.tok_comm = None;
    }

  (* Association list from a class identifier to the inclusive code-point
     ranges (first, last) that belong to it.  Only ranges up to 0x312C
     appear here; the two ranges left in comments (0xAC00-0xD7A3 and
     0x4E00-0x9FA5) are handled by the function given to the UTF-8
     engine at the end of this file. *)
  let classes = 
    (* [c i] is the one-point range [i, i]. *)
    let c i = (i,i) in
    (* [i ch1 ch2] is the range between two characters. *)
    let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
    [ unicode_base_char,
      [ 0x00C0,0x00D6; 0x00D8,0x00F6; 
        0x00F8,0x00FF; 0x0100,0x0131; 0x0134,0x013E; 0x0141,0x0148; 
        0x014A,0x017E; 0x0180,0x01C3; 0x01CD,0x01F0; 0x01F4,0x01F5; 
        0x01FA,0x0217; 0x0250,0x02A8; 0x02BB,0x02C1; 0x0386,0x0386;
        0x0388,0x038A; 0x038C,0x038C; 0x038E,0x03A1; 0x03A3,0x03CE; 
        0x03D0,0x03D6; 0x03DA,0x03DA; 0x03DC,0x03DC; 0x03DE,0x03DE; 
        0x03E0,0x03E0; 0x03E2,0x03F3; 
        0x0401,0x040C; 0x040E,0x044F; 0x0451,0x045C; 0x045E,0x0481; 
        0x0490,0x04C4; 0x04C7,0x04C8; 0x04CB,0x04CC; 0x04D0,0x04EB; 
        0x04EE,0x04F5; 0x04F8,0x04F9; 0x0531,0x0556; 0x0559,0x0559;
        0x0561,0x0586; 0x05D0,0x05EA; 0x05F0,0x05F2; 0x0621,0x063A; 
        0x0641,0x064A; 0x0671,0x06B7; 0x06BA,0x06BE; 0x06C0,0x06CE; 
        0x06D0,0x06D3; 0x06D5,0x06D5; 0x06E5,0x06E6; 0x0905,0x0939; 
        0x093D,0x093D;
        0x0958,0x0961; 0x0985,0x098C; 0x098F,0x0990; 0x0993,0x09A8; 
        0x09AA,0x09B0; 0x09B2,0x09B2; 0x09B6,0x09B9; 0x09DC,0x09DD; 
        0x09DF,0x09E1; 0x09F0,0x09F1; 0x0A05,0x0A0A; 0x0A0F,0x0A10; 
        0x0A13,0x0A28; 0x0A2A,0x0A30; 0x0A32,0x0A33; 0x0A35,0x0A36; 
        0x0A38,0x0A39; 0x0A59,0x0A5C; 0x0A5E,0x0A5E; 0x0A72,0x0A74; 
        0x0A85,0x0A8B; 0x0A8D,0x0A8D; 0x0A8F,0x0A91; 0x0A93,0x0AA8; 
        0x0AAA,0x0AB0; 0x0AB2,0x0AB3; 0x0AB5,0x0AB9; 0x0ABD,0x0ABD; 
        0x0AE0,0x0AE0;
        0x0B05,0x0B0C; 0x0B0F,0x0B10; 0x0B13,0x0B28; 0x0B2A,0x0B30; 
        0x0B32,0x0B33; 0x0B36,0x0B39; 0x0B3D,0x0B3D; 0x0B5C,0x0B5D; 
        0x0B5F,0x0B61; 0x0B85,0x0B8A; 0x0B8E,0x0B90; 0x0B92,0x0B95; 
        0x0B99,0x0B9A; 0x0B9C,0x0B9C; 0x0B9E,0x0B9F; 0x0BA3,0x0BA4; 
        0x0BA8,0x0BAA; 0x0BAE,0x0BB5; 0x0BB7,0x0BB9; 0x0C05,0x0C0C; 
        0x0C0E,0x0C10; 0x0C12,0x0C28; 0x0C2A,0x0C33; 0x0C35,0x0C39; 
        0x0C60,0x0C61; 0x0C85,0x0C8C; 0x0C8E,0x0C90; 0x0C92,0x0CA8; 
        0x0CAA,0x0CB3; 0x0CB5,0x0CB9; 0x0CDE,0x0CDE; 0x0CE0,0x0CE1; 
        0x0D05,0x0D0C; 0x0D0E,0x0D10; 0x0D12,0x0D28; 0x0D2A,0x0D39; 
        0x0D60,0x0D61; 0x0E01,0x0E2E; 0x0E30,0x0E30; 0x0E32,0x0E33; 
        0x0E40,0x0E45; 0x0E81,0x0E82; 0x0E84,0x0E84; 0x0E87,0x0E88; 
        0x0E8A,0x0E8A;
        0x0E8D,0x0E8D; 0x0E94,0x0E97; 0x0E99,0x0E9F; 0x0EA1,0x0EA3; 
        0x0EA5,0x0EA5;
        0x0EA7,0x0EA7; 0x0EAA,0x0EAB; 0x0EAD,0x0EAE; 0x0EB0,0x0EB0; 
        0x0EB2,0x0EB3;
        0x0EBD,0x0EBD; 0x0EC0,0x0EC4; 0x0F40,0x0F47; 0x0F49,0x0F69; 
        0x10A0,0x10C5; 0x10D0,0x10F6; 0x1100,0x1100; 0x1102,0x1103; 
        0x1105,0x1107; 0x1109,0x1109; 0x110B,0x110C; 0x110E,0x1112; 
        0x113C,0x113C;
        0x113E,0x113E; 0x1140,0x1140; 0x114C,0x114C; 0x114E,0x114E; 
        0x1150,0x1150; 0x1154,0x1155; 0x1159,0x1159;
        0x115F,0x1161; 0x1163,0x1163; 0x1165,0x1165; 0x1167,0x1167; 
        0x1169,0x1169; 0x116D,0x116E; 
        0x1172,0x1173; 0x1175,0x1175; 0x119E,0x119E; 0x11A8,0x11A8; 
        0x11AB,0x11AB; 0x11AE,0x11AF; 
        0x11B7,0x11B8; 0x11BA,0x11BA; 0x11BC,0x11C2; 0x11EB,0x11EB; 
        0x11F0,0x11F0; 0x11F9,0x11F9;
        0x1E00,0x1E9B; 0x1EA0,0x1EF9; 0x1F00,0x1F15; 0x1F18,0x1F1D; 
        0x1F20,0x1F45; 0x1F48,0x1F4D; 0x1F50,0x1F57; 0x1F59,0x1F59; 
        0x1F5B,0x1F5B;
        0x1F5D,0x1F5D; 0x1F5F,0x1F7D; 0x1F80,0x1FB4; 0x1FB6,0x1FBC; 
        0x1FBE,0x1FBE;
        0x1FC2,0x1FC4; 0x1FC6,0x1FCC; 0x1FD0,0x1FD3; 0x1FD6,0x1FDB; 
        0x1FE0,0x1FEC; 0x1FF2,0x1FF4; 0x1FF6,0x1FFC; 0x2126,0x2126;
        0x212A,0x212B; 0x212E,0x212E; 0x2180,0x2182; 0x3041,0x3094; 
        0x30A1,0x30FA; 0x3105,0x312C; (* 0xAC00,0xD7A3 *) ];

      unicode_ideographic,
      [ 0x3007,0x3007; 0x3021,0x3029 (* 0x4E00-0x9FA5 *) ];

      unicode_combining_char,
      [ 0x0300,0x0345; 0x0360,0x0361; 0x0483,0x0486; 0x0591,0x05A1;
        0x05A3,0x05B9; 0x05BB,0x05BD; 0x05BF,0x05BF; 0x05C1,0x05C2;
        0x05C4,0x05C4; 0x064B,0x0652; 0x0670,0x0670; 0x06D6,0x06DC;
        0x06DD,0x06DF; 0x06E0,0x06E4; 0x06E7,0x06E8; 0x06EA,0x06ED;
        0x0901,0x0903; 0x093C,0x093C; 0x093E,0x094C; 0x094D,0x094D;
        0x0951,0x0954; 0x0962,0x0963; 0x0981,0x0983; 0x09BC,0x09BC;
        0x09BE,0x09BE; 0x09BF,0x09BF; 0x09C0,0x09C4; 0x09C7,0x09C8;
        0x09CB,0x09CD; 0x09D7,0x09D7; 0x09E2,0x09E3; 0x0A02,0x0A02;
        0x0A3C,0x0A3C; 0x0A3E,0x0A3E; 0x0A3F,0x0A3F; 0x0A40,0x0A42;
        0x0A47,0x0A48; 0x0A4B,0x0A4D; 0x0A70,0x0A71; 0x0A81,0x0A83;
        0x0ABC,0x0ABC; 0x0ABE,0x0AC5; 0x0AC7,0x0AC9; 0x0ACB,0x0ACD;
        0x0B01,0x0B03; 0x0B3C,0x0B3C; 0x0B3E,0x0B43; 0x0B47,0x0B48;
        0x0B4B,0x0B4D; 0x0B56,0x0B57; 0x0B82,0x0B83; 0x0BBE,0x0BC2;
        0x0BC6,0x0BC8; 0x0BCA,0x0BCD; 0x0BD7,0x0BD7; 0x0C01,0x0C03;
        0x0C3E,0x0C44; 0x0C46,0x0C48; 0x0C4A,0x0C4D; 0x0C55,0x0C56;
        0x0C82,0x0C83; 0x0CBE,0x0CC4; 0x0CC6,0x0CC8; 0x0CCA,0x0CCD;
        0x0CD5,0x0CD6; 0x0D02,0x0D03; 0x0D3E,0x0D43; 0x0D46,0x0D48;
        0x0D4A,0x0D4D; 0x0D57,0x0D57; 0x0E31,0x0E31; 0x0E34,0x0E3A;
        0x0E47,0x0E4E; 0x0EB1,0x0EB1; 0x0EB4,0x0EB9; 0x0EBB,0x0EBC;
        0x0EC8,0x0ECD; 0x0F18,0x0F19; 0x0F35,0x0F35; 0x0F37,0x0F37;
        0x0F39,0x0F39; 0x0F3E,0x0F3E; 0x0F3F,0x0F3F; 0x0F71,0x0F84;
        0x0F86,0x0F8B; 0x0F90,0x0F95; 0x0F97,0x0F97; 0x0F99,0x0FAD;
        0x0FB1,0x0FB7; 0x0FB9,0x0FB9; 0x20D0,0x20DC; 0x20E1,0x20E1;
        0x302A,0x302F; 0x3099,0x3099; 0x309A,0x309A ];

      unicode_digit,
      [ 0x0660,0x0669; 0x06F0,0x06F9; 0x0966,0x096F; 0x09E6,0x09EF;
        0x0A66,0x0A6F; 0x0AE6,0x0AEF; 0x0B66,0x0B6F; 0x0BE7,0x0BEF;
        0x0C66,0x0C6F; 0x0CE6,0x0CEF; 0x0D66,0x0D6F; 0x0E50,0x0E59;
        0x0ED0,0x0ED9; 0x0F20,0x0F29 ];


      unicode_extender,
      [ 0x00B7,0x00B7; 0x02D0,0x02D1; 0x0387,0x0387; 0x0640,0x0640;
        0x0E46,0x0E46; 0x0EC6,0x0EC6; 0x3005,0x3005; 0x3031,0x3035;
        0x309D,0x309E; 0x30FC,0x30FE ];

      ascii_digit, 
      [ i '0' '9'];

      lowercase, 
      [i 'a' 'z'];

      uppercase, 
      [i 'A' 'Z'];
      
      blank, 
      [c 8; c 9; c 10; c 13; c 32]
    ]

  (* Lookup table from a code point below 0x312d to its class identifier,
     stored as one character per code point.  Every entry starts as
     [encoding_error]; the ranges of [classes] are written first and the
     single characters of [one_char_classes] afterwards, so the latter
     win where both apply.
     NOTE(review): relies on in-place string mutation ([String.fill],
     [v.[i] <- c]); compilers with immutable strings need [Bytes]. *)
  let table =
    (* A class identifier must fit in one character. *)
    assert(nb_classes <= 256);
    (* 0x312d is one past the highest code point listed in [classes]. *)
    let v = String.make 0x312d (Char.chr encoding_error) in
    let fill_int c (i, j) = String.fill v i (j-i+1) c in
    let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
    let fill_char  (ch, cl) = v.[ch] <- Char.chr cl in
    List.iter fill_class classes;
    List.iter fill_char one_char_classes;
    v

  (* Lexing engine for UTF-8 input, built by
     [Lex_engines.engine_tiny_utf8] from [table] and a function that
     returns the class of a code point (presumably consulted for the
     code points [table] does not cover -- confirm in [Lex_engines]). *)
  let utf8_engine = 
    let class_of_code code =
      if code >= 0x4E00 && code <= 0x9FA5 then
        unicode_ideographic
      else if code >= 0xAC00 && code <= 0xD7A3 then
        unicode_base_char
      else if code <= 0xD7FF || (code >= 0xE000 && code <= 0xFFFD) ||
        (code >= 0x10000 && code <= 0x10FFFF) then
          xml_char
      else encoding_error
    in
    Lex_engines.engine_tiny_utf8 table class_of_code

  (* Lexing engine for 8-bit input, built by
     [Lex_engines.engine_tiny_8bit] from [table] alone. *)
  let latin1_engine = Lex_engines.engine_tiny_8bit table

# 597 "parser/wlexer.ml"