Commit 5b55e2b7 authored by Pietro Abate's avatar Pietro Abate
Browse files

[r2003-05-10 14:44:29 by cvscast] Start Unicode support. Remove more generic comparisons

Original author: cvscast
Date: 2003-05-10 14:44:30+00:00
parent 12f6a97c
......@@ -42,7 +42,7 @@ XWEBIFACE = $(WEBIFACE:.cmo=.cmx)
DEBUG = -g
PACKAGES = pxp-engine,pxp-lex-iso88591,wlexing,camlp4,num,cgi
PACKAGES = pxp-engine,pxp-lex-iso88591,pxp-wlex-utf8,wlexing,camlp4,num,cgi
OCAMLCP = ocamlc
OCAMLC = ocamlfind $(OCAMLCP) -package $(PACKAGES)
OCAMLOPT = ocamlfind ocamlopt -package $(PACKAGES)
......
......@@ -11,13 +11,15 @@ parser/ast.cmx: types/ident.cmx parser/location.cmx types/types.cmx
parser/location.cmo: parser/location.cmi
parser/location.cmx: parser/location.cmi
parser/parser.cmo: parser/ast.cmo types/atoms.cmi types/builtin.cmo \
types/chars.cmi types/ident.cmo types/intervals.cmi parser/location.cmi \
types/sequence.cmi types/types.cmi parser/wlexer.cmo parser/parser.cmi
types/chars.cmi misc/encodings.cmi types/ident.cmo types/intervals.cmi \
parser/location.cmi types/sequence.cmi types/types.cmi parser/wlexer.cmo \
parser/parser.cmi
parser/parser.cmx: parser/ast.cmx types/atoms.cmx types/builtin.cmx \
types/chars.cmx types/ident.cmx types/intervals.cmx parser/location.cmx \
types/sequence.cmx types/types.cmx parser/wlexer.cmx parser/parser.cmi
parser/wlexer.cmo: parser/location.cmi
parser/wlexer.cmx: parser/location.cmx
types/chars.cmx misc/encodings.cmx types/ident.cmx types/intervals.cmx \
parser/location.cmx types/sequence.cmx types/types.cmx parser/wlexer.cmx \
parser/parser.cmi
parser/wlexer.cmo: misc/encodings.cmi parser/location.cmi
parser/wlexer.cmx: misc/encodings.cmx parser/location.cmx
typing/typed.cmo: types/ident.cmo parser/location.cmi types/patterns.cmi \
types/types.cmi
typing/typed.cmx: types/ident.cmx parser/location.cmx types/patterns.cmx \
......@@ -80,18 +82,18 @@ runtime/print_xml.cmo: types/atoms.cmi types/chars.cmi types/ident.cmo \
types/sequence.cmi runtime/value.cmi
runtime/print_xml.cmx: types/atoms.cmx types/chars.cmx types/ident.cmx \
types/sequence.cmx runtime/value.cmx
runtime/run_dispatch.cmo: types/atoms.cmi types/chars.cmi types/ident.cmo \
types/patterns.cmi types/types.cmi runtime/value.cmi \
runtime/run_dispatch.cmo: types/atoms.cmi types/chars.cmi misc/encodings.cmi \
types/ident.cmo types/patterns.cmi types/types.cmi runtime/value.cmi \
runtime/run_dispatch.cmi
runtime/run_dispatch.cmx: types/atoms.cmx types/chars.cmx types/ident.cmx \
types/patterns.cmx types/types.cmx runtime/value.cmx \
runtime/run_dispatch.cmx: types/atoms.cmx types/chars.cmx misc/encodings.cmx \
types/ident.cmx types/patterns.cmx types/types.cmx runtime/value.cmx \
runtime/run_dispatch.cmi
runtime/value.cmo: types/atoms.cmi types/builtin.cmo types/chars.cmi \
types/ident.cmo types/intervals.cmi types/sequence.cmi types/types.cmi \
runtime/value.cmi
misc/encodings.cmi types/ident.cmo types/intervals.cmi types/sequence.cmi \
types/types.cmi runtime/value.cmi
runtime/value.cmx: types/atoms.cmx types/builtin.cmx types/chars.cmx \
types/ident.cmx types/intervals.cmx types/sequence.cmx types/types.cmx \
runtime/value.cmi
misc/encodings.cmx types/ident.cmx types/intervals.cmx types/sequence.cmx \
types/types.cmx runtime/value.cmi
driver/cduce.cmo: parser/ast.cmo types/builtin.cmo runtime/eval.cmi \
types/ident.cmo parser/location.cmi parser/parser.cmi types/patterns.cmi \
misc/state.cmi typing/typed.cmo typing/typer.cmi types/types.cmi \
......@@ -123,5 +125,5 @@ types/types.cmi: types/atoms.cmi types/chars.cmi types/ident.cmo \
runtime/eval.cmi: types/ident.cmo typing/typed.cmo runtime/value.cmi
runtime/load_xml.cmi: runtime/value.cmi
runtime/run_dispatch.cmi: types/patterns.cmi runtime/value.cmi
runtime/value.cmi: types/atoms.cmi types/chars.cmi types/ident.cmo \
types/intervals.cmi types/types.cmi
runtime/value.cmi: types/atoms.cmi types/chars.cmi misc/encodings.cmi \
types/ident.cmo types/intervals.cmi types/types.cmi
type uchar = int
(* Common signature of the string codecs defined in this file.  Strings are
   addressed by plain [int] indices and characters are [uchar] codes. *)
module type T =
sig
(* Character read at the given index.  In the Iso88591 implementation
   visible in this file this is the code of the byte at that index. *)
val get: string -> int -> uchar
(* Index that follows the character found at the given index. *)
val next: string -> int -> int
(* Store a character at the given index; the result is an index.
   NOTE(review): presumably the index just past the written character, as
   in the Iso88591 implementation -- confirm for other codecs. *)
val put: string -> int -> uchar -> int
(* Size of a character's encoding.  NOTE(review): presumably measured in
   bytes, as the name suggests -- Iso88591 always answers 1. *)
val bytes: uchar -> int
end
(* Codec where every character occupies exactly one byte whose value is the
   character code. *)
module Iso88591 =
struct
  (* [get s i] is the code of the character stored at byte [i] of [s]. *)
  let get s i = Char.code s.[i]

  (* [next s i] is the index of the character that follows index [i]. *)
  let next s i = succ i

  (* [put s i c] writes the character of code [c] at byte [i] of [s] and
     returns the index just past it.  The value written is the character
     code [c], not the index [i].  Raises [Invalid_argument] (from
     [Char.chr]) when [c] is outside 0..255. *)
  let put s i c = s.[i] <- Char.chr c; succ i

  (* [bytes c] is the encoded size of [c]: always one byte. *)
  let bytes c = 1
end
module Utf8 =
struct
(* Representation: a UTF-8 string is an ordinary [string] and an index into
   it is an [int] offset. *)
type ustring = string
type uindex = int
(* Index of the beginning of the string: always 0. *)
let start_index s = 0
(* Index one past the last byte of the string. *)
let end_index s = String.length s
(* Index comparison.  Indices are immediate ints, so physical equality gives
   the same answer as integer equality here.  NOTE(review): presumably
   chosen to avoid the polymorphic comparison -- confirm. *)
let equal_index = (==)
(* [mk], [get_str] and [get_idx] are identities: they only convert between
   the concrete representation and the [ustring] / [uindex] types. *)
let mk s = s
let get_str s = s
let get_idx i = i
(* TODO: handle 5,6 bytes chars; report malformed UTF-8 *)
let get s i =
match s.[i] with
......@@ -39,18 +31,71 @@ struct
((Char.code s.[i+3] - 128))
| _ -> failwith "Malformed UTF-8 bufffer"
(* [next s i] decodes the UTF-8 sequence starting at byte [i] of [s] and
   returns the pair (code point, index of the byte following the sequence).

   Each lead byte has its own marker bits removed before shifting: 0xC0
   (192) for 2-byte, 0xE0 (224) for 3-byte and 0xF0 (240) for 4-byte
   sequences.  Continuation bytes are not validated; a truncated sequence
   raises [Invalid_argument] from the string access.

   Raises [Failure] when byte [i] cannot start a 1- to 4-byte sequence. *)
let next s i =
  (* Payload of the continuation byte located [k] bytes after the lead. *)
  let cont k = Char.code s.[i + k] - 128 in
  match s.[i] with
  | '\000'..'\127' as c ->
      Char.code c, i + 1
  | '\192'..'\223' as c ->
      ((Char.code c - 192) lsl 6) lor (cont 1), i + 2
  | '\224'..'\239' as c ->
      ((Char.code c - 224) lsl 12) lor
      ((cont 1) lsl 6) lor
      (cont 2), i + 3
  | '\240'..'\247' as c ->
      (* 0xF8 and above never start a 4-byte sequence. *)
      ((Char.code c - 240) lsl 18) lor
      ((cont 1) lsl 12) lor
      ((cont 2) lsl 6) lor
      (cont 3), i + 4
  | _ -> failwith "Malformed UTF-8 bufffer"
(* [advance s i] returns the index of the sequence that follows the UTF-8
   sequence starting at byte [i] of [s], looking only at the lead byte.

   Raises [Failure] when byte [i] cannot start a 1- to 4-byte sequence.
   This includes 0xF8 (248), which is not a 4-byte lead. *)
let advance s i =
  match s.[i] with
  | '\000'..'\127' -> i + 1
  | '\192'..'\223' -> i + 2
  | '\224'..'\239' -> i + 3
  | '\240'..'\247' -> i + 4
  | _ -> failwith "Malformed UTF-8 bufffer"
(*
let width = Array.create 256 1
let () =
for i = 192 to 223 do width.(i) <- 2 done;
for i = 224 to 249 do width.(i) <- 3 done;
for i = 240 to 248 do width.(i) <- 4 done
let next s i =
let len s i =
Array.unsafe_get width (Char.code s.[i])
*)
(* [store b p] appends the UTF-8 encoding of code point [p] to buffer [b].

   Raises [Failure "Encodings.Utf8.store"] when [p] cannot be written:
   negative values, the surrogate range 0xd800-0xdfff, 0xfffe, 0xffff, and
   anything above 0x10ffff. *)
let store b p =
  (* Adapted from Netstring's netconversion.ml/write_utf8 *)
  if p < 0 then
    (* Not a code point: fail like every other unencodable value instead of
       letting Char.chr raise Invalid_argument. *)
    failwith "Encodings.Utf8.store"
  else if p <= 127 then
    Buffer.add_char b (Char.chr p)
  else if p <= 0x7ff then (
    Buffer.add_char b (Char.chr (0xc0 lor (p lsr 6)));
    Buffer.add_char b (Char.chr (0x80 lor (p land 0x3f)))
  )
  else if p <= 0xffff then (
    (* Refuse writing surrogate pairs, and fffe, ffff *)
    if (p >= 0xd800 && p < 0xe000) || (p >= 0xfffe) then
      failwith "Encodings.Utf8.store";
    Buffer.add_char b (Char.chr (0xe0 lor (p lsr 12)));
    Buffer.add_char b (Char.chr (0x80 lor ((p lsr 6) land 0x3f)));
    Buffer.add_char b (Char.chr (0x80 lor (p land 0x3f)))
  )
  else if p <= 0x10ffff then (
    Buffer.add_char b (Char.chr (0xf0 lor (p lsr 18)));
    Buffer.add_char b (Char.chr (0x80 lor ((p lsr 12) land 0x3f)));
    Buffer.add_char b (Char.chr (0x80 lor ((p lsr 6) land 0x3f)));
    Buffer.add_char b (Char.chr (0x80 lor (p land 0x3f)))
  )
  else
    (* Higher code points are not possible in XML: *)
    failwith "Encodings.Utf8.store"
(* Placeholder: in-place writing is not available for UTF-8 yet, so any call
   raises [Failure]. *)
let put _s _i _c =
  failwith "Encodings.Utf8.put: not yet implemented"
(* [copy b s i j] appends to buffer [b] the bytes of [s] from index [i]
   (inclusive) up to index [j] (exclusive). *)
let copy b s i j =
  let len = j - i in
  Buffer.add_substring b s i len
(* Placeholder: the encoded size of a character is not computed for UTF-8
   yet, so any call raises [Failure]. *)
let bytes _c =
  failwith "Encodings.Utf8.bytes: not yet implemented"
(* [get_substr s i j] is the slice of [s] from index [i] (inclusive) up to
   index [j] (exclusive), as a fresh string. *)
let get_substr s i j =
  let len = j - i in
  String.sub s i len
end
type uchar = int
module type T =
module Utf8 :
sig
val get: string -> int -> uchar
val next: string -> int -> int
type ustring
type uindex
val put: string -> int -> uchar -> int
val bytes: uchar -> int
end
val end_index: ustring -> uindex
val start_index: ustring -> uindex
val equal_index: uindex -> uindex -> bool
val mk: string -> ustring
val get_str: ustring -> string
val get_idx: uindex -> int
val get: ustring -> uindex -> uchar
val advance: ustring -> uindex -> uindex
val next: ustring -> uindex -> uchar * uindex
module Iso88591 : T
module Utf8 : T
val store: Buffer.t -> uchar -> unit
val copy: Buffer.t -> ustring -> uindex -> uindex -> unit
val get_substr: ustring -> uindex -> uindex -> string
end
......@@ -36,12 +36,16 @@ let string_regexp = Star (Elem char)
let cst_nil = mknoloc (Cst (Types.Atom Sequence.nil_atom))
let seq_of_string pos s =
let s = Encodings.Utf8.mk s in
(* What about locations when input file is not Utf8 ? *)
let (pos,_) = pos in
let rec aux accu i =
if (i = 0)
then accu
else aux (((pos+i,pos+i+1),s.[i-1])::accu) (i-1) in
aux [] (String.length s)
let rec aux pos i j =
if Encodings.Utf8.equal_index i j then []
else
let (c,i) = Encodings.Utf8.next s i in
((pos,pos+1),c)::(aux (pos+1) i j)
in
aux pos (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s)
exception Error of string
(* [error loc msg] raises [Error msg] through [Location.raise_loc], passing
   the two components of the pair [loc] as its first two arguments. *)
let error loc msg =
  let (i, j) = loc in
  Location.raise_loc i j (Error msg)
......@@ -50,14 +54,14 @@ let make_record loc r =
LabelMap.from_list (fun _ _ -> error loc "Duplicated record field") r
let parse_char loc s =
(* TODO: Unicode *)
if String.length s <> 1 then
error loc "Character litteral must have length 1";
s.[0]
let s = seq_of_string loc s in
match s with
| [_,c] -> c
| _ -> error loc "Character litteral must have length 1"
let char_list pos s =
let s = seq_of_string pos s in
List.map (fun (loc,c) -> mk loc (Cst (Types.Char (Chars.mk_char c)))) s
List.map (fun (loc,c) -> mk loc (Cst (Types.Char (Chars.mk_int c)))) s
let include_stack = ref []
......@@ -285,14 +289,14 @@ EXTEND
Elem (mk loc (Constant ((ident a,c))))
| UIDENT "PCDATA" -> string_regexp
| i = STRING1; "--"; j = STRING1 ->
let i = Chars.mk_char (parse_char loc i)
and j = Chars.mk_char (parse_char loc j) in
let i = Chars.mk_int (parse_char loc i)
and j = Chars.mk_int (parse_char loc j) in
Elem (mk loc (Internal (Types.char (Chars.char_class i j))))
| s = STRING1 ->
let s = seq_of_string loc s in
List.fold_right
(fun (loc,c) accu ->
let c = Chars.mk_char c in
let c = Chars.mk_int c in
let c = Chars.atom c in
Seq (Elem (mk loc (Internal (Types.char c))), accu))
s
......@@ -356,7 +360,7 @@ EXTEND
mk loc (Internal
(Types.char
(Chars.atom
(Chars.mk_char c))))) s in
(Chars.mk_int c))))) s in
let s = s @ [mk loc (Internal (Sequence.nil_type))] in
multi_prod loc s
]
......@@ -375,8 +379,7 @@ EXTEND
char:
[
[ c = STRING1 -> Chars.mk_char (parse_char loc c)
| "!"; i = INT -> Chars.mk_int (int_of_string i) ]
[ c = STRING1 -> Chars.mk_int (parse_char loc c) ]
];
......
......@@ -76,28 +76,34 @@ let nb_classes = 34
exception Unterminated_string_in_comment
(* Buffer for string literals *)
(* Buffer for string literals : always encoded in Utf8 *)
let string_buff = Buffer.create 1024
let store_char = Buffer.add_char string_buff
let store_ascii = Buffer.add_char string_buff
let store_char = Buffer.add_string string_buff
let store_code = Encodings.Utf8.store string_buff
(* Returns everything accumulated in [string_buff] so far and empties the
   buffer so that it can be reused. *)
let get_stored_string () =
  let contents = Buffer.contents string_buff in
  let () = Buffer.clear string_buff in
  contents
let store_special = function
| 'n' -> store_char '\n'
| 'r' -> store_char '\r'
| 't' -> store_char '\t'
| 'n' -> store_ascii '\n'
| 'r' -> store_ascii '\r'
| 't' -> store_ascii '\t'
| c -> raise (Illegal_character '\\')
let string_start_pos = ref 0;;
let comment_start_pos : int list ref = ref [];;
(* [char_for_decimal_code s] drops the first character of [s], reads the
   rest as an integer and returns the character with that code.  Codes of
   256 and above trip the assertion. *)
let char_for_decimal_code s =
  let digits = String.sub s 1 (pred (String.length s)) in
  let code = int_of_string digits in
  assert (code < 256); (* TODO: handle Unicode *)
  Char.chr code
(* [numeric_char s] drops the first and the last character of [s] and reads
   what remains as an integer. *)
let numeric_char s =
  let digits = String.sub s 1 (String.length s - 2) in
  int_of_string digits
(* [hexa_char s] reads [s], minus its final character, as a hexadecimal
   number and returns its value.  Digits may be '0'-'9', 'a'-'f' or
   'A'-'F'.  Reading starts at index 0.
   NOTE(review): this assumes the caller has already stripped any escape
   prefix -- confirm against the lexer rule.

   Raises [Invalid_argument "hexa_char"] on any other character.  A string
   with no digits yields 0. *)
let hexa_char s =
  (* Numeric value of one hexadecimal digit. *)
  let digit = function
    | '0'..'9' as c -> Char.code c - Char.code '0'
    | 'a'..'f' as c -> Char.code c - Char.code 'a' + 10
    | 'A'..'F' as c -> Char.code c - Char.code 'A' + 10
    | _ -> invalid_arg "hexa_char"
  in
  (* The final character of [s] is a terminator and is not read. *)
  let last = String.length s - 1 in
  let rec aux i accu =
    if i >= last then accu
    else aux (succ i) (accu * 16 + digit s.[i])
  in
  aux 0 0
let rec tag_of_tag s i =
match s.[i] with
......@@ -106,125 +112,125 @@ let nb_classes = 34
let lex_tables = {
Lexing.lex_base =
"\000\000\012\000\010\000\018\000\251\255\250\255\016\000\255\255\
\253\255\005\000\254\255\027\000\013\000\252\255\251\255\000\000\
\006\000\253\255\255\255\247\255\246\255\021\000\047\000\060\000\
\028\000\067\000\026\000\250\255\033\000\024\000\040\000\053\000\
\011\000\018\000\039\000\037\000\249\255\248\255\077\000\080\000\
\084\000\097\000\055\000\101\000\114\000\118\000\131\000\135\000\
\148\000\071\000";
"\000\000\012\000\018\000\004\000\254\255\005\000\017\000\255\255\
\251\255\250\255\255\255\016\000\253\255\020\000\252\255\252\255\
\251\255\007\000\007\000\253\255\247\255\246\255\032\000\047\000\
\051\000\030\000\035\000\052\000\250\255\055\000\027\000\044\000\
\057\000\041\000\043\000\054\000\052\000\249\255\248\255\072\000\
\080\000\084\000\097\000\072\000\101\000\114\000\118\000\131\000\
\135\000\148\000\077\000";
Lexing.lex_backtrk =
"\255\255\255\255\255\255\255\255\255\255\255\255\005\000\255\255\
\255\255\003\000\255\255\005\000\003\000\255\255\255\255\004\000\
\004\000\255\255\255\255\255\255\255\255\000\000\001\000\002\000\
\003\000\005\000\005\000\255\255\005\000\005\000\005\000\005\000\
\005\000\005\000\005\000\005\000\255\255\255\255\255\255\004\000\
\255\255\004\000\003\000\002\000\255\255\002\000\001\000\255\255\
\001\000\000\000";
"\255\255\255\255\255\255\255\255\255\255\001\000\255\255\255\255\
\255\255\255\255\255\255\005\000\255\255\255\255\255\255\255\255\
\255\255\004\000\004\000\255\255\255\255\255\255\000\000\001\000\
\002\000\003\000\005\000\005\000\255\255\005\000\005\000\005\000\
\005\000\005\000\005\000\005\000\005\000\255\255\255\255\255\255\
\004\000\255\255\004\000\003\000\002\000\255\255\002\000\001\000\
\255\255\001\000\000\000";
Lexing.lex_default =
"\027\000\014\000\005\000\005\000\000\000\000\000\255\255\000\000\
\000\000\255\255\000\000\255\255\255\255\000\000\000\000\255\255\
\255\255\000\000\000\000\000\000\000\000\255\255\255\255\255\255\
\255\255\255\255\255\255\000\000\255\255\255\255\255\255\255\255\
\255\255\255\255\255\255\255\255\000\000\000\000\255\255\255\255\
"\028\000\016\000\009\000\004\000\000\000\255\255\255\255\000\000\
\000\000\000\000\000\000\255\255\000\000\255\255\000\000\000\000\
\000\000\255\255\255\255\000\000\000\000\000\000\255\255\255\255\
\255\255\255\255\255\255\255\255\000\000\255\255\255\255\255\255\
\255\255\255\255\255\255\255\255\255\255\000\000\000\000\255\255\
\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
\255\255\255\255";
\255\255\255\255\255\255";
Lexing.lex_trans =
"\019\000\020\000\020\000\021\000\022\000\023\000\024\000\022\000\
\025\000\026\000\004\000\009\000\013\000\028\000\029\000\030\000\
\031\000\032\000\004\000\012\000\008\000\033\000\009\000\018\000\
\049\000\034\000\010\000\035\000\032\000\015\000\036\000\008\000\
\036\000\012\000\042\000\005\000\005\000\016\000\005\000\005\000\
\007\000\011\000\017\000\005\000\017\000\005\000\005\000\010\000\
\010\000\006\000\007\000\046\000\046\000\046\000\046\000\005\000\
\037\000\010\000\010\000\005\000\047\000\042\000\005\000\046\000\
\043\000\043\000\043\000\043\000\005\000\005\000\038\000\039\000\
\039\000\044\000\049\000\005\000\043\000\005\000\000\000\046\000\
\038\000\039\000\039\000\000\000\039\000\039\000\039\000\039\000\
\041\000\041\000\041\000\041\000\043\000\040\000\000\000\000\000\
\039\000\000\000\000\000\000\000\041\000\041\000\041\000\041\000\
\041\000\043\000\043\000\043\000\043\000\000\000\040\000\000\000\
\039\000\041\000\044\000\000\000\041\000\043\000\045\000\045\000\
\045\000\045\000\045\000\045\000\045\000\045\000\000\000\000\000\
\000\000\041\000\045\000\044\000\000\000\043\000\045\000\046\000\
\046\000\046\000\046\000\048\000\048\000\048\000\048\000\000\000\
\047\000\000\000\045\000\046\000\000\000\000\000\045\000\048\000\
\048\000\048\000\048\000\048\000\000\000\000\000\000\000\000\000\
\000\000\047\000\000\000\046\000\048\000\000\000\000\000\048\000\
"\020\000\021\000\021\000\022\000\023\000\024\000\025\000\023\000\
\026\000\027\000\005\000\006\000\015\000\029\000\030\000\031\000\
\032\000\033\000\008\000\007\000\012\000\034\000\013\000\006\000\
\007\000\035\000\013\000\036\000\033\000\017\000\037\000\007\000\
\037\000\004\000\014\000\050\000\043\000\018\000\039\000\040\000\
\040\000\009\000\019\000\009\000\019\000\009\000\004\000\004\000\
\010\000\011\000\010\000\047\000\047\000\047\000\047\000\044\000\
\044\000\044\000\044\000\009\000\048\000\009\000\009\000\047\000\
\045\000\009\000\009\000\044\000\009\000\009\000\009\000\038\000\
\009\000\009\000\009\000\039\000\040\000\040\000\043\000\047\000\
\050\000\000\000\000\000\044\000\040\000\040\000\040\000\040\000\
\042\000\042\000\042\000\042\000\000\000\041\000\000\000\000\000\
\040\000\000\000\000\000\000\000\042\000\042\000\042\000\042\000\
\042\000\044\000\044\000\044\000\044\000\000\000\041\000\000\000\
\040\000\042\000\045\000\000\000\042\000\044\000\046\000\046\000\
\046\000\046\000\046\000\046\000\046\000\046\000\000\000\000\000\
\000\000\042\000\046\000\045\000\000\000\044\000\046\000\047\000\
\047\000\047\000\047\000\049\000\049\000\049\000\049\000\000\000\
\048\000\000\000\046\000\047\000\000\000\000\000\046\000\049\000\
\049\000\049\000\049\000\049\000\000\000\000\000\000\000\000\000\
\000\000\048\000\000\000\047\000\049\000\000\000\000\000\049\000\
\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\000\000\000\000\000\000\000\000\048\000\000\000";
\000\000\000\000\000\000\000\000\049\000\000\000";
Lexing.lex_check =
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\000\000\000\000\002\000\009\000\001\000\000\000\000\000\000\000\
\000\000\000\000\003\000\012\000\006\000\000\000\006\000\016\000\
\021\000\000\000\015\000\000\000\000\000\001\000\000\000\011\000\
\000\000\011\000\024\000\026\000\026\000\001\000\029\000\032\000\
\002\000\002\000\001\000\028\000\001\000\033\000\028\000\006\000\
\006\000\003\000\003\000\022\000\022\000\022\000\022\000\030\000\
\034\000\011\000\011\000\035\000\022\000\042\000\031\000\022\000\
\023\000\023\000\023\000\023\000\030\000\031\000\025\000\025\000\
\025\000\023\000\049\000\025\000\023\000\025\000\255\255\022\000\
\038\000\038\000\038\000\255\255\039\000\039\000\039\000\039\000\
\040\000\040\000\040\000\040\000\023\000\039\000\255\255\255\255\
\039\000\255\255\255\255\255\255\040\000\041\000\041\000\041\000\
\041\000\043\000\043\000\043\000\043\000\255\255\041\000\255\255\
\039\000\041\000\043\000\255\255\040\000\043\000\044\000\044\000\
\044\000\044\000\045\000\045\000\045\000\045\000\255\255\255\255\
\255\255\041\000\044\000\045\000\255\255\043\000\045\000\046\000\
\046\000\046\000\046\000\047\000\047\000\047\000\047\000\255\255\
\046\000\255\255\044\000\046\000\255\255\255\255\045\000\047\000\
\048\000\048\000\048\000\048\000\255\255\255\255\255\255\255\255\
\255\255\048\000\255\255\046\000\048\000\255\255\255\255\047\000\
\000\000\000\000\003\000\005\000\001\000\000\000\000\000\000\000\
\000\000\000\000\002\000\005\000\011\000\000\000\011\000\006\000\
\018\000\000\000\013\000\000\000\000\000\001\000\000\000\006\000\
\000\000\017\000\013\000\022\000\025\000\001\000\026\000\026\000\
\026\000\030\000\001\000\026\000\001\000\026\000\011\000\011\000\
\002\000\002\000\002\000\023\000\023\000\023\000\023\000\024\000\
\024\000\024\000\024\000\031\000\023\000\027\000\027\000\023\000\
\024\000\029\000\032\000\024\000\029\000\033\000\034\000\035\000\
\031\000\032\000\036\000\039\000\039\000\039\000\043\000\023\000\
\050\000\255\255\255\255\024\000\040\000\040\000\040\000\040\000\
\041\000\041\000\041\000\041\000\255\255\040\000\255\255\255\255\
\040\000\255\255\255\255\255\255\041\000\042\000\042\000\042\000\
\042\000\044\000\044\000\044\000\044\000\255\255\042\000\255\255\
\040\000\042\000\044\000\255\255\041\000\044\000\045\000\045\000\
\045\000\045\000\046\000\046\000\046\000\046\000\255\255\255\255\
\255\255\042\000\045\000\046\000\255\255\044\000\046\000\047\000\
\047\000\047\000\047\000\048\000\048\000\048\000\048\000\255\255\
\047\000\255\255\045\000\047\000\255\255\255\255\046\000\048\000\
\049\000\049\000\049\000\049\000\255\255\255\255\255\255\255\255\
\255\255\049\000\255\255\047\000\049\000\255\255\255\255\048\000\
\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
\255\255\255\255\255\255\255\255\048\000\255\255"
\255\255\255\255\255\255\255\255\049\000\255\255"
}
let rec token engine lexbuf =
match engine lex_tables 0 lexbuf with
0 -> (
# 57 "parser/wlexer.mll"
# 65 "parser/wlexer.mll"
token engine lexbuf )
| 1 -> (
# 58 "parser/wlexer.mll"
# 66 "parser/wlexer.mll"
let s = Lexing.lexeme lexbuf in
if Hashtbl.mem keywords s then "",s else "LIDENT",s
)
| 2 -> (
# 62 "parser/wlexer.mll"
# 70 "parser/wlexer.mll"
"UIDENT",Lexing.lexeme lexbuf )
| 3 -> (
# 63 "parser/wlexer.mll"
# 71 "parser/wlexer.mll"
"INT",Lexing.lexeme lexbuf )
| 4 -> (
# 64 "parser/wlexer.mll"
# 72 "parser/wlexer.mll"
let s = Lexing.lexeme lexbuf in
"TAG", tag_of_tag s 1
)
| 5 -> (
# 72 "parser/wlexer.mll"
# 80 "parser/wlexer.mll"
"",Lexing.lexeme lexbuf )
| 6 -> (
# 75 "parser/wlexer.mll"
# 83 "parser/wlexer.mll"
let string_start = Lexing.lexeme_start lexbuf in
string_start_pos := string_start;
let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
if double_quote then string2 engine lexbuf else string1 engine lexbuf;
string (Lexing.lexeme lexbuf) engine lexbuf;
lexbuf.Lexing.lex_start_pos <-
string_start - lexbuf.Lexing.lex_abs_pos;
(if double_quote then "STRING2" else "STRING1"),
(get_stored_string()) )
| 7 -> (
# 85 "parser/wlexer.mll"
# 93 "parser/wlexer.mll"
comment_start_pos := [Lexing.lexeme_start lexbuf];
comment engine lexbuf;
token engine lexbuf )
| 8 -> (
# 90 "parser/wlexer.mll"
# 98 "parser/wlexer.mll"
"EOI","" )
| 9 -> (
# 92 "parser/wlexer.mll"
# 100 "parser/wlexer.mll"
error
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
(Illegal_character ((Lexing.lexeme lexbuf).[0])) )
......@@ -233,90 +239,77 @@ let rec token engine lexbuf =
and comment engine lexbuf =
match engine lex_tables 1 lexbuf with
0 -> (
# 98 "parser/wlexer.mll"
# 106 "parser/wlexer.mll"
comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
comment engine lexbuf;
)
| 1 -> (
# 102 "parser/wlexer.mll"
# 110 "parser/wlexer.mll"
comment_start_pos := List.tl !comment_start_pos;
if !comment_start_pos <> [] then comment engine lexbuf;
)
| 2 -> (
# 106 "parser/wlexer.mll"
# 114 "parser/wlexer.mll"
string_start_pos := Lexing.lexeme_start lexbuf;
let string =
if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
(try string engine lexbuf
let ender = Lexing.lexeme lexbuf in
(try string ender engine lexbuf
with Location.Location (_,Unterminated_string) ->
let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_string_in_comment);
Buffer.clear string_buff;
comment engine lexbuf )
| 3 -> (
# 116 "parser/wlexer.mll"
# 123 "parser/wlexer.mll"
let st = List.hd !comment_start_pos in
error st (st+2) Unterminated_comment
)
| 4 -> (
# 120 "parser/wlexer.mll"
# 127 "parser/wlexer.mll"
comment engine lexbuf )
| _ -> failwith "lexing: empty token [comment]"
and string2 engine lexbuf =
and string ender engine lexbuf =
match engine lex_tables 2 lexbuf with
0 -> (
# 124 "parser/wlexer.mll"
() )
# 131 "parser/wlexer.mll"
let c = Lexing.lexeme lexbuf in
if c = ender then ()
else (store_char (Lexing.lexeme lexbuf); string ender engine lexbuf)
)
| 1 -> (
# 126 "parser/wlexer.mll"
store_char (Lexing.lexeme_char lexbuf 1);
string2 engine lexbuf )
# 137 "parser/wlexer.mll"