Commit 88300a9b authored by Pietro Abate

[r2003-03-12 19:09:30 by cvscast] Empty log message

Original author: cvscast
Date: 2003-03-12 19:09:30+00:00
parent 8adfc664
......@@ -236,7 +236,11 @@ EXTEND
regexp: [
[ x = regexp; "|"; y = regexp -> Alt (x,y) ]
[ x = regexp; "|"; y = regexp ->
match (x,y) with
| Elem x, Elem y -> Elem (mk loc (Or (x,y)))
| _ -> Alt (x,y)
]
| [ x = regexp; y = regexp -> Seq (x,y) ]
| [ a = LIDENT; "::"; x = regexp -> SeqCapture (ident a,x) ]
| [ x = regexp; "*" -> Star x
......
......@@ -103,9 +103,9 @@ let lex_tables = {
"\000\000\009\000\012\000\018\000\252\255\251\255\004\000\255\255\
\005\000\254\255\014\000\013\000\003\000\005\000\253\255\255\255\
\247\255\246\255\020\000\047\000\051\000\018\000\043\000\250\255\
\027\000\017\000\044\000\052\000\005\000\011\000\045\000\041\000\
\249\255\250\255\248\255\062\000\069\000\080\000\084\000\063\000\
\089\000\099\000\104\000\119\000\123\000\133\000\067\000";
\027\000\017\000\044\000\052\000\005\000\011\000\045\000\043\000\
\249\255\250\255\248\255\066\000\069\000\082\000\086\000\060\000\
\090\000\103\000\120\000\124\000\137\000\141\000\074\000";
Lexing.lex_backtrk =
"\255\255\255\255\255\255\255\255\255\255\255\255\004\000\255\255\
\002\000\255\255\004\000\002\000\004\000\004\000\255\255\255\255\
......@@ -128,20 +128,21 @@ let lex_tables = {
\032\000\033\000\013\000\009\000\009\000\033\000\033\000\014\000\
\033\000\014\000\007\000\010\000\009\000\009\000\035\000\036\000\
\036\000\006\000\007\000\043\000\043\000\043\000\043\000\040\000\
\040\000\040\000\040\000\033\000\044\000\033\000\034\000\033\000\
\041\000\035\000\036\000\036\000\033\000\039\000\046\000\000\000\
\033\000\036\000\036\000\036\000\036\000\000\000\000\000\043\000\
\000\000\000\000\037\000\040\000\038\000\038\000\038\000\038\000\
\038\000\038\000\038\000\038\000\000\000\040\000\040\000\040\000\
\040\000\037\000\000\000\000\000\000\000\036\000\041\000\042\000\
\042\000\042\000\042\000\000\000\042\000\042\000\042\000\042\000\
\038\000\000\000\000\000\000\000\038\000\041\000\000\000\000\000\
\000\000\040\000\000\000\043\000\043\000\043\000\043\000\045\000\
\045\000\045\000\045\000\042\000\044\000\000\000\000\000\000\000\
\042\000\045\000\045\000\045\000\045\000\000\000\000\000\000\000\
\000\000\000\000\044\000\000\000\000\000\000\000\000\000\043\000\
\000\000\000\000\000\000\045\000\000\000\000\000\000\000\000\000\
\000\000\000\000\000\000\000\000\000\000\045\000\000\000";
\040\000\040\000\040\000\033\000\044\000\033\000\034\000\043\000\
\041\000\033\000\039\000\040\000\033\000\035\000\036\000\036\000\
\033\000\036\000\036\000\036\000\036\000\046\000\000\000\043\000\
\000\000\000\000\037\000\040\000\000\000\036\000\038\000\038\000\
\038\000\038\000\038\000\038\000\038\000\038\000\040\000\040\000\
\040\000\040\000\038\000\037\000\000\000\036\000\038\000\041\000\
\000\000\000\000\040\000\042\000\042\000\042\000\042\000\000\000\
\000\000\000\000\038\000\000\000\000\000\000\000\038\000\042\000\
\000\000\000\000\040\000\000\000\042\000\042\000\042\000\042\000\
\043\000\043\000\043\000\043\000\000\000\041\000\000\000\042\000\
\042\000\044\000\000\000\000\000\043\000\045\000\045\000\045\000\
\045\000\045\000\045\000\045\000\045\000\000\000\000\000\000\000\
\042\000\045\000\044\000\000\000\043\000\045\000\000\000\000\000\
\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\000\000\045\000\000\000\000\000\000\000\045\000\000\000";
Lexing.lex_check =
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
\000\000\001\000\006\000\008\000\002\000\000\000\000\000\000\000\
......@@ -150,20 +151,21 @@ let lex_tables = {
\000\000\028\000\001\000\006\000\006\000\024\000\029\000\001\000\
\024\000\001\000\002\000\002\000\010\000\010\000\022\000\022\000\
\022\000\003\000\003\000\019\000\019\000\019\000\019\000\020\000\
\020\000\020\000\020\000\026\000\019\000\027\000\030\000\031\000\
\020\000\035\000\035\000\035\000\027\000\039\000\046\000\255\255\
\026\000\036\000\036\000\036\000\036\000\255\255\255\255\019\000\
\255\255\255\255\036\000\020\000\037\000\037\000\037\000\037\000\
\038\000\038\000\038\000\038\000\255\255\040\000\040\000\040\000\
\040\000\038\000\255\255\255\255\255\255\036\000\040\000\041\000\
\041\000\041\000\041\000\255\255\042\000\042\000\042\000\042\000\
\037\000\255\255\255\255\255\255\038\000\042\000\255\255\255\255\
\255\255\040\000\255\255\043\000\043\000\043\000\043\000\044\000\
\044\000\044\000\044\000\041\000\043\000\255\255\255\255\255\255\
\042\000\045\000\045\000\045\000\045\000\255\255\255\255\255\255\
\255\255\255\255\045\000\255\255\255\255\255\255\255\255\043\000\
\255\255\255\255\255\255\044\000\255\255\255\255\255\255\255\255\
\255\255\255\255\255\255\255\255\255\255\045\000\255\255"
\020\000\020\000\020\000\026\000\019\000\027\000\030\000\019\000\
\020\000\031\000\039\000\020\000\027\000\035\000\035\000\035\000\
\026\000\036\000\036\000\036\000\036\000\046\000\255\255\019\000\
\255\255\255\255\036\000\020\000\255\255\036\000\037\000\037\000\
\037\000\037\000\038\000\038\000\038\000\038\000\040\000\040\000\
\040\000\040\000\037\000\038\000\255\255\036\000\038\000\040\000\
\255\255\255\255\040\000\041\000\041\000\041\000\041\000\255\255\
\255\255\255\255\037\000\255\255\255\255\255\255\038\000\041\000\
\255\255\255\255\040\000\255\255\042\000\042\000\042\000\042\000\
\043\000\043\000\043\000\043\000\255\255\042\000\255\255\041\000\
\042\000\043\000\255\255\255\255\043\000\044\000\044\000\044\000\
\044\000\045\000\045\000\045\000\045\000\255\255\255\255\255\255\
\042\000\044\000\045\000\255\255\043\000\045\000\255\255\255\255\
\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
\255\255\044\000\255\255\255\255\255\255\045\000\255\255"
}
let rec token engine lexbuf =
......
......@@ -44,7 +44,7 @@ classes
}
let identchar = lowercase | uppercase | ascii_digit | '_' | '\''
let identchar = lowercase | uppercase | ascii_digit | '_' | '\'' | '-'
let ident = identchar* ( ':' identchar+)*
rule token = parse
......
......@@ -250,7 +250,9 @@ type X_head =
];;
type X_html = <html>[ (X_head X_body) ];;
match load_xml "tests/xhtml.xml" with X_html -> `Ok | _ -> `Not_ok;;
(*
match load_xml "tests/xhtml.xml" with x -> x;;
*)
......@@ -9,6 +9,9 @@ open Pxp_yacc
open Pxp_lexer_types
open Pxp_types
(* Lookup tables used as seen-before sets: the value type is [unit], so only
   key membership matters.  [127] is the initial size hint passed to
   [Hashtbl.create].
   [mixed_table] is probed with the child list of a [Mixed] content model and
   [regexp_table] with the regexp of a [Regexp] content model.
   NOTE(review): in the visible part of this file the matching [Hashtbl.add]
   calls are commented out, so both tables appear to stay empty and every
   lookup raises [Not_found] -- confirm against the rest of the file. *)
let mixed_table : ('a,unit) Hashtbl.t = Hashtbl.create 127
let regexp_table : ('a,unit) Hashtbl.t = Hashtbl.create 127
let import_dtd ppf name filename =
let rec regexp ppf = function
| Optional re -> Format.fprintf ppf "%a?" regexp re
......@@ -29,17 +32,59 @@ let import_dtd ppf name filename =
| Unspecified | Any -> Format.fprintf ppf "Any*"
| Empty -> Format.fprintf ppf ""
| Mixed l ->
let l = List.map
(function
| MPCDATA -> "Char"
| MChild s -> name s) l in
Format.fprintf ppf "( %s )*" (String.concat " | " l)
| Regexp r -> regexp ppf r
(try
Hashtbl.find mixed_table l;
Format.fprintf ppf "MIXED:CACHED!"
with Not_found ->
(* Hashtbl.add mixed_table l (); *)
let l = List.map
(function
| MPCDATA -> "Char"
| MChild s -> name s) l in
Format.fprintf ppf "( %s )*" (String.concat " | " l))
| Regexp r ->
(try
Hashtbl.find regexp_table r;
Format.fprintf ppf "REGEXP:CACHED!"
with Not_found ->
(* Hashtbl.add regexp_table r ();*)
regexp ppf r
)
in
let att_type ppf = function
| A_enum l ->
Format.fprintf ppf "(";
ignore
(List.fold_left
(fun first s ->
if not first then Format.fprintf ppf " | ";
Format.fprintf ppf "\"%s\"" s; false) true l);
Format.fprintf ppf ")"
| _ -> Format.fprintf ppf "String"
in
let attrib ppf e =
ignore
(List.fold_left
(fun first a ->
let (at,ad) = e # attribute a in
match ad with
| D_fixed _ -> first
| _ ->
Format.fprintf ppf "%s%s=%s%a"
(if first then "" else "; ")
a
(if ad = D_required then "" else "?")
att_type at;
false
)
true (e # attribute_names)
)
in
let elt ppf e =
Format.fprintf ppf "type @[<2>%s =@ @[<3><%s>[@ @[%a@]@ ]@]@];;@\n"
Format.fprintf ppf "type @[<2>%s =@ @[<3><%s %a>[@ @[%a@]@ ]@]@];;@\n"
(name (e # name))
(e # name)
attrib e
content (e # content_model)
in
let handle = function
......
......@@ -2,6 +2,20 @@ open Recursive
open Printf
open Ident
(* IDEAS for optimizations:
* optimize lines of dnf for products and record;
instead of
(t1,s1) & ... & (tn,sn) \ ....
use:
(t1 & ... & tn, s1 & ... & sn) \ ....
---> more compact representation, more sharing, ...
* re-consider using BDD-like representation instead of dnf
*)
(* [map_sort f l] applies [f] to every element of [l] and hands the resulting
   list to [SortedList.from_list]. *)
let map_sort f l =
  let mapped = List.map f l in
  SortedList.from_list mapped
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment