Commit e5996c86 authored by Pierre Letouzey's avatar Pierre Letouzey
Browse files

Autre projet possible : expressions régulieres et dérivées de Brzozowski

parent 57f847bd
Require Export Explore.
Import ListNotations.
Module RegEquivDec (Letter : FiniteOrderedType).
Include RegExplore(Letter).
(** Testing whether a regexp has an empty language *)
Definition is_empty r :=
negb (REs.exists_ is_nullable (exact_derivs r)).
Lemma is_empty_spec r :
is_empty r = true <-> r === Void.
Proof.
Admitted.
(** Now, we can test regexp equivalence via double inclusion *)
Definition is_equiv r r' :=
is_empty (Or (And r (Not r')) (And r' (Not r))).
Lemma is_equiv_spec r r' :
is_equiv r r' = true <-> r === r'.
Proof.
Admitted.
Lemma equiv_reflect r r' : reflect (r === r') (is_equiv r r').
Proof.
apply iff_reflect. symmetry. apply is_equiv_spec.
Qed.
Lemma equiv_dec r r' : { r === r' } + { ~ r === r' }.
Proof.
case (equiv_reflect r r'); [now left | now right].
Qed.
(** Exact list of derivatives up to "===" *)
(** Quadratic algorithm : [(derivs_sim_nb r)^2] calls to [is_equiv]. *)
Definition minimal_derivs r :=
removedup is_equiv (REs.elements (exact_derivs r)).
Definition derivs_equiv_nb r := length (minimal_derivs r).
(** Of course, we have less derivatives up to [equiv] than
up to [sim] (and less than the rough upper bound [derivs_bound]). *)
Lemma derivs_nb_comparison r :
derivs_equiv_nb r <= derivs_sim_nb r <= derivs_bound r.
Proof.
Admitted.
(** [minimal_derivs] has no duplicates up to [equiv] *)
Lemma minimal_derivs_nodup r : NoDupA equiv (minimal_derivs r).
Proof.
Admitted.
(** [minimal_derivs] is an exact list of derivatives up to [equiv] *)
Lemma minimal_derivs_correct r r' :
In r' (minimal_derivs r) -> exists w, r' === r//w.
Proof.
Admitted.
Lemma minimal_derivs_complete r :
AllDerivsInMod equiv r (minimal_derivs r).
Proof.
Admitted.
(** [minimal_derivs] is indeed minimal *)
Lemma minimal_derivs_minimal r l :
AllDerivsInMod equiv r l -> derivs_equiv_nb r <= length l.
Proof.
Admitted.
(** Note : it is possible to prove that [derivs_equiv_nb r] is the size
of the smallest deterministic automata recognizing the language of
[r] (with total transition function).
And actually the sets of derivatives found above (either modulo
[sim] or [equal]) could be used as the states of an automata
(either the minimal one or a good approximate of it).
The transition are r--a-->r' iff r' =~= r/a (or r' === r/a).
But that's another story...
*)
(** Note : a simple and fast test of emptyness is possible
as long as the regexp doesn't contain conjunction or complement.
But for testing the emptyness of [Not r], we should
study whether [r] is the full universe or not, and for [And r s]
we should study whether the languages of [r] and [s] do
intersect or not. Both questions are non-trivial.
It has been proved that testing equivalence of regexps with
[Not] and [And] is of non-elementary complexity.
*)
End RegEquivDec.
Require Export Finiteness.
Import ListNotations.
(** * Generating the exact set of derivatives of a regexp *)
(** In Finiteness.v, we've built an over-approximation of these
derivatives. Now, let's be exact. *)
Module RegExplore (Letter : FiniteOrderedType).
Include FiniteDerivs(Letter).
Implicit Type a : Letter.t.
(** From a given regexp r, the set of all [canon (r/a)] for all
possible letters a. *)
Definition allderiv1 r :=
list2set (List.map (fun a => canon (r/a)) Letter.enumeration).
Lemma allderiv1_in r x :
REs.In x (allderiv1 r) <-> exists a, x = canon (r/a).
Proof.
Admitted.
(** We proceed via a graph exploration. Nodes are regexps, edges are
letters, there is an edge [a] from [r] to [s] if [canon (r/a) = s].
We start with our initial regexp, and we stop when nothing new could
be reached. Note: this graph is actually the automata recognizing
our regexp, but we do not build it explicitely here.
- [n] is the depth of our search, to convince Coq we'll stop someday
- [explored] is a set of derivatives we've finished visiting :
all their derivatives by a letter are in [explored] or [seen]
- [seen] is a set of derivatives we still have to handle :
we haven't checked yet whether their own derivatives are new or not.
*)
Fixpoint explore n explored seen :=
match n with
| 0 => REs.empty (* abort *)
| S n =>
let seen' := REs.diff seen explored in
match REs.choose seen' with
| None => explored (* finished *)
| Some x =>
explore n (REs.add x explored)
(REs.union (allderiv1 x) (REs.remove x seen'))
end
end.
Definition try_exact_derivs r n :=
explore n REs.empty (REs.singleton (canon r)).
(** Normally, we launch explore with [derivs_bound], to be sure to
converges. But this bound could be horribly slow to compute... *)
Definition slow_exact_derivs r :=
try_exact_derivs r (S (derivs_bound r)).
(** Pragmatic approach : 100 loops are usually enough.
If not, we restart with the full bound and wait... *)
Definition exact_derivs r :=
let s := try_exact_derivs r 100 in
if REs.is_empty s then slow_exact_derivs r (* 100 wasn't enough *)
else s.
Definition Deriv r r' := exists w, r' = canon (r//w).
Definition DerivSet r set := forall r', REs.In r' set -> Deriv r r'.
Definition ExactDerivSet r set := forall r', REs.In r' set <-> Deriv r r'.
Definition Hereditary set1 set2 :=
forall r a, REs.In r set1 -> REs.In (canon (r/a)) set2.
Instance : Proper (eq ==> REs.Equal ==> iff) DerivSet.
Proof.
intros r r' <- set set' E. unfold DerivSet. now setoid_rewrite E.
Qed.
Instance : Proper (REs.Equal ==> REs.Equal ==> iff) Hereditary.
Proof.
intros set1 set1' E1 set2 set2' E2. unfold Hereditary.
setoid_rewrite E1; setoid_rewrite E2; easy.
Qed.
Lemma a_set_equation x explored seen :
REs.Equal
(REs.union (REs.add x explored)
(REs.union (allderiv1 x)
(REs.remove x (REs.diff seen explored))))
(REs.union (allderiv1 x) (REs.add x (REs.union explored seen))).
Proof.
intro y. generalize (allderiv1_in x y). REsdec.
Qed.
Definition PreCondition r explored seen :=
let all := REs.union explored seen in
REs.In (canon r) all /\
DerivSet r all /\
Hereditary explored all.
Lemma PreCondition_init r :
PreCondition r REs.empty (REs.singleton (canon r)).
Proof.
Admitted.
Lemma PreCondition_next r explored seen x :
let seen' := REs.diff seen explored in
REs.In x seen' ->
PreCondition r explored seen ->
PreCondition r (REs.add x explored)
(REs.union (allderiv1 x) (REs.remove x seen')).
Proof.
Admitted.
(** Here it could help to proceed by induction over the [rev] of some word.
Feel free to add any intermediate lemmas that might help you. *)
Lemma hereditary_all_derivs r s set :
REs.In (canon r) set ->
Hereditary set set ->
Deriv r s -> REs.In s set.
Proof.
Admitted.
Lemma explore_partial_spec r n explored seen :
PreCondition r explored seen ->
let out := explore n explored seen in
REs.Empty out \/ ExactDerivSet r out.
Proof.
Admitted.
Lemma explore_converges r n explored seen :
derivs_bound r < n + REs.cardinal explored ->
PreCondition r explored seen ->
~REs.Empty (explore n explored seen).
Proof.
Admitted.
Lemma explore_spec r n explored seen :
derivs_bound r < n + REs.cardinal explored ->
PreCondition r explored seen ->
ExactDerivSet r (explore n explored seen).
Proof.
Admitted.
Lemma slow_exact_derivs_spec r :
ExactDerivSet r (slow_exact_derivs r).
Proof.
Admitted.
Lemma exact_derivs_spec r :
ExactDerivSet r (exact_derivs r).
Proof.
Admitted.
Lemma exact_derivs_alt r :
REs.Equal (exact_derivs r) (slow_exact_derivs r).
Proof.
Admitted.
Lemma exact_derivs_init r : REs.In (canon r) (exact_derivs r).
Proof.
Admitted.
(** The exact count of derivatives of [r] not identified by [=~=] *)
Definition derivs_sim_nb r := REs.cardinal (exact_derivs r).
Lemma derivs_sim_nb_nz r : derivs_sim_nb r <> 0.
Proof.
Admitted.
(** Any list of derivatives without duplicates
is smaller than [exact_derivs]. *)
Definition DerivList r l := forall r', In r' l -> Deriv r r'.
Lemma derivs_smaller r l :
NoDup l -> DerivList r l -> length l <= derivs_sim_nb r.
Proof.
Admitted.
(** Any complete list of derivatives is larger than [exact_derivs]. *)
Lemma derivs_larger r l :
AllDerivsIn r l -> derivs_sim_nb r <= length l.
Proof.
Admitted.
(** In particular, the first bound on derivatives is (way) larger
than [derivs_sim_nb]. *)
Lemma exact_below_bound r : derivs_sim_nb r <= derivs_bound r.
Proof.
Admitted.
(** Same results using [sim] instead of [canon]. *)
Lemma NoDupA_sim_canon l : NoDupA sim l -> NoDup (map canon l).
Proof.
Admitted.
Lemma derivs_sim_smaller r l :
NoDupA sim l -> (forall r', In r' l -> exists w, r' =~= r//w) ->
length l <= derivs_sim_nb r.
Proof.
Admitted.
Lemma derivs_sim_larger r l :
AllDerivsInMod sim r l -> derivs_sim_nb r <= length l.
Proof.
Admitted.
End RegExplore.
Require Export Similar.
Import ListNotations.
(** * A regexp has a finite number of canonical derivatives *)
(** See file Similar.v for the definition of the canonical form *)
(** For proving that, we explicitely generate a list of regexps
containing (at least) all the derivatives. This list is an
over-approximation, we'll see later how to be exact in Explore.v *)
Module FiniteDerivs (Letter : FiniteOrderedType).
Include RegSim(Letter).
(** Two helper functions *)
Definition mixcat r l1 l2 :=
let f '(s,l2b) := canon (Or (Cat s r) (OR.mk l2b)) in
List.map f (product l1 (sublists l2)).
Definition mixstar r l :=
let f l := canon (OR.mk (List.map (fun s => Cat s (Star r)) l)) in
List.map f (sublists l).
Definition mkOr '(r1,r2) := sOr r1 r2.
Definition mkAnd '(r1,r2) := sAnd r1 r2.
(** First, an over-approximation : all canonical derivatives of [r]
will be in [over_derivs r], but this list may also contain some
non-derivatives. *)
Fixpoint over_derivs r := match r with
| Void => [Void]
| Epsilon => [Void; Epsilon]
| Letter a => [Void; Epsilon; Letter a]
| Cat r1 r2 => mixcat r2 (over_derivs r1) (over_derivs r2)
| Star r' => canon r :: mixstar r' (over_derivs r')
| Or r1 r2 => map mkOr (product (over_derivs r1) (over_derivs r2))
| And r1 r2 => map mkAnd (product (over_derivs r1) (over_derivs r2))
| Not r => map Not (over_derivs r)
end.
(** The size of this over-approximation *)
Fixpoint derivs_bound r :=
match r with
| Void => 1
| Epsilon => 2
| Letter _ => 3
| Cat r1 r2 => (derivs_bound r1) * 2^(derivs_bound r2)
| Star r => S (2^(derivs_bound r))
| Or r1 r2 => (derivs_bound r1) * (derivs_bound r2)
| And r1 r2 => (derivs_bound r1) * (derivs_bound r2)
| Not r => derivs_bound r
end.
Lemma derivs_bound_ok r :
length (over_derivs r) = derivs_bound r.
Proof.
Admitted.
(** The predicate we're trying to establish on [over_derivs] *)
Definition AllDerivsIn r l := forall w, In (canon (r//w)) l.
(** Results about [mixcat] *)
Lemma mixcat_in r r2 l1 l2 :
In r (mixcat r2 l1 l2) <->
exists s l2b,
canon (Or (Cat s r2) (OR.mk l2b)) = r /\ In s l1 /\ Incl l2b l2.
Proof.
Admitted.
Lemma mixcat_stable_1 r r2 l1 l2 : Canonical r ->
In r l1 ->
In (sCat r (canon r2)) (mixcat r2 l1 l2).
Proof.
Admitted.
Lemma mixcat_stable_2 r r' r2 l1 l2 : Canonical r' ->
In r (mixcat r2 l1 l2) ->
In r' l2 ->
In (sOr r r') (mixcat r2 l1 l2).
Proof.
Admitted.
Lemma mixcat_gen r1 r2 l1 l2 :
AllDerivsIn r1 l1 ->
AllDerivsIn r2 l2 ->
forall w',
AllDerivsIn (Cat (r1 // w') r2) (mixcat r2 l1 l2).
Proof.
Admitted.
Lemma mixcat_ok r1 r2 l1 l2 :
AllDerivsIn r1 l1 ->
AllDerivsIn r2 l2 ->
AllDerivsIn (Cat r1 r2) (mixcat r2 l1 l2).
Proof.
Admitted.
(** Results about [mixstar] *)
Lemma mixstar_in r' r l :
In r' (mixstar r l) <->
exists l',
canon (OR.mk (map (fun s => Cat s (Star r)) l')) = r' /\ Incl l' l.
Proof.
Admitted.
Lemma mixstar_stable_1 r l r' :
In (canon r') l ->
In (canon (Cat r' (Star r))) (mixstar r l).
Proof.
Admitted.
Lemma mixstar_stable_2 r r1 r2 l :
In r1 (mixstar r l) ->
In r2 (mixstar r l) ->
In (sOr r1 r2) (mixstar r l).
Proof.
Admitted.
Lemma mixstar_gen r l :
AllDerivsIn r l ->
forall w',
AllDerivsIn (Cat (r // w') (Star r)) (mixstar r l).
Proof.
Admitted.
Lemma mixstar_ok r l :
AllDerivsIn r l ->
AllDerivsIn (Star r) (canon (Star r) :: mixstar r l).
Proof.
Admitted.
(** Main theorem : our over approximation indeed contains
all canonical derivatives *)
Lemma finite_derivatives r : AllDerivsIn r (over_derivs r).
Proof.
Admitted.
(** Same result, expressed with [sim] instead of [canon] *)
Definition AllDerivsInMod (R:re->re->Prop) r l :=
forall w, InModulo R (r//w) l.
Lemma finite_derivatives' r :
AllDerivsInMod sim r (over_derivs r).
Proof.
Admitted.
(** Finiteness statement based on [equiv] (full regexp equivalence
based on the languages) instead of [sim] *)
Lemma finite_derivatives_equiv r :
AllDerivsInMod equiv r (over_derivs r).
Proof.
Admitted.
End FiniteDerivs.
Require Export Bool List Equalities Orders Setoid Morphisms.
Import ListNotations.
(** * Languages are sets of words over some type of letters *)
Module Lang (Letter : Typ).
Definition word := list Letter.t.
Definition t := word -> Prop.
Bind Scope lang_scope with t.
Delimit Scope lang_scope with lang.
Local Open Scope lang_scope.
Implicit Type a : Letter.t.
Implicit Type w x y z : word.
Implicit Type L : t.
Definition eq L L' := forall x, L x <-> L' x.
Definition void : t := fun _ => False.
Definition epsilon : t := fun w => w = [].
Definition singleton a : t := fun w => w = [a].
Definition cat L L' : t :=
fun x => exists y z, x = y++z /\ L y /\ L' z.
Definition union L L' : t := fun w => L w \/ L' w.
Definition inter L L' : t := fun w => L w /\ L' w.
Fixpoint power L n : t :=
match n with
| 0 => epsilon
| S n' => cat L (power L n')
end.
(** Kleene's star *)
Definition star L : t := fun w => exists n, power L n w.
(** language complement *)
Definition comp L : t := fun w => ~(L w).
(** Languages : notations **)
Module Notations.
Infix "==" := Lang.eq (at level 70).
Notation "∅" := void : lang_scope. (* \emptyset *)
Notation "'ε'" := epsilon : lang_scope. (* \epsilon *)
Coercion singleton : Letter.t >-> Lang.t.
Infix "·" := cat (at level 35) : lang_scope. (* \cdot *)
Infix "∪" := union (at level 50) : lang_scope. (* \cup *)
Infix "∩" := inter (at level 40) : lang_scope. (* \cap *)
Infix "^" := power : lang_scope.
Notation "L ★" := (star L) (at level 30) : lang_scope. (* \bigstar *)
Notation "¬ L" := (comp L) (at level 65): lang_scope. (* \neg *)
End Notations.
Import Notations.
(** Technical stuff to be able to rewrite with respect to "==" *)
Instance : Equivalence eq.
Proof. firstorder. Qed.
Instance cat_eq : Proper (eq ==> eq ==> eq) cat.
Proof. firstorder. Qed.
Instance inter_eq : Proper (eq ==> eq ==> eq) inter.
Proof. firstorder. Qed.
Instance union_eq : Proper (eq ==> eq ==> eq) union.
Proof. firstorder. Qed.
Instance comp_eq : Proper (eq ==> eq) comp.
Proof. firstorder. Qed.
Instance power_eq : Proper (eq ==> Logic.eq ==> eq) power.
Proof.
intros x x' Hx n n' <-. induction n; simpl; now rewrite ?IHn, ?Hx.
Qed.
Instance cat_eq' : Proper (eq ==> eq ==> Logic.eq ==> iff) cat.
Proof. intros x x' Hx y y' Hy w w' <-. now apply cat_eq. Qed.
Instance inter_eq' : Proper (eq ==> eq ==> Logic.eq ==> iff) inter.
Proof. intros x x' Hx y y' Hy w w' <-. now apply inter_eq. Qed.
Instance union_eq' : Proper (eq ==> eq ==> Logic.eq ==> iff) union.
Proof. intros x x' Hx y y' Hy w w' <-. now apply union_eq. Qed.
Instance comp_eq' : Proper (eq ==> Logic.eq ==> iff) comp.
Proof. intros x x' Hy w w' <-. now apply comp_eq. Qed.
Instance power_eq' : Proper (eq ==> Logic.eq ==> Logic.eq ==> iff) power.
Proof. intros x x' Hx n n' <- w w' <-. now apply power_eq. Qed.
Instance star_eq : Proper (eq ==> eq) star.
Proof.
intros x x' Hx w. unfold star. now setoid_rewrite <- Hx.
Qed.
Instance star_eq' : Proper (eq ==> Logic.eq ==> iff) star.
Proof. intros x x' Hx w w' <-. now apply star_eq. Qed.
(** Languages : misc properties *)
Lemma cat_void_l L : · L == .
Proof.
Admitted.
Lemma cat_void_r L : L · == .
Proof.
Admitted.