Library ATBR.DKA_Construction


Construction of automata (eNFA) from regular expressions.

We define a high-level algorithm first, which we prove correct. The efficient algorithm is then defined, and proved equivalent to the high-level one.

We also prove that is the given expression was in strict star form, then the resulting automata has well-founded epsilon transitions.

Require Import Common.
Require Import Classes.
Require Import Graph.
Require Import Monoid.
Require Import SemiLattice.
Require Import SemiRing.
Require Import KleeneAlgebra.
Require Import MxGraph.
Require Import MxSemiLattice.
Require Import MxSemiRing.
Require Import MxKleeneAlgebra.

Require Import DKA_Definitions.
Require Import DKA_CheckLabels.
Require Import StrictStarForm.

Require Import Utils_WF.
Require Import Relations.


Algebraic, not really efficient, presentation of the construction algorithm
Module Algebraic.

  Record pre_MAUT := mk {
    size: nat;
    delta: KMX size size
  }.

  Definition add (a : regex) (i f : nat) (A : pre_MAUT) : pre_MAUT := mk (delta A + mx_point _ _ i f a).

  Definition add_one i f A := add 1 i f A.

  Definition add_var i f c A := add (RegExp.var c) i f A.

  Definition empty := mk (0: KMX 2 2).

  Definition incr (A : pre_MAUT) : pre_MAUT := @mk (size A+1) (mx_blocks (delta A) 0 0 0).

  Fixpoint build e i f (A : pre_MAUT) : pre_MAUT :=
    match e with
      | RegExp.zero => A
      | RegExp.one => add_one i f A
      | RegExp.var c => add_var i f c A
      | RegExp.plus a b => build a i f (build b i f A)
      | RegExp.dot a b =>
        let p := size A in
          build a i p (build b p f (incr A))
      | RegExp.star a =>
        let p := size A in
          add_one i p (build a p p (add_one p f (incr A)))
    end.

  Definition to_MAUT i f A :=
    MAUT.mk (mx_point _ _ 0 i 1) (delta A) (mx_point _ _ f 0 1).

  Definition X_to_MAUT e := to_MAUT 0 1 (build e 0 1 empty).

below : proof of correctness

  Definition eval i f := to_MAUT i f >> MAUT.eval.

  Inductive equal : pre_MAUT -> pre_MAUT -> Prop :=
  | equal_refl: forall n (M N : KMX n n), M == N -> equal (mk M) (mk N).

  Infix " [=0=] " := equal (at level 80).

  Section protect.

  Local Instance equal_equiv : Equivalence equal.

  Local Instance to_MAUT_compat i f : Proper (equal ==> MAUT.equal) (to_MAUT i f).

  Local Instance eval_compat i f : Proper (equal ==> Classes.equal tt tt) (eval i f).

  Local Instance add_compat' : Proper (Classes.equal tt tt ==> (@eq nat) ==> @eq nat ==> equal ==> equal) add.
  Local Instance add_compat a i j : Proper (equal ==> equal) (add a i j).

  Local Instance incr_compat : Proper (equal ==> equal) incr.

  Local Instance build_compat a i f : Proper (equal ==> equal) (build a i f).

  Opaque star plus dot one zero.

  Notation belong s A := (s < size A)%nat.

  Lemma belong_incr: forall A s, belong s A -> belong s (incr A).
  Lemma belong_incr': forall A, belong (size A) (incr A).
  Lemma belong_add: forall i j b A s, belong s A -> belong s (add i j b A).
  Lemma belong_add_one: forall i j A s, belong s A -> belong s (add_one i j A).
  Lemma belong_add_var: forall i j n A s, belong s A -> belong s (add_var i j n A).
  Local Hint Resolve belong_incr belong_incr' belong_add belong_add_one belong_add_var.

  Lemma belong_build: forall a i j A s, belong s A -> belong s (build a i j A).
  Local Hint Resolve belong_build.

  Lemma add_comm : forall a b i f s t M, add a i f (add b s t M) [=0=] add b s t (add a i f M).

  Lemma add_plus : forall a b i f M, add (a+b) i f M [=0=] add a i f (add b i f M).

  Lemma add_zero : forall i f M, add 0 i f M [=0=] M.

  Lemma incr_add : forall a s t M, belong s M -> belong t M ->
    incr (add a s t M) [=0=] add a s t (incr M).

  Lemma build_add: forall b i j s t M a, belong s M -> belong t M ->
    build b i j (add a s t M) [=0=] add a s t (build b i j M).

  Lemma eval_select_block m x y z: forall (U: KMX x m) (A M: KMX m m) B C (D: KMX y y) (V: KMX m z),
    A+B*D#*C == M ->
     mx_blocks (x:=0) 0 0 U 0 * (mx_blocks A B C D)# * mx_blocks (y:=0) 0 V 0 0
     == U * M# * V.

  Lemma eval_master_theorem i f s t a b c: forall A,
    belong i A -> belong f A -> belong s A -> belong t A ->
    eval i f (add c (size A) (size A) (add a s (size A) (add b (size A) t (incr A))))
    == eval i f (add (a*c#*b) s t A).

  Lemma eval_dot i f s t a b: forall A,
    belong i A -> belong f A -> belong s A -> belong t A ->
    eval i f (add b (size A) t (add a s (size A) (incr A)))
    == eval i f (add (a*b) s t A).

  Lemma eval_star i f s t a A:
    belong i A -> belong f A -> belong s A -> belong t A ->
    eval i f (add a (size A) (size A) (add 1 s (size A) (add 1 (size A) t (incr A))))
    == eval i f (add (a#) s t A).

  Lemma build_correct: forall a A i f s t,
    belong i A -> belong f A -> belong s A -> belong t A ->
    eval i f (build a s t A) == eval i f (add a s t A).

Correctness of the high-level construction algorithm

  Theorem construction_correct: forall (e: regex), MAUT.eval (X_to_MAUT e) == e.
    Transparent dot star plus one zero Peano.minus.

  End protect.
End Algebraic.

Infix " [=0=] " := Algebraic.equal (at level 80).

Definition statelabelmap_set_to_fun d := fun ia => optionset_to_set (StateLabelMap.find ia d).

Module Concrete.

  Record pre_eNFA := mk {
    size: state;
next fresh state (= size)
epsilon-transitions
visible transitions
    max_label: label
maximal label + 1
  }.

  Definition augment (i: state) f eps :=
    StateMap.add i (StateSet.add f (statemap_set_to_fun eps i)) eps.

  Definition add_one i f (A: pre_eNFA) :=
    let '(mk s e d m) := A in
      mk s (augment i f e) d m.

  Definition augment_var ia j delt :=
    StateLabelMap.add ia (StateSet.add j (statelabelmap_set_to_fun delt ia)) delt.

  Definition add_var i f a (A: pre_eNFA) :=
    let '(mk s e d m) := A in
      mk s e (augment_var (i,a) f d) (max (S a) m).

  Definition incr (A: pre_eNFA): pre_eNFA:=
    let '(mk s e d m) := A in
      mk (S s) e d m.

  Fixpoint build e (i : state) (f: state) (A : pre_eNFA) : pre_eNFA :=
    match e with
      | RegExp.zero => A
      | RegExp.one => add_one i f A
      | RegExp.var c => add_var i f c A
      | RegExp.plus a b => build a i f (build b i f A)
      | RegExp.dot a b =>
        let p := size A in
          let A := incr A in
            build a i p (build b p f A)
      | RegExp.star a =>
        let p := size A in
          let A := incr A in
            add_one i p (build a p p (add_one p f A))
    end.

  Definition empty := mk 2 (StateMap.empty _) (StateLabelMap.empty _) O.

  Definition X_to_eNFA e :=
    let '(mk s e d m) := build e 0 1 empty in
      eNFA.mk s (statemap_set_to_fun e) d 0 1 m.

End Concrete.

Definition X_to_eNFA := Concrete.X_to_eNFA.

Module Correctness.

  Import Concrete.

  Notation belong s A := (below s (size A)).

  Definition epsilonfun A := statemap_set_to_fun (epsilonmap A).
  Definition epsilonbrel A := fun i j => StateSet.mem j (epsilonfun A i).
  Definition epsilon A: relation state := fun i j => epsilonbrel A i j = true.
  Notation epsilon_t A := (clos_trans state (epsilon A)).
  Notation epsilon_rt A := (clos_refl_trans state (epsilon A)).

  Definition wf A := well_founded (transp _ (epsilon A)).

  Definition deltafun A := statelabelmap_set_to_fun (deltamap A).
  Definition deltabrel A i a j := StateSet.mem j (deltafun A (i,a)).
  Definition delta A i a j := deltabrel A i a j = true.

  Ltac unfold_defs H :=
    unfold delta, deltabrel, deltafun, statelabelmap_set_to_fun,
      epsilon, epsilonbrel, epsilonfun, statemap_set_to_fun in H; simpl in H.

  Record bounded A : Prop := {
      bounded_delta: forall i a j, delta A i a j -> lt a (max_label A) /\ belong i A /\ belong j A;
      bounded_eps : forall i j, epsilon A i j -> belong i A /\ belong j A
  }.

  Definition preNFA_to_preMAUT (A: pre_eNFA): Algebraic.pre_MAUT :=
    let n := nat_of_state (size A) in
      Algebraic.mk
      (mx_bool _ n n (fun i j => epsilonbrel A (state_of_nat i) (state_of_nat j))
        + box n n (fun i j => labelling (max_label A)
                      (fun a => deltabrel A (state_of_nat i) a (state_of_nat j)))).

  Section protect.

  Transparent zero one plus dot star.

  Lemma belong_incr: forall A s, belong s A -> belong s (incr A).
  Lemma belong_incr': forall A, belong (size A) (incr A).
  Lemma belong_add_one: forall i j A s, belong s A -> belong s (add_one i j A).
  Lemma belong_add_var: forall i j n A s, belong s A -> belong s (add_var i j n A).
  Local Hint Resolve belong_incr belong_incr' belong_add_one belong_add_var.

  Lemma belong_build: forall a i j A s, belong s A -> belong s (build a i j A).
  Local Hint Resolve belong_build.

  Lemma epsilonbrel_add_one: forall i j A s t,
    epsilonbrel (add_one i j A) s t = epsilonbrel A s t || eq_state_bool i s && eq_state_bool j t.

  Lemma epsilon_add_one: forall i j A s t, epsilon (add_one i j A) s t <-> epsilon A s t \/ i=s /\ j=t.

  Lemma epsilon_add_var: forall i j n A s t, epsilon (add_var i j n A) s t <-> epsilon A s t.

  Lemma epsilon_incr: forall A s t, epsilon (incr A) s t <-> epsilon A s t.

  Lemma epsilon_empty: forall i j, ~ epsilon empty i j.

  Ltac sl_inject := repeat
    match goal with
      | H: StateLabel.P.compare _ _ = Eq |- _ =>
        apply StateLabel.P.reflect in H; injection H; intros; subst
      | H: StateLabel.P.compare ?x ?x = Eq -> False |- _ => elim H; apply StateLabel.eq_refl
    end.

  Lemma deltabrel_add_var: forall i j n A s t m,
    deltabrel (add_var i j n A) s m t =
    deltabrel A s m t || eq_label_bool n m && (eq_state_bool i s && eq_state_bool j t).

  Lemma delta_add_var: forall i j n A s t m,
    delta (add_var i j n A) s m t <-> delta A s m t \/ n=m /\ i=s /\ j=t.

  Lemma bounded_incr: forall A, bounded A -> bounded (incr A).
  Lemma bounded_add_one: forall i j A, belong i A -> belong j A -> bounded A -> bounded (add_one i j A).
  Lemma bounded_add_var: forall i j n A, belong i A -> belong j A -> bounded A -> bounded (add_var i j n A).
  Local Hint Resolve bounded_incr bounded_add_one bounded_add_var.

  Lemma bounded_build: forall a i j A, belong i A -> belong j A -> bounded A -> bounded (build a i j A).
  Local Hint Resolve bounded_build.

  Lemma bounded_empty: bounded empty.

  Lemma commute_add_one: forall i f A,
    bounded A -> belong i A -> belong f A ->
    Algebraic.add_one (nat_of_state i) (nat_of_state f) (preNFA_to_preMAUT A)
    [=0=] preNFA_to_preMAUT (add_one i f A).
    Opaque add_one.
    Transparent add_one.

  Lemma labelling_S: forall f n,
    labelling (Datatypes.S n) f == labelling n f + xif (f n) (RegExp.var (num_of_nat n): regex) 0.

  Lemma labelling_empty: forall f, (forall a, f a = false) -> forall n, labelling n f == 0.

  Lemma labelling_crop : forall n A i j,
    bounded A -> max_label A <= n ->
    labelling (max_label A) (fun a => deltabrel A i a j) == labelling n (fun a => deltabrel A i a j).

  Lemma leb_S: forall n, leb (S n) xH = false.
  Hint Rewrite leb_S : bool_simpl.

  Lemma labelling_add_var : forall n A i j s t c,
    labelling n (fun a => deltabrel (add_var s t c A) i a j)
    ==
    labelling n (fun a => deltabrel A i a j) +
    xif (eq_state_bool s i && eq_state_bool t j && leb (S c) n) (RegExp.var c: regex) 0.

  Lemma psurj A: A = mk (size A) (epsilonmap A) (deltamap A) (max_label A).

  Opaque Max.max.
  Lemma commute_add_var: forall n i f A,
    bounded A -> belong i A -> belong f A ->
    Algebraic.add_var (nat_of_state i) (nat_of_state f) n (preNFA_to_preMAUT A)
    [=0=] preNFA_to_preMAUT (add_var i f n A).
  Transparent Max.max.

  Lemma not_true_eq_false: forall b, ~ b = true -> b = false.

  Lemma commute_incr: forall A, bounded A ->
    Algebraic.incr (preNFA_to_preMAUT A) [=0=] preNFA_to_preMAUT (incr A).

  Local Hint Resolve
    Algebraic.equal_equiv Algebraic.eval_compat Algebraic.to_MAUT_compat
    Algebraic.add_compat Algebraic.build_compat Algebraic.incr_compat
    : typeclass_instances.

  Lemma commute_build: forall a i f A,
    bounded A -> belong i A -> belong f A ->
    Algebraic.build a (nat_of_state i) (nat_of_state f) (preNFA_to_preMAUT A)
    [=0=] preNFA_to_preMAUT (build a i f A).

  Lemma commute_empty: Algebraic.empty [=0=] preNFA_to_preMAUT empty.

  Lemma commute_build_empty: forall e,
    Algebraic.build e 0 1 Algebraic.empty [=0=] preNFA_to_preMAUT (build e 0 1 empty).

The two construction algorithms (high-level / efficient) are equivalent

  Theorem constructions_equiv: forall a,
    MAUT.equal (eNFA.to_MAUT (X_to_eNFA a)) (Algebraic.X_to_MAUT a).

  Lemma t_rt: forall T R i j, clos_trans T R i j -> clos_refl_trans T R i j.

  Lemma rt_t: forall T R i j, clos_refl_trans T R i j -> i=j \/ clos_trans T R i j.

  Lemma epsilon_t_add_one: forall i j A s t, epsilon_t (add_one i j A) s t ->
    epsilon_t A s t \/ epsilon_rt A s i /\ epsilon_rt A j t.

  Lemma epsilon_rt_add_one: forall i j A s t, epsilon_rt (add_one i j A) s t ->
    epsilon_rt A s t \/ epsilon_rt A s i /\ epsilon_rt A j t.

  Lemma epsilon_rt_incr: forall A s t, epsilon_rt (incr A) s t -> epsilon_rt A s t.

  Lemma not_epsilon_rt_incr_left: forall A s, epsilon_rt (incr A) s (size A) -> bounded A -> belong s A -> False.
  Lemma not_epsilon_rt_incr_right: forall A s, epsilon_rt (incr A) (size A) s -> bounded A -> belong s A -> False.
  Ltac not_epsilon :=
    match goal with
      | H: epsilon_rt (incr _) _ _ |- _ =>
        (elim (not_epsilon_rt_incr_left H) || elim (not_epsilon_rt_incr_right H)); solve [auto]
    end.

  Local Hint Constructors non_strict.
  Lemma epsilon_rt_build: forall a i j A s t,
    bounded A ->
    belong i A -> belong j A ->
    belong s A -> belong t A ->
    epsilon_rt (build a i j A) s t ->
    epsilon_rt A s t \/ (epsilon_rt A s i /\ epsilon_rt A j t /\ non_strict a).

  Lemma wf_add_one: forall A i j, wf A -> ~ epsilon_rt A j i -> wf (add_one i j A).

  Theorem wf_build: forall a, strict_star_form a ->
    forall i j A, bounded A -> belong i A -> belong j A -> wf A ->
      (epsilon_rt A j i -> strict a) -> wf (build a i j A).

  Lemma wf_empty: wf empty.

  End protect.
End Correctness.

Import Correctness Concrete.

Correctness of the construction algorithm

Theorem correct: forall (a: regex), eNFA.eval (X_to_eNFA a) == a.

Lemma bounded: forall (a: regex), eNFA.bounded (X_to_eNFA a).

The algorithm produces well-founded eNFAs when starting with expressions in strict star form
We finally show that expressions sharing the same set of labels yield eNFAs with the same max_label (theorem same_labels_max_label below). We test the equality of the sets of labels at the very beginning of decide_kleene (for efficiency reasons, in case of failure). Having the theorem below allows us to remove the need for a test on max_labels later on, in DKA_Merge, so that obtaining the completness of the procedure is easier.

Lemma max_label_add_one: forall i j A, max_label (add_one i j A) = max_label A.

Lemma max_label_add_var: forall i j a A, max_label (add_var i j a A) = max (S a) (max_label A).

Lemma max_label_incr: forall A, max_label (incr A) = max_label A.

Lemma build_max_label: forall a i e f A, i < max_label A -> i < max_label (build a e f A).

Lemma collect_max_label:
  forall a i acc, NumSet.In i (collect a acc) ->
    forall e f A, nat_of_num i < max_label (build a e f A) \/ NumSet.In i acc.

Lemma max_label_collect:
  forall a i e f A, i < max_label (build a e f A) ->
    (exists2 j, i<=j & NumSet.In (state_of_nat j) (collect a NumSet.empty)) \/ i < max_label A.

Lemma inf_leq: forall n m, (forall k, k<n -> k<m) -> n <= m.

Lemma le_antisym: forall n m: num, n <= m -> m <= n -> n = m.

Lemma max_label_X_to_eNFA a: eNFA.max_label (X_to_eNFA a) = max_label (build a 0 1 empty).

Theorem same_labels_max_label: forall a b, same_labels a b = true ->
  eNFA.max_label (X_to_eNFA a) = eNFA.max_label (X_to_eNFA b).