Library ATBR.Model_RegExp


Syntactic model of regular expressions

Terms of arbitrary Kleene algebras will be reified into this one, which is syntactic, and allows us to define automata constructions.

We also prove the untyping theorem for Kleene algebras in this module, to obtain the above reification.

We define the kleene_clean_zeros tactic, to remove zeros from KA expressions.

Require Import Common.
Require Import Classes.
Require Import Graph.
Require Import SemiLattice.
Require Import SemiRing.
Require Import KleeneAlgebra.
Require Import MxGraph.
Require Reification.


Module RegExp.

  Inductive regex :=
  | one: regex
  | zero: regex
  | dot: regex -> regex -> regex
  | plus: regex -> regex -> regex
  | star: regex -> regex
  | var: positive -> regex
    .

free equality, generated by KA axioms
  Inductive equal: regex -> regex -> Prop :=
  | refl_one: equal one one
  | refl_zero: equal zero zero
  | refl_var: forall i, equal (var i) (var i)

  | dot_assoc: forall x y z, equal (dot x (dot y z)) (dot (dot x y) z)
  | dot_neutral_left: forall x, equal (dot one x) x
  | dot_neutral_right: forall x, equal (dot x one) x

  | plus_neutral_left: forall x, equal (plus zero x) x
  | plus_idem: forall x, equal (plus x x) x
  | plus_assoc: forall x y z, equal (plus x (plus y z)) (plus (plus x y) z)
  | plus_com: forall x y, equal (plus x y) (plus y x)

  | dot_ann_left: forall x, equal (dot zero x) zero
  | dot_ann_right: forall x, equal (dot x zero) zero
  | dot_distr_left: forall x y z, equal (dot (plus x y) z) (plus (dot x z) (dot y z))
  | dot_distr_right: forall x y z, equal (dot x (plus y z)) (plus (dot x y) (dot x z))

  | star_make_left: forall a, equal (plus one (dot (star a) a)) (star a)
  | star_make_right: forall a, equal (plus one (dot a (star a))) (star a)
  | star_destruct_left: forall a x, equal (plus (dot a x) x) x -> equal (plus (dot (star a) x) x) x
  | star_destruct_right: forall a x, equal (plus (dot x a) x) x -> equal (plus (dot x (star a)) x) x

  | dot_compat: forall x x', equal x x' -> forall y y', equal y y' -> equal (dot x y) (dot x' y')
  | plus_compat: forall x x', equal x x' -> forall y y', equal y y' -> equal (plus x y) (plus x' y')
  | star_compat: forall x x', equal x x' -> equal (star x) (star x')
  | equal_trans: forall x y z, equal x y -> equal y z -> equal x z
  | equal_sym: forall x y, equal x y -> equal y x
    .

  Lemma equal_refl: forall x, equal x x.

  Definition is_zero t := match t with zero => true | _ => false end.
  Definition is_one t := match t with one => true | _ => false end.

  Lemma Is_zero: forall t, is_zero t = true -> t = zero.

  Lemma Is_one: forall t, is_one t = true -> t = one.

  Ltac leaf x :=
    match x with
      | context [is_one ?u] => fail 1
      | context [is_zero ?u] => fail 1
      | _ => idtac
    end.

  Ltac destruct_tests :=
    repeat (
      repeat match goal with
               | H: is_zero ?x = _ |- _ => rewrite (Is_zero H) in * || rewrite H in *
               | H: is_one ?x = _ |- _ => rewrite (Is_one H) in * || rewrite H in *
               | H: ?x = ?x |- _ => clear H
               | H: ?x <> ?y |- _ => solve [elimtype False; apply H; trivial]
             end;
      repeat match goal with
               | |- context[is_zero ?x] => leaf x; let Z := fresh "Z" in case_eq (is_zero x); intro Z
               | |- context[is_one ?x] => leaf x; let O := fresh "O" in case_eq (is_one x); intro O
             end;
      try discriminate).


  Section Def.

    Program Instance re_Graph: Graph := {
      T := unit;
      X A B := regex;
      equal A B := RegExp.equal
    }.

    Instance re_SemiLattice_Ops: SemiLattice_Ops re_Graph:= {
      plus A B := RegExp.plus;
      zero A B := RegExp.zero
    }.

    Instance re_Monoid_Ops: Monoid_Ops re_Graph := {
      dot A B C := RegExp.dot;
      one A := RegExp.one
    }.

    Instance re_Star_Op: Star_Op re_Graph := {
      star A := RegExp.star
    }.

    Instance re_SemiLattice: SemiLattice re_Graph.

    Instance re_Monoid: Monoid re_Graph.

    Instance re_SemiRing: IdemSemiRing re_Graph.

    Instance re_KleeneAlgebra: KleeneAlgebra re_Graph.

  End Def.

  Module Load.


    Canonical Structure re_Graph.

    Import Classes.

    Notation tt := (tt: @T re_Graph).
    Notation regex := (@X re_Graph tt tt).
    Notation KMX n m := (@X (@mx_Graph re_Graph tt) (n%nat) (m%nat)).
    Notation var i := (var i: regex).

    Transparent equal plus dot one zero star.
    Global Opaque T.

    Ltac fold_regex :=
      change RegExp.equal with (@equal re_Graph tt tt) ;
      change RegExp.one with (@one re_Graph re_Monoid_Ops tt) ;
        change RegExp.dot with (@dot re_Graph re_Monoid_Ops tt tt tt) ;
          change RegExp.zero with (@zero re_Graph re_SemiLattice_Ops tt tt) ;
            change RegExp.plus with (@plus re_Graph re_SemiLattice_Ops tt tt) ;
              change RegExp.star with (@star re_Graph re_Star_Op tt).

  End Load.

Cleaning regular expressions so that they no longer contain zeros (but the last if the expression reduces to zero ...)
  Module Clean.

    Import Load.
    Section S.

      Let cleaning_dot x y :=
        if is_zero x then zero
          else if is_zero y then zero
            else dot x y.
      Let cleaning_plus x y :=
        if is_zero x then y
          else if is_zero y then x
            else plus x y.
      Let cleaning_star x :=
        if is_zero x then one else star x.


clean x removes all zeros from x (but the last one, if the expression reduces to zero...)
      Fixpoint rewrite (x: regex): regex :=
        match x with
          | dot x y => cleaning_dot (rewrite x) (rewrite y)
          | plus x y => cleaning_plus (rewrite x) (rewrite y)
          | star x => cleaning_star (rewrite x)
          | x => x
        end.

      Lemma clean_dot: forall (e f: regex), (cleaning_dot e f: regex) == e * f.

      Lemma clean_plus: forall (e f: regex), (cleaning_plus e f: regex) == e + f.

      Lemma clean_star: forall (e f: regex), (cleaning_star e: regex) == e #.

the rewriting procedure is correct
      Theorem correct: forall e, e == rewrite e.

    End S.


rewrite is idempotent
    Lemma rewrite_idem: forall x, rewrite (rewrite x) = rewrite x.

two equal terms equally rewrite two zero
    Lemma equal_rewrite_zero_equiv : forall x y, equal x y -> is_zero (rewrite x) = is_zero (rewrite y).

  End Clean.

Untyping theorem for Kleene algebra, typed reification
  Module Untype.

    Section protect.

    Notation clean := Clean.rewrite.

first we show that equality proofs can be factorised, so as to use the annihilation laws at first

strong equality, without annihilation
    Inductive sequal: regex -> regex -> Prop :=
    | sequal_refl_one: sequal one one
    | sequal_refl_zero: sequal zero zero
    | sequal_refl_var: forall i, sequal (var i) (var i)
      
    | sequal_dot_assoc: forall x y z, sequal (dot x (dot y z)) (dot (dot x y) z)
    | sequal_dot_neutral_left: forall x, sequal (dot one x) x
    | sequal_dot_neutral_right: forall x, sequal (dot x one) x
    | sequal_dot_distr_left: forall x y z, is_zero (clean z) = false -> sequal (dot (plus x y) z) (plus (dot x z) (dot y z))
    | sequal_dot_distr_right: forall x y z, is_zero (clean x) = false -> sequal (dot x (plus y z)) (plus (dot x y) (dot x z))
    
    | sequal_plus_assoc: forall x y z, sequal (plus x (plus y z)) (plus (plus x y) z)
    | sequal_plus_idem: forall x, sequal (plus x x) x
    | sequal_plus_com: forall x y, sequal (plus x y) (plus y x)
    
    | sequal_star_make_left: forall a, sequal (plus one (dot (star a) a)) (star a)
    | sequal_star_make_right: forall a, sequal (plus one (dot a (star a))) (star a)
    | sequal_star_destruct_left: forall a x, is_zero (clean x) = false -> sequal (plus (dot a x) x) x -> sequal (plus (dot (star a) x) x) x
    | sequal_star_destruct_right: forall a x, is_zero (clean x) = false -> sequal (plus (dot x a) x) x -> sequal (plus (dot x (star a)) x) x
    
    | sequal_dot_compat: forall x x', sequal x x' -> forall y y', sequal y y' -> sequal (dot x y) (dot x' y')
    | sequal_plus_compat: forall x x', sequal x x' -> forall y y', sequal y y' -> sequal (plus x y) (plus x' y')
    | sequal_star_compat: forall x x', sequal x x' -> sequal (star x) (star x')
    | sequal_trans: forall x y z, sequal x y -> sequal y z -> sequal x z
    | sequal_sym: forall x y, sequal x y -> sequal y x
        .

    Lemma sequal_equal x y: sequal x y -> equal x y .

    Lemma sequal_refl: forall x, sequal x x.
    Local Hint Resolve sequal_refl.
    Local Hint Constructors sequal.

    Lemma sequal_clean_zero_equiv x : sequal (clean x) zero -> is_zero (clean x) = true.

factorisation theorem
    Theorem equal_to_sequal : forall x y, equal x y -> sequal (clean x) (clean y).



Erasure funciton, from typed syntax (reified) to the above untyped syntax
    Section erase.

      Context `{env: Reification.Env}.
      Import Reification.KA.

erasure function, from typed syntax to untyped syntax
      Fixpoint erase n m (x: X n m): regex :=
        match x with
          | dot _ _ _ x y => RegExp.dot (erase x) (erase y)
          | plus _ _ x y => RegExp.plus (erase x) (erase y)
          | star _ x => RegExp.star (erase x)
          | zero _ _ => RegExp.zero
          | one _ => RegExp.one
          | var i => RegExp.var i
        end.

    End erase.

    Section faithful.

      Import Reification Classes.
      Context `{KA: KleeneAlgebra} {env: Env}.
      Notation feval := KA.eval.

      Inductive eval: forall A B, regex -> X (typ A) (typ B) -> Prop :=
      | e_one: forall A, @eval A A RegExp.one 1
      | e_zero: forall A B, @eval A B RegExp.zero 0
      | e_dot: forall A B C x y x' y', @eval A B x x' -> @eval B C y y' -> @eval A C (RegExp.dot x y) (x'*y')
      | e_plus: forall A B x y x' y', @eval A B x x' -> @eval A B y y' -> @eval A B (RegExp.plus x y) (x'+y')
      | e_star: forall A x x', @eval A A x x' -> @eval A A (RegExp.star x) (x'#)
      | e_var: forall i, eval (RegExp.var i) (unpack (val i)).
      Implicit Arguments eval [].
      Local Hint Constructors eval.

evaluation of erased terms
      Lemma eval_erase_feval: forall n m a, eval n m (erase a) (feval a).

inversion lemmas about evaluations
      Lemma eval_dot_inv: forall n p u v c, eval n p (RegExp.dot u v) c ->
        exists m, exists a, exists b, c = a*b /\ eval n m u a /\ eval m p v b.

      Lemma eval_one_inv: forall n m c, eval n m RegExp.one c -> c [=] one (typ n) /\ n=m.

      Lemma eval_plus_inv: forall n m x y z, eval n m (RegExp.plus x y) z ->
        exists x', exists y', z=x'+y' /\ eval n m x x' /\ eval n m y y'.

      Lemma eval_zero_inv: forall n m z, eval n m RegExp.zero z -> z=0.

      Lemma eval_star_inv: forall n m x z, eval n m (RegExp.star x) z -> exists x', z [=] x'# /\ eval n n x x' /\ n=m.

      Lemma eval_var_inv: forall n m i c, eval n m (RegExp.var i) c -> c [=] unpack (val i) /\ n=src_p (val i) /\ m=tgt_p (val i).

      Ltac eval_inversion :=
        repeat match goal with
                 | H : eval _ _ ?x _ |- _ => eval_inversion_aux H x
               end
        with eval_inversion_aux H x :=
          let H1 := fresh in
            match x with
              | RegExp.dot _ _ => destruct (eval_dot_inv H) as (?&?&?&H1&?&?); subst; try rewrite H1
              | RegExp.one => destruct (eval_one_inv H) as [H1 ?]; auto; subst; apply eqd_inj in H1; subst
              | RegExp.zero => pose proof (eval_zero_inv H); subst
              | RegExp.plus _ _ => destruct (eval_plus_inv H) as (?&?&H1&?&?); auto; subst
              | RegExp.star _ => destruct (eval_star_inv H) as (?&H1&?&?); auto; subst; apply eqd_inj in H1; subst
              | RegExp.var _ => destruct (eval_var_inv H) as (H1&?&?); auto; subst; apply eqd_inj in H1; subst
            end; clear H.

      Lemma eval_type_inj_left: forall A A' B x z z', eval A B x z -> eval A' B x z' -> A=A' \/ clean x = RegExp.zero.

      Lemma eval_type_inj_right: forall A B B' x z z', eval A B x z -> eval A B' x z' -> B=B' \/ clean x = RegExp.zero.

      Lemma eval_clean_zero: forall x A B z, eval A B x z -> RegExp.is_zero (clean x) = true -> z==0.

      Lemma eval_clean: forall A B x y, eval A B x y -> exists2 z, eval A B (clean x) z & y==z.


      Lemma eval_inj: forall A B x y z, eval A B x y -> eval A B x z -> y==z.


      Lemma and_idem: forall (A: Prop), A -> A/\A.

      Ltac split_IHeval :=
        repeat match goal with
                 | H: (forall A B x', eval A B ?x x' -> _) /\ _ ,
                   Hx: eval ?A ?B ?x ?x' |- _ => destruct (proj1 H _ _ _ Hx); clear H
                 | H: _ /\ forall A B x', eval A B ?x x' -> _ ,
                   Hx: eval ?A ?B ?x ?x' |- _ => destruct (proj2 H _ _ _ Hx); clear H
               end;
        repeat match goal with
                 | H: (forall A B x', eval A B ?x x' -> _)
                   /\ (forall A B y', eval A B ?y y' -> _) |- _ => destruct H
               end.


      Ltac eval_injection :=
        repeat match goal with
                 | H: eval ?A ?B ?x ?z , H': eval ?A ?B ?x ?z' |- _ => rewrite (eval_inj H H') in *; clear H
               end.

      Lemma eval_sequal: forall x y, sequal x y -> forall A B x', eval A B x x' -> exists2 y', eval A B y y' & x'==y'.


untyping theorem
      Theorem equal_eval: forall x' y', RegExp.equal x' y'->
        forall A B x y, eval A B x' x -> eval A B y' y -> x==y.

      Theorem erase_faithful: forall n m (a b: KA.X n m),
        RegExp.equal (erase a) (erase b) -> feval a == feval b.

      Lemma normalizer {n} {m} {R} `{T: Transitive (Classes.X (typ n) (typ m)) R} `{H: subrelation _ (equal _ _) R}
        norm (Hnorm: forall x, RegExp.equal x (norm x)):
        forall a b a' b',
          
          (let na := norm (erase a) in eval n m na a') ->
          (let nb := norm (erase b) in eval n m nb b') ->
          R a' b' -> R (feval a) (feval b).

    End faithful.
    End protect.
  End Untype.
End RegExp.
Import RegExp.

Ltac kleene_normalize_ Hnorm :=
  let t := fresh "t" in
  let e := fresh "e" in
  let l := fresh "l" in
  let r := fresh "r" in
  let x := fresh "x" in
  let solve_eval :=
    intro x; vm_compute in x; subst x;
      repeat econstructor;
        match goal with |- Untype.eval (var ?i) _ => eapply (Untype.e_var (env:=e) i) end
  in
    kleene_reify; intros t e l r;
      eapply (Untype.normalizer Hnorm);
        [ solve_eval |
          solve_eval |
            compute [t e Reification.unpack Reification.val Reification.typ
              Reification.tgt Reification.src Reification.tgt_p Reification.src_p
              Reification.sigma_get Reification.sigma_add Reification.sigma_empty
              FMapPositive.PositiveMap.find FMapPositive.PositiveMap.add
              FMapPositive.PositiveMap.empty ] ];
        clear t e l r.

tactic to clean zeros in Kleene algebra expressions
Ltac kleene_clean_zeros := kleene_normalize_ Clean.correct.