Theory Automaton

(*  Author: Dmitriy Traytel *)

section "Equivalence Framework"

(*<*)
theory Automaton
imports
  "HOL-Library.While_Combinator"
  Coinductive_Languages.Coinductive_Language
begin
(*>*)

coinductive rel_language where
  "βŸ¦π”¬ L = 𝔬 K; β‹€a b. R a b ⟹ rel_language R (𝔑 L a) (𝔑 K b)⟧ ⟹ rel_language R L K"

declare rel_language.coinduct[consumes 1, case_names Lang, coinduct pred]

lemma rel_language_alt:
  "rel_language R L K = rel_fun (list_all2 R) (=) (in_language L) (in_language K)"
unfolding rel_fun_def proof (rule iffI, safe del: iffI)
  fix xs ys
  assume "list_all2 R xs ys" "rel_language R L K"
  then show "in_language L xs = in_language K ys"
    by (induct xs ys arbitrary: L K) (auto del: iffI elim: rel_language.cases)
next
  assume "βˆ€xs ys. list_all2 R xs ys ⟢ in_language L xs = in_language K ys"
  then show "rel_language R L K" by (coinduction arbitrary: L K) (auto dest: spec2)
qed

lemma rel_language_eq: "rel_language (=) = (=)"
  unfolding rel_language_alt[abs_def] list.rel_eq fun.rel_eq
  by (subst (2) fun_eq_iff)+
    (auto intro: box_equals[OF _ to_language_in_language to_language_in_language])

abbreviation "𝔑s ≑ fold (Ξ»a L. 𝔑 L a)"

lemma in_language_𝔑s: "in_language (𝔑s w L) v ⟷ in_language L (w @ v)"
  by (induct w arbitrary: L) simp_all

lemma 𝔬_𝔑s: "𝔬 (𝔑s w L) ⟷ in_language L w"
  by (induct w arbitrary: L) auto

lemma in_language_to_language[simp]: "in_language (to_language L) w ⟷ w ∈ L"
  by (metis in_language_to_language mem_Collect_eq)

lemma rtrancl_fold_product:
shows "{((r, s), (f a r, g b s)) | a b r s. a ∈ A ∧ b ∈ B ∧ R a b}^* =
       {((r, s), (fold f w1 r, fold g w2 s)) | w1 w2 r s. w1 ∈ lists A ∧ w2 ∈ lists B ∧ list_all2 R w1 w2}"
  (is "?L = ?R")
proof-
  { fix r s r' s'
    have "((r, s), (r', s')) : ?L ⟹ ((r, s), (r', s')) ∈ ?R"
    proof(induction rule: converse_rtrancl_induct2)
      case refl show ?case by(force intro!: fold_simps(1)[symmetric])
    next
      case step thus ?case by(force intro!: fold_simps(2)[symmetric])
    qed
  }
  hence "β‹€x. x ∈ ?L ⟹ x ∈ ?R" by force
  moreover
  { fix r s r' s'
    { fix w1 w2 assume "list_all2 R w1 w2" "w1 ∈ lists A" "w2 ∈ lists B"
      then have "((r, s), fold f w1 r, fold g w2 s) ∈ ?L"
      proof(induction w1 w2 arbitrary: r s)
        case Nil show ?case by simp
      next
        case Cons thus ?case by (force elim!: converse_rtrancl_into_rtrancl[rotated])
      qed
    }
    hence "((r, s), (r', s')) ∈ ?R ⟹ ((r, s), (r', s')) ∈ ?L" by auto
  }
  hence "β‹€x. x ∈ ?R ⟹ x ∈ ?L" by blast
  ultimately show ?thesis by blast
qed

lemma rtrancl_fold_product1:
shows "{(r, s). βˆƒa ∈ A. s = f a r}^* = {(r, s). βˆƒa ∈ lists A. s = fold f a r}" (is "?L = ?R")
proof-
  { fix r s
    have "(r, s) ∈ ?L ⟹ (r, s) ∈ ?R"
    proof(induction rule: converse_rtrancl_induct)
      case base show ?case by(force intro!: fold_simps(1)[symmetric])
    next
      case step thus ?case by(force intro!: fold_simps(2)[symmetric])
    qed
  } moreover
  { fix r s
    { fix w assume "w ∈ lists A"
      then have "(r, fold f w r) ∈ ?L"
      proof(induction w rule: rev_induct)
        case Nil show ?case by simp
      next
        case snoc thus ?case by (force elim!: rtrancl_into_rtrancl)
      qed
    } 
    hence "(r, s) ∈ ?R ⟹ (r, s) ∈ ?L" by auto
  } ultimately show ?thesis by (auto 10 0)
qed

lemma lang_eq_ext_Nil_fold_Deriv:
  fixes K L A R
  assumes
    "β‹€w. in_language K w ⟹ w ∈ lists A"
    "β‹€w. in_language L w ⟹ w ∈ lists B"
    "β‹€a b. R a b ⟹ a ∈ A ⟷ b ∈ B"
  defines "𝔅 ≑ {(𝔑s w1 K, 𝔑s w2 L) | w1 w2. w1 ∈ lists A ∧ w2 ∈ lists B ∧ list_all2 R w1 w2}"
  shows "rel_language R K L ⟷ (βˆ€(K, L) ∈ 𝔅. 𝔬 K ⟷ 𝔬 L)"
proof
  assume "βˆ€(K, L)βˆˆπ”…. 𝔬 K = 𝔬 L"
  then show "rel_language R K L"
  unfolding 𝔅_def using assms(1,2)
  proof (coinduction arbitrary: K L)
    case (Lang K L)
    then have CIH: "β‹€K' L'. βˆƒw1 w2.
      K' = 𝔑s w1 K ∧ L' = 𝔑s w2 L ∧ w1 ∈ lists A ∧ w2 ∈ lists B ∧ list_all2 R w1 w2 ⟹ 𝔬 K' = 𝔬 L'" and
      [dest]: "β‹€w. in_language K w ⟹ w ∈ lists A" "β‹€w. in_language L w ⟹ w ∈ lists B" 
      by blast+
    show ?case unfolding ex_simps simp_thms
    proof (safe del: iffI)
      show "𝔬 K = 𝔬 L" by (intro CIH[OF exI[where x = "[]"]]) simp
    next
      fix x y w1 w2 assume "βˆ€x∈set w1. x ∈ A" "βˆ€x∈set w2. x ∈ B" "list_all2 R w1 w2" "R x y"
      then show "𝔬 (𝔑s w1 (𝔑 K x)) = 𝔬 (𝔑s w2 (𝔑 L y))"
      proof (cases "x ∈ A ∧ y ∈ B")
        assume "¬ (x ∈ A ∧ y ∈ B)"
        with assms(3)[OF β€ΉR x yβ€Ί] show ?thesis
          by (auto simp: in_language_𝔑s in_language.simps[symmetric] simp del: in_language.simps)
      qed (intro CIH exI[where x = "x # w1"] exI[where x = "y # w2"], auto)
    qed (auto simp add: in_language.simps[symmetric] simp del: in_language.simps)
  qed
qed (auto simp: 𝔅_def rel_language_alt rel_fun_def 𝔬_𝔑s)

subsection β€ΉAbstract Deterministic Automatonβ€Ί

locale DA =
fixes alphabet :: "'a list"
fixes init :: "'t β‡’ 's"
fixes delta :: "'a β‡’ 's β‡’ 's"
fixes accept :: "'s β‡’ bool"
fixes wellformed :: "'s β‡’ bool"
fixes Language :: "'s β‡’ 'a language"
fixes wf :: "'t β‡’ bool"
fixes Lang :: "'t β‡’ 'a language"
assumes distinct_alphabet: "distinct alphabet"
assumes Language_init: "wf t ⟹ Language (init t) = Lang t"
assumes wellformed_init: "wf t ⟹ wellformed (init t)"
assumes Language_alphabet: "⟦wellformed s; in_language (Language s) w⟧ ⟹ w ∈ lists (set alphabet)"
assumes wellformed_delta: "⟦wellformed s; a ∈ set alphabet⟧ ⟹ wellformed (delta a s)"
assumes Language_delta: "⟦wellformed s; a ∈ set alphabet⟧ ⟹ Language (delta a s) = 𝔑 (Language s) a"
assumes accept_Language: "wellformed s ⟹ accept s ⟷ 𝔬 (Language s)"
begin

lemma this: "DA alphabet init delta accept wellformed Language wf Lang" by unfold_locales

lemma wellformed_deltas:
  "⟦wellformed s; w ∈ lists (set alphabet)⟧ ⟹ wellformed (fold delta w s)"
  by (induction w arbitrary: s) (auto simp add: Language_delta wellformed_delta)

lemma Language_deltas:
  "⟦wellformed s; w ∈ lists (set alphabet)⟧ ⟹ Language (fold delta w s) = 𝔑s w (Language s)"
  by (induction w arbitrary: s) (auto simp add: Language_delta wellformed_delta)

textβ€ΉAuxiliary functions:β€Ί
definition reachable :: "'a list β‡’ 's β‡’ 's set" where
  "reachable as s = snd (the (rtrancl_while (Ξ»_. True) (Ξ»s. map (Ξ»a. delta a s) as) s))"

definition automaton :: "'a list β‡’ 's β‡’ (('s * 'a) * 's) set" where
  "automaton as s =
    snd (the
    (let start = (([s], {s}), {});
         test = Ξ»((ws, Z), A). ws β‰  [];
         step = Ξ»((ws, Z), A).
           (let s = hd ws;
                new_edges = map (Ξ»a. ((s, a), delta a s)) as;
                new = remdups (filter (Ξ»ss. ss βˆ‰ Z) (map snd new_edges))
           in ((new @ tl ws, set new βˆͺ Z), set new_edges βˆͺ A))
    in while_option test step start))"

definition match :: "'s β‡’ 'a list β‡’ bool" where
  "match s w = accept (fold delta w s)"

lemma match_correct:
  assumes "wellformed s" "w ∈ lists (set alphabet)"
  shows "match s w ⟷ in_language (Language s) w"
  unfolding match_def accept_Language[OF wellformed_deltas[OF assms]] Language_deltas[OF assms] 𝔬_𝔑s ..

end

locale DAs =
  M: DA alphabet1 init1 delta1 accept1 wellformed1 Language1 wf1 Lang1 +
  N: DA alphabet2 init2 delta2 accept2 wellformed2 Language2 wf2 Lang2
  for alphabet1 :: "'a1 list" and init1 :: "'t1 β‡’ 's1" and delta1 accept1 wellformed1 Language1 wf1 Lang1
  and alphabet2 :: "'a2 list" and init2 :: "'t2 β‡’ 's2" and delta2 accept2 wellformed2 Language2 wf2 Lang2 +
  fixes letter_eq :: "'a1 β‡’ 'a2 β‡’ bool"
  assumes letter_eq: "β‹€a b. letter_eq a b ⟹ a ∈ set alphabet1 ⟷ b ∈ set alphabet2"
begin

abbreviation step where
  "step ≑ (Ξ»(p, q). map (Ξ»(a, b). (delta1 a p, delta2 b q))
    (filter (case_prod letter_eq) (List.product alphabet1 alphabet2)))"

abbreviation closure :: "'s1 * 's2 β‡’ (('s1 * 's2) list * ('s1 * 's2) set) option" where
  "closure ≑ rtrancl_while (Ξ»(p, q). accept1 p = accept2 q) step"

theorem closure_sound_complete:
assumes wf: "wf1 r" "wf2 s"
and result: "closure (init1 r, init2 s) = Some (ws, R)" (is "closure (?r, ?s) = _")
shows "ws = [] ⟷ rel_language letter_eq (Lang1 r) (Lang2 s)"
proof -
  from wf have wellformed: "wellformed1 ?r" "wellformed2 ?s"
    using M.wellformed_init N.wellformed_init by blast+
  note Language_alphabets[simp] =
    M.Language_alphabet[OF wellformed(1)] N.Language_alphabet[OF wellformed(2)]
  note Language_deltass = M.Language_deltas[OF wellformed(1)] N.Language_deltas[OF wellformed(2)]

  have bisim: "rel_language letter_eq (Language1 ?r) (Language2 ?s) =
    (βˆ€a b. (βˆƒw1 w2. a = 𝔑s w1 (Language1 ?r) ∧ b = 𝔑s w2 (Language2 ?s) ∧
      w1 ∈ lists (set alphabet1) ∧ w2 ∈ lists (set alphabet2) ∧ list_all2 letter_eq w1 w2) ⟢
    𝔬 a = 𝔬 b)"
    by (subst lang_eq_ext_Nil_fold_Deriv) (auto dest: letter_eq)

  have leq: "rel_language letter_eq (Language1 ?r) (Language2 ?s) =
  (βˆ€(r', s') ∈ {((r, s), (delta1 a r, delta2 b s)) | a b r s.
    a ∈ set alphabet1 ∧ b ∈ set alphabet2 ∧ letter_eq a b}^* `` {(?r, ?s)}.
    accept1 r' = accept2 s')" using Language_deltass
      M.accept_Language[OF M.wellformed_deltas[OF wellformed(1)]]
      N.accept_Language[OF N.wellformed_deltas[OF wellformed(2)]]
      unfolding rtrancl_fold_product in_lists_conv_set bisim
      by (auto 10 0)
  have "{(x,y). y ∈ set (step x)} =
    {((r, s), (delta1 a r, delta2 b s)) | a b r s. a ∈ set alphabet1 ∧ b ∈ set alphabet2 ∧ letter_eq a b}"
    by auto
  with rtrancl_while_Some[OF result]
  have "(ws = []) = rel_language letter_eq (Language1 ?r) (Language2 ?s)"
    by (auto simp add: leq Ball_def split: if_splits)
  then show ?thesis unfolding M.Language_init[OF wf(1)] N.Language_init[OF wf(2)] .
qed

subsection β€ΉThe overall procedureβ€Ί

definition check_eqv :: "'t1 β‡’ 't2 β‡’ bool" where
"check_eqv r s = (wf1 r ∧ wf2 s ∧ (case closure (init1 r, init2 s) of Some([], _) β‡’ True | _ β‡’ False))"

lemma soundness: 
assumes "check_eqv r s" shows "rel_language letter_eq (Lang1 r) (Lang2 s)"
proof -
  obtain R where "wf1 r" "wf2 s" "closure (init1 r, init2 s) = Some([], R)"
    using assms by (auto simp: check_eqv_def Let_def split: option.splits list.splits)
  from closure_sound_complete[OF this] show "rel_language letter_eq (Lang1 r) (Lang2 s)" by simp
qed

(* completeness needs termination of closure, otherwise result could be None *)

end

subsection β€ΉAbstract Deterministic Finite Automatonβ€Ί

locale DFA = DA +
assumes fin: "wellformed s ⟹ finite {fold delta w s | w. w ∈ lists (set alphabet)}"
begin

lemma finite_rtrancl_delta_Image1:
  "wellformed r ⟹ finite ({(r, s). βˆƒa ∈ set alphabet. s = delta a r}^* `` {r})"
  unfolding rtrancl_fold_product1 by (auto intro: finite_subset[OF _ fin])

lemma
  assumes "wellformed r" "set as βŠ† set alphabet"
  shows reachable: "reachable as r = {fold delta w r | w. w ∈ lists (set as)}"
  and finite_reachable: "finite (reachable as r)"
proof -
  obtain wsZ where *: "rtrancl_while (Ξ»_. True) (Ξ»s. map (Ξ»a. delta a s) as) r = Some wsZ"
    using assms by (atomize_elim, intro rtrancl_while_finite_Some Image_mono rtrancl_mono
      finite_subset[OF _ finite_rtrancl_delta_Image1[of r]]) auto
  then show "reachable as r = {fold delta w r | w. w ∈ lists (set as)}"
    unfolding reachable_def by (cases wsZ)
      (auto dest!: rtrancl_while_Some split: if_splits simp: rtrancl_fold_product1 image_iff)
  then show "finite (reachable as r)" using assms by (force intro: finite_subset[OF _ fin])
qed

end

locale DFAs =
  M: DFA alphabet1 init1 delta1 accept1 wellformed1 Language1 wf1 Lang1 +
  N: DFA alphabet2 init2 delta2 accept2 wellformed2 Language2 wf2 Lang2
  for alphabet1 :: "'a1 list" and init1 :: "'t1 β‡’ 's1" and delta1 accept1 wellformed1 Language1 wf1 Lang1
  and alphabet2 :: "'a2 list" and init2 :: "'t2 β‡’ 's2" and delta2 accept2 wellformed2 Language2 wf2 Lang2 +
  fixes letter_eq :: "'a1 β‡’ 'a2 β‡’ bool"
  assumes letter_eq: "β‹€a b. letter_eq a b ⟹ a ∈ set alphabet1 ⟷ b ∈ set alphabet2"
begin

interpretation DAs by unfold_locales (auto dest: letter_eq)

lemma finite_rtrancl_delta_Image:
  "⟦wellformed1 r; wellformed2 s⟧ ⟹
  finite ({((r, s), (delta1 a r, delta2 b s))| a b r s.
    a ∈ set alphabet1 ∧ b ∈ set alphabet2 ∧ R a b}^* `` {(r, s)})"
  unfolding rtrancl_fold_product Image_singleton
  by (auto intro: finite_subset[OF _ finite_cartesian_product[OF M.fin N.fin]])

lemma "termination":
  assumes "wellformed1 r" "wellformed2 s"
  shows "βˆƒst. closure (r, s) = Some st" (is "βˆƒ_. closure  ?i = _")
proof (rule rtrancl_while_finite_Some)
  show "finite ({(x, st). st ∈ set (step x)}* `` {?i})"
    by (rule finite_subset[OF Image_mono[OF rtrancl_mono]
      finite_rtrancl_delta_Image[OF assms, of letter_eq]]) auto
qed

lemma completeness: 
assumes "wf1 r" "wf2 s" "rel_language letter_eq (Lang1 r) (Lang2 s)" shows "check_eqv r s"
proof -
  obtain ws R where 1: "closure (init1 r, init2 s) = Some (ws, R)" using "termination"
    M.wellformed_init N.wellformed_init assms by fastforce
  with closure_sound_complete[OF _ _ this] assms
  show "check_eqv r s" by (simp add: check_eqv_def)
qed
  
end

sublocale DA < DAs
  alphabet init delta accept wellformed Language wf Lang
  alphabet init delta accept wellformed Language wf Lang "(=)"
  by unfold_locales auto

sublocale DFA < DFAs
  alphabet init delta accept wellformed Language wf Lang
  alphabet init delta accept wellformed Language wf Lang "(=)"
  by unfold_locales auto

lemma (in DA) step_alt: "step = (Ξ»(p, q). map (Ξ»a. (delta a p, delta a q)) alphabet)"
  using distinct_alphabet
proof (induct alphabet)
  case (Cons x xs)
  moreover
  { fix x :: 'a and xs ys :: "'a list"
    assume "x βˆ‰ set xs"
    then have "[(x, y)←List.product xs (x # ys) . x = y] = [(x, y)←List.product xs ys . x = y]"
      by (induct xs arbitrary: x) auto
  }
  moreover
  { fix x :: 'a and xs :: "'a list"
    assume "x βˆ‰ set xs"
    then have "[(x, y)←map (Pair x) xs . x = y] = []"
      by (induct xs) auto
  }
  ultimately show ?case by (auto simp: fun_eq_iff)
qed simp

(*<*)
end
(*>*)