Theory Context_Free_Grammar
section ‹Word Problem for Context-Free Grammars›
theory Context_Free_Grammar
imports Coinductive_Language "HOL-Library.FSet"
begin
section ‹Context Free Languages›
text ‹
A context-free grammar consists of a list of productions for every nonterminal
and an initial nonterminal. The productions are required to be in weak Greibach
normal form, i.e. each right hand side of a production must either be empty or
start with a terminal.
›
abbreviation "wgreibach α ≡ (case α of (Inr N # _) ⇒ False | _ ⇒ True)"
record ('t, 'n) cfg =
init :: "'n :: finite"
prod :: "'n ⇒ ('t + 'n) list fset"
context
fixes G :: "('t, 'n :: finite) cfg"
begin
inductive in_cfl where
"in_cfl [] []"
| "in_cfl α w ⟹ in_cfl (Inl a # α) (a # w)"
| "fBex (prod G N) (λβ. in_cfl (β @ α) w) ⟹ in_cfl (Inr N # α) w"
abbreviation lang_trad where
"lang_trad ≡ {w. in_cfl [Inr (init G)] w}"
fun 𝔬⇩P where
"𝔬⇩P [] = True"
| "𝔬⇩P (Inl _ # _) = False"
| "𝔬⇩P (Inr N # α) = ([] |∈| prod G N ∧ 𝔬⇩P α)"
fun 𝔡⇩P where
"𝔡⇩P [] a = {||}"
| "𝔡⇩P (Inl b # α) a = (if a = b then {|α|} else {||})"
| "𝔡⇩P (Inr N # α) a =
(λβ. tl β @ α) |`| ffilter (λβ. β ≠ [] ∧ hd β = Inl a) (prod G N) |∪|
(if [] |∈| prod G N then 𝔡⇩P α a else {||})"
primcorec subst :: "('t + 'n) list fset ⇒ 't language" where
"subst P = Lang (fBex P 𝔬⇩P) (λa. subst (ffUnion ((λr. 𝔡⇩P r a) |`| P)))"
inductive in_cfls where
"fBex P 𝔬⇩P ⟹ in_cfls P []"
| "in_cfls (ffUnion ((λα. 𝔡⇩P α a) |`| P)) w ⟹ in_cfls P (a # w)"
inductive_cases [elim!]: "in_cfls P []"
inductive_cases [elim!]: "in_cfls P (a # w)"
declare inj_eq[OF bij_is_inj[OF to_language_bij], simp]
lemma subst_in_cfls: "subst P = to_language {w. in_cfls P w}"
by (coinduction arbitrary: P) (auto intro: in_cfls.intros)
lemma 𝔬⇩P_in_cfl: "𝔬⇩P α ⟹ in_cfl α []"
by (induct α rule: 𝔬⇩P.induct) (auto intro!: in_cfl.intros elim: fBexI[rotated])
lemma 𝔡⇩P_in_cfl: "β |∈| 𝔡⇩P α a ⟹ in_cfl β w ⟹ in_cfl α (a # w)"
proof (induct α a arbitrary: β w rule: 𝔡⇩P.induct)
case (3 N α a)
then show ?case
by (auto simp: rev_fBexI neq_Nil_conv split: if_splits
intro!: in_cfl.intros elim!: rev_fBexI[of "_ # _"])
qed (auto split: if_splits intro: in_cfl.intros)
lemma in_cfls_in_cfl: "in_cfls P w ⟹ fBex P (λα. in_cfl α w)"
by (induct P w rule: in_cfls.induct)
(auto simp: 𝔬⇩P_in_cfl 𝔡⇩P_in_cfl ffUnion.rep_eq
intro: in_cfl.intros elim: rev_bexI)
lemma in_cfls_mono: "in_cfls P w ⟹ P |⊆| Q ⟹ in_cfls Q w"
proof (induct P w arbitrary: Q rule: in_cfls.induct)
case (2 a P w)
from 2(3) 2(2)[of "ffUnion ((λα. local.𝔡⇩P α a) |`| Q)"] show ?case
by (auto intro!: ffunion_mono in_cfls.intros)
qed (auto intro!: in_cfls.intros)
end
locale cfg_wgreibach =
fixes G :: "('t, 'n :: finite) cfg"
assumes weakGreibach: "⋀N α. α |∈| prod G N ⟹ wgreibach α"
begin
lemma in_cfl_in_cfls: "in_cfl G α w ⟹ in_cfls G {|α|} w"
proof (induct α w rule: in_cfl.induct)
case (3 N α w)
then obtain β where
β: "β |∈| prod G N" and
in_cfl: "in_cfl G (β @ α) w" and
in_cfls: "in_cfls G {|β @ α|} w" by blast
then show ?case
proof (cases β)
case [simp]: Nil
from β in_cfls show ?thesis
by (cases w) (auto intro!: in_cfls.intros elim: in_cfls_mono)
next
case [simp]: (Cons x γ)
from weakGreibach[OF β] obtain a where [simp]: "x = Inl a" by (cases x) auto
with β in_cfls show ?thesis
apply -
apply (rule in_cfl.cases[OF in_cfl]; auto)
apply (force intro: in_cfls.intros(2) elim!: in_cfls_mono)
done
qed
qed (auto intro!: in_cfls.intros)
abbreviation lang where
"lang ≡ subst G {|[Inr (init G)]|}"
lemma lang_lang_trad: "lang = to_language (lang_trad G)"
proof -
have "in_cfls G {|[Inr (init G)]|} w ⟷ in_cfl G [Inr (init G)] w" for w
by (auto dest: in_cfls_in_cfl in_cfl_in_cfls)
then show ?thesis
by (auto simp: subst_in_cfls)
qed
end
text ‹
The function @{term in_language} decides the word problem for a given language.
Since we can construct the language of a CFG using @{const cfg_wgreibach.lang} we obtain an
executable (but not very efficient) decision procedure for CFGs for free.
›
abbreviation "𝔞 ≡ Inl True"
abbreviation "𝔟 ≡ Inl False"
abbreviation "S ≡ Inr ()"
interpretation palindromes: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞], [𝔟], [𝔞, S, 𝔞], [𝔟, S, 𝔟]|}⦈"
by unfold_locales auto
lemma "in_language palindromes.lang []" by normalization
lemma "in_language palindromes.lang [True]" by normalization
lemma "in_language palindromes.lang [False]" by normalization
lemma "in_language palindromes.lang [True, True]" by normalization
lemma "in_language palindromes.lang [True, False, True]" by normalization
lemma "¬ in_language palindromes.lang [True, False]" by normalization
lemma "¬ in_language palindromes.lang [True, False, True, False]" by normalization
lemma "in_language palindromes.lang [True, False, True, True, False, True]" by normalization
lemma "¬ in_language palindromes.lang [True, False, True, False, False, True]" by normalization
interpretation Dyck: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞, S, 𝔟, S]|}⦈"
by unfold_locales auto
lemma "in_language Dyck.lang []" by normalization
lemma "¬ in_language Dyck.lang [True]" by normalization
lemma "¬ in_language Dyck.lang [False]" by normalization
lemma "in_language Dyck.lang [True, False, True, False]" by normalization
lemma "in_language Dyck.lang [True, True, False, False]" by normalization
lemma "in_language Dyck.lang [True, False, True, False]" by normalization
lemma "in_language Dyck.lang [True, False, True, False, True, True, False, False]" by normalization
lemma "¬ in_language Dyck.lang [True, False, True, True, False]" by normalization
lemma "¬ in_language Dyck.lang [True, True, False, False, False, True]" by normalization
interpretation abSSa: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞, 𝔟, S, S, 𝔞]|}⦈"
by unfold_locales auto
lemma "in_language abSSa.lang []" by normalization
lemma "¬ in_language abSSa.lang [True]" by normalization
lemma "¬ in_language abSSa.lang [False]" by normalization
lemma "in_language abSSa.lang [True, False, True]" by normalization
lemma "in_language abSSa.lang [True, False, True, False, True, True, False, True, True]" by normalization
lemma "in_language abSSa.lang [True, False, True, False, True, True]" by normalization
lemma "¬ in_language abSSa.lang [True, False, True, True, False]" by normalization
lemma "¬ in_language abSSa.lang [True, True, False, False, False, True]" by normalization
end