Theory Regexp_Constructions
section ‹Basic constructions on regular expressions›
theory Regexp_Constructions
imports
Main
"HOL-Library.Sublist"
Regular_Exp
begin
subsection ‹Reverse language›
lemma rev_conc [simp]: "rev ` (A @@ B) = rev ` B @@ rev ` A"
unfolding conc_def image_def by force
lemma rev_compower [simp]: "rev ` (A ^^ n) = (rev ` A) ^^ n"
by (induction n) (simp_all add: conc_pow_comm)
lemma rev_star [simp]: "rev ` star A = star (rev ` A)"
by (simp add: star_def image_UN)
subsection ‹Substituting characters in a language›
definition subst_word :: "('a ⇒ 'b list) ⇒ 'a list ⇒ 'b list" where
"subst_word f xs = concat (map f xs)"
lemma subst_word_Nil [simp]: "subst_word f [] = []"
by (simp add: subst_word_def)
lemma subst_word_singleton [simp]: "subst_word f [x] = f x"
by (simp add: subst_word_def)
lemma subst_word_append [simp]: "subst_word f (xs @ ys) = subst_word f xs @ subst_word f ys"
by (simp add: subst_word_def)
lemma subst_word_Cons [simp]: "subst_word f (x # xs) = f x @ subst_word f xs"
by (simp add: subst_word_def)
lemma subst_word_conc [simp]: "subst_word f ` (A @@ B) = subst_word f ` A @@ subst_word f ` B"
unfolding conc_def image_def by force
lemma subst_word_compower [simp]: "subst_word f ` (A ^^ n) = (subst_word f ` A) ^^ n"
by (induction n) simp_all
lemma subst_word_star [simp]: "subst_word f ` (star A) = star (subst_word f ` A)"
by (simp add: star_def image_UN)
text ‹Suffix language›
definition Suffixes :: "'a list set ⇒ 'a list set" where
"Suffixes A = {w. ∃q. q @ w ∈ A}"
lemma Suffixes_altdef [code]: "Suffixes A = (⋃w∈A. set (suffixes w))"
unfolding Suffixes_def set_suffixes_eq suffix_def by blast
lemma Nil_in_Suffixes_iff [simp]: "[] ∈ Suffixes A ⟷ A ≠ {}"
by (auto simp: Suffixes_def)
lemma Suffixes_empty [simp]: "Suffixes {} = {}"
by (auto simp: Suffixes_def)
lemma Suffixes_empty_iff [simp]: "Suffixes A = {} ⟷ A = {}"
by (auto simp: Suffixes_altdef)
lemma Suffixes_singleton [simp]: "Suffixes {xs} = set (suffixes xs)"
by (auto simp: Suffixes_altdef)
lemma Suffixes_insert: "Suffixes (insert xs A) = set (suffixes xs) ∪ Suffixes A"
by (simp add: Suffixes_altdef)
lemma Suffixes_conc [simp]: "A ≠ {} ⟹ Suffixes (A @@ B) = Suffixes B ∪ (Suffixes A @@ B)"
unfolding Suffixes_altdef conc_def by (force simp: suffix_append)
lemma Suffixes_union [simp]: "Suffixes (A ∪ B) = Suffixes A ∪ Suffixes B"
by (auto simp: Suffixes_def)
lemma Suffixes_UNION [simp]: "Suffixes (⋃(f ` A)) = ⋃((λx. Suffixes (f x)) ` A)"
by (auto simp: Suffixes_def)
lemma Suffixes_compower:
assumes "A ≠ {}"
shows "Suffixes (A ^^ n) = insert [] (Suffixes A @@ (⋃k<n. A ^^ k))"
proof (induction n)
case (Suc n)
from Suc have "Suffixes (A ^^ Suc n) =
insert [] (Suffixes A @@ ((⋃k<n. A ^^ k) ∪ A ^^ n))"
by (simp_all add: assms conc_Un_distrib)
also have "(⋃k<n. A ^^ k) ∪ A ^^ n = (⋃k∈insert n {..<n}. A ^^ k)" by blast
also have "insert n {..<n} = {..<Suc n}" by auto
finally show ?case .
qed simp_all
lemma Suffixes_star [simp]:
assumes "A ≠ {}"
shows "Suffixes (star A) = Suffixes A @@ star A"
proof -
have "star A = (⋃n. A ^^ n)" unfolding star_def ..
also have "Suffixes … = (⋃x. Suffixes (A ^^ x))" by simp
also have "… = (⋃n. insert [] (Suffixes A @@ (⋃k<n. A ^^ k)))"
using assms by (subst Suffixes_compower) auto
also have "… = insert [] (Suffixes A @@ (⋃n. (⋃k<n. A ^^ k)))"
by (simp_all add: conc_UNION_distrib)
also have "(⋃n. (⋃k<n. A ^^ k)) = (⋃n. A ^^ n)" by auto
also have "… = star A" unfolding star_def ..
also have "insert [] (Suffixes A @@ star A) = Suffixes A @@ star A"
using assms by auto
finally show ?thesis .
qed
text ‹Prefix language›
definition Prefixes :: "'a list set ⇒ 'a list set" where
"Prefixes A = {w. ∃q. w @ q ∈ A}"
lemma Prefixes_altdef [code]: "Prefixes A = (⋃w∈A. set (prefixes w))"
unfolding Prefixes_def set_prefixes_eq prefix_def by blast
lemma Nil_in_Prefixes_iff [simp]: "[] ∈ Prefixes A ⟷ A ≠ {}"
by (auto simp: Prefixes_def)
lemma Prefixes_empty [simp]: "Prefixes {} = {}"
by (auto simp: Prefixes_def)
lemma Prefixes_empty_iff [simp]: "Prefixes A = {} ⟷ A = {}"
by (auto simp: Prefixes_altdef)
lemma Prefixes_singleton [simp]: "Prefixes {xs} = set (prefixes xs)"
by (auto simp: Prefixes_altdef)
lemma Prefixes_insert: "Prefixes (insert xs A) = set (prefixes xs) ∪ Prefixes A"
by (simp add: Prefixes_altdef)
lemma Prefixes_conc [simp]: "B ≠ {} ⟹ Prefixes (A @@ B) = Prefixes A ∪ (A @@ Prefixes B)"
unfolding Prefixes_altdef conc_def by (force simp: prefix_append)
lemma Prefixes_union [simp]: "Prefixes (A ∪ B) = Prefixes A ∪ Prefixes B"
by (auto simp: Prefixes_def)
lemma Prefixes_UNION [simp]: "Prefixes (⋃(f ` A)) = ⋃((λx. Prefixes (f x)) ` A)"
by (auto simp: Prefixes_def)
lemma Prefixes_rev: "Prefixes (rev ` A) = rev ` Suffixes A"
by (auto simp: Prefixes_altdef prefixes_rev Suffixes_altdef)
lemma Suffixes_rev: "Suffixes (rev ` A) = rev ` Prefixes A"
by (auto simp: Prefixes_altdef suffixes_rev Suffixes_altdef)
lemma Prefixes_compower:
assumes "A ≠ {}"
shows "Prefixes (A ^^ n) = insert [] ((⋃k<n. A ^^ k) @@ Prefixes A)"
proof -
have "A ^^ n = rev ` ((rev ` A) ^^ n)" by (simp add: image_image)
also have "Prefixes … = insert [] ((⋃k<n. A ^^ k) @@ Prefixes A)"
unfolding Prefixes_rev
by (subst Suffixes_compower) (simp_all add: image_UN image_image Suffixes_rev assms)
finally show ?thesis .
qed
lemma Prefixes_star [simp]:
assumes "A ≠ {}"
shows "Prefixes (star A) = star A @@ Prefixes A"
proof -
have "star A = rev ` star (rev ` A)" by (simp add: image_image)
also have "Prefixes … = star A @@ Prefixes A"
unfolding Prefixes_rev
by (subst Suffixes_star) (simp_all add: assms image_image Suffixes_rev)
finally show ?thesis .
qed
subsection ‹Subword language›
text ‹
The language of all sub-words, i.e. all words that are a contiguous sublist of a word in
the original language.
›
definition Sublists :: "'a list set ⇒ 'a list set" where
"Sublists A = {w. ∃q∈A. sublist w q}"
lemma Sublists_altdef [code]: "Sublists A = (⋃w∈A. set (sublists w))"
by (auto simp: Sublists_def)
lemma Sublists_empty [simp]: "Sublists {} = {}"
by (auto simp: Sublists_def)
lemma Sublists_singleton [simp]: "Sublists {w} = set (sublists w)"
by (auto simp: Sublists_altdef)
lemma Sublists_insert: "Sublists (insert w A) = set (sublists w) ∪ Sublists A"
by (auto simp: Sublists_altdef)
lemma Sublists_Un [simp]: "Sublists (A ∪ B) = Sublists A ∪ Sublists B"
by (auto simp: Sublists_altdef)
lemma Sublists_UN [simp]: "Sublists (⋃(f ` A)) = ⋃((λx. Sublists (f x)) ` A)"
by (auto simp: Sublists_altdef)
lemma Sublists_conv_Prefixes: "Sublists A = Prefixes (Suffixes A)"
by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)
lemma Sublists_conv_Suffixes: "Sublists A = Suffixes (Prefixes A)"
by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)
lemma Sublists_conc [simp]:
assumes "A ≠ {}" "B ≠ {}"
shows "Sublists (A @@ B) = Sublists A ∪ Sublists B ∪ Suffixes A @@ Prefixes B"
using assms unfolding Sublists_conv_Suffixes by auto
lemma star_not_empty [simp]: "star A ≠ {}"
by auto
lemma Sublists_star:
"A ≠ {} ⟹ Sublists (star A) = Sublists A ∪ Suffixes A @@ star A @@ Prefixes A"
by (simp add: Sublists_conv_Prefixes)
lemma Prefixes_subset_Sublists: "Prefixes A ⊆ Sublists A"
unfolding Prefixes_def Sublists_def by auto
lemma Suffixes_subset_Sublists: "Suffixes A ⊆ Sublists A"
unfolding Suffixes_def Sublists_def by auto
subsection ‹Fragment language›
text ‹
The following is the fragment language of a given language, i.e. the set of all words that
are (not necessarily contiguous) sub-sequences of a word in the original language.
›
definition Subseqs where "Subseqs A = (⋃w∈A. set (subseqs w))"
lemma Subseqs_empty [simp]: "Subseqs {} = {}"
by (simp add: Subseqs_def)
lemma Subseqs_insert [simp]: "Subseqs (insert xs A) = set (subseqs xs) ∪ Subseqs A"
by (simp add: Subseqs_def)
lemma Subseqs_singleton [simp]: "Subseqs {xs} = set (subseqs xs)"
by simp
lemma Subseqs_Un [simp]: "Subseqs (A ∪ B) = Subseqs A ∪ Subseqs B"
by (simp add: Subseqs_def)
lemma Subseqs_UNION [simp]: "Subseqs (⋃(f ` A)) = ⋃((λx. Subseqs (f x)) ` A)"
by (simp add: Subseqs_def)
lemma Subseqs_conc [simp]: "Subseqs (A @@ B) = Subseqs A @@ Subseqs B"
proof safe
fix xs assume "xs ∈ Subseqs (A @@ B)"
then obtain ys zs where *: "ys ∈ A" "zs ∈ B" "subseq xs (ys @ zs)"
by (auto simp: Subseqs_def conc_def)
from *(3) obtain xs1 xs2 where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs"
by (rule subseq_appendE)
with *(1,2) show "xs ∈ Subseqs A @@ Subseqs B" by (auto simp: Subseqs_def set_subseqs_eq)
next
fix xs assume "xs ∈ Subseqs A @@ Subseqs B"
then obtain xs1 xs2 ys zs
where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs" "ys ∈ A" "zs ∈ B"
by (auto simp: conc_def Subseqs_def)
thus "xs ∈ Subseqs (A @@ B)" by (force simp: Subseqs_def conc_def intro: list_emb_append_mono)
qed
lemma Subseqs_compower [simp]: "Subseqs (A ^^ n) = Subseqs A ^^ n"
by (induction n) simp_all
lemma Subseqs_star [simp]: "Subseqs (star A) = star (Subseqs A)"
by (simp add: star_def)
lemma Sublists_subset_Subseqs: "Sublists A ⊆ Subseqs A"
by (auto simp: Sublists_def Subseqs_def dest!: sublist_imp_subseq)
subsection ‹Various regular expression constructions›
text ‹A construction for language reversal of a regular expression:›
primrec rexp_rev where
"rexp_rev Zero = Zero"
| "rexp_rev One = One"
| "rexp_rev (Atom x) = Atom x"
| "rexp_rev (Plus r s) = Plus (rexp_rev r) (rexp_rev s)"
| "rexp_rev (Times r s) = Times (rexp_rev s) (rexp_rev r)"
| "rexp_rev (Star r) = Star (rexp_rev r)"
lemma lang_rexp_rev [simp]: "lang (rexp_rev r) = rev ` lang r"
by (induction r) (simp_all add: image_Un)
text ‹The obvious construction for a singleton-language regular expression:›
fun rexp_of_word where
"rexp_of_word [] = One"
| "rexp_of_word [x] = Atom x"
| "rexp_of_word (x#xs) = Times (Atom x) (rexp_of_word xs)"
lemma lang_rexp_of_word [simp]: "lang (rexp_of_word xs) = {xs}"
by (induction xs rule: rexp_of_word.induct) (simp_all add: conc_def)
lemma size_rexp_of_word [simp]: "size (rexp_of_word xs) = Suc (2 * (length xs - 1))"
by (induction xs rule: rexp_of_word.induct) auto
text ‹Character substitution in a regular expression:›
primrec rexp_subst where
"rexp_subst f Zero = Zero"
| "rexp_subst f One = One"
| "rexp_subst f (Atom x) = rexp_of_word (f x)"
| "rexp_subst f (Plus r s) = Plus (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Times r s) = Times (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Star r) = Star (rexp_subst f r)"
lemma lang_rexp_subst: "lang (rexp_subst f r) = subst_word f ` lang r"
by (induction r) (simp_all add: image_Un)
text ‹Suffix language of a regular expression:›
primrec suffix_rexp :: "'a rexp ⇒ 'a rexp" where
"suffix_rexp Zero = Zero"
| "suffix_rexp One = One"
| "suffix_rexp (Atom a) = Plus (Atom a) One"
| "suffix_rexp (Plus r s) = Plus (suffix_rexp r) (suffix_rexp s)"
| "suffix_rexp (Times r s) =
(if rexp_empty r then Zero else Plus (Times (suffix_rexp r) s) (suffix_rexp s))"
| "suffix_rexp (Star r) =
(if rexp_empty r then One else Times (suffix_rexp r) (Star r))"
theorem lang_suffix_rexp [simp]:
"lang (suffix_rexp r) = Suffixes (lang r)"
by (induction r) (auto simp: rexp_empty_iff)
text ‹Prefix language of a regular expression:›
primrec prefix_rexp :: "'a rexp ⇒ 'a rexp" where
"prefix_rexp Zero = Zero"
| "prefix_rexp One = One"
| "prefix_rexp (Atom a) = Plus (Atom a) One"
| "prefix_rexp (Plus r s) = Plus (prefix_rexp r) (prefix_rexp s)"
| "prefix_rexp (Times r s) =
(if rexp_empty s then Zero else Plus (Times r (prefix_rexp s)) (prefix_rexp r))"
| "prefix_rexp (Star r) =
(if rexp_empty r then One else Times (Star r) (prefix_rexp r))"
theorem lang_prefix_rexp [simp]:
"lang (prefix_rexp r) = Prefixes (lang r)"
by (induction r) (auto simp: rexp_empty_iff)
text ‹Sub-word language of a regular expression›
primrec sublist_rexp :: "'a rexp ⇒ 'a rexp" where
"sublist_rexp Zero = Zero"
| "sublist_rexp One = One"
| "sublist_rexp (Atom a) = Plus (Atom a) One"
| "sublist_rexp (Plus r s) = Plus (sublist_rexp r) (sublist_rexp s)"
| "sublist_rexp (Times r s) =
(if rexp_empty r ∨ rexp_empty s then Zero else
Plus (sublist_rexp r) (Plus (sublist_rexp s) (Times (suffix_rexp r) (prefix_rexp s))))"
| "sublist_rexp (Star r) =
(if rexp_empty r then One else
Plus (sublist_rexp r) (Times (suffix_rexp r) (Times (Star r) (prefix_rexp r))))"
theorem lang_sublist_rexp [simp]:
"lang (sublist_rexp r) = Sublists (lang r)"
by (induction r) (auto simp: rexp_empty_iff Sublists_star)
text ‹Fragment language of a regular expression:›
primrec subseqs_rexp :: "'a rexp ⇒ 'a rexp" where
"subseqs_rexp Zero = Zero"
| "subseqs_rexp One = One"
| "subseqs_rexp (Atom x) = Plus (Atom x) One"
| "subseqs_rexp (Plus r s) = Plus (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Times r s) = Times (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Star r) = Star (subseqs_rexp r)"
lemma lang_subseqs_rexp [simp]: "lang (subseqs_rexp r) = Subseqs (lang r)"
by (induction r) auto
text ‹Subword language of a regular expression›
end