Theory Regex

(*<*)
theory Regex
  imports
    Trace
begin

unbundle lattice_syntax
(*>*)

section ‹Regular expressions›

context begin

qualified datatype (atms: 'a) regex = Skip nat | Test 'a
  | Plus "'a regex" "'a regex" | Times "'a regex" "'a regex"  | Star "'a regex"

lemma finite_atms[simp]: "finite (atms r)"
  by (induct r) auto

definition "Wild = Skip 1"

lemma size_regex_estimation[termination_simp]: "x  atms r  y < f x  y < size_regex f r"
  by (induct r) auto

lemma size_regex_estimation'[termination_simp]: "x  atms r  y  f x  y  size_regex f r"
  by (induct r) auto

qualified definition "TimesL r S = Times r ` S"
qualified definition "TimesR R s = (λr. Times r s) ` R"

qualified primrec collect where
  "collect f (Skip n) = {}"
| "collect f (Test φ) = f φ"
| "collect f (Plus r s) = collect f r  collect f s"
| "collect f (Times r s) = collect f r  collect f s"
| "collect f (Star r) = collect f r"

lemma collect_cong[fundef_cong]:
  "r = r'  (z. z  atms r  f z = f' z)  collect f r = collect f' r'"
  by (induct r arbitrary: r') auto

lemma finite_collect[simp]: "(z. z  atms r  finite (f z))  finite (collect f r)"
  by (induct r) auto

lemma collect_commute:
  "(z. z  atms r  x  f z  g x  f' z)  x  collect f r  g x  collect f' r"
  by (induct r) auto

lemma collect_alt: "collect f r = (z  atms r. f z)"
  by (induct r) auto

qualified definition ncollect where
  "ncollect f r = Max (insert 0 (Suc ` collect f r))"

lemma insert_Un: "insert x (A  B) = insert x A  insert x B"
  by auto

lemma ncollect_simps[simp]:
  assumes [simp]: "(z. z  atms r  finite (f z))" "(z. z  atms s  finite (f z))"
  shows
  "ncollect f (Skip n) = 0"
  "ncollect f (Test φ) = Max (insert 0 (Suc ` f φ))"
  "ncollect f (Plus r s) = max (ncollect f r) (ncollect f s)"
  "ncollect f (Times r s) = max (ncollect f r) (ncollect f s)"
  "ncollect f (Star r) = ncollect f r"
  unfolding ncollect_def
  by (auto simp add: image_Un Max_Un insert_Un simp del: Un_insert_right Un_insert_left)

abbreviation "min_regex_default f r j  (if atms r = {} then j else Min ((λz. f z j) ` atms r))"

qualified primrec match :: "(nat  'a  bool)  'a regex  nat  nat  bool" where
  "match test (Skip n) = (λi j. j = i + n)"
| "match test (Test φ) = (λi j. i = j  test i φ)"
| "match test (Plus r s) = match test r  match test s"
| "match test (Times r s) = match test r OO match test s"
| "match test (Star r) = (match test r)**"

lemma match_cong[fundef_cong]:
  "r = r'  (i z. z  atms r  t i z = t' i z)  match t r = match t' r'"
  by (induct r arbitrary: r') auto

lemma match_le: "match test r i j  i  j"
proof (induction r arbitrary: i j)
  case (Times r s)
  then show ?case using order.trans by fastforce
next
  case (Star r)
  from Star.prems show ?case
    unfolding match.simps by (induct i j rule: rtranclp.induct) (force dest: Star.IH)+
qed auto

lemma match_rtranclp_le: "(match test r)** i j  i  j"
  by (metis match.simps(5) match_le)

lemma match_map_regex: "match t (map_regex f r) = match (λk z. t k (f z)) r"
  by (induct r) auto

lemma match_mono_strong:
  "(k z. k  {i ..< j + 1}  z  atms r  t k z  t' k z)  match t r i j  match t' r i j"
proof (induction r arbitrary: i j)
  case (Times r s)
  from Times.prems show ?case
    by (auto 0 4 simp: relcompp_apply intro: le_less_trans match_le less_Suc_eq_le
      dest: Times.IH[rotated -1] match_le)
next
  case (Star r)
  from Star(3) show ?case unfolding match.simps
  proof -
    assume *: "(match t r)** i j"
    then have "i  j" unfolding match.simps(5)[symmetric]
      by (rule match_le)
    with * show "(match t' r)** i j" using Star.prems
    proof (induction i j rule: rtranclp.induct)
      case (rtrancl_into_rtrancl a b c)
      from rtrancl_into_rtrancl(1,2,4,5) show ?case
        by (intro rtranclp.rtrancl_into_rtrancl[OF rtrancl_into_rtrancl.IH])
          (auto dest!: Star.IH[rotated -1]
            dest: match_le match_rtranclp_le simp: less_Suc_eq_le)
    qed simp
  qed
qed auto

lemma match_cong_strong:
  "(k z. k  {i ..< j + 1}  z  atms r  t k z = t' k z)  match t r i j = match t' r i j"
  using match_mono_strong[of i j r t t'] match_mono_strong[of i j r t' t] by blast

end

(*<*)
end
(*>*)