Theory Smolka_AI_Independence_System

theory Smolka_AI_Independence_System
  imports Main Set_System Smolka_AI
begin

text ‹
  Formalisation of the annotation algorithm via independence systems. We prove:
  \begin{itemize}
  \item The Best-In-Greedy algorithm returns a maximal independent set.
  \item The annotation set system is an independence system.
  \item Local minimality of the kept positions.
  \item The witness variable corollary.
  \end{itemize}
›

subsection ‹The Annotation Set System›

text ‹
  We now define the annotation set system.
  We abstract away from the concrete type-theoretic setting and work with:
  \begin{itemize}
  \item A finite set \<^term>‹S› of candidate positions (the ground set).
  \item A finite set \<^term>‹V› of inference variables.
  \item A key function mapping each position to a set of variables.
  \item The coverage condition: \<^term>‹S› covers \<^term>‹V›.
  \end{itemize}
›

subsubsection ‹Setup›

locale annotation_setup =
  fixes S :: "nat set"
    and V :: "string set"
    and key :: "nat ⇒ string set"
  assumes finite_S: "finite S"
  assumes finite_V: "finite V"
  assumes key_subset: "⋀p. p ∈ S ⟹ key p ⊆ V"
  assumes S_covers_V: "⋃(key ` S) = V"
begin

text ‹Coverage›

definition covers :: "nat set ⇒ bool" where
  "covers Q ⟷ V ⊆ ⋃(key ` Q)"

lemma covers_S: "covers S"
  unfolding covers_def using S_covers_V by blast

lemma covers_mono:
  assumes "covers Q" "Q ⊆ Q'"
  shows "covers Q'"
  unfolding covers_def using assms unfolding covers_def by blast

subsubsection ‹The Independence System›

text ‹
  The annotation set system (S, F) is defined by:
    $F = \{F \subseteq S \| S \setminus F \textrm{ covers } V\}$
  A set $F$ represents positions that may be dropped: it is independent precisely
  when the remaining positions $S \setminus F$ still cover all inference variables.
›

definition ann_indep :: "nat set ⇒ bool" where
  "ann_indep F ⟷ F ⊆ S ∧ covers (S - F)"

subsubsection ‹The annotation set system is an independence system›

lemma ann_indep_empty: "ann_indep {}"
  unfolding ann_indep_def using covers_S by simp

lemma ann_indep_subset_carrier:
  "ann_indep F ⟹ F ⊆ S"
  unfolding ann_indep_def by simp

lemma ann_indep_hereditary:
  assumes "ann_indep F" "F' ⊆ F"
  shows "ann_indep F'"
proof -
  from assms have "F ⊆ S" and cov: "covers (S - F)" 
    unfolding ann_indep_def by auto
  from assms have "F' ⊆ S" using ‹F ⊆ S› by blast
  moreover have "S - F ⊆ S - F'" using ‹F' ⊆ F› by blast
  then have "covers (S - F')" using cov covers_mono by blast
  ultimately show ?thesis unfolding ann_indep_def by blast
qed

text ‹The annotation set system forms an independence system.›

sublocale indep_system S ann_indep
proof unfold_locales
  show "finite S" by (rule finite_S)
  show "ann_indep {}" by (rule ann_indep_empty)
  show "⋀F. ann_indep F ⟹ F ⊆ S" by (rule ann_indep_subset_carrier)
  show "⋀F F'. ann_indep F ⟹ F' ⊆ F ⟹ ann_indep F'"
    by (rule ann_indep_hereditary)
qed

subsection ‹Feasibility and Count Condition›

text ‹
  For \<^term>‹F› in the independence family with \<^term>‹p ∈ S - F›, we have
  \<^term>‹F ∪ {p}› is independent iff \<^term>‹var_count F α > 1› for every \<^term>‹α ∈ key p›,
  where \<^term>‹var_count α› counts positions in \<^term>‹S - F› covering \<^term>‹α›.
›

definition var_count :: "nat set ⇒ string ⇒ nat" where
  "var_count F α = card {q ∈ S - F. α ∈ key q}"

lemma feasibility_iff_count:
  assumes "ann_indep F" "p ∈ S - F"
  shows "ann_indep (F ∪ {p}) ⟷ (∀α ∈ key p. var_count F α > 1)"
proof
  assume indep_Fp: "ann_indep (F ∪ {p})"
  show "∀α ∈ key p. var_count F α > 1"
  proof
    fix α assume α_in: "α ∈ key p"
    from indep_Fp have "covers (S - (F ∪ {p}))" unfolding ann_indep_def by simp
    then have V_cov: "V ⊆ ⋃(key ` (S - (F ∪ {p})))" unfolding covers_def .
    from α_in assms(2) key_subset have "α ∈ V" by blast
    then obtain q where q: "q ∈ S - (F ∪ {p})" "α ∈ key q"
      using V_cov by blast
    then have "q ≠ p" "q ∈ S - F" by auto
    then have "{p, q} ⊆ {r ∈ S - F. α ∈ key r}" 
      using α_in q(2) assms(2) by blast
    moreover have "p ≠ q" using ‹q ≠ p› by simp
    moreover have "finite {r ∈ S - F. α ∈ key r}" using finite_S by auto
    ultimately have "card {r ∈ S - F. α ∈ key r} ≥ 2"
      by (metis card_2_iff card_mono)
    then show "var_count F α > 1" unfolding var_count_def by simp
  qed
next
  assume count_cond: "∀α ∈ key p. var_count F α > 1"
  have "F ∪ {p} ⊆ S" using assms unfolding ann_indep_def by auto
  moreover have "covers (S - (F ∪ {p}))"
  proof -
    have V_sub: "V ⊆ ⋃(key ` (S - F))"
      using assms(1) unfolding ann_indep_def covers_def by simp
    show ?thesis unfolding covers_def
    proof
      fix α assume "α ∈ V"
      then obtain q where q: "q ∈ S - F" "α ∈ key q" 
        using V_sub by blast
      show "α ∈ ⋃(key ` (S - (F ∪ {p})))"
      proof (cases "α ∈ key p")
        case True
        then have cnt_gt: "card {r ∈ S - F. α ∈ key r} > 1" 
          using count_cond unfolding var_count_def by simp
        moreover have p_mem: "p ∈ {r ∈ S - F. α ∈ key r}" using True assms(2) by simp
        moreover have fin: "finite {r ∈ S - F. α ∈ key r}" using finite_S by auto
        ultimately have "card ({r ∈ S - F. α ∈ key r} - {p}) > 0"
          by (simp add: card_Diff_singleton)
        then have "{r ∈ S - F. α ∈ key r} - {p} ≠ {}" 
          using card_gt_0_iff fin finite_Diff by blast
        then obtain r where "r ∈ S - F" "α ∈ key r" "r ≠ p" by blast
        then show ?thesis by blast
      next
        case False
        then have "q ≠ p" using q(2) by blast
        then have "q ∈ S - (F ∪ {p})" using q(1) by blast
        then show ?thesis using q(2) by blast
      qed
    qed
  qed
  ultimately show "ann_indep (F ∪ {p})" unfolding ann_indep_def by blast
qed

subsection ‹Local Minimality›

lemma local_minimality_coverage:
  assumes "ann_indep F_star"
  shows "covers (S - F_star)"
  using assms unfolding ann_indep_def by simp

lemma local_minimality:
  assumes greedy_maximal: "maximal F_star"
    and p_in_P: "p ∈ S - F_star"
  shows "¬ covers (S - F_star - {p})"
proof -
  from greedy_maximal p_in_P have "¬ ann_indep (F_star ∪ {p})"
    using maximalD(2) by blast
  moreover have "F_star ∪ {p} ⊆ S" 
    using maximalD(1)[OF greedy_maximal] indep_subset_carrier p_in_P by blast
  ultimately have "¬ covers (S - (F_star ∪ {p}))"
    unfolding ann_indep_def by simp
  moreover have "S - (F_star ∪ {p}) = S - F_star - {p}" by blast
  ultimately show ?thesis by simp
qed

corollary witness_variable:
  assumes greedy_maximal: "maximal F_star"
    and p_in_P: "p ∈ S - F_star"
  obtains α where "α ∈ key p" "α ∈ V" 
    "⋀q. q ∈ S - F_star - {p} ⟹ α ∉ key q"
proof -
  let ?P = "S - F_star"
  have cov: "covers ?P" 
    using maximalD(1)[OF greedy_maximal] local_minimality_coverage by simp
  have not_cov: "¬ covers (?P - {p})" 
    using local_minimality[OF greedy_maximal p_in_P] by simp
  from not_cov obtain α where α: "α ∈ V" "α ∉ ⋃(key ` (?P - {p}))"
    unfolding covers_def by blast
  from cov have "V ⊆ ⋃(key ` ?P)" unfolding covers_def by simp
  with α(1) obtain q where "q ∈ ?P" "α ∈ key q" by blast
  with α(2) have "q = p" by blast
  then have "α ∈ key p" using ‹α ∈ key q› by simp
  moreover have "⋀q. q ∈ ?P - {p} ⟹ α ∉ key q"
    using α(2) by blast
  ultimately show ?thesis using that α(1) by blast
qed

subsection ‹Reverse Greedy Correctness›
definition ‹reverse_greedy' es = S - best_in_greedy ann_indep es›

theorem annotation_algorithm_correct:
  assumes perm: "set es = S" and distinct: "distinct es"
  shows "covers (reverse_greedy' es)"
    and "⋀p. p ∈ reverse_greedy' es ⟹ 
           ∃α ∈ key p. α ∈ V ∧ (∀q ∈ reverse_greedy' es - {p}. α ∉ key q)"
proof -
  have mx: "maximal (best_in_greedy ann_indep es)"
    using best_in_greedy_maximal[OF perm distinct] .
  then have indep_star: "ann_indep (best_in_greedy ann_indep es)" 
    using maximalD by blast
  show "covers (reverse_greedy' es)"
    using local_minimality_coverage[OF indep_star] reverse_greedy'_def by simp
  show "⋀p. p ∈ reverse_greedy' es ⟹ 
           ∃α ∈ key p. α ∈ V ∧ (∀q ∈ reverse_greedy' es - {p}. α ∉ key q)"
    using witness_variable[OF mx] reverse_greedy'_def by metis
qed
end

context annotation_problem
begin

text ‹The annotation problem gives rise to an annotation set system.›

lemma finite_tvars_ty: "finite (tvars_ty t)"
  by (induction t) auto

interpretation ann: annotation_setup "pos_set a_star" V key
proof unfold_locales
  show "finite (pos_set a_star)" by (simp add: pos_set_def)
  have "finite (tvars_aterm a_star)" by (induction a_star) (auto simp: finite_tvars_ty)
  then show "finite V" unfolding V_def by auto
  show "⋀p. p ∈ pos_set a_star ⟹ key p ⊆ V"
    unfolding key_def by auto
  show "⋃(key ` pos_set a_star) = V"
  proof
    show "⋃(key ` pos_set a_star) ⊆ V" unfolding key_def by auto
  next
    show "V ⊆ ⋃(key ` pos_set a_star)"
    proof
      fix α assume "α ∈ V"
      then have "α ∈ tvars_aterm a_star" unfolding V_def by auto
      then obtain p s τ where "(p, s, τ) ∈ set (enum_aterm a_star)" "α ∈ tvars_ty τ"
        using tvars_aterm_subset_enum by blast
      then have "p ∈ pos_set a_star" unfolding pos_set_def by force
      moreover have "α ∈ key p" unfolding key_def using ‹(p, s, τ) ∈ _› ‹α ∈ tvars_ty τ› ‹α ∈ V› by auto
      ultimately show "α ∈ ⋃(key ` pos_set a_star)" by auto
    qed
  qed
qed

definition es_from_enum :: "nat list" where
  "es_from_enum = map fst (enum_aterm a_star)"

lemma es_set: "set es_from_enum = pos_set a_star"
  unfolding es_from_enum_def pos_set_def
  by (force simp: image_iff)

lemma es_distinct: "distinct es_from_enum"
  unfolding es_from_enum_def
  by (simp add: distinct_enum_fst)

lemma reverse_greedy'_subs_pos: ‹ann.reverse_greedy' es_from_enum ⊆ pos_set a_star›
  by (simp add: ann.reverse_greedy'_def)

lemma reverse_greedy'_coverage: ‹⋃ (key ` ann.reverse_greedy' es_from_enum) = V›
  using ann.annotation_algorithm_correct(1)[OF es_set es_distinct] ann.S_covers_V 
  using ann.covers_def ann.reverse_greedy'_def by auto

lemma reverse_greedy'_witness: 
  assumes ‹p ∈ ann.reverse_greedy' es_from_enum› 
  shows ‹∃α∈key p. ∀p'∈ann.reverse_greedy' es_from_enum. p' ≠ p ⟶ α ∉ key p'›
  using assms ann.annotation_algorithm_correct(2)[OF es_set es_distinct, of p] 
  by auto

interpretation ann: annotation_selection 
  const_type
  Γ a a_star σ ‹ann.reverse_greedy' es_from_enum›
  using reverse_greedy'_subs_pos reverse_greedy'_coverage reverse_greedy'_witness
  by unfold_locales auto

thm ann.local_minimality
thm ann.completeness
end
end