Theory Smolka_AI_Independence_System

theory Smolka_AI_Independence_System
  imports Main Set_System Smolka_AI
begin

text ‹
  Formalisation of the annotation algorithm via independence systems. We prove:
  \begin{itemize}
  \item The Best-In-Greedy algorithm returns a maximal independent set.
  \item The annotation set system is an independence system.
  \item Local minimality of the kept positions.
  \item The witness variable corollary.
  \end{itemize}
›

subsection ‹The Annotation Set System›

text ‹
  We now define the annotation set system.
  We abstract away from the concrete type-theoretic setting and work with:
  \begin{itemize}
  \item A finite set termS of candidate positions (the ground set).
  \item A finite set termV of inference variables.
  \item A key function mapping each position to a set of variables.
  \item The coverage condition: termS covers termV.
  \end{itemize}
›

subsubsection ‹Setup›

locale annotation_setup =
  fixes S :: "nat set"
    and V :: "string set"
    and key :: "nat  string set"
  assumes finite_S: "finite S"
  assumes finite_V: "finite V"
  assumes key_subset: "p. p  S  key p  V"
  assumes S_covers_V: "(key ` S) = V"
begin

text ‹Coverage›

definition covers :: "nat set  bool" where
  "covers Q  V  (key ` Q)"

lemma covers_S: "covers S"
  unfolding covers_def using S_covers_V by blast

lemma covers_mono:
  assumes "covers Q" "Q  Q'"
  shows "covers Q'"
  unfolding covers_def using assms unfolding covers_def by blast

subsubsection ‹The Independence System›

text ‹
  The annotation set system (S, F) is defined by:
    $F = \{F \subseteq S \| S \setminus F \textrm{ covers } V\}$
  A set $F$ represents positions that may be dropped: it is independent precisely
  when the remaining positions $S \setminus F$ still cover all inference variables.
›

definition ann_indep :: "nat set  bool" where
  "ann_indep F  F  S  covers (S - F)"

subsubsection ‹The annotation set system is an independence system›

lemma ann_indep_empty: "ann_indep {}"
  unfolding ann_indep_def using covers_S by simp

lemma ann_indep_subset_carrier:
  "ann_indep F  F  S"
  unfolding ann_indep_def by simp

lemma ann_indep_hereditary:
  assumes "ann_indep F" "F'  F"
  shows "ann_indep F'"
proof -
  from assms have "F  S" and cov: "covers (S - F)" 
    unfolding ann_indep_def by auto
  from assms have "F'  S" using F  S by blast
  moreover have "S - F  S - F'" using F'  F by blast
  then have "covers (S - F')" using cov covers_mono by blast
  ultimately show ?thesis unfolding ann_indep_def by blast
qed

text ‹The annotation set system forms an independence system.›

sublocale indep_system S ann_indep
proof unfold_locales
  show "finite S" by (rule finite_S)
  show "ann_indep {}" by (rule ann_indep_empty)
  show "F. ann_indep F  F  S" by (rule ann_indep_subset_carrier)
  show "F F'. ann_indep F  F'  F  ann_indep F'"
    by (rule ann_indep_hereditary)
qed

subsection ‹Feasibility and Count Condition›

text ‹
  For termF in the independence family with termp  S - F, we have
  termF  {p} is independent iff termvar_count F α > 1 for every termα  key p,
  where termvar_count α counts positions in termS - F covering termα.
›

definition var_count :: "nat set  string  nat" where
  "var_count F α = card {q  S - F. α  key q}"

lemma feasibility_iff_count:
  assumes "ann_indep F" "p  S - F"
  shows "ann_indep (F  {p})  (α  key p. var_count F α > 1)"
proof
  assume indep_Fp: "ann_indep (F  {p})"
  show "α  key p. var_count F α > 1"
  proof
    fix α assume α_in: "α  key p"
    from indep_Fp have "covers (S - (F  {p}))" unfolding ann_indep_def by simp
    then have V_cov: "V  (key ` (S - (F  {p})))" unfolding covers_def .
    from α_in assms(2) key_subset have "α  V" by blast
    then obtain q where q: "q  S - (F  {p})" "α  key q"
      using V_cov by blast
    then have "q  p" "q  S - F" by auto
    then have "{p, q}  {r  S - F. α  key r}" 
      using α_in q(2) assms(2) by blast
    moreover have "p  q" using q  p by simp
    moreover have "finite {r  S - F. α  key r}" using finite_S by auto
    ultimately have "card {r  S - F. α  key r}  2"
      by (metis card_2_iff card_mono)
    then show "var_count F α > 1" unfolding var_count_def by simp
  qed
next
  assume count_cond: "α  key p. var_count F α > 1"
  have "F  {p}  S" using assms unfolding ann_indep_def by auto
  moreover have "covers (S - (F  {p}))"
  proof -
    have V_sub: "V  (key ` (S - F))"
      using assms(1) unfolding ann_indep_def covers_def by simp
    show ?thesis unfolding covers_def
    proof
      fix α assume "α  V"
      then obtain q where q: "q  S - F" "α  key q" 
        using V_sub by blast
      show "α  (key ` (S - (F  {p})))"
      proof (cases "α  key p")
        case True
        then have cnt_gt: "card {r  S - F. α  key r} > 1" 
          using count_cond unfolding var_count_def by simp
        moreover have p_mem: "p  {r  S - F. α  key r}" using True assms(2) by simp
        moreover have fin: "finite {r  S - F. α  key r}" using finite_S by auto
        ultimately have "card ({r  S - F. α  key r} - {p}) > 0"
          by (simp add: card_Diff_singleton)
        then have "{r  S - F. α  key r} - {p}  {}" 
          using card_gt_0_iff fin finite_Diff by blast
        then obtain r where "r  S - F" "α  key r" "r  p" by blast
        then show ?thesis by blast
      next
        case False
        then have "q  p" using q(2) by blast
        then have "q  S - (F  {p})" using q(1) by blast
        then show ?thesis using q(2) by blast
      qed
    qed
  qed
  ultimately show "ann_indep (F  {p})" unfolding ann_indep_def by blast
qed

subsection ‹Local Minimality›

lemma local_minimality_coverage:
  assumes "ann_indep F_star"
  shows "covers (S - F_star)"
  using assms unfolding ann_indep_def by simp

lemma local_minimality:
  assumes greedy_maximal: "maximal F_star"
    and p_in_P: "p  S - F_star"
  shows "¬ covers (S - F_star - {p})"
proof -
  from greedy_maximal p_in_P have "¬ ann_indep (F_star  {p})"
    using maximalD(2) by blast
  moreover have "F_star  {p}  S" 
    using maximalD(1)[OF greedy_maximal] indep_subset_carrier p_in_P by blast
  ultimately have "¬ covers (S - (F_star  {p}))"
    unfolding ann_indep_def by simp
  moreover have "S - (F_star  {p}) = S - F_star - {p}" by blast
  ultimately show ?thesis by simp
qed

corollary witness_variable:
  assumes greedy_maximal: "maximal F_star"
    and p_in_P: "p  S - F_star"
  obtains α where "α  key p" "α  V" 
    "q. q  S - F_star - {p}  α  key q"
proof -
  let ?P = "S - F_star"
  have cov: "covers ?P" 
    using maximalD(1)[OF greedy_maximal] local_minimality_coverage by simp
  have not_cov: "¬ covers (?P - {p})" 
    using local_minimality[OF greedy_maximal p_in_P] by simp
  from not_cov obtain α where α: "α  V" "α  (key ` (?P - {p}))"
    unfolding covers_def by blast
  from cov have "V  (key ` ?P)" unfolding covers_def by simp
  with α(1) obtain q where "q  ?P" "α  key q" by blast
  with α(2) have "q = p" by blast
  then have "α  key p" using α  key q by simp
  moreover have "q. q  ?P - {p}  α  key q"
    using α(2) by blast
  ultimately show ?thesis using that α(1) by blast
qed

subsection ‹Reverse Greedy Correctness›
definition reverse_greedy' es = S - best_in_greedy ann_indep es

theorem annotation_algorithm_correct:
  assumes perm: "set es = S" and distinct: "distinct es"
  shows "covers (reverse_greedy' es)"
    and "p. p  reverse_greedy' es  
           α  key p. α  V  (q  reverse_greedy' es - {p}. α  key q)"
proof -
  have mx: "maximal (best_in_greedy ann_indep es)"
    using best_in_greedy_maximal[OF perm distinct] .
  then have indep_star: "ann_indep (best_in_greedy ann_indep es)" 
    using maximalD by blast
  show "covers (reverse_greedy' es)"
    using local_minimality_coverage[OF indep_star] reverse_greedy'_def by simp
  show "p. p  reverse_greedy' es  
           α  key p. α  V  (q  reverse_greedy' es - {p}. α  key q)"
    using witness_variable[OF mx] reverse_greedy'_def by metis
qed
end

context annotation_problem
begin

text ‹The annotation problem gives rise to an annotation set system.›

lemma finite_tvars_ty: "finite (tvars_ty t)"
  by (induction t) auto

interpretation ann: annotation_setup "pos_set a_star" V key
proof unfold_locales
  show "finite (pos_set a_star)" by (simp add: pos_set_def)
  have "finite (tvars_aterm a_star)" by (induction a_star) (auto simp: finite_tvars_ty)
  then show "finite V" unfolding V_def by auto
  show "p. p  pos_set a_star  key p  V"
    unfolding key_def by auto
  show "(key ` pos_set a_star) = V"
  proof
    show "(key ` pos_set a_star)  V" unfolding key_def by auto
  next
    show "V  (key ` pos_set a_star)"
    proof
      fix α assume "α  V"
      then have "α  tvars_aterm a_star" unfolding V_def by auto
      then obtain p s τ where "(p, s, τ)  set (enum_aterm a_star)" "α  tvars_ty τ"
        using tvars_aterm_subset_enum by blast
      then have "p  pos_set a_star" unfolding pos_set_def by force
      moreover have "α  key p" unfolding key_def using (p, s, τ)  _ α  tvars_ty τ α  V by auto
      ultimately show "α  (key ` pos_set a_star)" by auto
    qed
  qed
qed

definition es_from_enum :: "nat list" where
  "es_from_enum = map fst (enum_aterm a_star)"

lemma es_set: "set es_from_enum = pos_set a_star"
  unfolding es_from_enum_def pos_set_def
  by (force simp: image_iff)

lemma es_distinct: "distinct es_from_enum"
  unfolding es_from_enum_def
  by (simp add: distinct_enum_fst)

lemma reverse_greedy'_subs_pos: ann.reverse_greedy' es_from_enum  pos_set a_star
  by (simp add: ann.reverse_greedy'_def)

lemma reverse_greedy'_coverage:  (key ` ann.reverse_greedy' es_from_enum) = V
  using ann.annotation_algorithm_correct(1)[OF es_set es_distinct] ann.S_covers_V 
  using ann.covers_def ann.reverse_greedy'_def by auto

lemma reverse_greedy'_witness: 
  assumes p  ann.reverse_greedy' es_from_enum 
  shows αkey p. p'ann.reverse_greedy' es_from_enum. p'  p  α  key p'
  using assms ann.annotation_algorithm_correct(2)[OF es_set es_distinct, of p] 
  by auto

interpretation ann: annotation_selection 
  const_type
  Γ a a_star σ ann.reverse_greedy' es_from_enum
  using reverse_greedy'_subs_pos reverse_greedy'_coverage reverse_greedy'_witness
  by unfold_locales auto

thm ann.local_minimality
thm ann.completeness
end
end