Theory Approx_SC_Hoare

section ‹Set Cover›

theory Approx_SC_Hoare
imports
  "HOL-Hoare.Hoare_Logic"
  Complex_Main (* "HOL-Analysis.Harmonic_Numbers" *)
begin

text ‹This is a formalization of the set cover algorithm and proof
in the book by Kleinberg and Tardos cite"KleinbergT06".›

definition harm :: "nat  'a :: real_normed_field" where
  "harm n = (k=1..n. inverse (of_nat k))"
(* For simplicity defined locally instead of importing HOL-Analysis.Harmonic_Numbers.
   Only the definition, no theorems are needed.
*)

locale Set_Cover = (* Set Cover *)
  fixes w :: "nat  real"
    and m :: nat
    and S :: "nat  'a set"
  assumes S_finite: "i  {1..m}. finite (S i)"
      and w_nonneg: "i. 0  w i"
begin

definition U :: "'a set" where
  "U = (i  {1..m}. S i)"

lemma S_subset: "i  {1..m}. S i  U"
  using U_def by blast

lemma U_finite: "finite U"
  unfolding U_def using S_finite by blast

lemma empty_cover: "m = 0  U = {}"
  using U_def by simp

definition sc :: "nat set  'a set  bool" where
  "sc C X  C  {1..m}  (i  C. S i) = X"

definition cost :: "'a set  nat  real" where
  "cost R i = w i / card (S i  R)"

lemma cost_nonneg: "0  cost R i"
  using w_nonneg by (simp add: cost_def)

text cost R i = 0› if card (S i ∩ R) = 0›! Needs to be accounted for separately in min_arg›.›
fun min_arg :: "'a set  nat  nat" where
  "min_arg R 0 = 1"
| "min_arg R (Suc x) =
   (let j = min_arg R x
    in if S j  R = {}  (S (Suc x)  R  {}  cost R (Suc x) < cost R j) then (Suc x) else j)"

lemma min_in_range: "k > 0  min_arg R k  {1..k}"
  by (induction k) (force simp: Let_def)+

lemma min_empty: "S (min_arg R k)  R = {}  i  {1..k}. S i  R = {}"
proof (induction k)
  case (Suc k)
  from Suc.prems have prem: "S (min_arg R k)  R = {}" by (auto simp: Let_def split: if_splits)
  with Suc.IH have IH: "i  {1..k}. S i  R = {}" .
  show ?case proof fix i assume "i  {1..Suc k}" show "S i  R = {}"
    proof (cases i = Suc k)
      case True with Suc.prems prem show ?thesis by simp
    next
      case False with IH i  {1..Suc k} show ?thesis by simp
    qed
  qed
qed simp

lemma min_correct: " i  {1..k}; S i  R  {}   cost R (min_arg R k)  cost R i"
proof (induction k)
  case (Suc k)
  show ?case proof (cases i = Suc k)
    case True with Suc.prems show ?thesis by (auto simp: Let_def)
  next
    case False with Suc.prems Suc.IH have IH: "cost R (min_arg R k)  cost R i" by simp
    from Suc.prems False min_empty[of R k] have "S (min_arg R k)  R  {}" by force
    with IH show ?thesis by (auto simp: Let_def)
  qed
qed simp

text ‹Correctness holds quite trivially for both m = 0 and m > 0
      (assuming a set cover can be found at all, otherwise algorithm would not terminate).›
lemma set_cover_correct:
"VARS (R :: 'a set) (C :: nat set) (i :: nat)
  {True}
  R := U; C := {};
  WHILE R  {} INV {R  U  sc C (U - R)} DO
  i := min_arg R m;
  R := R - S i;
  C := C  {i}
  OD
  {sc C U}"
proof (vcg, goal_cases)
  case 2 show ?case proof (cases m)
    case 0
    from empty_cover[OF this] 2 show ?thesis by (auto simp: sc_def)
  next
    case Suc then have "m > 0" by simp
    from min_in_range[OF this] 2 show ?thesis using S_subset by (auto simp: sc_def)
  qed
qed (auto simp: sc_def)

definition c_exists :: "nat set  'a set  bool" where
  "c_exists C R = (c. sum w C = sum c (U - R)  (i. 0  c i)
                 (k  {1..m}. sum c (S k  (U - R))
                    (j = card (S k  R) + 1..card (S k). inverse j) * w k))"

definition inv :: "nat set  'a set  bool" where
  "inv C R  sc C (U - R)  R  U  c_exists C R"

lemma invI:
  assumes "sc C (U - R)" "R  U"
          "c. sum w C = sum c (U - R)  (i. 0  c i)
         (k  {1..m}. sum c (S k  (U - R))
                       (j = card (S k  R) + 1..card (S k). inverse j) * w k)"
    shows "inv C R" using assms by (auto simp: inv_def c_exists_def)

lemma invD:
  assumes "inv C R"
  shows "sc C (U - R)" "R  U"
        "c. sum w C = sum c (U - R)  (i. 0  c i)
       (k  {1..m}. sum c (S k  (U - R))
                     (j = card (S k  R) + 1..card (S k). inverse j) * w k)"
  using assms by (auto simp: inv_def c_exists_def)

lemma inv_init: "inv {} U"
proof (rule invI, goal_cases)
  case 3
  let ?c = "(λ_. 0) :: 'a  real"
  have "sum w {} = sum ?c (U - U)" by simp
  moreover {
    have "k  {1..m}. 0  (j = card (S k  U) + 1..card (S k). inverse j) * w k"
      by (simp add: sum_nonneg w_nonneg)
    then have "(k{1..m}. sum ?c (S k  (U - U))
              (j = card (S k  U) + 1..card (S k). inverse j) * w k)" by simp
  }
  ultimately show ?case by blast
qed (simp_all add: sc_def)

lemma inv_step:
  assumes "inv C R" "R  {}"
  defines [simp]: "i  min_arg R m"
  shows "inv (C  {i}) (R - (S i))"
proof (cases m)
  case 0
  from empty_cover[OF this] invD(2)[OF assms(1)] have "R = {}" by blast
  then show ?thesis using assms(2) by simp
next
  case Suc then have "0 < m" by simp
  note hyp = invD[OF assms(1)]
  show ?thesis proof (rule invI, goal_cases)
      ― ‹Correctness›
    case 1 have "i  {1..m}" using min_in_range[OF 0 < m] by simp
    with hyp(1) S_subset show ?case by (auto simp: sc_def)
  next
    case 2 from hyp(2) show ?case by auto
  next
    case 3
      ― ‹Set Cover grows›
    have "i  {1..m}. S i  R  {}"
      using assms(2) U_def hyp(2) by blast
    then have "S i  R  {}" using min_empty by auto
    then have "0 < card (S i  R)"
      using S_finite min_in_range[OF 0 < m] by auto

      ― ‹Proving properties of cost function›
    from hyp(3) obtain c where "sum w C = sum c (U - R)" "i. 0  c i" and
      SUM: "k{1..m}. sum c (S k  (U - R))
         (j = card (S k  R) + 1..card (S k). inverse j) * w k" by blast
    let ?c = "(λx. if x  S i  R then cost R i else c x)"

      ― ‹Proof of Lemma 11.9›
    have "finite (U - R)" "finite (S i  R)" "(U - R)  (S i  R) = {}"
      using U_finite S_finite min_in_range[OF 0 < m] by auto
    then have "sum ?c (U - R  (S i  R)) = sum ?c (U - R) + sum ?c (S i  R)"
      by (rule sum.union_disjoint)
    moreover have U_split: "U - (R - S i) = U - R  (S i  R)" using hyp(2) by blast
    moreover {
      have "sum ?c (S i  R) = card (S i  R) * cost R i" by simp
      also have "... = w i" unfolding cost_def using 0 < card (S i  R) by simp
      finally have "sum ?c (S i  R) = w i" .
    }
    ultimately have "sum ?c (U - (R - S i)) = sum ?c (U - R) + w i" by simp
    moreover {
      have "C  {i} = {}" using hyp(1) S i  R  {} by (auto simp: sc_def)
      from sum.union_disjoint[OF _ _ this] have "sum w (C  {i}) = sum w C + w i"
        using hyp(1) by (auto simp: sc_def intro: finite_subset)
    }
    ultimately have 1: "sum w (C  {i}) = sum ?c (U - (R - S i))" ― ‹Lemma 11.9›
      using sum w C = sum c (U - R) by simp

    have 2: "i. 0  ?c i" using i. 0  c i cost_nonneg by simp

      ― ‹Proof of Lemma 11.10›
    have 3: "k{1..m}. sum ?c (S k  (U - (R - S i)))
           (j = card (S k  (R - S i)) + 1..card (S k). inverse j) * w k"
    proof
      fix k assume "k  {1..m}"
      let ?rem = "S k  R" ― ‹Remaining elements to be covered›
      let ?add = "S k  S i  R" ― ‹Elements that will be covered in this step›
      let ?cov = "S k  (U - R)" ― ‹Covered elements›

      ― ‹Transforming left and right sides›
      have "sum ?c (S k  (U - (R - S i))) = sum ?c (S k  (U - R  (S i  R)))"
        unfolding U_split ..
      also have "... = sum ?c (?cov  ?add)"
        by (simp add: Int_Un_distrib Int_assoc)
      also have "... = sum ?c ?cov + sum ?c ?add"
        by (rule sum.union_disjoint) (insert S_finite k  _, auto)
      finally have lhs:
        "sum ?c (S k  (U - (R - S i))) = sum ?c ?cov + sum ?c ?add" .
      have "S k  (R - S i) = ?rem - ?add" by blast
      then have "card (S k  (R - S i)) = card (?rem - ?add)" by simp
      also have "... = card ?rem - card ?add"
        using S_finite k  _ by (auto intro: card_Diff_subset)
      finally have rhs:
        "card (S k  (R - S i)) + 1 = card ?rem - card ?add + 1" by simp
      
      ― ‹The apparent complexity of the remaining proof is deceiving. Much of this is just about
          convincing Isabelle that these sum transformations are allowed.›
      have "sum ?c ?add = card ?add * cost R i" by simp
      also have "...  card ?add * cost R k"
        proof (cases "?rem = {}")
          case True
          then have "card ?add = 0" by (auto simp: card_eq_0_iff)
          then show ?thesis by simp
        next
          case False
          from min_correct[OF k  _ this] have "cost R i  cost R k" by simp
          then show ?thesis by (simp add: mult_left_mono)
        qed
      also have "... = card ?add * inverse (card ?rem) * w k"
        by (simp add: cost_def divide_inverse_commute)
      also have "... = (j  {card ?rem - card ?add + 1 .. card ?rem}. inverse (card ?rem)) * w k"
        proof -
          have "card ?add  card ?rem"
            using S_finite k  _ by (blast intro: card_mono)
          then show ?thesis by (simp add: sum_distrib_left)
        qed
      also have "...  (j  {card ?rem - card ?add + 1 .. card ?rem}. inverse j) * w k"
        proof -
          have "j  {card ?rem - card ?add + 1 .. card ?rem}. inverse (card ?rem)  inverse j"
            by force
          then have "(j  {card ?rem - card ?add + 1 .. card ?rem}. inverse (card ?rem))
                    (j  {card ?rem - card ?add + 1 .. card ?rem}. inverse j)"
            by (blast intro: sum_mono)
          with w_nonneg show ?thesis by (blast intro: mult_right_mono)
        qed
      finally have "sum ?c ?add
                  (j  {card ?rem - card ?add + 1 .. card ?rem}. inverse j) * w k" .
      moreover from SUM have "sum ?c ?cov 
                            (j  {card ?rem + 1 .. card (S k)}. inverse j) * w k"
        using k  {1..m} by simp
      ultimately have "sum ?c (S k  (U - (R - S i)))
                     ((j  {card ?rem - card ?add + 1 .. card ?rem}. inverse j) +
                        (j  {card ?rem + 1 .. card (S k)}. inverse j)) * w k"
        unfolding lhs by argo
      also have "... = (j  {card ?rem - card ?add + 1 .. card (S k)}. inverse j) * w k"
        proof -
          have sum_split: "b  {a .. c}  sum f {a .. c} = sum f {a .. b} + sum f {Suc b .. c}"
            for f :: "nat  real" and a b c :: nat
          proof -
            assume "b  {a .. c}"
            then have "{a .. b}  {Suc b .. c} = {a .. c}" by force
            moreover have "{a .. b}  {Suc b .. c} = {}"
              using b  {a .. c} by auto
            ultimately show ?thesis by (metis finite_atLeastAtMost sum.union_disjoint)
          qed

          have "(j  {card ?rem - card ?add + 1 .. card (S k)}. inverse j)
              = (j  {card ?rem - card ?add + 1 .. card ?rem}. inverse j)
              + (j  {card ?rem + 1 .. card (S k)}. inverse j)"
            proof (cases ?add = {})
              case False
              then have "0 < card ?add" "0 < card ?rem"
                using S_finite k  _ by fastforce+
              then have "Suc (card ?rem - card ?add)  card ?rem" by simp
              moreover have "card ?rem  card (S k)"
                using S_finite k  _ by (simp add: card_mono)
              ultimately show ?thesis by (auto intro: sum_split)
            qed simp
          then show ?thesis by algebra
        qed
      finally show "sum ?c (S k  (U - (R - S i)))
                  (j  {card (S k  (R - S i)) + 1 .. card (S k)}. inverse j) * w k"
        unfolding rhs .
    qed

    from 1 2 3 show ?case by blast
  qed
qed

lemma cover_sum:
  fixes c :: "'a  real"
  assumes "sc C V" "i. 0  c i"
  shows "sum c V  (i  C. sum c (S i))"
proof -
  from assms(1) have "finite C" by (auto simp: sc_def finite_subset)
  then show ?thesis using assms(1)
  proof (induction C arbitrary: V rule: finite_induct)
    case (insert i C)
    have V_split: "( (S ` insert i C)) = ( (S ` C))  S i" by auto
    have finite: "finite ( (S ` C))" "finite (S i)"
      using insert S_finite by (auto simp: sc_def)

    have "sum c (S i) - sum c ( (S ` C)  S i)  sum c (S i)"
      using assms(2) by (simp add: sum_nonneg)
    then have "sum c ( (S ` insert i C))  sum c ( (S ` C)) + sum c (S i)"
      unfolding V_split using sum_Un[OF finite, of c] by linarith
    moreover have "(iinsert i C. sum c (S i)) = (i  C. sum c (S i)) + sum c (S i)"
      by (simp add: insert.hyps)
    ultimately show ?case using insert by (fastforce simp: sc_def)
  qed (simp add: sc_def)
qed

abbreviation H :: "nat  real" where "H  harm"

definition d_star :: nat ("d*") where "d*  Max (card ` (S ` {1..m}))"

lemma set_cover_bound:
  assumes "inv C {}" "sc C' U"
  shows "sum w C  H d* * sum w C'"
proof -
  from invD(3)[OF assms(1)] obtain c where
    "sum w C = sum c U" "i. 0  c i" and H_bound:
    "k  {1..m}. sum c (S k)  H (card (S k)) * w k" ― ‹Lemma 11.10›
    by (auto simp: harm_def Int_absorb2 S_subset)

  have "k  {1..m}. card (S k)  d*" by (auto simp: d_star_def)
  then have "k  {1..m}. H (card (S k))  H d*" by (auto simp: harm_def intro!: sum_mono2)
  with H_bound have "k  {1..m}. sum c (S k)  H d* * w k"
    by (metis atLeastAtMost_iff atLeastatMost_empty_iff empty_iff mult_right_mono w_nonneg)
  moreover have "C'  {1..m}" using assms(2) by (simp add: sc_def)
  ultimately have "i  C'. sum c (S i)  H d* * w i" by blast
  then have "(i  C'. sum c (S i))  H d* * sum w C'"
    by (auto simp: sum_distrib_left intro: sum_mono)

  have "sum w C = sum c U" by fact ― ‹Lemma 11.9›
  also have "...  (i  C'. sum c (S i))" by (rule cover_sum[OF assms(2)]) fact
  also have "...  H d* * sum w C'" by fact
  finally show ?thesis .
qed

theorem set_cover_approx:
"VARS (R :: 'a set) (C :: nat set) (i :: nat)
  {True}
  R := U; C := {};
  WHILE R  {} INV {inv C R} DO
  i := min_arg R m;
  R := R - S i;
  C := C  {i}
  OD
  {sc C U  (C'. sc C' U  sum w C  H d* * sum w C')}"
proof (vcg, goal_cases)
  case 1 show ?case by (rule inv_init)
next
  case 2 thus ?case using inv_step ..
next
  case (3 R C i)
  then have "sc C U" unfolding inv_def by auto
  with 3 show ?case by (auto intro: set_cover_bound)
qed

end (* Set Cover *)

end (* Theory *)