Theory Derandomization_Conditional_Expectations_Cut

section ‹Method of Conditional Expectations: Large Cuts›

text ‹The following is an example of the application of the method of conditional
expectations~\cite{jukna2001,alon2000} to construct an approximation algorithm that finds a cut of
an undirected graph cutting at least half of the edges. This is also the example that 
Vadhan~\cite[Section 3.4.2]{vadhan2012} uses to introduce the 
``Method of Conditional Expectations''.›

theory Derandomization_Conditional_Expectations_Cut
  imports Derandomization_Conditional_Expectations_Preliminary
begin

context fin_sgraph
begin

definition cut_size where "cut_size C = card {e  E. e  C  {}  e - C  {}}"

lemma eval_cond_edge:
  assumes "L  U" "finite U" "e  E"
  shows "(C. of_bool (eC{}  e-C{}) pmf_of_set {C. LCCU}) = 
    ((if e  -U  L then of_bool(e  L {}  e  -U {} )::real else 1/2))"
    (is "?L = ?R")
proof -
  obtain e1 e2 where e_def: "e = {e1,e2}" "e1  e2" using two_edges[OF assms(3)]
    by (meson card_2_iff)

  let ?sing_iff = "(λx e. (if x then {e} else {}))"

  define R1 where "R1 = (if e1  L then {True} else (if e1  U - L then {False,True} else {False}))"
  define R2 where "R2 = (if e2  L then {True} else (if e2  U - L then {False,True} else {False}))"

  have bij: "bij_betw (λx. ((e1  x,e2 x),x-{e1,e2})) {C. L  C  C  U} 
    ((R1 × R2) × {C. L-{e1,e2}  C  C  U-{e1,e2}})"
    unfolding R1_def R2_def using e_def(2) assms(1)
    by (intro bij_betwI[where g="(λ((a,b),x). x  ?sing_iff a e1  ?sing_iff b e2)"]) 
     (auto split:if_split_asm)  (* SLOW *)

  have r: "map_pmf (λx. (e1  x, e2  x)) (pmf_of_set {C. L  C  C  U}) = pmf_of_set (R1 × R2)"
    using assms(1,2) map_pmf_of_set_bij_betw_2[OF bij] by auto

  have "?L = C. of_bool ((e1  C)  (e2  C)) (pmf_of_set {C. L  C  C  U})"
    unfolding e_def(1) using e_def(2) by (intro integral_cong_AE AE_pmfI) auto
  also have "... = x. of_bool(fst x  snd x) pmf_of_set (R1 × R2)"
    unfolding r[symmetric] by simp
  also have "... = (if {e1,e2}  -U  L then of_bool({e1,e2}  L {}{e1,e2} - U {} ) else 1/2)"
    unfolding R1_def R2_def e_def(1) using e_def(2) assms(1)
    by (auto simp add:integral_pmf_of_set split:if_split_asm)
  also have "... = ?R" unfolding e_def by simp
  finally show ?thesis by simp
qed

text ‹If every vertex is selected independently with probability $\frac{1}{2}$ into the cut, it is
easy to deduce that an edge will be cut with probability $\frac{1}{2}$ as well. Thus the expected cut size will be
@{term "real (card E) / 2"}.›

lemma exp_cut_size:
  "(C. real (cut_size C) pmf_of_set (Pow V)) = real (card E) / 2" (is "?L = ?R")
proof -
  have a:"False" if "x  E" "x  -V" for x
  proof -
    have "x = {}" using wellformed[OF that(1)] that(2) by auto
    thus "False" using two_edges[OF that(1)] by simp
  qed

  have "?L = (C. (e  E. of_bool (e  C  {}  e - C  {})) pmf_of_set (Pow V))"
    using fin_edges by (simp_all add:of_bool_def cut_size_def sum.If_cases Int_def)
  also have "... = (e  E. (C. of_bool (e  C  {}  e - C  {}) pmf_of_set (Pow V)))"
    using finV by (intro Bochner_Integration.integral_sum integrable_measure_pmf_finite) 
     (simp add: Pow_not_empty)
  also have "... = (e  E. (C. of_bool (eC{}  e - C  {}) pmf_of_set {C. {}  C  C  V}))"
    unfolding Pow_def by simp
  also have "... = (e  E.  (if e  - V  {} then of_bool (e  {} {}e -V {}) else 1 / 2))"
    by (intro sum.cong eval_cond_edge finV) auto
  also have "... = (e  E. 1/2)" using a by (intro sum.cong) auto
  also have "... = ?R" by simp
  finally show ?thesis by simp
qed

text ‹For the above it is easy to show that there exists a cut, cutting at least half of the edges.›

lemma exists_cut: "C  V. real (cut_size C)  card E/2"
proof -
  have "xset_pmf (pmf_of_set (Pow V)). card E / 2  cut_size x" using finV exp_cut_size[symmetric]
    by (intro exists_point_above_expectation integrable_measure_pmf_finite)(auto simp:Pow_not_empty)
  moreover have "set_pmf (pmf_of_set (Pow V)) = Pow V"
    using finV Pow_not_empty by (intro set_pmf_of_set) auto
  ultimately show ?thesis by auto
qed

end

text ‹However the above is just an existence proof, but it doesn't provide a method to construct
such a cut efficiently. Here, we can apply the method of conditional expectations.

This works because, we can not only compute the expectation of the number of cut edges, when
all vertices are chosen at random, but also conditional expectations, when some of the edges
are fixed. The idea of the algorithm, is to choose the assignment of vertices into the cut
based on which option maximizes the conditional expectation. The latter can be done incrementally for each vertex.

This results in the following efficient algorithm:›

fun derandomized_max_cut :: "'a list  'a set  'a set  'a set set  'a set" where 
  "derandomized_max_cut [] R _ _ = R" |
  "derandomized_max_cut (v#vs) R B E = 
    (if card {e  E. v  e  e  R  {}}  card {e  E. v  e  e  B  {}} then
      derandomized_max_cut vs R (B  {v}) E
    else
      derandomized_max_cut vs (R  {v}) B E
    )"

context fin_sgraph
begin

text ‹The term @{term cond_exp} is the conditional expectation, when some of the edges are selected
into the cut, and some are selected to be outside the cut, while the remaining vertices are chosen
randomly.›

definition cond_exp where "cond_exp R B = (C. real (cut_size C) pmf_of_set {C. R  C  C  V-B})"

text ‹The following is the crucial property of conditional expectations, the average of choosing
a vertex in/out is the same as not fixing that vertex. This means that at least one choice will
not decrease the conditional expectation.›

lemma cond_exp_split:
  assumes "R  V" "B  V" "R  B = {}" "v  V -R-B"
  shows "cond_exp R B = (cond_exp (R  {v}) B + cond_exp R (B  {v}))/2" (is "?L = ?R")
proof -
  let ?A = "{C. R{v}CCV-B}"
  let ?B = "{C. RCCV-(B{v})}"
  define p where "p = real (card ?A) / (card ?A + card ?B)"

  have a: "{C. RCCV-B} = ?A  ?B" using assms by auto
  have b: "?A  ?B = {}" using assms by auto
  have c: "finite (?A  ?B)" using finV by auto
  have "R {v}  V-B" using assms by auto
  hence g: "?A  {}" by auto
  hence d: "?A  ?B  {}" by simp
  have e: "real (cut_size x)  real (card E)" for x
    unfolding cut_size_def by (intro of_nat_mono card_mono fin_edges) auto

  have "card ?A = card ?B" using assms(1-4) 
    by (intro bij_betw_same_card[where f="λx. x - {v}"] bij_betwI[where g="insert v"]) auto
  moreover have "card ?A > 0" using g c card_gt_0_iff by auto 
  ultimately have p_val: "p = 1/2" unfolding p_def by auto
  have "?L = (b.(C. real (cut_size C) pmf_of_set (if b then ?A else ?B)) bernoulli_pmf p)"
    using e unfolding cond_exp_def a pmf_of_set_un[OF d b c] p_def
    by (subst integral_bind_pmf[where M="card E"]) auto 
  also have "... = ((C. real(cut_size C) pmf_of_set ?A)+(C. real(cut_size C) pmf_of_set ?B))/2"
    unfolding p_val by (subst integral_bernoulli_pmf) simp_all
  also have "... = ?R" unfolding cond_exp_def by simp
  finally show ?thesis by simp
qed

lemma cond_exp_cut_size:
  assumes "R  V" "B  V" "R  B = {}"
  shows "cond_exp R B = real (card {eE. eR{}eB{}}) + real (card {eE. eV-R-B{}}) / 2" 
    (is "?L = ?R")
proof -
  have a:"finite {C. R  C  C  V - B} " "{C. R  C  C  V - B}  {}" using finV assms by auto 

  have b:"e  -V  B  R" if cthat: "e  E" "e  R  {}" "e  B  {}" for e
  proof -
    obtain e1 where e1: "e1  e  R" using cthat(2) by auto
    obtain e2 where e2: "e2  e  B" using cthat(3) by auto
    have "e1  e2" using e1 e2 assms(3) by auto
    hence "card {e1,e2} = 2" by auto
    hence "e = {e1,e2}" using two_edges[OF cthat(1)] e1 e2 
      by (intro card_seteq[symmetric]) (auto intro!:card_ge_0_finite) 
    thus ?thesis using e1 e2 by simp
  qed

  have "?L = (C. (e  E. of_bool (e  C  {}  e - C  {})) pmf_of_set {C. R  C  C  V-B})"
    unfolding cond_exp_def using fin_edges 
    by (simp_all add:of_bool_def cut_size_def sum.If_cases Int_def)
  also have "... = (e  E. (C. of_bool (eC  {}  e-C  {}) pmf_of_set {C. RC  CV-B}))"
    using a by (intro Bochner_Integration.integral_sum integrable_measure_pmf_finite) auto
  also have "... = (e  E. ((if e  -(V-B)  R then of_bool(eR{}e-(V-B){})::real else 1/2)))"
    using finV assms(1,3) by (intro sum.cong eval_cond_edge) auto
  also have "... = real (card {eE. e-VBReR{}e-(V-B){}}) + real (card {eE. ¬ e -VB R}) / 2"
    using fin_edges by (simp add: sum.If_cases of_bool_def Int_def)
  also have "... = ?R" using wellformed assms b
    by (intro arg_cong[where f="card"] arg_cong2[where f="(+)"] arg_cong[where f="real"] 
        arg_cong2[where f="(/)"] refl Collect_cong order_antisym) auto
  finally show ?thesis by simp
qed

text ‹Indeed the algorithm returns a cut with the promised approximation guarantee.›

theorem derandomized_max_cut:
  assumes "vs  permutations_of_set V"
  defines "C  derandomized_max_cut vs {} {} E"
  shows "C  V" "2 * cut_size C  card E"
proof -
  define R :: "'a set" where "R = {}"
  define B :: "'a set" where "B = {}"
  have a:"cut_size (derandomized_max_cut vs R B E)  cond_exp R B 
      (derandomized_max_cut vs R B E)  V"
    if "distinct vs" "set vs  R = {}" "set vs  B = {}" "R  B = {}" "{set vs,R,B}= V" 
    using that
  proof (induction vs arbitrary: R B)
    case Nil
    have "cond_exp R B = real (card {eE. eR{}eB{}}) + real (card {eE. eV-R-B  {}}) / 2"
      using Nil by (intro cond_exp_cut_size) auto
    also have "... = real (card {eE. eR{}eB{}})+real (card ({}::'a set set ))/2" using Nil
      by (intro arg_cong[where f="card"] arg_cong2[where f="(+)"]  arg_cong2[where f="(/)"] 
          arg_cong[where f="real"]) auto
    also have "... = real (card {eE. eR{}eB{}})" by simp
    also have "... = real (cut_size R)" using Nil wellformed unfolding cut_size_def
      by (intro arg_cong[where f="card"] arg_cong2[where f="(+)"] arg_cong[where f="real"]) auto
    finally have "cond_exp R B = real (cut_size R)" by simp
    thus ?case using Nil by auto 
  next
    case (Cons vh vt)
    let ?NB = "{e  E. vh  e  e  B  {}}"
    let ?NR = "{e  E. vh  e  e  R  {}}"
    define t where "t = real (card {e  E. e  V - R - (B  {vh})  {}}) / 2"
    have t_alt: "t = real (card {e  E. e  V - (R  {vh}) - B  {}}) / 2"
      unfolding t_def by (intro arg_cong[where f="λx. real (card x) /2"]) auto

    have "cond_exp R (B{vh})-card ?NR = real(card {eE. eR{}e(B{vh}){}})-(card ?NR)+t"
      using Cons(2-6) unfolding t_def by (subst cond_exp_cut_size) auto
    also have "... = real(card {eE. eR{}e(B{vh}){}}-card ?NR)+t"
      using fin_edges by (intro of_nat_diff[symmetric] arg_cong2[where f="(+)"] card_mono) auto
    also have "... = real(card ({eE. eR{}e(B{vh}){}}- ?NR))+t"
      using fin_edges by (intro arg_cong[where f="(λx. real x+t)"] card_Diff_subset[symmetric]) auto
    also have "... = real(card ({eE. e(R{vh}){}eB{}}- ?NB))+t"
      by (intro arg_cong[where f="(λx. real (card x) + t)"] ) auto
    also have "... = real(card {eE. e(R{vh}){}eB{}}-card ?NB)+t"
      using fin_edges by (intro arg_cong[where f="(λx. real x+t)"] card_Diff_subset) auto
    also have "... = real(card {eE. e(R{vh}){}eB{}})-(card ?NB)+t"
      using fin_edges by (intro of_nat_diff arg_cong2[where f="(+)"] card_mono) auto
    also have "... = cond_exp (R{vh}) B -card ?NB"
      using Cons(2-6) unfolding t_alt by (subst cond_exp_cut_size) auto
    finally have d:"cond_exp R (B{vh}) - cond_exp (R{vh}) B = real (card ?NR) - card ?NB" 
      by (simp add:ac_simps)

    have split: "cond_exp R B = (cond_exp (R  {vh}) B + cond_exp R (B  {vh})) / 2"
      using Cons(2-6) by (intro cond_exp_split) auto

    have dvt: "distinct vt" using Cons(2) by simp
    show ?case 
    proof (cases "card ?NR  card ?NB")
      case True
      have 0:"set vtR={}" "set vt(B{vh})={}" "R(B{vh})={}" "{set vt,R,B{vh}}=V"
        using Cons(2-6) by auto

      have "cond_exp R B  cond_exp R (B  {vh})" unfolding split using d True by simp
      thus ?thesis using True Cons(1)[OF dvt 0] by simp
    next
      case False
      have 0:"set vt(R{vh})={}" "set vtB={}" "(R{vh})B={}" "{set vt,R{vh},B}=V"
        using Cons(2-6) by auto
      have "cond_exp R B  cond_exp (R  {vh}) B" unfolding split using d False by simp
      thus ?thesis using False Cons(1)[OF dvt 0] by simp
    qed
  qed
  moreover have "e  V  {}" if "e  E" for e 
    using Int_absorb2[OF wellformed[OF that]] two_edges[OF that] by auto
  hence "{e  E. e  V  {}} = E"  by auto
  hence "cond_exp {} {} = graph_size /2" by (subst cond_exp_cut_size) auto
  ultimately show "C  V" "2 * cut_size C  card E"
    unfolding C_def R_def B_def using permutations_of_setD[OF assms(1)] by auto
qed

end

end