Theory Negative_Association.Negative_Association_FKG_Inequality

section ‹The FKG inequality›

text ‹The FKG inequality~\cite{fortuin1971} is a generalization of Chebyshev's less known other
inequality. It is sometimes referred to as Chebyshev's sum inequality. Although there is a also
a continuous version, which can be stated as:

\[
  E [ f g ] \geq E[f] E [g]
\]

where $f$, $g$ are continuous simultaneously monotone or simultaneously antimonotone
functions on the Lebesgue probability space $[a,b] \subseteq \mathbb R$. ($E f$ denotes the
expectation of the function.)

Note that the inequality is also true for totally ordered discrete probability spaces, for example:
$\{1,\ldots,n\}$ with uniform probabilities.

The FKG inequality is essentially a generalization of the above to not necessarily totally ordered
spaces, but finite distributive lattices.

The proof follows the derivation in the book by Alon and Spencer~\cite[Ch. 6]{alon2000}.›

theory Negative_Association_FKG_Inequality
  imports
    Negative_Association_Util
    Birkhoff_Finite_Distributive_Lattices.Birkhoff_Finite_Distributive_Lattices
begin

theorem four_functions_helper:
  fixes φ :: "nat  'a set  real"
  assumes "finite I"
  assumes "i. i  {0..3}  φ i  Pow I  {0..}"
  assumes "A B. A  I  B  I  φ 0 A * φ 1 B  φ 2 (A  B) * φ 3 (A  B)"
  shows "(APow I. φ 0 A)*(BPow I. φ 1 B)  (CPow I. φ 2 C)*(DPow I. φ 3 D)"
  using assms
proof (induction I arbitrary:φ rule:finite_induct)
  case empty
  then show ?case using empty by auto
next
  case (insert x I)
  define φ' where "φ' i A = φ i A + φ i (A  {x})" for i A

  have a:"(APow (insert x I). φ i A) = (APow I. φ' i A)" (is "?L1 = ?R1") for i
  proof -
    have "?L1 = (APow I. φ i A) + (Ainsert x ` Pow I. φ i A)"
      using insert(1,2) unfolding Pow_insert by (intro sum.union_disjoint) auto
    also have " = (APow I. φ i A) + (APow I. φ i (insert x A))"
      using insert(2) by (subst sum.reindex) (auto intro!:inj_onI)
    also have " = ?R1" using insert(1) unfolding φ'_def sum.distrib by simp
    finally show ?thesis by simp
  qed

  have φ_int: "φ 0 A * φ 1 B  φ 2 C * φ 3 D"
    if "C = A  B" "D = A  B" "A  insert x I" "B  insert x I" for A B C D
    using that insert(5) by auto

  have φ_nonneg: "φ i A  0" if "A  insert x I" "i  {0..3}" for i A
    using that insert(4) by auto

  have "φ' 0 A * φ' 1 B  φ' 2 (A  B) * φ' 3 (A  B)" if "A  I" "B  I" for A B
  proof -
    define a0 a1 where a: "a0 = φ 0 A" "a1 = φ 0 (insert x A)"
    define b0 b1 where b: "b0 = φ 1 B" "b1 = φ 1 (insert x B)"
    define c0 c1 where c: "c0 = φ 2 (A  B)" "c1 = φ 2 (insert x (A  B))"
    define d0 d1 where d: "d0 = φ 3 (A  B)" "d1 = φ 3 (insert x (A  B))"

    have 0:"a0 * b0  c0 * d0" using that unfolding a b c d by (intro φ_int) auto
    have 1:"a0 * b1  c1 * d0" using that insert(2) unfolding a b c d by (intro φ_int) auto
    have 2:"a1 * b0  c1 * d0" using that insert(2) unfolding a b c d by (intro φ_int) auto
    have 3:"a1 * b1  c1 * d1" using that insert(2) unfolding a b c d by (intro φ_int) auto
    have 4:"a0  0" "a1  0" "b0  0" "b1  0" using that unfolding a b by (auto intro!: φ_nonneg)
    have 5:"c0  0" "c1  0" "d0  0" "d1  0" using that unfolding c d by (auto intro!: φ_nonneg)

    consider  (a) "c1 = 0" | (b) "d0 = 0" | (c) "c1 > 0" "d0 > 0" using 4 5 by argo

    then have "(a0 + a1) * (b0 + b1)  (c0 + c1) * (d0 + d1)"
    proof (cases)
      case a
      hence "a0 * b1 = 0" "a1 * b0 = 0" "a1 * b1 = 0"
        using 1 2 3 by (intro order_antisym mult_nonneg_nonneg 4 5;simp_all)+
      then show ?thesis unfolding distrib_left distrib_right
        using 0 4 5 by (metis add_mono mult_nonneg_nonneg)
    next
      case b
      hence "a0 * b0 = 0" "a0 * b1 = 0" "a1 * b0 = 0"
        using 0 1 2 by (intro order_antisym mult_nonneg_nonneg 4 5;simp_all)+
      then show ?thesis unfolding distrib_left distrib_right
        using 3 4 5 by (metis add_mono mult_nonneg_nonneg)
    next
      case c
      have "0  (c1*d0-a0*b1) * (c1*d0 - a1*b0)"
        using 1 2 by (intro mult_nonneg_nonneg) auto
      hence "(a0 + a1) * (b0 + b1)*d0*c1  (a0*b0 + c1*d0) * (c1*d0 + a1*b1)"
        by (simp add:algebra_simps)
      hence "(a0 + a1) * (b0 + b1)  ((a0*b0)/d0 + c1) * (d0 + (a1*b1)/c1)"
        using c 4 5 by (simp add:field_simps)
      also have "  (c0 + c1) * (d0 + d1)"
        using 0 3 c 4 5 by (intro mult_mono add_mono order.refl) (simp add:field_simps)+
      finally show ?thesis by simp
    qed

    thus ?thesis unfolding φ'_def a b c d by auto
  qed

  moreover have "φ' i  Pow I  {0..}" if "i  {0..3}" for i
    using insert(4)[OF that] unfolding φ'_def by (auto intro!:add_nonneg_nonneg)
  ultimately show ?case unfolding a by (intro insert(3)) auto
qed

text ‹The following is the Ahlswede-Daykin inequality~\cite{ahlswede1978} also referred to by
Alon and Spencer as the four functions theorem~\cite[Th. 6.1.1]{alon2000}.›

theorem four_functions:
  fixes α β γ δ :: "'a set  real"
  assumes "finite I"
  assumes "α  Pow I  {0..}" "β  Pow I  {0..}" "γ  Pow I  {0..}" "δ  Pow I  {0..}"
  assumes "A B. A  I  B  I  α A * β B  γ (A  B) * δ (A  B)"
  assumes "M  Pow I" "N  Pow I"
  shows "(AM. α A)*(BN. β B)  (C| AM. BN. C=AB. γ C)*(D| AM. BN. D=AB. δ D)"
    (is "?L  ?R")
proof -
  define α' where "α' A = (if A  M then α A else 0)" for A
  define β' where "β' B = (if B  N then β B else 0)" for B
  define γ' where "γ' C = (if AM. BN. C=AB then γ C else 0)" for C
  define δ' where "δ' D = (if AM. BN. D=AB then δ D else 0)" for D

  define φ where "φ i = [α',β',γ',δ'] ! i" for i

  have "list_all (λi. φ i  Pow I  {0..}) [0..<4]"
    unfolding φ_def  α'_def β'_def γ'_def δ'_def using assms(2-5)
    by (auto simp add:numeral_eq_Suc)
  hence φ_nonneg: "φ i  Pow I  {0..}" if "i  {0..3}" for i
    unfolding list.pred_set using that by auto

  have 0: "φ 0 A * φ 1 B  φ 2 (A  B) * φ 3 (A  B)" (is "?L1  ?R1") if "A  I" "B  I" for A B
  proof (cases "A  M  B  N")
    case True
    have "?L1 = α A * β B" using True unfolding φ_def α'_def β'_def by auto
    also have "  γ (A  B) * δ (A  B)" by(intro that assms(6))
    also have " = ?R1" using True unfolding γ'_def δ'_def φ_def by auto
    finally show ?thesis by simp
  next
    case False
    hence "?L1 = 0" unfolding α'_def β'_def φ_def by auto
    also have "  ?R1" using φ_nonneg[of "2"] φ_nonneg[of "3"] that
      by (intro mult_nonneg_nonneg) auto
    finally show ?thesis by simp
  qed

  have fin_pow: "finite (Pow I)" using assms(1) by simp

  have "?L = (A  Pow I. α' A) * (B  Pow I. β' B)"
    unfolding α'_def β'_def using assms(1,7,8) by (simp add: sum.If_cases Int_absorb1)
  also have " = (A  Pow I. φ 0 A) * (A  Pow I. φ 1 A)" unfolding φ_def by simp
  also have "  (A  Pow I. φ 2 A) * (A  Pow I. φ 3 A)"
    by (intro four_functions_helper assms(1) φ_nonneg 0) auto
  also have " = (A  Pow I. γ' A) * (B  Pow I. δ' B)" unfolding φ_def by simp
  also have " = ?R"
    unfolding γ'_def δ'_def sum.If_cases[OF fin_pow] sum.neutral_const add_0_right using assms(7,8)
    by (intro arg_cong2[where f="(*)"] sum.cong refl) auto
  finally show ?thesis by simp
qed

text ‹Using Birkhoff's Representation
Theorem~\cite{birkhoff1967,Birkhoff_Finite_Distributive_Lattices-AFP} it is possible to generalize
the previous to finite distributive lattices~\cite[Cor. 6.1.2]{alon2000}.›
lemma four_functions_in_lattice:
  fixes α β γ δ :: "'a :: finite_distrib_lattice   real"
  assumes "range α  {0..}" "range β  {0..}" "range γ  {0..}" "range δ  {0..}"
  assumes "x y. α x * β y  γ (x  y) * δ (x  y)"
  shows "(xM. α x)*(yN. β y)  (c| xM. yN. c=xy. γ c)*(d| xM. yN. d=xy. δ d)"
    (is "?L  ?R")
proof -
  let ?e = "λx::'a.  x "
  let ?f = "the_inv ?e"

  have ran_e: "range ?e = 𝒪𝒥" by (rule bij_betw_imp_surj_on[OF birkhoffs_theorem])
  have inj_e: "inj ?e" by (rule bij_betw_imp_inj_on[OF birkhoffs_theorem])

  define conv :: "('a  real)  'a set  real"
    where "conv φ I = (if I  𝒪𝒥 then φ(?f I) else 0)" for φ I

  define α' β' γ' δ' where prime_def:"α' = conv α" "β' = conv β" "γ' = conv γ" "δ' = conv δ"

  have 1:"conv φ  Pow 𝒥  {0..}" if "range φ  {(0::real)..}" for φ
    using that unfolding conv_def by (intro Pi_I) auto

  have 0:"α' A * β' B  γ' (A  B) * δ' (A  B)" if "A  𝒥" "B  𝒥" for A B
  proof (cases "A  𝒪𝒥  B  𝒪𝒥")
    case True
    define x y where xy: "x = ?f A" "y = ?f B"

    have p0:"?e (x  y) = A  B"
      using True ran_e unfolding join_irreducibles_join_homomorphism xy
      by (subst (1 2) f_the_inv_into_f[OF inj_e]) auto
    hence p1:"A  B  𝒪𝒥" using ran_e by auto

    have p2:"?e (x  y) = A  B"
      using True ran_e unfolding join_irreducibles_meet_homomorphism xy
      by (subst (1 2) f_the_inv_into_f[OF inj_e]) auto
    hence p3:"A  B  𝒪𝒥" using ran_e by auto

    have "α' A * β' B = α (?f A) * β (?f B) " using True unfolding prime_def conv_def by simp
    also have "  γ (?f A  ?f B) * δ (?f A  ?f B)" by (intro assms(5))
    also have " = γ (x  y) * δ (x  y)" unfolding xy by simp
    also have " = γ (?f (?e (x  y))) * δ (?f (?e (x  y)))" by (simp add: the_inv_f_f[OF inj_e])
    also have " = γ (?f (A  B)) * δ (?f (A  B))" unfolding p0 p2 by auto
    also have " = γ' (A  B) * δ' (A  B)"  using p1 p3 unfolding prime_def conv_def by auto
    finally show ?thesis by simp
  next
    case False
    hence "α' A * β' B = 0" unfolding prime_def conv_def by simp
    also have "  γ' (A  B) * δ' (A  B)" unfolding prime_def
      using 1 that assms(3,4) by (intro mult_nonneg_nonneg) auto
    finally show ?thesis by simp
  qed

  define M' where "M' = (λx.  x ) ` M"
  define N' where "N' = (λx.  x ) ` N"

  have ran1: "M'  𝒪𝒥" "N'  𝒪𝒥" unfolding M'_def N'_def using ran_e by auto
  hence ran2: "M'  Pow 𝒥" "N'  Pow 𝒥" unfolding down_irreducibles_def by auto

  have "?f  ?e ` S  S" for S using inj_e by (simp add: Pi_iff the_inv_f_f)
  hence bij_betw: "bij_betw ?f (?e ` S) S" for S :: "'a set"
    by (intro bij_betwI[where g="?e"] the_inv_f_f f_the_inv_into_f inj_e) auto

  have a: "{C. AM'. BN'. C = A  B} = ?e ` {c. xM. yN. c=xy}"
    unfolding M'_def N'_def Set.bex_simps join_irreducibles_join_homomorphism[symmetric] by auto
  have b: "{D. AM'. BN'. D = A  B} = ?e ` {c. xM. yN. c=xy}"
    unfolding M'_def N'_def Set.bex_simps join_irreducibles_meet_homomorphism[symmetric] by auto

  have M'_N'_un_ran: "{C. AM'. BN'. C = A  B}  𝒪𝒥"
    unfolding a using ran_e by auto
  have M'_N'_int_ran: "{C. AM'. BN'. C = A  B}  𝒪𝒥"
    unfolding b using ran_e by auto

  have "?L =(AM'. α (?f A)) * (AN'. β (?f A))"
    unfolding M'_def N'_def
    by (intro arg_cong2[where f="(*)"] sum.reindex_bij_betw[symmetric] bij_betw)
  also have " = (AM'. α' A)*(AN'. β' A)"
    unfolding prime_def conv_def using ran1 by (intro arg_cong2[where f="(*)"] sum.cong refl) auto
  also have "  (C | AM'. BN'. C = A  B. γ' C) * (D | AM'. BN'. D = A  B. δ' D)"
    using ran2 by (intro four_functions[where I="𝒥"] 0) (auto intro!:1 assms simp:prime_def)
  also have " = (C|AM'. BN'. C=AB. γ(?f C))*(D|AM'.BN'. D=AB. δ(?f D))"
    using M'_N'_un_ran M'_N'_int_ran unfolding prime_def conv_def
    by (intro arg_cong2[where f="(*)"] sum.cong refl) auto
  also have " = ?R"
    unfolding a b by (intro arg_cong2[where f="(*)"] sum.reindex_bij_betw bij_betw)
  finally show ?thesis by simp
qed

theorem fkg_inequality:
  fixes μ :: "'a :: finite_distrib_lattice  real"
  assumes "range μ  {0..}" "range f  {0..}" "range g  {0..}"
  assumes "x y. μ x * μ y  μ (x  y) * μ (x  y)"
  assumes "mono f" "mono g"
  shows "(xUNIV. μ x*f x) * (xUNIV. μ x*g x)  (xUNIV. μ x*f x*g x) * sum μ UNIV"
    (is "?L  ?R")
proof -
  define α where "α x = μ x * f x" for x
  define β where "β x = μ x * g x" for x
  define γ where "γ x = μ x * f x * g x" for x
  define δ where "δ x = μ x" for x

  have 0:"f x  0" if "range f  {0..}" for f :: "'a  real" and x
    using that by auto

  note μfg_nonneg = 0[OF assms(1)] 0[OF assms(2)] 0[OF assms(3)]

  have 1:"α x * β y  γ (x  y) * δ (x  y)" (is "?L1  ?R1") for x y
  proof -
    have "?L1 = (μ x * μ y) * (f x * g y)" unfolding α_def β_def by (simp add:ac_simps)
    also have "  (μ (x  y) * μ (x  y)) * (f x * g y)"
      using assms(2,3) by (intro mult_right_mono assms(4) mult_nonneg_nonneg) auto
    also have "  (μ (x  y) * μ (x  y)) * (f (x  y) * g (x  y))"
      using μfg_nonneg
      by (intro mult_left_mono mult_mono monoD[OF assms(5)] monoD[OF assms(6)] mult_nonneg_nonneg)
       simp_all
    also have " = ?R1" unfolding γ_def δ_def by simp
    finally show ?thesis by simp
  qed

  have "?L = (xUNIV. α x) * (yUNIV. β y)" unfolding α_def β_def by simp
  also have "  (c| xUNIV. yUNIV. c=xy. γ c)*(d| xUNIV. yUNIV. d=xy. δ d)"
    using μfg_nonneg by (intro four_functions_in_lattice 1) (auto simp:α_def β_def γ_def δ_def)
  also have " = (xUNIV. γ x) * (xUNIV. δ x)"
    using sup.idem[where 'a='a] inf.idem[where 'a='a]
    by (intro arg_cong2[where f="(*)"] sum.cong refl UNIV_eq_I[symmetric] CollectI) (metis UNIV_I)+
  also have " = ?R" unfolding γ_def δ_def by simp
  finally show ?thesis by simp
qed

theorem fkg_inequality_gen:
  fixes μ :: "'a :: finite_distrib_lattice  real"
  assumes "range μ  {0..}"
  assumes "x y. μ x * μ y  μ (x  y) * μ (x  y)"
  assumes "monotone (≤) (≤≥τ) f" "monotone (≤) (≤≥σ) g"
  shows "(xUNIV. μ x*f x) * (xUNIV. μ x*g x) ≤≥τ*σ(xUNIV. μ x*f x*g x) * sum μ UNIV"
    (is "?L ≤≥?x?R")
proof -
  define a where "a = max (MAX x. -f x*(±τ)) (MAX x. -g x*(±σ))"

  define f' where "f' x = a + f x*(±τ)" for x
  define g' where "g' x = a + g x*(±σ)" for x

  have f'_mono: "mono f'" unfolding f'_def using monotoneD[OF assms(3)]
    by (intro monoI add_mono order.refl)  (cases τ, auto simp:comp_def ac_simps)

  have g'_mono: "mono g'" unfolding g'_def using monotoneD[OF assms(4)]
    by (intro monoI add_mono order.refl) (cases σ, auto simp:comp_def ac_simps)

  have f'_nonneg: "f' x  0" for x
    unfolding f'_def a_def max_add_distrib_left
    by (intro max.coboundedI1) (auto intro!:Max.coboundedI simp: algebra_simps real_0_le_add_iff)

  have g'_nonneg: "g' x  0" for x
    unfolding g'_def a_def max_add_distrib_left
    by (intro max.coboundedI2) (auto intro!:Max.coboundedI simp: algebra_simps real_0_le_add_iff)

  let ?M = "(x  UNIV. μ x)"
  let ?sum = "(λf. (xUNIV. μ x * f x))"

  have "(±τ*σ) * ?L = ?sum (λx. f x*(±τ)) * ?sum (λx. g x*(±σ))"
    by (simp add:ac_simps sum_distrib_left[symmetric] dir_mult_hom del:rel_dir_mult)
  also have " = (?sum (λx. (f x*(±τ)+a))-?M*a) * (?sum (λx. (g x*(±σ)+a))-?M*a)"
    by (simp add:algebra_simps sum.distrib sum_distrib_left)
  also have " = (?sum f')*(?sum g') - ?M*a*?sum f'- ?M*a*?sum g' + ?M*?M*a*a"
    unfolding f'_def g'_def by (simp add:algebra_simps)
  also have "  ((xUNIV. μ x*f' x*g' x)*?M) - ?M*a*?sum f'- ?M*a*?sum g' + ?M*?M*a*a"
    using f'_nonneg g'_nonneg
    by (intro diff_mono add_mono order.refl fkg_inequality assms(1,2) f'_mono g'_mono) auto
  also have " = ?sum (λx. (f x*(±τ))*(g x*(±σ)))*?M"
    unfolding f'_def g'_def by (simp add:algebra_simps sum.distrib sum_distrib_left[symmetric])
  also have " = (±τ*σ) * ?R"
    by (simp add:ac_simps sum.distrib sum_distrib_left[symmetric] dir_mult_hom del:rel_dir_mult)
  finally have " (±τ*σ) * ?L  (±τ*σ) * ?R" by simp
  thus ?thesis by (cases "τ*σ", auto)
qed

theorem fkg_inequality_pmf:
  fixes M :: "('a :: finite_distrib_lattice) pmf"
  fixes f g :: "'a  real"
  assumes "x y. pmf M x * pmf M y  pmf M (x  y) * pmf M (x  y)"
  assumes "monotone (≤) (≤≥τ) f" "monotone (≤) (≤≥σ) g"
  shows "(x. f x M) * (x. g x M) ≤≥τ * σ(x. f x * g x M)"
    (is "?L ≤≥_?R")
proof -
  have 0:"?L = (aUNIV. pmf M a * f a) * (aUNIV. pmf M a * g a)"
    by (subst (1 2) integral_measure_pmf_real[where A="UNIV"]) (auto simp:ac_simps)
  have "?R = ?R * (x. 1 M)" by simp
  also have " = (aUNIV. pmf M a*f a*g a) * sum (pmf M) UNIV"
    by (subst (1 2) integral_measure_pmf_real[where A="UNIV"])  (auto simp:ac_simps)
  finally have 1: "?R = (aUNIV. pmf M a*f a*g a) * sum (pmf M) UNIV" by simp
  thus ?thesis unfolding 0 1
    by (intro fkg_inequality_gen assms) auto
qed

end