Theory Bennett_Inequality

section ‹Bennett's Inequality›

text ‹In this section we verify Bennett's inequality~\cite{bennett1962} and a (weak) version of
Bernstein's inequality as a corollary. Both inequalities give concentration bounds for sums of
independent random variables. The statement and proofs follow a summary paper by
Boucheron et al.~\cite{DBLP:conf/ac/BoucheronLB03}.›

theory Bennett_Inequality
  imports Concentration_Inequalities_Preliminary
begin

context prob_space
begin

(* Restating Chernoff inequality for independent variables *)
lemma indep_vars_Chernoff_ineq_ge:
  assumes I: "finite I"
  assumes ind: "indep_vars (λ _. borel) X I"
  assumes sge: "s  0"
  assumes int: "i. i  I  integrable M (λx. exp (s * X i x))"
  shows "prob {x  space M. (i I. X i x - expectation (X i))  t} 
    exp (-s*t) *
    (iI. expectation (λx. exp(s * (X i x - expectation (X i)))))"
proof (cases "s = 0")
  case [simp]: True
  thus ?thesis
    by (simp add: prob_space)
next
  case False
  then have s: "s > 0" using sge by auto

  have [measurable]: "i. i  I  random_variable borel (X i)"
    using ind unfolding indep_vars_def by blast

  have indep1: "indep_vars (λ_. borel)
     (λi ω. exp (s * (X i ω - expectation (X i)))) I"
    apply (intro indep_vars_compose[OF ind, unfolded o_def])
    by auto

  define S where "S = (λx. (i I. X i x - expectation (X i)))"

  have int1: "i. i  I 
         integrable M (λω. exp (s * (X i ω - expectation (X i))))"
    by (auto simp add: algebra_simps exp_diff int)

  have eprod: "x. exp (s * S x) = (iI. exp(s * (X i x - expectation (X i))))"
     unfolding S_def
     by (simp add: assms(1) exp_sum vector_space_over_itself.scale_sum_right)

  from indep_vars_integrable[OF I indep1 int1]
  have intS: "integrable M (λx. exp (s * S x))"
    unfolding eprod by auto

  then have si: "set_integrable M (space M) (λx. exp (s * S x))"
    unfolding set_integrable_def
    apply (intro integrable_mult_indicator)
    by auto

  from Chernoff_ineq_ge[OF s si]
  have "prob {x  space M. S x  t}  exp (- s * t) * (xspace M. exp (s * S x)M)"
    by auto

  also have "(xspace M. exp (s * S x)M) = expectation (λx. exp(s * S x))"
     unfolding set_integral_space[OF intS] by auto

  also have "... = expectation (λx. iI. exp(s * (X i x - expectation (X i))))"
     unfolding S_def
     by (simp add: assms(1) exp_sum vector_space_over_itself.scale_sum_right)
  also have "... = (iI. expectation (λx. exp(s * (X i x - expectation (X i)))))"
     apply (intro indep_vars_lebesgue_integral[OF I indep1 int1]) .
  finally show ?thesis
    unfolding S_def
    by auto
qed

definition bennett_h::"real  real"
  where "bennett_h u = (1 + u) * ln (1 + u) - u"

lemma exp_sub_two_terms_eq:
  fixes x :: real
  shows "exp x - x - 1 = (n. x^(n+2) / fact (n+2))"
    "summable (λn. x^(n+2) / fact (n+2))"
proof -
  have "(i<2. inverse (fact i) * x ^ i) = 1 + x"
    by (simp add:numeral_eq_Suc)
  thus "exp x - x - 1 = (n. x^(n+2) / fact (n+2))"
    unfolding exp_def
    apply (subst suminf_split_initial_segment[where k = 2])
    by (auto simp add: summable_exp divide_inverse_commute)
  have "summable (λn. x^n / fact n)"
    by (simp add: divide_inverse_commute summable_exp)
  then have "summable (λn. x^(Suc (Suc n)) / fact (Suc (Suc n)))"
    apply (subst summable_Suc_iff)
    apply (subst summable_Suc_iff)
    by auto
  thus "summable (λn. x^(n+2) / fact (n+2))" by auto
qed

lemma psi_mono:
  defines "f  (λx. (exp x - x - 1) - x^2 / 2)"
  assumes xy: "a  (b::real)"
  shows "f a  f b"
proof -
  have 1: "(f has_real_derivative (exp x - x - 1)) (at x)" for x
    unfolding f_def
    by (auto intro!: derivative_eq_intros)

  have 2: "x. x  {a..b}  0  exp x - x - 1"
    by (smt (verit) exp_ge_add_one_self)

  from deriv_nonneg_imp_mono[OF 1 2 xy]
  show ?thesis by auto
qed

(* TODO: not sure if this holds for y < 0 too *)
lemma psi_inequality:
  assumes le: "x  (y::real)" "y  0"
  shows "y^2 * (exp x - x - 1)  x^2 * (exp y - y - 1)"
proof -

  have x: "exp x - x - 1 = (n. (x^(n+2) / fact (n+2)))"
    "summable (λn. x^(n+2) / fact (n+2))"
    using exp_sub_two_terms_eq .

  have y: "exp y - y - 1 = (n. (y^(n+2) / fact (n+2)))"
    "summable (λn. y^(n+2) / fact (n+2))"
    using exp_sub_two_terms_eq .

  (* Simplify the expressions in the inequality *)
  have l:"y^2 * (exp x - x - 1) = (n. y^2 * (x^(n+2) / fact (n+2)))"
    using x
    apply (subst suminf_mult)
    by auto
  have ls: "summable (λn. y^2 * (x^(n+2) / fact (n+2)))"
    by (intro summable_mult[OF x(2)])

  have r:"x^2 * (exp y - y - 1) = (n. x^2 * (y^(n+2) / fact (n+2)))"
    using y
    apply (subst suminf_mult)
    by auto
  have rs: "summable (λn. x^2 * (y^(n+2) / fact (n+2)))"
    by (intro summable_mult[OF y(2)])

  have "¦x¦  ¦y¦  ¦y¦ < ¦x¦" by auto
  moreover {
    assume "¦x¦  ¦y¦"
    then have "x^ n  y ^n" for n
    by (smt (verit, ccfv_threshold) bot_nat_0.not_eq_extremum le power_0 real_root_less_mono real_root_power_cancel root_abs_power)
    then have "(x^2 * y^2) * x^n  (x^2 * y^2) * y^n" for n
      by (simp add: mult_left_mono)
    then have "y2 * (x ^ (n + 2))  x2 * (y ^ (n + 2))" for n
      by (metis (full_types) ab_semigroup_mult_class.mult_ac(1) mult.commute power_add)
    then have "y2 * (x ^ (n + 2)) / fact (n+2) x2 * (y ^ (n + 2)) / fact (n+2)" for n
      by (meson divide_right_mono fact_ge_zero)
    then have "(n. y^2 * (x^(n+2) / fact (n+2)))  (n. x^2 * (y^(n+2) / fact (n+2)))"
      apply (intro suminf_le[OF _ ls rs])
      by auto
    then have "y^2 * (exp x - x - 1)  x^2 * (exp y - y - 1)"
    using l r by presburger
  }
  moreover {
    assume ineq: "¦y¦ < ¦x¦"

    from psi_mono[OF assms(1)]
    have "(exp x - x - 1) - x^2 /2  (exp y - y - 1) - y^2/2" .

    then have "y^2 * ((exp x - x - 1) - x^2 /2)  x^2 * ((exp y - y - 1) - y^2/2)"
      by (smt (verit, best) ineq diff_divide_distrib exp_lower_Taylor_quadratic le(1) le(2) mult_nonneg_nonneg one_less_exp_iff power_zero_numeral prob_space.psi_mono prob_space_completion right_diff_distrib zero_le_power2)

    then have "y^2 * (exp x - x - 1)  x^2 * (exp y - y - 1)"
      by (simp add: mult.commute right_diff_distrib)
  }
  ultimately show ?thesis by auto
qed

(* Helper lemma, starting with normalized variables *)
lemma bennett_inequality_1:
  assumes I: "finite I"
  assumes ind: "indep_vars (λ _. borel) X I"
  assumes intsq: "i. i  I  integrable M (λx. (X i x)^2)"
  assumes bnd: "i. i  I  AE x in M. X i x  1"
  assumes t: "t  0"
  defines "V  (i  I. expectation(λx. X i x^2))"
  shows "prob {x  space M. (i  I. X i x - expectation (X i))  t} 
    exp (-V * bennett_h (t / V))"
proof (cases "V = 0")
  case True
  then show ?thesis
    by auto
next
  case f: False
  have "V  0"
    unfolding V_def
    apply (intro sum_nonneg  integral_nonneg_AE)
    by auto
  then have Vpos: "V > 0" using f by auto

  define l :: real where "l = ln(1 + t / V)"
  then have l: "l  0"
    using t Vpos by auto
  have rv[measurable]: "i. i  I  random_variable borel (X i)"
    using ind unfolding indep_vars_def by blast

  define ψ where "ψ = (λx::real. exp(x) - x - 1)"

  have rw: "exp y = 1 + y + ψ y" for y
    unfolding ψ_def by auto

  have ebnd: "i. i  I 
         AE x in M. exp (l * X i x)  exp l"
     apply (drule bnd)
     using l by (auto simp add: mult_left_le)

  (* integrability *)
  have int: "i. i  I  integrable M (λx. (X i x))"
  using rv intsq square_integrable_imp_integrable by blast

  have intl: "i. i  I  integrable M (λx. (l * X i x))"
    using int by blast

  have intexpl: "i. i  I  integrable M (λx. exp (l * X i x))"
    apply (intro integrable_const_bound[where B = "exp l"])
    using ebnd by auto

  have intpsi: "i. i  I  integrable M (λx. ψ (l * X i x))"
    unfolding ψ_def
    using intl intexpl by auto

  have **: "i. i  I 
    expectation (λx. ψ (l * X i x))  ψ l * expectation (λx. (X i x)^2)"
  proof -
    fix i assume i: "i  I"
    then have "AE x in M. l * X i x  l"
      using ebnd by auto
    then have "AE x in M. l^2 * ψ (l * X i x)  (l * X i x)^2 * ψ l"
      using psi_inequality[OF _ l] unfolding ψ_def
      by auto
    then have "AE x in M. l^2 * ψ (l * X i x)  l^2 * (ψ l * (X i x)^2)"
      by (auto simp add: field_simps)
    then have "AE x in M. ψ (l * X i x)  ψ l * (X i x)^2 "
      by (smt (verit, best) AE_cong ψ_def exp_eq_one_iff mult_cancel_left mult_eq_0_iff mult_left_mono zero_eq_power2 zero_le_power2)
    then have "AE x in M. 0  ψ l * (X i x)^2 - ψ (l * X i x) "
      by auto
    then have "expectation (λx. ψ l * (X i x)^2 + (- ψ (l * X i x)))  0"
      by (simp add: integral_nonneg_AE)
    also have "expectation (λx. ψ l * (X i x)^2 + (- ψ (l * X i x))) =
        ψ l * expectation (λx. (X i x)^2) - expectation (λx. ψ (l * X i x))"
      apply (subst Bochner_Integration.integral_add)
      using intpsi[OF i] intsq[OF i] by auto
    finally show "expectation (λx. ψ (l * X i x))  ψ l * expectation (λx. (X i x)^2)"
      by auto
  qed

  (* Analyzing the expectation *)
  then have *: "i. i  I 
      expectation (λx. exp (l * X i x)) 
      exp (l * expectation (X i)) * exp (ψ l * expectation (λx. X i x^2))"
  proof -
    fix i
    assume iI: "i  I"
    have "expectation (λx. exp (l * X i x)) =
      1 + l * expectation (λx. X i x) +
       expectation (λx. ψ (l * X i x))"
      unfolding rw
      apply (subst Bochner_Integration.integral_add)
      using iI intl intpsi apply auto[2]
      apply (subst Bochner_Integration.integral_add)
      using intl iI prob_space by auto
    also have "... = l * expectation (X i) + 1 + expectation (λx. ψ (l * X i x))"
      by auto
    also have "...  1 + l * expectation (X i) + ψ l * expectation (λx. X i x^2)"
      using **[OF iI] by auto
    also have "...  exp (l * expectation (X i)) * exp (ψ l  * expectation (λx. X i x^2))"
      by (simp add: is_num_normalize(1) mult_exp_exp)
    finally show "expectation (λx. exp (l * X i x)) 
      exp (l * expectation (X i)) * exp (ψ l  * expectation (λx. X i x^2))" .
  qed

  have "(iI. expectation (λx. exp (l * (X i x)))) 
    (iI. exp (l * expectation (X i)) * exp (ψ l  * expectation (λx. X i x^2)))"
    by (auto intro!: prod_mono simp add: *)
  also have "... =
    (iI. exp (l * expectation (X i))) * (iI. exp (ψ l  * expectation (λx. X i x^2)))"
    by (auto simp add: prod.distrib)
  finally have **:
    "(iI. expectation (λx. exp (l * (X i x)))) 
    (iI. exp (l * expectation (X i))) * exp (ψ l * V)"
    by (simp add: V_def I exp_sum sum_distrib_left)

  from indep_vars_Chernoff_ineq_ge[OF I ind l intexpl]
  have "prob {x  space M. (i  I. X i x - expectation (X i))  t} 
    exp (- l * t) *
     (iI. expectation (λx. exp (l * (X i x - expectation (X i)))))"
     by auto
  also have "(iI. expectation (λx. exp (l * (X i x - expectation (X i))))) =
    (iI. expectation (λx. exp (l * (X i x))) * exp (- l * expectation (X i)))"
    by (auto intro!: prod.cong simp add: field_simps exp_diff exp_minus_inverse)
  also have "... =
     (iI. exp (- l * expectation (X i))) * (iI. expectation (λx. exp (l * (X i x))))"
    by (auto simp add: prod.distrib)
  also have "... 
     (iI. exp (- l * expectation (X i))) * ((iI. exp (l * expectation (X i))) * exp (ψ l * V))"
    apply (intro mult_left_mono[OF **])
    by (meson exp_ge_zero prod_nonneg)
  also have "... = exp (ψ l * V)"
    apply (simp add: prod.distrib [symmetric])
    by (smt (verit, ccfv_threshold) exp_minus_inverse prod.not_neutral_contains_not_neutral)
  finally have "
    prob {x  space M. (i  I. X i x - expectation (X i))  t} 
    exp (ψ l * V - l * t)"
    by (simp add:mult_exp_exp)
  also have "ψ l * V - l * t = -V * bennett_h (t / V)"
    unfolding ψ_def l_def bennett_h_def
    apply (subst exp_ln)
    subgoal by (smt (verit) Vpos divide_nonneg_nonneg t)
    by (auto simp add: algebra_simps)
  finally show ?thesis .
qed

lemma real_AE_le_sum:
  assumes "i. i  I  AE x in M. f i x  (g i x::real)"
  shows "AE x in M. (iI. f i x)  (iI. g i x)"
proof (cases)
  assume "finite I"
  with AE_finite_allI[OF this assms] have 0:"AE x in M. (iI. f i x  g i x)" by auto
  show ?thesis by (intro eventually_mono[OF 0] sum_mono) auto
qed simp

lemma real_AE_eq_sum:
  assumes "i. i  I  AE x in M. f i x = (g i x::real)"
  shows "AE x in M. (iI. f i x) = (iI. g i x)"
proof -
  have 1: "AE x in M. (iI. f i x)  (iI. g i x)"
    apply (intro real_AE_le_sum)
    apply (drule assms)
    by auto
  have 2: "AE x in M. (iI. g i x)  (iI. f i x)"
    apply (intro real_AE_le_sum)
    apply (drule assms)
    by auto
  show ?thesis
    using 1 2
    by auto
qed

(* B = 0 case trivial *)
theorem bennett_inequality:
  assumes I: "finite I"
  assumes ind: "indep_vars (λ _. borel) X I"
  assumes intsq: "i. i  I  integrable M (λx. (X i x)^2)"
  assumes bnd: "i. i  I  AE x in M. X i x  B"
  assumes t: "t  0"
  assumes B: "B > 0"
  defines "V  (i  I. expectation (λx. X i x^2))"
  shows "prob {x  space M. (i  I. X i x - expectation (X i))  t} 
    exp (- V / B^2 * bennett_h (t * B / V))"
proof -
  define Y where "Y = (λi x. X i x / B)"

  from indep_vars_compose[OF ind, where Y = "λi x. x/ B"]
  have 1: "indep_vars (λ_. borel) Y I"
    unfolding Y_def by (auto simp add: o_def)
  have 2: "i. i  I  integrable M (λx. (Y i x)2)"
    unfolding Y_def apply (drule intsq)
    by (auto simp add: field_simps)
  have 3: "i. i  I  AE x in M. Y i x  1"
    unfolding Y_def apply (drule bnd)
    using B by auto
  have 4:"0  t / B" using t B by auto

  have rw1: "(iI. Y i x - expectation (Y i)) =
    (iI. X i x - expectation (X i)) / B" for x
    unfolding Y_def
    by (auto simp: diff_divide_distrib sum_divide_distrib)

  have rw2: "expectation (λx. (Y i x)2) =
    expectation (λx. (X i x)2) / B^2" for i
    unfolding Y_def
    by (simp add: power_divide)

  have rw3:"- (iI. expectation (λx. (X i x)2) / B^2) = - V / B^2"
    unfolding V_def
    by (auto simp add: sum_divide_distrib)

  have "t / B / (iI. expectation (λx. (X i x)2) / B^2) =
    t / B / (V / B^2)"
    unfolding V_def
    by (auto simp add: sum_divide_distrib)
  then have rw4: "t / B / (iI. expectation (λx. (X i x)2) / B^2) =
      t * B / V"
      by (simp add: power2_eq_square)
  have "prob {x  space M. t  (iI. X i x - expectation (X i))} =
    prob{x  space M. t / B  (iI. X i x - expectation (X i)) / B}"
    by (smt (verit, best) B Collect_cong divide_cancel_right divide_right_mono)
  also have "... 
    exp (- V / B2 *
          bennett_h (t * B / V))"
    using bennett_inequality_1[OF I 1 2 3 4]
    unfolding rw1 rw2 rw3 rw4 .
  finally show ?thesis .
qed

(* This proof follows https://math.stackexchange.com/a/4066844 *)
lemma bennett_h_bernstein_bound:
  assumes "x  0"
  shows "bennett_h x  x^2 / (2 * (1 + x / 3))"
proof -
  have eq:"x^2 / (2 * (1 + x / 3)) = 3/2 * x - 9/2 * (x / (x+3))"
    using assms
    by (sos "(() & ())")

  define g where "g = (λx. bennett_h x - (3/2 * x - 9/2 * (x / (x+3))))"

  define g' where "g' = (λx::real.
    ln(1 + x) +  27 / (2 * (x+3)^2) - 3 / 2)"
  define g'' where "g'' = (λx::real.
      1 / (1 + x) - 27  / (x+3)^3)"

  have "54 / ((2 * x + 6)^2) = 27 / (2 * (x + 3)2)" (is "?L = ?R") for x :: real
  proof -
    have "?L = 54 / (2^2 * (x + 3)^2)"
      unfolding power_mult_distrib[symmetric] by (simp add:algebra_simps)
    also have "... = ?R" by simp
    finally show ?thesis by simp
  qed

  hence 1: "x  0  (g has_real_derivative (g' x)) (at x)" for x
    unfolding g_def g'_def bennett_h_def by (auto intro!: derivative_eq_intros simp:power2_eq_square)
  have 2: "x  0  (g' has_real_derivative (g'' x)) (at x)" for x
    unfolding g'_def g''_def
    apply (auto intro!: derivative_eq_intros)[1]
    by (sos "(() & ())")

  have gz: "g 0 = 0"
    unfolding g_def bennett_h_def by auto
  have g1z: "g' 0 = 0"
    unfolding g'_def by auto

  have p2: "g'' x   0" if "x  0" for x
  proof -
    have "27 * (1+x)  (x+3)^3"
      using that unfolding power3_eq_cube by (auto simp:algebra_simps)
    hence " 27 / (x + 3) ^ 3  1 / (1+x)"
      using that by (subst frac_le_eq) (auto intro!:divide_nonpos_pos)
    thus ?thesis unfolding g''_def by simp
  qed

  from deriv_nonneg_imp_mono[OF 2 p2 _]
  have "x  0  g' x  0" for x using g1z
    by (metis atLeastAtMost_iff)

  from deriv_nonneg_imp_mono[OF 1 this _]
  have "x  0  g x  0" for x using gz
    by (metis atLeastAtMost_iff)

  thus ?thesis
  using assms eq g_def by force
qed

lemma sum_sq_exp_eq_zero_imp_zero:
  assumes "finite I" "i  I"
  assumes intsq: "integrable M (λx. (X i x)^2)"
  assumes "(i  I. expectation (λx. X i x^2)) = 0"
  shows "AE x in M. X i x = (0::real)"
proof -
  have "(i I. expectation (λx. X i x^2) = 0)"
    using assms
    apply (subst sum_nonneg_eq_0_iff[symmetric])
    by auto
  then have "expectation (λx. X i x^2) = 0"
    using assms(2) by blast
  thus ?thesis
    using integral_nonneg_eq_0_iff_AE[OF intsq]
    by auto
qed

corollary bernstein_inequality:
  assumes I: "finite I"
  assumes ind: "indep_vars (λ _. borel) X I"
  assumes intsq: "i. i  I  integrable M (λx. (X i x)^2)"
  assumes bnd: "i. i  I  AE x in M. X i x  B"
  assumes t: "t  0"
  assumes B: "B > 0"
  defines "V  (i  I. expectation (λx. X i x^2))"
  shows "prob {x  space M. (i  I. X i x - expectation (X i))  t} 
    exp (- (t^2 / (2 * (V + t * B / 3))))"
proof (cases "V = 0")
  case True
  then have 1:"i. i  I  AE x in M. X i x = 0"
    unfolding V_def
    using sum_sq_exp_eq_zero_imp_zero
    by (metis I intsq)
  then have 2:"i. i  I  expectation (X i) = 0"
    using integral_eq_zero_AE by blast

  have "AE x in M. (i  I. X i x - expectation (X i)) = (i  I. 0)"
      apply (intro real_AE_eq_sum)
      using 1 2
      by auto
  then have *: "AE x in M. (i  I. X i x - expectation (X i)) = 0"
    by force

  moreover {
    assume "t > 0"
    then have "prob {x  space M. (i  I. X i x - expectation (X i))  t} = 0"
      apply (intro prob_eq_0_AE)
      using * by auto
    then have ?thesis by auto
  }
  ultimately show ?thesis
    apply (cases "t = 0") using t by auto
next
  case f: False
  have "V  0"
    unfolding V_def
    apply (intro sum_nonneg  integral_nonneg_AE)
    by auto
  then have V: "V > 0" using f by auto

  have "t * B / V  0" using t B V by auto
  from bennett_h_bernstein_bound[OF this]
  have "(t * B / V)2 / (2 * (1 + t * B / V / 3))
     bennett_h (t * B / V)" .

  then have "(- V / B^2) * bennett_h (t * B / V) 
    (- V / B^2) * ((t * B / V)2 / (2 * (1 + t * B / V / 3)))"
    apply (subst mult_left_mono_neg)
    using B V by auto
  also have "... =
     ((- V / B^2) * (t * B / V)2) / (2 * (1 + t * B / V / 3))"
    by auto
  also have " ((- V / B^2) * (t * B / V)2) = -(t^2) / V"
    using V B by (auto simp add: field_simps power2_eq_square)
  finally have *: "(- V / B^2) * bennett_h (t * B / V) 
     -(t^2)  / (2 * (V + t * B  / 3))"
    using V by (auto simp add: field_simps)

  from bennett_inequality[OF assms(1-6)]
  have "prob {x  space M. (i  I. X i x - expectation (X i))  t} 
    exp (- V / B^2 * bennett_h (t * B / V))"
    using V_def by auto
  also have "...  exp (- (t^2/ (2 * (V + t * B  / 3))))"
    using *
    by auto
  finally show ?thesis .
qed

end

end