Theory Negative_Association_Chernoff_Bounds

section ‹Chernoff-Hoeffding Bounds›

text ‹This section shows that all the well-known Chernoff-Hoeffding bounds hold also for
negatively associated random variables. The proofs follow the derivations by
Hoeffding~\cite{hoeffding1963}, as well as, Motwani and Raghavan~\cite[Ch. 4]{motwani1995}, with the
modification that the crucial steps, where the classic proofs use independence, are replaced with
the application of Property P2 for negatively associated RV's.›

theory Negative_Association_Chernoff_Bounds
  imports
    Negative_Association_Definition
    Concentration_Inequalities.McDiarmid_Inequality
    Weighted_Arithmetic_Geometric_Mean.Weighted_Arithmetic_Geometric_Mean
begin

context prob_space
begin

context
  fixes I :: "'i set"
  fixes X :: "'i ⇒ 'a ⇒ real"
  assumes na_X: "neg_assoc X I"
  assumes fin_I: "finite I"
begin

private lemma transfer_to_clamped_vars:
  assumes "(∀i∈I. AE ω in M. X i ω ∈ {a i..b i} ∧ a i ≤ b i)"
  assumes 𝒳_def: "𝒳 = (λi. clamp (a i) (b i) ∘ X i)"
  shows "neg_assoc 𝒳 I" (is "?A")
    and "⋀i. i ∈ I ⟹ expectation (𝒳 i) = expectation (X i)"
    and "𝒫(ω in M. (∑i ∈ I. X i ω) ≤≥⇘η⇙ c) = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≤≥⇘η⇙ c)" (is "?C")
    and "⋀i ω. i ∈ I ⟹ 𝒳 i ω ∈ {a i..b i}"
    and "⋀i S. i ∈ I ⟹ bounded (𝒳 i ` S)"
    and "⋀i. i ∈ I ⟹ expectation (𝒳 i) ∈ {a i..b i}"
proof -
  note [measurable] = clamp_borel
  note rv_X = neg_assoc_imp_measurable[OF na_X]

  hence rv_𝒳: "random_variable borel (𝒳 i)" if "i ∈ I" for i
    unfolding 𝒳_def using rv_X[OF that] by measurable

  have a:"AE x in M. 𝒳 i x = X i x" if "i ∈ I" for i
    unfolding 𝒳_def using clamp_eqI2 by (intro AE_mp[OF bspec[OF assms(1) that] AE_I2]) auto

  hence b:"AE x in M. (∀i ∈ I. 𝒳 i x = X i x)"
    by (intro AE_finite_allI[OF fin_I]) simp

  show "?A"
    using a by (intro neg_assoc_cong[OF fin_I rv_𝒳 na_X]) force+

  show "expectation (𝒳 i) = expectation (X i)" if "i ∈ I" for i
    by (intro integral_cong_AE a rv_X rv_𝒳 that)

  have "{ω ∈ space M. (∑i∈I. X i ω) ≤≥⇘η⇙ c} ∈ events" using rv_X by (cases η) simp_all
  moreover have "{ω ∈ space M. (∑i∈I. 𝒳 i ω) ≤≥⇘η⇙ c} ∈ events" using rv_𝒳 by (cases η) simp_all
  ultimately show "?C" by (intro measure_eq_AE AE_mp[OF b AE_I2]) auto

  show c:"𝒳 i ω ∈ {a i..b i}" if "i ∈ I" for ω i
    unfolding 𝒳_def comp_def using assms(1) clamp_range that by simp

  show d:"bounded (𝒳 i ` S)" if "i ∈ I" for S i
    using c[OF that] assms(2) bounded_clamp by blast

  show "expectation (𝒳 i) ∈ {a i..b i}" if "i ∈ I" for i
    unfolding atLeastAtMost_iff using c[OF that]  rv_𝒳[OF that]
    by (intro conjI integral_ge_const integral_le_const AE_I2 integrable_bounded d[OF that]) auto
qed

lemma ln_one_plus_x_lower_bound:
  assumes "x ≥ (0::real)"
  shows "2*x/(2+x) ≤ ln (1 + x)"
proof -
  define v where "v x = ln(1+x) - 2 * x/ (2+x) " for x :: real
  define v' where "v' x = 1/(1+x) - 4/(2+x)^2" for x :: real

  have v_deriv: "(v has_real_derivative (v' x)) (at x)" if "x ≥ 0" for x
    using that unfolding v_def v'_def power2_eq_square by (auto intro!:derivative_eq_intros)
  have v_deriv_nonneg: "v' x ≥ 0" if "x ≥ 0" for x
    using that unfolding v'_def
    by (simp add:divide_simps power2_eq_square) (simp add:algebra_simps)

  have v_mono: "v x ≤ v y" if "x ≤ y" "x ≥ 0" for x y
    using v_deriv v_deriv_nonneg that order_trans
    by (intro DERIV_nonneg_imp_nondecreasing[OF that(1)]) blast

  have "0 = v 0" unfolding v_def by simp
  also have "… ≤ v x" using v_mono assms by auto
  finally have "v x ≥ 0" by simp
  thus ?thesis unfolding v_def by simp
qed

text ‹Based on Theorem~4.1 by Motwani and Raghavan~\cite{motwani1995}.›

theorem multiplicative_chernoff_bound_upper:
  assumes "δ > 0"
  assumes "⋀i. i ∈ I ⟹ AE ω in M. X i ω ∈ {0..1}"
  defines "μ ≡ (∑i ∈ I. expectation (X i))"
  shows "𝒫(ω in M. (∑i ∈ I. X i ω) ≥ (1+δ) * μ) ≤ (exp δ/((1+δ) powr (1+δ))) powr μ" (is "?L ≤ ?R")
    and "𝒫(ω in M. (∑i ∈ I. X i ω) ≥ (1+δ) * μ) ≤ exp (-(δ^2) * μ / (2+δ))" (is "_ ≤ ?R1")
proof -
  define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
  have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0 .. 1} ∧ 0 ≤ (1::real))"
    using assms(2) by auto
  note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
  note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]

  define t where "t = ln (1+δ)"
  have t_gt_0: "t > 0" using assms(1) unfolding t_def by simp

  let  ?h = "(λx. 1 + (exp t - 1) * x)"

  note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)

  have int: "integrable M (𝒳 i)" if "i ∈ I" for i
    using that by (intro bounded') simp_all

  have " 2*δ ≤ (2+δ)* ln (1 + δ)"
    using assms(1) ln_one_plus_x_lower_bound[OF less_imp_le[OF assms(1)]] by (simp add:field_simps)
  hence " (1+δ)*(2*δ) ≤ (1 + δ) *(2+δ)* ln (1 + δ)" using assms(1) by simp
  hence a:"(δ - (1 + δ) * ln (1 + δ)) ≤ - (δ^2)/(2+δ)"
    using assms(1) by (simp add:field_simps power2_eq_square)

  have μ_ge_0: "μ ≥ 0" unfolding μ_def using ttcv(2,6) by (intro sum_nonneg) auto

  note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]

  have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ (1+δ) * μ)" using ttcv(3)[where η="Rev"] by simp
  also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * 𝒳 i ω)) ≥ exp (t * (1+δ) * μ))"
    using t_gt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
  also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * 𝒳 i ω))) / exp (t*(1+δ)*μ)"
    by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
      simp_all
  also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*𝒳 i ω))) / exp (t*(1+δ)*μ)"
    using t_gt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
  also have "… = (∏i ∈ I. expectation (λω. exp ((1-𝒳 i ω) *⇩_R 0+  𝒳 i ω *⇩_R t))) / exp (t*(1+δ)*μ)"
    by (simp add:ac_simps)
  also have "… ≤ (∏i ∈ I. expectation (λω. (1-𝒳 i ω) * exp 0 +  𝒳 i ω * exp t)) / exp (t*(1+δ)*μ)"
    using ttcv(4)
    by (intro divide_right_mono prod_mono integral_mono conjI bounded' convex_onD[OF exp_convex])
     simp_all
  also have "… = (∏i ∈ I. ?h (expectation (𝒳 i))) / exp (t*(1+δ)*μ)"
    using int by (simp add:algebra_simps prob_space cong:prod.cong)
  also have "… ≤ (∏i ∈ I. exp((exp t-1)* expectation (𝒳 i))) / exp (t*(1+δ)*μ)"
    using t_gt_0 ttcv(4)
    by (intro divide_right_mono prod_mono exp_ge_add_one_self conjI add_nonneg_nonneg
        mult_nonneg_nonneg) simp_all
  also have "… = exp ((exp t-1)* μ) / exp (t*(1+δ)*μ)"
    unfolding exp_sum[OF fin_I, symmetric] μ_def by (simp add:ttcv(2) sum_distrib_left)
  also have "… = exp (δ * μ) / exp ( ln (1+δ)*(1+δ) * μ)"
     using assms(1) unfolding μ_def t_def by (simp add:sum_distrib_left)
  also have "… = exp δ powr μ / exp ( ln(1+δ)*(1+δ)) powr μ"
    unfolding powr_def by (simp add:ac_simps)
  also have "… = ?R" using assms(1) by (subst powr_divide) (simp_all add:powr_def)
  finally show "?L ≤ ?R" by simp
  also have "… =  exp (μ * ln (exp δ / exp ((1 + δ) * ln (1 + δ))))"
    using assms unfolding powr_def by simp
  also have "… = exp (μ * (δ - (1 + δ) * ln (1 + δ)))" by (subst ln_div) simp_all
  also have "… ≤ exp (μ * (-(δ^2)/(2+δ)))"
    by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono a μ_ge_0)
  also have "… = ?R1" by simp
  finally show "?L ≤ ?R1" by simp
qed

lemma ln_one_minus_x_lower_bound:
  assumes "x ∈ {(0::real)..<1}"
  shows "(x^2/2-x)/(1-x) ≤ ln (1 - x)"
proof -
  define v where "v x = ln(1-x) - (x^2/2-x) / (1-x) " for x :: real
  define v' where "v' x = -1/(1-x) - (-(x^2)/2+x-1)/((1-x)^2)" for x :: real

  have v_deriv: "(v has_real_derivative (v' x)) (at x)" if "x ∈ {0..<1}" for x
    using that unfolding v_def v'_def power2_eq_square
    by (auto intro!:derivative_eq_intros simp:algebra_simps)
  have v_deriv_nonneg: "v' x ≥ 0" if "x ≥ 0" for x
    using that unfolding v'_def by (simp add:divide_simps power2_eq_square)

  have v_mono: "v x ≤ v y" if "x ≤ y" "x ≥ 0" "y < 1" for x y
    using v_deriv v_deriv_nonneg that unfolding atLeastLessThan_iff
    by (intro DERIV_nonneg_imp_nondecreasing[OF that(1)])
     (metis (mono_tags, opaque_lifting) Ico_eq_Ico ivl_subset linorder_not_le order_less_irrefl)

  have "0 = v 0" unfolding v_def by simp
  also have "… ≤ v x" using v_mono assms by auto
  finally have "v x ≥ 0" by simp
  thus ?thesis unfolding v_def by simp
qed

text ‹Based on Theorem~4.2 by Motwani and Raghavan~\cite{motwani1995}.›

theorem multiplicative_chernoff_bound_lower:
  assumes "δ ∈ {0<..<1}"
  assumes "⋀i. i ∈ I ⟹ AE ω in M. X i ω ∈ {0..1}"
  defines "μ ≡ (∑i ∈ I. expectation (X i))"
  shows "𝒫(ω in M. (∑i ∈ I. X i ω) ≤ (1-δ)*μ) ≤ (exp (-δ)/(1-δ) powr (1-δ)) powr μ" (is "?L ≤ ?R")
    and "𝒫(ω in M. (∑i ∈ I. X i ω) ≤ (1-δ)*μ) ≤ (exp (-(δ^2)*μ/2))" (is "_ ≤ ?R1")
proof -
  define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
  have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0 .. 1} ∧ 0 ≤ (1::real))"
    using assms(2) by auto
  note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
  note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]

  define t where "t = ln (1-δ)"
  have t_lt_0: "t < 0" using assms(1) unfolding t_def by simp

  let  ?h = "(λx. 1 + (exp t - 1) * x)"

  note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)

  have μ_ge_0: "μ ≥ 0" unfolding μ_def using ttcv(2,6) by (intro sum_nonneg) auto

  have int: "integrable M (𝒳 i)" if "i ∈ I" for i
    using that by (intro bounded') simp_all

  note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Rev"]]

  have 0: "0 ≤ 1 + (exp t - 1) * expectation (𝒳 i)" if "i ∈ I" for i
  proof -
    have "0 ≤ 1 + (exp t - 1) * 1" by simp
    also have "… ≤  1 + (exp t - 1) * expectation (𝒳 i)"
      using t_lt_0 ttcv(6)[OF that] by (intro add_mono mult_left_mono_neg) auto
    finally show ?thesis by simp
  qed

  have "δ ∈ {0..<1}" using assms(1) by simp
  from ln_one_minus_x_lower_bound[OF this]
  have "δ⇧² / 2 - δ  ≤  (1 - δ) * ln (1 - δ)" using assms(1) by (simp add:field_simps)
  hence 1: "- δ - (1 - δ) * ln (1 - δ) ≤ - δ⇧² / 2" by (simp add:algebra_simps)

  have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≤ (1-δ) * μ)" using ttcv(3)[where η="Fwd"] by simp
  also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * 𝒳 i ω)) ≥ exp (t * (1-δ) * μ))"
    using t_lt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
  also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * 𝒳 i ω))) / exp (t*(1-δ)*μ)"
    by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
      simp_all
  also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*𝒳 i ω))) / exp (t*(1-δ)*μ)"
    using t_lt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
  also have "… = (∏i ∈ I. expectation (λω. exp ((1-𝒳 i ω) *⇩_R 0+  𝒳 i ω *⇩_R t))) / exp (t*(1-δ)*μ)"
    by (simp add:ac_simps)
  also have "… ≤ (∏i ∈ I. expectation (λω. (1-𝒳 i ω) * exp 0 +  𝒳 i ω * exp t)) / exp (t*(1-δ)*μ)"
    using ttcv(4)
    by (intro divide_right_mono prod_mono integral_mono conjI bounded' convex_onD[OF exp_convex])
     simp_all
  also have "… = (∏i ∈ I. ?h (expectation (𝒳 i))) / exp (t*(1-δ)*μ)"
    using int by (simp add:algebra_simps prob_space cong:prod.cong)
  also have "… ≤ (∏i ∈ I. exp((exp t-1)* expectation (𝒳 i))) / exp (t*(1-δ)*μ)"
    using 0 by (intro divide_right_mono prod_mono exp_ge_add_one_self conjI) simp_all
  also have "… = exp ((exp t-1)* μ) / exp (t*(1-δ)*μ)"
    unfolding exp_sum[OF fin_I, symmetric] μ_def by (simp add:ttcv(2) sum_distrib_left)
  also have "… = exp ((-δ) * μ) / exp ( ln (1-δ)*(1-δ) * μ)"
     using assms(1) unfolding μ_def t_def by (simp add:sum_distrib_left)
  also have "… = exp (-δ) powr μ / exp ( ln(1-δ)*(1-δ)) powr μ"
    unfolding powr_def by (simp add:ac_simps)
  also have "… = ?R" using assms(1) by (subst powr_divide) (simp_all add:powr_def)
  finally show "?L ≤ ?R" by simp
  also have "… = exp (μ * (- δ - (1 - δ) * ln (1 - δ)))"
    using assms(1) unfolding powr_def by (simp add:ln_div)
  also have "… ≤ exp (μ * (-(δ^2) / 2 ))"
    by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono μ_ge_0 1)
  finally show "?L ≤ ?R1" by (simp add:ac_simps)
qed

theorem multiplicative_chernoff_bound_two_sided:
  assumes "δ ∈ {0<..<1}"
  assumes "⋀i. i ∈ I ⟹ AE ω in M. X i ω ∈ {0..1}"
  defines "μ ≡ (∑i ∈ I. expectation (X i))"
  shows "𝒫(ω in M. ¦(∑i ∈ I. X i ω) - μ¦ ≥ δ*μ) ≤ 2*(exp (-(δ^2)*μ/3))" (is "?L ≤ ?R")
proof -
  define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
  have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0 .. 1} ∧ 0 ≤ (1::real))"
    using assms(2) by auto
  note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]

  have μ_ge_0: "μ ≥ 0" unfolding μ_def using ttcv(2,6) by (intro sum_nonneg) auto

  note [measurable] = neg_assoc_imp_measurable[OF na_X]

  have "?L = 𝒫(ω in M. (∑i∈I. X i ω) ≥ (1+δ)*μ ∨ (∑i∈I. X i ω) ≤ (1-δ)*μ)" unfolding abs_real_def
    by (intro arg_cong[where f="prob"] Collect_cong) (auto simp:algebra_simps)
  also have "… =measure M({ω∈space M.(∑i∈I. X i ω)≥(1+δ)*μ}∪{ω∈space M. (∑i∈I. X i ω)≤(1-δ)*μ})"
    by (intro arg_cong[where f="prob"]) auto
  also have "… ≤ 𝒫(ω in M. (∑i∈I. X i ω) ≥ (1+δ)*μ) + 𝒫(ω in M.(∑i∈I. X i ω) ≤ (1-δ)*μ )"
    by (intro measure_Un_le) measurable
  also have "… ≤ exp (-(δ^2)*μ/(2+δ)) + exp (-(δ^2)*μ/2)"
    unfolding μ_def using assms(1,2)
    by (intro multiplicative_chernoff_bound_lower multiplicative_chernoff_bound_upper add_mono) auto
  also have "… ≤ exp (-(δ^2)*μ/3) + exp (-(δ^2)*μ/3)"
    using assms(1) μ_ge_0 by (intro iffD2[OF exp_le_cancel_iff] add_mono divide_left_mono_neg) auto
  also have "… = ?R" by simp
  finally show ?thesis by simp
qed

lemma additive_chernoff_bound_upper_aux:
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
  defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
  assumes "δ ∈ {0<..<1-μ}" "μ ∈ {0<..<1}"
  shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ (μ+δ)*real (card I)) ≤ exp (-real (card I) * KL_div (μ+δ) μ)"
    (is "?L ≤ ?R")
proof -
  define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
  have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0..1} ∧ 0 ≤ (1::real))"
    using assms(1) by auto
  note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
  note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]

  define t :: real where "t = ln ((μ+δ)/μ) - ln ((1-μ-δ)/(1-μ))"
  let ?h = "λx. 1 + (exp t - 1) * x"
  let ?n = "real (card I)"

  have n_gt_0: "?n > 0" using assms(2) fin_I by auto

  have a: "(1 - μ - δ) > 0" "μ > 0" "1 - μ > 0" "μ + δ > 0"
    using assms(4,5) by auto

  have "ln ((1 - μ - δ) / (1 - μ)) < 0" using a assms(4) by (intro ln_less_zero) auto
  moreover have "ln ((μ + δ) / μ) > 0" using a assms(4) by (intro ln_gt_zero) auto
  ultimately have t_gt_0: "t > 0" unfolding t_def by simp

  note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)

  note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]

  have int: "integrable M (𝒳 i)" if "i ∈ I" for i
    using that by (intro bounded') simp_all

  have 0: "0 ≤ 1 + (exp t - 1) * expectation (𝒳 i)" if "i ∈ I" for i
    using t_gt_0 ttcv(6)[OF that] by (intro add_nonneg_nonneg mult_nonneg_nonneg) auto

  have "1 + (exp t - 1) * μ = 1 + ((μ + δ) * (1 - μ) / (μ * (1 - μ - δ)) - 1) * μ"
    using a unfolding t_def exp_diff by simp
  also have "… =  1 + (δ / (μ * (1 - μ - δ))) * μ"
    using a by (subst divide_diff_eq_iff) (simp, simp add:algebra_simps)
  also have "… = (1-μ-δ)/(1-μ-δ) + (δ / (1-μ-δ))" using a by simp
  also have "… = (1-μ) / (1-μ-δ)"
    unfolding add_divide_distrib[symmetric] by (simp add:algebra_simps)
  also have "… = inverse ((1-μ-δ) / (1-μ))" using a by simp
  also have "… = exp (ln (inverse ((1-μ-δ) / (1-μ))))" using a by simp
  also have "… = exp (- ln((1-μ-δ) / (1-μ)))" using a by (subst ln_inverse) (simp_all)
  finally have 1: "1 + (exp t - 1) * μ = exp (- ln((1-μ-δ) / (1-μ)))" by simp

  have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ (μ+δ) * ?n)" using ttcv(3)[where η="Rev"] by simp
  also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * 𝒳 i ω)) ≥ exp (t * (μ+δ) * ?n))"
    using t_gt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
  also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * 𝒳 i ω))) / exp (t * (μ+δ) * ?n)"
    by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
      simp_all
  also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*𝒳 i ω))) / exp (t * (μ+δ) * ?n)"
    using t_gt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
  also have "… = (∏i ∈ I. expectation (λω. exp ((1-𝒳 i ω) *⇩_R 0 + 𝒳 i ω *⇩_R t))) / exp (t*(μ+δ)*?n)"
    by (simp add:ac_simps)
  also have "… ≤ (∏i∈I. expectation (λω. (1-𝒳 i ω)*exp 0 + 𝒳 i ω * exp t)) / exp (t * (μ+δ) * ?n)"
    using ttcv(4)
    by (intro divide_right_mono prod_mono integral_mono conjI bounded' convex_onD[OF exp_convex])
     simp_all
  also have "… = (∏i ∈ I. ?h (expectation (𝒳 i))) / exp (t * (μ+δ) * ?n)"
    using int by (simp add:algebra_simps prob_space cong:prod.cong)
  also have "… = (root (card I) (∏i∈I. 1+(exp t-1)*expectation (𝒳 i)))^(card I) / exp (t*(μ+δ)*?n)"
    using n_gt_0
    by (intro arg_cong2[where f="(/)"] real_root_pow_pos2[symmetric] prod_nonneg refl 0) auto
  also have "… ≤ ((∑i ∈ I. 1 + (exp t - 1) * expectation (𝒳 i)) / ?n)^ (card I) / exp (t*(μ+δ)*?n)"
    by (intro divide_right_mono power_mono arithmetic_geometric_mean[OF fin_I] real_root_ge_zero
        prod_nonneg 0) simp_all
  also have "… ≤ ((∑i ∈ I. 1 + (exp t - 1) * expectation (𝒳 i)) / ?n) powr ?n / exp (t*(μ+δ)*?n)"
    using n_gt_0 0 by (subst powr_realpow') (auto intro!:sum_nonneg divide_nonneg_pos 0)
  also have "… ≤ ((∑i ∈ I. 1 + (exp t - 1) * expectation (X i)) / ?n) powr ?n / exp (t*(μ+δ)*?n)"
    using ttcv(2) by (simp cong:sum.cong)
  also have "… = (1 + (exp t - 1) * μ) powr ?n / exp (t*(μ+δ)*?n)"
    using n_gt_0 unfolding μ_def sum.distrib sum_distrib_left[symmetric] by (simp add:divide_simps)
  also have "… = (1 + (exp t - 1) * μ) powr ?n / exp (t*(μ+δ)) powr ?n"
    unfolding powr_def by simp
  also have "… = ((1 + (exp t - 1) * μ)/exp(t*(μ+δ))) powr ?n"
    using a t_gt_0 by (auto intro: powr_divide[symmetric] add_nonneg_nonneg mult_nonneg_nonneg)
  also have "… = (exp (- ln((1-μ-δ) / (1-μ))) * exp( -(t * (μ+δ)))) powr ?n"
    unfolding 1 exp_minus inverse_eq_divide by simp
  also have "… = exp ( -ln((1 - μ-δ)/(1 - μ))- t * (μ+δ)) powr ?n"
    unfolding exp_add[symmetric] by simp
  also have "… = exp ( -ln((1 - μ-δ)/(1 - μ))- (ln ((μ+δ)/μ) - ln ((1-μ-δ)/(1-μ)))*(μ+δ)) powr ?n"
    using a unfolding t_def by (simp add:divide_simps)
  also have "… = exp( -KL_div (μ+δ) μ) powr ?n"
    using a by (subst KL_div_eq) (simp_all add:field_simps)
  also have "… = ?R" unfolding powr_def by simp
  finally show ?thesis by simp
qed

lemma additive_chernoff_bound_upper_aux_2:
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
  defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
  assumes "μ ∈ {0<..<1}"
  shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ real (card I)) ≤ exp (-real (card I) * KL_div 1 μ)"
    (is "?L ≤ ?R")
proof -
  define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
  have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0..1} ∧ 0 ≤ (1::real))"
    using assms(1) by auto
  note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
  note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]

  let ?n = "real (card I)"

  have n_gt_0: "?n > 0" using assms(2) fin_I by auto

  note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
    bounded_max

  note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]

  have a2:"(∏i ∈ I. max 0 (𝒳 i ω)) ≥ 1" if "(∑i ∈ I. 𝒳 i ω) ≥ ?n" for ω
  proof -
    have "(∑i ∈ I. 1 - 𝒳 i ω) ≤ 0" using that by (simp add:sum_subtractf)
    moreover have "(∑i ∈ I. 1 - 𝒳 i ω) ≥ 0" using ttcv(4) by (intro sum_nonneg) simp
    ultimately have "(∑i ∈ I. 1 - 𝒳 i ω) = 0" by simp
    with iffD1[OF sum_nonneg_eq_0_iff[OF fin_I] this]
    have "∀i ∈ I. 1 - 𝒳 i ω = 0" using ttcv(4) by simp
    hence "𝒳 i ω = 1" if "i ∈ I" for i using that by auto
    thus ?thesis by (intro prod_ge_1) fastforce
  qed

  have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ ?n)" using ttcv(3)[where η="Rev"] by simp
  also have "… ≤ 𝒫(ω in M. (∏i ∈ I. max 0 (𝒳 i ω)) ≥ 1)"
    using a2 by (intro finite_measure_mono) auto
  also have "… ≤ expectation (λω. (∏i ∈ I. max 0 (𝒳 i ω))) / 1"
    by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
      auto
  also have "… ≤ (∏i ∈ I. expectation (λω. max 0 (𝒳 i ω))) / 1"
    by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
  also have "… ≤ (∏i ∈ I. expectation (𝒳 i))" using ttcv(4) by simp
  also have "… = (root (card I) (∏i∈I. expectation (𝒳 i)))^(card I) "
    using n_gt_0 ttcv(6) by (intro real_root_pow_pos2[symmetric] prod_nonneg refl) auto
  also have "… ≤ ((∑i ∈ I. expectation (𝒳 i)) / ?n)^ (card I)"
    using ttcv(6) by (intro power_mono arithmetic_geometric_mean[OF fin_I] real_root_ge_zero
        prod_nonneg) auto
  also have "… ≤ ((∑i ∈ I. expectation (𝒳 i)) / ?n) powr ?n"
    using n_gt_0 ttcv(6) by (subst powr_realpow') (auto intro!:sum_nonneg divide_nonneg_pos)
  also have "… ≤ μ powr ?n" using ttcv(2) unfolding μ_def by simp
  also have "… = ?R" using assms(4) unfolding powr_def by (subst KL_div_eq) (auto simp:ln_div)
  finally show ?thesis by simp
qed

text ‹Based on Theorem~1 by Hoeffding~\cite{hoeffding1963}.›

lemma additive_chernoff_bound_upper:
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
  defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
  assumes "δ ∈ {0..1-μ}" "μ ∈ {0<..<1}"
  shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ (μ+δ)*real (card I)) ≤ exp (-real (card I) * KL_div (μ+δ) μ)"
    (is "?L ≤ ?R")
proof -
  note [measurable] = neg_assoc_imp_measurable[OF na_X]

  let ?n = "real (card I)"
  have n_gt_0: "?n > 0" using assms fin_I by auto

  note X_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I na_X, where η="Fwd"]]

  have b:"AE x in M. (∀i ∈ I. X i x ∈ {0..1})"
    using assms(1) by (intro AE_finite_allI[OF fin_I]) simp
  hence c:"AE x in M. (∑i∈I. 1 - X i x) ≥ 0"
    by (intro AE_mp[OF b AE_I2] impI sum_nonneg) auto

  consider (i) "δ=0" | (ii) "δ ∈ {0<..<1-μ}" | (iii) "1-μ=δ" using assms(4) by fastforce
  thus ?thesis
  proof (cases)
    case i
    hence "KL_div (μ+δ) μ = 0" using assms(4,5) by (subst KL_div_eq) auto
    thus ?thesis by simp
  next
    case ii
    thus ?thesis unfolding μ_def using assms by (intro additive_chernoff_bound_upper_aux) auto
  next
    case iii
    hence a:"μ+δ=1" by simp
    thus ?thesis unfolding a mult_1 unfolding μ_def using assms
      by (intro additive_chernoff_bound_upper_aux_2) auto
  qed
qed

text ‹Based on Theorem~2 by Hoeffding~\cite{hoeffding1963}.›

lemma hoeffding_bound_upper:
  assumes "⋀i. i∈I ⟹ a i ≤ b i"
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {a i..b i}"
  defines "n ≡ real (card I)"
  defines "μ ≡ (∑i∈I. expectation (X i))"
  assumes "δ ≥ 0" "(∑i∈I. (b i - a i)^2) > 0"
  shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ μ + δ * n) ≤ exp (-2*(n*δ)^2 / (∑i∈I. (b i - a i)^2))"
    (is "?L ≤ ?R")
proof (cases "δ=0")
  case True thus ?thesis by simp
next
  case False
  define 𝒳 where "𝒳 = (λi. clamp (a i) (b i) ∘ X i)"
  have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {a i.. b i} ∧ a i ≤ b i)"
    using assms(1,2) by auto
  note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
  note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]

  define s where "s = (∑i∈I. (b i - a i)^2)"
  have s_gt_0: "s > 0" using assms unfolding s_def by auto

  have I_ne: "I ≠ {}" using assms(6) by auto

  have n_gt_0: "n > 0" using I_ne fin_I unfolding n_def by auto

  define t where "t = 4 * δ * n / s"

  have t_gt_0: "t > 0" unfolding t_def using False n_gt_0 s_gt_0 assms by auto

  note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
  note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]

  have int: "integrable M (𝒳 i)" if "i ∈ I" for i
    using that by (intro bounded') simp_all

  define ν where "ν i = expectation (X i)" for i
  have 1: "expectation (λx. 𝒳 i x - ν i) = 0" if "i ∈ I" for i
    unfolding  ν_def using int[OF that] ttcv(2)[OF that] by (simp add:prob_space)

  have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ μ+δ * n)" using ttcv(3)[where η="Rev"] by simp
  also have "… = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω - ν i) ≥ δ * n)"
    using n_gt_0 unfolding μ_def ν_def by (simp add:algebra_simps sum_subtractf)
  also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * (𝒳 i ω - ν i))) ≥ exp (t * δ * n))"
    using t_gt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
  also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * (𝒳 i ω - ν i)))) / exp (t * δ * n)"
    by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
      simp_all
  also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*(𝒳 i ω - ν i)))) / exp (t * δ * n)"
    using t_gt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
  also have "… ≤ (∏i ∈ I. exp (t^2 * ((b i - ν i) - (a i-ν i))⇧² / 8)) / exp (t*δ*n)"
    using ttcv(4) 1
    by (intro divide_right_mono prod_mono conjI Hoeffdings_lemma_bochner t_gt_0 AE_I2) simp_all
  also have "…  = (∏i ∈ I. exp (t^2 * (b i - a i)⇧² / 8)) / exp (t*δ*n)" by simp
  also have "… =  exp( (t^2/8)* (∑i ∈ I. (b i - a i)^2)) / exp (t*δ*n)"
    unfolding exp_sum[OF fin_I, symmetric] by (simp add:algebra_simps sum_distrib_left)
  also have "… =  exp( (t^2/8)* s- t*δ*n)"
    unfolding exp_diff s_def by simp
  also have "… = exp(-2 *(n*δ)^2 / s)"
    using s_gt_0 unfolding t_def by (simp add:divide_simps power2_eq_square)
  also have "… = ?R" unfolding s_def by simp
  finally show ?thesis by simp
qed

end

text ‹Dual and two-sided versions of Theorem 1 and 2 by Hoeffding~\cite{hoeffding1963}.›

lemma additive_chernoff_bound_lower:
  assumes "neg_assoc X I" "finite I"
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
  defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
  assumes "δ ∈ {0..μ}" "μ ∈ {0<..<1}"
  shows "𝒫(ω in M. (∑i∈I. X i ω) ≤ (μ-δ)*real (card I)) ≤ exp (-real (card I) * KL_div (μ-δ) μ)"
    (is "?L ≤ ?R")
proof -
  note [measurable] = neg_assoc_imp_measurable[OF assms(1)]

  have int[simp]: "integrable M (X i)" if "i ∈ I" for i
    using that by (intro integrable_const_bound[where B="1"] AE_mp[OF assms(3)[OF that] AE_I2]) auto
  have n_gt_0: "real (card I) > 0" using assms by auto

  hence 0: "(1-μ) = (∑i∈I. expectation (λω. 1 - X i ω)) / real (card I)"
    unfolding μ_def by (simp add:prob_space sum_subtractf divide_simps)
  have 1: " neg_assoc (λi ω. 1 - X i ω) I"
    by (intro neg_assoc_compose_simple[OF assms(2,1), where η="Rev"]) (auto intro:antimonoI)

  have 2: "δ ≤ (1 - (1 - μ))" "δ ≥ 0" using assms by auto
  have 3: "1-μ ∈ {0<..<1}" using assms by auto
  have "?L = 𝒫(ω in M. (∑i∈I. 1 - X i ω) ≥ ((1-μ)+δ)*real (card I))"
    by (simp add:sum_subtractf algebra_simps)
  also have "… ≤ exp (-real (card I) * KL_div ((1-μ)+δ) (1-μ))"
    using assms(3) 1 2 3 unfolding 0 by (intro additive_chernoff_bound_upper assms(2,4)) auto
  also have "… = exp (-real (card I) * KL_div (1-(μ-δ)) (1-μ))" by (simp add:algebra_simps)
  also have "… = ?R" using assms(6,7) by (subst KL_div_swap) (simp_all add:algebra_simps)
  finally show ?thesis by simp
qed

lemma hoeffding_bound_lower:
  assumes "neg_assoc X I" "finite I"
  assumes "⋀i. i∈I ⟹ a i ≤ b i"
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {a i..b i}"
  defines "n ≡ real (card I)"
  defines "μ ≡ (∑i∈I. expectation (X i))"
  assumes "δ ≥ 0" "(∑i∈I. (b i - a i)^2) > 0"
  shows "𝒫(ω in M. (∑i∈I. X i ω) ≤ μ-δ*n) ≤ exp (-2*(n*δ)^2 / (∑i∈I. (b i - a i)^2))"
    (is "?L ≤ ?R")
proof -
  have 0: "-μ = (∑i∈I. expectation (λω. - X i ω))" unfolding μ_def by (simp add:sum_negf)
  have 1: "neg_assoc (λi ω. - X i ω) I"
    by (intro neg_assoc_compose_simple[OF assms(2,1), where η="Rev"]) (auto intro:antimonoI)

  have "?L = 𝒫(ω in M. (∑i∈I. -X i ω) ≥ (-μ)+δ*n)" by (simp add:algebra_simps sum_negf)
  also have "… ≤  exp (-2*(n*δ)^2 / (∑i∈I. ((-a i) - (-b i))^2))"
    using assms(3,4,8) unfolding 0 n_def by (intro hoeffding_bound_upper[OF 1] assms(2,4,7)) auto
  also have "…=  ?R" by simp
  finally show ?thesis by simp
qed

lemma hoeffding_bound_two_sided:
  assumes "neg_assoc X I" "finite I"
  assumes "⋀i. i∈I ⟹ a i ≤ b i"
  assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {a i..b i}" "I ≠ {}"
  defines "n ≡ real (card I)"
  defines "μ ≡ (∑i∈I. expectation (X i))"
  assumes "δ ≥ 0" "(∑i∈I. (b i - a i)^2) > 0"
  shows "𝒫(ω in M. ¦(∑i∈I. X i ω)-μ¦ ≥ δ*n) ≤ 2*exp (-2*(n*δ)^2 / (∑i∈I. (b i - a i)^2))"
    (is "?L ≤ ?R")
proof -
  note [measurable] = neg_assoc_imp_measurable[OF assms(1)]

  have "?L = 𝒫(ω in M. (∑i∈I. X i ω) ≥ μ+δ*n ∨ (∑i∈I. X i ω) ≤ μ-δ*n )"
    unfolding abs_real_def by (intro arg_cong[where f="prob"] Collect_cong) auto
  also have "… = measure M ({ω∈space M. (∑i∈I. X i ω)≥μ+δ*n}∪{ω∈space M. (∑i∈I. X i ω)≤μ-δ*n})"
    by (intro arg_cong[where f="prob"]) auto
  also have "… ≤ 𝒫(ω in M. (∑i∈I. X i ω) ≥ μ+δ*n) + 𝒫(ω in M.(∑i∈I. X i ω) ≤ μ-δ*n )"
    by (intro measure_Un_le) measurable
  also have "… ≤ exp (-2*(n*δ)^2 / (∑i∈I. (b i-a i)^2)) + exp (-2*(n*δ)^2 / (∑i∈I. (b i-a i)^2))"
    unfolding n_def μ_def by (intro hoeffding_bound_lower hoeffding_bound_upper add_mono assms)
  also have "… = ?R" by simp
  finally show ?thesis by simp
qed

end

end