Theory Negative_Association_Chernoff_Bounds
section ‹Chernoff-Hoeffding Bounds›
text ‹This section shows that all the well-known Chernoff-Hoeffding bounds hold also for
negatively associated random variables. The proofs follow the derivations by
Hoeffding~\cite{hoeffding1963}, as well as, Motwani and Raghavan~\cite[Ch. 4]{motwani1995}, with the
modification that the crucial steps, where the classic proofs use independence, are replaced with
the application of Property P2 for negatively associated RV's.›
theory Negative_Association_Chernoff_Bounds
imports
Negative_Association_Definition
Concentration_Inequalities.McDiarmid_Inequality
Weighted_Arithmetic_Geometric_Mean.Weighted_Arithmetic_Geometric_Mean
begin
context prob_space
begin
context
fixes I :: "'i set"
fixes X :: "'i ⇒ 'a ⇒ real"
assumes na_X: "neg_assoc X I"
assumes fin_I: "finite I"
begin
private lemma transfer_to_clamped_vars:
assumes "(∀i∈I. AE ω in M. X i ω ∈ {a i..b i} ∧ a i ≤ b i)"
assumes 𝒳_def: "𝒳 = (λi. clamp (a i) (b i) ∘ X i)"
shows "neg_assoc 𝒳 I" (is "?A")
and "⋀i. i ∈ I ⟹ expectation (𝒳 i) = expectation (X i)"
and "𝒫(ω in M. (∑i ∈ I. X i ω) ≤≥⇘η⇙ c) = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≤≥⇘η⇙ c)" (is "?C")
and "⋀i ω. i ∈ I ⟹ 𝒳 i ω ∈ {a i..b i}"
and "⋀i S. i ∈ I ⟹ bounded (𝒳 i ` S)"
and "⋀i. i ∈ I ⟹ expectation (𝒳 i) ∈ {a i..b i}"
proof -
note [measurable] = clamp_borel
note rv_X = neg_assoc_imp_measurable[OF na_X]
hence rv_𝒳: "random_variable borel (𝒳 i)" if "i ∈ I" for i
unfolding 𝒳_def using rv_X[OF that] by measurable
have a:"AE x in M. 𝒳 i x = X i x" if "i ∈ I" for i
unfolding 𝒳_def using clamp_eqI2 by (intro AE_mp[OF bspec[OF assms(1) that] AE_I2]) auto
hence b:"AE x in M. (∀i ∈ I. 𝒳 i x = X i x)"
by (intro AE_finite_allI[OF fin_I]) simp
show "?A"
using a by (intro neg_assoc_cong[OF fin_I rv_𝒳 na_X]) force+
show "expectation (𝒳 i) = expectation (X i)" if "i ∈ I" for i
by (intro integral_cong_AE a rv_X rv_𝒳 that)
have "{ω ∈ space M. (∑i∈I. X i ω) ≤≥⇘η⇙ c} ∈ events" using rv_X by (cases η) simp_all
moreover have "{ω ∈ space M. (∑i∈I. 𝒳 i ω) ≤≥⇘η⇙ c} ∈ events" using rv_𝒳 by (cases η) simp_all
ultimately show "?C" by (intro measure_eq_AE AE_mp[OF b AE_I2]) auto
show c:"𝒳 i ω ∈ {a i..b i}" if "i ∈ I" for ω i
unfolding 𝒳_def comp_def using assms(1) clamp_range that by simp
show d:"bounded (𝒳 i ` S)" if "i ∈ I" for S i
using c[OF that] assms(2) bounded_clamp by blast
show "expectation (𝒳 i) ∈ {a i..b i}" if "i ∈ I" for i
unfolding atLeastAtMost_iff using c[OF that] rv_𝒳[OF that]
by (intro conjI integral_ge_const integral_le_const AE_I2 integrable_bounded d[OF that]) auto
qed
lemma ln_one_plus_x_lower_bound:
assumes "x ≥ (0::real)"
shows "2*x/(2+x) ≤ ln (1 + x)"
proof -
define v where "v x = ln(1+x) - 2 * x/ (2+x) " for x :: real
define v' where "v' x = 1/(1+x) - 4/(2+x)^2" for x :: real
have v_deriv: "(v has_real_derivative (v' x)) (at x)" if "x ≥ 0" for x
using that unfolding v_def v'_def power2_eq_square by (auto intro!:derivative_eq_intros)
have v_deriv_nonneg: "v' x ≥ 0" if "x ≥ 0" for x
using that unfolding v'_def
by (simp add:divide_simps power2_eq_square) (simp add:algebra_simps)
have v_mono: "v x ≤ v y" if "x ≤ y" "x ≥ 0" for x y
using v_deriv v_deriv_nonneg that order_trans
by (intro DERIV_nonneg_imp_nondecreasing[OF that(1)]) blast
have "0 = v 0" unfolding v_def by simp
also have "… ≤ v x" using v_mono assms by auto
finally have "v x ≥ 0" by simp
thus ?thesis unfolding v_def by simp
qed
text ‹Based on Theorem~4.1 by Motwani and Raghavan~\cite{motwani1995}.›
theorem multiplicative_chernoff_bound_upper:
assumes "δ > 0"
assumes "⋀i. i ∈ I ⟹ AE ω in M. X i ω ∈ {0..1}"
defines "μ ≡ (∑i ∈ I. expectation (X i))"
shows "𝒫(ω in M. (∑i ∈ I. X i ω) ≥ (1+δ) * μ) ≤ (exp δ/((1+δ) powr (1+δ))) powr μ" (is "?L ≤ ?R")
and "𝒫(ω in M. (∑i ∈ I. X i ω) ≥ (1+δ) * μ) ≤ exp (-(δ^2) * μ / (2+δ))" (is "_ ≤ ?R1")
proof -
define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0 .. 1} ∧ 0 ≤ (1::real))"
using assms(2) by auto
note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]
define t where "t = ln (1+δ)"
have t_gt_0: "t > 0" using assms(1) unfolding t_def by simp
let ?h = "(λx. 1 + (exp t - 1) * x)"
note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
have int: "integrable M (𝒳 i)" if "i ∈ I" for i
using that by (intro bounded') simp_all
have " 2*δ ≤ (2+δ)* ln (1 + δ)"
using assms(1) ln_one_plus_x_lower_bound[OF less_imp_le[OF assms(1)]] by (simp add:field_simps)
hence " (1+δ)*(2*δ) ≤ (1 + δ) *(2+δ)* ln (1 + δ)" using assms(1) by simp
hence a:"(δ - (1 + δ) * ln (1 + δ)) ≤ - (δ^2)/(2+δ)"
using assms(1) by (simp add:field_simps power2_eq_square)
have μ_ge_0: "μ ≥ 0" unfolding μ_def using ttcv(2,6) by (intro sum_nonneg) auto
note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]
have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ (1+δ) * μ)" using ttcv(3)[where η="Rev"] by simp
also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * 𝒳 i ω)) ≥ exp (t * (1+δ) * μ))"
using t_gt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * 𝒳 i ω))) / exp (t*(1+δ)*μ)"
by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
simp_all
also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*𝒳 i ω))) / exp (t*(1+δ)*μ)"
using t_gt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
also have "… = (∏i ∈ I. expectation (λω. exp ((1-𝒳 i ω) *⇩R 0+ 𝒳 i ω *⇩R t))) / exp (t*(1+δ)*μ)"
by (simp add:ac_simps)
also have "… ≤ (∏i ∈ I. expectation (λω. (1-𝒳 i ω) * exp 0 + 𝒳 i ω * exp t)) / exp (t*(1+δ)*μ)"
using ttcv(4)
by (intro divide_right_mono prod_mono integral_mono conjI bounded' convex_onD[OF exp_convex])
simp_all
also have "… = (∏i ∈ I. ?h (expectation (𝒳 i))) / exp (t*(1+δ)*μ)"
using int by (simp add:algebra_simps prob_space cong:prod.cong)
also have "… ≤ (∏i ∈ I. exp((exp t-1)* expectation (𝒳 i))) / exp (t*(1+δ)*μ)"
using t_gt_0 ttcv(4)
by (intro divide_right_mono prod_mono exp_ge_add_one_self conjI add_nonneg_nonneg
mult_nonneg_nonneg) simp_all
also have "… = exp ((exp t-1)* μ) / exp (t*(1+δ)*μ)"
unfolding exp_sum[OF fin_I, symmetric] μ_def by (simp add:ttcv(2) sum_distrib_left)
also have "… = exp (δ * μ) / exp ( ln (1+δ)*(1+δ) * μ)"
using assms(1) unfolding μ_def t_def by (simp add:sum_distrib_left)
also have "… = exp δ powr μ / exp ( ln(1+δ)*(1+δ)) powr μ"
unfolding powr_def by (simp add:ac_simps)
also have "… = ?R" using assms(1) by (subst powr_divide) (simp_all add:powr_def)
finally show "?L ≤ ?R" by simp
also have "… = exp (μ * ln (exp δ / exp ((1 + δ) * ln (1 + δ))))"
using assms unfolding powr_def by simp
also have "… = exp (μ * (δ - (1 + δ) * ln (1 + δ)))" by (subst ln_div) simp_all
also have "… ≤ exp (μ * (-(δ^2)/(2+δ)))"
by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono a μ_ge_0)
also have "… = ?R1" by simp
finally show "?L ≤ ?R1" by simp
qed
lemma ln_one_minus_x_lower_bound:
assumes "x ∈ {(0::real)..<1}"
shows "(x^2/2-x)/(1-x) ≤ ln (1 - x)"
proof -
define v where "v x = ln(1-x) - (x^2/2-x) / (1-x) " for x :: real
define v' where "v' x = -1/(1-x) - (-(x^2)/2+x-1)/((1-x)^2)" for x :: real
have v_deriv: "(v has_real_derivative (v' x)) (at x)" if "x ∈ {0..<1}" for x
using that unfolding v_def v'_def power2_eq_square
by (auto intro!:derivative_eq_intros simp:algebra_simps)
have v_deriv_nonneg: "v' x ≥ 0" if "x ≥ 0" for x
using that unfolding v'_def by (simp add:divide_simps power2_eq_square)
have v_mono: "v x ≤ v y" if "x ≤ y" "x ≥ 0" "y < 1" for x y
using v_deriv v_deriv_nonneg that unfolding atLeastLessThan_iff
by (intro DERIV_nonneg_imp_nondecreasing[OF that(1)])
(metis (mono_tags, opaque_lifting) Ico_eq_Ico ivl_subset linorder_not_le order_less_irrefl)
have "0 = v 0" unfolding v_def by simp
also have "… ≤ v x" using v_mono assms by auto
finally have "v x ≥ 0" by simp
thus ?thesis unfolding v_def by simp
qed
text ‹Based on Theorem~4.2 by Motwani and Raghavan~\cite{motwani1995}.›
theorem multiplicative_chernoff_bound_lower:
assumes "δ ∈ {0<..<1}"
assumes "⋀i. i ∈ I ⟹ AE ω in M. X i ω ∈ {0..1}"
defines "μ ≡ (∑i ∈ I. expectation (X i))"
shows "𝒫(ω in M. (∑i ∈ I. X i ω) ≤ (1-δ)*μ) ≤ (exp (-δ)/(1-δ) powr (1-δ)) powr μ" (is "?L ≤ ?R")
and "𝒫(ω in M. (∑i ∈ I. X i ω) ≤ (1-δ)*μ) ≤ (exp (-(δ^2)*μ/2))" (is "_ ≤ ?R1")
proof -
define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0 .. 1} ∧ 0 ≤ (1::real))"
using assms(2) by auto
note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]
define t where "t = ln (1-δ)"
have t_lt_0: "t < 0" using assms(1) unfolding t_def by simp
let ?h = "(λx. 1 + (exp t - 1) * x)"
note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
have μ_ge_0: "μ ≥ 0" unfolding μ_def using ttcv(2,6) by (intro sum_nonneg) auto
have int: "integrable M (𝒳 i)" if "i ∈ I" for i
using that by (intro bounded') simp_all
note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Rev"]]
have 0: "0 ≤ 1 + (exp t - 1) * expectation (𝒳 i)" if "i ∈ I" for i
proof -
have "0 ≤ 1 + (exp t - 1) * 1" by simp
also have "… ≤ 1 + (exp t - 1) * expectation (𝒳 i)"
using t_lt_0 ttcv(6)[OF that] by (intro add_mono mult_left_mono_neg) auto
finally show ?thesis by simp
qed
have "δ ∈ {0..<1}" using assms(1) by simp
from ln_one_minus_x_lower_bound[OF this]
have "δ⇧2 / 2 - δ ≤ (1 - δ) * ln (1 - δ)" using assms(1) by (simp add:field_simps)
hence 1: "- δ - (1 - δ) * ln (1 - δ) ≤ - δ⇧2 / 2" by (simp add:algebra_simps)
have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≤ (1-δ) * μ)" using ttcv(3)[where η="Fwd"] by simp
also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * 𝒳 i ω)) ≥ exp (t * (1-δ) * μ))"
using t_lt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * 𝒳 i ω))) / exp (t*(1-δ)*μ)"
by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
simp_all
also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*𝒳 i ω))) / exp (t*(1-δ)*μ)"
using t_lt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
also have "… = (∏i ∈ I. expectation (λω. exp ((1-𝒳 i ω) *⇩R 0+ 𝒳 i ω *⇩R t))) / exp (t*(1-δ)*μ)"
by (simp add:ac_simps)
also have "… ≤ (∏i ∈ I. expectation (λω. (1-𝒳 i ω) * exp 0 + 𝒳 i ω * exp t)) / exp (t*(1-δ)*μ)"
using ttcv(4)
by (intro divide_right_mono prod_mono integral_mono conjI bounded' convex_onD[OF exp_convex])
simp_all
also have "… = (∏i ∈ I. ?h (expectation (𝒳 i))) / exp (t*(1-δ)*μ)"
using int by (simp add:algebra_simps prob_space cong:prod.cong)
also have "… ≤ (∏i ∈ I. exp((exp t-1)* expectation (𝒳 i))) / exp (t*(1-δ)*μ)"
using 0 by (intro divide_right_mono prod_mono exp_ge_add_one_self conjI) simp_all
also have "… = exp ((exp t-1)* μ) / exp (t*(1-δ)*μ)"
unfolding exp_sum[OF fin_I, symmetric] μ_def by (simp add:ttcv(2) sum_distrib_left)
also have "… = exp ((-δ) * μ) / exp ( ln (1-δ)*(1-δ) * μ)"
using assms(1) unfolding μ_def t_def by (simp add:sum_distrib_left)
also have "… = exp (-δ) powr μ / exp ( ln(1-δ)*(1-δ)) powr μ"
unfolding powr_def by (simp add:ac_simps)
also have "… = ?R" using assms(1) by (subst powr_divide) (simp_all add:powr_def)
finally show "?L ≤ ?R" by simp
also have "… = exp (μ * (- δ - (1 - δ) * ln (1 - δ)))"
using assms(1) unfolding powr_def by (simp add:ln_div)
also have "… ≤ exp (μ * (-(δ^2) / 2 ))"
by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono μ_ge_0 1)
finally show "?L ≤ ?R1" by (simp add:ac_simps)
qed
theorem multiplicative_chernoff_bound_two_sided:
assumes "δ ∈ {0<..<1}"
assumes "⋀i. i ∈ I ⟹ AE ω in M. X i ω ∈ {0..1}"
defines "μ ≡ (∑i ∈ I. expectation (X i))"
shows "𝒫(ω in M. ¦(∑i ∈ I. X i ω) - μ¦ ≥ δ*μ) ≤ 2*(exp (-(δ^2)*μ/3))" (is "?L ≤ ?R")
proof -
define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0 .. 1} ∧ 0 ≤ (1::real))"
using assms(2) by auto
note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
have μ_ge_0: "μ ≥ 0" unfolding μ_def using ttcv(2,6) by (intro sum_nonneg) auto
note [measurable] = neg_assoc_imp_measurable[OF na_X]
have "?L = 𝒫(ω in M. (∑i∈I. X i ω) ≥ (1+δ)*μ ∨ (∑i∈I. X i ω) ≤ (1-δ)*μ)" unfolding abs_real_def
by (intro arg_cong[where f="prob"] Collect_cong) (auto simp:algebra_simps)
also have "… =measure M({ω∈space M.(∑i∈I. X i ω)≥(1+δ)*μ}∪{ω∈space M. (∑i∈I. X i ω)≤(1-δ)*μ})"
by (intro arg_cong[where f="prob"]) auto
also have "… ≤ 𝒫(ω in M. (∑i∈I. X i ω) ≥ (1+δ)*μ) + 𝒫(ω in M.(∑i∈I. X i ω) ≤ (1-δ)*μ )"
by (intro measure_Un_le) measurable
also have "… ≤ exp (-(δ^2)*μ/(2+δ)) + exp (-(δ^2)*μ/2)"
unfolding μ_def using assms(1,2)
by (intro multiplicative_chernoff_bound_lower multiplicative_chernoff_bound_upper add_mono) auto
also have "… ≤ exp (-(δ^2)*μ/3) + exp (-(δ^2)*μ/3)"
using assms(1) μ_ge_0 by (intro iffD2[OF exp_le_cancel_iff] add_mono divide_left_mono_neg) auto
also have "… = ?R" by simp
finally show ?thesis by simp
qed
lemma additive_chernoff_bound_upper_aux:
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
assumes "δ ∈ {0<..<1-μ}" "μ ∈ {0<..<1}"
shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ (μ+δ)*real (card I)) ≤ exp (-real (card I) * KL_div (μ+δ) μ)"
(is "?L ≤ ?R")
proof -
define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0..1} ∧ 0 ≤ (1::real))"
using assms(1) by auto
note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]
define t :: real where "t = ln ((μ+δ)/μ) - ln ((1-μ-δ)/(1-μ))"
let ?h = "λx. 1 + (exp t - 1) * x"
let ?n = "real (card I)"
have n_gt_0: "?n > 0" using assms(2) fin_I by auto
have a: "(1 - μ - δ) > 0" "μ > 0" "1 - μ > 0" "μ + δ > 0"
using assms(4,5) by auto
have "ln ((1 - μ - δ) / (1 - μ)) < 0" using a assms(4) by (intro ln_less_zero) auto
moreover have "ln ((μ + δ) / μ) > 0" using a assms(4) by (intro ln_gt_zero) auto
ultimately have t_gt_0: "t > 0" unfolding t_def by simp
note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]
have int: "integrable M (𝒳 i)" if "i ∈ I" for i
using that by (intro bounded') simp_all
have 0: "0 ≤ 1 + (exp t - 1) * expectation (𝒳 i)" if "i ∈ I" for i
using t_gt_0 ttcv(6)[OF that] by (intro add_nonneg_nonneg mult_nonneg_nonneg) auto
have "1 + (exp t - 1) * μ = 1 + ((μ + δ) * (1 - μ) / (μ * (1 - μ - δ)) - 1) * μ"
using a unfolding t_def exp_diff by simp
also have "… = 1 + (δ / (μ * (1 - μ - δ))) * μ"
using a by (subst divide_diff_eq_iff) (simp, simp add:algebra_simps)
also have "… = (1-μ-δ)/(1-μ-δ) + (δ / (1-μ-δ))" using a by simp
also have "… = (1-μ) / (1-μ-δ)"
unfolding add_divide_distrib[symmetric] by (simp add:algebra_simps)
also have "… = inverse ((1-μ-δ) / (1-μ))" using a by simp
also have "… = exp (ln (inverse ((1-μ-δ) / (1-μ))))" using a by simp
also have "… = exp (- ln((1-μ-δ) / (1-μ)))" using a by (subst ln_inverse) (simp_all)
finally have 1: "1 + (exp t - 1) * μ = exp (- ln((1-μ-δ) / (1-μ)))" by simp
have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ (μ+δ) * ?n)" using ttcv(3)[where η="Rev"] by simp
also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * 𝒳 i ω)) ≥ exp (t * (μ+δ) * ?n))"
using t_gt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * 𝒳 i ω))) / exp (t * (μ+δ) * ?n)"
by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
simp_all
also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*𝒳 i ω))) / exp (t * (μ+δ) * ?n)"
using t_gt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
also have "… = (∏i ∈ I. expectation (λω. exp ((1-𝒳 i ω) *⇩R 0 + 𝒳 i ω *⇩R t))) / exp (t*(μ+δ)*?n)"
by (simp add:ac_simps)
also have "… ≤ (∏i∈I. expectation (λω. (1-𝒳 i ω)*exp 0 + 𝒳 i ω * exp t)) / exp (t * (μ+δ) * ?n)"
using ttcv(4)
by (intro divide_right_mono prod_mono integral_mono conjI bounded' convex_onD[OF exp_convex])
simp_all
also have "… = (∏i ∈ I. ?h (expectation (𝒳 i))) / exp (t * (μ+δ) * ?n)"
using int by (simp add:algebra_simps prob_space cong:prod.cong)
also have "… = (root (card I) (∏i∈I. 1+(exp t-1)*expectation (𝒳 i)))^(card I) / exp (t*(μ+δ)*?n)"
using n_gt_0
by (intro arg_cong2[where f="(/)"] real_root_pow_pos2[symmetric] prod_nonneg refl 0) auto
also have "… ≤ ((∑i ∈ I. 1 + (exp t - 1) * expectation (𝒳 i)) / ?n)^ (card I) / exp (t*(μ+δ)*?n)"
by (intro divide_right_mono power_mono arithmetic_geometric_mean[OF fin_I] real_root_ge_zero
prod_nonneg 0) simp_all
also have "… ≤ ((∑i ∈ I. 1 + (exp t - 1) * expectation (𝒳 i)) / ?n) powr ?n / exp (t*(μ+δ)*?n)"
using n_gt_0 0 by (subst powr_realpow') (auto intro!:sum_nonneg divide_nonneg_pos 0)
also have "… ≤ ((∑i ∈ I. 1 + (exp t - 1) * expectation (X i)) / ?n) powr ?n / exp (t*(μ+δ)*?n)"
using ttcv(2) by (simp cong:sum.cong)
also have "… = (1 + (exp t - 1) * μ) powr ?n / exp (t*(μ+δ)*?n)"
using n_gt_0 unfolding μ_def sum.distrib sum_distrib_left[symmetric] by (simp add:divide_simps)
also have "… = (1 + (exp t - 1) * μ) powr ?n / exp (t*(μ+δ)) powr ?n"
unfolding powr_def by simp
also have "… = ((1 + (exp t - 1) * μ)/exp(t*(μ+δ))) powr ?n"
using a t_gt_0 by (auto intro: powr_divide[symmetric] add_nonneg_nonneg mult_nonneg_nonneg)
also have "… = (exp (- ln((1-μ-δ) / (1-μ))) * exp( -(t * (μ+δ)))) powr ?n"
unfolding 1 exp_minus inverse_eq_divide by simp
also have "… = exp ( -ln((1 - μ-δ)/(1 - μ))- t * (μ+δ)) powr ?n"
unfolding exp_add[symmetric] by simp
also have "… = exp ( -ln((1 - μ-δ)/(1 - μ))- (ln ((μ+δ)/μ) - ln ((1-μ-δ)/(1-μ)))*(μ+δ)) powr ?n"
using a unfolding t_def by (simp add:divide_simps)
also have "… = exp( -KL_div (μ+δ) μ) powr ?n"
using a by (subst KL_div_eq) (simp_all add:field_simps)
also have "… = ?R" unfolding powr_def by simp
finally show ?thesis by simp
qed
lemma additive_chernoff_bound_upper_aux_2:
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
assumes "μ ∈ {0<..<1}"
shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ real (card I)) ≤ exp (-real (card I) * KL_div 1 μ)"
(is "?L ≤ ?R")
proof -
define 𝒳 where "𝒳 = (λi. clamp 0 1 ∘ X i)"
have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {0..1} ∧ 0 ≤ (1::real))"
using assms(1) by auto
note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]
let ?n = "real (card I)"
have n_gt_0: "?n > 0" using assms(2) fin_I by auto
note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
bounded_max
note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]
have a2:"(∏i ∈ I. max 0 (𝒳 i ω)) ≥ 1" if "(∑i ∈ I. 𝒳 i ω) ≥ ?n" for ω
proof -
have "(∑i ∈ I. 1 - 𝒳 i ω) ≤ 0" using that by (simp add:sum_subtractf)
moreover have "(∑i ∈ I. 1 - 𝒳 i ω) ≥ 0" using ttcv(4) by (intro sum_nonneg) simp
ultimately have "(∑i ∈ I. 1 - 𝒳 i ω) = 0" by simp
with iffD1[OF sum_nonneg_eq_0_iff[OF fin_I] this]
have "∀i ∈ I. 1 - 𝒳 i ω = 0" using ttcv(4) by simp
hence "𝒳 i ω = 1" if "i ∈ I" for i using that by auto
thus ?thesis by (intro prod_ge_1) fastforce
qed
have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ ?n)" using ttcv(3)[where η="Rev"] by simp
also have "… ≤ 𝒫(ω in M. (∏i ∈ I. max 0 (𝒳 i ω)) ≥ 1)"
using a2 by (intro finite_measure_mono) auto
also have "… ≤ expectation (λω. (∏i ∈ I. max 0 (𝒳 i ω))) / 1"
by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
auto
also have "… ≤ (∏i ∈ I. expectation (λω. max 0 (𝒳 i ω))) / 1"
by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
also have "… ≤ (∏i ∈ I. expectation (𝒳 i))" using ttcv(4) by simp
also have "… = (root (card I) (∏i∈I. expectation (𝒳 i)))^(card I) "
using n_gt_0 ttcv(6) by (intro real_root_pow_pos2[symmetric] prod_nonneg refl) auto
also have "… ≤ ((∑i ∈ I. expectation (𝒳 i)) / ?n)^ (card I)"
using ttcv(6) by (intro power_mono arithmetic_geometric_mean[OF fin_I] real_root_ge_zero
prod_nonneg) auto
also have "… ≤ ((∑i ∈ I. expectation (𝒳 i)) / ?n) powr ?n"
using n_gt_0 ttcv(6) by (subst powr_realpow') (auto intro!:sum_nonneg divide_nonneg_pos)
also have "… ≤ μ powr ?n" using ttcv(2) unfolding μ_def by simp
also have "… = ?R" using assms(4) unfolding powr_def by (subst KL_div_eq) (auto simp:ln_div)
finally show ?thesis by simp
qed
text ‹Based on Theorem~1 by Hoeffding~\cite{hoeffding1963}.›
lemma additive_chernoff_bound_upper:
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
assumes "δ ∈ {0..1-μ}" "μ ∈ {0<..<1}"
shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ (μ+δ)*real (card I)) ≤ exp (-real (card I) * KL_div (μ+δ) μ)"
(is "?L ≤ ?R")
proof -
note [measurable] = neg_assoc_imp_measurable[OF na_X]
let ?n = "real (card I)"
have n_gt_0: "?n > 0" using assms fin_I by auto
note X_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I na_X, where η="Fwd"]]
have b:"AE x in M. (∀i ∈ I. X i x ∈ {0..1})"
using assms(1) by (intro AE_finite_allI[OF fin_I]) simp
hence c:"AE x in M. (∑i∈I. 1 - X i x) ≥ 0"
by (intro AE_mp[OF b AE_I2] impI sum_nonneg) auto
consider (i) "δ=0" | (ii) "δ ∈ {0<..<1-μ}" | (iii) "1-μ=δ" using assms(4) by fastforce
thus ?thesis
proof (cases)
case i
hence "KL_div (μ+δ) μ = 0" using assms(4,5) by (subst KL_div_eq) auto
thus ?thesis by simp
next
case ii
thus ?thesis unfolding μ_def using assms by (intro additive_chernoff_bound_upper_aux) auto
next
case iii
hence a:"μ+δ=1" by simp
thus ?thesis unfolding a mult_1 unfolding μ_def using assms
by (intro additive_chernoff_bound_upper_aux_2) auto
qed
qed
text ‹Based on Theorem~2 by Hoeffding~\cite{hoeffding1963}.›
lemma hoeffding_bound_upper:
assumes "⋀i. i∈I ⟹ a i ≤ b i"
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {a i..b i}"
defines "n ≡ real (card I)"
defines "μ ≡ (∑i∈I. expectation (X i))"
assumes "δ ≥ 0" "(∑i∈I. (b i - a i)^2) > 0"
shows "𝒫(ω in M. (∑i∈I. X i ω) ≥ μ + δ * n) ≤ exp (-2*(n*δ)^2 / (∑i∈I. (b i - a i)^2))"
(is "?L ≤ ?R")
proof (cases "δ=0")
case True thus ?thesis by simp
next
case False
define 𝒳 where "𝒳 = (λi. clamp (a i) (b i) ∘ X i)"
have transfer_to_clamped_vars_assms: "(∀i∈I. AE ω in M. X i ω ∈ {a i.. b i} ∧ a i ≤ b i)"
using assms(1,2) by auto
note ttcv = transfer_to_clamped_vars[OF transfer_to_clamped_vars_assms 𝒳_def]
note [measurable] = neg_assoc_imp_measurable[OF ttcv(1)]
define s where "s = (∑i∈I. (b i - a i)^2)"
have s_gt_0: "s > 0" using assms unfolding s_def by auto
have I_ne: "I ≠ {}" using assms(6) by auto
have n_gt_0: "n > 0" using I_ne fin_I unfolding n_def by auto
define t where "t = 4 * δ * n / s"
have t_gt_0: "t > 0" unfolding t_def using False n_gt_0 s_gt_0 assms by auto
note bounded' = integrable_bounded bounded_prod bounded_vec_mult_comp bounded_intros ttcv(5)
note 𝒳_prod_mono = has_int_thatD(2)[OF neg_assoc_imp_prod_mono[OF fin_I ttcv(1), where η="Fwd"]]
have int: "integrable M (𝒳 i)" if "i ∈ I" for i
using that by (intro bounded') simp_all
define ν where "ν i = expectation (X i)" for i
have 1: "expectation (λx. 𝒳 i x - ν i) = 0" if "i ∈ I" for i
unfolding ν_def using int[OF that] ttcv(2)[OF that] by (simp add:prob_space)
have "?L = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω) ≥ μ+δ * n)" using ttcv(3)[where η="Rev"] by simp
also have "… = 𝒫(ω in M. (∑i ∈ I. 𝒳 i ω - ν i) ≥ δ * n)"
using n_gt_0 unfolding μ_def ν_def by (simp add:algebra_simps sum_subtractf)
also have "… = 𝒫(ω in M. (∏i ∈ I. exp (t * (𝒳 i ω - ν i))) ≥ exp (t * δ * n))"
using t_gt_0 by (simp add: sum_distrib_left[symmetric] exp_sum[OF fin_I,symmetric])
also have "… ≤ expectation (λω. (∏i ∈ I. exp (t * (𝒳 i ω - ν i)))) / exp (t * δ * n)"
by (intro integral_Markov_inequality_measure[where A="{}"] bounded' AE_I2 prod_nonneg fin_I)
simp_all
also have "… ≤ (∏i ∈ I. expectation (λω. exp (t*(𝒳 i ω - ν i)))) / exp (t * δ * n)"
using t_gt_0 by (intro divide_right_mono 𝒳_prod_mono bounded' image_subsetI monotoneI) simp_all
also have "… ≤ (∏i ∈ I. exp (t^2 * ((b i - ν i) - (a i-ν i))⇧2 / 8)) / exp (t*δ*n)"
using ttcv(4) 1
by (intro divide_right_mono prod_mono conjI Hoeffdings_lemma_bochner t_gt_0 AE_I2) simp_all
also have "… = (∏i ∈ I. exp (t^2 * (b i - a i)⇧2 / 8)) / exp (t*δ*n)" by simp
also have "… = exp( (t^2/8)* (∑i ∈ I. (b i - a i)^2)) / exp (t*δ*n)"
unfolding exp_sum[OF fin_I, symmetric] by (simp add:algebra_simps sum_distrib_left)
also have "… = exp( (t^2/8)* s- t*δ*n)"
unfolding exp_diff s_def by simp
also have "… = exp(-2 *(n*δ)^2 / s)"
using s_gt_0 unfolding t_def by (simp add:divide_simps power2_eq_square)
also have "… = ?R" unfolding s_def by simp
finally show ?thesis by simp
qed
end
text ‹Dual and two-sided versions of Theorem 1 and 2 by Hoeffding~\cite{hoeffding1963}.›
lemma additive_chernoff_bound_lower:
assumes "neg_assoc X I" "finite I"
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {0..1}" "I ≠ {}"
defines "μ ≡ (∑i∈I. expectation (X i)) / real (card I)"
assumes "δ ∈ {0..μ}" "μ ∈ {0<..<1}"
shows "𝒫(ω in M. (∑i∈I. X i ω) ≤ (μ-δ)*real (card I)) ≤ exp (-real (card I) * KL_div (μ-δ) μ)"
(is "?L ≤ ?R")
proof -
note [measurable] = neg_assoc_imp_measurable[OF assms(1)]
have int[simp]: "integrable M (X i)" if "i ∈ I" for i
using that by (intro integrable_const_bound[where B="1"] AE_mp[OF assms(3)[OF that] AE_I2]) auto
have n_gt_0: "real (card I) > 0" using assms by auto
hence 0: "(1-μ) = (∑i∈I. expectation (λω. 1 - X i ω)) / real (card I)"
unfolding μ_def by (simp add:prob_space sum_subtractf divide_simps)
have 1: " neg_assoc (λi ω. 1 - X i ω) I"
by (intro neg_assoc_compose_simple[OF assms(2,1), where η="Rev"]) (auto intro:antimonoI)
have 2: "δ ≤ (1 - (1 - μ))" "δ ≥ 0" using assms by auto
have 3: "1-μ ∈ {0<..<1}" using assms by auto
have "?L = 𝒫(ω in M. (∑i∈I. 1 - X i ω) ≥ ((1-μ)+δ)*real (card I))"
by (simp add:sum_subtractf algebra_simps)
also have "… ≤ exp (-real (card I) * KL_div ((1-μ)+δ) (1-μ))"
using assms(3) 1 2 3 unfolding 0 by (intro additive_chernoff_bound_upper assms(2,4)) auto
also have "… = exp (-real (card I) * KL_div (1-(μ-δ)) (1-μ))" by (simp add:algebra_simps)
also have "… = ?R" using assms(6,7) by (subst KL_div_swap) (simp_all add:algebra_simps)
finally show ?thesis by simp
qed
lemma hoeffding_bound_lower:
assumes "neg_assoc X I" "finite I"
assumes "⋀i. i∈I ⟹ a i ≤ b i"
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {a i..b i}"
defines "n ≡ real (card I)"
defines "μ ≡ (∑i∈I. expectation (X i))"
assumes "δ ≥ 0" "(∑i∈I. (b i - a i)^2) > 0"
shows "𝒫(ω in M. (∑i∈I. X i ω) ≤ μ-δ*n) ≤ exp (-2*(n*δ)^2 / (∑i∈I. (b i - a i)^2))"
(is "?L ≤ ?R")
proof -
have 0: "-μ = (∑i∈I. expectation (λω. - X i ω))" unfolding μ_def by (simp add:sum_negf)
have 1: "neg_assoc (λi ω. - X i ω) I"
by (intro neg_assoc_compose_simple[OF assms(2,1), where η="Rev"]) (auto intro:antimonoI)
have "?L = 𝒫(ω in M. (∑i∈I. -X i ω) ≥ (-μ)+δ*n)" by (simp add:algebra_simps sum_negf)
also have "… ≤ exp (-2*(n*δ)^2 / (∑i∈I. ((-a i) - (-b i))^2))"
using assms(3,4,8) unfolding 0 n_def by (intro hoeffding_bound_upper[OF 1] assms(2,4,7)) auto
also have "…= ?R" by simp
finally show ?thesis by simp
qed
lemma hoeffding_bound_two_sided:
assumes "neg_assoc X I" "finite I"
assumes "⋀i. i∈I ⟹ a i ≤ b i"
assumes "⋀i. i∈I ⟹ AE ω in M. X i ω ∈ {a i..b i}" "I ≠ {}"
defines "n ≡ real (card I)"
defines "μ ≡ (∑i∈I. expectation (X i))"
assumes "δ ≥ 0" "(∑i∈I. (b i - a i)^2) > 0"
shows "𝒫(ω in M. ¦(∑i∈I. X i ω)-μ¦ ≥ δ*n) ≤ 2*exp (-2*(n*δ)^2 / (∑i∈I. (b i - a i)^2))"
(is "?L ≤ ?R")
proof -
note [measurable] = neg_assoc_imp_measurable[OF assms(1)]
have "?L = 𝒫(ω in M. (∑i∈I. X i ω) ≥ μ+δ*n ∨ (∑i∈I. X i ω) ≤ μ-δ*n )"
unfolding abs_real_def by (intro arg_cong[where f="prob"] Collect_cong) auto
also have "… = measure M ({ω∈space M. (∑i∈I. X i ω)≥μ+δ*n}∪{ω∈space M. (∑i∈I. X i ω)≤μ-δ*n})"
by (intro arg_cong[where f="prob"]) auto
also have "… ≤ 𝒫(ω in M. (∑i∈I. X i ω) ≥ μ+δ*n) + 𝒫(ω in M.(∑i∈I. X i ω) ≤ μ-δ*n )"
by (intro measure_Un_le) measurable
also have "… ≤ exp (-2*(n*δ)^2 / (∑i∈I. (b i-a i)^2)) + exp (-2*(n*δ)^2 / (∑i∈I. (b i-a i)^2))"
unfolding n_def μ_def by (intro hoeffding_bound_lower hoeffding_bound_upper add_mono assms)
also have "… = ?R" by simp
finally show ?thesis by simp
qed
end
end