Theory Frequency_Moment_k
section ‹Frequency Moment $k$›
theory Frequency_Moment_k
imports
Frequency_Moments
Landau_Ext
Lp.Lp
Median_Method.Median
Probability_Ext
Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
begin
text ‹This section contains a formalization of the algorithm for the $k$-th frequency moment.
It is based on the algorithm described in \<^cite>‹‹\textsection 2.1› in "alon1999"›.›
type_synonym fk_state = "nat × nat × nat × nat × (nat × nat ⇒ (nat × nat))"
fun fk_init :: "nat ⇒ rat ⇒ rat ⇒ nat ⇒ fk_state pmf" where
"fk_init k δ ε n =
do {
let s⇩1 = nat ⌈3 * real k * n powr (1-1/real k) / (real_of_rat δ)⇧2⌉;
let s⇩2 = nat ⌈-18 * ln (real_of_rat ε)⌉;
return_pmf (s⇩1, s⇩2, k, 0, (λ_ ∈ {0..<s⇩1} × {0..<s⇩2}. (0,0)))
}"
fun fk_update :: "nat ⇒ fk_state ⇒ fk_state pmf" where
"fk_update a (s⇩1, s⇩2, k, m, r) =
do {
coins ← prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. bernoulli_pmf (1/(real m+1)));
return_pmf (s⇩1, s⇩2, k, m+1, λi ∈ {0..<s⇩1} × {0..<s⇩2}.
if coins i then
(a,0)
else (
let (x,l) = r i in (x, l + of_bool (x=a))
)
)
}"
fun fk_result :: "fk_state ⇒ rat pmf" where
"fk_result (s⇩1, s⇩2, k, m, r) =
return_pmf (median s⇩2 (λi⇩2 ∈ {0..<s⇩2}.
(∑i⇩1∈{0..<s⇩1}. rat_of_nat (let t = snd (r (i⇩1, i⇩2)) + 1 in m * (t^k - (t - 1)^k))) / (rat_of_nat s⇩1))
)"
lemma bernoulli_pmf_1: "bernoulli_pmf 1 = return_pmf True"
by (rule pmf_eqI, simp add:indicator_def)
fun fk_space_usage :: "(nat × nat × nat × rat × rat) ⇒ real" where
"fk_space_usage (k, n, m, ε, δ) = (
let s⇩1 = nat ⌈3*real k* (real n) powr (1-1/ real k) / (real_of_rat δ)⇧2 ⌉ in
let s⇩2 = nat ⌈-(18 * ln (real_of_rat ε))⌉ in
4 +
2 * log 2 (s⇩1 + 1) +
2 * log 2 (s⇩2 + 1) +
2 * log 2 (real k + 1) +
2 * log 2 (real m + 1) +
s⇩1 * s⇩2 * (2 + 2 * log 2 (real n+1) + 2 * log 2 (real m+1)))"
definition encode_fk_state :: "fk_state ⇒ bool list option" where
"encode_fk_state =
N⇩e ⋈⇩e (λs⇩1.
N⇩e ⋈⇩e (λs⇩2.
N⇩e ×⇩e
N⇩e ×⇩e
(List.product [0..<s⇩1] [0..<s⇩2] →⇩e (N⇩e ×⇩e N⇩e))))"
lemma "inj_on encode_fk_state (dom encode_fk_state)"
proof -
have "is_encoding encode_fk_state"
by (simp add:encode_fk_state_def)
(intro dependent_encoding exp_golomb_encoding fun_encoding)
thus ?thesis by (rule encoding_imp_inj)
qed
text ‹This is an intermediate non-parallel form @{term "fk_update"} used only in the correctness proof.›
fun fk_update_2 :: "'a ⇒ (nat × 'a × nat) ⇒ (nat × 'a × nat) pmf" where
"fk_update_2 a (m,x,l) =
do {
coin ← bernoulli_pmf (1/(real m+1));
return_pmf (m+1,if coin then (a,0) else (x, l + of_bool (x=a)))
}"
definition sketch where "sketch as i = (as ! i, count_list (drop (i+1) as) (as ! i))"
lemma fk_update_2_distr:
assumes "as ≠ []"
shows "fold (λx s. s ⤜ fk_update_2 x) as (return_pmf (0,0,0)) =
pmf_of_set {..<length as} ⤜ (λk. return_pmf (length as, sketch as k))"
using assms
proof (induction as rule:rev_nonempty_induct)
case (single x)
show ?case using single
by (simp add:bind_return_pmf pmf_of_set_singleton bernoulli_pmf_1 lessThan_def sketch_def)
next
case (snoc x xs)
let ?h = "(λxs k. count_list (drop (Suc k) xs) (xs ! k))"
let ?q = "(λxs k. (length xs, sketch xs k))"
have non_empty: " {..<Suc (length xs)} ≠ {}" " {..<length xs} ≠ {}" using snoc by auto
have fk_update_2_eta:"fk_update_2 x = (λa. fk_update_2 x (fst a, fst (snd a), snd (snd a)))"
by auto
have "pmf_of_set {..<length xs} ⤜ (λk. bernoulli_pmf (1 / (real (length xs) + 1)) ⤜
(λcoin. return_pmf (if coin then length xs else k))) =
bernoulli_pmf (1 / (real (length xs) + 1)) ⤜ (λy. pmf_of_set {..<length xs} ⤜
(λk. return_pmf (if y then length xs else k)))"
by (subst bind_commute_pmf, simp)
also have "... = pmf_of_set {..<length xs + 1}"
using snoc(1) non_empty
by (intro pmf_eqI, simp add: pmf_bind measure_pmf_of_set)
(simp add:indicator_def algebra_simps frac_eq_eq)
finally have b: "pmf_of_set {..<length xs} ⤜ (λk. bernoulli_pmf (1 / (real (length xs) + 1)) ⤜
(λcoin. return_pmf (if coin then length xs else k))) = pmf_of_set {..<length xs +1}" by simp
have "fold (λx s. (s ⤜ fk_update_2 x)) (xs@[x]) (return_pmf (0,0,0)) =
(pmf_of_set {..<length xs} ⤜ (λk. return_pmf (length xs, sketch xs k))) ⤜ fk_update_2 x"
using snoc by (simp add:case_prod_beta')
also have "... = (pmf_of_set {..<length xs} ⤜ (λk. return_pmf (length xs, sketch xs k))) ⤜
(λ(m,a,l). bernoulli_pmf (1 / (real m + 1)) ⤜ (λcoin.
return_pmf (m + 1, if coin then (x, 0) else (a, (l + of_bool (a = x))))))"
by (subst fk_update_2_eta, subst fk_update_2.simps, simp add:case_prod_beta')
also have "... = pmf_of_set {..<length xs} ⤜ (λk. bernoulli_pmf (1 / (real (length xs) + 1)) ⤜
(λcoin. return_pmf (length xs + 1, if coin then (x, 0) else (xs ! k, ?h xs k + of_bool (xs ! k = x)))))"
by (subst bind_assoc_pmf, simp add: bind_return_pmf sketch_def)
also have "... = pmf_of_set {..<length xs} ⤜ (λk. bernoulli_pmf (1 / (real (length xs) + 1)) ⤜
(λcoin. return_pmf (if coin then length xs else k) ⤜ (λk'. return_pmf (?q (xs@[x]) k'))))"
using non_empty
by (intro bind_pmf_cong, auto simp add:bind_return_pmf nth_append count_list_append sketch_def)
also have "... = pmf_of_set {..<length xs} ⤜ (λk. bernoulli_pmf (1 / (real (length xs) + 1)) ⤜
(λcoin. return_pmf (if coin then length xs else k))) ⤜ (λk'. return_pmf (?q (xs@[x]) k'))"
by (subst bind_assoc_pmf, subst bind_assoc_pmf, simp)
also have "... = pmf_of_set {..<length (xs@[x])} ⤜ (λk'. return_pmf (?q (xs@[x]) k'))"
by (subst b, simp)
finally show ?case by simp
qed
context
fixes ε δ :: rat
fixes n k :: nat
fixes as
assumes k_ge_1: "k ≥ 1"
assumes ε_range: "ε ∈ {0<..<1}"
assumes δ_range: "δ > 0"
assumes as_range: "set as ⊆ {..<n}"
begin
definition s⇩1 where "s⇩1 = nat ⌈3 * real k * (real n) powr (1-1/real k) / (real_of_rat δ)⇧2⌉"
definition s⇩2 where "s⇩2 = nat ⌈-(18 * ln (real_of_rat ε))⌉"
definition "M⇩1 = {(u, v). v < count_list as u}"
definition "Ω⇩1 = measure_pmf (pmf_of_set M⇩1)"
definition "M⇩2 = prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. pmf_of_set M⇩1)"
definition "Ω⇩2 = measure_pmf M⇩2"
interpretation prob_space "Ω⇩1"
unfolding Ω⇩1_def by (simp add:prob_space_measure_pmf)
interpretation Ω⇩2:prob_space "Ω⇩2"
unfolding Ω⇩2_def by (simp add:prob_space_measure_pmf)
lemma split_space: "(∑a∈M⇩1. f (snd a)) = (∑u ∈ set as. (∑v ∈{0..<count_list as u}. f v))"
proof -
define A where "A = (λu. {u} × {v. v < count_list as u})"
have a: "inj_on snd (A x)" for x
by (simp add:A_def inj_on_def)
have "⋀u v. u < count_list as v ⟹ v ∈ set as"
by (subst count_list_gr_1, force)
hence "M⇩1 = ⋃ (A ` set as)"
by (auto simp add:set_eq_iff A_def M⇩1_def)
hence "(∑a∈M⇩1. f (snd a)) = sum (f ∘ snd) (⋃ (A ` set as))"
by (intro sum.cong, auto)
also have "... = sum (λx. sum (f ∘ snd) (A x)) (set as)"
by (rule sum.UNION_disjoint, simp, simp add:A_def, simp add:A_def, blast)
also have "... = sum (λx. sum f (snd ` A x)) (set as)"
by (intro sum.cong, auto simp add:sum.reindex[OF a])
also have "... = (∑u ∈ set as. (∑v ∈{0..<count_list as u}. f v))"
unfolding A_def by (intro sum.cong, auto)
finally show ?thesis by blast
qed
lemma
assumes "as ≠ []"
shows fin_space: "finite M⇩1"
and non_empty_space: "M⇩1 ≠ {}"
and card_space: "card M⇩1 = length as"
proof -
have "M⇩1 ⊆ set as × {k. k < length as}"
proof (rule subsetI)
fix x
assume a:"x ∈ M⇩1"
have "fst x ∈ set as"
using a by (simp add:case_prod_beta count_list_gr_1 M⇩1_def)
moreover have "snd x < length as"
using a count_le_length order_less_le_trans
by (simp add:case_prod_beta M⇩1_def) fast
ultimately show "x ∈ set as × {k. k < length as}"
by (simp add:mem_Times_iff)
qed
thus fin_space: "finite M⇩1"
using finite_subset by blast
have "(as ! 0, 0) ∈ M⇩1"
using assms(1) unfolding M⇩1_def
by (simp, metis count_list_gr_1 gr0I length_greater_0_conv not_one_le_zero nth_mem)
thus "M⇩1 ≠ {}" by blast
show "card M⇩1 = length as"
using fin_space split_space[where f="λ_. (1::nat)"]
by (simp add:sum_count_set[where X="set as" and xs="as", simplified])
qed
lemma
assumes "as ≠ []"
shows integrable_1: "integrable Ω⇩1 (f :: _ ⇒ real)" and
integrable_2: "integrable Ω⇩2 (g :: _ ⇒ real)"
proof -
have fin_omega: "finite (set_pmf (pmf_of_set M⇩1))"
using fin_space[OF assms] non_empty_space[OF assms] by auto
thus "integrable Ω⇩1 f"
unfolding Ω⇩1_def
by (rule integrable_measure_pmf_finite)
have "finite (set_pmf M⇩2)"
unfolding M⇩2_def using fin_omega
by (subst set_prod_pmf) (auto intro:finite_PiE)
thus "integrable Ω⇩2 g"
unfolding Ω⇩2_def by (intro integrable_measure_pmf_finite)
qed
lemma sketch_distr:
assumes "as ≠ []"
shows "pmf_of_set {..<length as} ⤜ (λk. return_pmf (sketch as k)) = pmf_of_set M⇩1"
proof -
have "x < y ⟹ y < length as ⟹
count_list (drop (y+1) as) (as ! y) < count_list (drop (x+1) as) (as ! y)" for x y
by (intro count_list_lt_suffix suffix_drop_drop, simp_all)
(metis Suc_diff_Suc diff_Suc_Suc diff_add_inverse lessI less_natE)
hence a1: "inj_on (sketch as) {k. k < length as}"
unfolding sketch_def by (intro inj_onI) (metis Pair_inject mem_Collect_eq nat_neq_iff)
have "x < length as ⟹ count_list (drop (x+1) as) (as ! x) < count_list as (as ! x)" for x
by (rule count_list_lt_suffix, auto simp add:suffix_drop)
hence "sketch as ` {k. k < length as} ⊆ M⇩1"
by (intro image_subsetI, simp add:sketch_def M⇩1_def)
moreover have "card M⇩1 ≤ card (sketch as ` {k. k < length as})"
by (simp add: card_space[OF assms(1)] card_image[OF a1])
ultimately have "sketch as ` {k. k < length as} = M⇩1"
using fin_space[OF assms(1)] by (intro card_seteq, simp_all)
hence "bij_betw (sketch as) {k. k < length as} M⇩1"
using a1 by (simp add:bij_betw_def)
hence "map_pmf (sketch as) (pmf_of_set {k. k < length as}) = pmf_of_set M⇩1"
using assms by (intro map_pmf_of_set_bij_betw, auto)
thus ?thesis by (simp add: sketch_def map_pmf_def lessThan_def)
qed
lemma fk_update_distr:
"fold (λx s. s ⤜ fk_update x) as (fk_init k δ ε n) =
prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. fold (λx s. s ⤜ fk_update_2 x) as (return_pmf (0,0,0)))
⤜ (λx. return_pmf (s⇩1,s⇩2,k, length as, λi∈{0..<s⇩1}×{0..<s⇩2}. snd (x i)))"
proof (induction as rule:rev_induct)
case Nil
then show ?case
by (auto simp:Let_def s⇩1_def[symmetric] s⇩2_def[symmetric] bind_return_pmf)
next
case (snoc x xs)
have fk_update_2_eta:"fk_update_2 x = (λa. fk_update_2 x (fst a, fst (snd a), snd (snd a)))"
by auto
have a: "fk_update x (s⇩1, s⇩2, k, length xs, λi∈{0..<s⇩1} × {0..<s⇩2}. snd (f i)) =
prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λi. fk_update_2 x (f i)) ⤜
(λa. return_pmf (s⇩1,s⇩2, k, Suc (length xs), λi∈{0..<s⇩1} × {0..<s⇩2}. snd (a i)))"
if b: "f ∈ set_pmf (prod_pmf ({0..<s⇩1} × {0..<s⇩2})
(λ_. fold (λa s. s ⤜ fk_update_2 a) xs (return_pmf (0, 0, 0))))" for f
proof -
have c:"fst (f i) = length xs" if d:"i ∈ {0..<s⇩1} ×{0..<s⇩2}" for i
proof (cases "xs = []")
case True
then show ?thesis using b d by (simp add: set_Pi_pmf)
next
case False
hence "{..<length xs} ≠ {}" by force
thus ?thesis using b d
by (simp add:set_Pi_pmf fk_update_2_distr[OF False] PiE_dflt_def) force
qed
show ?thesis
apply (subst fk_update_2_eta, subst fk_update_2.simps, simp)
apply (simp add: Pi_pmf_bind_return[where d'="undefined"] bind_assoc_pmf)
apply (rule bind_pmf_cong, simp add:c cong:Pi_pmf_cong)
by (auto simp add:bind_return_pmf case_prod_beta)
qed
have "fold (λx s. s ⤜ fk_update x) (xs @ [x]) (fk_init k δ ε n) =
prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. fold (λx s. s ⤜ fk_update_2 x) xs (return_pmf (0,0,0)))
⤜ (λω. return_pmf (s⇩1,s⇩2,k, length xs, λi∈{0..<s⇩1}×{0..<s⇩2}. snd (ω i)) ⤜ fk_update x)"
using snoc
by (simp add:restrict_def bind_assoc_pmf del:fk_init.simps)
also have "... = prod_pmf ({0..<s⇩1} × {0..<s⇩2})
(λ_. fold (λa s. s ⤜ fk_update_2 a) xs (return_pmf (0, 0, 0))) ⤜
(λf. prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λi. fk_update_2 x (f i)) ⤜
(λa. return_pmf (s⇩1, s⇩2, k, Suc (length xs), λi∈{0..<s⇩1} × {0..<s⇩2}. snd (a i))))"
using a
by (intro bind_pmf_cong, simp_all add:bind_return_pmf del:fk_update.simps)
also have "... = prod_pmf ({0..<s⇩1} × {0..<s⇩2})
(λ_. fold (λa s. s ⤜ fk_update_2 a) xs (return_pmf (0, 0, 0))) ⤜
(λf. prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λi. fk_update_2 x (f i))) ⤜
(λa. return_pmf (s⇩1, s⇩2, k, Suc (length xs), λi∈{0..<s⇩1} × {0..<s⇩2}. snd (a i)))"
by (simp add:bind_assoc_pmf)
also have "... = (prod_pmf ({0..<s⇩1} × {0..<s⇩2})
(λ_. fold (λa s. s ⤜ fk_update_2 a) (xs@[x]) (return_pmf (0,0,0)))
⤜ (λa. return_pmf (s⇩1,s⇩2,k, length (xs@[x]), λi∈{0..<s⇩1}×{0..<s⇩2}. snd (a i))))"
by (simp, subst Pi_pmf_bind, auto)
finally show ?case by blast
qed
lemma power_diff_sum:
fixes a b :: "'a :: {comm_ring_1,power}"
assumes "k > 0"
shows "a^k - b^k = (a-b) * (∑i = 0..<k. a ^ i * b ^ (k - 1 - i))" (is "?lhs = ?rhs")
proof -
have insert_lb: "m < n ⟹ insert m {Suc m..<n} = {m..<n}" for m n :: nat
by auto
have "?rhs = sum (λi. a * (a^i * b^(k-1-i))) {0..<k} -
sum (λi. b * (a^i * b^(k-1-i))) {0..<k}"
by (simp add: sum_distrib_left[symmetric] algebra_simps)
also have "... = sum ((λi. (a^i * b^(k-i))) ∘ (λi. i+1)) {0..<k} -
sum (λi. (a^i * (b^(1+(k-1-i))))) {0..<k}"
by (simp add:algebra_simps)
also have "... = sum ((λi. (a^i * b^(k-i))) ∘ (λi. i+1)) {0..<k} -
sum (λi. (a^i * b^(k-i))) {0..<k}"
by (intro arg_cong2[where f="(-)"] sum.cong arg_cong2[where f="(*)"]
arg_cong2[where f="(λx y. x ^ y)"]) auto
also have "... = sum (λi. (a^i * b^(k-i))) (insert k {1..<k}) -
sum (λi. (a^i * b^(k-i))) (insert 0 {Suc 0..<k})"
using assms
by (subst sum.reindex[symmetric], simp, subst insert_lb, auto)
also have "... = ?lhs"
by simp
finally show ?thesis by presburger
qed
lemma power_diff_est:
assumes "k > 0"
assumes "(a :: real) ≥ b"
assumes "b ≥ 0"
shows "a^k -b^k ≤ (a-b) * k * a^(k-1)"
proof -
have " ⋀i. i < k ⟹ a ^ i * b ^ (k - 1 - i) ≤ a ^ i * a ^ (k-1-i)"
using assms by (intro mult_left_mono power_mono) auto
also have "⋀i. i < k ⟹ a ^ i * a ^ (k - 1 - i) = a ^ (k - Suc 0)"
using assms(1) by (subst power_add[symmetric], simp)
finally have a: "⋀i. i < k ⟹ a ^ i * b ^ (k - 1 - i) ≤ a ^ (k - Suc 0)"
by blast
have "a^k - b^k = (a-b) * (∑i = 0..<k. a ^ i * b ^ (k - 1 - i))"
by (rule power_diff_sum[OF assms(1)])
also have "... ≤ (a-b) * (∑i = 0..<k. a^(k-1))"
using a assms by (intro mult_left_mono sum_mono, auto)
also have "... = (a-b) * (k * a^(k-Suc 0))"
by simp
finally show ?thesis by simp
qed
text ‹Specialization of the Hoelder inquality for sums.›
lemma Holder_inequality_sum:
assumes "p > (0::real)" "q > 0" "1/p + 1/q = 1"
assumes "finite A"
shows "¦∑x∈A. f x * g x¦ ≤ (∑x∈A. ¦f x¦ powr p) powr (1/p) * (∑x∈A. ¦g x¦ powr q) powr (1/q)"
proof -
have "¦LINT x|count_space A. f x * g x¦ ≤
(LINT x|count_space A. ¦f x¦ powr p) powr (1 / p) *
(LINT x|count_space A. ¦g x¦ powr q) powr (1 / q)"
using assms integrable_count_space
by (intro Lp.Holder_inequality, auto)
thus ?thesis
using assms by (simp add: lebesgue_integral_count_space_finite[symmetric])
qed
lemma real_count_list_pos:
assumes "x ∈ set as"
shows "real (count_list as x) > 0"
using count_list_gr_1 assms by force
lemma fk_estimate:
assumes "as ≠ []"
shows "length as * of_rat (F (2*k-1) as) ≤ n powr (1 - 1 / real k) * (of_rat (F k as))^2"
(is "?lhs ≤ ?rhs")
proof (cases "k ≥ 2")
case True
define M where "M = Max (count_list as ` set as)"
have "M ∈ count_list as ` set as"
unfolding M_def using assms by (intro Max_in, auto)
then obtain m where m_in: "m ∈ set as" and m_def: "M = count_list as m"
by blast
have a: "real M > 0" using m_in count_list_gr_1 by (simp add:m_def, force)
have b: "2*k-1 = (k-1) + k" by simp
have " 0 < real (count_list as m)"
using m_in count_list_gr_1 by force
hence "M powr k = real (count_list as m) ^ k"
by (simp add: powr_realpow m_def)
also have "... ≤ (∑x∈set as. real (count_list as x) ^ k)"
using m_in by (intro member_le_sum, simp_all)
also have "... ≤ real_of_rat (F k as)"
by (simp add:F_def of_rat_sum of_rat_power)
finally have d: "M powr k ≤ real_of_rat (F k as)" by simp
have e: "0 ≤ real_of_rat (F k as)"
using F_gr_0[OF assms(1)] by (simp add: order_le_less)
have "real (k - 1) / real k + 1 = real (k - 1) / real k + real k / real k"
using assms True by simp
also have "... = real (2 * k - 1) / real k"
using b by (subst add_divide_distrib[symmetric], force)
finally have f: "real (k - 1) / real k + 1 = real (2 * k - 1) / real k"
by blast
have "real_of_rat (F (2*k-1) as) =
(∑x∈set as. real (count_list as x) ^ (k - 1) * real (count_list as x) ^ k)"
using b by (simp add:F_def of_rat_sum sum_distrib_left of_rat_mult power_add of_rat_power)
also have "... ≤ (∑x∈set as. real M ^ (k - 1) * real (count_list as x) ^ k)"
by (intro sum_mono mult_right_mono power_mono of_nat_mono) (auto simp:M_def)
also have "... = M powr (k-1) * of_rat (F k as)" using a
by (simp add:sum_distrib_left F_def of_rat_mult of_rat_sum of_rat_power powr_realpow)
also have "... = (M powr k) powr (real (k - 1) / real k) * of_rat (F k as) powr 1"
using e by (simp add:powr_powr)
also have "... ≤ (real_of_rat (F k as)) powr ((k-1)/k) * (real_of_rat (F k as) powr 1)"
using d by (intro mult_right_mono powr_mono2, auto)
also have "... = (real_of_rat (F k as)) powr ((2*k-1) / k)"
by (subst powr_add[symmetric], subst f, simp)
finally have a: "real_of_rat (F (2*k-1) as) ≤ (real_of_rat (F k as)) powr ((2*k-1) / k)"
by blast
have g: "card (set as) ≤ n"
using card_mono[OF _ as_range] by simp
have "length as = abs (sum (λx. real (count_list as x)) (set as))"
by (subst of_nat_sum[symmetric], simp add: sum_count_set)
also have "... ≤ card (set as) powr ((k-Suc 0)/k) *
(sum (λx. ¦real (count_list as x)¦ powr k) (set as)) powr (1/k)"
using assms True
by (intro Holder_inequality_sum[where p="k/(k-1)" and q="k" and f="λ_.1", simplified])
(auto simp add:algebra_simps add_divide_distrib[symmetric])
also have "... = (card (set as)) powr ((k-1) / real k) * of_rat (F k as) powr (1/ k)"
using real_count_list_pos
by (simp add:F_def of_rat_sum of_rat_power powr_realpow)
also have "... = (card (set as)) powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)"
using k_ge_1
by (subst of_nat_diff[OF k_ge_1], subst diff_divide_distrib, simp)
also have "... ≤ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)"
using k_ge_1 g
by (intro mult_right_mono powr_mono2, auto)
finally have h: "length as ≤ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/real k)"
by blast
have i:"1 / real k + real (2 * k - 1) / real k = real 2"
using True by (subst add_divide_distrib[symmetric], simp_all add:of_nat_diff)
have "?lhs ≤ n powr (1 - 1/k) * of_rat (F k as) powr (1/k) * (of_rat (F k as)) powr ((2*k-1) / k)"
using a h F_ge_0 by (intro mult_mono mult_nonneg_nonneg, auto)
also have "... = ?rhs"
using i F_gr_0[OF assms] by (simp add:powr_add[symmetric] powr_realpow[symmetric])
finally show ?thesis
by blast
next
case False
have "n = 0 ⟹ False"
using as_range assms by auto
hence "n > 0"
by auto
moreover have "k = 1"
using assms k_ge_1 False by linarith
moreover have "length as = real_of_rat (F (Suc 0) as)"
by (simp add:F_def sum_count_set of_nat_sum[symmetric] del:of_nat_sum)
ultimately show ?thesis
by (simp add:power2_eq_square)
qed
definition result
where "result a = of_nat (length as) * of_nat (Suc (snd a) ^ k - snd a ^ k)"
lemma result_exp_1:
assumes "as ≠ []"
shows "expectation result = real_of_rat (F k as)"
proof -
have "expectation result = (∑a∈M⇩1. result a * pmf (pmf_of_set M⇩1) a)"
unfolding Ω⇩1_def using non_empty_space assms fin_space
by (subst integral_measure_pmf_real) auto
also have "... = (∑a∈M⇩1. result a / real (length as))"
using non_empty_space assms fin_space card_space by simp
also have "... = (∑a∈M⇩1. real (Suc (snd a) ^ k - snd a ^ k))"
using assms by (simp add:result_def)
also have "... = (∑u∈set as. ∑v = 0..<count_list as u. real (Suc v ^ k) - real (v ^ k))"
using k_ge_1 by (subst split_space, simp add:of_nat_diff)
also have "... = (∑u∈set as. real (count_list as u)^k)"
using k_ge_1 by (subst sum_Suc_diff') (auto simp add:zero_power)
also have "... = of_rat (F k as)"
by (simp add:F_def of_rat_sum of_rat_power)
finally show ?thesis by simp
qed
lemma result_var_1:
assumes "as ≠ []"
shows "variance result ≤ (of_rat (F k as))⇧2 * k * n powr (1 - 1 / real k)"
proof -
have k_gt_0: "k > 0" using k_ge_1 by linarith
have c:"real (Suc v ^ k) - real (v ^ k) ≤ k * real (count_list as a) ^ (k - Suc 0)"
if c_1: "v < count_list as a" for a v
proof -
have "real (Suc v ^ k) - real (v ^ k) ≤ (real (v+1) - real v) * k * (1 + real v) ^ (k - Suc 0)"
using k_gt_0 power_diff_est[where a="Suc v" and b="v"] by simp
moreover have "(real (v+1) - real v) = 1" by auto
ultimately have "real (Suc v ^ k) - real (v ^ k) ≤ k * (1 + real v) ^ (k - Suc 0)"
by auto
also have "... ≤ k * real (count_list as a) ^ (k- Suc 0)"
using c_1 by (intro mult_left_mono power_mono, auto)
finally show ?thesis by blast
qed
have "length as * (∑a∈ M⇩1. (real (Suc (snd a) ^ k - (snd a) ^ k))⇧2) =
length as * (∑a∈ set as. (∑v ∈ {0..<count_list as a}.
real (Suc v ^ k - v ^ k) * real (Suc v ^ k - v^k)))"
by (subst split_space, simp add:power2_eq_square)
also have "... ≤ length as * (∑a∈ set as. (∑v ∈ {0..<count_list as a}.
k * real (count_list as a) ^ (k-1) * real (Suc v ^ k - v ^ k)))"
using c by (intro mult_left_mono sum_mono mult_right_mono) (auto simp:power_mono of_nat_diff)
also have "... = length as * k * (∑a∈ set as. real (count_list as a) ^ (k-1) *
(∑v ∈ {0..<count_list as a}. real (Suc v ^ k) - real (v ^ k)))"
by (simp add:sum_distrib_left ac_simps of_nat_diff power_mono)
also have "... = length as * k * (∑a∈ set as. real (count_list as a ^ (2*k-1)))"
using assms k_ge_1
by (subst sum_Suc_diff', auto simp: zero_power[OF k_gt_0] mult_2 power_add[symmetric])
also have "... = k * (length as * of_rat (F (2*k-1) as))"
by (simp add:sum_distrib_left[symmetric] F_def of_rat_sum of_rat_power)
also have "... ≤ k * (of_rat (F k as)^2 * n powr (1 - 1 / real k))"
using fk_estimate[OF assms] by (intro mult_left_mono) (auto simp: mult.commute)
finally have b: "real (length as) * (∑a∈ M⇩1. (real (Suc (snd a) ^ k - (snd a) ^ k))⇧2) ≤
k * ((of_rat (F k as))⇧2 * n powr (1 - 1 / real k))"
by blast
have "expectation (λω. (result ω :: real)^2) - (expectation result)^2 ≤ expectation (λω. result ω^2)"
by simp
also have "... = (∑a∈M⇩1. (length as * real (Suc (snd a) ^ k - snd a ^ k))⇧2 * pmf (pmf_of_set M⇩1) a)"
using fin_space non_empty_space assms unfolding Ω⇩1_def result_def
by (subst integral_measure_pmf_real[where A="M⇩1"], auto)
also have "... = (∑a∈M⇩1. length as * (real (Suc (snd a) ^ k - snd a ^ k))⇧2)"
using assms non_empty_space fin_space by (subst pmf_of_set)
(simp_all add:card_space power_mult_distrib power2_eq_square ac_simps)
also have "... ≤ k * ((of_rat (F k as))⇧2 * n powr (1 - 1 / real k))"
using b by (simp add:sum_distrib_left[symmetric])
also have "... = of_rat (F k as)^2 * k * n powr (1 - 1 / real k)"
by (simp add:ac_simps)
finally have "expectation (λω. result ω^2) - (expectation result)^2 ≤
of_rat (F k as)^2 * k * n powr (1 - 1 / real k)"
by blast
thus ?thesis
using integrable_1[OF assms] by (simp add:variance_eq)
qed
theorem fk_alg_sketch:
assumes "as ≠ []"
shows "fold (λa state. state ⤜ fk_update a) as (fk_init k δ ε n) =
map_pmf (λx. (s⇩1,s⇩2,k,length as, x)) M⇩2" (is "?lhs = ?rhs")
proof -
have "?lhs = prod_pmf ({0..<s⇩1} × {0..<s⇩2})
(λ_. fold (λx s. s ⤜ fk_update_2 x) as (return_pmf (0, 0, 0))) ⤜
(λx. return_pmf (s⇩1, s⇩2, k, length as, λi∈{0..<s⇩1} × {0..<s⇩2}. snd (x i)))"
by (subst fk_update_distr, simp)
also have "... = prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. pmf_of_set {..<length as} ⤜
(λk. return_pmf (length as, sketch as k))) ⤜
(λx. return_pmf (s⇩1, s⇩2, k, length as, λi∈{0..<s⇩1} × {0..<s⇩2}. snd (x i)))"
by (subst fk_update_2_distr[OF assms], simp)
also have "... = prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. pmf_of_set {..<length as} ⤜
(λk. return_pmf (sketch as k)) ⤜ (λs. return_pmf (length as, s))) ⤜
(λx. return_pmf (s⇩1, s⇩2, k, length as, λi∈{0..<s⇩1} × {0..<s⇩2}. snd (x i)))"
by (subst bind_assoc_pmf, subst bind_return_pmf, simp)
also have "... = prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. pmf_of_set {..<length as} ⤜
(λk. return_pmf (sketch as k))) ⤜
(λx. return_pmf (λi ∈ {0..<s⇩1} × {0..<s⇩2}. (length as, x i))) ⤜
(λx. return_pmf (s⇩1, s⇩2, k, length as, λi∈{0..<s⇩1} × {0..<s⇩2}. snd (x i)))"
by (subst Pi_pmf_bind_return[where d'="undefined"], simp, simp add:restrict_def)
also have "... = prod_pmf ({0..<s⇩1} × {0..<s⇩2}) (λ_. pmf_of_set {..<length as} ⤜
(λk. return_pmf (sketch as k))) ⤜
(λx. return_pmf (s⇩1, s⇩2, k, length as, restrict x ({0..<s⇩1} × {0..<s⇩2})))"
by (subst bind_assoc_pmf, simp add:bind_return_pmf cong:restrict_cong)
also have "... = M⇩2 ⤜
(λx. return_pmf (s⇩1, s⇩2, k, length as, restrict x ({0..<s⇩1} × {0..<s⇩2})))"
by (subst sketch_distr[OF assms], simp add:M⇩2_def)
also have "... = M⇩2 ⤜ (λx. return_pmf (s⇩1, s⇩2, k, length as, x))"
by (rule bind_pmf_cong, auto simp add:PiE_dflt_def M⇩2_def set_Pi_pmf)
also have "... = ?rhs"
by (simp add:map_pmf_def)
finally show ?thesis by simp
qed
definition mean_rv
where "mean_rv ω i⇩2 = (∑i⇩1 = 0..<s⇩1. result (ω (i⇩1, i⇩2))) / of_nat s⇩1"
definition median_rv
where "median_rv ω = median s⇩2 (λi⇩2. mean_rv ω i⇩2)"
lemma fk_alg_correct':
defines "M ≡ fold (λa state. state ⤜ fk_update a) as (fk_init k δ ε n) ⤜ fk_result"
shows "𝒫(ω in measure_pmf M. ¦ω - F k as¦ ≤ δ * F k as) ≥ 1 - of_rat ε"
proof (cases "as = []")
case True
have a: "nat ⌈- (18 * ln (real_of_rat ε))⌉ > 0" using ε_range by simp
show ?thesis using True ε_range
by (simp add:F_def M_def bind_return_pmf median_const[OF a] Let_def)
next
case False
have "set as ≠ {}" using assms False by blast
hence n_nonzero: "n > 0" using as_range by fastforce
have fk_nonzero: "F k as > 0"
using F_gr_0[OF False] by simp
have s1_nonzero: "s⇩1 > 0"
using δ_range k_ge_1 n_nonzero by (simp add:s⇩1_def)
have s2_nonzero: "s⇩2 > 0"
using ε_range by (simp add:s⇩2_def)
have real_of_rat_mean_rv: "⋀x i. mean_rv x = (λi. real_of_rat (mean_rv x i))"
by (rule ext, simp add:of_rat_divide of_rat_sum of_rat_mult result_def mean_rv_def)
have real_of_rat_median_rv: "⋀x. median_rv x = real_of_rat (median_rv x)"
unfolding median_rv_def using s2_nonzero
by (subst real_of_rat_mean_rv, simp add: median_rat median_restrict)
have space_Ω⇩2: "space Ω⇩2 = UNIV" by (simp add:Ω⇩2_def)
have fk_result_eta: "fk_result = (λ(x,y,z,u,v). fk_result (x,y,z,u,v))"
by auto
have a:"fold (λx state. state ⤜ fk_update x) as (fk_init k δ ε n) =
map_pmf (λx. (s⇩1,s⇩2,k,length as, x)) M⇩2"
by (subst fk_alg_sketch[OF False]) (simp add:s⇩1_def[symmetric] s⇩2_def[symmetric])
have "M = map_pmf (λx. (s⇩1, s⇩2, k, length as, x)) M⇩2 ⤜ fk_result"
by (subst M_def, subst a, simp)
also have "... = M⇩2 ⤜ return_pmf ∘ median_rv"
by (subst fk_result_eta)
(auto simp add:map_pmf_def bind_assoc_pmf bind_return_pmf median_rv_def mean_rv_def comp_def
M⇩1_def result_def median_restrict)
finally have b: "M = M⇩2 ⤜ return_pmf ∘ median_rv"
by simp
have result_exp:
"i⇩1 < s⇩1 ⟹ i⇩2 < s⇩2 ⟹ Ω⇩2.expectation (λx. result (x (i⇩1, i⇩2))) = real_of_rat (F k as)"
for i⇩1 i⇩2
unfolding Ω⇩2_def M⇩2_def
using integrable_1[OF False] result_exp_1[OF False]
by (subst expectation_Pi_pmf_slice, auto simp:Ω⇩1_def)
have result_var: "Ω⇩2.variance (λω. result (ω (i⇩1, i⇩2))) ≤ of_rat (δ * F k as)^2 * real s⇩1 / 3"
if result_var_assms: "i⇩1 < s⇩1" "i⇩2 < s⇩2" for i⇩1 i⇩2
proof -
have "3 * real k * n powr (1 - 1 / real k) =
(of_rat δ)⇧2 * (3 * real k * n powr (1 - 1 / real k) / (of_rat δ)⇧2)"
using δ_range by simp
also have "... ≤ (real_of_rat δ)⇧2 * (real s⇩1)"
unfolding s⇩1_def
by (intro mult_mono of_nat_ceiling, simp_all)
finally have f2_var_2: "3 * real k * n powr (1 - 1 / real k) ≤ (of_rat δ)⇧2 * (real s⇩1)"
by blast
have "Ω⇩2.variance (λω. result (ω (i⇩1, i⇩2)) :: real) = variance result"
using result_var_assms integrable_1[OF False]
unfolding Ω⇩2_def M⇩2_def Ω⇩1_def
by (subst variance_prod_pmf_slice, auto)
also have "... ≤ of_rat (F k as)^2 * real k * n powr (1 - 1 / real k)"
using assms False result_var_1 Ω⇩1_def by simp
also have "... =
of_rat (F k as)^2 * (real k * n powr (1 - 1 / real k))"
by (simp add:ac_simps)
also have "... ≤ of_rat (F k as)^2 * (of_rat δ^2 * (real s⇩1 / 3))"
using f2_var_2 by (intro mult_left_mono, auto)
also have "... = of_rat (F k as * δ)^2 * (real s⇩1 / 3)"
by (simp add: of_rat_mult power_mult_distrib)
also have "... = of_rat (δ * F k as)^2 * real s⇩1 / 3"
by (simp add:ac_simps)
finally show ?thesis
by simp
qed
have mean_rv_exp: "Ω⇩2.expectation (λω. mean_rv ω i) = real_of_rat (F k as)"
if mean_rv_exp_assms: "i < s⇩2" for i
proof -
have "Ω⇩2.expectation (λω. mean_rv ω i) = Ω⇩2.expectation (λω. ∑n = 0..<s⇩1. result (ω (n, i)) / real s⇩1)"
by (simp add:mean_rv_def sum_divide_distrib)
also have "... = (∑n = 0..<s⇩1. Ω⇩2.expectation (λω. result (ω (n, i))) / real s⇩1)"
using integrable_2[OF False]
by (subst Bochner_Integration.integral_sum, auto)
also have "... = of_rat (F k as)"
using s1_nonzero mean_rv_exp_assms
by (simp add:result_exp)
finally show ?thesis by simp
qed
have mean_rv_var: "Ω⇩2.variance (λω. mean_rv ω i) ≤ real_of_rat (δ * F k as)^2/3"
if mean_rv_var_assms: "i < s⇩2" for i
proof -
have a:"Ω⇩2.indep_vars (λ_. borel) (λn x. result (x (n, i)) / real s⇩1) {0..<s⇩1}"
unfolding Ω⇩2_def M⇩2_def using mean_rv_var_assms
by (intro indep_vars_restrict_intro'[where f="fst"], simp, simp add:restrict_dfl_def, simp, simp)
have "Ω⇩2.variance (λω. mean_rv ω i) = Ω⇩2.variance (λω. ∑j = 0..<s⇩1. result (ω (j, i)) / real s⇩1)"
by (simp add:mean_rv_def sum_divide_distrib)
also have "... = (∑j = 0..<s⇩1. Ω⇩2.variance (λω. result (ω (j, i)) / real s⇩1))"
using a integrable_2[OF False]
by (subst Ω⇩2.bienaymes_identity_full_indep, auto simp add:Ω⇩2_def)
also have "... = (∑j = 0..<s⇩1. Ω⇩2.variance (λω. result (ω (j, i))) / real s⇩1^2)"
using integrable_2[OF False]
by (subst Ω⇩2.variance_divide, auto)
also have "... ≤ (∑j = 0..<s⇩1. ((real_of_rat (δ * F k as))⇧2 * real s⇩1 / 3) / (real s⇩1^2))"
using result_var[OF _ mean_rv_var_assms]
by (intro sum_mono divide_right_mono, auto)
also have "... = real_of_rat (δ * F k as)^2/3"
using s1_nonzero
by (simp add:algebra_simps power2_eq_square)
finally show ?thesis by simp
qed
have "Ω⇩2.prob {y. of_rat (δ * F k as) < ¦mean_rv y i - real_of_rat (F k as)¦} ≤ 1/3"
(is "?lhs ≤ _") if c_assms: "i < s⇩2" for i
proof -
define a where "a = real_of_rat (δ * F k as)"
have c: "0 < a" unfolding a_def
using assms δ_range fk_nonzero
by (metis zero_less_of_rat_iff mult_pos_pos)
have "?lhs ≤ Ω⇩2.prob {y ∈ space Ω⇩2. a ≤ ¦mean_rv y i - Ω⇩2.expectation (λω. mean_rv ω i)¦}"
by (intro Ω⇩2.pmf_mono[OF Ω⇩2_def], simp add:a_def mean_rv_exp[OF c_assms] space_Ω⇩2)
also have "... ≤ Ω⇩2.variance (λω. mean_rv ω i)/a^2"
by (intro Ω⇩2.Chebyshev_inequality integrable_2 c False) (simp add:Ω⇩2_def)
also have "... ≤ 1/3" using c
using mean_rv_var[OF c_assms]
by (simp add:algebra_simps, simp add:a_def)
finally show ?thesis
by blast
qed
moreover have "Ω⇩2.indep_vars (λ_. borel) (λi ω. mean_rv ω i) {0..<s⇩2}"
using s1_nonzero unfolding Ω⇩2_def M⇩2_def
by (intro indep_vars_restrict_intro'[where f="snd"] finite_cartesian_product)
(simp_all add:mean_rv_def restrict_dfl_def space_Ω⇩2)
moreover have " - (18 * ln (real_of_rat ε)) ≤ real s⇩2"
by (simp add:s⇩2_def, linarith)
ultimately have "1 - of_rat ε ≤
Ω⇩2.prob {y ∈ space Ω⇩2. ¦median s⇩2 (mean_rv y) - real_of_rat (F k as)¦ ≤ of_rat (δ * F k as)}"
using ε_range
by (intro Ω⇩2.median_bound_2, simp_all add:space_Ω⇩2)
also have "... = Ω⇩2.prob {y. ¦median_rv y - real_of_rat (F k as)¦ ≤ real_of_rat (δ * F k as)}"
by (simp add:median_rv_def space_Ω⇩2)
also have "... = Ω⇩2.prob {y. ¦median_rv y - F k as¦ ≤ δ * F k as}"
by (simp add:real_of_rat_median_rv of_rat_less_eq flip: of_rat_diff)
also have "... = 𝒫(ω in measure_pmf M. ¦ω - F k as¦ ≤ δ * F k as)"
by (simp add: b comp_def map_pmf_def[symmetric] Ω⇩2_def)
finally show ?thesis by simp
qed
lemma fk_exact_space_usage':
defines "M ≡ fold (λa state. state ⤜ fk_update a) as (fk_init k δ ε n)"
shows "AE ω in M. bit_count (encode_fk_state ω) ≤ fk_space_usage (k, n, length as, ε, δ)"
(is "AE ω in M. (_ ≤ ?rhs)")
proof -
define H where "H = (if as = [] then return_pmf (λi∈ {0..<s⇩1}×{0..<s⇩2}. (0,0)) else M⇩2)"
have a:"M = map_pmf (λx.(s⇩1,s⇩2,k,length as, x)) H"
proof (cases "as ≠ []")
case True
then show ?thesis
unfolding M_def fk_alg_sketch[OF True] H_def
by (simp add:M⇩2_def)
next
case False
then show ?thesis
by (simp add:H_def M_def s⇩1_def[symmetric] Let_def s⇩2_def[symmetric] map_pmf_def bind_return_pmf)
qed
have "bit_count (encode_fk_state (s⇩1, s⇩2, k, length as, y)) ≤ ?rhs"
if b:"y ∈ set_pmf H" for y
proof -
have b0:" as ≠ [] ⟹ y ∈ {0..<s⇩1} × {0..<s⇩2} →⇩E M⇩1"
using b non_empty_space fin_space by (simp add:H_def M⇩2_def set_prod_pmf)
have "bit_count ((N⇩e ×⇩e N⇩e) (y x)) ≤
ereal (2 * log 2 (real n + 1) + 1) + ereal (2 * log 2 (real (length as) + 1) + 1)"
(is "_ ≤ ?rhs1")
if b1_assms: "x ∈ {0..<s⇩1}×{0..<s⇩2}" for x
proof -
have "fst (y x) ≤ n"
proof (cases "as = []")
case True
then show ?thesis using b b1_assms by (simp add:H_def)
next
case False
hence "1 ≤ count_list as (fst (y x))"
using b0 b1_assms by (simp add:PiE_iff case_prod_beta M⇩1_def, fastforce)
hence "fst (y x) ∈ set as"
using count_list_gr_1 by metis
then show ?thesis
by (meson lessThan_iff less_imp_le_nat subsetD as_range)
qed
moreover have "snd (y x) ≤ length as"
proof (cases "as = []")
case True
then show ?thesis using b b1_assms by (simp add:H_def)
next
case False
hence "(y x) ∈ M⇩1"
using b0 b1_assms by auto
hence "snd (y x) ≤ count_list as (fst (y x))"
by (simp add:M⇩1_def case_prod_beta)
then show ?thesis using count_le_length by (metis order_trans)
qed
ultimately have "bit_count (N⇩e (fst (y x))) + bit_count (N⇩e (snd (y x))) ≤ ?rhs1"
using exp_golomb_bit_count_est by (intro add_mono, auto)
thus ?thesis
by (subst dependent_bit_count_2, simp)
qed
moreover have "y ∈ extensional ({0..<s⇩1} × {0..<s⇩2})"
using b0 b PiE_iff by (cases "as = []", auto simp:H_def PiE_iff)
ultimately have "bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e N⇩e ×⇩e N⇩e) y) ≤
ereal (real s⇩1 * real s⇩2) * (ereal (2 * log 2 (real n + 1) + 1) +
ereal (2 * log 2 (real (length as) + 1) + 1))"
by (intro fun_bit_count_est[where xs="(List.product [0..<s⇩1] [0..<s⇩2])", simplified], auto)
hence "bit_count (encode_fk_state (s⇩1, s⇩2, k, length as, y)) ≤
ereal (2 * log 2 (real s⇩1 + 1) + 1) +
(ereal (2 * log 2 (real s⇩2 + 1) + 1) +
(ereal (2 * log 2 (real k + 1) + 1) +
(ereal (2 * log 2 (real (length as) + 1) + 1) +
(ereal (real s⇩1 * real s⇩2) * (ereal (2 * log 2 (real n+1) + 1) +
ereal (2 * log 2 (real (length as)+1) + 1))))))"
unfolding encode_fk_state_def dependent_bit_count
by (intro add_mono exp_golomb_bit_count, auto)
also have "... ≤ ?rhs"
by (simp add: s⇩1_def[symmetric] s⇩2_def[symmetric] Let_def) (simp add:ac_simps)
finally show "bit_count (encode_fk_state (s⇩1, s⇩2, k, length as, y)) ≤ ?rhs"
by blast
qed
thus ?thesis
by (simp add: a AE_measure_pmf_iff del:fk_space_usage.simps)
qed
end
text ‹Main results of this section:›
theorem fk_alg_correct:
assumes "k ≥ 1"
assumes "ε ∈ {0<..<1}"
assumes "δ > 0"
assumes "set as ⊆ {..<n}"
defines "M ≡ fold (λa state. state ⤜ fk_update a) as (fk_init k δ ε n) ⤜ fk_result"
shows "𝒫(ω in measure_pmf M. ¦ω - F k as¦ ≤ δ * F k as) ≥ 1 - of_rat ε"
unfolding M_def using fk_alg_correct'[OF assms(1-4)] by blast
theorem fk_exact_space_usage:
assumes "k ≥ 1"
assumes "ε ∈ {0<..<1}"
assumes "δ > 0"
assumes "set as ⊆ {..<n}"
defines "M ≡ fold (λa state. state ⤜ fk_update a) as (fk_init k δ ε n)"
shows "AE ω in M. bit_count (encode_fk_state ω) ≤ fk_space_usage (k, n, length as, ε, δ)"
unfolding M_def using fk_exact_space_usage'[OF assms(1-4)] by blast
theorem fk_asymptotic_space_complexity:
"fk_space_usage ∈
O[at_top ×⇩F at_top ×⇩F at_top ×⇩F at_right (0::rat) ×⇩F at_right (0::rat)](λ (k, n, m, ε, δ).
real k * real n powr (1-1/ real k) / (of_rat δ)⇧2 * (ln (1 / of_rat ε)) * (ln (real n) + ln (real m)))"
(is "_ ∈ O[?F](?rhs)")
proof -
define k_of :: "nat × nat × nat × rat × rat ⇒ nat" where "k_of = (λ(k, n, m, ε, δ). k)"
define n_of :: "nat × nat × nat × rat × rat ⇒ nat" where "n_of = (λ(k, n, m, ε, δ). n)"
define m_of :: "nat × nat × nat × rat × rat ⇒ nat" where "m_of = (λ(k, n, m, ε, δ). m)"
define ε_of :: "nat × nat × nat × rat × rat ⇒ rat" where "ε_of = (λ(k, n, m, ε, δ). ε)"
define δ_of :: "nat × nat × nat × rat × rat ⇒ rat" where "δ_of = (λ(k, n, m, ε, δ). δ)"
define g1 where
"g1 = (λx. real (k_of x)*(real (n_of x)) powr (1-1/ real (k_of x)) * (1 / of_rat (δ_of x)^2))"
define g where
"g = (λx. g1 x * (ln (1 / of_rat (ε_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))"
define s1_of where "s1_of = (λx.
nat ⌈3 * real (k_of x) * real (n_of x) powr (1 - 1 / real (k_of x)) / (real_of_rat (δ_of x))⇧2⌉)"
define s2_of where "s2_of = (λx. nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉)"
have evt: "(⋀x.
0 < real_of_rat (δ_of x) ∧ 0 < real_of_rat (ε_of x) ∧
1/real_of_rat (δ_of x) ≥ δ ∧ 1/real_of_rat (ε_of x) ≥ ε ∧
real (n_of x) ≥ n ∧ real (k_of x) ≥ k ∧ real (m_of x) ≥ m⟹ P x)
⟹ eventually P ?F" (is "(⋀x. ?prem x ⟹ _) ⟹ _")
for δ ε n k m P
apply (rule eventually_mono[where P="?prem" and Q="P"])
apply (simp add:ε_of_def case_prod_beta' δ_of_def n_of_def k_of_def m_of_def)
apply (intro eventually_conj eventually_prod1' eventually_prod2'
sequentially_inf eventually_at_right_less inv_at_right_0_inf)
by (auto simp add:prod_filter_eq_bot)
have 1:
"(λ_. 1) ∈ O[?F](λx. real (n_of x))"
"(λ_. 1) ∈ O[?F](λx. real (m_of x))"
"(λ_. 1) ∈ O[?F](λx. real (k_of x))"
by (intro landau_o.big_mono eventually_mono[OF evt], auto)+
have "(λx. ln (real (m_of x) + 1)) ∈ O[?F](λx. ln (real (m_of x)))"
by (intro landau_ln_2[where a="2"] evt[where m="2"] sum_in_bigo 1, auto)
hence 2: " (λx. log 2 (real (m_of x) + 1)) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_2 eventually_mono[OF evt[where n="1" and m="1"]])
(auto simp add:log_def)
have 3: "(λ_. 1) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
using order_less_le_trans[OF exp_gt_zero] ln_ge_iff
by (intro landau_o.big_mono evt[where ε="exp 1"])
(simp add: abs_ge_iff, blast)
have 4: "(λ_. 1) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
using one_le_power
by (intro landau_o.big_mono evt[where δ="1"])
(simp add:power_one_over[symmetric], blast)
have "(λx. 1) ∈ O[?F](λx. ln (real (n_of x)))"
using order_less_le_trans[OF exp_gt_zero] ln_ge_iff
by (intro landau_o.big_mono evt[where n="exp 1"])
(simp add: abs_ge_iff, blast)
hence 5: "(λx. 1) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_1 evt[where n="1" and m="1"], auto)
have "(λx. -ln(of_rat (ε_of x))) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_o.big_mono evt) (auto simp add:ln_div)
hence 6: "(λx. real (s2_of x)) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
unfolding s2_of_def
by (intro landau_nat_ceil 3, simp)
have 7: "(λ_. 1) ∈ O[?F](λx. real (n_of x) powr (1 - 1 / real (k_of x)))"
by (intro landau_o.big_mono evt[where n="1" and k="1"])
(auto simp add: ge_one_powr_ge_zero)
have 8: "(λ_. 1) ∈ O[?F](g1)"
unfolding g1_def by (intro landau_o.big_mult_1 1 7 4)
have "(λx. 3 * (real (k_of x) * (n_of x) powr (1 - 1 / real (k_of x)) / (of_rat (δ_of x))⇧2))
∈ O[?F](g1)"
by (subst landau_o.big.cmult_in_iff, simp, simp add:g1_def)
hence 9: "(λx. real (s1_of x)) ∈ O[?F](g1)"
unfolding s1_of_def by (intro landau_nat_ceil 8, auto simp:ac_simps)
have 10: "(λ_. 1) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 8 3 5)
have "(λx. real (s1_of x)) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 5 3 9)
hence "(λx. ln (real (s1_of x) + 1)) ∈ O[?F](g)"
using 10 by (intro landau_ln_3 sum_in_bigo, auto)
hence 11: "(λx. log 2 (real (s1_of x) + 1)) ∈ O[?F](g)"
by (simp add:log_def)
have 12: " (λx. ln (real (s2_of x) + 1)) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
using evt[where ε="2"] 6 3
by (intro landau_ln_3 sum_in_bigo, auto)
have 13: "(λx. log 2 (real (s2_of x) + 1)) ∈ O[?F](g)"
unfolding g_def
by (rule landau_o.big_mult_1, rule landau_o.big_mult_1', auto simp add: 8 5 12 log_def)
have "(λx. real (k_of x)) ∈ O[?F](g1)"
unfolding g1_def using 7 4
by (intro landau_o.big_mult_1, simp_all)
hence "(λx. log 2 (real (k_of x) + 1)) ∈ O[?F](g1)"
by (simp add:log_def) (intro landau_ln_3 sum_in_bigo 8, auto)
hence 14: "(λx. log 2 (real (k_of x) + 1)) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 3 5)
have 15: "(λx. log 2 (real (m_of x) + 1)) ∈ O[?F](g)"
unfolding g_def using 2 8 3
by (intro landau_o.big_mult_1', simp_all)
have "(λx. ln (real (n_of x) + 1)) ∈ O[?F](λx. ln (real (n_of x)))"
by (intro landau_ln_2[where a="2"] eventually_mono[OF evt[where n="2"]] sum_in_bigo 1, auto)
hence " (λx. log 2 (real (n_of x) + 1)) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_1 evt[where n="1" and m="1"])
(auto simp add:log_def)
hence 16: "(λx. real (s1_of x) * real (s2_of x) *
(2 + 2 * log 2 (real (n_of x) + 1) + 2 * log 2 (real (m_of x) + 1))) ∈ O[?F](g)"
unfolding g_def using 9 6 5 2
by (intro landau_o.mult sum_in_bigo, auto)
have "fk_space_usage = (λx. fk_space_usage (k_of x, n_of x, m_of x, ε_of x, δ_of x))"
by (simp add:case_prod_beta' k_of_def n_of_def ε_of_def δ_of_def m_of_def)
also have "... ∈ O[?F](g)"
using 10 11 13 14 15 16
by (simp add:fun_cong[OF s1_of_def[symmetric]] fun_cong[OF s2_of_def[symmetric]] Let_def)
(intro sum_in_bigo, auto)
also have "... = O[?F](?rhs)"
by (simp add:case_prod_beta' g1_def g_def n_of_def ε_of_def δ_of_def m_of_def k_of_def)
finally show ?thesis by simp
qed
end