Theory Frequency_Moment_0
section ‹Frequency Moment $0$\label{sec:f0}›
theory Frequency_Moment_0
imports
Frequency_Moments_Preliminary_Results
Median_Method.Median
K_Smallest
Universal_Hash_Families.Carter_Wegman_Hash_Family
Frequency_Moments
Landau_Ext
Probability_Ext
Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
begin
text ‹This section contains a formalization of a new algorithm for the zero-th frequency moment
inspired by ideas described in \<^cite>‹"baryossef2002"›.
It is a KMV-type ($k$-minimum value) algorithm with a rounding method and matches the space complexity
of the best algorithm described in \<^cite>‹"baryossef2002"›.
In addition to the Isabelle proof here, there is also an informal hand-written proof in
Appendix~\ref{sec:f0_proof}.›
type_synonym f0_state = "nat × nat × nat × nat × (nat ⇒ nat list) × (nat ⇒ float set)"
definition hash where "hash p = ring.hash (ring_of (mod_ring p))"
fun f0_init :: "rat ⇒ rat ⇒ nat ⇒ f0_state pmf" where
"f0_init δ ε n =
do {
let s = nat ⌈-18 * ln (real_of_rat ε)⌉;
let t = nat ⌈80 / (real_of_rat δ)⇧2⌉;
let p = prime_above (max n 19);
let r = nat (4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23);
h ← prod_pmf {..<s} (λ_. pmf_of_set (bounded_degree_polynomials (ring_of (mod_ring p)) 2));
return_pmf (s, t, p, r, h, (λ_ ∈ {0..<s}. {}))
}"
fun f0_update :: "nat ⇒ f0_state ⇒ f0_state pmf" where
"f0_update x (s, t, p, r, h, sketch) =
return_pmf (s, t, p, r, h, λi ∈ {..<s}.
least t (insert (float_of (truncate_down r (hash p x (h i)))) (sketch i)))"
fun f0_result :: "f0_state ⇒ rat pmf" where
"f0_result (s, t, p, r, h, sketch) = return_pmf (median s (λi ∈ {..<s}.
(if card (sketch i) < t then of_nat (card (sketch i)) else
rat_of_nat t* rat_of_nat p / rat_of_float (Max (sketch i)))
))"
fun f0_space_usage :: "(nat × rat × rat) ⇒ real" where
"f0_space_usage (n, ε, δ) = (
let s = nat ⌈-18 * ln (real_of_rat ε)⌉ in
let r = nat (4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23) in
let t = nat ⌈80 / (real_of_rat δ)⇧2 ⌉ in
6 +
2 * log 2 (real s + 1) +
2 * log 2 (real t + 1) +
2 * log 2 (real n + 21) +
2 * log 2 (real r + 1) +
real s * (5 + 2 * log 2 (21 + real n) +
real t * (13 + 4 * r + 2 * log 2 (log 2 (real n + 13)))))"
definition encode_f0_state :: "f0_state ⇒ bool list option" where
"encode_f0_state =
N⇩e ⋈⇩e (λs.
N⇩e ×⇩e (
N⇩e ⋈⇩e (λp.
N⇩e ×⇩e (
([0..<s] →⇩e (P⇩e p 2)) ×⇩e
([0..<s] →⇩e (S⇩e F⇩e))))))"
lemma "inj_on encode_f0_state (dom encode_f0_state)"
proof -
have "is_encoding encode_f0_state"
unfolding encode_f0_state_def
by (intro dependent_encoding exp_golomb_encoding poly_encoding fun_encoding set_encoding float_encoding)
thus ?thesis by (rule encoding_imp_inj)
qed
context
fixes ε δ :: rat
fixes n :: nat
fixes as :: "nat list"
fixes result
assumes ε_range: "ε ∈ {0<..<1}"
assumes δ_range: "δ ∈ {0<..<1}"
assumes as_range: "set as ⊆ {..<n}"
defines "result ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n) ⤜ f0_result"
begin
private definition t where "t = nat ⌈80 / (real_of_rat δ)⇧2⌉"
private lemma t_gt_0: "t > 0" using δ_range by (simp add:t_def)
private definition s where "s = nat ⌈-(18 * ln (real_of_rat ε))⌉"
private lemma s_gt_0: "s > 0" using ε_range by (simp add:s_def)
private definition p where "p = prime_above (max n 19)"
private lemma p_prime:"Factorial_Ring.prime p"
using p_def prime_above_prime by presburger
private lemma p_ge_18: "p ≥ 18"
proof -
have "p ≥ 19"
by (metis p_def prime_above_lower_bound max.bounded_iff)
thus ?thesis by simp
qed
private lemma p_gt_0: "p > 0" using p_ge_18 by simp
private lemma p_gt_1: "p > 1" using p_ge_18 by simp
private lemma n_le_p: "n ≤ p"
proof -
have "n ≤ max n 19" by simp
also have "... ≤ p"
unfolding p_def by (rule prime_above_lower_bound)
finally show ?thesis by simp
qed
private lemma p_le_n: "p ≤ 2*n + 40"
proof -
have "p ≤ 2 * (max n 19) + 2"
by (subst p_def, rule prime_above_upper_bound)
also have "... ≤ 2 * n + 40"
by (cases "n ≥ 19", auto)
finally show ?thesis by simp
qed
private lemma as_lt_p: "⋀x. x ∈ set as ⟹ x < p"
using as_range atLeastLessThan_iff
by (intro order_less_le_trans[OF _ n_le_p]) blast
private lemma as_subset_p: "set as ⊆ {..<p}"
using as_lt_p by (simp add: subset_iff)
private definition r where "r = nat (4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23)"
private lemma r_bound: "4 * log 2 (1 / real_of_rat δ) + 23 ≤ r"
proof -
have "0 ≤ log 2 (1 / real_of_rat δ)" using δ_range by simp
hence "0 ≤ ⌈log 2 (1 / real_of_rat δ)⌉" by simp
hence "0 ≤ 4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23"
by (intro add_nonneg_nonneg mult_nonneg_nonneg, auto)
thus ?thesis by (simp add:r_def)
qed
private lemma r_ge_23: "r ≥ 23"
proof -
have "(23::real) = 0 + 23" by simp
also have "... ≤ 4 * log 2 (1 / real_of_rat δ) + 23"
using δ_range by (intro add_mono mult_nonneg_nonneg, auto)
also have "... ≤ r" using r_bound by simp
finally show "23 ≤ r" by simp
qed
private lemma two_pow_r_le_1: "0 < 1 - 2 powr - real r"
proof -
have a: "2 powr (0::real) = 1"
by simp
show ?thesis using r_ge_23
by (simp, subst a[symmetric], intro powr_less_mono, auto)
qed
interpretation carter_wegman_hash_family "ring_of (mod_ring p)" 2
rewrites "ring.hash (ring_of (mod_ring p)) = Frequency_Moment_0.hash p"
using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite]
using hash_def p_prime by auto
private definition tr_hash where "tr_hash x ω = truncate_down r (hash x ω)"
private definition sketch_rv where
"sketch_rv ω = least t ((λx. float_of (tr_hash x ω)) ` set as)"
private definition estimate
where "estimate S = (if card S < t then of_nat (card S) else of_nat t * of_nat p / rat_of_float (Max S))"
private definition sketch_rv' where "sketch_rv' ω = least t ((λx. tr_hash x ω) ` set as)"
private definition estimate' where "estimate' S = (if card S < t then real (card S) else real t * real p / Max S)"
private definition Ω⇩0 where "Ω⇩0 = prod_pmf {..<s} (λ_. pmf_of_set space)"
private lemma f0_alg_sketch:
defines "sketch ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n)"
shows "sketch = map_pmf (λx. (s,t,p,r, x, λi ∈ {..<s}. sketch_rv (x i))) Ω⇩0"
unfolding sketch_rv_def
proof (subst sketch_def, induction as rule:rev_induct)
case Nil
then show ?case
by (simp add:s_def p_def[symmetric] map_pmf_def t_def r_def Let_def least_def restrict_def space_def Ω⇩0_def)
next
case (snoc x xs)
let ?sketch = "λω xs. least t ((λa. float_of (tr_hash a ω)) ` set xs)"
have "fold (λa state. state ⤜ f0_update a) (xs @ [x]) (f0_init δ ε n) =
(map_pmf (λω. (s, t, p, r, ω, λi ∈ {..<s}. ?sketch (ω i) xs)) Ω⇩0) ⤜ f0_update x"
by (simp add: restrict_def snoc del:f0_init.simps)
also have "... = Ω⇩0 ⤜ (λω. f0_update x (s, t, p, r, ω, λi∈{..<s}. ?sketch (ω i) xs)) "
by (simp add:map_pmf_def bind_assoc_pmf bind_return_pmf del:f0_update.simps)
also have "... = map_pmf (λω. (s, t, p, r, ω, λi∈{..<s}. ?sketch (ω i) (xs@[x]))) Ω⇩0"
by (simp add:least_insert map_pmf_def tr_hash_def cong:restrict_cong)
finally show ?case by blast
qed
private lemma card_nat_in_ball:
fixes x :: nat
fixes q :: real
assumes "q ≥ 0"
defines "A ≡ {k. abs (real x - real k) ≤ q ∧ k ≠ x}"
shows "real (card A) ≤ 2 * q" and "finite A"
proof -
have a: "of_nat x ∈ {⌈real x-q⌉..⌊real x+q⌋}"
using assms
by (simp add: ceiling_le_iff)
have "card A = card (int ` A)"
by (rule card_image[symmetric], simp)
also have "... ≤ card ({⌈real x-q⌉..⌊real x+q⌋} - {of_nat x})"
by (intro card_mono image_subsetI, simp_all add:A_def abs_le_iff, linarith)
also have "... = card {⌈real x-q⌉..⌊real x+q⌋} - 1"
by (rule card_Diff_singleton, rule a)
also have "... = int (card {⌈real x-q⌉..⌊real x+q⌋}) - int 1"
by (intro of_nat_diff)
(metis a card_0_eq empty_iff finite_atLeastAtMost_int less_one linorder_not_le)
also have "... ≤ ⌊q+real x⌋+1 -⌈real x-q⌉ - 1"
using assms by (simp, linarith)
also have "... ≤ 2*q"
by linarith
finally show "card A ≤ 2 * q"
by simp
have "A ⊆ {..x + nat ⌈q⌉}"
by (rule subsetI, simp add:A_def abs_le_iff, linarith)
thus "finite A"
by (rule finite_subset, simp)
qed
private lemma prob_degree_lt_1:
"prob {ω. degree ω < 1} ≤ 1/real p"
proof -
have "space ∩ {ω. length ω ≤ Suc 0} = bounded_degree_polynomials (ring_of (mod_ring p)) 1"
by (auto simp:set_eq_iff bounded_degree_polynomials_def space_def)
moreover have "field_size = p" by (simp add:ring_of_def mod_ring_def)
hence "real (card (bounded_degree_polynomials (ring_of (mod_ring p)) 1))/card space = 1 / real p"
by (simp add:space_def bounded_degree_polynomials_card power2_eq_square)
ultimately show ?thesis
by (simp add:M_def measure_pmf_of_set)
qed
private lemma collision_prob:
assumes "c ≥ 1"
shows "prob {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ c ∧ tr_hash x ω = tr_hash y ω} ≤
(5/2) * (real (card (set as)))⇧2 * c⇧2 * 2 powr -(real r) / (real p)⇧2 + 1/real p" (is "prob {ω. ?l ω} ≤ ?r1 + ?r2")
proof -
define ρ :: real where "ρ = 9/8"
have rho_c_ge_0: "ρ * c ≥ 0" unfolding ρ_def using assms by simp
have c_ge_0: "c≥0" using assms by simp
have "degree ω ≥ 1 ⟹ ω ∈ space ⟹ degree ω = 1" for ω
by (simp add:bounded_degree_polynomials_def space_def)
(metis One_nat_def Suc_1 le_less_Suc_eq less_imp_diff_less list.size(3) pos2)
hence a: "⋀ω x y. x < p ⟹ y < p ⟹ x ≠ y ⟹ degree ω ≥ 1 ⟹ ω ∈ space ⟹ hash x ω ≠ hash y ω"
using inj_onD[OF inj_if_degree_1] mod_ring_carr by blast
have b: "prob {ω. degree ω ≥ 1 ∧ tr_hash x ω ≤ c ∧ tr_hash x ω = tr_hash y ω} ≤ 5 * c⇧2 * 2 powr (-real r) /(real p)⇧2"
if b_assms: "x ∈ set as" "y ∈ set as" "x < y" for x y
proof -
have c: "real u ≤ ρ * c ∧ ¦real u - real v¦ ≤ ρ * c * 2 powr (-real r)"
if c_assms:"truncate_down r (real u) ≤ c" "truncate_down r (real u) = truncate_down r (real v)" for u v
proof -
have "9 * 2 powr - real r ≤ 9 * 2 powr (- real 23)"
using r_ge_23 by (intro mult_left_mono powr_mono, auto)
also have "... ≤ 1" by simp
finally have "9 * 2 powr - real r ≤ 1" by simp
hence "1 ≤ ρ * (1 - 2 powr (- real r))"
by (simp add:ρ_def)
hence d: "(c*1) / (1 - 2 powr (-real r)) ≤ c * ρ"
using assms two_pow_r_le_1 by (simp add: pos_divide_le_eq)
have "⋀x. truncate_down r (real x) ≤ c ⟹ real x * (1 - 2 powr - real r) ≤ c * 1"
using truncate_down_pos[OF of_nat_0_le_iff] order_trans by (simp, blast)
hence "⋀x. truncate_down r (real x) ≤ c ⟹ real x ≤ c * ρ"
using two_pow_r_le_1 by (intro order_trans[OF _ d], simp add: pos_le_divide_eq)
hence e: "real u ≤ c * ρ" "real v ≤ c * ρ"
using c_assms by auto
have " ¦real u - real v¦ ≤ (max ¦real u¦ ¦real v¦) * 2 powr (-real r)"
using c_assms by (intro truncate_down_eq, simp)
also have "... ≤ (c * ρ) * 2 powr (-real r)"
using e by (intro mult_right_mono, auto)
finally have "¦real u - real v¦ ≤ ρ * c * 2 powr (-real r)"
by (simp add:algebra_simps)
thus ?thesis using e by (simp add:algebra_simps)
qed
have "prob {ω. degree ω ≥ 1 ∧ tr_hash x ω ≤ c ∧ tr_hash x ω = tr_hash y ω} ≤
prob (⋃ i ∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧ truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}.
{ω. hash x ω = fst i ∧ hash y ω = snd i})"
using a by (intro pmf_mono[OF M_def], simp add:tr_hash_def)
(metis hash_range mod_ring_carr b_assms as_subset_p lessThan_iff nat_neq_iff subset_eq)
also have "... ≤ (∑ i∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧
truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}.
prob {ω. hash x ω = fst i ∧ hash y ω = snd i})"
by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="{0..<p} × {0..<p}"])
(auto simp add:M_def)
also have "... ≤ (∑ i∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧
truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}.
prob {ω. (∀u ∈ {x,y}. hash u ω = (if u = x then (fst i) else (snd i)))})"
by (intro sum_mono pmf_mono[OF M_def]) force
also have "... ≤ (∑ i∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧
truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}. 1/(real p)⇧2)"
using assms as_subset_p b_assms
by (intro sum_mono, subst hash_prob) (auto simp: ring_of_def mod_ring_def power2_eq_square)
also have "... = 1/(real p)⇧2 *
card {(u,v) ∈ {0..<p} × {0..<p}. u ≠ v ∧ truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}"
by simp
also have "... ≤ 1/(real p)⇧2 *
card {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧ real u ≤ ρ * c ∧ abs (real u - real v) ≤ ρ * c * 2 powr (-real r)}"
using c
by (intro mult_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..<p}×{..<p}"])
auto
also have "... ≤ 1/(real p)⇧2 * card (⋃u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
{(u::nat,v::nat). u = u' ∧ abs (real u - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'})"
by (intro mult_left_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..<p}×{..<p}"])
auto
also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card {(u,v). u = u' ∧ abs (real u - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'})"
by (intro mult_left_mono of_nat_mono card_UN_le, auto)
also have "... = 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card ((λx. (u' ,x)) ` {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'}))"
by (intro arg_cong2[where f="(*)"] arg_cong[where f="real"] sum.cong arg_cong[where f="card"])
(auto simp add:set_eq_iff)
also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'})"
by (intro mult_left_mono of_nat_mono sum_mono card_image_le, auto)
also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v ≠ u'})"
by (intro mult_left_mono sum_mono of_nat_mono card_mono card_nat_in_ball subsetI) auto
also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
real (card {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v ≠ u'}))"
by simp
also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}. 2 * (ρ * c * 2 powr (-real r)))"
by (intro mult_left_mono sum_mono card_nat_in_ball(1), auto)
also have "... = 1/(real p)⇧2 * (real (card {u. u < p ∧ real u ≤ ρ * c}) * (2 * (ρ * c * 2 powr (-real r))))"
by simp
also have "... ≤ 1/(real p)⇧2 * (real (card {u. u ≤ nat (⌊ρ * c ⌋)}) * (2 * (ρ * c * 2 powr (-real r))))"
using rho_c_ge_0 le_nat_floor
by (intro mult_left_mono mult_right_mono of_nat_mono card_mono subsetI) auto
also have "... ≤ 1/(real p)⇧2 * ((1+ρ * c) * (2 * (ρ * c * 2 powr (-real r))))"
using rho_c_ge_0 by (intro mult_left_mono mult_right_mono, auto)
also have "... ≤ 1/(real p)⇧2 * (((1+ρ) * c) * (2 * (ρ * c * 2 powr (-real r))))"
using assms by (intro mult_mono, auto simp add:distrib_left distrib_right ρ_def)
also have "... = (ρ * (2 + ρ * 2)) * c⇧2 * 2 powr (-real r) /(real p)⇧2"
by (simp add:ac_simps power2_eq_square)
also have "... ≤ 5 * c⇧2 * 2 powr (-real r) /(real p)⇧2"
by (intro divide_right_mono mult_right_mono) (auto simp add:ρ_def)
finally show ?thesis by simp
qed
have "prob {ω. ?l ω ∧ degree ω ≥ 1} ≤
prob (⋃ i ∈ {(x,y) ∈ (set as) × (set as). x < y}. {ω. degree ω ≥ 1 ∧ tr_hash (fst i) ω ≤ c ∧
tr_hash (fst i) ω = tr_hash (snd i) ω})"
by (rule pmf_mono[OF M_def], simp, metis linorder_neqE_nat)
also have "... ≤ (∑ i ∈ {(x,y) ∈ (set as) × (set as). x < y}. prob
{ω. degree ω ≥ 1 ∧ tr_hash (fst i) ω ≤ c ∧ tr_hash (fst i) ω = tr_hash (snd i) ω})"
unfolding M_def
by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="(set as) × (set as)"])
auto
also have "... ≤ (∑ i ∈ {(x,y) ∈ (set as) × (set as). x < y}. 5 * c⇧2 * 2 powr (-real r) /(real p)⇧2)"
using b by (intro sum_mono, simp add:case_prod_beta)
also have "... = ((5/2) * c⇧2 * 2 powr (-real r) /(real p)⇧2) * (2 * card {(x,y) ∈ (set as) × (set as). x < y})"
by simp
also have "... = ((5/2) * c⇧2 * 2 powr (-real r) /(real p)⇧2) * (card (set as) * (card (set as) - 1))"
by (subst card_ordered_pairs, auto)
also have "... ≤ ((5/2) * c⇧2 * 2 powr (-real r) /(real p)⇧2) * (real (card (set as)))⇧2"
by (intro mult_left_mono) (auto simp add:power2_eq_square mult_left_mono)
also have "... = (5/2) * (real (card (set as)))⇧2 * c⇧2 * 2 powr (-real r) /(real p)⇧2"
by (simp add:algebra_simps)
finally have f:"prob {ω. ?l ω ∧ degree ω ≥ 1} ≤ ?r1" by simp
have "prob {ω. ?l ω} ≤ prob {ω. ?l ω ∧ degree ω ≥ 1} + prob {ω. degree ω < 1}"
by (rule pmf_add[OF M_def], auto)
also have "... ≤ ?r1 + ?r2"
by (intro add_mono f prob_degree_lt_1)
finally show ?thesis by simp
qed
private lemma of_bool_square: "(of_bool x)⇧2 = ((of_bool x)::real)"
by (cases x, auto)
private definition Q where "Q y ω = card {x ∈ set as. int (hash x ω) < y}"
private definition m where "m = card (set as)"
private lemma
assumes "a ≥ 0"
assumes "a ≤ int p"
shows exp_Q: "expectation (λω. real (Q a ω)) = real m * (of_int a) / p"
and var_Q: "variance (λω. real (Q a ω)) ≤ real m * (of_int a) / p"
proof -
have exp_single: "expectation (λω. of_bool (int (hash x ω) < a)) = real_of_int a /real p"
if a:"x ∈ set as" for x
proof -
have x_le_p: "x < p" using a as_lt_p by simp
have "expectation (λω. of_bool (int (hash x ω) < a)) = expectation (indicat_real {ω. int (Frequency_Moment_0.hash p x ω) < a})"
by (intro arg_cong2[where f="integral⇧L"] ext, simp_all)
also have "... = prob {ω. hash x ω ∈ {k. int k < a}}"
by (simp add:M_def)
also have "... = card ({k. int k < a} ∩ {..<p}) / real p"
by (subst prob_range) (simp_all add: x_le_p ring_of_def mod_ring_def lessThan_def)
also have "... = card {..<nat a} / real p"
using assms by (intro arg_cong2[where f="(/)"] arg_cong[where f="real"] arg_cong[where f="card"])
(auto simp add:set_eq_iff)
also have "... = real_of_int a/real p"
using assms by simp
finally show "expectation (λω. of_bool (int (hash x ω) < a)) = real_of_int a /real p"
by simp
qed
have "expectation(λω. real (Q a ω)) = expectation (λω. (∑x ∈ set as. of_bool (int (hash x ω) < a)))"
by (simp add:Q_def Int_def)
also have "... = (∑x ∈ set as. expectation (λω. of_bool (int (hash x ω) < a)))"
by (rule Bochner_Integration.integral_sum, simp)
also have "... = (∑ x ∈ set as. a /real p)"
by (rule sum.cong, simp, subst exp_single, simp, simp)
also have "... = real m * real_of_int a / real p"
by (simp add:m_def)
finally show "expectation (λω. real (Q a ω)) = real m * real_of_int a / p" by simp
have indep: "J ⊆ set as ⟹ card J = 2 ⟹ indep_vars (λ_. borel) (λi x. of_bool (int (hash i x) < a)) J" for J
using as_subset_p mod_ring_carr
by (intro indep_vars_compose2[where Y="λi x. of_bool (int x < a)" and M'="λ_. discrete"]
k_wise_indep_vars_subset[OF k_wise_indep] finite_subset[OF _ finite_set]) auto
have rv: "⋀x. x ∈ set as ⟹ random_variable borel (λω. of_bool (int (hash x ω) < a))"
by (simp add:M_def)
have "variance (λω. real (Q a ω)) = variance (λω. (∑x ∈ set as. of_bool (int (hash x ω) < a)))"
by (simp add:Q_def Int_def)
also have "... = (∑x ∈ set as. variance (λω. of_bool (int (hash x ω) < a)))"
by (intro bienaymes_identity_pairwise_indep_2 indep rv) auto
also have "... ≤ (∑ x ∈ set as. a / real p)"
by (rule sum_mono, simp add: variance_eq of_bool_square, simp add: exp_single)
also have "... = real m * real_of_int a /real p"
by (simp add:m_def)
finally show "variance (λω. real (Q a ω)) ≤ real m * real_of_int a / p"
by simp
qed
private lemma t_bound: "t ≤ 81 / (real_of_rat δ)⇧2"
proof -
have "t ≤ 80 / (real_of_rat δ)⇧2 + 1" using t_def t_gt_0 by linarith
also have "... ≤ 80 / (real_of_rat δ)⇧2 + 1 / (real_of_rat δ)⇧2"
using δ_range by (intro add_mono, simp, simp add:power_le_one)
also have "... = 81 / (real_of_rat δ)⇧2" by simp
finally show ?thesis by simp
qed
private lemma t_r_bound:
"18 * 40 * (real t)⇧2 * 2 powr (-real r) ≤ 1"
proof -
have "720 * (real t)⇧2 * 2 powr (-real r) ≤ 720 * (81 / (real_of_rat δ)⇧2)⇧2 * 2 powr (-4 * log 2 (1 / real_of_rat δ) - 23)"
using r_bound t_bound by (intro mult_left_mono mult_mono power_mono powr_mono, auto)
also have "... ≤ 720 * (81 / (real_of_rat δ)⇧2)⇧2 * (2 powr (-4 * log 2 (1 / real_of_rat δ)) * 2 powr (-23))"
using δ_range by (intro mult_left_mono mult_mono power_mono add_mono)
(simp_all add:power_le_one powr_diff)
also have "... = 720 * (81⇧2 / (real_of_rat δ)^4) * (2 powr (log 2 ((real_of_rat δ)^4)) * 2 powr (-23))"
using δ_range by (intro arg_cong2[where f="(*)"])
(simp_all add:power2_eq_square power4_eq_xxxx log_divide log_powr[symmetric])
also have "... = 720 * 81⇧2 * 2 powr (-23)" using δ_range by simp
also have "... ≤ 1" by simp
finally show ?thesis by simp
qed
private lemma m_eq_F_0: "real m = of_rat (F 0 as)"
by (simp add:m_def F_def)
private lemma estimate'_bounds:
"prob {ω. of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - of_rat (F 0 as)¦} ≤ 1/3"
proof (cases "card (set as) ≥ t")
case True
define δ' where "δ' = 3 * real_of_rat δ / 4"
define u where "u = ⌈real t * p / (m * (1+δ'))⌉"
define v where "v = ⌊real t * p / (m * (1-δ'))⌋"
define has_no_collision where
"has_no_collision = (λω. ∀x∈ set as. ∀y ∈ set as. (tr_hash x ω = tr_hash y ω ⟶ x = y) ∨ tr_hash x ω > v)"
have "2 powr (-real r) ≤ 2 powr (-(4 * log 2 (1 / real_of_rat δ) + 23))"
using r_bound by (intro powr_mono, linarith, simp)
also have "... = 2 powr (-4 * log 2 (1 /real_of_rat δ) -23)"
by (rule arg_cong2[where f="(powr)"], auto simp add:algebra_simps)
also have "... ≤ 2 powr ( -1 * log 2 (1 /real_of_rat δ) -4)"
using δ_range by (intro powr_mono diff_mono, auto)
also have "... = 2 powr ( -1 * log 2 (1 /real_of_rat δ)) / 16"
by (simp add: powr_diff)
also have "... = real_of_rat δ / 16"
using δ_range by (simp add:log_divide)
also have "... < real_of_rat δ / 8"
using δ_range by (subst pos_divide_less_eq, auto)
finally have r_le_δ: "2 powr (-real r) < real_of_rat δ / 8"
by simp
have δ'_gt_0: "δ' > 0" using δ_range by (simp add:δ'_def)
have "δ' < 3/4" using δ_range by (simp add:δ'_def)+
also have "... < 1" by simp
finally have δ'_lt_1: "δ' < 1" by simp
have "t ≤ 81 / (real_of_rat δ)⇧2"
using t_bound by simp
also have "... = (81*9/16) / (δ')⇧2"
by (simp add:δ'_def power2_eq_square)
also have "... ≤ 46 / δ'⇧2"
by (intro divide_right_mono, simp, simp)
finally have t_le_δ': "t ≤ 46/ δ'⇧2" by simp
have "80 ≤ (real_of_rat δ)⇧2 * (80 / (real_of_rat δ)⇧2)" using δ_range by simp
also have "... ≤ (real_of_rat δ)⇧2 * t"
by (intro mult_left_mono, simp add:t_def of_nat_ceiling, simp)
finally have "80 ≤ (real_of_rat δ)⇧2 * t" by simp
hence t_ge_δ': "45 ≤ t * δ' * δ'" by (simp add:δ'_def power2_eq_square)
have "m ≤ card {..<n}" unfolding m_def using as_range by (intro card_mono, auto)
also have "... ≤ p" using n_le_p by simp
finally have m_le_p: "m ≤ p" by simp
hence t_le_m: "t ≤ card (set as)" using True by simp
have m_ge_0: "real m > 0" using m_def True t_gt_0 by simp
have "v ≤ real t * real p / (real m * (1 - δ'))" by (simp add:v_def)
also have "... ≤ real t * real p / (real m * (1/4))"
using δ'_lt_1 m_ge_0 δ_range
by (intro divide_left_mono mult_left_mono mult_nonneg_nonneg mult_pos_pos, simp_all add:δ'_def)
finally have v_ubound: "v ≤ 4 * real t * real p / real m" by (simp add:algebra_simps)
have a_ge_1: "u ≥ 1" using δ'_gt_0 p_gt_0 m_ge_0 t_gt_0
by (auto intro!:mult_pos_pos divide_pos_pos simp add:u_def)
hence a_ge_0: "u ≥ 0" by simp
have "real m * (1 - δ') < real m" using δ'_gt_0 m_ge_0 by simp
also have "... ≤ 1 * real p" using m_le_p by simp
also have "... ≤ real t * real p" using t_gt_0 by (intro mult_right_mono, auto)
finally have " real m * (1 - δ') < real t * real p" by simp
hence v_gt_0: "v > 0" using mult_pos_pos m_ge_0 δ'_lt_1 by (simp add:v_def)
hence v_ge_1: "real_of_int v ≥ 1" by linarith
have "real t ≤ real m" using True m_def by linarith
also have "... < (1 + δ') * real m" using δ'_gt_0 m_ge_0 by force
finally have a_le_p_aux: "real t < (1 + δ') * real m" by simp
have "u ≤ real t * real p / (real m * (1 + δ'))+1" by (simp add:u_def)
also have "... < real p + 1"
using m_ge_0 δ'_gt_0 a_le_p_aux a_le_p_aux p_gt_0
by (simp add: pos_divide_less_eq ac_simps)
finally have "u ≤ real p"
by (metis int_less_real_le not_less of_int_le_iff of_int_of_nat_eq)
hence u_le_p: "u ≤ int p" by linarith
have "prob {ω. Q u ω ≥ t} ≤ prob {ω ∈ Sigma_Algebra.space M. abs (real (Q u ω) -
expectation (λω. real (Q u ω))) ≥ 3 * sqrt (m * real_of_int u / p)}"
proof (rule pmf_mono[OF M_def])
fix ω
assume "ω ∈ {ω. t ≤ Q u ω}"
hence t_le: "t ≤ Q u ω" by simp
have "real m * real_of_int u / real p ≤ real m * (real t * real p / (real m * (1 + δ'))+1) / real p"
using m_ge_0 p_gt_0 by (intro divide_right_mono mult_left_mono, simp_all add: u_def)
also have "... = real m * real t * real p / (real m * (1+δ') * real p) + real m / real p"
by (simp add:distrib_left add_divide_distrib)
also have "... = real t / (1+δ') + real m / real p"
using p_gt_0 m_ge_0 by simp
also have "... ≤ real t / (1+δ') + 1"
using m_le_p p_gt_0 by (intro add_mono, auto)
finally have "real m * real_of_int u / real p ≤ real t / (1 + δ') + 1"
by simp
hence "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p ≤
3 * sqrt (t / (1+δ')+1)+(t/(1+δ')+1)"
by (intro add_mono mult_left_mono real_sqrt_le_mono, auto)
also have "... ≤ 3 * sqrt (real t+1) + ((t * (1 - δ' / (1+δ'))) + 1)"
using δ'_gt_0 t_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono)
(simp_all add: pos_divide_le_eq left_diff_distrib)
also have "... = 3 * sqrt (real t+1) + (t - δ' * t / (1+δ')) + 1" by (simp add:algebra_simps)
also have "... ≤ 3 * sqrt (46 / δ'⇧2 + 1 / δ'⇧2) + (t - δ' * t/2) + 1 / δ'"
using δ'_gt_0 t_gt_0 δ'_lt_1 add_pos_pos t_le_δ'
by (intro add_mono mult_left_mono real_sqrt_le_mono add_mono)
(simp_all add: power_le_one pos_le_divide_eq)
also have "... ≤ (21 / δ' + (t - 45 / (2*δ'))) + 1 / δ'"
using δ'_gt_0 t_ge_δ' by (intro add_mono)
(simp_all add:real_sqrt_divide divide_le_cancel real_le_lsqrt pos_divide_le_eq ac_simps)
also have "... ≤ t" using δ'_gt_0 by simp
also have "... ≤ Q u ω" using t_le by simp
finally have "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p ≤ Q u ω"
by simp
hence " 3 * sqrt (real m * real_of_int u / real p) ≤ ¦real (Q u ω) - expectation (λω. real (Q u ω))¦"
using a_ge_0 u_le_p True by (simp add:exp_Q abs_ge_iff)
thus "ω ∈ {ω ∈ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int u / real p) ≤
¦real (Q u ω) - expectation (λω. real (Q u ω))¦}"
by (simp add: M_def)
qed
also have "... ≤ variance (λω. real (Q u ω)) / (3 * sqrt (real m * of_int u / real p))⇧2"
using a_ge_1 p_gt_0 m_ge_0
by (intro Chebyshev_inequality, simp add:M_def, auto)
also have "... ≤ (real m * real_of_int u / real p) / (3 * sqrt (real m * of_int u / real p))⇧2"
using a_ge_0 u_le_p by (intro divide_right_mono var_Q, auto)
also have "... ≤ 1/9" using a_ge_0 by simp
finally have case_1: "prob {ω. Q u ω ≥ t} ≤ 1/9" by simp
have case_2: "prob {ω. Q v ω < t} ≤ 1/9"
proof (cases "v ≤ p")
case True
have "prob {ω. Q v ω < t} ≤ prob {ω ∈ Sigma_Algebra.space M. abs (real (Q v ω) - expectation (λω. real (Q v ω)))
≥ 3 * sqrt (m * real_of_int v / p)}"
proof (rule pmf_mono[OF M_def])
fix ω
assume "ω ∈ set_pmf (pmf_of_set space)"
have "(real t + 3 * sqrt (real t / (1 - δ') )) * (1 - δ') = real t - δ' * t + 3 * ((1-δ') * sqrt( real t / (1-δ') ))"
by (simp add:algebra_simps)
also have "... = real t - δ' * t + 3 * sqrt ( (1-δ')⇧2 * (real t / (1-δ')))"
using δ'_lt_1 by (subst real_sqrt_mult, simp)
also have "... = real t - δ' * t + 3 * sqrt ( real t * (1- δ'))"
by (simp add:power2_eq_square distrib_left)
also have "... ≤ real t - 45/ δ' + 3 * sqrt ( real t )"
using δ'_gt_0 t_ge_δ' δ'_lt_1 by (intro add_mono mult_left_mono real_sqrt_le_mono)
(simp_all add:pos_divide_le_eq ac_simps left_diff_distrib power_le_one)
also have "... ≤ real t - 45/ δ' + 3 * sqrt ( 46 / δ'⇧2)"
using t_le_δ' δ'_lt_1 δ'_gt_0
by (intro add_mono mult_left_mono real_sqrt_le_mono, simp_all add:pos_divide_le_eq power_le_one)
also have "... = real t + (3 * sqrt(46) - 45)/ δ'"
using δ'_gt_0 by (simp add:real_sqrt_divide diff_divide_distrib)
also have "... ≤ t"
using δ'_gt_0 by (simp add:pos_divide_le_eq real_le_lsqrt)
finally have aux: "(real t + 3 * sqrt (real t / (1 - δ'))) * (1 - δ') ≤ real t "
by simp
assume "ω ∈ {ω. Q v ω < t}"
hence "Q v ω < t" by simp
hence "real (Q v ω) + 3 * sqrt (real m * real_of_int v / real p)
≤ real t - 1 + 3 * sqrt (real m * real_of_int v / real p)"
using m_le_p p_gt_0 by (intro add_mono, auto simp add: algebra_simps add_divide_distrib)
also have "... ≤ (real t-1) + 3 * sqrt (real m * (real t * real p / (real m * (1- δ'))) / real p)"
by (intro add_mono mult_left_mono real_sqrt_le_mono divide_right_mono)
(auto simp add:v_def)
also have "... ≤ real t + 3 * sqrt(real t / (1-δ')) - 1"
using m_ge_0 p_gt_0 by simp
also have "... ≤ real t / (1-δ')-1"
using δ'_lt_1 aux by (simp add: pos_le_divide_eq)
also have "... ≤ real m * (real t * real p / (real m * (1-δ'))) / real p - 1"
using p_gt_0 m_ge_0 by simp
also have "... ≤ real m * (real t * real p / (real m * (1-δ'))) / real p - real m / real p"
using m_le_p p_gt_0
by (intro diff_mono, auto)
also have "... = real m * (real t * real p / (real m * (1-δ'))-1) / real p"
by (simp add: left_diff_distrib right_diff_distrib diff_divide_distrib)
also have "... ≤ real m * real_of_int v / real p"
by (intro divide_right_mono mult_left_mono, simp_all add:v_def)
finally have "real (Q v ω) + 3 * sqrt (real m * real_of_int v / real p)
≤ real m * real_of_int v / real p" by simp
hence " 3 * sqrt (real m * real_of_int v / real p) ≤ ¦real (Q v ω) -expectation (λω. real (Q v ω))¦"
using v_gt_0 True by (simp add: exp_Q abs_ge_iff)
thus "ω ∈ {ω∈ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int v / real p) ≤
¦real (Q v ω) - expectation (λω. real (Q v ω))¦}"
by (simp add:M_def)
qed
also have "... ≤ variance (λω. real (Q v ω)) / (3 * sqrt (real m * real_of_int v / real p))⇧2"
using v_gt_0 p_gt_0 m_ge_0
by (intro Chebyshev_inequality, simp add:M_def, auto)
also have "... ≤ (real m * real_of_int v / real p) / (3 * sqrt (real m * real_of_int v / real p))⇧2"
using v_gt_0 True by (intro divide_right_mono var_Q, auto)
also have "... = 1/9"
using p_gt_0 v_gt_0 m_ge_0 by (simp add:power2_eq_square)
finally show ?thesis by simp
next
case False
have "prob {ω. Q v ω < t} ≤ prob {ω. False}"
proof (rule pmf_mono[OF M_def])
fix ω
assume a:"ω ∈ {ω. Q v ω < t}"
assume "ω ∈ set_pmf (pmf_of_set space)"
hence b:"⋀x. x < p ⟹ hash x ω < p"
using hash_range mod_ring_carr by (simp add:M_def measure_pmf_inverse)
have "t ≤ card (set as)" using True by simp
also have "... ≤ Q v ω"
unfolding Q_def using b False as_lt_p by (intro card_mono subsetI, simp, force)
also have "... < t" using a by simp
finally have "False" by auto
thus "ω ∈ {ω. False}" by simp
qed
also have "... = 0" by auto
finally show ?thesis by simp
qed
have "prob {ω. ¬has_no_collision ω} ≤
prob {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ real_of_int v ∧ tr_hash x ω = tr_hash y ω}"
by (rule pmf_mono[OF M_def]) (simp add:has_no_collision_def M_def, force)
also have "... ≤ (5/2) * (real (card (set as)))⇧2 * (real_of_int v)⇧2 * 2 powr - real r / (real p)⇧2 + 1 / real p"
using collision_prob v_ge_1 by blast
also have "... ≤ (5/2) * (real m)⇧2 * (real_of_int v)⇧2 * 2 powr - real r / (real p)⇧2 + 1 / real p"
by (intro divide_right_mono add_mono mult_right_mono mult_mono power_mono, simp_all add:m_def)
also have "... ≤ (5/2) * (real m)⇧2 * (4 * real t * real p / real m)⇧2 * (2 powr - real r) / (real p)⇧2 + 1 / real p"
using v_def v_ge_1 v_ubound
by (intro add_mono divide_right_mono mult_right_mono mult_left_mono, auto)
also have "... = 40 * (real t)⇧2 * (2 powr -real r) + 1 / real p"
using p_gt_0 m_ge_0 t_gt_0 by (simp add:algebra_simps power2_eq_square)
also have "... ≤ 1/18 + 1/18"
using t_r_bound p_ge_18 by (intro add_mono, simp_all add: pos_le_divide_eq)
also have "... = 1/9" by simp
finally have case_3: "prob {ω. ¬has_no_collision ω} ≤ 1/9" by simp
have "prob {ω. real_of_rat δ * of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - of_rat (F 0 as)¦} ≤
prob {ω. Q u ω ≥ t ∨ Q v ω < t ∨ ¬(has_no_collision ω)}"
proof (rule pmf_mono[OF M_def], rule ccontr)
fix ω
assume "ω ∈ set_pmf (pmf_of_set space)"
assume "ω ∈ {ω. real_of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - real_of_rat (F 0 as)¦}"
hence est: "real_of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - real_of_rat (F 0 as)¦" by simp
assume "ω ∉ {ω. t ≤ Q u ω ∨ Q v ω < t ∨ ¬ has_no_collision ω}"
hence "¬( t ≤ Q u ω ∨ Q v ω < t ∨ ¬ has_no_collision ω)" by simp
hence lb: "Q u ω < t" and ub: "Q v ω ≥ t" and no_col: "has_no_collision ω" by simp+
define y where "y = nth_mset (t-1) {#int (hash x ω). x ∈# mset_set (set as)#}"
define y' where "y' = nth_mset (t-1) {#tr_hash x ω. x ∈# mset_set (set as)#}"
have rank_t_lb: "u ≤ y"
unfolding y_def using True t_gt_0 lb
by (intro nth_mset_bound_left, simp_all add:count_less_def swap_filter_image Q_def)
have rank_t_ub: "y ≤ v - 1"
unfolding y_def using True t_gt_0 ub
by (intro nth_mset_bound_right, simp_all add:Q_def swap_filter_image count_le_def)
have y_ge_0: "real_of_int y ≥ 0" using rank_t_lb a_ge_0 by linarith
have "mono (λx. truncate_down r (real_of_int x))"
by (metis truncate_down_mono mono_def of_int_le_iff)
hence y'_eq: "y' = truncate_down r y"
unfolding y_def y'_def using True t_gt_0
by (subst nth_mset_commute_mono[where f="(λx. truncate_down r (of_int x))"])
(simp_all add: multiset.map_comp comp_def tr_hash_def)
have "real_of_int u * (1 - 2 powr -real r) ≤ real_of_int y * (1 - 2 powr (-real r))"
using rank_t_lb of_int_le_iff two_pow_r_le_1
by (intro mult_right_mono, auto)
also have "... ≤ y'"
using y'_eq truncate_down_pos[OF y_ge_0] by simp
finally have rank_t_lb': "u * (1 - 2 powr -real r) ≤ y'" by simp
have "y' ≤ real_of_int y"
by (subst y'_eq, rule truncate_down_le, simp)
also have "... ≤ real_of_int (v-1)"
using rank_t_ub of_int_le_iff by blast
finally have rank_t_ub': "y' ≤ v-1"
by simp
have "0 < u * (1-2 powr -real r)"
using a_ge_1 two_pow_r_le_1 by (intro mult_pos_pos, auto)
hence y'_pos: "y' > 0" using rank_t_lb' by linarith
have no_col': "⋀x. x ≤ y' ⟹ count {#tr_hash x ω. x ∈# mset_set (set as)#} x ≤ 1"
using rank_t_ub' no_col
by (simp add:vimage_def card_le_Suc0_iff_eq count_image_mset has_no_collision_def) force
have h_1: "Max (sketch_rv' ω) = y'"
using True t_gt_0 no_col'
by (simp add:sketch_rv'_def y'_def nth_mset_max)
have "card (sketch_rv' ω) = card (least ((t-1)+1) (set_mset {#tr_hash x ω. x ∈# mset_set (set as)#}))"
using t_gt_0 by (simp add:sketch_rv'_def)
also have "... = (t-1) +1"
using True t_gt_0 no_col' by (intro nth_mset_max(2), simp_all add:y'_def)
also have "... = t" using t_gt_0 by simp
finally have "card (sketch_rv' ω) = t" by simp
hence h_3: "estimate' (sketch_rv' ω) = real t * real p / y'"
using h_1 by (simp add:estimate'_def)
have "(real t) * real p ≤ (1 + δ') * real m * ((real t) * real p / (real m * (1 + δ')))"
using δ'_lt_1 m_def True t_gt_0 δ'_gt_0 by auto
also have "... ≤ (1+δ') * m * u"
using δ'_gt_0 by (intro mult_left_mono, simp_all add:u_def)
also have "... < ((1 + real_of_rat δ)*(1-real_of_rat δ/8)) * m * u"
using True m_def t_gt_0 a_ge_1 δ_range
by (intro mult_strict_right_mono, auto simp add:δ'_def right_diff_distrib)
also have "... ≤ ((1 + real_of_rat δ)*(1-2 powr (-r))) * m * u"
using r_le_δ δ_range a_ge_0 by (intro mult_right_mono mult_left_mono, auto)
also have "... = (1 + real_of_rat δ) * m * (u * (1-2 powr -real r))"
by simp
also have "... ≤ (1 + real_of_rat δ) * m * y'"
using δ_range by (intro mult_left_mono rank_t_lb', simp)
finally have "real t * real p < (1 + real_of_rat δ) * m * y'" by simp
hence f_1: "estimate' (sketch_rv' ω) < (1 + real_of_rat δ) * m"
using y'_pos by (simp add: h_3 pos_divide_less_eq)
have "(1 - real_of_rat δ) * m * y' ≤ (1 - real_of_rat δ) * m * v"
using δ_range rank_t_ub' y'_pos by (intro mult_mono rank_t_ub', simp_all)
also have "... = (1-real_of_rat δ) * (real m * v)"
by simp
also have "... < (1-δ') * (real m * v)"
using δ_range m_ge_0 v_ge_1
by (intro mult_strict_right_mono mult_pos_pos, simp_all add:δ'_def)
also have "... ≤ (1-δ') * (real m * (real t * real p / (real m * (1-δ'))))"
using δ'_gt_0 δ'_lt_1 by (intro mult_left_mono, auto simp add:v_def)
also have "... = real t * real p"
using δ'_gt_0 δ'_lt_1 t_gt_0 p_gt_0 m_ge_0 by auto
finally have "(1 - real_of_rat δ) * m * y' < real t * real p" by simp
hence f_2: "estimate' (sketch_rv' ω) > (1 - real_of_rat δ) * m"
using y'_pos by (simp add: h_3 pos_less_divide_eq)
have "abs (estimate' (sketch_rv' ω) - real_of_rat (F 0 as)) < real_of_rat δ * (real_of_rat (F 0 as))"
using f_1 f_2 by (simp add:abs_less_iff algebra_simps m_eq_F_0)
thus "False" using est by linarith
qed
also have "... ≤ 1/9 + (1/9 + 1/9)"
by (intro pmf_add_2[OF M_def] case_1 case_2 case_3)
also have "... = 1/3" by simp
finally show ?thesis by simp
next
case False
have "prob {ω. real_of_rat δ * of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - of_rat (F 0 as)¦} ≤
prob {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ real p ∧ tr_hash x ω = tr_hash y ω}"
proof (rule pmf_mono[OF M_def])
fix ω
assume a:"ω ∈ {ω. real_of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - real_of_rat (F 0 as)¦}"
assume b:"ω ∈ set_pmf (pmf_of_set space)"
have c: "card (set as) < t" using False by auto
hence "card ((λx. tr_hash x ω) ` set as) < t"
using card_image_le order_le_less_trans by blast
hence d:"card (sketch_rv' ω) = card ((λx. tr_hash x ω) ` (set as))"
by (simp add:sketch_rv'_def card_least)
have "card (sketch_rv' ω) < t"
by (metis List.finite_set c d card_image_le order_le_less_trans)
hence "estimate' (sketch_rv' ω) = card (sketch_rv' ω)" by (simp add:estimate'_def)
hence "card (sketch_rv' ω) ≠ real_of_rat (F 0 as)"
using a δ_range by simp
(metis abs_zero cancel_comm_monoid_add_class.diff_cancel of_nat_less_0_iff pos_prod_lt zero_less_of_rat_iff)
hence "card (sketch_rv' ω) ≠ card (set as)"
using m_def m_eq_F_0 by linarith
hence "¬inj_on (λx. tr_hash x ω) (set as)"
using card_image d by auto
moreover have "tr_hash x ω ≤ real p" if a:"x ∈ set as" for x
proof -
have "hash x ω < p"
using hash_range as_lt_p a b by (simp add:mod_ring_carr M_def)
thus "tr_hash x ω ≤ real p" using truncate_down_le by (simp add:tr_hash_def)
qed
ultimately show "ω ∈ {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ real p ∧ tr_hash x ω = tr_hash y ω}"
by (simp add:inj_on_def, blast)
qed
also have "... ≤ (5/2) * (real (card (set as)))⇧2 * (real p)⇧2 * 2 powr - real r / (real p)⇧2 + 1 / real p"
using p_gt_0 by (intro collision_prob, auto)
also have "... = (5/2) * (real (card (set as)))⇧2 * 2 powr (- real r) + 1 / real p"
using p_gt_0 by (simp add:ac_simps power2_eq_square)
also have "... ≤ (5/2) * (real t)⇧2 * 2 powr (-real r) + 1 / real p"
using False by (intro add_mono mult_right_mono mult_left_mono power_mono, auto)
also have "... ≤ 1/6 + 1/6"
using t_r_bound p_ge_18 by (intro add_mono, simp_all)
also have "... ≤ 1/3" by simp
finally show ?thesis by simp
qed
private lemma median_bounds:
"𝒫(ω in measure_pmf Ω⇩0. ¦median s (λi. estimate (sketch_rv (ω i))) - F 0 as¦ ≤ δ * F 0 as) ≥ 1 - real_of_rat ε"
proof -
have "strict_mono_on A real_of_float" for A by (meson less_float.rep_eq strict_mono_onI)
hence real_g_2: "⋀ω. sketch_rv' ω = real_of_float ` sketch_rv ω"
by (simp add: sketch_rv'_def sketch_rv_def tr_hash_def least_mono_commute image_comp)
moreover have "inj_on real_of_float A" for A
using real_of_float_inject by (simp add:inj_on_def)
ultimately have card_eq: "⋀ω. card (sketch_rv ω) = card (sketch_rv' ω)"
using real_g_2 by (auto intro!: card_image[symmetric])
have "Max (sketch_rv' ω) = real_of_float (Max (sketch_rv ω))" if a:"card (sketch_rv' ω) ≥ t" for ω
proof -
have "mono real_of_float"
using less_eq_float.rep_eq mono_def by blast
moreover have "finite (sketch_rv ω)"
by (simp add:sketch_rv_def least_def)
moreover have " sketch_rv ω ≠ {}"
using card_eq[symmetric] card_gt_0_iff t_gt_0 a by (simp, force)
ultimately show ?thesis
by (subst mono_Max_commute[where f="real_of_float"], simp_all add:real_g_2)
qed
hence real_g: "⋀ω. estimate' (sketch_rv' ω) = real_of_rat (estimate (sketch_rv ω))"
by (simp add:estimate_def estimate'_def card_eq of_rat_divide of_rat_mult of_rat_add real_of_rat_of_float)
have indep: "prob_space.indep_vars (measure_pmf Ω⇩0) (λ_. borel) (λi ω. estimate' (sketch_rv' (ω i))) {0..<s}"
unfolding Ω⇩0_def
by (rule indep_vars_restrict_intro', auto simp add:restrict_dfl_def lessThan_atLeast0)
moreover have "- (18 * ln (real_of_rat ε)) ≤ real s"
using of_nat_ceiling by (simp add:s_def) blast
moreover have "i < s ⟹ measure Ω⇩0 {ω. of_rat δ * of_rat (F 0 as) < ¦estimate' (sketch_rv' (ω i)) - of_rat (F 0 as)¦} ≤ 1/3"
for i
using estimate'_bounds unfolding Ω⇩0_def M_def
by (subst prob_prod_pmf_slice, simp_all)
ultimately have "1-real_of_rat ε ≤ 𝒫(ω in measure_pmf Ω⇩0.
¦median s (λi. estimate' (sketch_rv' (ω i))) - real_of_rat (F 0 as)¦ ≤ real_of_rat δ * real_of_rat (F 0 as))"
using ε_range prob_space_measure_pmf
by (intro prob_space.median_bound_2) auto
also have "... = 𝒫(ω in measure_pmf Ω⇩0.
¦median s (λi. estimate (sketch_rv (ω i))) - F 0 as¦ ≤ δ * F 0 as)"
using s_gt_0 median_rat[symmetric] real_g by (intro arg_cong2[where f="measure"])
(simp_all add:of_rat_diff[symmetric] of_rat_mult[symmetric] of_rat_less_eq)
finally show "𝒫(ω in measure_pmf Ω⇩0. ¦median s (λi. estimate (sketch_rv (ω i))) - F 0 as¦ ≤ δ * F 0 as) ≥ 1-real_of_rat ε"
by blast
qed
lemma f0_alg_correct':
"𝒫(ω in measure_pmf result. ¦ω - F 0 as¦ ≤ δ * F 0 as) ≥ 1 - of_rat ε"
proof -
have f0_result_elim: "⋀x. f0_result (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i)) =
return_pmf (median s (λi. estimate (sketch_rv (x i))))"
by (simp add:estimate_def, rule median_cong, simp)
have "result = map_pmf (λx. (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i))) Ω⇩0 ⤜ f0_result"
by (subst result_def, subst f0_alg_sketch, simp)
also have "... = Ω⇩0 ⤜ (λx. return_pmf (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i))) ⤜ f0_result"
by (simp add:t_def p_def r_def s_def map_pmf_def)
also have "... = Ω⇩0 ⤜ (λx. return_pmf (median s (λi. estimate (sketch_rv (x i)))))"
by (subst bind_assoc_pmf, subst bind_return_pmf, subst f0_result_elim) simp
finally have a:"result = Ω⇩0 ⤜ (λx. return_pmf (median s (λi. estimate (sketch_rv (x i)))))"
by simp
show ?thesis
using median_bounds by (simp add: a map_pmf_def[symmetric])
qed
private lemma f_subset:
assumes "g ` A ⊆ h ` B"
shows "(λx. f (g x)) ` A ⊆ (λx. f (h x)) ` B"
using assms by auto
lemma f0_exact_space_usage':
defines "Ω ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n)"
shows "AE ω in Ω. bit_count (encode_f0_state ω) ≤ f0_space_usage (n, ε, δ)"
proof -
have log_2_4: "log 2 4 = 2"
by (metis log2_of_power_eq mult_2 numeral_Bit0 of_nat_numeral power2_eq_square)
have a: "bit_count (F⇩e (float_of (truncate_down r y))) ≤
ereal (12 + 4 * real r + 2 * log 2 (log 2 (n+13)))" if a_1:"y ∈ {..<p}" for y
proof (cases "y ≥ 1")
case True
have aux_1: " 0 < 2 + log 2 (real y)"
using True by (intro add_pos_nonneg, auto)
have aux_2: "0 < 2 + log 2 (real p)"
using p_gt_1 by (intro add_pos_nonneg, auto)
have "bit_count (F⇩e (float_of (truncate_down r y))) ≤
ereal (10 + 4 * real r + 2 * log 2 (2 + ¦log 2 ¦real y¦¦))"
by (rule truncate_float_bit_count)
also have "... = ereal (10 + 4 * real r + 2 * log 2 (2 + (log 2 (real y))))"
using True by simp
also have "... ≤ ereal (10 + 4 * real r + 2 * log 2 (2 + log 2 p))"
using aux_1 aux_2 True p_gt_0 a_1 by simp
also have "... ≤ ereal (10 + 4 * real r + 2 * log 2 (log 2 4 + log 2 (2 * n + 40)))"
using log_2_4 p_le_n p_gt_0
by (simp add: Transcendental.log_mono aux_2)
also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 (8 * n + 160)))"
by (simp add:log_mult[symmetric])
also have "... ≤ ereal (10 + 4 * real r + 2 * log 2 (log 2 ((n+13) powr 2)))"
by (intro ereal_mono add_mono mult_left_mono Transcendental.log_mono of_nat_mono add_pos_nonneg)
(auto simp add:power2_eq_square algebra_simps)
also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 4 * log 2 (n + 13)))"
by (subst log_powr, simp_all add:log_2_4)
also have "... = ereal (12 + 4 * real r + 2 * log 2 (log 2 (n + 13)))"
by (subst log_mult, simp_all add:log_2_4)
finally show ?thesis by simp
next
case False
hence "y = 0" using a_1 by simp
then show ?thesis by (simp add:float_bit_count_zero)
qed
have "bit_count (encode_f0_state (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i))) ≤
f0_space_usage (n, ε, δ)" if b: "x ∈ {..<s} →⇩E space" for x
proof -
have c: "x ∈ extensional {..<s}" using b by (simp add:PiE_def)
have d: "sketch_rv (x y) ⊆ (λk. float_of (truncate_down r k)) ` {..<p} "
if d_1: "y < s" for y
proof -
have "sketch_rv (x y) ⊆ (λxa. float_of (truncate_down r (hash xa (x y)))) ` set as"
using least_subset by (auto simp add:sketch_rv_def tr_hash_def)
also have "... ⊆ (λk. float_of (truncate_down r (real k))) ` {..<p}"
using b hash_range as_lt_p d_1
by (intro f_subset[where f="λx. float_of (truncate_down r (real x))"] image_subsetI)
(simp add: PiE_iff mod_ring_carr)
finally show ?thesis
by simp
qed
have "⋀y. y < s ⟹ finite (sketch_rv (x y))"
unfolding sketch_rv_def by (rule finite_subset[OF least_subset], simp)
moreover have card_sketch: "⋀y. y < s ⟹ card (sketch_rv (x y)) ≤ t "
by (simp add:sketch_rv_def card_least)
moreover have "⋀y z. y < s ⟹ z ∈ sketch_rv (x y) ⟹
bit_count (F⇩e z) ≤ ereal (12 + 4 * real r + 2 * log 2 (log 2 (real n + 13)))"
using a d by auto
ultimately have e: "⋀y. y < s ⟹ bit_count (S⇩e F⇩e (sketch_rv (x y)))
≤ ereal (real t) * (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1"
using float_encoding by (intro set_bit_count_est, auto)
have f: "⋀y. y < s ⟹ bit_count (P⇩e p 2 (x y)) ≤ ereal (real 2 * (log 2 (real p) + 1))"
using p_gt_1 b
by (intro bounded_degree_polynomial_bit_count) (simp_all add:space_def PiE_def Pi_def)
have "bit_count (encode_f0_state (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i))) =
bit_count (N⇩e s) + bit_count (N⇩e t) + bit_count (N⇩e p) + bit_count (N⇩e r) +
bit_count (([0..<s] →⇩e P⇩e p 2) x) +
bit_count (([0..<s] →⇩e S⇩e F⇩e) (λi∈{..<s}. sketch_rv (x i)))"
by (simp add:encode_f0_state_def dependent_bit_count lessThan_atLeast0
s_def[symmetric] t_def[symmetric] p_def[symmetric] r_def[symmetric] ac_simps)
also have "... ≤ ereal (2* log 2 (real s + 1) + 1) + ereal (2* log 2 (real t + 1) + 1)
+ ereal (2* log 2 (real p + 1) + 1) + ereal (2 * log 2 (real r + 1) + 1)
+ (ereal (real s) * (ereal (real 2 * (log 2 (real p) + 1))))
+ (ereal (real s) * ((ereal (real t) *
(ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1)))"
using c e f
by (intro add_mono exp_golomb_bit_count fun_bit_count_est[where xs="[0..<s]", simplified])
(simp_all add:lessThan_atLeast0)
also have "... = ereal ( 4 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) +
2 * log 2 (real p + 1) + 2 * log 2 (real r + 1) + real s * (3 + 2 * log 2 (real p) +
real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))"
by (simp add:algebra_simps)
also have "... ≤ ereal ( 4 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) +
2 * log 2 (2 * (21 + real n)) + 2 * log 2 (real r + 1) + real s * (3 + 2 * log 2 (2 * (21 + real n)) +
real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))"
using p_le_n p_gt_0
by (intro ereal_mono add_mono mult_left_mono, auto)
also have "... = ereal (6 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) +
2 * log 2 (21 + real n) + 2 * log 2 (real r + 1) + real s * (5 + 2 * log 2 (21 + real n) +
real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))"
by (subst (1 2) log_mult, auto)
also have "... ≤ f0_space_usage (n, ε, δ)"
by (simp add:s_def[symmetric] r_def[symmetric] t_def[symmetric] Let_def)
(simp add:algebra_simps)
finally show "bit_count (encode_f0_state (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i))) ≤
f0_space_usage (n, ε, δ)" by simp
qed
hence "⋀x. x ∈ set_pmf Ω⇩0 ⟹
bit_count (encode_f0_state (s, t, p, r, x, λi∈{..<s}. sketch_rv (x i))) ≤ ereal (f0_space_usage (n, ε, δ))"
by (simp add:Ω⇩0_def set_prod_pmf del:f0_space_usage.simps)
hence "⋀y. y ∈ set_pmf Ω ⟹ bit_count (encode_f0_state y) ≤ ereal (f0_space_usage (n, ε, δ))"
by (simp add: Ω_def f0_alg_sketch del:f0_space_usage.simps f0_init.simps)
(metis (no_types, lifting) image_iff pmf.set_map)
thus ?thesis
by (simp add: AE_measure_pmf_iff del:f0_space_usage.simps)
qed
end
text ‹Main results of this section:›
theorem f0_alg_correct:
assumes "ε ∈ {0<..<1}"
assumes "δ ∈ {0<..<1}"
assumes "set as ⊆ {..<n}"
defines "Ω ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n) ⤜ f0_result"
shows "𝒫(ω in measure_pmf Ω. ¦ω - F 0 as¦ ≤ δ * F 0 as) ≥ 1 - of_rat ε"
using f0_alg_correct'[OF assms(1-3)] unfolding Ω_def by blast
theorem f0_exact_space_usage:
assumes "ε ∈ {0<..<1}"
assumes "δ ∈ {0<..<1}"
assumes "set as ⊆ {..<n}"
defines "Ω ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n)"
shows "AE ω in Ω. bit_count (encode_f0_state ω) ≤ f0_space_usage (n, ε, δ)"
using f0_exact_space_usage'[OF assms(1-3)] unfolding Ω_def by blast
theorem f0_asymptotic_space_complexity:
"f0_space_usage ∈ O[at_top ×⇩F at_right 0 ×⇩F at_right 0](λ(n, ε, δ). ln (1 / of_rat ε) *
(ln (real n) + 1 / (of_rat δ)⇧2 * (ln (ln (real n)) + ln (1 / of_rat δ))))"
(is "_ ∈ O[?F](?rhs)")
proof -
define n_of :: "nat × rat × rat ⇒ nat" where "n_of = (λ(n, ε, δ). n)"
define ε_of :: "nat × rat × rat ⇒ rat" where "ε_of = (λ(n, ε, δ). ε)"
define δ_of :: "nat × rat × rat ⇒ rat" where "δ_of = (λ(n, ε, δ). δ)"
define t_of where "t_of = (λx. nat ⌈80 / (real_of_rat (δ_of x))⇧2⌉)"
define s_of where "s_of = (λx. nat ⌈-(18 * ln (real_of_rat (ε_of x)))⌉)"
define r_of where "r_of = (λx. nat (4 * ⌈log 2 (1 / real_of_rat (δ_of x))⌉ + 23))"
define g where "g = (λx. ln (1 / of_rat (ε_of x)) * (ln (real (n_of x)) +
1 / (of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / of_rat (δ_of x)))))"
have evt: "(⋀x.
0 < real_of_rat (δ_of x) ∧ 0 < real_of_rat (ε_of x) ∧
1/real_of_rat (δ_of x) ≥ δ ∧ 1/real_of_rat (ε_of x) ≥ ε ∧
real (n_of x) ≥ n ⟹ P x) ⟹ eventually P ?F" (is "(⋀x. ?prem x ⟹ _) ⟹ _")
for δ ε n P
apply (rule eventually_mono[where P="?prem" and Q="P"])
apply (simp add:ε_of_def case_prod_beta' δ_of_def n_of_def)
apply (intro eventually_conj eventually_prod1' eventually_prod2'
sequentially_inf eventually_at_right_less inv_at_right_0_inf)
by (auto simp add:prod_filter_eq_bot)
have exp_pos: "exp k ≤ real x ⟹ x > 0" for k x
using exp_gt_zero gr0I by force
have exp_gt_1: "exp 1 ≥ (1::real)"
by simp
have 1: "(λ_. 1) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (auto intro!:landau_o.big_mono evt[where ε="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
have 2: "(λ_. 1) ∈ O[?F](λx. ln (1 / real_of_rat (δ_of x)))"
by (auto intro!:landau_o.big_mono evt[where δ="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
have 3: " (λx. 1) ∈ O[?F](λx. ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x)))"
using exp_pos
by (intro landau_sum_2 2 evt[where n="exp 1" and δ="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto)
have 4: "(λ_. 1) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
using one_le_power
by (intro landau_o.big_mono evt[where δ="1"], auto simp add:power_one_over[symmetric])
have "(λx. 80 * (1 / (real_of_rat (δ_of x))⇧2)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
by (subst landau_o.big.cmult_in_iff, auto)
hence 5: "(λx. real (t_of x)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
unfolding t_of_def
by (intro landau_real_nat landau_ceil 4, auto)
have "(λx. ln (real_of_rat (ε_of x))) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_o.big_mono evt[where ε="1"], auto simp add:ln_div)
hence 6: "(λx. real (s_of x)) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
unfolding s_of_def by (intro landau_nat_ceil 1, simp)
have 7: " (λx. 1) ∈ O[?F](λx. ln (real (n_of x)))"
using exp_pos by (auto intro!: landau_o.big_mono evt[where n="exp 1"] iffD2[OF ln_ge_iff] simp: abs_ge_iff)
have 8:" (λ_. 1) ∈
O[?F](λx. ln (real (n_of x)) + 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
using order_trans[OF exp_gt_1] exp_pos
by (intro landau_sum_1 7 evt[where n="exp 1" and δ="1"] ln_ge_zero iffD2[OF ln_ge_iff]
mult_nonneg_nonneg add_nonneg_nonneg) auto
have "(λx. ln (real (s_of x) + 1)) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_ln_3 sum_in_bigo 6 1, simp)
hence 9: "(λx. log 2 (real (s_of x) + 1)) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 8, auto simp:log_def)
have 10: "(λx. 1) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 8 1)
have "(λx. ln (real (t_of x) + 1)) ∈
O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
using 5 by (intro landau_o.big_mult_1 3 landau_ln_3 sum_in_bigo 4, simp_all)
hence " (λx. log 2 (real (t_of x) + 1)) ∈
O[?F](λx. ln (real (n_of x)) + 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
using order_trans[OF exp_gt_1] exp_pos
by (intro landau_sum_2 evt[where n="exp 1" and δ="1"] ln_ge_zero iffD2[OF ln_ge_iff]
mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def)
hence 11: "(λx. log 2 (real (t_of x) + 1)) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1' 1, auto)
have " (λx. 1) ∈ O[?F](λx. real (n_of x))"
by (intro landau_o.big_mono evt[where n="1"], auto)
hence "(λx. ln (real (n_of x) + 21)) ∈ O[?F](λx. ln (real (n_of x)))"
by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto)
hence 12: "(λx. log 2 (real (n_of x) + 21)) ∈ O[?F](g)"
unfolding g_def using exp_pos order_trans[OF exp_gt_1]
by (intro landau_o.big_mult_1' 1 landau_sum_1 evt[where n="exp 1" and δ="1"]
ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def)
have "(λx. ln (1 / real_of_rat (δ_of x))) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
by (intro landau_ln_3 evt[where δ="1"] landau_o.big_mono)
(auto simp add:power_one_over[symmetric] self_le_power)
hence " (λx. real (nat (4*⌈log 2 (1 / real_of_rat (δ_of x))⌉+23))) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
using 4 by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def)
hence " (λx. ln (real (r_of x) + 1)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
unfolding r_of_def
by (intro landau_ln_3 sum_in_bigo 4, auto)
hence " (λx. log 2 (real (r_of x) + 1)) ∈
O[?F](λx. (1 / (real_of_rat (δ_of x))⇧2) * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
by (intro landau_o.big_mult_1 3, simp add:log_def)
hence " (λx. log 2 (real (r_of x) + 1)) ∈
O[?F](λx. ln (real (n_of x)) + 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
using exp_pos order_trans[OF exp_gt_1]
by (intro landau_sum_2 evt[where n="exp 1" and δ="1"] ln_ge_zero
iffD2[OF ln_ge_iff] add_nonneg_nonneg mult_nonneg_nonneg) (auto)
hence 13: "(λx. log 2 (real (r_of x) + 1)) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1' 1, auto)
have 14: "(λx. 1) ∈ O[?F](λx. real (n_of x))"
by (intro landau_o.big_mono evt[where n="1"], auto)
have "(λx. ln (real (n_of x) + 13)) ∈ O[?F](λx. ln (real (n_of x)))"
using 14 by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto)
hence "(λx. ln (log 2 (real (n_of x) + 13))) ∈ O[?F](λx. ln (ln (real (n_of x))))"
using exp_pos by (intro landau_ln_2[where a="2"] iffD2[OF ln_ge_iff] evt[where n="exp 2"])
(auto simp add:log_def)
hence "(λx. log 2 (log 2 (real (n_of x) + 13))) ∈ O[?F](λx. ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x)))"
using exp_pos by (intro landau_sum_1 evt[where n="exp 1" and δ="1"] ln_ge_zero iffD2[OF ln_ge_iff])
(auto simp add:log_def)
moreover have "(λx. real (r_of x)) ∈ O[?F](λx. ln (1 / real_of_rat (δ_of x)))"
unfolding r_of_def using 2
by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def)
hence "(λx. real (r_of x)) ∈ O[?F](λx. ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x)))"
using exp_pos
by (intro landau_sum_2 evt[where n="exp 1" and δ="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto)
ultimately have 15:" (λx. real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13))))
∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
using 5 3
by (intro landau_o.mult sum_in_bigo, auto)
have "(λx. 5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13))))
∈ O[?F](λx. ln (real (n_of x)) + 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x))))"
proof -
have "∀⇩F x in ?F. 0 ≤ ln (real (n_of x))"
by (intro evt[where n="1"] ln_ge_zero, auto)
moreover have "∀⇩F x in ?F. 0 ≤ 1 / (real_of_rat (δ_of x))⇧2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (δ_of x)))"
using exp_pos
by (intro evt[where n="exp 1" and δ="1"] mult_nonneg_nonneg add_nonneg_nonneg
ln_ge_zero iffD2[OF ln_ge_iff]) auto
moreover have " (λx. ln (21 + real (n_of x))) ∈ O[?F](λx. ln (real (n_of x)))"
using 14 by (intro landau_ln_2[where a="2"] sum_in_bigo evt[where n="2"], auto)
hence "(λx. 5 + 2 * log 2 (21 + real (n_of x))) ∈ O[?F](λx. ln (real (n_of x)))"
using 7 by (intro sum_in_bigo, auto simp add:log_def)
ultimately show ?thesis
using 15 by (rule landau_sum)
qed
hence 16: "(λx. real (s_of x) * (5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) *
(13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13))))) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.mult 6, auto)
have "f0_space_usage = (λx. f0_space_usage (n_of x, ε_of x, δ_of x))"
by (simp add:case_prod_beta' n_of_def ε_of_def δ_of_def)
also have "... ∈ O[?F](g)"
using 9 10 11 12 13 16
by (simp add:fun_cong[OF s_of_def[symmetric]] fun_cong[OF t_of_def[symmetric]]
fun_cong[OF r_of_def[symmetric]] Let_def) (intro sum_in_bigo, auto)
also have "... = O[?F](?rhs)"
by (simp add:case_prod_beta' g_def n_of_def ε_of_def δ_of_def)
finally show ?thesis
by simp
qed
end