Theory Frequency_Moments.Frequency_Moments_Preliminary_Results
section ‹Preliminary Results›
theory Frequency_Moments_Preliminary_Results
imports
"HOL.Transcendental"
"HOL-Computational_Algebra.Primes"
"HOL-Library.Extended_Real"
"HOL-Library.Multiset"
"HOL-Library.Sublist"
Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators
Bertrands_Postulate.Bertrand
Expander_Graphs.Expander_Graphs_Multiset_Extras
begin
text ‹This section contains various preliminary results.›
lemma card_ordered_pairs:
fixes M :: "('a ::linorder) set"
assumes "finite M"
shows "2 * card {(x,y) ∈ M × M. x < y} = card M * (card M - 1)"
proof -
have a: "finite (M × M)" using assms by simp
have inj_swap: "inj (λx. (snd x, fst x))"
by (rule inj_onI, simp add: prod_eq_iff)
have "2 * card {(x,y) ∈ M × M. x < y} =
card {(x,y) ∈ M × M. x < y} + card ((λx. (snd x, fst x))`{(x,y) ∈ M × M. x < y})"
by (simp add: card_image[OF inj_on_subset[OF inj_swap]])
also have "... = card {(x,y) ∈ M × M. x < y} + card {(x,y) ∈ M × M. y < x}"
by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff image_iff)
also have "... = card ({(x,y) ∈ M × M. x < y} ∪ {(x,y) ∈ M × M. y < x})"
by (intro card_Un_disjoint[symmetric] a finite_subset[where B="M × M"] subsetI) auto
also have "... = card ((M × M) - {(x,y) ∈ M × M. x = y})"
by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff)
also have "... = card (M × M) - card {(x,y) ∈ M × M. x = y}"
by (intro card_Diff_subset a finite_subset[where B="M × M"] subsetI) auto
also have "... = card M ^ 2 - card ((λx. (x,x)) ` M)"
using assms
by (intro arg_cong2[where f="(-)"] arg_cong[where f="card"])
(auto simp:power2_eq_square set_eq_iff image_iff)
also have "... = card M ^ 2 - card M"
by (intro arg_cong2[where f="(-)"] card_image inj_onI, auto)
also have "... = card M * (card M - 1)"
by (cases "card M ≥ 0", auto simp:power2_eq_square algebra_simps)
finally show ?thesis by simp
qed
lemma ereal_mono: "x ≤ y ⟹ ereal x ≤ ereal y"
by simp
lemma abs_ge_iff: "((x::real) ≤ abs y) = (x ≤ y ∨ x ≤ -y)"
by linarith
lemma count_list_gr_1:
"(x ∈ set xs) = (count_list xs x ≥ 1)"
by (induction xs, simp, simp)
lemma count_list_append: "count_list (xs@ys) v = count_list xs v + count_list ys v"
by (induction xs, simp, simp)
lemma count_list_lt_suffix:
assumes "suffix a b"
assumes "x ∈ {b ! i| i. i < length b - length a}"
shows "count_list a x < count_list b x"
proof -
have "length a ≤ length b" using assms(1)
by (simp add: suffix_length_le)
hence "x ∈ set (nths b {i. i < length b - length a})"
using assms diff_commute by (auto simp add:set_nths)
hence a:"x ∈ set (take (length b - length a) b)"
by (subst (asm) lessThan_def[symmetric], simp)
have "b = (take (length b - length a) b)@drop (length b - length a) b"
by simp
also have "... = (take (length b - length a) b)@a"
using assms(1) suffix_take by auto
finally have b:"b = (take (length b - length a) b)@a" by simp
have "count_list a x < 1 + count_list a x" by simp
also have "... ≤ count_list (take (length b - length a) b) x + count_list a x"
using a count_list_gr_1
by (intro add_mono, fast, simp)
also have "... = count_list b x"
using b count_list_append by metis
finally show ?thesis by simp
qed
lemma suffix_drop_drop:
assumes "x ≥ y"
shows "suffix (drop x a) (drop y a)"
proof -
have "drop y a = take (x - y) (drop y a)@drop (x- y) (drop y a)"
by (subst append_take_drop_id, simp)
also have " ... = take (x-y) (drop y a)@drop x a"
using assms by simp
finally have "drop y a = take (x-y) (drop y a)@drop x a" by simp
thus ?thesis
by (auto simp add:suffix_def)
qed
lemma count_list_card: "count_list xs x = card {k. k < length xs ∧ xs ! k = x}"
proof -
have "count_list xs x = length (filter ((=) x) xs)"
by (induction xs, simp, simp)
also have "... = card {k. k < length xs ∧ xs ! k = x}"
by (subst length_filter_conv_card, metis)
finally show ?thesis by simp
qed
lemma card_gr_1_iff:
assumes "finite S" "x ∈ S" "y ∈ S" "x ≠ y"
shows "card S > 1"
using assms card_le_Suc0_iff_eq leI by auto
lemma count_list_ge_2_iff:
assumes "y < z"
assumes "z < length xs"
assumes "xs ! y = xs ! z"
shows "count_list xs (xs ! y) > 1"
proof -
have " 1 < card {k. k < length xs ∧ xs ! k = xs ! y}"
using assms by (intro card_gr_1_iff[where x="y" and y="z"], auto)
thus ?thesis
by (simp add: count_list_card)
qed
text ‹Results about multisets and sorting›
lemmas disj_induct_mset = disj_induct_mset
lemma prod_mset_conv:
fixes f :: "'a ⇒ 'b::{comm_monoid_mult}"
shows "prod_mset (image_mset f A) = prod (λx. f x^(count A x)) (set_mset A)"
proof (induction A rule: disj_induct_mset)
case 1
then show ?case by simp
next
case (2 n M x)
moreover have "count M x = 0" using 2 by (simp add: count_eq_zero_iff)
moreover have "⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2 by blast
ultimately show ?case by (simp add:algebra_simps)
qed
text ‹There is a version @{thm [source] sum_list_map_eq_sum_count} but it doesn't work
if the function maps into the reals.›
lemma sum_list_eval:
fixes f :: "'a ⇒ 'b::{ring,semiring_1}"
shows "sum_list (map f xs) = (∑x ∈ set xs. of_nat (count_list xs x) * f x)"
proof -
define M where "M = mset xs"
have "sum_mset (image_mset f M) = (∑x ∈ set_mset M. of_nat (count M x) * f x)"
proof (induction "M" rule:disj_induct_mset)
case 1
then show ?case by simp
next
case (2 n M x)
have a:"⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2(2) by blast
show ?case using 2 by (simp add:a count_eq_zero_iff[symmetric])
qed
moreover have "⋀x. count_list xs x = count (mset xs) x"
by (induction xs, simp, simp)
ultimately show ?thesis
by (simp add:M_def sum_mset_sum_list[symmetric])
qed
lemma prod_list_eval:
fixes f :: "'a ⇒ 'b::{ring,semiring_1,comm_monoid_mult}"
shows "prod_list (map f xs) = (∏x ∈ set xs. (f x)^(count_list xs x))"
proof -
define M where "M = mset xs"
have "prod_mset (image_mset f M) = (∏x ∈ set_mset M. f x ^ (count M x))"
proof (induction "M" rule:disj_induct_mset)
case 1
then show ?case by simp
next
case (2 n M x)
have a:"⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2(2) by blast
have b:"count M x = 0" using 2 by (subst count_eq_zero_iff) blast
show ?case using 2 by (simp add:a b mult.commute)
qed
moreover have "⋀x. count_list xs x = count (mset xs) x"
by (induction xs, simp, simp)
ultimately show ?thesis
by (simp add:M_def prod_mset_prod_list[symmetric])
qed
lemma sorted_sorted_list_of_multiset: "sorted (sorted_list_of_multiset M)"
by (induction M, auto simp:sorted_insort)
lemma count_mset: "count (mset xs) a = count_list xs a"
by (induction xs, auto)
lemma swap_filter_image: "filter_mset g (image_mset f A) = image_mset f (filter_mset (g ∘ f) A)"
by (induction A, auto)
lemma list_eq_iff:
assumes "mset xs = mset ys"
assumes "sorted xs"
assumes "sorted ys"
shows "xs = ys"
using assms properties_for_sort by blast
lemma sorted_list_of_multiset_image_commute:
assumes "mono f"
shows "sorted_list_of_multiset (image_mset f M) = map f (sorted_list_of_multiset M)"
proof -
have "sorted (sorted_list_of_multiset (image_mset f M))"
by (simp add:sorted_sorted_list_of_multiset)
moreover have " sorted_wrt (λx y. f x ≤ f y) (sorted_list_of_multiset M)"
by (rule sorted_wrt_mono_rel[where P="λx y. x ≤ y"])
(auto intro: monoD[OF assms] sorted_sorted_list_of_multiset)
hence "sorted (map f (sorted_list_of_multiset M))"
by (subst sorted_wrt_map)
ultimately show ?thesis
by (intro list_eq_iff, auto)
qed
text ‹Results about rounding and floating point numbers›
lemma round_down_ge:
"x ≤ round_down prec x + 2 powr (-prec)"
using round_down_correct by (simp, meson diff_diff_eq diff_eq_diff_less_eq)
lemma truncate_down_ge:
"x ≤ truncate_down prec x + abs x * 2 powr (-prec)"
proof (cases "abs x > 0")
case True
have "x ≤ round_down (int prec - ⌊log 2 ¦x¦⌋) x + 2 powr (-real_of_int(int prec - ⌊log 2 ¦x¦⌋))"
by (rule round_down_ge)
also have "... ≤ truncate_down prec x + 2 powr ( ⌊log 2 ¦x¦⌋) * 2 powr (-real prec)"
by (rule add_mono, simp_all add:powr_add[symmetric] truncate_down_def)
also have "... ≤ truncate_down prec x + ¦x¦ * 2 powr (-real prec)"
using True
by (intro add_mono mult_right_mono, simp_all add:le_log_iff[symmetric])
finally show ?thesis by simp
next
case False
then show ?thesis by simp
qed
lemma truncate_down_pos:
assumes "x ≥ 0"
shows "x * (1 - 2 powr (-prec)) ≤ truncate_down prec x"
by (simp add:right_diff_distrib diff_le_eq)
(metis truncate_down_ge assms abs_of_nonneg)
lemma truncate_down_eq:
assumes "truncate_down r x = truncate_down r y"
shows "abs (x-y) ≤ max (abs x) (abs y) * 2 powr (-real r)"
proof -
have "x - y ≤ truncate_down r x + abs x * 2 powr (-real r) - y"
by (rule diff_right_mono, rule truncate_down_ge)
also have "... ≤ y + abs x * 2 powr (-real r) - y"
using truncate_down_le
by (intro diff_right_mono add_mono, subst assms(1), simp_all)
also have "... ≤ abs x * 2 powr (-real r)" by simp
also have "... ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
finally have a:"x - y ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
have "y - x ≤ truncate_down r y + abs y * 2 powr (-real r) - x"
by (rule diff_right_mono, rule truncate_down_ge)
also have "... ≤ x + abs y * 2 powr (-real r) - x"
using truncate_down_le
by (intro diff_right_mono add_mono, subst assms(1)[symmetric], auto)
also have "... ≤ abs y * 2 powr (-real r)" by simp
also have "... ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
finally have b:"y - x ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
show ?thesis
using abs_le_iff a b by linarith
qed
definition rat_of_float :: "float ⇒ rat" where
"rat_of_float f = of_int (mantissa f) *
(if exponent f ≥ 0 then 2 ^ (nat (exponent f)) else 1 / 2 ^ (nat (-exponent f)))"
lemma real_of_rat_of_float: "real_of_rat (rat_of_float x) = real_of_float x"
proof -
have "real_of_rat (rat_of_float x) = mantissa x * (2 powr (exponent x))"
by (simp add:rat_of_float_def of_rat_mult of_rat_divide of_rat_power powr_realpow[symmetric] powr_minus_divide)
also have "... = real_of_float x"
using mantissa_exponent by simp
finally show ?thesis by simp
qed
lemma log_est: "log 2 (real n + 1) ≤ n"
proof -
have "1 + real n = real (n + 1)"
by simp
also have "... ≤ real (2 ^ n)"
by (intro of_nat_mono suc_n_le_2_pow_n)
also have "... = 2 powr (real n)"
by (simp add:powr_realpow)
finally have "1 + real n ≤ 2 powr (real n)"
by simp
thus ?thesis
by (simp add: Transcendental.log_le_iff)
qed
lemma truncate_mantissa_bound:
"abs (⌊x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)⌋) ≤ 2 ^ (r+1)" (is "?lhs ≤ _")
proof -
define q where "q = ⌊x * 2 powr (real r - real_of_int (⌊log 2 ¦x¦⌋))⌋"
have "abs q ≤ 2 ^ (r + 1)" if a:"x > 0"
proof -
have "abs q = q"
using a by (intro abs_of_nonneg, simp add:q_def)
also have "... ≤ x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)"
unfolding q_def using of_int_floor_le by blast
also have "... = x * 2 powr real_of_int (int r - ⌊log 2 ¦x¦⌋)"
by auto
also have "... = 2 powr (log 2 x + real_of_int (int r - ⌊log 2 ¦x¦⌋))"
using a by (simp add:powr_add)
also have "... ≤ 2 powr (real r + 1)"
using a by (intro powr_mono, linarith+)
also have "... = 2 ^ (r+1)"
by (subst powr_realpow[symmetric], simp_all add:add.commute)
finally show "abs q ≤ 2 ^ (r+1)"
by (metis of_int_le_iff of_int_numeral of_int_power)
qed
moreover have "abs q ≤ (2 ^ (r + 1))" if a: "x < 0"
proof -
have "-(2 ^ (r+1) + 1) = -(2 powr (real r + 1)+1)"
by (subst powr_realpow[symmetric], simp_all add: add.commute)
also have "... < -(2 powr (log 2 (- x) + (r - ⌊log 2 ¦x¦⌋)) + 1)"
using a by (simp, linarith)
also have "... = x * 2 powr (r - ⌊log 2 ¦x¦⌋) - 1"
using a by (simp add:powr_add)
also have "... ≤ q"
by (simp add:q_def)
also have "... = - abs q"
using a
by (subst abs_of_neg, simp_all add: mult_pos_neg2 q_def)
finally have "-(2 ^ (r+1)+1) < - abs q" using of_int_less_iff by fastforce
hence "-(2 ^ (r+1)) ≤ - abs q" by linarith
thus "abs q ≤ 2^(r+1)" by linarith
qed
moreover have "x = 0 ⟹ abs q ≤ 2^(r+1)"
by (simp add:q_def)
ultimately have "abs q ≤ 2^(r+1)"
by fastforce
thus ?thesis using q_def by blast
qed
lemma truncate_float_bit_count:
"bit_count (F⇩e (float_of (truncate_down r x))) ≤ 10 + 4 * real r + 2*log 2 (2 + ¦log 2 ¦x¦¦)"
(is "?lhs ≤ ?rhs")
proof -
define m where "m = ⌊x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)⌋"
define e where "e = ⌊log 2 ¦x¦⌋ - int r"
have a: "(real_of_int ⌊log 2 ¦x¦⌋ - real r) = e"
by (simp add:e_def)
have "abs m + 2 ≤ 2 ^ (r + 1) + 2^1"
using truncate_mantissa_bound
by (intro add_mono, simp_all add:m_def)
also have "... ≤ 2 ^ (r+2)"
by simp
finally have b:"abs m + 2 ≤ 2 ^ (r+2)" by simp
hence "real_of_int (¦m¦ + 2) ≤ real_of_int (4 * 2 ^ r)"
by (subst of_int_le_iff, simp)
hence "¦real_of_int m¦ + 2 ≤ 4 * 2 ^ r"
by simp
hence c:"log 2 (real_of_int (¦m¦ + 2)) ≤ r+2"
by (simp add: Transcendental.log_le_iff powr_add powr_realpow)
have "real_of_int (abs e + 1) ≤ real_of_int ¦⌊log 2 ¦x¦⌋¦ + real_of_int r + 1"
by (simp add:e_def)
also have "... ≤ 1 + abs (log 2 (abs x)) + real_of_int r + 1"
by (simp add:abs_le_iff, linarith)
also have "... ≤ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))"
by (simp add:distrib_left distrib_right)
finally have d:"real_of_int (abs e + 1) ≤ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by simp
have "log 2 (real_of_int (abs e + 1)) ≤ log 2 (real_of_int r + 1) + log 2 (2 + abs (log 2 (abs x)))"
using d by (simp add: log_mult[symmetric])
also have "... ≤ r + log 2 (2 + abs (log 2 (abs x)))"
using log_est by (intro add_mono, simp_all add:add.commute)
finally have e: "log 2 (real_of_int (abs e + 1)) ≤ r + log 2 (2 + abs (log 2 (abs x)))" by simp
have "?lhs = bit_count (F⇩e (float_of (real_of_int m * 2 powr real_of_int e)))"
by (simp add:truncate_down_def round_down_def m_def[symmetric] a)
also have "... ≤ ereal (6 + (2 * log 2 (real_of_int (¦m¦ + 2)) + 2 * log 2 (real_of_int (¦e¦ + 1))))"
using float_bit_count_2 by simp
also have "... ≤ ereal (6 + (2 * real (r+2) + 2 * (r + log 2 (2 + abs (log 2 (abs x))))))"
using c e
by (subst ereal_less_eq, intro add_mono mult_left_mono, linarith+)
also have "... = ?rhs" by simp
finally show ?thesis by simp
qed
definition prime_above :: "nat ⇒ nat"
where "prime_above n = (SOME x. x ∈ {n..(2*n+2)} ∧ prime x)"
text ‹The term @{term"prime_above n"} returns a prime between @{term "n::nat"} and @{term "2*(n::nat)+2"}.
Because of Bertrand's postulate there always is such a value. In a refinement of the algorithms, it may make sense to
replace this with an algorithm, that finds such a prime exactly or approximately.
The definition is intentionally inexact, to allow refinement with various algorithms, without modifying the
high-level mathematical correctness proof.›
lemma ex_subset:
assumes "∃x ∈ A. P x"
assumes "A ⊆ B"
shows "∃x ∈ B. P x"
using assms by auto
lemma
shows prime_above_prime: "prime (prime_above n)"
and prime_above_range: "prime_above n ∈ {n..(2*n+2)}"
proof -
define r where "r = (λx. x ∈ {n..(2*n+2)} ∧ prime x)"
have "∃x. r x"
proof (cases "n>2")
case True
hence "n-1 > 1" by simp
hence "∃x ∈ {(n-1)<..<(2*(n-1))}. prime x"
using bertrand by simp
moreover have "{n - 1<..<2 * (n - 1)} ⊆ {n..2 * n + 2}"
by (intro subsetI, auto)
ultimately have "∃x ∈ {n..(2*n+2)}. prime x"
by (rule ex_subset)
then show ?thesis by (simp add:r_def Bex_def)
next
case False
hence "2 ∈ {n..(2*n+2)}"
by simp
moreover have "prime (2::nat)"
using two_is_prime_nat by blast
ultimately have "r 2"
using r_def by simp
then show ?thesis by (rule exI)
qed
moreover have "prime_above n = (SOME x. r x)"
by (simp add:prime_above_def r_def)
ultimately have a:"r (prime_above n)"
using someI_ex by metis
show "prime (prime_above n)"
using a unfolding r_def by blast
show "prime_above n ∈ {n..(2*n+2)}"
using a unfolding r_def by blast
qed
lemma prime_above_min: "prime_above n ≥ 2"
using prime_above_prime
by (simp add: prime_ge_2_nat)
lemma prime_above_lower_bound: "prime_above n ≥ n"
using prime_above_range
by simp
lemma prime_above_upper_bound: "prime_above n ≤ 2*n+2"
using prime_above_range
by simp
end