Theory Source_Coding_Theorem
theory Source_Coding_Theorem
imports "HOL-Probability.Information"
begin
section‹Basic types›
type_synonym bit = bool
type_synonym bword = "bit list"
type_synonym letter = nat
type_synonym 'b word = "'b list"
type_synonym 'b encoder = "'b word ⇒ bword"
type_synonym 'b decoder = "bword ⇒ 'b word option"
section‹Locale for the source coding theorem›
locale source_code = information_space +
fixes fi :: "'b ⇒ real"
fixes X :: "'a ⇒ 'b"
assumes distr_i: "simple_distributed M X fi"
assumes b_val: "b = 2"
fixes enc::"'b encoder"
fixes dec::"'b decoder"
assumes real_code:
"dec (enc x) = Some x"
"enc w = [] ⟷ w = []"
"x ≠ [] ⟶ enc x = enc [hd x] @ enc (tl x)"
section‹Source coding theorem, direct: the entropy is a lower bound of the code rate›
context source_code
begin
subsection‹The letter set›
definition L :: "'b set" where
"L ≡ X ` space M"
lemma fin_L: "finite L"
using L_def distr_i
by auto
lemma emp_L: "L ≠ {}"
using L_def subprob_not_empty
by auto
subsection‹Codes and words›
abbreviation real_word :: "'b word ⇒ bool" where
"real_word w ≡ (set w ⊆ L)"
abbreviation k_words :: "nat ⇒ ('b word) set" where
"k_words k ≡ {w. length w = k ∧ real_word w}"
lemma rw_tail:
assumes "real_word w"
shows "w = [] ∨ real_word (tl w)"
by (meson assms list.set_sel(2) subset_code(1))
definition code_word_length :: "'e encoder ⇒ 'e ⇒ nat" where
"code_word_length e l = length (e [l])"
abbreviation cw_len :: "'b ⇒ nat" where
"cw_len l ≡ code_word_length enc l"
definition code_rate :: "'e encoder ⇒ ('a ⇒ 'e) ⇒ real" where
"code_rate e Xo = expectation (λa. (code_word_length e ((Xo) a)))"
lemma fi_pos: "i∈ L ⟹ 0 ≤ fi i"
using simple_distributed_nonneg[OF distr_i] L_def by auto
lemma (in prob_space) simp_exp_composed:
assumes X: "simple_distributed M X Px"
shows "expectation (λa. f (X a)) = (∑x ∈ X`space M. f x * Px x)"
using distributed_integral[OF simple_distributed[OF X], of f]
simple_distributed_nonneg[OF X]
lebesgue_integral_count_space_finite[OF simple_distributed_finite[OF X], of "λx. f x * Px x"]
by (simp add: ac_simps)
lemma cr_rw:
"code_rate enc X = (∑i ∈ X ` space M. fi i * cw_len i)"
using simp_exp_composed[OF distr_i, of "cw_len"]
by (simp add: mult.commute code_rate_def)
abbreviation cw_len_concat :: "'b word ⇒ nat" where
"cw_len_concat w ≡ foldr (λx s. (cw_len x) + s) w 0"
lemma cw_len_length: "cw_len_concat w = length (enc w)"
proof (induction w)
case Nil
show ?case using real_code by simp
case (Cons a w)
have "cw_len_concat (a # w) = cw_len a + cw_len_concat w" by simp
thus ?case using code_word_length_def real_code Cons
by (metis length_append list.distinct(1) list.sel(1) list.sel(3))
qed
lemma maj_fold:
assumes "⋀l. l∈L ⟹ f l ≤ bound"
assumes "real_word w"
shows "foldr (λx s. f x + s) w 0 ≤ length w * bound"
using assms
by(induction w) (simp,fastforce)
definition max_len :: "nat" where
"max_len = Max ((λx. cw_len x) ` L)"
lemma max_cw:
"l ∈ L ⟹ cw_len l ≤ max_len"
by (simp add: max_len_def fin_L)
subsection‹Related to the Kraft theorem›
definition 𝒦 :: "real" where
"𝒦 = (∑i∈L. 1 / b ^ (cw_len i))"
lemma pos_cw_len: "0 < 1 / b ^ cw_len i" using b_gt_1 by simp
lemma 𝒦_pos: "0 < 𝒦"
using emp_L fin_L pos_cw_len sum_pos 𝒦_def
by metis
lemma 𝒦_pow: "𝒦 = (∑i∈L. 1 / b powr cw_len i)"
using powr_realpow b_gt_1
by (simp add: 𝒦_def)
lemma k_words_rel:
"k_words (Suc k) = {w. (hd w ∈ L ∧ tl w ∈ k_words k ∧ w ≠ [])}"
proof
fix k
show "k_words (Suc k) ⊆ {w. (hd w ∈ L ∧ tl w ∈ k_words k ∧ w ≠ [] )}" (is "?l ⊆ ?r")
proof
fix w
assume w_kw: "w ∈ k_words (Suc k)"
hence "real_word w" by simp
hence "hd w ∈ L"
by (metis (mono_tags) w_kw hd_in_set list.size(3) mem_Collect_eq nat.distinct(1) subset_code(1))
moreover have "length w = Suc k" using w_kw by simp
moreover hence "w ≠ []" by auto
moreover have "real_word (tl w)" using ‹real_word w› calculation(3) rw_tail by auto
ultimately show "w ∈ ?r" using w_kw by simp
qed
next
fix k
show "k_words (Suc k) ⊇ {w. (hd w ∈ L ∧ tl w ∈ k_words k ∧ w ≠ [])}"
proof
fix w
assume asm: "w ∈ {w. hd w ∈ L ∧ tl w ∈ {w. length w = k ∧ real_word w} ∧ w ≠ []}"
hence "hd w ∈ L ∧ length (tl w) = k ∧ real_word (tl w)" by simp
hence "real_word w"
by (metis empty_iff insert_subset list.collapse list.set(1) set_simps(2) subsetI)
moreover hence "length w = Suc k" using asm by auto
ultimately show "w ∈ k_words (Suc k)" by simp
qed
qed
lemma bij_k_words:
shows "bij_betw (λwi. Cons (fst wi) (snd wi)) (L × k_words k) (k_words (Suc k))"
unfolding bij_betw_def
proof
fix k
let ?f = "(λwi. Cons (fst wi) (snd wi))"
let ?S = "L × (k_words k)"
let ?T = "k_words (Suc k)"
show "inj_on ?f ?S" by (simp add: inj_on_def)
show "?f`?S = ?T"
proof (rule ccontr)
assume "?f ` ?S ≠ ?T"
hence "∃w. w∈ ?T ∧ w ∉ ?f`?S" by auto
then obtain w where asm: "w∈ ?T ∧ w ∉ ?f`?S" by blast
hence "w = ?f (hd w,tl w)" using k_words_rel by simp
moreover have "(hd w,tl w) ∈ ?S" using k_words_rel asm by simp
ultimately have "w ∈ ?f`?S" by blast
thus "False" using asm by simp
qed
qed
lemma finite_k_words: "finite (k_words k)"
proof (induct k)
case 0
show ?case by simp
case (Suc n)
thus ?case using bij_k_words bij_betw_finite fin_L by blast
qed
lemma cartesian_product:
fixes f::"('c ⇒ real)"
fixes g::"('d ⇒ real)"
assumes "finite A"
assumes "finite B"
shows "(∑b∈B. g b) * (∑a∈A. f a) = (∑ab∈A×B. f (fst ab) * g (snd ab))"
using bilinear_times bilinear_sum[where h="(λx y. x * y)" and f="f" and g="g"] assms
by (metis (erased, lifting) sum.cong split_beta' Groups.ab_semigroup_mult_class.mult.commute)
lemma 𝒦_power:
shows "𝒦^k = (∑w ∈ (k_words k). 1 / b^(cw_len_concat w))"
proof (induct k)
case 0
have "k_words 0 = {[]}" by auto
thus ?case by simp
next
case (Suc n)
have " 𝒦 ^Suc n = 𝒦 ^n * 𝒦 " by simp
also have "… = (∑w ∈ k_words n. 1 / b^cw_len_concat w) * (∑i∈L. 1 / b^cw_len i)"
using Suc.hyps 𝒦_def by auto
also have "… = (∑wi ∈ L × k_words n. 1/b^cw_len (fst wi) * (1 / b^cw_len_concat (snd wi)))"
using fin_L finite_k_words cartesian_product
by blast
also have "… = (∑wi ∈ L × k_words n. 1 / b^(cw_len_concat (snd wi) + cw_len (fst wi)))"
by (metis (no_types, lifting) power_add add.commute power_one_over)
also have "… = (∑wi ∈ L × k_words n. 1 / b^cw_len_concat (fst wi # snd wi))"
by (metis (erased, lifting) add.commute comp_apply foldr.simps(2))
also have "… = (∑w ∈ (k_words (Suc n)). 1 / b^(cw_len_concat w))"
using sum.reindex_bij_betw [OF bij_k_words] by fastforce
finally show ?case by simp
qed
lemma bound_len_concat:
shows "w ∈ k_words k ⟹ cw_len_concat w ≤ k * max_len"
using max_cw maj_fold by blast
subsection‹Inequality of the kraft sum (source coding theorem, direct)›
subsubsection‹Sum manipulation lemmas and McMillan theorem›
lemma sum_vimage_proof:
fixes g::"nat ⇒ real"
assumes "⋀w. f w < bd"
shows "finite S ⟹ (∑w∈S. g (f w)) = (∑ m=0..<bd. (card ((f-`{m}) ∩ S) )* g m)"
(is "_ ⟹ _ = (∑ m=0..<bd. ?ff m S)")
proof (induct S rule: finite_induct)
case empty
show ?case by simp
next
case (insert x F)
let ?rr = "(∑m = 0..<bd. ?ff m (insert x F))"
have "(f x) ∈ {0..<bd}" using assms by simp
hence "⋀h::(nat ⇒ real). (∑m=0..<bd. h m) = (∑y∈({0..<bd} - {f x}).h y) + h (f x)"
by (metis diff_add_cancel finite_atLeastLessThan sum_diff1)
moreover hence
"(∑m = 0..<bd. ?ff m (insert x F))
= (∑m∈{0..<bd} - {f x}. ?ff m (insert x F)) + card (f -` {f x} ∩ F) * g (f x) + g (f x)"
by (simp add: semiring_normalization_rules(2), simp add: insert)
ultimately have "(∑m = 0..<bd. ?ff m (insert x F)) = (∑m∈{0..<bd}. ?ff m F) + g (f x)"
by fastforce
thus ?case using insert by simp
qed
lemma sum_vimage:
fixes g::"nat ⇒ real"
assumes bounded: "⋀w. w ∈ S ⟹ f w < bd" and "0 < bd"
assumes finite: "finite S"
shows "(∑w∈S. g (f w)) = (∑ m=0..<bd. (card ((f-`{m}) ∩ S) ) * g m)"
(is "?s1 = ?s2")
proof -
let ?ff = "(λx. if x∈S then f x else 0)"
let ?ss1 = "(∑w∈S. g (?ff w))"
let ?ss2 = "(∑ m=0..<bd. (card ((?ff-`{m}) ∩ S) ) * g m)"
have "?s1 =?ss1" by simp
moreover have"⋀m. ?ff -`{m} ∩ S = f-`{m} ∩ S" by auto
moreover hence "?s2 = ?ss2" by simp
moreover have "⋀w . ?ff w < bd" using assms by simp
moreover hence "?ss1 = ?ss2" using sum_vimage_proof[of "?ff"] finite by blast
ultimately show "?s1 = ?s2" by metis
qed
lemma 𝒦_rw:
"(∑w ∈ (k_words k). 1 / b^(cw_len_concat w)) = (∑m=0..<Suc (k*max_len). card (k_words k ∩
((cw_len_concat) -` {m})) * (1 / b^m))" (is "?L = ?R")
proof -
have "⋀w. w ∈ k_words k ⟹ cw_len_concat w < Suc ( k * max_len)"
by (simp add: bound_len_concat le_imp_less_Suc)
moreover have
"?R = (∑m = 0..<Suc (k * max_len).
(card (cw_len_concat -` {m} ∩ k_words k)) * (1 / b ^ m))"
by (metis Int_commute)
moreover have "0 < Suc (k*max_len)" by simp
ultimately show ?thesis
using finite_k_words
sum_vimage[where f="cw_len_concat" and g = "λi. 1/ (b^i)"]
by fastforce
qed
definition set_of_k_words_length_m :: "nat ⇒ nat ⇒ 'b word set" where
"set_of_k_words_length_m k m = {xk. xk ∈ k_words k} ∩ (cw_len_concat)-`{m}"
lemma am_inj_code: "inj_on enc ((cw_len_concat)-`{m})" (is "inj_on _ ?s")
using inj_on_def[of enc "?s"] real_code
by (metis option.inject)
lemma img_inc: "enc`cw_len_concat-`{m} ⊆ {bl. length bl = m}" using cw_len_length by auto
lemma bool_lists_card: "card {bl::bool list. length bl = m} = b^m"
using card_lists_length_eq[of "UNIV::bool set"]
by (simp add: b_val)
lemma bool_list_fin: "finite {bl::bool list. length bl = m}"
using finite_lists_length_eq[of "UNIV::bool set"]
by (simp add: b_val)
lemma set_of_k_words_bound:
shows "card (set_of_k_words_length_m k m) ≤ b^m" (is "?c ≤ ?b")
proof -
have card_w_len_m_bound: "card (cw_len_concat-`{m}) ≤ b^m"
by (metis (no_types, lifting) am_inj_code bool_list_fin bool_lists_card card_image card_mono
img_inc of_nat_le_iff)
have "set_of_k_words_length_m k m ⊆ (cw_len_concat)-`{m}"
by (simp add: set_of_k_words_length_m_def)
hence "card (set_of_k_words_length_m k m) ≤ card ((cw_len_concat)-`{m})"
by (metis (no_types, lifting) am_inj_code bool_list_fin card.infinite card_0_eq
card_image card_mono empty_iff finite_subset img_inc inf_img_fin_dom)
thus ?thesis using card_w_len_m_bound by simp
qed
lemma empty_set_k_words:
assumes "0 < k"
shows "set_of_k_words_length_m k 0 = {}"
proof(rule ccontr)
assume "¬ set_of_k_words_length_m k 0 = {}"
hence "∃x. x ∈ set_of_k_words_length_m k 0" by auto
then obtain x where x_def: "x ∈ set_of_k_words_length_m k 0" by auto
hence "x ≠ []" unfolding set_of_k_words_length_m_def using assms by auto
moreover have "cw_len_concat (hd x#tl x) = cw_len_concat (tl x) + cw_len (hd x)"
by (metis add.commute comp_apply foldr.simps(2))
moreover have "enc [(hd x)] ≠ []" using assms real_code by blast
moreover hence "0 < cw_len (hd x)" unfolding code_word_length_def by simp
ultimately have "x ∉ set_of_k_words_length_m k 0" by (simp add:set_of_k_words_length_m_def)
thus "False" using x_def by simp
qed
lemma 𝒦_rw2:
assumes "0 < k"
shows "(∑m=0..<Suc (k * max_len). card (set_of_k_words_length_m k m)/ b^m) ≤ (k * max_len)"
proof -
have
"(∑m=1..<Suc (k * max_len). card (set_of_k_words_length_m k m) / b^m)
≤ (∑m=1..<Suc(k * max_len). b^m / b^m)"
using set_of_k_words_bound b_val
Groups_Big.sum_mono[of "{1..<Suc(k * max_len)}"
"(λm. (card (set_of_k_words_length_m k m))/b^m)" "λm. b^m /b^m"]
by simp
moreover have"(∑m=1..<Suc(k * max_len). b^m / b^m) = (∑m=1..<Suc(k *max_len). 1)"
using b_gt_1 by simp
moreover have "(∑m=1..<Suc(k * max_len). 1) = (k * max_len)"
by simp
ultimately have
"(∑m = 1..<Suc (k * max_len). card (set_of_k_words_length_m k m) / b ^ m) ≤ k * max_len"
by (metis One_nat_def card_atLeastLessThan card_eq_sum diff_Suc_Suc real_of_card)
thus ?thesis using empty_set_k_words assms
by (simp add: sum_shift_lb_Suc0_0_upt split: if_split_asm)
qed
lemma 𝒦_power_bound :
assumes "0 < k"
shows " 𝒦^k ≤ k * max_len"
using assms 𝒦_power 𝒦_rw 𝒦_rw2
by (simp add: set_of_k_words_length_m_def)
theorem McMillan :
shows "𝒦 ≤ 1"
proof -
have ineq: "⋀k. 0 < k ⟹ 𝒦 ≤ root k k * root k max_len"
using 𝒦_pos 𝒦_power_bound
by (metis (no_types, opaque_lifting) not_less of_nat_0_le_iff of_nat_mult power_strict_mono real_root_mult real_root_pos_pos_le real_root_pos_unique real_root_power)
hence "0 < max_len ⟹ (λk. root k k * root k max_len) ⇢ 1"
by (auto intro!: tendsto_eq_intros LIMSEQ_root LIMSEQ_root_const)
moreover have "∀n≥1. 𝒦 ≤ root n n * root n max_len"
using ineq by simp
moreover have "max_len = 0 ⟹ 𝒦 ≤ 1" using ineq by fastforce
ultimately show " 𝒦 ≤ 1" using LIMSEQ_le_const by blast
qed
lemma entropy_rw: "ℋ(X) = -(∑i ∈ L. fi i * log b (fi i))"
using entropy_simple_distributed[OF distr_i]
by (simp add: L_def)
subsubsection‹Technical lemmas about the logarithm›
lemma log_mult_ext3:
"0 ≤ x ⟹ 0 < y ⟹ 0 < z ⟹ x * log b (x*y*z) = x * log b (x*y) + x * log b z"
by(cases "x=0")(simp add: log_mult_eq abs_of_pos distrib_left less_eq_real_def)+
lemma log_mult_ext2:
"0 ≤ x ⟹ 0 < y ⟹ x * log b (x*y) = x * log b x + x * log b y"
using log_mult_ext3[where y=1] by simp
subsubsection ‹KL divergence and properties›
definition KL_div ::"'b set ⇒ ('b ⇒ real) ⇒ ('b ⇒ real) ⇒ real" where
"KL_div S a d = (∑ i ∈ S. a i * log b (a i / d i))"
lemma KL_div_mul:
assumes "0 < d" "d ≤ 1"
assumes "⋀i. i∈S ⟹ 0 ≤ a i"
assumes "⋀i. i∈S ⟹ 0 < e i"
shows "KL_div S a e ≥ KL_div S a (λi. e i / d)"
unfolding KL_div_def
proof -
{
fix i
assume "i∈S"
hence "a i / (e i / d) ≤ a i / e i" using assms
by (metis (no_types) div_by_1 frac_le less_imp_triv not_less)
hence "log b (a i / (e i / d)) ≤ log b (a i / e i)" using assms
by (smt (verit, best) Transcendental.log_mono ‹i ∈ S› b_gt_1 diff_divide_distrib divide_pos_pos)
}
thus "(∑i∈S. a i * log b (a i / (e i / d))) ≤ (∑i∈S. a i * log b (a i / e i))"
by (meson mult_left_mono assms sum_mono)
qed
lemma KL_div_pos:
fixes a e::"'b ⇒ real"
assumes fin: "finite S"
assumes nemp: "S ≠ {}"
assumes non_null: "⋀i. i∈S ⟹ 0 < a i" "⋀i. i∈ S ⟹ 0 < e i"
assumes sum_a_one: "(∑ i ∈ S. a i) = 1"
assumes sum_c_one: "(∑ i ∈ S. e i) = 1"
shows "0 ≤ KL_div S a e"
unfolding KL_div_def
proof -
let ?f = "λi. e i / a i"
have f_pos: "⋀i. i∈S ⟹ 0 < ?f i"
using non_null
by simp
have a_pos: "⋀i. i∈ S ⟹ 0 ≤ a i"
using non_null
by (simp add: order.strict_implies_order)
have "- log b (∑i∈S. a i * e i / a i) ≤ (∑i∈S. a i * - log b (e i / a i))"
using convex_on_sum[OF fin nemp minus_log_convex[OF b_gt_1]
sum_a_one a_pos, of "λi. e i / a i"] f_pos by simp
also have "-log b (∑i∈S. a i * e i / a i) = -log b (∑i∈S. e i)"
proof -
from non_null(1) have "⋀i. i ∈ S ⟹ a i * e i / a i = e i" by force
thus ?thesis by simp
qed
finally have "0 ≤ (∑i∈S. a i * - log b (e i / a i))"
by (simp add: sum_c_one)
thus "0 ≤ (∑i∈S. a i * log b (a i / e i))"
using b_gt_1 log_divide non_null
by simp
qed
lemma KL_div_pos_emp:
"0 ≤ KL_div {} a e" by (simp add: KL_div_def)
lemma KL_div_pos_gen:
fixes a d::"'b ⇒ real"
assumes fin: "finite S"
assumes non_null: "⋀i. i∈S ⟹ 0 < a i" "⋀i. i∈ S ⟹ 0 < d i"
assumes sum_a_one: "(∑ i ∈ S. a i) = 1"
assumes sum_d_one: "(∑ i ∈ S. d i) = 1"
shows "0 ≤ KL_div S a d"
using KL_div_pos KL_div_pos_emp assms by metis
theorem KL_div_pos2:
fixes a d::"'b ⇒ real"
assumes fin: "finite S"
assumes non_null: "⋀i. i∈S ⟹ 0 ≤ a i" "⋀i. i∈ S ⟹ 0 < d i"
assumes sum_a_one: "(∑ i ∈ S. a i) = 1"
assumes sum_c_one: "(∑ i ∈ S. d i) = 1"
shows "0 ≤ KL_div S a d"
proof -
have "S = (S ∩ {i. 0 < a i}) ∪ (S ∩ {i. 0 = a i})" using non_null(1) by fastforce
moreover have "(S ∩ {i. 0 < a i}) ∩ (S ∩ {i. 0 = a i}) = {}" by auto
ultimately have
eq: "KL_div S a d = KL_div (S ∩ {i. 0 < a i}) a d + KL_div (S ∩ {i. 0 = a i}) a d"
unfolding KL_div_def
by (metis (mono_tags, lifting) fin finite_Un sum.union_disjoint)
have "KL_div (S ∩ {i. 0 = a i}) a d = 0" unfolding KL_div_def by simp
hence "KL_div S a d = KL_div (S ∩ {i. 0 < a i}) a d" using eq by simp
moreover have "0 ≤ KL_div (S ∩ {i. 0 < a i}) a d"
proof(cases "(S ∩ {i. 0 < a i}) = {}")
case True
thus ?thesis unfolding KL_div_def by simp
next
case False
let ?c = "λi. d i / (∑j ∈(S ∩ {i. 0 < a i}). d j)"
have 1: "(⋀i. i ∈ S ∩ {i. 0 < a i} ⟹ 0 < a i)" by simp
have 2: "(⋀i. i ∈ S ∩ {i. 0 < a i} ⟹ 0 < ?c i)"
by (metis False IntD1 divide_pos_pos fin finite_Int non_null(2) sum_pos)
have 3: "(∑i∈ (S ∩ {i. 0 < a i}). a i) = 1"
using sum.cong[of S, of S, of "(λx. if x ∈ {i. 0 < a i} then a x else 0)", of a]
sum.inter_restrict[OF fin, of a] non_null(1) sum_a_one
by fastforce
have "(∑i∈S ∩ {j. 0 < a j}. ?c i) = (∑i∈S ∩ {j. 0 < a j}. d i) / (∑i∈S ∩ {j. 0 < a j}. d i)"
by (metis sum_divide_distrib)
hence 5: "(∑i∈S ∩ {j. 0 < a j}. ?c i) = 1" using 2 False by force
hence "0 ≤ KL_div (S ∩ {j. 0 < a j}) a ?c"
using KL_div_pos_gen[
OF finite_Int[OF disjI1, of S, of "{j. 0 < a j}"], of a, of ?c
] 1 2 3
by (metis fin)
have fstdb: "0 < (∑i∈S ∩ {i. 0 < a i}. d i)" using non_null(2) False
by (metis Int_Collect fin finite_Int sum_pos)
have 6: "0 ≤ KL_div (S ∩ {i. 0 < a i}) a (λi. d i / (∑i∈(S ∩ {i. 0 < a i}). d i))"
using 2 3 5
KL_div_pos_gen[
OF finite_Int[OF disjI1, OF fin], of "{i. 0 < a i}", of "a", of "?c"
]
by simp
hence
"KL_div (S ∩ {j. 0 < a j}) a (λi. d i / (∑i∈(S ∩ {i. 0 < a i}). d i)) ≤ KL_div (S ∩ {j. 0 < a j}) a d"
using non_null sum.inter_restrict[OF fin, of d, of "{i. 0 < a i}"]
sum_mono[of S, of "(λx. if x ∈ {i. 0 < a i} then d x else 0)", of d] non_null(2) sum_c_one
non_null(2) fstdb KL_div_mul
by force
moreover have "0 ≤ KL_div (S ∩ {j. 0 < a j}) a (λi. d i / (∑i∈(S ∩ {i. 0 < a i}). d i))"
using KL_div_pos_gen[ OF finite_Int[OF disjI1, OF fin]] using 2 3 5 by fastforce
ultimately show "0 ≤ KL_div (S ∩ {j. 0 < a j}) a d" by simp
qed
ultimately show ?thesis by simp
qed
lemma sum_div_1:
fixes f::"'b ⇒ 'c::field"
assumes "(∑i∈A. f i) ≠ 0"
shows "(∑i∈A. f i / (∑j∈A. f j)) = 1"
by (metis (no_types) assms right_inverse_eq sum_divide_distrib)
theorem rate_lower_bound:
shows "ℋ(X) ≤ code_rate enc X"
proof -
let ?cr = "code_rate enc X"
let ?r = "(λi. 1 / ((b powr cw_len i) * 𝒦))"
have pos_pi: "⋀i. i ∈ L ⟹ 0 ≤ fi i" using fi_pos by simp
{
fix i
assume "i ∈ L"
hence
"fi i * (log b (1 / (1 / b powr (cw_len i))) + log b (fi i))
= fi i * log b (fi i / (1 / b powr (cw_len i)))"
using log_mult_ext2 [OF pos_pi, of i] b_gt_1
by simp (simp add: algebra_simps)
}
hence eqpi:
"⋀i. i∈ L ⟹ fi i * (log b (1 / (1 / b powr (cw_len i))) + log b (fi i))
= fi i * log b (fi i / (1 / b powr (cw_len i)))"
by simp
have sum_one_L: "(∑ i ∈ L. fi i) = 1"
using simple_distributed_sum_space[OF distr_i] by (simp add: L_def)
{
fix i
assume "i ∈ L"
hence h1: "0 ≤ fi i" using pos_pi by blast
have h2: "0 < 𝒦 / (1/b powr cw_len i)" using b_gt_1 𝒦_pos by auto
have h3: "0 < 1 / 𝒦" using 𝒦_pos by simp
have
"fi i * log b (fi i * 𝒦 / (1/b powr cw_len i) * (1/ 𝒦)) =
fi i * log b (fi i * 𝒦 / (1/b powr cw_len i)) + fi i * log b (1/ 𝒦)"
using log_mult_ext3[OF h1 h2 h3]
by (metis times_divide_eq_right)
} hence big_eq:
"⋀i. i ∈ L ⟹ fi i * log b (fi i * 𝒦 / (1/b powr cw_len i) * (1 / 𝒦)) =
fi i * log b (fi i * 𝒦 / (1/b powr cw_len i)) + fi i * log b (1 / 𝒦)"
by (simp add: inverse_eq_divide)
have 1: "?cr - ℋ(X) = (∑i ∈ L. fi i * cw_len i) + (∑i ∈ L. fi i * log b (fi i))"
using 𝒦_def entropy_rw cr_rw L_def by simp
also have 2: "(∑i∈L. fi i * cw_len i) = (∑i ∈ L. fi i * (-log b (1/(b powr (cw_len i)))))"
using b_gt_1 log_divide by simp
also have "… = -1 * (∑i ∈ L. fi i * (log b (1/(b powr (cw_len i)))))"
using sum_distrib_left[of "-1" "(λi. fi i * (- 1 * log b (1 / b powr (cw_len i))))" L]
by simp
finally have
"?cr - ℋ(X) = -(∑i ∈ L. fi i * log b (1/b powr cw_len i)) + (∑i ∈ L. fi i * log b (fi i))"
by simp
have "?cr - ℋ(X) = (∑i ∈ L. fi i * ((log b (1/ (1/(b powr (cw_len i))))) + log b (fi i)))"
using b_gt_1 1
by (simp add: distrib_left sum.distrib)
also have "… = (∑i ∈ L. fi i *((log b (fi i / (1/(b powr (cw_len i)))))))"
using Finite_Cartesian_Product.sum_cong_aux[OF eqpi] by simp
also from big_eq have
"… = (∑i∈L. fi i * (log b (fi i * 𝒦 / (1 / b powr (cw_len i))))) + (∑i ∈ L. fi i) * log b (1/ 𝒦)"
using 𝒦_pos
by (simp add: sum_distrib_right sum.distrib)
also have "… = (∑i∈L. fi i * (log b (fi i * 𝒦 / (1 / b powr (cw_len i))))) - log b (𝒦)"
using 𝒦_pos
by (simp add: log_inverse_eq divide_inverse sum_one_L)
also have "… = (∑ i ∈ L. fi i * log b (fi i / ?r i)) - log b (𝒦)"
by (metis (mono_tags, opaque_lifting) divide_divide_eq_left divide_divide_eq_right)
also have "… = KL_div L fi ?r - log b ( 𝒦)"
using b_gt_1 𝒦_pos log_inverse KL_div_def
by simp
also have "… = KL_div L fi ?r + log b (1 / 𝒦)"
using log_inverse b_val 𝒦_pos
by (simp add: inverse_eq_divide)
finally have code_ent_kl_log: "?cr - ℋ(X) = KL_div L fi ?r + log b (1 / 𝒦)" by simp
have "(∑i∈L. ?r i) = 1"
using sum_div_1[of "λi. 1 / (b powr (cw_len i))"] 𝒦_pos 𝒦_pow
by simp
moreover have "⋀i. 0 < ?r i" using b_gt_1 𝒦_pos by simp
moreover have "(∑i∈L. fi i) = 1" using sum_one_L by simp
ultimately have "0 ≤ KL_div L fi ?r"
using KL_div_pos2[OF fin_L fi_pos] by simp
hence "log b (1 / 𝒦) ≤ ?cr - ℋ(X)" using code_ent_kl_log by simp
moreover from McMillan have "0 ≤ log b (1 / 𝒦)"
using 𝒦_pos
by (simp add: b_gt_1)
ultimately show ?thesis by simp
qed
end
end