Theory Quick_Sort_Cost.Randomised_Quick_Sort
section ‹Randomised QuickSort›
theory Randomised_Quick_Sort
imports
"HOL-Probability.Probability"
"Landau_Symbols.Landau_More"
Comparison_Sort_Lower_Bound.Linorder_Relations
begin
subsection ‹Deletion by index›
text ‹
The following function deletes the $n$-th element of a list.
›
fun delete_index :: "nat ⇒ 'a list ⇒ 'a list" where
"delete_index _ [] = []"
| "delete_index 0 (x # xs) = xs"
| "delete_index (Suc n) (x # xs) = x # delete_index n xs"
lemma delete_index_altdef: "delete_index n xs = take n xs @ drop (Suc n) xs"
by (induction n xs rule: delete_index.induct) simp_all
lemma delete_index_ge_length: "n ≥ length xs ⟹ delete_index n xs = xs"
by (simp add: delete_index_altdef)
lemma length_delete_index [simp]: "n < length xs ⟹ length (delete_index n xs) = length xs - 1"
by (simp add: delete_index_altdef)
lemma delete_index_Cons:
"delete_index n (x # xs) = (if n = 0 then xs else x # delete_index (n - 1) xs)"
by (cases n) simp_all
lemma insert_set_delete_index:
"n < length xs ⟹ insert (xs ! n) (set (delete_index n xs)) = set xs"
by (induction n xs rule: delete_index.induct) auto
lemma add_mset_delete_index:
"i < length xs ⟹ add_mset (xs ! i) (mset (delete_index i xs)) = mset xs"
by (induction i xs rule: delete_index.induct) simp_all
lemma nth_delete_index:
"i < length xs ⟹ n < length xs ⟹
delete_index n xs ! i = (if i < n then xs ! i else xs ! Suc i)"
by (auto simp: delete_index_altdef nth_append min_def)
lemma set_delete_index_distinct:
assumes "distinct xs" "n < length xs"
shows "set (delete_index n xs) = set xs - {xs ! n}"
using assms by (induction n xs rule: delete_index.induct) fastforce+
lemma distinct_delete_index [simp, intro]:
assumes "distinct xs"
shows "distinct (delete_index n xs)"
proof (cases "n < length xs")
case True
with assms show ?thesis
by (induction n xs rule: delete_index.induct) (auto simp: set_delete_index_distinct)
qed (simp_all add: delete_index_ge_length assms)
lemma mset_delete_index [simp]:
"i < length xs ⟹ mset (delete_index i xs) = mset xs - {# xs!i #}"
by (induction i xs rule: delete_index.induct) simp_all
subsection ‹Definition›
text ‹
The following is a functional randomised version of QuickSort that also records the number
of comparisons that were made. The randomisation is in the selection of the pivot element:
In each step, the next pivot is chosen uniformly at random from all remaining list elements.
The function takes the ordering relation to use as a first argument in the form of a set of
pairs.
›
function rquicksort :: "('a × 'a) set ⇒ 'a list ⇒ ('a list × nat) pmf" where
"rquicksort R xs =
(if xs = [] then
return_pmf ([], 0)
else
do {
i ← pmf_of_set {..<length xs};
let x = xs ! i;
case partition (λy. (y,x) ∈ R) (delete_index i xs) of
(ls, rs) ⇒ do {
(ls, n1) ← rquicksort R ls;
(rs, n2) ← rquicksort R rs;
return_pmf (ls @ [x] @ rs, length xs - 1 + n1 + n2)
}
})"
by auto
termination proof (relation "Wellfounded.measure (length ∘ snd)", goal_cases)
show "wf (Wellfounded.measure (length ∘ snd))" by simp
qed (subst (asm) set_pmf_of_set; force intro!: le_less_trans[OF length_filter_le])+
declare rquicksort.simps [simp del]
lemma rquicksort_Nil [simp]: "rquicksort R [] = return_pmf ([], 0)"
by (simp add: rquicksort.simps)
subsection ‹Correctness proof›
lemma set_pmf_of_set_lessThan_length [simp]:
"xs ≠ [] ⟹ set_pmf (pmf_of_set {..<length xs}) = {..<length xs}"
by (subst set_pmf_of_set) auto
text ‹
We can now prove that any list that can be returned by QuickSort is sorted w.\,r.\,t.\ the
given relation. (as long as that relation is reflexive, transitive, and total)
›
theorem rquicksort_correct:
assumes "trans R" and "total_on (set xs) R" and "∀x∈set xs. (x,x) ∈ R"
assumes "(ys, n) ∈ set_pmf (rquicksort R xs)"
shows "sorted_wrt R ys ∧ mset ys = mset xs"
using assms(2-)
proof (induction xs arbitrary: ys n rule: length_induct)
case (1 xs)
have IH: "sorted_wrt R zs" "mset zs = mset ys"
if "(zs, n) ∈ set_pmf (rquicksort R ys)" "length ys < length xs" "set ys ⊆ set xs" for zs ys n
using that "1.IH" total_on_subset[OF "1.prems"(1) that(3)] "1.prems"(2) by blast+
show ?case
proof (cases "xs = []")
case False
with "1.prems" obtain ls rs n1 n2 i where *:
"i < length xs" "(ls, n1) ∈ set_pmf (rquicksort R [y←delete_index i xs. (y, xs ! i) ∈ R])"
"(rs, n2) ∈ set_pmf (rquicksort R [y←delete_index i xs. (y, xs ! i) ∉ R])"
"ys = ls @ [xs ! i] @ rs"
by (subst (asm) rquicksort.simps[of _ xs]) (auto simp: Let_def o_def)
note ys = ‹ys = ls @ [xs ! i] @ rs›
define ls' where "ls' = [y←delete_index i xs. (y, xs ! i) ∈ R]"
define rs' where "rs' = [y←delete_index i xs. (y, xs ! i) ∉ R]"
from ‹i < length xs› have less: "length ls' < length xs" "length rs' < length xs"
unfolding ls'_def rs'_def by (intro le_less_trans[OF length_filter_le]; force)+
have ls: "(ls, n1) ∈ set_pmf (rquicksort R ls')" and rs: "(rs, n2) ∈ set_pmf (rquicksort R rs')"
using * unfolding ls'_def rs'_def by blast+
have subset: "set ls' ⊆ set xs" "set rs' ⊆ set xs"
using insert_set_delete_index[of i xs] ‹i < length xs›
by (auto simp: ls'_def rs'_def)
have sorted: "sorted_wrt R ls" "sorted_wrt R rs"
and mset: "mset ls = mset ls'" "mset rs = mset rs'"
by (rule IH[of ls n1 ls'] IH[of rs n2 rs'] less ls rs subset)+
have ls_le: "(x, xs ! i) ∈ R" if "x ∈ set ls" for x
proof -
from that have "x ∈# mset ls" by simp
also note mset(1)
finally show ?thesis by (simp add: ls'_def)
qed
have rs_ge: "(x, xs ! i) ∉ R" "(xs ! i, x) ∈ R" if "x ∈ set rs" for x
proof -
from that have "x ∈# mset rs" by simp
also note mset(2)
finally have x: "x ∈ set rs'" by simp
thus "(x, xs ! i) ∉ R" by (simp_all add: rs'_def)
from x and subset and ‹i < length xs› have "x ∈ set xs" "xs ! i ∈ set xs" by auto
with "1.prems" and ‹(x, xs ! i) ∉ R› show "(xs ! i, x) ∈ R"
unfolding total_on_def by (cases "xs ! i = x") auto
qed
have "sorted_wrt R ys" unfolding ys
by (intro sorted_wrt_append ‹trans R› sorted_wrt_singleton sorted)
(auto intro: rs_ge ls_le transD[OF ‹trans R›, of _ "xs!i"])
moreover have "mset ys = mset xs" unfolding ys using ‹i < length xs›
by (simp add: mset ls'_def rs'_def add_mset_delete_index)
ultimately show ?thesis ..
qed (insert "1.prems", simp_all)
qed
subsection ‹Cost analysis›
text ‹
The following distribution describes the number of comparisons made by randomised
QuickSort in terms of the list length. (This is only valid if all list elements are distinct)
A succinct explanation of this cost analysis is given by Jacek Cicho\'{n}~\<^cite>‹"cichon"›.
›
fun rqs_cost :: "nat ⇒ nat pmf" where
"rqs_cost 0 = return_pmf 0"
| "rqs_cost (Suc n) =
do {i ← pmf_of_set {..n}; a ← rqs_cost i; b ← rqs_cost (n - i); return_pmf (n + a + b)}"
lemma finite_set_pmf_rqs_cost [intro!]: "finite (set_pmf (rqs_cost n))"
by (induction n rule: rqs_cost.induct) simp_all
text ‹
We connect the @{const rqs_cost} function to the @{const rquicksort} function by showing that
projecting out the number of comparisons from a run of @{const rquicksort} on a list with distinct
elements yields the same distribution as @{const rqs_cost} for the length of that list.
›
theorem snd_rquicksort:
assumes "linorder_on A R" and "set xs ⊆ A" and "distinct xs"
shows "map_pmf snd (rquicksort R xs) = rqs_cost (length xs)"
using assms(2-)
proof (induction xs rule: length_induct)
case (1 xs)
have IH: "map_pmf snd (rquicksort R ys) = rqs_cost (length ys)"
if "length ys < length xs" "mset ys ⊆# mset xs" for ys
proof -
from set_mset_mono[OF that(2)] have "set ys ⊆ set xs" by simp
also note ‹set xs ⊆ A›
finally have "set ys ⊆ A" .
moreover from ‹distinct xs› and that(2) have "distinct ys"
by (rule distinct_mset_mono)
ultimately show ?thesis using that and "1.IH" by blast
qed
define n where "n = length xs"
define cnt where "cnt = (λi. length [y←delete_index i xs. (y, xs ! i) ∈ R])"
have cnt_altdef: "cnt i = linorder_rank R (set xs) (xs ! i)" if i: "i < n" for i
proof -
have "cnt i = length [y←delete_index i xs. (y, xs ! i) ∈ R]" by (simp add: cnt_def)
also have "… = card (set [y←delete_index i xs. (y, xs ! i) ∈ R])"
by (intro distinct_card [symmetric] distinct_filter distinct_delete_index "1.prems")
also have "set [y←delete_index i xs. (y, xs ! i) ∈ R] =
{x ∈ set xs-{xs!i}. (x, xs ! i) ∈ R}"
using "1.prems" and i by (simp add: set_delete_index_distinct n_def)
also have "card … = linorder_rank R (set xs) (xs ! i)" by (simp add: linorder_rank_def)
finally show ?thesis .
qed
from "1.prems" have "bij_betw ((!) xs) {..<n} (set xs)"
by (intro bij_betw_byWitness[where f' = "index xs"]) (auto simp: n_def index_nth_id)
moreover have "bij_betw (linorder_rank R (set xs)) (set xs) {..<card (set xs)}"
using assms(1) by (rule bij_betw_linorder_rank) (insert "1.prems", auto)
ultimately have "bij_betw (linorder_rank R (set xs) ∘ (λi. xs ! i)) {..<n} {..<card (set xs)}"
by (rule bij_betw_trans)
hence bij: "bij_betw (λi. linorder_rank R (set xs) (xs ! i)) {..<n} {..<n}"
using "1.prems" by (simp add: n_def o_def distinct_card)
show ?case
proof (cases "xs = []")
case False
hence "n > 0" by (simp add: n_def)
hence [simp]: "n ≠ 0" by (intro notI) auto
from False have "map_pmf snd (rquicksort R xs) =
pmf_of_set {..<length xs} ⤜
(λi. map_pmf (λz. length xs - 1 + fst z + snd z)
(pair_pmf (map_pmf snd (rquicksort R [y←delete_index i xs. (y, xs ! i) ∈ R]))
(map_pmf snd (rquicksort R [y←delete_index i xs. (y, xs ! i) ∉ R]))))"
by (subst rquicksort.simps)
(simp add: map_bind_pmf bind_map_pmf Let_def case_prod_unfold o_def pair_pmf_def)
also have "… = pmf_of_set {..<length xs} ⤜
(λi. map_pmf (λz. n - 1 + fst z + snd z)
(pair_pmf (rqs_cost (cnt i)) (rqs_cost (n - 1 - cnt i))))"
proof (intro bind_pmf_cong refl, goal_cases)
case (1 i)
with ‹xs ≠ []› have i: "i < length xs" by auto
from i have "map_pmf snd (rquicksort R [y←delete_index i xs. (y, xs ! i) ∉ R]) =
rqs_cost (length [y←delete_index i xs. (y, xs ! i) ∉ R])"
by (intro IH)
(auto intro!: le_less_trans[OF length_filter_le]
intro: subset_mset.trans multiset_filter_subset diff_subset_eq_self)
also have "length [y←delete_index i xs. (y, xs ! i) ∉ R] = n - 1 - cnt i"
unfolding n_def cnt_def
using sum_length_filter_compl[of "λy. (y, xs ! i) ∈ R" "delete_index i xs"] i by simp
finally have "map_pmf snd (rquicksort R [y←delete_index i xs. (y, xs ! i) ∉ R]) =
rqs_cost (n - 1 - cnt i)" .
moreover have "map_pmf snd (rquicksort R [y←delete_index i xs. (y, xs ! i) ∈ R]) =
rqs_cost (cnt i)" unfolding cnt_def using i
by (intro IH)
(auto intro!: le_less_trans[OF length_filter_le]
intro: subset_mset.trans multiset_filter_subset diff_subset_eq_self)
ultimately show ?case by (simp only: n_def)
qed
also have "… = map_pmf cnt (pmf_of_set {..<n}) ⤜
(λi. map_pmf (λz. n - 1 + fst z + snd z) (pair_pmf (rqs_cost i) (rqs_cost (n - 1 - i))))"
(is "_ = bind_pmf _ ?f") by (simp add: bind_map_pmf n_def)
also have "map_pmf cnt (pmf_of_set {..<n}) =
map_pmf (λi. linorder_rank R (set xs) (xs ! i)) (pmf_of_set {..<n})"
using ‹n > 0› by (intro map_pmf_cong refl, subst (asm) set_pmf_of_set) (auto simp: cnt_altdef)
also from ‹n > 0› have "… = pmf_of_set {..<n}" by (intro map_pmf_of_set_bij_betw bij) auto
also have "pmf_of_set {..<n} ⤜ ?f = rqs_cost n"
by (cases n) (simp_all add: lessThan_Suc_atMost bind_map_pmf map_bind_pmf pair_pmf_def)
finally show ?thesis by (simp add: n_def)
qed simp_all
qed
subsection ‹Expected cost›
text ‹
It is relatively straightforward to see that the following recursive function
(sometimes called the `QuickSort equation') describes the expectation of @{const rqs_cost},
i.e. the expected number of comparisons of QuickSort when run on a list with distinct elements.
›
fun rqs_cost_exp :: "nat ⇒ real" where
"rqs_cost_exp 0 = 0"
| "rqs_cost_exp (Suc n) = real n + (∑i≤n. rqs_cost_exp i + rqs_cost_exp (n - i)) / real (Suc n)"
lemmas rqs_cost_exp_0 = rqs_cost_exp.simps(1)
lemmas rqs_cost_exp_Suc [simp del] = rqs_cost_exp.simps(2)
lemma rqs_cost_exp_Suc_0 [simp]: "rqs_cost_exp (Suc 0) = 0" by (simp add: rqs_cost_exp_Suc)
text ‹
The following theorem shows that @{const rqs_cost_exp} is indeed the expectation of
@{const rqs_cost}.
›
theorem expectation_rqs_cost: "measure_pmf.expectation (rqs_cost n) real = rqs_cost_exp n"
proof (induction n rule: rqs_cost.induct)
case (2 n)
note IH = "2.IH"
have "measure_pmf.expectation (rqs_cost (Suc n)) real =
(∑a≤n. inverse (real (Suc n)) *
measure_pmf.expectation (rqs_cost a ⤜ (λaa. rqs_cost (n - a) ⤜
(λb. return_pmf (n + aa + b)))) real)"
unfolding rqs_cost.simps by (subst pmf_expectation_bind_pmf_of_set) auto
also have "… = (∑i≤n. inverse (real (Suc n)) * (real n + rqs_cost_exp i + rqs_cost_exp (n - i)))"
proof (intro sum.cong refl, goal_cases)
case (1 i)
have "rqs_cost i ⤜ (λa. rqs_cost (n - i) ⤜ (λb. return_pmf (n + a + b))) =
map_pmf (λ(a,b). n + a + b) (pair_pmf (rqs_cost i) (rqs_cost (n - i)))"
by (simp add: pair_pmf_def map_bind_pmf)
also have "measure_pmf.expectation … real =
measure_pmf.expectation (pair_pmf (rqs_cost i) (rqs_cost (n - i)))
(λz. real n + (real (fst z) + real (snd z)))"
by (subst integral_map_pmf) (simp add: case_prod_unfold add_ac)
also have "… = real n + measure_pmf.expectation (pair_pmf (rqs_cost i) (rqs_cost (n - i)))
(λz. real (fst z) + real (snd z))" (is "_ = _ + ?A")
by (subst Bochner_Integration.integral_add) (auto intro!: integrable_measure_pmf_finite)
also have "?A = measure_pmf.expectation (map_pmf fst (pair_pmf (rqs_cost i) (rqs_cost (n - i)))) real +
measure_pmf.expectation (map_pmf snd (pair_pmf (rqs_cost i) (rqs_cost (n - i)))) real"
unfolding integral_map_pmf
by (subst Bochner_Integration.integral_add) (auto intro!: integrable_measure_pmf_finite)
also have "… = measure_pmf.expectation (rqs_cost i) real +
measure_pmf.expectation (rqs_cost (n - i)) real"
unfolding map_fst_pair_pmf map_snd_pair_pmf ..
also from 1 have "… = rqs_cost_exp i + rqs_cost_exp (n - i)" by (simp_all add: IH)
finally show ?case by simp
qed
also have "… = (∑i≤n. inverse (real (Suc n)) * real n) +
(∑i≤n. rqs_cost_exp i + rqs_cost_exp (n - i)) / real (Suc n)"
by (simp add: sum.distrib field_simps sum_distrib_left sum_distrib_right
sum_divide_distrib [symmetric] del: of_nat_Suc)
also have "(∑i≤n. inverse (real (Suc n)) * real n) = real n" by simp
also have "… + (∑i≤n. rqs_cost_exp i + rqs_cost_exp (n - i)) / real (Suc n) = rqs_cost_exp (Suc n)"
by (simp add: rqs_cost_exp_Suc)
finally show ?case .
qed simp_all
text ‹
We will now obtain a closed-form solution for @{const rqs_cost_exp}. First of all, we can
reindex the right-most sum in the recursion step and obtain:
›
lemma rqs_cost_exp_Suc':
"rqs_cost_exp (Suc n) = real n + 2 / real (Suc n) * (∑i≤n. rqs_cost_exp i)"
proof -
have "rqs_cost_exp (Suc n) = real n + (∑i≤n. rqs_cost_exp i + rqs_cost_exp (n - i)) / real (Suc n)"
by (rule rqs_cost_exp_Suc)
also have "(∑i≤n. rqs_cost_exp i + rqs_cost_exp (n - i)) = (∑i≤n. rqs_cost_exp i) + (∑i≤n. rqs_cost_exp (n - i))"
by (simp add: sum.distrib)
also have "(∑i≤n. rqs_cost_exp (n - i)) = (∑i≤n. rqs_cost_exp i)"
by (intro sum.reindex_bij_witness[of _ "λi. n - i" "λi. n - i"]) auto
also have "… + … = 2 * …" by simp
also have "… / real (Suc n) = 2 / real (Suc n) * (∑i≤n. rqs_cost_exp i)" by simp
finally show ?thesis .
qed
text ‹
Next, we can apply some standard techniques to transform this equation into a simple
linear recurrence, which we can then solve easily in terms of harmonic numbers:
›
theorem rqs_cost_exp_eq [code]: "rqs_cost_exp n = 2 * real (n + 1) * harm n - 4 * real n"
proof -
define F where "F = (λn. rqs_cost_exp n / (real n + 1))"
have [simp]: "F 0 = 0" "F (Suc 0) = 0" by (simp_all add: F_def)
have F_Suc: "F (Suc m) = F m + real (2*m) / (real ((m+1)*(m+2)))" if "m > 0" for m
proof (cases m)
case (Suc n)
have A: "rqs_cost_exp (Suc (Suc n)) * real (Suc (Suc n)) =
real ((n+1)*(n+2)) + 2 * (∑i≤n. rqs_cost_exp i) + 2 * rqs_cost_exp (Suc n)"
by (subst rqs_cost_exp_Suc') (simp_all add: field_simps)
have B: "rqs_cost_exp (Suc n) * real (Suc n) = real (n*(n+1)) + 2 * (∑i≤n. rqs_cost_exp i)"
by (subst rqs_cost_exp_Suc') (simp_all add: field_simps)
have "rqs_cost_exp (Suc (Suc n)) * real (Suc (Suc n)) - rqs_cost_exp (Suc n) * real (Suc n) =
real ((n+1)*(n+2)) - real (n*(n+1)) + 2 * rqs_cost_exp (Suc n)"
by (subst A, subst B) simp_all
also have "real ((n+1)*(n+2)) - real (n*(n+1)) = real (2*(n+1))" by simp
finally have "rqs_cost_exp (Suc (Suc n)) * real (n+2) = rqs_cost_exp (Suc n) * real (n+3) + real (2*(n+1))"
by (simp add: algebra_simps)
hence "rqs_cost_exp (Suc (Suc n)) / real (n+3) =
rqs_cost_exp (Suc n) / real (n+2) + real (2*(n+1)) / (real (n+2)*real (n+3))"
by (simp add: divide_simps del: of_nat_Suc of_nat_add)
thus ?thesis by (simp add: F_def algebra_simps Suc)
qed simp_all
have F_eq: "F n = 2 * (∑k=1..n. real (k - 1) / real (k * (k + 1)))" for n
proof (cases "n ≥ 1")
case True
thus ?thesis by (induction n rule: dec_induct) (simp_all add: F_Suc algebra_simps)
qed (simp_all add: not_le)
have "F n = 2 * (∑k=1..n. real (k - 1) / real (k * (k + 1)))" (is "_ = 2 * ?S") by (fact F_eq)
also have "?S = (∑k=1..n. 2 / real (Suc k) - 1 / real k)"
by (intro sum.cong) (simp_all add: field_simps of_nat_diff)
also have "… = 2 * (∑k=1..n. inverse (real (Suc k))) - harm n"
by (subst sum_subtractf) (simp add: harm_def sum.distrib sum_distrib_left divide_simps)
also have "(∑k=1..n. inverse (real (Suc k))) = (∑k=Suc 1..Suc n. inverse (real k))"
by (intro sum.reindex_bij_witness[of _ "λx. x - 1" Suc]) auto
also have "… = harm (Suc n) - 1" unfolding harm_def by (subst (2) sum.atLeast_Suc_atMost) simp_all
finally have "F n = 2 * harm n + 4 * (1 / (n + 1) - 1)" by (simp add: harm_Suc field_simps)
also have "… * real (n + 1) = 2 * real (n + 1) * harm n - 4 * real n"
by (simp add: field_simps)
also have "F n * real (n + 1) = rqs_cost_exp n" by (simp add: F_def add_ac)
finally show ?thesis .
qed
lemma asymp_equiv_harm [asymp_equiv_intros]: "harm ∼[at_top] (λn. ln (real n))"
proof -
have "(λn. harm n - ln (real n)) ∈ O(λ_. 1)" using euler_mascheroni_LIMSEQ
by (intro bigoI_tendsto[where c = euler_mascheroni]) simp_all
also have "(λ_. 1) ∈ o(λn. ln (real n))" by auto
finally have "(λn. ln (real n) + (harm n - ln (real n))) ∼[at_top] (λn. ln (real n))"
by (subst asymp_equiv_add_right) simp_all
thus ?thesis by simp
qed
corollary rqs_cost_exp_asymp_equiv: "rqs_cost_exp ∼[at_top] (λn. 2 * n * ln n)"
proof -
have "rqs_cost_exp = (λn. 2 * real (n + 1) * harm n - 4 * real n)" using rqs_cost_exp_eq ..
also have "… = (λn. 2 * real n * harm n + (2 * harm n - 4 * real n))"
by (simp add: algebra_simps)
finally have "rqs_cost_exp ∼[at_top] …" by simp
also have "… ∼[at_top] (λn. 2 * real n * harm n)"
proof (subst asymp_equiv_add_right)
have "(λx. 1 * harm x) ∈ o(λx. real x * harm x)"
by (intro landau_o.small_big_mult smallo_real_nat_transfer) simp_all
moreover have "harm ∈ ω(λ_. 1 :: real)"
by (intro smallomegaI_filterlim_at_top_norm) (auto simp: harm_at_top)
hence "(λx. real x * 1) ∈ o(λx. real x * harm x)"
by (intro landau_o.big_small_mult) (simp_all add: smallomega_iff_smallo)
ultimately show "(λn. 2 * harm n - 4 * real n) ∈ o(λn. 2 * real n * harm n)"
by (intro sum_in_smallo) simp_all
qed simp_all
also have "… ∼[at_top] (λn. 2 * real n * ln (real n))" by (intro asymp_equiv_intros)
finally show ?thesis .
qed
lemma harm_mono: "m ≤ n ⟹ harm m ≤ (harm n :: real)"
unfolding harm_def by (intro sum_mono2) auto
lemma harm_Suc_0 [simp]: "harm (Suc 0) = 1"
by (simp add: harm_def)
lemma harm_ge_1: "n > 0 ⟹ harm n ≥ (1::real)"
using harm_mono[of 1 n] by simp
lemma mono_rqs_cost_exp: "mono rqs_cost_exp"
proof (rule incseq_SucI)
fix n show "rqs_cost_exp n ≤ rqs_cost_exp (Suc n)"
proof (cases "n = 0")
case False
have "0 < (1 * 2 * (real n + 1) - 2 * real n) / (real n + 1)" by simp
also have "… ≤ (harm n * 2 * (real n + 1) - 2 * real n) / (real n + 1)" using False
by (intro divide_right_mono diff_right_mono mult_right_mono) (auto simp: harm_ge_1)
also have "… = rqs_cost_exp (Suc n) - rqs_cost_exp n"
by (simp add: rqs_cost_exp_eq harm_Suc field_simps)
finally show ?thesis by simp
qed auto
qed
lemma rqs_cost_exp_leI: "m ≤ n ⟹ rqs_cost_exp m ≤ rqs_cost_exp n"
using mono_rqs_cost_exp by (simp add: mono_def)
subsection ‹Version for lists with repeated elements›
definition threeway_partition where
"threeway_partition x R xs =
(filter (λy. (y,x) ∈ R ∧ (x,y) ∉ R) xs,
filter (λy. (x,y) ∈ R ∧ (y,x) ∈ R) xs,
filter (λy. (x,y) ∈ R ∧ (y,x) ∉ R) xs)"
text ‹
The following version of randomised Quicksort uses a three-way partitioning function
in order to also achieve expected logarithmic running time on lists with repeated elements.
›
function rquicksort' :: "('a × 'a) set ⇒ 'a list ⇒ ('a list × nat) pmf" where
"rquicksort' R xs =
(if xs = [] then
return_pmf ([], 0)
else
do {
i ← pmf_of_set {..<length xs};
let x = xs ! i;
case threeway_partition x R (delete_index i xs) of
(ls, es, rs) ⇒ do {
(ls, n1) ← rquicksort' R ls;
(rs, n2) ← rquicksort' R rs;
return_pmf (ls @ x # es @ rs, length xs - 1 + n1 + n2)
}
})"
by auto
termination proof (relation "Wellfounded.measure (length ∘ snd)", goal_cases)
show "wf (Wellfounded.measure (length ∘ snd))" by simp
qed (subst (asm) set_pmf_of_set;
force intro!: le_less_trans[OF length_filter_le] simp: threeway_partition_def)+
declare rquicksort'.simps [simp del]
lemma rquicksort'_Nil [simp]: "rquicksort' R [] = return_pmf ([], 0)"
by (simp add: rquicksort'.simps)
context
begin
qualified definition lesss :: "('a × 'a) set ⇒ 'a ⇒ 'a list ⇒ 'a list" where
"lesss R x xs = filter (λy. (y, x) ∈ R ∧ (x, y) ∉ R) xs"
qualified definition greaters :: "('a × 'a) set ⇒ 'a ⇒ 'a list ⇒ 'a list" where
"greaters R x xs = filter (λy. (x, y) ∈ R ∧ (y, x) ∉ R) xs"
qualified lemma lesss_Cons:
"lesss R x (y # ys) =
(if (y, x) ∈ R ∧ (x, y) ∉ R then y # lesss R x ys else lesss R x ys)"
by (simp add: lesss_def)
qualified lemma length_lesss_le [intro]: "length (lesss R x xs) ≤ length xs"
by (simp add: lesss_def)
qualified lemma length_lesss_less [intro]:
assumes "x ∈ set xs"
shows "length (lesss R x xs) < length xs"
using assms by (induction xs) (auto simp: lesss_Cons intro: le_less_trans)
qualified lemma greaters_Cons:
"greaters R x (y # ys) =
(if (x, y) ∈ R ∧ (y, x) ∉ R then y # greaters R x ys else greaters R x ys)"
by (simp add: greaters_def)
qualified lemma length_greaters_le [intro]: "length (greaters R x xs) ≤ length xs"
by (simp add: greaters_def)
qualified lemma length_greaters_less [intro]:
assumes "x ∈ set xs"
shows "length (greaters R x xs) < length xs"
using assms by (induction xs) (auto simp: greaters_Cons intro: le_less_trans)
text ‹
The following function counts the comparisons made by the modified randomised Quicksort.
›
function rqs'_cost :: "('a × 'a) set ⇒ 'a list ⇒ nat pmf" where
"rqs'_cost R xs =
(if xs = [] then
return_pmf 0
else
do {
i ← pmf_of_set {..<length xs};
let x = xs ! i;
map_pmf (λ(n1,n2). length xs - 1 + n1 + n2)
(pair_pmf (rqs'_cost R (lesss R x xs)) (rqs'_cost R (greaters R x xs)))
})"
by auto
termination by (relation "Wellfounded.measure (length ∘ snd)") auto
declare rqs'_cost.simps [simp del]
lemma rqs'_cost_nonempty:
"xs ≠ [] ⟹ rqs'_cost R xs =
do {
i ← pmf_of_set {..<length xs};
let x = xs ! i;
n1 ← rqs'_cost R (lesss R x xs);
n2 ← rqs'_cost R (greaters R x xs);
return_pmf (length xs - 1 + n1 + n2)
}"
by (subst rqs'_cost.simps) (auto simp: pair_pmf_def Let_def map_bind_pmf)
lemma finite_set_pmf_rqs'_cost [simp, intro]:
"finite (set_pmf (rqs'_cost R xs))"
by (induction R xs rule: rqs'_cost.induct) (auto simp: rqs'_cost.simps Let_def)
lemma expectation_pair_pmf_fst [simp]:
fixes f :: "'a ⇒ 'b::{banach, second_countable_topology}"
shows "measure_pmf.expectation (pair_pmf p q) (λx. f (fst x)) = measure_pmf.expectation p f"
proof -
have "measure_pmf.expectation (pair_pmf p q) (λx. f (fst x)) =
measure_pmf.expectation (map_pmf fst (pair_pmf p q)) f" by simp
also have "map_pmf fst (pair_pmf p q) = p"
by (simp add: map_fst_pair_pmf)
finally show ?thesis .
qed
lemma expectation_pair_pmf_snd [simp]:
fixes f :: "'a ⇒ 'b::{banach, second_countable_topology}"
shows "measure_pmf.expectation (pair_pmf p q) (λx. f (snd x)) = measure_pmf.expectation q f"
proof -
have "measure_pmf.expectation (pair_pmf p q) (λx. f (snd x)) =
measure_pmf.expectation (map_pmf snd (pair_pmf p q)) f" by simp
also have "map_pmf snd (pair_pmf p q) = q"
by (simp add: map_snd_pair_pmf)
finally show ?thesis .
qed
qualified lemma length_lesss_le_sorted:
assumes "sorted_wrt R xs" "i < length xs"
shows "length (lesss R (xs ! i) xs) ≤ i"
using assms by (induction arbitrary: i rule: sorted_wrt.induct)
(force simp: lesss_def nth_Cons le_Suc_eq split: nat.splits)+
qualified lemma length_greaters_le_sorted:
assumes "sorted_wrt R xs" "i < length xs"
shows "length (greaters R (xs ! i) xs) ≤ length xs - i - 1"
using assms
by (induction arbitrary: i rule: sorted_wrt.induct)
(force simp: greaters_def nth_Cons le_Suc_eq split: nat.splits)+
qualified lemma length_lesss_le':
assumes "i < length xs" "linorder_on A R" "set xs ⊆ A"
shows "length (lesss R (insort_wrt R xs ! i) xs) ≤ i"
proof -
define x where "x = insort_wrt R xs ! i"
define less where "less = (λx y. (x,y) ∈ R ∧ (y,x) ∉ R)"
have "length (lesss R x xs) = size {# y ∈# mset xs. less y x #}"
by (simp add: lesss_def size_mset [symmetric] less_def mset_filter del: size_mset)
also have "mset xs = mset (insort_wrt R xs)" by simp
also have "size {#y ∈# mset (insort_wrt R xs). less y x#} =
length (lesss R x (insort_wrt R xs))"
by (simp only: mset_filter [symmetric] size_mset lesss_def less_def)
also have "… ≤ i" unfolding x_def by (rule length_lesss_le_sorted) (use assms in auto)
finally show ?thesis unfolding x_def .
qed
qualified lemma length_greaters_le':
assumes "i < length xs" "linorder_on A R" "set xs ⊆ A"
shows "length (greaters R (insort_wrt R xs ! i) xs) ≤ length xs - i - 1"
proof -
define x where "x = insort_wrt R xs ! i"
define less where "less = (λx y. (x,y) ∈ R ∧ (y,x) ∉ R)"
have "length (greaters R x xs) = size {# y ∈# mset xs. less x y #}"
by (simp add: greaters_def size_mset [symmetric] less_def mset_filter del: size_mset)
also have "mset xs = mset (insort_wrt R xs)" by simp
also have "size {#y ∈# mset (insort_wrt R xs). less x y#} =
length (greaters R x (insort_wrt R xs))"
by (simp only: mset_filter [symmetric] size_mset greaters_def less_def)
also have "… ≤ length (insort_wrt R xs) - i - 1" unfolding x_def
by (rule length_greaters_le_sorted) (use assms in auto)
finally show ?thesis unfolding x_def by simp
qed
text ‹
We can show quite easily that the expected number of comparisons in this modified
QuickSort is bounded above by the expected number of comparisons on a list of the same length
with no repeated elements.
›
theorem rqs'_cost_expectation_le:
assumes "linorder_on A R" "set xs ⊆ A"
shows "measure_pmf.expectation (rqs'_cost R xs) real ≤ rqs_cost_exp (length xs)"
using assms
proof (induction R xs rule: rqs'_cost.induct)
case (1 R xs)
show ?case
proof (cases "xs = []")
case False
define n where "n = length xs - 1"
have length_eq: "length xs = Suc n" using False by (simp add: n_def)
define E where "E = (λxs. measure_pmf.expectation (rqs'_cost R xs) real)"
define f where "f = (λx. rqs_cost_exp (length (lesss R x xs)) +
rqs_cost_exp (length (greaters R x xs)))"
have "rqs'_cost R xs =
do {
i ← pmf_of_set {..<length xs};
map_pmf (λ(n1, y). length xs - Suc 0 + n1 + y)
(pair_pmf (rqs'_cost R (lesss R (xs ! i) xs))
(rqs'_cost R (greaters R (xs ! i) xs)))
}"
using False by (subst rqs'_cost.simps) (simp_all add: Let_def)
also have "measure_pmf.expectation … real = real n +
(∑k<length xs. E (lesss R (xs ! k) xs) + E (greaters R (xs ! k) xs)) / real (length xs)"
using False
by (subst pmf_expectation_bind_pmf_of_set)
(auto intro!: finite_imageI finite_cartesian_product simp: case_prod_unfold
integrable_measure_pmf_finite sum_divide_distrib [symmetric] field_simps
length_eq sum.distrib E_def)
also have "… ≤ real n + (∑k<length xs. f (xs ! k)) / real (length xs)"
unfolding E_def f_def using False "1.prems"
by (intro add_mono order.refl divide_right_mono sum_mono "1.IH"[OF _ _ refl] False)
(auto simp: lesss_def greaters_def)
also have "(∑k<length xs. f (xs ! k)) = (∑x∈#mset xs. f x)"
by (simp only: mset_map [symmetric] sum_mset_sum_list sum_list_sum_nth)
(simp_all add: atLeast0LessThan)
also have "mset xs = mset (insort_wrt R xs)"
by simp
also have "(∑x∈#…. f x) = (∑i<length xs. f (insort_wrt R xs ! i))"
by (simp only: mset_map [symmetric] sum_mset_sum_list sum_list_sum_nth)
(simp_all add: atLeast0LessThan)
also have "… ≤ (∑i<length xs. rqs_cost_exp i + rqs_cost_exp (length xs - i - 1))"
unfolding f_def
proof (intro sum_mono add_mono rqs_cost_exp_leI)
fix i assume i: "i ∈ {..<length xs}"
show "length (lesss R (insort_wrt R xs ! i) xs) ≤ i"
using i "1.prems" by (intro length_lesss_le'[where A = A]) auto
show "length (greaters R (insort_wrt R xs ! i) xs) ≤ length xs - i - 1"
using i "1.prems" by (intro length_greaters_le'[where A = A]) auto
qed
also have "… = (∑i≤n. rqs_cost_exp i + rqs_cost_exp (n - i))"
by (intro sum.cong) (auto simp: length_eq)
also have "real n + … / real (length xs) = rqs_cost_exp (length xs)"
by (simp add: length_eq rqs_cost_exp.simps(2))
finally show ?thesis by (simp add: divide_right_mono)
qed (auto simp: rqs'_cost.simps)
qed
end
end