Theory Optimal_BST2
section ‹Optimal BSTs: The `Quadratic' Algorithm\label{sec:quadratic}›
theory Optimal_BST2
imports
Optimal_BST
Quadrilateral_Inequality
begin
text ‹Knuth presented an optimization of the previously known cubic dynamic programming algorithm
to a quadratic one. A simplified proof of this optimization was found by Yao~\<^cite>‹"Yao80"›.
Mehlhorn follows Yao closely. The core of the optimization argument is given abstractly
in theory @{theory Optimal_BST.Quadrilateral_Inequality}. In addition we first need to establish some more
properties of @{const argmin}.›
text‹An index-based specification of @{const argmin}
expressing that the last minimal list-element is picked:›
lemma argmin_takes_last: "xs ≠ [] ⟹
argmin f xs = xs ! Max {i. i < length xs ∧ (∀x ∈ set xs. f(xs!i) ≤ f x)}"
(is "_ ⟹ _ = _ ! Max (?M xs)")
proof(induction xs)
case (Cons x xs)
show ?case
proof cases
assume "xs = []" thus ?thesis by(simp cong: conj_cong)
next
assume 0: "xs ≠ []"
show ?thesis
proof cases
assume 1: "∀u ∈ set xs. f x < f u"
hence 2: "?M (x#xs) = {0}"
by (fastforce simp: not_less[symmetric] less_Suc_eq_0_disj)
have "f x < f (argmin f xs)" using 0 1 argmin_Min[of xs f] by auto
with 1 Cons.prems show ?case by(subst 2) (auto simp: Let_def)
next
assume 1: "¬(∀u ∈ set xs. f x < f u)"
have 2: "¬ f x < f (argmin f xs)" using 1 argmin_Min[of xs f] 0 by auto
have "argmin f xs : {u ∈ set xs. ∀x∈set xs. f u ≤ f x}"
using 0 argmin_Min[of xs f] by (simp add: argmin_in)
hence "{u ∈ set xs. ∀x∈set xs. f u ≤ f x} ≠ {}" by blast
hence ne: "?M xs ≠ {}" by(auto simp: in_set_conv_nth)
have "Max (?M (x#xs)) = Max (?M xs) + 1"
proof (cases "∃u ∈ set xs. f u < f x")
case True
hence "?M (x#xs) = (+) 1 ` ?M xs"
by (auto simp: nth_Cons' image_def less_Suc_eq_0_disj)
thus ?thesis
using mono_Max_commute[of "(+) 1" "?M xs"] ne by (auto simp: mono_def)
next
case False
hence *: "?M (x#xs) = insert 0 ((+) 1 ` ?M xs)"
using 1 by (auto simp: nth_Cons' image_def less_Suc_eq_0_disj)
hence "Max (?M (x#xs)) = Max ((+) 1 ` ?M xs)" using Max_insert ne by simp
thus ?thesis using mono_Max_commute[of "(+) 1" "?M xs"] ne by (auto simp: mono_def)
qed
with Cons 2 0 show ?case by auto
qed
qed
qed simp
lemma Min_ex: "⟦ finite F; F ≠ {} ⟧ ⟹ ∃m ∈ F. ∀n ∈ F. m ≤ (n::_::linorder)"
using eq_Min_iff[of F "Min F"] by (fastforce)
text ‹A consequence of @{thm [source] argmin_takes_last}:›
lemma argmin_Max_Args_min_on: assumes [arith]: "i ≤ j"
shows "argmin f [i..j] = Max (Args_min_on f {i..j})"
proof -
let ?min = "λk. ∀n ∈ {i..j}. f([i..j]!k) ≤ f n"
let ?M = "{k. k < nat(j-i+1) ∧ ?min k}"
let ?Max = "Max ?M"
have "?M ≠ {}" using Min_ex[of "f ` {i..j}"]
apply(auto simp add: nth_upto)
apply(rule_tac x="nat (m-i)" in exI)
by simp
hence "?Max < nat(j-i+1)" by(simp add: nth_upto)
hence 1: "i + int ?Max ≤ j" by linarith
have "argmin f [i..j] = [i..j] ! ?Max"
using argmin_takes_last[of "[i..j]" f] by simp
also have "… = i + int ?Max" using 1 by(simp add: nth_upto)
also have "… = i + Max(int ` {k. k < nat(j-i+1) ∧ ?min k})"
using ‹?M ≠ {}› by (simp add: monoI mono_Max_commute)
also have "… = Max ((λx. i + x) ` (int ` {k. k < nat(j-i+1) ∧ ?min k}))"
using ‹?M ≠ {}› by (simp add: monoI mono_Max_commute)
also have "(λx. i + x) ` (int ` {k. k < nat(j-i+1) ∧ ?min k}) =
{k. is_arg_min_on f {i..j} k}"
apply(auto simp: is_arg_min_on_def Ball_def nth_upto image_def cong: conj_cong)
apply(rule_tac x = "x-i" in exI)
apply auto
apply(rule_tac x = "nat(x-i)" in exI)
by auto
finally show ?thesis by(simp add: Args_min_simps)
qed
text ‹As a consequence of @{thm [source] argmin_Max_Args_min_on} the following lemma
allows us to justify the restriction of the index range of @{const argmin}
used below in the optimized (quadratic) algorithm.›
lemma argmin_red_ivl:
assumes "i ≤ i'" "argmin f [i..j] ∈ {i'..j'}" "j' ≤ j"
shows "argmin f [i'..j'] = argmin f [i..j]"
proof -
have ij[arith]: "i ≤ j" using assms by simp
have ij'[arith]: "i' ≤ j'" using assms by simp
from Min_ex[of "f ` {i..j}"] have m: "∃m ∈ {i..j}. ∀n ∈ {i..j}. f m ≤ f n" by auto
note * = argmin_Max_Args_min_on[OF ij, of f]
note ** = argmin_Max_Args_min_on[OF ij', of f]
let ?M = "Args_min_on f {i..j}"
let ?M' = "Args_min_on f {i'..j'}"
have M: "finite ?M" "?M ≠ {}"
using m by (fastforce simp: Args_min_simps simp del: atLeastAtMost_iff)+
have "Max ?M ∈ ?M" by (simp add: M)
have "Max ?M ∈ ?M'" using Max_in[OF M] assms * by(auto simp: Args_min_simps)
have "?M' ⊆ ?M" using ‹Max ?M ∈ ?M› ‹Max ?M ∈ ?M'› assms(1,3)
by(force simp add: Args_min_simps Ball_def)
have "finite ?M'" using M(1) ‹?M' ⊆ ?M› infinite_super by blast
hence "Max ?M ≤ Max ?M'" by (simp add: ‹Max ?M ∈ ?M'›)
have "Max ?M' ≤ Max ?M" using Max.subset_imp[OF ‹?M' ⊆ ?M› _ M(1)] ‹Max ?M ∈ ?M'› by auto
thus ?thesis using * ** ‹Max ?M ≤ Max ?M'› by force
qed
fun root:: "'a tree ⇒ 'a" where
"root ⟨_,r,_⟩ = r"
text ‹Now we can formulate and verify the improved algorithm. This requires two
assumptions on the weight function ‹w›.›
locale Optimal_BST2 = Optimal_BST +
assumes monotone_w: "⟦i ≤ i'; i' ≤ j; j ≤ j'⟧ ⟹ w i' j ≤ w i j'"
assumes QI_w: "⟦i ≤ i'; i' ≤ j; j ≤ j'⟧⟹ w i j + w i' j'≤ w i' j + w i j'"
begin
text‹When finding an optimal tree for @{term "[i..j]"} the optimization consists in reducing
the search for the root from @{term "[i..j]"} to
@{term "[root (opt_bst2 i (j-1)) .. root (opt_bst2 (i+1) j)]"}:›
function opt_bst2 :: "int ⇒ int ⇒ int tree" where
"opt_bst2 i j =
(if i > j then Leaf else
if i = j then Node Leaf i Leaf else
let left = root (opt_bst2 i (j-1)) in
let right= root (opt_bst2 (i+1) j) in
argmin (wpl i j) [⟨opt_bst2 i (k-1), k, opt_bst2 (k+1) j⟩. k ← [left..right]])"
by auto
text ‹The termination of @{const opt_bst2} is not completely obvious.
We first need to establish some functional properties of the terminating computations.
We start by showing that the root of the returned tree is always between ‹left› and ‹right›.
This is essentially equivalent to proving that ‹left ≤ right›
because otherwise @{const argmin} is applied to ‹[]›, which is undefined.›
lemma left_le_right:
"opt_bst2_dom(i,j) ⟹
(i=j ⟶ root(opt_bst2 i j) = i) ∧
(i<j ⟶ root(opt_bst2 i j) ∈ {root(opt_bst2 i (j-1)) .. root(opt_bst2 (i+1) j)})"
proof (induction rule: opt_bst2.pinduct)
case (1 i j)
let ?left = "root (opt_bst2 i (j-1))"
let ?right = "root (opt_bst2 (i+1) j)"
let ?f ="(λk. ⟨opt_bst2 i (k - 1), k, opt_bst2 (k + 1) j⟩)"
show ?case
proof cases
assume "i > j" thus ?thesis by auto
next
assume [arith]: "¬ i > j"
show ?thesis
proof cases
assume "i = j" thus ?thesis using opt_bst2.psimps[OF "1.hyps"] by simp
next
assume [arith]: "i ≠ j"
have left_le_right: "?left ≤ ?right"
proof cases
assume [arith]: "i = j-1"
have l: "root (opt_bst2 i (j - 1)) = i" using "1.IH"(1) by auto
have r: "root (opt_bst2 (i+1) j) = j" using "1.IH"(2) by auto
show ?thesis using l r by auto
next
assume "¬ i = j-1"
hence[arith]: "i < j-1" by arith
have "?left ≤ root (opt_bst2 (i + 1) (j - 1))" using "1.IH"(1) by auto
also have "... ≤ root (opt_bst2 (i+1) j)" using "1.IH"(2) by auto
finally have "?left ≤ ?right" .
thus ?thesis by auto
qed
let ?lambda = "λt. root t ∈ {?left .. ?right}"
show ?thesis
using argmin_forall[of ‹map ?f [?left..?right]› ‹?lambda› ‹wpl i j›] left_le_right
by (fastforce simp add: opt_bst2.psimps[OF "1.hyps"])
qed
qed
qed
text ‹Now we can bound the result of @{const opt_bst2} easily:›
lemma root_opt_bst2_bound:
"opt_bst2_dom (i,j) ⟹ i ≤ j ⟹ root (opt_bst2 i j) ∈ {i..j}"
proof(induction i j rule:opt_bst2.pinduct)
case (1 i j)
show ?case using "1.prems" "1.IH"(1,2) left_le_right[OF "1.hyps"] by force
qed
text ‹Now termination follows easily:›
lemma opt_bst2_dom: "∀args. opt_bst2_dom args"
by (relation "measure (λ(i,j). nat (j-i+1))") (auto dest: root_opt_bst2_bound)
termination by(rule opt_bst2_dom)
declare opt_bst2.simps[simp del]
abbreviation "min_wpl3 i j k ≡ min_wpl i (k-1) + min_wpl (k+1) j + w i j"
text‹The correctness proof \<^cite>‹"Yao80"› is based on a general theory of `quatrilateral inequalities'
developed in locale QI that we now instantiate:›
interpretation QI
where
c = "λi j. min_wpl (i+1) j"
and c_k = "λi j. min_wpl3 (i+1) j"
and w = "λi j. w (i+1) j"
proof (standard, goal_cases)
case (1 i i' j j')
thus ?case using QI_w by simp
next
case (2 i i' j j')
thus ?case using monotone_w by simp
next
case (3 i j)
thus ?case by simp
next
case (4 i j k)
show ?case by simp
qed
lemma K_argmin: "i < j ⟹ K i j = argmin (min_wpl3 (i+1) j) [i+1..j]"
by(simp add: K_def argmin_Max_Args_min_on Args_min_on_def)
theorem opt_bst2_opt_bst: "opt_bst2 i j = opt_bst i j"
proof (induction i j rule: opt_bst2.induct)
case (1 i j)
show ?case
proof cases
assume "i ≥ j" thus ?thesis by(cases "i=j") (auto simp: opt_bst2.simps)
next
assume [arith]: "¬ i ≥ j"
let ?c = "λk. min_wpl i (k-1) + min_wpl (k+1) j + w i j"
let ?opt = "λk. ⟨opt_bst i (k-1), k, opt_bst (k+1) j⟩"
have 1: "i ≤ K (i-1) (j-1)"
using argmin_in[of "[i..j-1]"] by(auto simp add: K_argmin)
have 2: "argmin ?c [i..j] ∈ {K (i-1) (j-1)..K i j}"
using lemma_3[of "i-1" "j-1"] by(simp add: K_argmin)
have 3: "K i j ≤ j" using argmin_in[of "[i+1..j]"] by(auto simp: K_argmin)
have *: "argmin ?c [K (i-1) (j-1)..K i j] = argmin ?c [i..j]"
by(rule argmin_red_ivl[OF 1 2 3])
have "opt_bst2 i j =
argmin (wpl i j) (map ?opt [root(opt_bst2 i (j-1))..root(opt_bst2 (i+1) j)])"
using [[simp_depth_limit=3]]
by(simp add: "1.IH"(3,4)[OF _ _ refl refl] opt_bst2.simps[of i j] cong: list.map_cong_simp)
also have "… = argmin (wpl i j) (map ?opt [root(opt_bst i (j-1))..root(opt_bst (i+1) j)])"
by (simp add: "1.IH"(1,2))
also have "root(opt_bst i (j-1)) = K (i-1) (j-1)"
by(simp add: argmin_map wpl_opt_bst comp_def K_argmin)
also have "root(opt_bst (i+1) j) = K i j"
by(simp add: argmin_map wpl_opt_bst comp_def K_argmin)
also have "argmin (wpl i j) (map ?opt [K (i - 1) (j - 1)..K i j])
= ?opt (argmin (wpl i j o ?opt) [K (i-1) (j-1)..K i j])"
using lemma_3[of "i-1" "j-1"] by(simp add: argmin_map)
also have "… = ?opt (argmin ?c [K (i-1) (j-1)..K i j])"
by(simp add: comp_def wpl_opt_bst)
also have "… = ?opt(argmin ?c [i..j])"
by (simp add: "*")
also have "… = ?opt(argmin (wpl i j o ?opt) [i..j])"
by(simp add: comp_def wpl_opt_bst)
also have "… = argmin (wpl i j) (map ?opt [i..j])"
by(simp add: argmin_map)
also have "… = opt_bst i j"
by simp
finally show ?thesis .
qed
qed
corollary opt_bst2_is_optimal: "wpl i j (opt_bst2 i j) = min_wpl i j"
by (simp add: opt_bst2_opt_bst wpl_opt_bst)
function opt_bst_wpl2 :: "int ⇒ int ⇒ int tree × nat" where
"opt_bst_wpl2 i j =
(if i > j then (Leaf,0) else
if i = j then (Node Leaf i Leaf, w i i) else
let l = root(fst(opt_bst_wpl2 i (j-1)));
r = root(fst(opt_bst_wpl2 (i+1) j)) in
argmin snd
[let (tl,wl) = opt_bst_wpl2 i (k-1); (tr,wr) = opt_bst_wpl2 (k+1) j
in (⟨tl, k, tr⟩, wl + wr + w i j) . k ← [l..r]])"
by auto
lemma left_le_right2:
"opt_bst_wpl2_dom(i,j) ⟹
(i=j ⟶ root(fst(opt_bst_wpl2 i j)) = i) ∧
(i<j ⟶ root(fst(opt_bst_wpl2 i j)) ∈
{root(fst(opt_bst_wpl2 i (j-1))) .. root(fst(opt_bst_wpl2 (i+1) j))})"
proof (induction rule: opt_bst_wpl2.pinduct)
case (1 i j)
let ?l = "root (fst(opt_bst_wpl2 i (j-1)))"
let ?r = "root (fst(opt_bst_wpl2 (i+1) j))"
let ?f ="λk. let (tl,wl) = opt_bst_wpl2 i (k-1); (tr,wr) = opt_bst_wpl2 (k+1) j
in (⟨tl, k, tr⟩, wl + wr + w i j)"
show ?case
proof cases
assume "i > j" thus ?thesis by auto
next
assume [arith]: "¬ i > j"
show ?thesis
proof cases
assume "i = j" thus ?thesis using opt_bst_wpl2.psimps[OF "1.hyps"] by simp
next
assume [arith]: "i ≠ j"
have left_le_right: "?l ≤ ?r"
proof cases
assume [arith]: "i = j-1"
have l: "root (fst(opt_bst_wpl2 i (j-1))) = i" using "1.IH"(1) by auto
have r: "root (fst(opt_bst_wpl2 (i+1) j)) = j" using ‹i = j-1› "1.IH"(2)
by auto
show ?thesis using l r by auto
next
assume "¬ i = j-1"
hence[arith]: "i < j-1" by arith
have "?l ≤ root (fst(opt_bst_wpl2 (i+1) (j-1)))" using "1.IH"(1) by auto
also have "... ≤ root (fst(opt_bst_wpl2 (i+1) j))" using "1.IH"(2) by auto
finally have "?l ≤ ?r" .
thus ?thesis by auto
qed
let ?P = "λt. root (fst t) ∈ {?l .. ?r}"
show ?thesis
using argmin_forall[of ‹map ?f [?l..?r]› ?P snd] left_le_right
by (fastforce simp add: opt_bst_wpl2.psimps[OF "1.hyps"] split: prod.splits)
qed
qed
qed
text ‹Now we can bound the result of @{const opt_bst_wpl2}:›
lemma root_opt_bst_wpl2_bound:
"opt_bst_wpl2_dom (i,j) ⟹ i ≤ j ⟹ root (fst(opt_bst_wpl2 i j)) ∈ {i..j}"
proof(induction i j rule:opt_bst_wpl2.pinduct)
case (1 i j)
show ?case using "1.prems" "1.IH"(1) "1.IH"(2)[OF _ _ refl] left_le_right2[OF "1.hyps"]
by fastforce
qed
text ‹Now termination follows easily:›
lemma opt_bst_wpl2_dom: "∀args. opt_bst_wpl2_dom args"
by (relation "measure (λ(i,j). nat (j-i+1))") (auto dest: root_opt_bst_wpl2_bound)
termination by(rule opt_bst_wpl2_dom)
declare opt_bst_wpl2.simps[simp del]
lemma opt_bst_wpl2_eq_pair:
"opt_bst_wpl2 i j = (opt_bst2 i j, wpl i j (opt_bst2 i j))"
proof(induction i j rule: opt_bst_wpl2.induct)
case (1 i j)
note [simp] = opt_bst2.simps[of i j] opt_bst_wpl2.simps[of i j]
show ?case
proof cases
assume "i > j" thus ?thesis using "1.prems" by (simp)
next
assume [arith]: "¬ i > j"
show ?case
proof cases
assume [arith]: "i = j"
show ?thesis by(simp) (simp add: ‹i = j›)
next
assume [arith]: "i ≠ j"
let ?l = "root (opt_bst2 i (j-1))" let ?r = "root (opt_bst2 (i+1) j)"
have *: "?l ≤ ?r"
using left_le_right[of i j] by (fastforce simp: opt_bst2_opt_bst opt_bst2_dom)
let ?f = "λk. case opt_bst_wpl2 i (k-1) of
(l,wl) ⇒ case opt_bst_wpl2 (k+1) j of
(r,wr) ⇒ (⟨l,k,r⟩, wl + wr + w i j)"
let ?g = "λk. (⟨opt_bst2 i (k-1), k, opt_bst2 (k+1) j⟩,
wpl i (k-1) (opt_bst2 i (k-1)) + wpl (k+1) j (opt_bst2 (k+1) j) + w i j)"
have fg: "?f k = ?g k" if k: "k ∈ {?l..?r}" for k
proof -
have 1: "opt_bst_wpl2 i (k-1) = (opt_bst2 i (k-1), wpl i (k-1) (opt_bst2 i (k-1)))"
using k "1.IH"(3) by(simp add: "1.IH"(1,2))
have 2: "opt_bst_wpl2 (k+1) j = (opt_bst2 (k+1) j, wpl (k+1) j (opt_bst2 (k+1) j))"
using 1 k "1.IH"(4) by(simp add: "1.IH"(1,2))
show ?thesis using 1 2 by(simp)
qed
have "opt_bst_wpl2 i j =
argmin snd (map ?f [root(fst(opt_bst_wpl2 i (j-1)))..root(fst(opt_bst_wpl2 (i+1) j))])"
by(simp)
also have "… = argmin snd (map ?f [?l..?r])"
by(simp add: "1.IH"(1,2))
also have "… = argmin snd (map ?g [?l..?r])"
using fg by (simp cong: list.map_cong_simp)
also have "… = (opt_bst2 i j, wpl i j (opt_bst2 i j))" using *
by(simp add: argmin_pairs comp_def)
finally show ?thesis .
qed
qed
qed
corollary opt_bst_wpl2_eq_pair': "opt_bst_wpl2 i j = (opt_bst i j, min_wpl i j)"
by (simp add: opt_bst_wpl2_eq_pair opt_bst2_opt_bst wpl_opt_bst)
end
end