Theory Weight_Balanced_Trees
section ‹Weight Balanced Tree Implementation of Sets›
text ‹This theory follows Hirai and Yamamoto but we do not prove their general
theorem. Instead we provide a short parameterized theory that, when
interpreted with valid parameters, will prove perservation of the invariant
for these parameters.›
theory Weight_Balanced_Trees
imports
"HOL-Data_Structures.Isin2"
begin
lemma neq_Leaf2_iff: "t ≠ Leaf ⟷ (∃l a n r. t = Node l (a,n) r)"
by(cases t) auto
type_synonym 'a wbt = "('a * nat) tree"
fun size_wbt :: "'a wbt ⇒ nat" where
"size_wbt Leaf = 0" |
"size_wbt (Node _ (_, n) _) = n"
text ‹Smart constructor:›
fun N :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"N l a r = Node l (a, size_wbt l + size_wbt r + 1) r"
text "Basic Rotations:"
fun rot1L :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"rot1L A a B b C = N (N A a B) b C"
fun rot1R :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"rot1R A a B b C = N A a (N B b C)"
fun rot2 :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"rot2 A a (Node B1 (b,_) B2) c C = N (N A a B1) b (N B2 c C)"
subsection "WB trees"
text ‹
Parameters:
➧ ‹Δ› determines when a tree needs to be rebalanced
➧ ‹Γ› determines whether it needs to be single or double rotation.
\noindent We represent rational numbers as pairs: ‹Δ = Δ1/Δ2› and ‹Γ = Γ1/Γ2›.
\bigskip
Hirai and Yamamoto \<^cite>‹"HiraiY11"› proved that under the following constraints
insertion and deletion preserve the WB invariant, i.e.\
‹Δ› and ‹Γ› are \emph{valid}:›
definition valid_params :: "nat ⇒ nat ⇒ nat ⇒ nat ⇒ bool" where
"valid_params Δ1 Δ2 Γ1 Γ2 = (
Δ1 * 2 < Δ2 * 9 ∧
Γ1 * Δ2 + Γ2 * Δ2 ≤ Γ2 * Δ1 ∧
Γ1 * Δ1 ≥ Γ2 * (Δ1 + Δ2) ∧
(5*Δ2 ≤ 2*Δ1 ∧ 1*Δ1 < 3*Δ2 ⟶ Γ1*2 ≤ Γ2*3)
∧
(3*Δ2 ≤ 1*Δ1 ∧ 2*Δ1 < 7*Δ2 ⟶ Γ1*2 ≤ Γ2*4)
∧
(7*Δ2 ≤ 2*Δ1 ∧ 1*Δ1 < 4*Δ2 ⟶ Γ1*3 ≤ Γ2*4)
∧
(4*Δ2 ≤ 1*Δ1 ∧ 2*Δ1 < 9*Δ2 ⟶ Γ1*3 ≤ Γ2*5)
)"
text ‹We do not make use of these constraints and do not prove that they guarantee
preservation of the invariant. Instead, we provide generic proofs of invariant preservation
that work for many (all?) interpretations of locale ‹WBT› (below) with valid parameters.
Further down we demonstrate this by interpreting ‹WBT› with a selection of valid parameters.
[For some parameters, some ‹smt› proofs fail because ‹smt› on ‹nat›s fails although
on non-negative ‹int›s it succeeds, i.e.\ the goal should be provable.
This is a shortcoming of ‹smt› that is under investigation.]
Locale ‹WBT› comes with some minimal assumptions (‹Γ1 > Γ2› and ‹Δ1 > Δ2›) which follow
from @{const valid_params} and from which we conclude some simple lemmas.
›
locale WBT =
fixes Δ1 Δ2 :: nat and Γ1 Γ2 :: nat
assumes Delta_gr1: "Δ1 > Δ2" and Gamma_gr1: "Γ1 > Γ2"
begin
subsubsection "Balance Indicators"
fun balanced1 :: "'a wbt ⇒ 'a wbt ⇒ bool" where
"balanced1 t1 t2 = (Δ1 * (size_wbt t1 + 1) ≥ Δ2 * (size_wbt t2 + 1))"
text ‹The global weight-balanced tree invariant:›
fun wbt :: "'a wbt ⇒ bool" where
"wbt Leaf = True"|
"wbt (Node l (_, n) r) =
(n = size l + size r + 1 ∧ balanced1 l r ∧ balanced1 r l ∧ wbt l ∧ wbt r)"
lemma size_wbt_eq_size[simp]: "wbt t ⟹ size_wbt t = size t"
by(induction t) auto
fun single :: "'a wbt ⇒ 'a wbt ⇒ bool" where
"single t1 t2 = (Γ1 * (size_wbt t2 + 1) > Γ2 * (size_wbt t1 + 1))"
subsubsection "Code"
fun rotateL :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"rotateL A a (Node B (b, _) C) =
(if single B C then rot1L A a B b C else rot2 A a B b C)"
fun balanceL :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"balanceL l a r = (if balanced1 l r then N l a r else rotateL l a r)"
fun rotateR :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"rotateR (Node A (a, _) B) b C =
(if single B A then rot1R A a B b C else rot2 A a B b C)"
fun balanceR :: "'a wbt ⇒ 'a ⇒ 'a wbt ⇒ 'a wbt" where
"balanceR l a r = (if balanced1 r l then N l a r else rotateR l a r)"
fun insert :: "'a::linorder ⇒ 'a wbt ⇒ 'a wbt" where
"insert x Leaf = Node Leaf (x, 1) Leaf" |
"insert x (Node l (a, n) r) =
(case cmp x a of
LT ⇒ balanceR (insert x l) a r |
GT ⇒ balanceL l a (insert x r) |
EQ ⇒ Node l (a, n) r )"
fun split_min :: "'a wbt ⇒ 'a * 'a wbt" where
"split_min (Node l (a, _) r) =
(if l = Leaf then (a,r) else let (x,l') = split_min l in (x, balanceL l' a r))"
fun del_max :: "'a wbt ⇒ 'a * 'a wbt" where
"del_max (Node l (a, _) r) =
(if r = Leaf then (a,l) else let (x,r') = del_max r in (x, balanceR l a r'))"
fun combine :: "'a wbt ⇒ 'a wbt ⇒ 'a wbt" where
"combine Leaf Leaf = Leaf"|
"combine Leaf r = r"|
"combine l Leaf = l"|
"combine l r =
(if size l > size r then
let (lMax, l') = del_max l in balanceL l' lMax r
else
let (rMin, r') = split_min r in balanceR l rMin r')"
fun delete :: "'a::linorder ⇒ 'a wbt ⇒ 'a wbt" where
"delete _ Leaf = Leaf" |
"delete x (Node l (a, _) r) =
(case cmp x a of
LT ⇒ balanceL (delete x l) a r |
GT ⇒ balanceR l a (delete x r) |
EQ ⇒ combine l r)"
subsection "Functional Correctness Proofs"
text ‹A WB tree must be of a certain structure if balanced1 and single are False.›
lemma not_Leaf_if_not_balanced1:
assumes "¬ balanced1 l r"
shows "r ≠ Leaf"
proof
assume "r = Leaf" with assms Delta_gr1 show False by simp
qed
lemma not_Leaf_if_not_single:
assumes "¬ single l r"
shows "l ≠ Leaf"
proof
assume "l = Leaf" with assms Gamma_gr1 show False by simp
qed
subsubsection "Inorder Properties"
lemma inorder_rot2:
"B ≠ Leaf ⟹ inorder(rot2 A a B b C) = inorder A @ a # inorder B @ b # inorder C"
by (cases "(A,a,B,b,C)" rule: rot2.cases) (auto)
lemma inorder_rotateL:
"r ≠ Leaf ⟹ inorder(rotateL l a r) = inorder l @ a # inorder r"
by (induction l a r rule: rotateL.induct) (auto simp add: inorder_rot2 not_Leaf_if_not_single)
lemma inorder_rotateR:
"l ≠ Leaf ⟹ inorder(rotateR l a r) = inorder l @ a # inorder r"
by (induction l a r rule: rotateR.induct) (auto simp add: inorder_rot2 not_Leaf_if_not_single)
lemma inorder_insert:
"sorted(inorder t) ⟹ inorder(insert x t) = ins_list x (inorder t)"
by (induction t)
(auto simp: ins_list_simps inorder_rotateL inorder_rotateR not_Leaf_if_not_balanced1)
lemma split_minD:
"split_min t = (x,t') ⟹ t ≠ Leaf ⟹ x # inorder t' = inorder t"
by (induction t arbitrary: t' rule: split_min.induct)
(auto simp: sorted_lems inorder_rotateL not_Leaf_if_not_balanced1
split: prod.splits if_splits)
lemma del_maxD:
"del_max t = (x,t') ⟹ t ≠ Leaf ⟹ inorder t' @ [x] = inorder t"
by (induction t arbitrary: t' rule: del_max.induct)
(auto simp: sorted_lems inorder_rotateR not_Leaf_if_not_balanced1
split: prod.splits if_splits)
lemma inorder_combine:
"inorder(combine l r) = inorder l @ inorder r"
by(induction l r rule: combine.induct)
(auto simp: del_maxD split_minD inorder_rotateL inorder_rotateR not_Leaf_if_not_balanced1
simp del: rotateL.simps rotateR.simps split: prod.splits)
lemma inorder_delete:
"sorted(inorder t) ⟹ inorder(delete x t) = del_list x (inorder t)"
by(induction t)
(auto simp: del_list_simps inorder_combine inorder_rotateL inorder_rotateR
not_Leaf_if_not_balanced1 simp del: rotateL.simps rotateR.simps)
subsection "Size Lemmas"
subsubsection "Insertion"
lemma size_rot2L[simp]:
"B ≠ Leaf ⟹ size(rot2 A a B b C) = size A + size B + size C + 2"
by(induction A a B b C rule: rot2.induct) auto
lemma size_rotateR[simp]:
"l ≠ Leaf ⟹ size(rotateR l a r) = size l + size r + 1"
by(induction l a r rule: rotateR.induct)
(auto simp: not_Leaf_if_not_single simp del: rot2.simps)
lemma size_rotateL[simp]:
"r ≠ Leaf ⟹ size(rotateL l a r) = size l + size r + 1"
by(induction l a r rule: rotateL.induct)
(auto simp: not_Leaf_if_not_single simp del: rot2.simps)
lemma size_length: "size t = length (inorder t)"
by (induction t rule: inorder.induct) auto
lemma size_insert: "size (insert x t) = (if isin t x then size t else Suc (size t))"
by (induction t rule: tree2_induct) (auto simp: not_Leaf_if_not_balanced1)
subsubsection "Deletion"
lemma size_delete_if_isin: "isin t x ⟹ size t = Suc (size(delete x t))"
proof (induction t rule: tree2_induct)
case (Node _ a _ _)
thus ?case
proof (cases "cmp x a")
case LT thus ?thesis using Node.prems by (simp add: Node.IH(1) not_Leaf_if_not_balanced1)
next
case EQ thus ?thesis by simp (metis size_length inorder_combine length_append)
next
case GT thus ?thesis using Node.prems by (simp add: Node.IH(2) not_Leaf_if_not_balanced1)
qed
qed (auto)
lemma delete_id_if_wbt_notin: "wbt t ⟹ ¬ isin t x ⟹ delete x t = t"
by (induction t) auto
lemma size_split_min: "t ≠ Leaf ⟹ size t = Suc (size (snd (split_min t)))"
by(induction t) (auto simp: not_Leaf_if_not_balanced1 split: if_splits prod.splits)
lemma size_del_max: "t ≠ Leaf ⟹ size t = Suc(size(snd(del_max t)))"
by(induction t) (auto simp: not_Leaf_if_not_balanced1 split: if_splits prod.splits)
subsection "Auxiliary Definitions"
fun balanced1_arith :: "nat ⇒ nat ⇒ bool" where
"balanced1_arith a b = (Δ1 * (a + 1) ≥ Δ2 * (b + 1))"
fun balanced2_arith :: "nat ⇒ nat ⇒ bool" where
"balanced2_arith a b = (balanced1_arith a b ∧ balanced1_arith b a)"
fun singly_balanced_arith :: "nat ⇒ nat ⇒ nat ⇒ bool" where
"singly_balanced_arith x y w = (balanced2_arith x y ∧ balanced2_arith (x+y+1) w)"
fun doubly_balanced_arith :: "nat ⇒ nat ⇒ nat ⇒ nat ⇒ bool" where
"doubly_balanced_arith x y z w =
(balanced2_arith x y ∧ balanced2_arith z w ∧ balanced2_arith (x+y+1) (z+w+1))"
end
subsection "Preservation of WB tree Invariant for Concrete Parameters"
text ‹A number of sample interpretations with valid parameters:›