(* Author: Tobias Nipkow *) section "Weighted Path Length of BST" theory Weighted_Path_Length imports "HOL-Library.Tree" begin text ‹This theory presents two definitions of the \emph{weighted path length} of a BST, the objective function we want to minimize, and proves them equivalent. Function ‹Wpl› is the intuitive global definition that sums ‹a› over all leaves and ‹b› over all nodes, taking their depth (= number of comparisons to reach that point) into account. Function ‹wpl› is a recursive definition and thus suitable for the later dynamic programming approaches to building a BST with the minimal weighted path length. › lemma inorder_upto_split: assumes "inorder ⟨l,k,r⟩ = [i..j]" shows "inorder l = [i..k-1]" "inorder r = [k+1..j]" "i ≤ k" "k ≤ j" proof - have k: "k∈set[i..j]" using assms by (metis set_inorder tree.set_intros(2)) have "[i..k-1] @ k # [k+1..j] = [i..j]" using k upto_rec1 upto_split1 by (metis atLeastAtMost_iff set_upto) also have "… = inorder l @ k # inorder r" using assms by auto finally have "inorder l = [i..k-1] ∧ inorder r = [k+1..j]" (is "?A ∧ ?B") by(auto simp: append_Cons_eq_iff) thus ?A ?B by auto show "i ≤ k" "k ≤ j" using k by auto qed fun incr2 :: "int × nat ⇒ int × nat" where "incr2 (x,n) = (x, n + 1)" fun leaves :: "int ⇒ int tree ⇒ (int * nat) set" where "leaves i Leaf = {(i,0)}" | "leaves i (Node l k r) = incr2 ` (leaves i l ∪ leaves (k+1) r)" fun nodes :: "int tree ⇒ (int * nat) set" where "nodes Leaf = {}" | "nodes (Node l k r) = {(k,1)} ∪ incr2 ` (nodes l ∪ nodes r)" lemma finite_nodes: "finite (nodes t)" by(induction t) auto lemma finite_leaves: "finite (leaves i t)" by(induction i t rule: leaves.induct) auto lemma notin_nodes0: "(k, 0) ∉ nodes t" by(induction t) auto lemma sum_incr2: "sum f (incr2 ` A) = sum (λxy. f(fst xy,snd xy+1)) A" proof - have "sum f (incr2 ` A) = sum (f o incr2) A" by(subst sum.reindex)(auto simp: inj_on_def) also have "f o incr2 = (λxy. f(fst xy,snd xy+1))" by(auto simp: fun_eq_iff) finally show ?thesis by simp qed lemma fst_nodes: "fst ` nodes t = set_tree t" apply(induction t) apply simp apply (fastforce simp: image_def set_eq_iff ball_Un) done lemma fst_leaves: "⟦ inorder t = [i..j]; i ≤ j+1⟧ ⟹ fst ` leaves i t = {i..j+1}" proof(induction t arbitrary: i j) case Leaf then show ?case by auto next case (Node t1 k t2) note inorder = inorder_upto_split[OF Node.prems(1)] show ?case using Node.IH(1)[OF inorder(1)] Node.IH(2)[OF inorder(2)] inorder(3,4) Node.prems(2) by (fastforce simp: image_def set_eq_iff bex_Un) qed lemma sum_leaves: "⟦ inorder t = [i..j]; i ≤ j+1 ⟧ ⟹ (∑x∈leaves i t. (f(fst x) :: nat)) = sum f {i..j+1}" proof(induction t arbitrary: i j) case Leaf hence "i = j+1" by simp thus ?case by simp next case (Node l k r) note inorder = inorder_upto_split[OF Node.prems(1)] let ?Ll = "leaves i l" let ?Lr = "leaves (k+1) r" let ?L = "?Ll ∪ ?Lr" have "fst ` ?Ll ∩ fst ` ?Lr = {}" using inorder by(simp add: fst_leaves del: set_inorder add: set_inorder[symmetric]) hence l0: "?Ll ∩ ?Lr = {}" by auto have "{i..j+1} = {i..k} ∪ {k+1..j+1}" using inorder(3,4) by auto thus ?case using Node.IH(1)[OF inorder(1)] Node.IH(2)[OF inorder(2)] inorder(3,4) Node.prems(2) by(simp add: sum_incr2 sum_Un_nat finite_leaves l0) qed lemma sum_nodes: "inorder t = [i..j] ⟹ (∑xy∈nodes t. (f(fst xy) :: nat)) = sum f {i..j}" proof(induction t arbitrary: i j) case Leaf thus ?case by simp next case (Node l k r) note inorder = inorder_upto_split[OF Node.prems(1)] let ?Nl = "nodes l" let ?Nr = "nodes r" let ?N = "?Nl ∪ ?Nr" have "(fst ` ?Nl) ∩ (fst ` ?Nr) = {}" using inorder(1,2) by(simp add: fst_nodes del: set_inorder add: set_inorder[symmetric]) hence n0: "?Nl ∩ ?Nr = {}" by auto have "(∑xy∈nodes(Node l k r). (f (fst xy) :: nat)) = (∑xy∈insert (k, Suc 0) (incr2 ` (nodes l ∪ nodes r)). f(fst xy))" by(simp) also have "… = f k + (∑xy∈(incr2 ` (nodes l ∪ nodes r)). f(fst xy))" by(subst sum.insert, auto simp: finite_nodes notin_nodes0) also have "… = sum f {i..j}" proof - have "{i..j} = {i..k-1} ∪ {k} ∪ {k+1..j}" using inorder(3,4) by auto thus ?thesis using Node.IH(1)[OF inorder(1)] Node.IH(2)[OF inorder(2)] inorder(3,4) by(simp add: sum_incr2 sum_Un_nat finite_nodes n0) qed finally show ?case . qed locale wpl = fixes w :: "int ⇒ int ⇒ nat" begin fun wpl :: "int ⇒ int ⇒ int tree ⇒ nat" where "wpl i j Leaf = 0" | "wpl i j (Node l k r) = wpl i (k-1) l + wpl (k+1) j r + w i j" end locale Wpl = fixes a b :: "int ⇒ nat" begin definition Wpl :: "int ⇒ int tree ⇒ nat" where "Wpl i t = sum (λ(k,c). c * b k) (nodes t) + sum (λ(k,c). c * a k) (leaves i t)" definition w :: "int ⇒ int ⇒ nat" where "w i j = sum a {i..j+1} + sum b {i..j}" sublocale wpl where w = w . lemma "inorder t = [i..j] ⟹ wpl i j t = Wpl i t" proof(induction t arbitrary: i j) case Leaf thus ?case by(simp add: Wpl_def) next case (Node l k r) let ?b = "λ(k,c). c * b k" let ?a = "λ(k,c). c * a k" note inorder = inorder_upto_split[OF Node.prems] let ?Nl = "nodes l" let ?Nr = "nodes r" let ?N = "?Nl ∪ ?Nr" let ?Ll = "leaves i l" let ?Lr = "leaves (k+1) r" let ?L = "?Ll ∪ ?Lr" have "(fst ` ?Nl) ∩ (fst ` ?Nr) = {}" using inorder(1,2) by(simp add: fst_nodes del: set_inorder add: set_inorder[symmetric]) hence n0: "?Nl ∩ ?Nr = {}" by auto have "fst ` ?Ll ∩ fst ` ?Lr = {}" using inorder by(simp add: fst_leaves del: set_inorder add: set_inorder[symmetric]) hence l0: "?Ll ∩ ?Lr = {}" by auto have "wpl i j (Node l k r) = Wpl i l + Wpl (k + 1) r + w i j" using Node.IH inorder by(simp) also have "… = sum ?b (nodes l) + sum ?a (leaves i l) + sum ?b (nodes r) + sum ?a (leaves (k+1) r) + w i j" by (simp add: Wpl_def) also have "… = (sum ?b (nodes l) + sum ?b (nodes r)) + (sum ?a (leaves i l) + sum ?a (leaves (k+1) r)) + w i j" by(simp add: algebra_simps) also have "… = sum ?b ?N + sum ?a ?L + w i j" by(simp add: sum_Un_nat finite_nodes finite_leaves l0 n0) also have "… = sum ?b ?N + sum ?a ?L + sum a {i..j+1} + sum b {i..j}" by (simp add: w_def) also have "… = sum ?b ?N + sum b {i..j} + (sum ?a ?L + sum a {i..j+1})" by(simp add: algebra_simps) also have "sum ?a ?L + sum a {i..j+1} = sum ?a (incr2 ` ?L)" proof - have "{i..j+1} = {i..k} ∪ {k+1..j+1}" using inorder(3,4) by auto thus ?thesis using inorder(3,4) by (simp add: sum_incr2 split_def sum.distrib sum_Un_nat finite_leaves l0 sum_leaves[OF inorder(1)] sum_leaves[OF inorder(2)]) qed also have "sum ?b ?N + sum b {i..j} = sum ?b ?N + sum b ({i..k-1} ∪ {k+1..j}) + b k" proof - have "{i..j} = {k} ∪ {i..k-1} ∪ {k+1..j}" using inorder(3,4) by auto thus ?thesis by simp qed also have "sum ?b ?N + sum b ({i..k-1} ∪ {k+1..j}) = sum ?b (incr2 ` ?N)" by (simp add: sum_incr2 split_def sum.distrib sum_Un_nat finite_nodes n0 sum_nodes[OF inorder(1)] sum_nodes[OF inorder(2)]) also have "sum ?b (incr2 ` ?N) + b k = sum ?b ({(k,1)} ∪ incr2 ` ?N)" by(simp, subst sum.insert, auto simp add: finite_nodes notin_nodes0) also have "sum ?b ({(k,1)} ∪ incr2 ` ?N) + sum ?a (incr2 ` ?L) = Wpl i ⟨l,k,r⟩" by(simp add: Wpl_def) finally show ?case . qed end end