Theory Cayley_Hamilton
theory Cayley_Hamilton
imports
Square_Matrix
"HOL-Computational_Algebra.Polynomial"
begin
definition C :: "'a ⇒ 'a::ring_1 poly" where "C c = [:c:]"
abbreviation CC ("❙C") where "❙C ≡ map_sq_matrix C"
lemma degree_C[simp]: "degree (C a) = 0"
by (simp add: C_def)
lemma coeff_C_0[simp]: "coeff (C x) 0 = x"
by (simp add: C_def)
lemma coeff_C_gt0[simp]: "0 < n ⟹ coeff (C x) n = 0"
by (cases n) (simp_all add: C_def)
lemma coeff_C_eq: "coeff (C x) n = (if n = 0 then x else 0)"
by simp
lemma coeff_mult_C[simp]: "coeff (a * C x) n = coeff a n * x"
by (simp add: coeff_mult coeff_C_eq if_distrib[where f="λx. a * x" for a] sum.If_cases)
lemma coeff_C_mult[simp]: "coeff (C x * a) n = x * coeff a n"
by (simp add: coeff_mult coeff_C_eq if_distrib[where f="λx. x * a" for a] sum.If_cases)
lemma C_0[simp]: "C 0 = 0"
by (simp add: C_def)
lemma C_1[simp]: "C 1 = 1"
by (simp add: C_def)
lemma C_linear:
shows C_mult: "C (a * b) = C b * C a"
and C_add: "C (a + b) = C a + C b"
and C_minus: "C (- a) = - C a"
and C_diff: "C (a - b) = C a - C b"
by (simp_all add: C_def)
definition X :: "'a::ring_1 poly" where "X = [:0, 1:]"
abbreviation XX ("❙X") where "❙X ≡ diag X"
lemma degree_X[simp]: "degree X = 1"
by (simp add: X_def)
lemma coeff_X_Suc_0[simp]: "coeff X (Suc 0) = 1"
by (auto simp: X_def)
lemma coeff_X_mult[simp]: "coeff (X * p) (Suc i) = coeff p i"
by (auto simp: X_def)
lemma coeff_mult_X[simp]: "coeff (p * X) (Suc i) = coeff p i"
by (auto simp: X_def)
lemma coeff_X_mult_0[simp]: "coeff (X * p) 0 = 0"
by (auto simp: X_def)
lemma coeff_mult_X_0[simp]: "coeff (p * X) 0 = 0"
by (auto simp: X_def)
lemma coeff_X: "coeff X i = (if i = 1 then 1 else 0)"
by (cases i) (auto simp: X_def gr0_conv_Suc)
lemma coeff_pow_X: "coeff (X ^ i) n = (if i = n then 1 else 0)"
proof (induction i arbitrary: n)
case (Suc i) then show ?case
by (cases n) simp_all
qed auto
lemma coeff_pow_X_eq[simp]: "coeff (X^i) i = 1"
by (simp add: coeff_pow_X)
lemma (in monoid_mult) power_ac: "a * (a^n * x) = a^n * (a * x)"
by (metis power_Suc2 power_Suc mult.assoc)
text‹This theory contains auxiliary lemmas on polynomials.›
lemma degree_prod_le: "degree (∏i∈S. f i) ≤ (∑i∈S. degree (f i))"
by (induction S rule: infinite_finite_induct)
(simp_all, metis (lifting) degree_mult_le dual_order.trans nat_add_left_cancel_le)
lemma coeff_mult_sum:
"degree p ≤ m ⟹ degree q ≤ n ⟹ coeff (p * q) (m + n) = coeff p m * coeff q n"
using degree_mult_le[of p q] by (auto simp add: le_less coeff_eq_0 coeff_mult_degree_sum)
lemma coeff_mult_prod_sum:
"coeff (∏i∈S. f i) (∑i∈S. degree (f i)) = (∏i∈S. coeff (f i) (degree (f i)))"
by (induct rule: infinite_finite_induct)(simp_all add: coeff_mult_sum degree_prod_le)
lemma degree_sum_less:
"0 < n ⟹ (⋀x. x ∈ A ⟹ degree (f x) < n) ⟹ degree (∑x∈A. f x) < n"
by (induct rule: infinite_finite_induct) (simp_all add: degree_add_less)
lemma degree_sum_le:
shows "(⋀x. x ∈ A ⟹ degree (f x) ≤ n) ⟹ degree (∑x∈A. f x) ≤ n"
by (induct rule: infinite_finite_induct) (auto intro!: degree_add_le)
lemma degree_sum_le_Max:
"finite F ⟹ degree (sum f F) ≤ Max ((λx. degree (f x))`F)"
by (intro degree_sum_le) (auto intro!: Max.coboundedI)
lemma poly_as_sum_of_monoms': assumes n: "degree p ≤ n" shows "(∑i≤n. X^i * C (coeff p i)) = p"
proof -
have eq: "⋀i. {..n} ∩ {i} = (if i ≤ n then {i} else {})"
by auto
show ?thesis
using n
by (simp add: poly_eq_iff coeff_sum coeff_eq_0 sum.If_cases eq coeff_pow_X
if_distrib[where f="λx. x * a" for a])
qed
lemma poly_as_sum_of_monoms: "(∑i≤degree p. X^i * C (coeff p i)) = p"
by (intro poly_as_sum_of_monoms' order_refl)
lemma degree_sum_unique':
assumes I: "finite I" "i ∉ I" "⋀j. j ∈ I ⟹ degree (p j) < degree (p i)"
shows "degree (∑i∈insert i I. p i) = degree (p i)"
using I
proof (induction I)
case (insert j I) then show ?case
by (subst insert_commute) (auto simp: degree_add_eq_right)
qed simp
lemma degree_sum_unique:
"finite I ⟹ i ∈ I ⟹ (⋀j. j ∈ I ⟹ j ≠ i ⟹ degree (p j) < degree (p i)) ⟹
degree (∑i∈I. p i) = degree (p i)"
using degree_sum_unique'[of "I - {i}" i p] by (auto simp: insert_absorb)
lemma coeff_sum_unique:
fixes p :: "'a ⇒ 'b::semiring_0 poly"
assumes I: "finite I" "i ∈ I" "⋀j. j ∈ I ⟹ j ≠ i ⟹ degree (p j) < degree (p i)"
shows "coeff (∑i∈I. p i) (degree (p i)) = coeff (p i) (degree (p i))"
proof -
have "(∑j∈I. coeff (p j) (degree (p i))) = (∑i∈{i}. coeff (p i) (degree (p i)))"
using I by (intro sum.mono_neutral_cong_right) (auto intro!: coeff_eq_0)
then show ?thesis
by (simp add: coeff_sum)
qed
lemma diag_coeff: "diag (coeff x i) = map_sq_matrix (λx. coeff x i) (diag x)"
by transfer' (simp add: vec_eq_iff)
lemma smult_one: "x *⇩S 1 = diag x"
by transfer (simp add: fun_eq_iff)
lemma sum_telescope_Ico: "a ≤ b ⟹ (∑i=a ..< b. f i - f (Suc i) ::_::ab_group_add) = f a - f b"
by (induction b rule: dec_induct) auto
lemmas map_sq_matrix = map_sq_matrix_diff map_sq_matrix_add map_sq_matrix_smult map_sq_matrix_sum
lemma sign_permut: "degree (of_int (sign p) * q) = degree q"
by (simp add: sign_def)
lemma degree_det:
assumes "⋀j. j permutes UNIV ⟹ j ≠ id ⟹ degree (∏i∈UNIV. to_fun A i (j i)) < degree (∏i∈UNIV. to_fun A i i)"
shows "degree (det A) = degree (∏i∈UNIV. to_fun A i i)"
unfolding det_eq
by (subst degree_sum_unique[where i=id])
(simp_all add: sign_permut permutes_id assms)
definition max_degree :: "'a::zero poly^^'n ⇒ nat" where
"max_degree A = Max (range (λ(i, j). degree (to_fun A i j)))"
lemma degree_le_max_degree: "degree (to_fun A i j) ≤ max_degree A"
unfolding max_degree_def by (auto simp add: Max_ge_iff)
definition "charpoly A = det (❙X - ❙C A)"
lemma degree_diff_cancel: "degree q < degree p ⟹ degree (p - q::_::ab_group_add poly) = degree p"
by (metis add_uminus_conv_diff degree_add_eq_left degree_minus)
lemma
fixes A :: "'a::comm_ring_1^^'n"
shows degree_charpoly: "degree (charpoly A) = CARD('n)"
and coeff_charpoly: "coeff (charpoly A) (degree (charpoly A)) = 1"
proof -
let ?B = "diag X - map_sq_matrix C A"
let ?f = "λp. ∏i∈UNIV. to_fun ?B i (p i)"
let ?g = "λp. of_int (sign p) * ?f p"
have dB: "⋀i j. degree (to_fun ?B i j) = (if i = j then 1 else 0)"
by transfer' (simp add: degree_diff_cancel)
have cB: "⋀i j. coeff (to_fun ?B i j) (Suc 0) = (if i = j then 1 else 0)"
by transfer' simp
have degree_f_id: "degree (?f (λi. i)) = CARD('n)"
using coeff_mult_prod_sum[of "λi. to_fun ?B i i" UNIV]
by (intro antisym degree_prod_le[THEN order_trans] le_degree)
(simp_all add: dB cB)
have degree_less: "⋀p. p ≠ id ⟹ degree (?f p) < degree (?f (λi. i))"
unfolding degree_f_id
by (rule le_less_trans[OF degree_prod_le])
(auto simp add: dB sum.If_cases set_eq_iff intro!: psubset_card_mono)
have degree_charpoly: "degree (charpoly A) = degree (?f (λi. i))"
using degree_less unfolding charpoly_def by (rule degree_det)
show degree_eq: "degree (charpoly A) = CARD('n)"
using degree_charpoly degree_f_id by simp
have "coeff (∑p | p permutes UNIV. ?g p) (degree (?g id)) = 1"
proof (subst coeff_sum_unique)
show "coeff (?g id) (degree (?g id)) = 1"
using coeff_mult_prod_sum[of "λi. to_fun ?B i i" UNIV]
by (simp add: dB cB sign_id degree_f_id)
qed (auto simp: degree_less sign_permut permutes_id)
then show "coeff (charpoly A) (degree (charpoly A)) = 1"
unfolding degree_charpoly by (simp add: sign_permut charpoly_def det_eq)
qed
definition "max_perm_degree A = Max ((λp. ∑i∈UNIV. degree (to_fun A i (p i)))`{p. p permutes UNIV})"
lemma max_perm_degree_eqI:
"(⋀p. p permutes (UNIV::'a::finite set) ⟹ (∑i∈UNIV. degree (to_fun A i (p i))) ≤ x) ⟹
(∃p. p permutes UNIV ∧ (∑i∈UNIV. degree (to_fun A i (p i))) = x) ⟹
max_perm_degree A = x"
by (auto intro!: Max_eqI simp: max_perm_degree_def)
lemma degree_prod_le_max_perm_degree:
"j permutes (UNIV::'a::finite set) ⟹ degree (∏i∈UNIV. to_fun A i (j i)) ≤ max_perm_degree A"
unfolding max_perm_degree_def by (rule order_trans[OF degree_prod_le]) auto
lemma degree_le_max_perm_degree: "degree (det A) ≤ max_perm_degree A"
unfolding det_eq
by (rule order_trans[OF degree_sum_le_Max])
(auto intro!: degree_prod_le_max_perm_degree Max_le_iff[THEN iffD2] permutes_id simp: sign_permut)
lemma max_degree_adjugate:
fixes A :: "_^^'n"
shows "max_degree (adjugate (❙X - ❙C A)) = CARD('n) - 1"
(is "?R = _")
proof -
let ?M = "minor (❙X - ❙C A)"
let ?D = "λi j k l. degree (to_fun (?M i j) k l)"
have M: "⋀i j k l. to_fun (?M i j) k l = (if k = i ∧ l = j then 1
else if k = i ∨ l = j then 0
else if k = l then [: - to_fun A k l, 1 :] else [: - to_fun A k l :])"
by transfer' (simp add: vec_eq_iff C_def X_def)
have "?R = Max (range (λ(i, j). degree (det (?M j i))))" (is "_ = Max ?Max")
unfolding max_degree_def by (simp add: transpose.rep_eq cofactor_def adjugate_def of_fun_inverse)
also have "… = CARD('n) - 1"
proof (rule antisym)
show "Max ?Max ≤ CARD('n) - 1"
proof (safe intro!: Max.boundedI)
fix i j
have "max_perm_degree (?M j i) = card (UNIV - {i, j})"
by (intro max_perm_degree_eqI)
(auto simp: M sum.If_cases if_distrib[of degree] simp del: card_Diff_insert
intro!: card_mono permutes_id arg_cong[where f=card])
then show "degree (det (?M j i)) ≤ CARD('n) - 1"
using degree_le_max_perm_degree[of "?M j i"] by (cases "i = j") auto
qed auto
next
obtain x :: 'n where True by auto
let ?P = "λj k p. ∏i∈UNIV. to_fun (?M j k) i (p i)"
have "degree (det (?M x x)) = CARD('n) - 1"
proof (subst degree_det)
have "CARD('n) - 1 = (∑i∈UNIV. ?D x x i i)"
by (simp add: M if_distrib[where f="degree"] sum.If_cases Collect_neg_eq Compl_eq_Diff_UNIV)
also have "… = degree (?P x x id)"
by (auto intro!: antisym degree_prod_le le_degree simp add: coeff_mult_prod_sum)
(simp add: M if_distrib[where f="λx. coeff x b" for b] prod.If_cases)
finally show *: "degree (?P x x (λx. x)) = CARD('n) - 1"
by simp
fix p :: "'n ⇒ 'n" assume "p permutes UNIV" "p ≠ id"
then obtain i j where ij: "i ≠ j" "p i = j" and p: "i ≠ p i" "j ≠ p j"
unfolding id_def by simp (metis permutes_univ)
then have "card {i,j} ≤ CARD('n)"
by (intro card_mono) auto
have "degree (?P x x p) ≤ card (UNIV - {i, j})"
using degree_prod_le
by (rule order_trans)
(auto simp: M if_distrib[where f="degree"] sum.If_cases Collect_neg_eq Compl_eq_Diff_UNIV p intro!: card_mono)
also have "… < CARD('n) - 1"
using ‹card {i, j} ≤ CARD('n)› ij by auto
finally show "degree (?P x x p) < degree (?P x x (λx. x))"
using * by simp
qed
then show "CARD('n) - 1 ≤ Max ?Max"
by (auto simp add: Max_ge_iff intro!: exI[of _ x])
qed
finally show ?thesis .
qed
definition poly_mat :: "'a::ring_1 poly ⇒ 'a^^'n ⇒ 'a^^'n" where
"poly_mat p A = (∑i≤degree p. coeff p i *⇩S A^i)"
lemma zero_smult[simp]: "0 *⇩S M = (0::'a::semiring_1^^'n)"
by transfer (simp add: vec_eq_iff)
lemma smult_smult: "a *⇩S b *⇩S M = (a * b::'a::monoid_mult) *⇩S M"
by transfer (simp add: mult_ac)
lemma map_sq_matrix_mult_eq_smult[simp]: "map_sq_matrix ((*) a) M = a *⇩S M"
by transfer rule
lemma coeff_smult_1: "coeff p i *⇩S m = m * map_sq_matrix (λp. coeff p i) (p *⇩S 1::_::comm_ring_1 ^^ 'n)"
by (simp add: smult_one mult_diag)
lemma map_sq_matrix_if_distrib[simp]:
"map_sq_matrix (λx. if P then f x else g x) = (if P then map_sq_matrix f else map_sq_matrix g)"
by simp
theorem Cayley_Hamilton:
fixes A :: "'a::comm_ring_1 ^^ 'n"
shows "poly_mat (charpoly A) A = 0"
proof -
text %visible ‹\hrulefill ~~ Part 1 ~~ \hrulefill›
define n where "n = CARD('n) - 1"
then have d_charpoly: "n + 1 = degree (charpoly A)" and
d_adj: "n = max_degree (adjugate (❙X - ❙C A))"
by %invisible (simp_all add: degree_charpoly n_def max_degree_adjugate monom_0 diag_1[symmetric])
define B where "B i = map_sq_matrix (λp. coeff p i) (adjugate (❙X - ❙C A))" for i
have A_eq_B: "adjugate (❙X - ❙C A) = (∑i≤n. X^i *⇩S ❙C (B i))"
by %invisible (simp add: map_sq_matrix_smult sum_map_sq_matrix B_def d_adj
degree_le_max_degree poly_as_sum_of_monoms' cong: map_sq_matrix_cong)
text %visible ‹\hrulefill ~~ Part 2 ~~ \hrulefill›
have "charpoly A *⇩S 1 = X *⇩S adjugate (❙X - ❙C A) - ❙C A * adjugate (❙X - ❙C A)"
by %invisible (simp add: smult_one charpoly_def mult_adjugate_det[symmetric] field_simps diag_mult)
also have "… = (∑i≤n. X^(i + 1) *⇩S ❙C (B i)) - (∑i≤n. X^i *⇩S ❙C (A * B i))"
unfolding %invisible A_eq_B by %invisible (simp add: sum_distrib_left smult_mult2[symmetric]
map_sq_matrix_mult[symmetric] C_linear smult_sum[symmetric] smult_smult)
also have "(∑i≤n. X^(i + 1) *⇩S ❙C (B i)) =
(∑i<n. X^(i + 1) *⇩S ❙C (B i)) + X^(n + 1) *⇩S ❙C (B n)"
by %invisible (simp add: lessThan_Suc_atMost[symmetric])
also have "(∑i≤n. X^i *⇩S ❙C (A * B i)) =
(∑i<n. X^(i + 1) *⇩S ❙C (A * B (i + 1))) + ❙C (A * B 0)"
unfolding %invisible lessThan_Suc_atMost[symmetric] lessThan_Suc_eq_insert_0
by %invisible (simp add: zero_notin_Suc_image monom_0 sum.reindex one_poly_def[symmetric] diag_mult)
finally have diag_charpoly:
"charpoly A *⇩S 1 = X^(n + 1) *⇩S ❙C (B n) +
(∑i<n. X^(i + 1) *⇩S ❙C (B i - A * B (i + 1))) - ❙C (A * B 0)"
by %invisible (simp add: map_sq_matrix_diff C_linear sum_subtractf smult_diff)
text %visible ‹\hrulefill ~~ Part 3 ~~ \hrulefill›
let ?p = "λi. coeff (charpoly A) i *⇩S A^i"
let ?AB = "λi. A^(i + 1) * B i"
have "(∑i≤n+1. ?p i) = ?p 0 + (∑i<n. ?p (i + 1)) + ?p (n + 1)"
unfolding %invisible sum.atMost_Suc_shift Suc_eq_plus1[symmetric]
by %invisible (simp add: lessThan_Suc_atMost[symmetric])
also have "?p 0 = - ?AB 0"
by %invisible (simp add: coeff_smult_1 diag_charpoly map_sq_matrix)
also have "(∑i<n. ?p (i + 1)) = (∑i=0..<n. ?AB i - ?AB (i + 1))"
by %invisible (rule sum.cong)
(auto simp: coeff_smult_1 coeff_pow_X diag_charpoly map_sq_matrix sum_subtractf
if_distrib[where f="λx. x a" for a] if_distrib[where f="λx. a * x" for a]
field_simps sum.If_cases power_Suc2
simp del: power_Suc)
also have "… = ?AB 0 - ?AB n"
unfolding %invisible Suc_eq_plus1[symmetric]
by %invisible (subst sum_telescope_Ico) auto
also have "?AB n = ?p (n + 1)"
unfolding %invisible coeff_smult_1 diag_charpoly
by %invisible (simp add: mult_diag map_sq_matrix coeff_pow_X)
also have "coeff (charpoly A) (n + 1) = 1"
by %invisible (simp add: coeff_charpoly d_charpoly[simplified])
finally show ?thesis
by %invisible (simp add: poly_mat_def d_charpoly[simplified] diag_0_eq mult_diag)
qed
end