Theory SA_Count
theory SA_Count
imports Rank_Select
"../util/SA_Util"
begin
section "Counting Properties on Suffix Arays"
context Suffix_Array_General begin
subsection "Counting Properties"
lemma sa_card_index:
assumes "i < length s"
shows "i = card {j. j < length s ∧ suffix s (sa s ! j) < suffix s (sa s ! i)}"
(is "i = card ?A")
proof -
let ?P = "λj. j < length s ∧ suffix s (sa s ! j) < suffix s (sa s ! i)"
have P: "∀j < i. ?P j"
proof (safe)
fix j
assume "j < i"
with assms
show "j < length s"
by simp
next
fix j
assume "j < i"
with sorted_wrt_nth_less[OF sa_g_sorted[of s] `j < i`] assms
show "suffix s (sa s ! j) < suffix s (sa s ! i)"
using assms sa_length by auto
qed
have "?A = {j. j < i}"
proof (safe)
fix x
assume "x < i"
then show "x < length s"
using assms by simp
next
fix x
assume "x < i"
then show "suffix s (sa s ! x) < suffix s (sa s ! i)"
using P by auto
next
fix x
assume Q: "x < length s" "suffix s (sa s ! x) < suffix s (sa s ! i)"
hence "x ≠ i"
by blast
with sorted_nth_less_mono[OF strict_sorted_imp_sorted[OF sa_g_sorted],
simplified length_map sa_length,
OF Q(1) assms]
Q assms
show "x < i"
by (simp add: sa_length)
qed
then show ?thesis
using card_Collect_less_nat by presburger
qed
corollary sa_card_s_index:
assumes "i < length s"
shows "i = card {j. j < length s ∧ suffix s j < suffix s (sa s ! i)}"
(is "i = card ?A")
proof -
let ?i = "sa s ! i"
let ?v = "s ! ?i"
let ?B = "{j. j < length s ∧ suffix s (sa s ! j) < suffix s ?i}"
from sa_card_index[OF assms]
have "i = card ?B" .
moreover
have "bij_betw (λx. sa s ! x) ?B ?A"
proof (intro bij_betwI'; safe)
fix x y
assume "x < length s" "y < length s" "sa s ! x = sa s ! y"
then show "x = y"
by (simp add: nth_eq_iff_index_eq sa_distinct sa_length)
next
fix x
assume "x < length s"
then show "sa s ! x < length s"
using sa_nth_ex by fastforce
next
fix x
assume "x < length s" "suffix s x < suffix s ?i"
then show "∃y ∈ ?B. x = sa s ! y"
using ex_sa_nth by blast
qed
hence "card ?B = card ?A"
using bij_betw_same_card by blast
ultimately show ?thesis
by simp
qed
lemma sa_card_s_idx:
assumes "i < length s"
shows "i = card {j. j < length s ∧ s ! j < s ! (sa s ! i)} +
card {j. j < length s ∧ s ! j = s ! (sa s ! i) ∧ suffix s j < suffix s (sa s ! i)}"
proof -
let ?i = "sa s ! i"
let ?v = "s ! ?i"
let ?A = "{j. j < length s ∧ s ! j < ?v}"
let ?B = "{j. j < length s ∧ s ! j = ?v ∧ suffix s j < suffix s ?i}"
let ?C = "{j. j < length s ∧ suffix s j < suffix s ?i}"
from sa_card_s_index[OF assms]
have "i = card ?C"
by simp
moreover
have "?A ∩ ?B = {}"
by fastforce
moreover
have "?C = ?A ∪ ?B"
proof (safe)
fix x
assume "x < length s" "suffix s x < suffix s ?i" "¬s ! x < s ! ?i"
then show "s ! x = s ! ?i"
by (metis Cons_less_Cons sa_nth_ex assms suffix_cons_Suc)
next
fix x
assume "x < length s" "s ! x < s ! ?i"
then show "suffix s x < suffix s ?i"
by (metis Cons_less_Cons sa_nth_ex assms suffix_cons_Suc)
qed
ultimately show ?thesis
by (simp add: card_Un_disjoint)
qed
lemma sa_card_index_lower_bound:
assumes "i < length s"
shows "card {j. j < length s ∧ s ! (sa s ! j) < s ! (sa s ! i)} ≤ i"
(is "card ?A ≤ i")
proof -
let ?B = "{j. j < length s ∧ suffix s (sa s ! j) < suffix s (sa s ! i)}"
have "?A ⊆ ?B"
proof safe
fix x
assume "x < length s" "s ! (sa s ! x) < s ! (sa s ! i)"
then show "suffix s (sa s ! x) < suffix s (sa s ! i)"
by (metis Cons_less_Cons Cons_nth_drop_Suc assms sa_nth_ex)
qed
hence "card ?A ≤ card ?B"
by (simp add: card_mono)
then show ?thesis
using sa_card_index[OF assms] by simp
qed
lemma sa_card_rank_idx:
assumes "i < length s"
shows "i = card {j. j < length s ∧ s ! (sa s ! j) < s ! (sa s ! i)}
+ rank (sort s) (s ! (sa s ! i)) i"
proof -
from sorted_card_rank_idx[of "sort s" i]
have "i = card {j. j < length (sort s) ∧ sort s ! j < sort s ! i} + rank (sort s) (sort s ! i) i"
using assms by fastforce
moreover
have "sort s ! i = s ! (sa s ! i)"
using assms sort_sa_nth by auto
moreover
have "length (sort s) = length s"
by simp
ultimately show ?thesis
using sort_sa_nth[of _s]
by (metis (no_types, lifting) Collect_cong)
qed
corollary sa_card_rank_s_idx:
assumes "i < length s"
shows "i = card {j. j < length s ∧ s ! j < s ! (sa s ! i)}
+ rank (sort s) (s ! (sa s ! i)) i"
proof -
let ?A = "{j. j < length s ∧ s ! j < s ! (sa s ! i)}"
and ?B = "{j. j < length s ∧ s ! (sa s ! j) < s ! (sa s ! i)}"
from sa_card_rank_idx[OF assms]
have "i = card {j. j < length s ∧ s ! (sa s ! j) < s ! (sa s ! i)} +
rank (sort s) (s ! (sa s ! i)) i" .
moreover
have "bij_betw (λx. sa s ! x)
{j. j < length s ∧ s ! (sa s ! j) < s ! (sa s ! i)}
{j. j < length s ∧ s ! j < s ! (sa s ! i)}"
proof (rule bij_betwI'; safe)
fix x y
assume "x < length s" "y < length s" "sa s ! x = sa s ! y"
then show "x = y"
by (simp add: nth_eq_iff_index_eq sa_distinct sa_length)
next
fix x
assume "x < length s"
then show "sa s ! x < length s"
using sa_nth_ex by auto
next
fix x
assume "x < length s" "s ! x < s ! (sa s ! i)"
then show "∃xa ∈ {j. j < length s ∧ s ! (sa s ! j) < s ! (sa s ! i)}. x = sa s ! xa"
using ex_sa_nth by blast
qed
hence "card ?B = card ?A"
using bij_betw_same_card by blast
ultimately show ?thesis
by simp
qed
lemma sa_rank_nth:
assumes "i < length s"
shows "rank (sort s) (s ! (sa s ! i)) i =
card {j. j < length s ∧ s ! j = s ! (sa s ! i) ∧
suffix s j < suffix s (sa s ! i)}"
proof -
let ?i = "sa s ! i"
let ?v = "s ! ?i"
let ?A = "{j. j < length s ∧ s ! j < ?v}"
let ?B = "{j. j < length s ∧ s ! j = ?v ∧ suffix s j < suffix s ?i}"
from sa_card_rank_s_idx[OF assms]
have "i = card ?A + rank (sort s) ?v i" .
moreover
from sa_card_s_idx[OF assms]
have "i = card ?A + card ?B" .
ultimately show ?thesis
by linarith
qed
lemma sa_suffix_nth:
assumes "card {k. k < length s ∧ s ! k < c } + i < length s"
and "i < count_list s c"
shows "∃as. suffix s (sa s ! (card {k. k < length s ∧ s ! k < c} + i)) = c # as"
proof -
let ?A = "{k. k < length s ∧ s ! k < c}"
let ?i = "card ?A"
let ?A' = "{k. k < length (sort s) ∧ (sort s) ! k < c}"
have "∃as. suffix s (sa s ! (?i + i)) = (s ! (sa s ! (?i + i))) # as"
using assms sa_nth_ex suffix_cons_ex by blast
moreover
have "s ! (sa s ! (?i + i)) = sort s ! (?i + i)"
using assms(1) sort_sa_nth by presburger
moreover
{
have "i < count_list (sort s) c"
by (metis assms(2) count_list_perm sort_perm)
moreover
have "card ?A = card ?A'"
proof -
have "∃f. bij_betw f {n. n < length s ∧ s ! n < c} {n. n < length (sort s) ∧ sort s ! n < c}"
using bij_betw_sort_idx_ex by blast
then show ?thesis
using bij_betw_same_card by blast
qed
ultimately have "sort s ! (?i + i) = c"
using sorted_nth_gen[of "sort s" c i] assms(1) by auto
}
ultimately show ?thesis
by force
qed
subsection "Ordering Properties"
lemma sa_suffix_order_le:
assumes "card {k. k < length s ∧ s ! k < c } < length s"
shows "[c] ≤ suffix s (sa s ! (card {k. k < length s ∧ s ! k < c}))"
proof -
let ?A = "{k. k < length s ∧ s ! k < c}"
let ?A' = "{k. k < length (sort s) ∧ (sort s) ! k < c}"
let ?i = "card ?A"
let ?i' = "card ?A'"
have "∃as. suffix s (sa s ! ?i) = (s ! (sa s ! ?i)) # as"
using assms sa_nth_ex suffix_cons_ex by blast
then obtain as where
"suffix s (sa s ! ?i) = (s ! (sa s ! ?i)) # as"
by blast
moreover
from sort_sa_nth[of ?i s]
have "sort s ! ?i = s ! (sa s ! ?i)"
using assms by blast
moreover
have "?i = ?i'"
proof -
have "∃f. bij_betw f {n. n < length s ∧ s ! n < c} {n. n < length (sort s) ∧ sort s ! n < c}"
using bij_betw_sort_idx_ex by blast
then show ?thesis
using bij_betw_same_card by blast
qed
hence "c ≤ sort s ! ?i"
using sorted_nth_le[of "sort s" c] assms by auto
ultimately show ?thesis
by fastforce
qed
lemma sa_suffix_order_le_gen:
assumes "card {k. k < length s ∧ s ! k < c } + i < length s"
shows "[c] ≤ suffix s (sa s ! (card {k. k < length s ∧ s ! k < c} + i))"
proof (cases i)
case 0
then show ?thesis
using assms sa_suffix_order_le by auto
next
let ?x = "card {k. k < length s ∧ s ! k < c }"
case (Suc m)
with sorted_wrt_mapD[OF sa_g_sorted, of ?x "?x + i" s]
have "suffix s (sa s ! ?x) < suffix s (sa s ! (?x + i))"
using assms sa_length by auto
moreover
have "[c] ≤ suffix s (sa s ! ?x)"
using add_lessD1 assms sa_suffix_order_le by blast
ultimately show ?thesis
by order
qed
lemma sa_suffix_nth_less:
assumes "i < card {k. k < length s ∧ s ! k < c}"
shows "∀as. suffix s (sa s ! i) < c # as"
proof -
have "i < length s"
using assms card_less_idx_upper dual_order.strict_trans1 by blast
hence "∃as. suffix s (sa s ! i) = s ! (sa s ! i) # as"
using sa_nth_ex suffix_cons_Suc by blast
moreover
have "i < card {k. k < length (sort s) ∧ (sort s) ! k < c}"
using bij_betw_sort_idx_ex[of "sort s" s c] assms bij_betw_same_card by force
with sorted_nth_less_gen[of "sort s" i c]
have "s ! (sa s ! i) < c"
using sorted_nth_less_gen[of "sort s" i c] ‹i < length s› sort_sa_nth by force
ultimately show ?thesis
by fastforce
qed
lemma sa_suffix_nth_gr:
assumes "card {k. k < length s ∧ s ! k < c} + i < length s"
and "count_list s c ≤ i"
shows "∀as. c # as < suffix s (sa s ! (card {k. k < length s ∧ s ! k < c} + i))"
proof -
let ?x = "card {k. k < length s ∧ s ! k < c}"
let ?i = "?x + i"
let ?y = "card {k. k < length (sort s) ∧ sort s ! k < c}"
have "∃as. suffix s (sa s ! ?i) = s ! (sa s ! ?i) # as"
using assms(1) sa_nth_ex suffix_cons_Suc by blast
moreover
{
have "?y = ?x"
using bij_betw_sort_idx_ex[of "sort s" s c] bij_betw_same_card by force
moreover
have "?y + i < length (sort s)"
using assms(1) calculation(1) by auto
moreover
have "count_list (sort s) c ≤ i"
by (metis assms(2) count_list_perm mset_sort)
ultimately have "s ! (sa s ! ?i) > c"
using sorted_nth_gr_gen[of "sort s" c i] sort_sa_nth by fastforce
}
ultimately show ?thesis
by fastforce
qed
end
end