Theory KnuthMorrisPratt

section ‹Knuth-Morris-Pratt fast string search algorithm›

text ‹Development based on Filliâtre's verification using Why3›

text ‹Many thanks to Christian Zimmerer for versions of the algorithms
as while loops›

theory KnuthMorrisPratt imports "Collections.Diff_Array" "HOL-Library.While_Combinator"

begin

subsection ‹General definitions›

abbreviation "array ≡ new_array"
abbreviation length_array :: "'a array ⇒ nat" (‹∥_∥›) 
    where "length_array ≡ array_length"
notation array_get (infixl ‹!!› 100)
notation array_set (‹_[_ ::= _]› [1000,0,0] 900)

definition matches :: "'a array ⇒ nat ⇒ 'a array ⇒ nat ⇒ nat ⇒ bool"
  where "matches a i b j n = (i+n ≤ ∥a∥ ∧ j+n ≤ ∥b∥ 
                           ∧ (∀k<n. a!!(i+k) = b!!(j+k)))"

lemma matches_empty [simp]: "matches a i b j 0 ⟷ i ≤ ∥a∥ ∧ j ≤ ∥b∥"
  by (simp add: matches_def)

lemma matches_right_extension:
  "⟦matches a i b j n;
    Suc (i+n) ≤ ∥a∥;
    Suc (j+n) ≤ ∥b∥;
    a!!(i+n) = b!!(j+n)⟧ ⟹
    matches a i b j (Suc n)"
  by (auto simp: matches_def less_Suc_eq)

lemma matches_contradiction_at_first:
  "⟦0 < n; a!!i ≠ b!!j⟧ ⟹ ¬ matches a i b j n"
  by (auto simp: matches_def)

lemma matches_contradiction_at_i:
  "⟦a!!(i+k) ≠ b!!(j+k); k < n⟧ ⟹ ¬ matches a i b j n"
  by (auto simp: matches_def)

lemma matches_right_weakening:
  "⟦matches a i b j n; n' ≤ n⟧ ⟹ matches a i b j n'"
  by (auto simp: matches_def)

lemma matches_left_weakening_add:
  assumes "matches a i b j n" "k≤n"
  shows "matches a (i+k) b (j+k) (n-k)"
  using assms by (auto simp: matches_def less_diff_conv algebra_simps)

lemma matches_left_weakening:
  assumes "matches a (i - (n - n')) b (j - (n - n')) n"
      and "n' ≤ n"                                                   
      and "n - n' ≤ i"
      and "n - n' ≤ j"
    shows "matches a i b j n'"
  by (metis assms diff_diff_cancel diff_le_self le_add_diff_inverse2 matches_left_weakening_add)

lemma matches_sym: "matches a i b j n ⟹ matches b j a i n"
  by (simp add: matches_def)

lemma matches_trans:
  "⟦matches a i b j n; matches b j c k n⟧ ⟹ matches a i c k n"
  by (simp add: matches_def)

text ‹Denotes the maximal @{term "n < j"} such that the first @{term n} elements of @{term p}
      match the last @{term n} elements of @{term p}[0..@{term "j-1"}]
The first @{term n} characters of the pattern have a copy starting at @{term "j-n"}.›
definition is_next :: "'a array ⇒ nat ⇒ nat ⇒ bool" where
  "is_next p j n =
     (n < j ∧ matches p (j-n) p 0 n ∧ (∀m. n < m ∧ m < j ⟶ ¬ matches p (j-m) p 0 m))"

lemma next_iteration:
  assumes "matches a (i-j) p 0 j" "is_next p j n" "j ≤ i" 
  shows "matches a (i-n) p 0 n"
proof -
  have "matches a (i-n) p (j-n) n"
    using assms by (auto simp: algebra_simps is_next_def intro: matches_left_weakening [where n=j])
  moreover have "matches p (j-n) p 0 n"
    using is_next_def assms by blast
  ultimately show ?thesis
    using matches_trans by blast
qed

lemma next_is_maximal:
  assumes "matches a (i-j) p 0 j" "is_next p j n"
    and "j ≤ i" "n < m" "m < j"
  shows "¬ matches a (i-m) p 0 m"
proof -
  have "matches a (i-m) p (j-m) m"
    by (rule matches_left_weakening [where n=j]) (use assms in auto)
  with assms show ?thesis
    by (meson is_next_def matches_sym matches_trans)
qed

text ‹Filliâtre's version of the lemma above›
corollary next_is_maximal':
  assumes match: "matches a (i-j) p 0 j" "is_next p j n"
    and more: "j ≤ i" "i-j < k" "k < i-n"
  shows "¬ matches a k p 0 ∥p∥"
proof -
  have "¬ matches a k p 0 (i-k)"
    using next_is_maximal [OF match] more
    by (metis add.commute diff_diff_cancel diff_le_self le_trans less_diff_conv less_or_eq_imp_le)
  moreover have "i-k < ∥p∥"
    using assms by (auto simp: matches_def)
  ultimately show ?thesis
    using matches_right_weakening nless_le by blast
qed

lemma next_1_0 [simp]: "is_next p 1 0 ⟷ 1 ≤ ∥p∥"
  by (auto simp: is_next_def matches_def)

subsection ‹The Build-table routine›

definition buildtab_step :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat array × nat × nat" where
  "buildtab_step p nxt i j =
       (if p!!i = p!!j then (nxt[Suc i::=Suc j], Suc i, Suc j)
           else if j=0 then (nxt[Suc i::=0], Suc i, j)
                else (nxt, i, nxt!!j))"

text ‹The conjunction of the invariants given in the Why3 development›
definition buildtab_invariant :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ bool" where
  "buildtab_invariant p nxt i j = 
     (∥nxt∥ = ∥p∥ ∧ i ≤ ∥p∥ 
   ∧ j<i ∧ matches p (i-j) p 0 j
   ∧ (∀k. 0 < k ∧ k ≤ i ⟶ is_next p k (nxt!!k))
   ∧ (∀k. Suc j < k ∧ k < Suc i ⟶ ¬ matches p (Suc i - k) p 0 k))"

text ‹The invariant trivially holds upon initialisation›

lemma buildtab_invariant_init: "∥p∥ ≥ 2 ⟹ buildtab_invariant p (array 0 ∥p∥) 1 0"
  by (auto simp: buildtab_invariant_def is_next_def)

subsubsection ‹The invariant holds after an iteration›
text ‹each conjunct is proved separately›

lemma length_invariant:
  shows "let (nxt',i',j') = buildtab_step p nxt i j in ∥nxt'∥ = ∥nxt∥"
  by (simp add: buildtab_step_def)

lemma i_invariant:
  assumes "Suc i < m"
  shows "let (nxt',i',j') = buildtab_step p nxt i j in i' ≤ m"
  using assms by (simp add: buildtab_step_def)

lemma ji_invariant:
  assumes "buildtab_invariant p nxt i j" 
  shows "let (nxt',i',j') = buildtab_step p nxt i j in j'<i'"
proof -
  have j: "0 < j ⟹ nxt !! j < j"
    using assms by (simp add: buildtab_invariant_def is_next_def)
  show ?thesis
    using assms by (auto simp: buildtab_invariant_def buildtab_step_def intro: order.strict_trans j)
qed

lemma matches_invariant:
  assumes "buildtab_invariant p nxt i j" and "Suc i < ∥p∥"
  shows "let (nxt',i',j') = buildtab_step p nxt i j in matches p (i' - j') p 0 j'"
  using assms by (auto simp: buildtab_invariant_def buildtab_step_def matches_right_extension intro: next_iteration)

lemma is_next_invariant:
  assumes "buildtab_invariant p nxt i j" and "Suc i < ∥p∥"
  shows "let (nxt',i',j') = buildtab_step p nxt i j in ∀k. 0 < k ⟶ k ≤ i' ⟶ is_next p k (nxt'!!k)"
proof (cases "p!!i = p!!j")
  case True
  with assms have "matches p (i-j) p 0 (Suc j)"
    by (simp add: buildtab_invariant_def matches_right_extension)
  then have "is_next p (Suc i) (Suc j)"
    using assms by (auto simp: is_next_def buildtab_invariant_def)
  with True assms show ?thesis
    by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq)
next
  case neq: False
  show ?thesis
  proof (cases "j=0")
    case True
    then have "¬ matches p (i-j) p 0 (Suc j)"
      using matches_contradiction_at_first neq by fastforce
    with True assms have "is_next p (Suc i) 0"
      unfolding is_next_def buildtab_invariant_def
      by (metis Suc_leI diff_Suc_Suc diff_zero matches_empty nat_less_le zero_less_Suc)
    with assms neq show ?thesis
      by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq)
  next
    case False
    with assms neq show ?thesis
      by (simp add: buildtab_invariant_def buildtab_step_def)
  qed
qed

lemma non_matches_aux: 
  assumes "Suc (Suc j) < k" "matches p (Suc (Suc i) - k) p 0 k" 
  shows "matches p (Suc i - (k - 1)) p 0 (k - 1)"
  using matches_right_weakening assms by fastforce

lemma non_matches_invariant:
  assumes bt: "buildtab_invariant p nxt i j" and "∥p∥ ≥ 2" "Suc i < ∥p∥"
  shows "let (nxt',i',j') = buildtab_step p nxt i j in ∀k. Suc j' < k ⟶ k < Suc i' ⟶ ¬ matches p (Suc i' - k) p 0 k"
proof (cases "p!!i = p!!j")
  case True
  with non_matches_aux bt show ?thesis
    by (fastforce simp: Suc_less_eq2 buildtab_step_def buildtab_invariant_def)
next
  case neq: False
  have "j < i"
    using bt by (auto simp: buildtab_invariant_def)
  then have no_match_Sj: "¬ matches p (Suc i - Suc j) p 0 (Suc j)"
    using neq by (force simp: matches_def)
  show ?thesis
  proof (cases "j=0")
    case True
    have "¬ matches p (Suc (Suc i) - k) p 0 k"
      if "1 < k" and "k < Suc (Suc i)" for k 
    proof (cases "k = 2")
      case True
      with assms neq that show ?thesis
        by (auto simp: matches_contradiction_at_first ‹j=0›)
    next
      case False
      then have "1 < k - 1"
        using that by linarith
      with bt that have "¬ matches p (Suc i - (k - 1)) p 0 (k - 1)"
        using True by (force simp: buildtab_invariant_def)
      with False that show ?thesis
        using diff_le_self matches_right_weakening by force
    qed
    with True show ?thesis
      by (auto simp: buildtab_invariant_def buildtab_step_def)
  next
    case False
    then have "0 < j"
      by auto
    have False if lessK: "Suc (nxt!!j) < k" and "k < Suc i" and contra: "matches p (Suc i - k) p 0 k" for k
    proof (cases "Suc j < k")
      case True
      then show ?thesis
        using bt that by (auto simp: buildtab_invariant_def)        
    next
      case False
      then have "k ≤ j"
        using less_Suc_eq_le no_match_Sj contra by fastforce
      obtain k' where k': "k = Suc k'" "k' < i"
        using ‹k < Suc i› lessK not0_implies_Suc by fastforce
      have "is_next p j (nxt!!j)"
        using bt that ‹j>0› by (auto simp: buildtab_invariant_def)
      with no_match_Sj k' have "¬ matches p (j - k') p 0 k'"
        by (metis Suc_less_eq ‹k ≤ j› is_next_def lessK less_Suc_eq_le)
      moreover 
      have "matches p 0 p (i-j) j"
        using bt buildtab_invariant_def by (metis matches_sym)
      then have "matches p (j-k') p (i - k') k'"
        using ‹j<i› False k' matches_left_weakening
        by (smt (verit, best) Nat.diff_diff_eq Suc_leI Suc_le_lessD ‹k ≤ j› diff_diff_cancel diff_is_0_eq lessI nat_less_le)
      moreover have "matches p (i - k') p 0 k'"
        using contra k' matches_right_weakening by fastforce
      ultimately show False
        using matches_trans by blast
    qed
    with assms neq False show ?thesis
      by (auto simp: buildtab_invariant_def buildtab_step_def)
  qed   
qed

lemma buildtab_invariant:
  assumes ini: "buildtab_invariant p nxt i j"
  and "Suc i < ∥p∥" "(nxt',i',j') = buildtab_step p nxt i j"
  shows "buildtab_invariant p nxt' i' j'"
  unfolding buildtab_invariant_def
  using assms i_invariant [of concl: p nxt i j] length_invariant [of p nxt i j]
    ji_invariant [OF ini] matches_invariant [OF ini] non_matches_invariant [OF ini] is_next_invariant [OF ini]
  by (simp add: buildtab_invariant_def split: prod.split_asm)

subsubsection ‹The build-table loop and its correctness›

text ‹Declaring a partial recursive function with the tailrec option
relaxes the need for a termination proof, because a tail-recursive recursion
equation can never cause inconsistency.›

subsubsection ‹The build-table loop and its correctness›

partial_function (tailrec) buildtab :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat array" where
  "buildtab p nxt i j =
     (if Suc i < ∥p∥
      then let (nxt',i',j') = buildtab_step p nxt i j in buildtab p nxt' i' j'
      else nxt)"
declare buildtab.simps[code]

text ‹Nevertheless, termination must eventually be shown:
 to use induction to reason about executions. We do so by defining
a well founded relation. Termination proofs are by well-founded induction.›

definition "rel_buildtab m = inv_image (lex_prod (measure (λi. m-i)) (measure id)) snd"

lemma wf_rel_buildtab: "wf (rel_buildtab m)"
  unfolding rel_buildtab_def
  by (auto intro: wf_same_fst)

lemma buildtab_correct:
  assumes k: "0<k ∧ k < ∥p∥" and ini: "buildtab_invariant p nxt i j"
  shows "is_next p k (buildtab p nxt i j !! k)"
  using ini
proof (induction "(nxt,i,j)" arbitrary: nxt i j rule: wf_induct_rule[OF wf_rel_buildtab [of "∥p∥"]])
  case (1 nxt i j)
  show ?case
  proof (cases "Suc i < ∥p∥")
    case True
    then obtain nxt' i' j' 
        where eq: "(nxt', i', j') = buildtab_step p nxt i j" and invar': "buildtab_invariant p nxt' i' j'"
      using "1.prems" buildtab_invariant by (metis surj_pair)
    then have "j>0 ⟹ nxt'!!j < j"
      using "1.prems"
      by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm)
    then have decreasing: "((nxt', i', j'), nxt, i, j) ∈ rel_buildtab ∥p∥"
      using eq True by (auto simp: rel_buildtab_def buildtab_step_def split: if_split_asm)
    show ?thesis
      using "1.hyps" [OF decreasing invar'] "1.prems" eq True
        by(auto simp: buildtab.simps[of p nxt] split: prod.splits)
  next
    case False
    with 1 k show ?thesis
      by (auto simp: buildtab_invariant_def buildtab.simps)
  qed
qed

text ‹Before building the table, check for the degenerate case›
definition table :: "'a array ⇒ nat array" where
 "table p = (if ∥p∥ > 1 then buildtab p (array 0 ∥p∥) 1 0
            else array 0 ∥p∥)"
declare table_def[code]

lemma is_next_table:
  assumes "0 < j ∧ j < ∥p∥"
  shows "is_next p j (table p !!j)"
  using buildtab_correct[of _ p] buildtab_invariant_init[of p] assms by (simp add: table_def)


subsubsection ‹Linearity of @{term buildtabW}›

partial_function (tailrec) T_buildtab :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat ⇒ nat" where
  "T_buildtab p nxt i j t =
     (if Suc i < ∥p∥
      then let (nxt',i',j') = buildtab_step p nxt i j in T_buildtab p nxt' i' j' (Suc t)
      else t)"

lemma T_buildtab_correct:
  assumes ini: "buildtab_invariant p nxt i j"
  shows "T_buildtab p nxt i j t ≤ 2*∥p∥ - 2*i + j + t"
  using ini
proof (induction "(nxt,i,j)" arbitrary: nxt i j t rule: wf_induct_rule[OF wf_rel_buildtab [of "∥p∥"]])
  case 1
  have *: "Suc (T_buildtab p nxt' i' j' t) ≤ 2*∥p∥ - 2*i + j + t"
    if eq: "buildtab_step p nxt i j = (nxt', i', j')" and "Suc i < ∥p∥"
    for nxt' i' j' t
  proof -
    have invar': "buildtab_invariant p nxt' i' j'"
      using "1.prems" buildtab_invariant that by fastforce
    then have nextj: "j>0 ⟹ nxt'!!j < j"
      using eq "1.prems"
      by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm)
    then have decreasing: "((nxt', i', j'), nxt, i, j) ∈ rel_buildtab ∥p∥"
      using that by (auto simp: rel_buildtab_def same_fst_def buildtab_step_def split: if_split_asm)
    then have "T_buildtab p nxt' i' j' t ≤ 2 * ∥p∥ - 2 * i' + j' + t"
      using "1.hyps" invar' by blast
    then show ?thesis
      using "1.prems" that nextj
      by (force simp: T_buildtab.simps [of p nxt' i' j'] buildtab_step_def split: if_split_asm)
  qed             
  show ?case
    using * [where t = "Suc t"] by (auto simp: T_buildtab.simps split: prod.split)
qed

lemma T_buildtab_linear:
  assumes "2 ≤ ∥p∥" 
  shows "T_buildtab p (array 0 ∥p∥) 1 0 0 ≤ 2*(∥p∥ - 1)"
  using assms T_buildtab_correct [OF buildtab_invariant_init, of p 0] by auto

subsection ‹The actual string search algorithm›

definition 
  "KMP_step p nxt a i j =
      (if a!!i = p!!j then (Suc i, Suc j)
       else if j=0 then (Suc i, 0) else (i, nxt!!j))"

text ‹The conjunction of the invariants given in the Why3 development›
definition KMP_invariant :: "'a array ⇒ 'a array ⇒ nat ⇒ nat ⇒ bool" where
 "KMP_invariant p a i j = 
      (j ≤ ∥p∥ ∧ j≤i ∧ i ≤ ∥a∥ ∧ matches a (i-j) p 0 j
    ∧ (∀k < i-j. ¬ matches a k p 0 ∥p∥))"

text ‹The invariant trivially holds upon initialisation›

lemma KMP_invariant_init: "KMP_invariant p a 0 0"
  by (auto simp: KMP_invariant_def)

text ‹The invariant holds after an iteration›

lemma KMP_invariant:
  assumes ini: "KMP_invariant p a i j"
    and j: "j < ∥p∥" and i: "i < ∥a∥"
  shows "let (i',j') = KMP_step p (table p) a i j in KMP_invariant p a i' j'"
proof (cases "a!!i = p!!j")
  case True
  then show ?thesis
    using assms by (simp add: KMP_invariant_def KMP_step_def matches_right_extension)
next
  case neq: False
  show ?thesis
  proof (cases "j=0")
    case True
    with neq assms show ?thesis
      by (simp add: matches_contradiction_at_first KMP_invariant_def KMP_step_def less_Suc_eq)
  next
    case False
    then have is_nxt: "is_next p j (table p !!j)"
      using assms is_next_table j by blast
    then have "table p !! j ≤ j"
      by (simp add: is_next_def)
    moreover have "matches a (i - table p !! j) p 0 (table p !! j)"
      by (meson is_nxt KMP_invariant_def ini next_iteration)
    moreover
    have False if k: "k < i - table p !! j" and ma: "matches a k p 0 ∥p∥" for k
    proof -
      have "k ≠ i-j"
        by (metis KMP_invariant_def add_0 ini j le_add_diff_inverse2 ma matches_contradiction_at_i neq)
      then show False
        by (meson KMP_invariant_def ini is_nxt k linorder_cases ma next_is_maximal')
    qed
    ultimately show ?thesis 
      using neq assms False by (auto simp: KMP_invariant_def KMP_step_def)
  qed
qed

text ‹The first three arguments are precomputed so that they are not part of the inner loop.›
partial_function (tailrec) search :: "nat ⇒ nat ⇒ nat array ⇒ 'a array ⇒ 'a array ⇒ nat ⇒ nat ⇒ nat * nat" where
  "search m n nxt p a i j =
     (if j < m ∧ i < n then let (i',j') = KMP_step p nxt a i j in search m n nxt p a i' j'
      else (i,j))"
declare search.simps[code]

definition "rel_KMP n = lex_prod (measure (λi. n-i)) (measure id)"

lemma wf_rel_KMP: "wf (rel_KMP n)"
  unfolding rel_KMP_def by (auto intro: wf_same_fst)

text ‹Also expresses the absence of a match, when @{term "r = ∥a∥"}›
definition first_occur :: "'a array ⇒ 'a array ⇒ nat ⇒ bool"
  where "first_occur p a r = ((r < ∥a∥ ⟶ matches a r p 0 ∥p∥) ∧ (∀k<r. ¬ matches a k p 0 ∥p∥))"

lemma KMP_correct:
  assumes ini: "KMP_invariant p a i j"  
  defines [simp]: "nxt ≡ table p" 
  shows "let (i',j') = search ∥p∥ ∥a∥ nxt p a i j in first_occur p a (if j' = ∥p∥ then i' - ∥p∥ else i')"
  using ini
proof (induction "(i,j)" arbitrary: i j rule: wf_induct_rule[OF wf_rel_KMP [of "∥a∥"]])
  case (1 i j)
  then have ij: "j ≤ ∥p∥" "j ≤ i"  "i ≤ ∥a∥" 
    and match: "matches a (i - j) p 0 j" 
    and nomatch: "(∀k<i - j. ¬ matches a k p 0 ∥p∥)"
    by (auto simp: KMP_invariant_def)
  show ?case
  proof (cases "j < ∥p∥ ∧ i < ∥a∥")
    case True
    have "first_occur p a (if j'' = ∥p∥ then i'' - ∥p∥ else i'')"
      if eq: "KMP_step p (table p) a i j = (i', j')" and eq': "search ∥p∥ ∥a∥ nxt p a i' j' = (i'',j'')"
      for i' j' i'' j''
    proof -
      have decreasing: "((i',j'), i, j) ∈ rel_KMP ∥a∥"
        using that is_next_table [of j] True
        by (auto simp: rel_KMP_def KMP_step_def is_next_def split: if_split_asm)
      show ?thesis
        using "1.hyps" [OF decreasing] "1.prems" KMP_invariant that True by fastforce
    qed
    with True show ?thesis
      by (smt (verit, best) case_prodI2 nxt_def prod.case_distrib search.simps)
  next
    case False
    have "False" if "matches a k p 0 ∥p∥" "j < ∥p∥" "i = ∥a∥" for k
    proof -
      have "∥p∥+k ≤ i"
        using that by (simp add: matches_def)
      with that nomatch show False by auto
    qed
    with False ij show ?thesis
      apply (simp add: first_occur_def split: prod.split)
      by (metis le_less_Suc_eq match nomatch not_less_eq prod.inject search.simps)
  qed
qed

definition KMP_search :: "'a array ⇒ 'a array ⇒ nat × nat" where
  "KMP_search p a = search ∥p∥ ∥a∥ (table p) p a 0 0"
declare KMP_search_def[code]

lemma KMP_search:
   "(i,j) = KMP_search p a ⟹ first_occur p a (if j = ∥p∥ then i - ∥p∥ else i)"
unfolding KMP_search_def
using KMP_correct[OF KMP_invariant_init[of p a]] by auto

subsection ‹Examples›

text ‹Building the table, examples from the KMP paper and from Cormen et al.›
definition "Knuth_pattern = array_of_list [1,2,3,1,2,3,1,3,1,2::nat]"

value "list_of_array (table Knuth_pattern)"

definition "CLR_pattern = array_of_list [1,2,1,2,1,2,1,2,3,1::nat]"

value "list_of_array (table CLR_pattern)"

text ‹Worst-case string searches›

definition bad_list :: "nat ⇒ nat list"
  where "bad_list n = replicate n 0 @ [1]"

definition "bad_pattern = array_of_list (bad_list 1000)"

definition "bad_string = array_of_list (bad_list 2000000)"

definition "worse_string = array_of_list (replicate 2000000 (0::nat))"

definition "lousy_string = array_of_list (concat (replicate 2002 (bad_list 999)))"

value "list_of_array (table bad_pattern)"

text ‹A successful search›
value "KMP_search bad_pattern bad_string"

text ‹The search above from the specification alone, i.e. brute-force›
lemma "matches bad_string (2000001-1001) bad_pattern 0 1001"
  by eval

text ‹Unsuccessful searches›
value "KMP_search bad_pattern worse_string"

text ‹The search above from the specification alone, i.e. brute-force›
lemma "∀k<2000000. ¬ matches worse_string k bad_pattern 0 1001"
  by eval

value "KMP_search lousy_string bad_string"

lemma "∀k < ∥lousy_string∥. ¬ matches lousy_string k bad_pattern 0 1001"
  by eval

subsection ‹Alternative approach, expressing the algorithms as while loops›

definition buildtabW:: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat array option" where
  "buildtabW p nxt i j ≡
  map_option fst (while_option (λ(_, i', _). Suc i' < ∥p∥)
                               (λ(nxt', i', j'). buildtab_step p nxt' i' j')
                               (nxt, i, j))"

lemma buildtabW_halts:
  assumes "buildtab_invariant p nxt i j"
    shows "∃y. buildtabW p nxt i j = Some y"
proof -
  have "∃y. (λp nxt i j. while_option (λ(_, i', _). Suc i' < ∥p∥)
                               (λ(nxt', i', j'). buildtab_step p nxt' i' j')
                               (nxt, i, j)) p nxt i j = Some y"
  proof (rule measure_while_option_Some[of "λ(nxt, i, j). buildtab_invariant p nxt i j" _ _
    "(λp (nxt, i, j). 2 * ∥p∥ - 2 * i + j) p"],
    clarify, rule conjI, goal_cases)
    case (2 nxt i j)
    then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def matches_def is_next_def)
  qed (fastforce simp: assms buildtab_invariant)+
  then show ?thesis unfolding buildtabW_def by blast
qed

lemma buildtabW_correct:
  assumes k: "0<k ∧ k < ∥p∥" and ini: "buildtab_invariant p nxt i j"
  shows "is_next p k (the (buildtabW p nxt i j) !! k)"
proof -
  obtain nxt' i' j' where †:
    "while_option (λ(_, i', _). Suc i' < ∥p∥) (λ(nxt', i', j'). buildtab_step p nxt' i' j') (nxt, i, j) = Some (nxt', i', j')"
    using buildtabW_halts[OF ini] unfolding buildtabW_def by fast
  from while_option_rule[OF _ †, of "λ(nxt, i, j). buildtab_invariant p nxt i j"]
    have "buildtab_invariant p nxt' i' j'" using buildtab_invariant ini by fastforce
  with while_option_stop[OF †] † show ?thesis
    using assms k by (auto simp: is_next_def matches_def buildtab_invariant_def buildtabW_def)
qed

subsubsection ‹Linearity of @{term buildtabW}›

definition T_buildtabW :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat ⇒ nat option" where
  "T_buildtabW p nxt i j t ≡ map_option (λ(_, _, _, r). r)
    (while_option (λ(_, i, _, _). Suc i < ∥p∥)
                  (λ(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))
                  (nxt, i, j, t))"

lemma T_buildtabW_halts:
  assumes "buildtab_invariant p nxt i j"
    shows "∃y. T_buildtabW p nxt i j t = Some y"
proof -
  have "∃y. (while_option (λ(_, i, _, _). Suc i < ∥p∥)
                  (λ(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))
                  (nxt, i, j, t)) = Some y"
  proof (intro measure_while_option_Some[of "λ(nxt, i, j, t). buildtab_invariant p nxt i j" _ _
    "(λp (nxt, i, j, t). 2 * ∥p∥ - 2 * i + j) p"], clarify, rule conjI, goal_cases)
    case (2 nxt i j t)
    then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def is_next_def)
  qed (fastforce simp: assms buildtab_invariant split: prod.splits)+
  then show ?thesis unfolding T_buildtabW_def by blast
qed

lemma T_buildtabW_correct:
  assumes ini: "buildtab_invariant p nxt i j"
  shows "the (T_buildtabW p nxt i j t) ≤ 2*∥p∥ - 2*i + j + t"
proof -
  let ?b = "(λ(nxt', i', j', t'). Suc i' < ∥p∥)"
  let ?c = "(λ(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))"
  let ?s = "(nxt, i, j, t)"
  let ?P1 = "λ(nxt', i', j', t'). buildtab_invariant p nxt' i' j'
    ∧ (if Suc i' < ∥p∥ then Suc t' else t') ≤ 2 * ∥p∥ - (2 * i' - j') + t'"
  let ?P2 = "λ(nxt', i', j', t'). buildtab_invariant p nxt' i' j'
    ∧ 2 * ∥p∥ - 2 * i' + j' + t' ≤ 2 * ∥p∥ - 2 * i + j + t"

  obtain nxt' i' j' t' where †: "(while_option ?b ?c ?s) = Some (nxt', i', j', t')"
    using T_buildtabW_halts[OF ini] unfolding T_buildtabW_def by fast
  have 1: "(⋀s. ?P1 s ⟹ ?b s ⟹ ?P1 (?c s))" proof (clarify, intro conjI, goal_cases)
    case (2 nxt⇩₁ i⇩₁ j⇩₁ t⇩₁ nxt⇩₂ i⇩₂ j⇩₂ t⇩₂)
    then show ?case
      by (auto simp: buildtab_step_def split: if_split_asm)
    qed (insert buildtab_invariant, fastforce split: prod.splits)
  have P1: "?P1 ?s" using ini by auto
  from while_option_rule[OF 1 † P1]
    have invar1: "buildtab_invariant p nxt' i' j'" and
         invar2: "t' ≤ 2 * ∥p∥ - (2 * i' - j') + t'" by blast (simp add: while_option_stop[OF †])
  have "?P2 (nxt', i', j', t')" proof (rule while_option_rule[OF _ †], clarify, intro conjI, goal_cases)
      case (1 nxt⇩₁ i⇩₁ j⇩₁ t⇩₁ nxt⇩₂ i⇩₂ j⇩₂ t⇩₂)
      with buildtab_invariant[OF 1(3)] show invar: ?case by (auto split: prod.splits)
    next
      case (2 nxt⇩₁ i⇩₁ j⇩₁ t⇩₁ nxt⇩₂ i⇩₂ j⇩₂ t⇩₂)
      with 2(4) show ?case
        by (auto 0 2 simp: buildtab_step_def buildtab_invariant_def is_next_def split: if_split_asm)
    qed (use ini in simp)
  with invar1 invar2 † have "t' ≤ 2 * ∥p∥ - 2 * i + j + t" by simp
  with † show ?thesis by (simp add: T_buildtabW_def)
qed

lemma T_buildtabW_linear:
  assumes "2 ≤ ∥p∥"
  shows "the (T_buildtabW p (array 0 ∥p∥) 1 0 0) ≤ 2*(∥p∥ - 1)"
  using assms T_buildtabW_correct [OF buildtab_invariant_init, of p 0] by linarith

subsubsection ‹The actual string search algorithm›
definition searchW :: "nat ⇒ nat ⇒ nat array ⇒ 'a array ⇒ 'a array ⇒ nat ⇒ nat ⇒ (nat * nat) option" where
  "searchW m n nxt p a i j = while_option (λ(i, j). j < m ∧ i < n) (λ(i,j). KMP_step p nxt a i j) (i,j)"

lemma searchW_halts:
  assumes "KMP_invariant p a i j"
    shows "∃y. searchW ∥p∥ ∥a∥ (table p) p a i j = Some y"
      unfolding searchW_def
proof ((intro measure_while_option_Some[of "λ(i,j). KMP_invariant p a i j" _ _
  "λ(i, j). 2 * ∥a∥ - 2 * i + j"], rule conjI; clarify), goal_cases)
  case (2 i j)
    moreover obtain i' j' where †: "(i', j') = KMP_step p (table p) a i j" by (metis surj_pair)
    moreover have "KMP_invariant p a i' j'" using † KMP_invariant[OF 2(1) 2(2) 2(3)] by auto
    ultimately have "2 * ∥a∥ - 2 * i' + j' < 2 * ∥a∥ - 2 * i + j"
        using is_next_table[of j p]
      by (auto simp: KMP_invariant_def KMP_step_def matches_def is_next_def split: if_split_asm)
  then show ?case using † by (auto split: prod.splits)
qed (use KMP_invariant assms in fastforce)+

lemma KMP_correctW:
  assumes ini: "KMP_invariant p a i j"
  defines [simp]: "nxt ≡ table p"
  shows "let (i',j') = the (searchW ∥p∥ ∥a∥ nxt p a i j) in first_occur p a (if j' = ∥p∥ then i' - ∥p∥ else i')"
proof -
  obtain i' j' where †: "while_option (λ(i, j). j < ∥p∥ ∧ i < ∥a∥) (λ(i,j). KMP_step p nxt a i j) (i,j) = Some (i', j')"
    using searchW_halts[OF ini] by (auto simp: searchW_def)
  have "KMP_invariant p a i' j'"
    using while_option_rule[OF _ †, of "λ(i', j'). KMP_invariant p a i' j'"] ini KMP_invariant by fastforce
  with † while_option_stop[OF †] show ?thesis
    by (auto simp: searchW_def KMP_invariant_def first_occur_def matches_def)
qed

definition KMP_searchW :: "'a array ⇒ 'a array ⇒ nat × nat" where
  "KMP_searchW p a = the (searchW ∥p∥ ∥a∥ (table p) p a 0 0)"
declare KMP_searchW_def[code]

lemma KMP_searchW:
   "(i,j) = KMP_searchW p a ⟹ first_occur p a (if j = ∥p∥ then i - ∥p∥ else i)"
unfolding KMP_searchW_def
using KMP_correctW[OF KMP_invariant_init[of p a]] by auto

end