Theory Aho_Corasick
theory Aho_Corasick
imports "HOL-Library.Sublist"
begin
section ‹Aho-Corasick String Matching›
text ‹
The original Aho-Corasick string matching algorithm was introduced by Aho
and Corasick \<^cite>‹AhoCorasick1975›. It searches for all occurrences of
a finite dictionary of patterns in a text. Its central automaton invariant
is that, after reading a text prefix, the current state is the longest suffix
of the processed text that is also a prefix of a dictionary pattern.
This theory formalizes that canonical Aho-Corasick state and an executable
reference search procedure over arbitrary lists. For each consumed text
prefix, the reference transition recomputes the canonical state by scanning
the finite list of pattern prefixes; this is the abstract automaton semantics
that the usual failure-link implementation realizes more efficiently. We
also derive a state-only transition theorem, a fold-based scan invariant, and
a recursive failure-link transition that refines the canonical transition.
The main theorem proves that the reports produced by the search are exactly
the pattern occurrences ending at each text position.
AI assistance was used for proof engineering. The final definitions,
statements, and proofs are checked by Isabelle.
›
subsection ‹Pattern-prefix states›
definition ac_states :: "'a list list ⇒ 'a list list" where
"ac_states pats = remdups ([] # concat (map prefixes pats))"
lemma set_ac_states:
"set (ac_states pats) = insert [] {q. ∃p∈set pats. prefix q p}"
by (auto simp: ac_states_def)
lemma Nil_in_ac_states [simp]: "[] ∈ set (ac_states pats)"
by (simp add: set_ac_states)
fun longest_list :: "'a list list ⇒ 'a list" where
"longest_list [] = []"
| "longest_list (x # xs) =
(let y = longest_list xs in if length x < length y then y else x)"
lemma longest_list_member:
assumes "xs ≠ []"
shows "longest_list xs ∈ set xs"
using assms
proof (induction xs)
case Nil
then show ?case by simp
next
case (Cons x xs)
show ?case
proof (cases "xs = []")
case True
then show ?thesis by simp
next
case False
let ?y = "longest_list xs"
from False have y_mem: "?y ∈ set xs"
using Cons.IH by simp
show ?thesis
proof (cases "length x < length ?y")
case True
then show ?thesis using y_mem by simp
next
case False
then show ?thesis by simp
qed
qed
qed
lemma longest_list_longest:
assumes "x ∈ set xs"
shows "length x ≤ length (longest_list xs)"
using assms
proof (induction xs arbitrary: x)
case Nil
then show ?case by simp
next
case (Cons y ys)
show ?case
proof (cases "ys = []")
case True
then show ?thesis using Cons.prems by auto
next
case False
let ?z = "longest_list ys"
have z_long: "⋀u. u ∈ set ys ⟹ length u ≤ length ?z"
using Cons.IH by blast
show ?thesis
proof (cases "length y < length ?z")
case True
then show ?thesis using Cons.prems z_long by auto
next
case le: False
then have z_le_y: "length ?z ≤ length y" by simp
show ?thesis
proof (cases "x = y")
case True
then show ?thesis using le by auto
next
case neq: False
then have "x ∈ set ys"
using Cons.prems by simp
then have "length x ≤ length ?z"
using z_long by simp
also have "… ≤ length y"
using z_le_y .
finally show ?thesis using le by auto
qed
qed
qed
qed
subsection ‹The canonical Aho-Corasick state›
text ‹
The canonical state for a word ‹w› is chosen among the finite list of all
pattern prefixes. Since the empty list is always a state and always a suffix,
the candidate list is nonempty. The selected state is therefore a suffix of
‹w›, a valid pattern-prefix state, and at least as long as any other valid
suffix state.
›
definition state_candidates :: "'a list list ⇒ 'a list ⇒ 'a list list" where
"state_candidates pats w = filter (λq. suffix q w) (ac_states pats)"
definition ac_state :: "'a list list ⇒ 'a list ⇒ 'a list" where
"ac_state pats w = longest_list (state_candidates pats w)"
lemma state_candidates_nonempty: "state_candidates pats w ≠ []"
proof -
have "[] ∈ set (state_candidates pats w)"
by (simp add: state_candidates_def)
then show ?thesis by auto
qed
lemma ac_state_in_states: "ac_state pats w ∈ set (ac_states pats)"
using longest_list_member[OF state_candidates_nonempty]
by (auto simp: ac_state_def state_candidates_def)
lemma ac_state_suffix: "suffix (ac_state pats w) w"
using longest_list_member[OF state_candidates_nonempty]
by (auto simp: ac_state_def state_candidates_def)
lemma ac_state_longest:
assumes "q ∈ set (ac_states pats)"
assumes "suffix q w"
shows "length q ≤ length (ac_state pats w)"
using assms longest_list_longest[of q "state_candidates pats w"]
by (auto simp: ac_state_def state_candidates_def)
lemma pattern_is_state:
assumes "p ∈ set pats"
shows "p ∈ set (ac_states pats)"
using assms by (auto simp: set_ac_states)
lemma ac_state_snoc_prefix_closed:
assumes "r @ [x] ∈ set (ac_states pats)"
shows "r ∈ set (ac_states pats)"
using assms by (auto simp: set_ac_states dest: append_prefixD)
lemma pattern_suffix_ac_state_iff:
assumes "p ∈ set pats"
shows "suffix p (ac_state pats w) ⟷ suffix p w"
proof
assume "suffix p (ac_state pats w)"
then show "suffix p w"
using ac_state_suffix suffix_order.order_trans by blast
next
assume p_suffix: "suffix p w"
have "length p ≤ length (ac_state pats w)"
using ac_state_longest[OF pattern_is_state[OF assms] p_suffix] .
then show "suffix p (ac_state pats w)"
using suffix_length_suffix[OF p_suffix ac_state_suffix] by blast
qed
lemma ac_state_Nil [simp]: "ac_state pats [] = []"
using ac_state_suffix[of pats "[]"] by simp
subsection ‹State-only transition›
text ‹
The next theorem captures the automaton nature of the construction. Although
the canonical state is defined from the whole consumed text prefix, the next
canonical state can be computed from only the current state and the next
symbol.
›
definition ac_step :: "'a list list ⇒ 'a list ⇒ 'a ⇒ 'a list" where
"ac_step pats q x = ac_state pats (q @ [x])"
lemma suffix_snoc_mono:
assumes "suffix q w"
shows "suffix (q @ [x]) (w @ [x])"
using assms by (auto simp: suffix_def)
lemma state_suffix_next_window:
assumes q: "q = ac_state pats w"
assumes r_state: "r ∈ set (ac_states pats)"
assumes r_suffix: "suffix r (w @ [x])"
shows "suffix r (q @ [x])"
proof (cases r rule: rev_cases)
case Nil
then show ?thesis by simp
next
case (snoc r' y)
from r_suffix snoc have y: "y = x" and r'_suffix: "suffix r' w"
by auto
have r'_state: "r' ∈ set (ac_states pats)"
using r_state snoc by (auto intro: ac_state_snoc_prefix_closed)
have "length r' ≤ length q"
using ac_state_longest[OF r'_state r'_suffix] q by simp
moreover have "suffix q w"
using q ac_state_suffix by simp
ultimately have "suffix r' q"
using suffix_length_suffix[OF r'_suffix] by blast
with snoc y show ?thesis by simp
qed
theorem ac_state_snoc:
"ac_state pats (w @ [x]) = ac_step pats (ac_state pats w) x"
proof -
let ?q = "ac_state pats w"
let ?left = "ac_state pats (w @ [x])"
let ?right = "ac_state pats (?q @ [x])"
have left_in: "?left ∈ set (ac_states pats)"
by (rule ac_state_in_states)
have left_suffix_full: "suffix ?left (w @ [x])"
by (rule ac_state_suffix)
have left_suffix_q: "suffix ?left (?q @ [x])"
using state_suffix_next_window[
where q="?q" and pats=pats and w=w and r="?left" and x=x]
left_in left_suffix_full by simp
have right_suffix_q: "suffix ?right (?q @ [x])"
by (rule ac_state_suffix)
have left_le_right: "length ?left ≤ length ?right"
using ac_state_longest[OF left_in left_suffix_q] .
have left_suffix_right: "suffix ?left ?right"
using suffix_length_suffix[OF left_suffix_q right_suffix_q left_le_right] .
have q_suffix: "suffix ?q w"
by (rule ac_state_suffix)
have right_in: "?right ∈ set (ac_states pats)"
by (rule ac_state_in_states)
have right_suffix_w: "suffix ?right (w @ [x])"
using suffix_snoc_mono[OF q_suffix, of x] right_suffix_q
suffix_order.order_trans by blast
have right_le_left: "length ?right ≤ length ?left"
using ac_state_longest[OF right_in right_suffix_w] .
have right_suffix_left: "suffix ?right ?left"
using suffix_length_suffix[OF right_suffix_w ac_state_suffix right_le_left] .
have "?left = ?right"
using left_suffix_right right_suffix_left by (rule suffix_order.antisym)
then show ?thesis
by (simp add: ac_step_def)
qed
text ‹
Iterating the state transition over a text fragment therefore tracks the
canonical state for the whole consumed prefix.
›
theorem ac_state_foldl_from:
assumes "q = ac_state pats w"
shows "foldl (ac_step pats) q xs = ac_state pats (w @ xs)"
using assms
proof (induction xs arbitrary: w q)
case Nil
then show ?case by simp
next
case (Cons x xs)
let ?w' = "w @ [x]"
have step: "ac_step pats q x = ac_state pats ?w'"
using Cons.prems ac_state_snoc[of pats w x] by simp
have "foldl (ac_step pats) (ac_state pats ?w') xs =
ac_state pats (?w' @ xs)"
using Cons.IH[where w="?w'" and q="ac_state pats ?w'"] by simp
then show ?case
using step by simp
qed
theorem ac_state_foldl:
"foldl (ac_step pats) [] xs = ac_state pats xs"
using ac_state_foldl_from[where pats=pats and w="[]" and q="[]" and xs=xs]
by simp
theorem ac_state_foldl_take:
"foldl (ac_step pats) [] (take i text) = ac_state pats (take i text)"
by (simp add: ac_state_foldl)
subsection ‹Failure-link transition refinement›
text ‹
The failure link of a state is the longest proper suffix that is also a
pattern-prefix state. The recursive failure-link transition follows these
links until it finds a state that can consume the next symbol, or returns to
the empty state. The main theorem of this subsection proves that this
recognizable Aho-Corasick transition refines the canonical transition above.
›
definition proper_suffix_state_candidates :: "'a list list ⇒ 'a list ⇒ 'a list list" where
"proper_suffix_state_candidates pats q =
filter (λr. suffix r q ∧ r ≠ q) (ac_states pats)"
definition ac_fail :: "'a list list ⇒ 'a list ⇒ 'a list" where
"ac_fail pats q = longest_list (proper_suffix_state_candidates pats q)"
lemma proper_suffix_state_candidates_nonempty:
assumes "q ≠ []"
shows "proper_suffix_state_candidates pats q ≠ []"
proof -
have "[] ∈ set (proper_suffix_state_candidates pats q)"
using assms by (simp add: proper_suffix_state_candidates_def)
then show ?thesis by auto
qed
lemma ac_fail_in_states:
assumes "q ≠ []"
shows "ac_fail pats q ∈ set (ac_states pats)"
using longest_list_member[OF proper_suffix_state_candidates_nonempty[OF assms]]
by (auto simp: ac_fail_def proper_suffix_state_candidates_def)
lemma ac_fail_suffix:
assumes "q ≠ []"
shows "suffix (ac_fail pats q) q"
using longest_list_member[OF proper_suffix_state_candidates_nonempty[OF assms]]
by (auto simp: ac_fail_def proper_suffix_state_candidates_def)
lemma ac_fail_proper:
assumes "q ≠ []"
shows "ac_fail pats q ≠ q"
using longest_list_member[OF proper_suffix_state_candidates_nonempty[OF assms]]
by (auto simp: ac_fail_def proper_suffix_state_candidates_def)
lemma length_ac_fail_less:
assumes "q ≠ []"
shows "length (ac_fail pats q) < length q"
proof -
obtain zs where q: "q = zs @ ac_fail pats q"
using ac_fail_suffix[OF assms] by (auto simp: suffix_def)
have zs_ne: "zs ≠ []"
proof
assume "zs = []"
then have "ac_fail pats q = q"
using q by simp
then show False
using ac_fail_proper[OF assms] by simp
qed
have "length q = length zs + length (ac_fail pats q)"
by (subst q, simp)
moreover have "0 < length zs"
using zs_ne by simp
ultimately show ?thesis
by simp
qed
lemma ac_fail_longest:
assumes "r ∈ set (ac_states pats)"
assumes "suffix r q"
assumes "r ≠ q"
shows "length r ≤ length (ac_fail pats q)"
using assms longest_list_longest[of r "proper_suffix_state_candidates pats q"]
by (auto simp: ac_fail_def proper_suffix_state_candidates_def)
lemma ac_state_self_if_state:
assumes "q ∈ set (ac_states pats)"
shows "ac_state pats q = q"
proof -
have q_le_state: "length q ≤ length (ac_state pats q)"
using ac_state_longest[OF assms, of q] by simp
have state_suffix_q: "suffix (ac_state pats q) q"
by (rule ac_state_suffix)
have q_suffix_state: "suffix q (ac_state pats q)"
using suffix_length_suffix[OF _ state_suffix_q q_le_state] by simp
show ?thesis
using state_suffix_q q_suffix_state by (rule suffix_order.antisym)
qed
lemma ac_state_singleton_if_no_state:
assumes "[x] ∉ set (ac_states pats)"
shows "ac_state pats [x] = []"
proof -
have state: "ac_state pats [x] ∈ set (ac_states pats)"
by (rule ac_state_in_states)
have suffix: "suffix (ac_state pats [x]) [x]"
by (rule ac_state_suffix)
have "ac_state pats [x] = [] ∨ ac_state pats [x] = [x]"
proof -
obtain zs where "[x] = zs @ ac_state pats [x]"
using suffix by (auto simp: suffix_def)
then show ?thesis
by (cases zs; cases "ac_state pats [x]") auto
qed
then show ?thesis
using assms state by auto
qed
lemma state_suffix_fail_next:
assumes q_ne: "q ≠ []"
assumes qx_not_state: "q @ [x] ∉ set (ac_states pats)"
assumes r_state: "r ∈ set (ac_states pats)"
assumes r_suffix: "suffix r (q @ [x])"
shows "suffix r (ac_fail pats q @ [x])"
proof (cases r rule: rev_cases)
case Nil
then show ?thesis by simp
next
case (snoc r' y)
from r_suffix snoc have y: "y = x" and r'_suffix: "suffix r' q"
by auto
have r'_state: "r' ∈ set (ac_states pats)"
using r_state snoc by (auto intro: ac_state_snoc_prefix_closed)
have r'_proper: "r' ≠ q"
using qx_not_state r_state snoc y by auto
have "length r' ≤ length (ac_fail pats q)"
using ac_fail_longest[OF r'_state r'_suffix r'_proper] .
moreover have "suffix (ac_fail pats q) q"
using ac_fail_suffix[OF q_ne] .
ultimately have "suffix r' (ac_fail pats q)"
using suffix_length_suffix[OF r'_suffix] by blast
with snoc y show ?thesis by simp
qed
lemma ac_step_fail_eq:
assumes q_state: "q ∈ set (ac_states pats)"
assumes q_ne: "q ≠ []"
assumes qx_not_state: "q @ [x] ∉ set (ac_states pats)"
shows "ac_step pats (ac_fail pats q) x = ac_step pats q x"
proof -
let ?fail = "ac_fail pats q"
let ?left = "ac_state pats (q @ [x])"
let ?right = "ac_state pats (?fail @ [x])"
have left_in: "?left ∈ set (ac_states pats)"
by (rule ac_state_in_states)
have left_suffix_q: "suffix ?left (q @ [x])"
by (rule ac_state_suffix)
have left_suffix_fail: "suffix ?left (?fail @ [x])"
using state_suffix_fail_next[OF q_ne qx_not_state left_in left_suffix_q] .
have right_suffix_fail: "suffix ?right (?fail @ [x])"
by (rule ac_state_suffix)
have left_le_right: "length ?left ≤ length ?right"
using ac_state_longest[OF left_in left_suffix_fail] .
have left_suffix_right: "suffix ?left ?right"
using suffix_length_suffix[OF left_suffix_fail right_suffix_fail left_le_right] .
have fail_suffix_q: "suffix ?fail q"
using ac_fail_suffix[OF q_ne] .
have right_in: "?right ∈ set (ac_states pats)"
by (rule ac_state_in_states)
have right_suffix_q: "suffix ?right (q @ [x])"
using suffix_snoc_mono[OF fail_suffix_q, of x] right_suffix_fail
suffix_order.order_trans by blast
have right_le_left: "length ?right ≤ length ?left"
using ac_state_longest[OF right_in right_suffix_q] .
have right_suffix_left: "suffix ?right ?left"
using suffix_length_suffix[OF right_suffix_q ac_state_suffix right_le_left] .
have "?left = ?right"
using left_suffix_right right_suffix_left by (rule suffix_order.antisym)
then show ?thesis
by (simp add: ac_step_def)
qed
function ac_fail_step :: "'a list list ⇒ 'a list ⇒ 'a ⇒ 'a list" where
"ac_fail_step pats q x =
(if q @ [x] ∈ set (ac_states pats) then q @ [x]
else if q = [] then []
else ac_fail_step pats (ac_fail pats q) x)"
by pat_completeness auto
termination
by (relation "measure (λ(pats, q, x). length q)")
(auto simp: length_ac_fail_less)
theorem ac_fail_step_refines:
assumes "q ∈ set (ac_states pats)"
shows "ac_fail_step pats q x = ac_step pats q x"
using assms
proof (induction pats q x rule: ac_fail_step.induct)
case (1 pats q x)
show ?case
proof -
consider
(next_state) "q @ [x] ∈ set (ac_states pats)"
| (root) "q @ [x] ∉ set (ac_states pats)" "q = []"
| (fail) "q @ [x] ∉ set (ac_states pats)" "q ≠ []"
by blast
then show ?thesis
proof cases
case next_state
then show ?thesis
by (simp add: ac_step_def ac_state_self_if_state)
next
case root
then show ?thesis
using ac_state_singleton_if_no_state[of x pats]
by (simp add: ac_step_def)
next
case fail
have fail_state: "ac_fail pats q ∈ set (ac_states pats)"
using ac_fail_in_states[OF fail(2)] .
have "ac_fail_step pats q x = ac_fail_step pats (ac_fail pats q) x"
using fail by simp
also have "… = ac_step pats (ac_fail pats q) x"
using "1.IH"[OF fail fail_state] .
also have "… = ac_step pats q x"
using ac_step_fail_eq[OF "1.prems" fail(2) fail(1)] .
finally show ?thesis .
qed
qed
qed
theorem ac_fail_state_foldl_from:
assumes "q = ac_state pats w"
shows "foldl (ac_fail_step pats) q xs = ac_state pats (w @ xs)"
using assms
proof (induction xs arbitrary: w q)
case Nil
then show ?case by simp
next
case (Cons x xs)
let ?w' = "w @ [x]"
have q_state: "q ∈ set (ac_states pats)"
using Cons.prems ac_state_in_states by simp
have step: "ac_fail_step pats q x = ac_state pats ?w'"
using ac_fail_step_refines[OF q_state, of x] Cons.prems ac_state_snoc[of pats w x]
by simp
have rec: "foldl (ac_fail_step pats) (ac_state pats ?w') xs =
ac_state pats (?w' @ xs)"
by (rule Cons.IH) simp
have "foldl (ac_fail_step pats) q (x # xs) =
foldl (ac_fail_step pats) (ac_state pats ?w') xs"
by (simp only: foldl.simps step)
also have "… = ac_state pats (?w' @ xs)"
using rec .
also have "… = ac_state pats (w @ x # xs)"
by simp
finally show ?case .
qed
theorem ac_fail_state_foldl:
"foldl (ac_fail_step pats) [] xs = ac_state pats xs"
using ac_fail_state_foldl_from[where pats=pats and w="[]" and q="[]" and xs=xs]
by simp
theorem ac_fail_state_foldl_take:
"foldl (ac_fail_step pats) [] (take i text) = ac_state pats (take i text)"
by (simp add: ac_fail_state_foldl)
subsection ‹Search and correctness›
text ‹
The output function filters the dictionary to the patterns that are suffixes
of the current state. By the canonical-state invariant, this is equivalent
to filtering the dictionary to the patterns that occur ending at the current
text position.
›
definition ac_output :: "'a list list ⇒ 'a list ⇒ 'a list list" where
"ac_output pats q = filter (λp. suffix p q) pats"
theorem ac_output_state:
"set (ac_output pats (ac_state pats w)) =
{p. p ∈ set pats ∧ suffix p w}"
by (auto simp: ac_output_def pattern_suffix_ac_state_iff)
fun ac_search_from ::
"'a list list ⇒ 'a list ⇒ nat ⇒ 'a list ⇒ ('a list × nat) list"
where
"ac_search_from pats w i [] = []"
| "ac_search_from pats w i (x # xs) =
(let w' = w @ [x];
q = ac_state pats w'
in map (λp. (p, i)) (ac_output pats q) @
ac_search_from pats w' (Suc i) xs)"
definition ac_search :: "'a list list ⇒ 'a list ⇒ ('a list × nat) list" where
"ac_search pats text = ac_search_from pats [] 0 text"
fun ac_search_states_from ::
"'a list list ⇒ 'a list ⇒ nat ⇒ 'a list ⇒ ('a list × nat) list"
where
"ac_search_states_from pats q i [] = []"
| "ac_search_states_from pats q i (x # xs) =
(let q' = ac_step pats q x in
map (λp. (p, i)) (ac_output pats q') @
ac_search_states_from pats q' (Suc i) xs)"
definition ac_search_states :: "'a list list ⇒ 'a list ⇒ ('a list × nat) list" where
"ac_search_states pats text = ac_search_states_from pats [] 0 text"
fun ac_search_fail_from ::
"'a list list ⇒ 'a list ⇒ nat ⇒ 'a list ⇒ ('a list × nat) list"
where
"ac_search_fail_from pats q i [] = []"
| "ac_search_fail_from pats q i (x # xs) =
(let q' = ac_fail_step pats q x in
map (λp. (p, i)) (ac_output pats q') @
ac_search_fail_from pats q' (Suc i) xs)"
definition ac_search_fail :: "'a list list ⇒ 'a list ⇒ ('a list × nat) list" where
"ac_search_fail pats text = ac_search_fail_from pats [] 0 text"
lemma ac_search_states_from_eq:
assumes "q = ac_state pats w"
shows "ac_search_states_from pats q i xs = ac_search_from pats w i xs"
using assms
proof (induction xs arbitrary: w q i)
case Nil
then show ?case by simp
next
case (Cons x xs)
let ?w' = "w @ [x]"
have step: "ac_step pats q x = ac_state pats ?w'"
using Cons.prems ac_state_snoc[of pats w x] by simp
have rec:
"ac_search_states_from pats (ac_state pats ?w') (Suc i) xs =
ac_search_from pats ?w' (Suc i) xs"
using Cons.IH[where w="?w'" and q="ac_state pats ?w'" and i="Suc i"] by simp
show ?case
using step rec by (simp add: Let_def)
qed
theorem ac_search_states_eq:
"ac_search_states pats text = ac_search pats text"
unfolding ac_search_states_def ac_search_def
by (rule ac_search_states_from_eq) simp
lemma ac_search_fail_from_eq:
assumes "q = ac_state pats w"
shows "ac_search_fail_from pats q i xs = ac_search_from pats w i xs"
using assms
proof (induction xs arbitrary: w q i)
case Nil
then show ?case by simp
next
case (Cons x xs)
let ?w' = "w @ [x]"
have q_state: "q ∈ set (ac_states pats)"
using Cons.prems ac_state_in_states by simp
have step: "ac_fail_step pats q x = ac_state pats ?w'"
using ac_fail_step_refines[OF q_state, of x] Cons.prems ac_state_snoc[of pats w x]
by simp
have rec:
"ac_search_fail_from pats (ac_state pats ?w') (Suc i) xs =
ac_search_from pats ?w' (Suc i) xs"
using Cons.IH[where w="?w'" and q="ac_state pats ?w'" and i="Suc i"] by simp
show ?case
using step rec by (simp add: Let_def)
qed
theorem ac_search_fail_eq:
"ac_search_fail pats text = ac_search pats text"
unfolding ac_search_fail_def ac_search_def
by (rule ac_search_fail_from_eq) simp
subsection ‹Executable example›
text ‹
The following concrete input exercises overlapping dictionary patterns for
both executable search procedures. The ‹eval› proof method compiles the
definitions to ML, evaluates them, and checks the resulting equations.
›
lemma ac_search_states_example:
"ac_search_states [[0], [0,1], [1,0,1], [1,2], [1,2,0], [2], [2,0,0]]
[0,1,2,2,0,1::nat] =
[([0], 0), ([0, 1], 1), ([1, 2], 2), ([2], 2), ([2], 3), ([0], 4), ([0, 1], 5)]"
by eval
lemma ac_search_fail_example:
"ac_search_fail [[0], [0,1], [1,0,1], [1,2], [1,2,0], [2], [2,0,0]]
[0,1,2,2,0,1::nat] =
[([0], 0), ([0, 1], 1), ([1, 2], 2), ([2], 2), ([2], 3), ([0], 4), ([0, 1], 5)]"
by eval
definition occurs_ending_at :: "'a list ⇒ 'a list ⇒ nat ⇒ bool" where
"occurs_ending_at pat text i ⟷
i < length text ∧ suffix pat (take (Suc i) text)"
lemma in_ac_search_from_iff:
"(p, k) ∈ set (ac_search_from pats w i xs) ⟷
(∃j < length xs.
k = i + j ∧
p ∈ set pats ∧
suffix p (ac_state pats (w @ take (Suc j) xs)))"
proof (induction xs arbitrary: w i k)
case Nil
then show ?case by simp
next
case (Cons x xs)
let ?w' = "w @ [x]"
show ?case (is "?lhs ⟷ ?rhs")
proof
assume ?lhs
then have mem:
"(p, k) ∈
set (map (λp. (p, i)) (ac_output pats (ac_state pats ?w')) @
ac_search_from pats ?w' (Suc i) xs)"
by (simp add: Let_def)
have out_or_rec:
"p ∈ set (ac_output pats (ac_state pats ?w')) ∧ k = i ∨
(p, k) ∈ set (ac_search_from pats ?w' (Suc i) xs)"
using mem by auto
then show ?rhs
proof
assume "p ∈ set (ac_output pats (ac_state pats ?w')) ∧ k = i"
then have p: "p ∈ set pats"
and k: "k = i"
and s: "suffix p (ac_state pats ?w')"
by (auto simp: ac_output_def)
then show ?rhs
using p k s by (intro exI[of _ 0]) simp
next
assume rec: "(p, k) ∈ set (ac_search_from pats ?w' (Suc i) xs)"
then obtain j where j: "j < length xs"
and k: "k = Suc i + j"
and p: "p ∈ set pats"
and s: "suffix p (ac_state pats (?w' @ take (Suc j) xs))"
using Cons.IH[where w="?w'" and i="Suc i" and k=k] by auto
have "?w' @ take (Suc j) xs = w @ take (Suc (Suc j)) (x # xs)"
by simp
moreover have "k = i + Suc j"
using k by simp
ultimately show ?rhs
using j p s by (intro exI[of _ "Suc j"]) auto
qed
next
assume ?rhs
then obtain j where j: "j < length (x # xs)"
and k: "k = i + j"
and p: "p ∈ set pats"
and s: "suffix p (ac_state pats (w @ take (Suc j) (x # xs)))"
by auto
show ?lhs
proof (cases j)
case 0
then have "p ∈ set (ac_output pats (ac_state pats ?w'))"
using p s by (simp add: ac_output_def)
with 0 k show ?thesis by (simp add: Let_def)
next
case (Suc j')
then have j': "j' < length xs"
using j by simp
have "?w' @ take (Suc j') xs = w @ take (Suc j) (x # xs)"
using Suc by simp
then have rec_suffix:
"suffix p (ac_state pats (?w' @ take (Suc j') xs))"
using s by simp
have "(p, k) ∈ set (ac_search_from pats ?w' (Suc i) xs)"
using Cons.IH[where w="?w'" and i="Suc i" and k=k] j' k p rec_suffix Suc by auto
then show ?thesis by (simp add: Let_def)
qed
qed
qed
theorem ac_search_correct:
"set (ac_search pats text) =
{(p, i). p ∈ set pats ∧ occurs_ending_at p text i}"
proof
show "set (ac_search pats text) ⊆
{(p, i). p ∈ set pats ∧ occurs_ending_at p text i}"
proof
fix x
assume xin: "x ∈ set (ac_search pats text)"
obtain p i where x: "x = (p, i)"
by (cases x)
have pin: "(p, i) ∈ set (ac_search pats text)"
using xin by (simp add: x)
then have pin': "(p, i) ∈ set (ac_search_from pats [] 0 text)"
by (simp add: ac_search_def)
then obtain j where j: "j < length text"
and i: "i = j"
and p: "p ∈ set pats"
and s: "suffix p (ac_state pats (take (Suc j) text))"
by (auto simp: in_ac_search_from_iff)
have "suffix p (take (Suc j) text)"
using pattern_suffix_ac_state_iff[OF p, of "take (Suc j) text"] s by simp
then have "suffix p (take (Suc i) text)"
using i by simp
with j i p show "x ∈ {(p, i). p ∈ set pats ∧ occurs_ending_at p text i}"
by (auto simp: x occurs_ending_at_def)
qed
next
show "{(p, i). p ∈ set pats ∧ occurs_ending_at p text i} ⊆
set (ac_search pats text)"
proof
fix x
assume xin: "x ∈ {(p, i). p ∈ set pats ∧ occurs_ending_at p text i}"
obtain p i where x: "x = (p, i)"
by (cases x)
from xin have p: "p ∈ set pats"
and i: "i < length text"
and s: "suffix p (take (Suc i) text)"
by (auto simp: x occurs_ending_at_def)
have "suffix p (ac_state pats (take (Suc i) text))"
using pattern_suffix_ac_state_iff[OF p, of "take (Suc i) text"] s by simp
with p i have "(p, i) ∈ set (ac_search_from pats [] 0 text)"
by (auto simp: in_ac_search_from_iff)
then show "x ∈ set (ac_search pats text)"
by (simp add: x ac_search_def)
qed
qed
theorem ac_search_states_correct:
"set (ac_search_states pats text) =
{(p, i). p ∈ set pats ∧ occurs_ending_at p text i}"
by (simp add: ac_search_states_eq ac_search_correct)
theorem ac_search_fail_correct:
"set (ac_search_fail pats text) =
{(p, i). p ∈ set pats ∧ occurs_ending_at p text i}"
by (simp add: ac_search_fail_eq ac_search_correct)
end