Theory Simple_Tabulation_Hashing

theory Simple_Tabulation_Hashing
imports
  Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
  Dependent_Product
  Xor_Inst
  Vec_Extras
begin

unbundle Xor.xor_syntax
unbundle Fixed_Length_Vector.vec_syntax
unbundle no Finite_Cartesian_Product.vec_syntax (* imported from HOL-Analysis *)
hide_type Finite_Cartesian_Product.vec

subsection ‹Intro rules, contrapositives and bijections›
lemma (in prob_space) k_wise_indep_varsI:
  assumes "⋀a J. ⟦ J ⊆ I; card J ≤ k; finite J ⟧ ⟹
     prob {ω. ∀i∈J. X i ω = a i} = (∏i∈J. prob {ω. X i ω = a i})"
          "M = measure_pmf p"
  shows "k_wise_indep_vars k (λ_. count_space UNIV) X I"
  apply (simp only: k_wise_indep_vars_def prob_space_axioms)
  apply (clarsimp, rule indep_vars_pmf, rule assms(2))
  by (metis (no_types, lifting) assms(1) card_mono order_trans_rules(23))

(* used for the proof of non-4-independence *)
lemma (in prob_space) indep_vars_pmf_contrapos:
  assumes "prob {ω. ∀x∈J. P x (X' x ω)} ≠ (∏x∈J. prob {ω. P x (X' x ω)})"
          "finite J" "M = measure_pmf p"
  shows "¬ indep_vars (λ_. count_space UNIV) X' J"
  apply (rule contrapos_nn[OF assms(1)])
  using assms by (intro split_indep_events; force)

locale (*tag:important*) bij_betw_funcsetE begin

text ‹These lemmas prove bijections between sets of 2-arity functions and their decompositions

For example, a function $f: [0,100] \rightharpoonup \{A,B,C\} \rightharpoonup \{True,False\}$ can be decomposed into 3 parts:

$\begin{gathered}
[0,100]\rightharpoonup\{A,B,C\}\rightharpoonup\{True,False\} \ \\
\rightleftharpoons [1,100]\rightharpoonup\{A,B,C\}\rightharpoonup\{True,False\} \times \\
\{A,C\}\rightharpoonup\{True,False\} \times \\
\{True,False\} \
\end{gathered}$

\begin{itemize}
  \item base: $[1,100] \rightharpoonup \{A,B,C\} \rightharpoonup \{True,False\}$ (split the 0-index, leaving 1-100 intact)
  \item $z_0$: $\{A,C\} \rightharpoonup \{True,False\}$ (represents the 0-index, but further split the B-index)
  \item $z_b$: $B \in \{True,False\}$ (decides the value for $f\ 0\ B$)
\end{itemize}

$f$ is equivalent to its reconstruction, $f = \mathop{base}(0 := z_0(B := z_b))$›

context
  fixes α and A B :: "_ set"
  assumes α_in_A: "α ∈ A"
begin

lemma bij_PiE_remove_point:
  "bij_betw (λ (f, v). f(α := v)) ((A - {α} →⇩_E B) × B) (A →⇩_E B)"
proof (intro bij_betw_imageI inj_onI, goal_cases "inj" "surj")
  case (inj _ _)
  then show ?case
    apply (simp add: case_prod_beta' image_iff prod_eq_iff set_eq_iff mem_Times_iff fun_eq_iff)
    by (metis Diff_iff PiE_arb insertCI)
next
  case surj
  then show ?case
  proof (intro equalityI subsetI)
    fix h assume "h ∈ A →⇩_E B"
    with α_in_A have "h α ∈ B" "h(α := undefined) ∈ A - {α} →⇩_E B"
      by (auto simp: PiE_def extensional_def)
    then show "h ∈ (λ (f, v). f(α := v)) ` ((A - {α} →⇩_E B) × B)"
      apply (simp add: case_prod_beta' image_iff)
      by (metis fun_upd_upd fun_upd_triv)
  next
    fix h assume "h ∈ (λ(f, v). f(α := v)) ` ((A - {α} →⇩_E B) × B)"
    with α_in_A show "h ∈ A →⇩_E B"
      apply (simp add: case_prod_beta' image_iff)
      by (metis insert_Diff PiE_fun_upd)
  qed
qed

text ‹
  Helper: "Absorb" a point into a function space bijection.
  This allows chaining: if we have a bijection $F$ for a smaller domain $S \rightarrow (A-\{x\}\rightarrow B)$,
  we can automatically get a bijection for $S\ x\ B \rightarrow (A\rightarrow B)$.
›
lemma bij_absorb_point:
  assumes "bij_betw F S (A - {α} →⇩_E B)"
  shows "bij_betw (λ (s, v). (F s)(α := v)) (S × B) (A →⇩_E B)"
proof -
  ― ‹1. Combine $F$ with Identity on $B$›
  from assms have "bij_betw (map_prod F id) (S × B) ((A - {α} →⇩_E B) × B)"
    by (blast intro: bij_betw_map_prod)
  ― ‹2. Apply the base removal lemma›
  with bij_PiE_remove_point show ?thesis
    by (auto
      elim: bij_betw_trans[simplified comp_def, elim_format]
      simp: map_prod_def case_prod_beta')
qed

text ‹
  Helper 2: "Lift Value"
  Attaches a computed value $V$ to the function at point $x$.
  Used for the outer function (e.g., @{term "a(α := V)"}).
›
lemma bij_lift_value:
  assumes "bij_betw V S B"
  shows "bij_betw (λ(f, s). f(α := V s)) ((A - {α} →⇩_E B) × S) (A →⇩_E B)"
proof -
  have "bij_betw (map_prod id V) ((A - {α} →⇩_E B) × S) ((A - {α} →⇩_E B) × B)"
    using assms by (simp add: bij_betw_map_prod)
  with bij_PiE_remove_point show ?thesis
    by (auto
      elim: bij_betw_trans[simplified comp_def, elim_format]
      simp: map_prod_def case_prod_beta')
qed

end

text ‹Tuple Association Helper: $((A, B), C) \longleftrightarrow (A, B, C)$›
lemma bij_assoc_right_to_left:
  "bij_betw (λ (a, b, c). ((a, b), c)) (A × B × C) ((A × B) × C)"
  by (auto intro: bij_betwI[where g="λ ((a, b), c). (a, b, c)"])

lemma bij_assoc_left_to_right:
  "bij_betw (λ((a, b), c). (a, b, c)) ((A × B) × C) (A × B × C)"
  by (auto intro: bij_betwI[where g="λ(a, b, c). ((a, b), c)"])

lemma bij_betw_combine_right:
  assumes "bij_betw f B C"
  shows "bij_betw (λ(a, b). (a, f b)) (A × B) (A × C)"
  using assms by (simp flip: map_prod_def id_def add: case_prod_beta' bij_betw_map_prod)

subsubsection ‹Application›

lemma remove1_remove1:
  assumes "α ∈ A" "β ∈ B" "finite A" "finite B" "finite C" "C ≠ {}"
  shows "bij_betw
      (λ(a,b,c). a(α := b(β := c)))
      ((A - {α} →⇩_E B →⇩_E C) × (B - {β} →⇩_E C) × C)
      (A →⇩_E B →⇩_E C)"
proof -
  ― ‹1. Establish bijection for the value: @{term "b(β := c)"}›
  have bij_val: "bij_betw (λ(b, c). b(β := c)) ((B - {β} →⇩_E C) × C) (B →⇩_E C)"
    using bij_PiE_remove_point[OF `β ∈ B`] by auto

  ― ‹2. Lift this value to $A$›
  have bij_lift: "bij_betw (λ(a, (b,c)). a(α := b(β := c)))
                  ((A - {α} →⇩_E B →⇩_E C) × ((B - {β} →⇩_E C) × C))
                  (A →⇩_E B →⇩_E C)"
    using bij_lift_value[OF `α ∈ A` bij_val] by (auto simp: case_prod_beta')

  ― ‹3. Fix tuple associativity $(a,b,c) \rightarrow (a,(b,c))$›
  have bij_assoc: "bij_betw (λ(a,b,c). (a,(b,c)))
                   ((A - {α} →⇩_E B →⇩_E C) × (B - {β} →⇩_E C) × C)
                   ((A - {α} →⇩_E B →⇩_E C) × ((B - {β} →⇩_E C) × C))"
    by (simp add: bij_betwI[where g="λ(a, (b, c)). (a, b, c)"])

  show ?thesis
    using bij_betw_trans[OF bij_assoc bij_lift] by (simp add: case_prod_beta)
qed

lemma remove1_remove2:
  assumes "β1 ≠ β2" "α ∈ A" "β1 ∈ B" "β2 ∈ B"
          "finite A" "finite B" "finite C" "C ≠ {}"
  shows "bij_betw
      (λ(a,b,c,d). a(α := b(β1 := c, β2 := d)))
      ((A - {α} →⇩_E B →⇩_E C) × (B - {β1,β2} →⇩_E C) × C × C)
      (A →⇩_E B →⇩_E C)"
proof -
  let ?S_B = "(B - {β1,β2} →⇩_E C) × C × C"

  ― ‹1. Build bijection for value: @{term "b(β1 := c, β2 := d)"}›
  ― ‹We use @{thm [source] bij_absorb_point} recursively›

  have "β1 ∈ B - {β2}" using assms by simp
  ― ‹Start with remove\_point for @{term "β1"}›
  have bij_B_start: "bij_betw (λ(b, c). b(β1 := c))
                     ((B - {β1,β2} →⇩_E C) × C) (B - {β2} →⇩_E C)"
    using bij_PiE_remove_point[OF `β1 ∈ B - {β2}`] by (metis Diff_insert)

  ― ‹Absorb @{term "β2"}›
  have bij_B_next: "bij_betw (λ((b, c), d). b(β1 := c, β2 := d))
                    (((B - {β1,β2} →⇩_E C) × C) × C) (B →⇩_E C)"
    using bij_absorb_point[OF `β2 ∈ B` bij_B_start] by (simp add: case_prod_beta')

  ― ‹Flatten tuple for $B$›
  have bij_B_val: "bij_betw (λ(b,c,d). b(β1 := c, β2 := d)) ?S_B (B →⇩_E C)"
    using bij_betw_trans[OF bij_assoc_right_to_left bij_B_next]
    by (simp add: case_prod_beta' comp_def)

  ― ‹2. Lift to $A$›
  have bij_lift: "bij_betw (λ(a, (b,c,d)). a(α := b(β1 := c, β2 := d)))
                  ((A - {α} →⇩_E B →⇩_E C) × ?S_B) (A →⇩_E B →⇩_E C)"
    using bij_lift_value[OF `α ∈ A` bij_B_val] by (simp add: case_prod_beta' comp_def)

  ― ‹3. Fix tuple associativity›
  have bij_assoc: "bij_betw (λ(a,b,c,d). (a,(b,c,d)))
                   ((A - {α} →⇩_E B →⇩_E C) × ?S_B)
                   ((A - {α} →⇩_E B →⇩_E C) × ?S_B)"
    by (simp add: bij_betwI [where g="λ(a, (b,c,d)). (a,b,c,d)"])

  show ?thesis
    using bij_betw_trans[OF bij_assoc bij_lift]  by (simp add: case_prod_beta)
qed

lemma remove1_remove3:
  assumes "β1 ≠ β2" "β2 ≠ β3" "β3 ≠ β1" "α ∈ A" "β1 ∈ B" "β2 ∈ B" "β3 ∈ B"
          "finite A" "finite B" "finite C" "C ≠ {}"
  shows "bij_betw
      (λ(a,b,c,d,e). a(α := b(β1 := c, β2 := d, β3 := e)))
      ((A - {α} →⇩_E B →⇩_E C) × (B - {β1,β2,β3} →⇩_E C) × C × C × C)
      (A →⇩_E B →⇩_E C)"
proof -
  let ?DomB3 = "B - {β1,β2,β3} →⇩_E C"
  let ?S_B = "?DomB3 × C × C × C"

  ― ‹1. Build bijection for value $B$ recursively›

  ― ‹Step 1: Base @{term "β1"}›
  from assms bij_PiE_remove_point[of "β1" "B - {β3, β2}"]
  have "bij_betw (λ(b,c). b(β1:=c)) (?DomB3 × C) (B - {β3, β2} →⇩_E C)"
    by (auto simp flip: Diff_insert simp: insert_commute)

  ― ‹Step 2: Absorb @{term "β2"}›
  with assms bij_absorb_point[of "β2" "B - {β3}"]
  have "bij_betw (λ((b,c),d). b(β1:=c, β2:=d)) ((?DomB3 × C) × C) (B - {β3} →⇩_E C)"
    by (auto simp flip: Diff_insert simp: case_prod_beta' insert_commute)

  ― ‹Step 3: Absorb @{term "β3"}›
  with assms bij_absorb_point[of "β3" B]
  have bij_3: "bij_betw (λ(((b,c),d),e). b(β1:=c, β2:=d, β3:=e)) (((?DomB3 × C) × C) × C) (B →⇩_E C)"
    by (auto simp: case_prod_beta' insert_commute)

  moreover have "bij_betw (λ(b,c,d,e). (((b,c),d),e)) ?S_B (((?DomB3 × C) × C) × C)"
    by (auto intro: bij_betwI[where g="λ(((b,c),d),e). (b,c,d,e)"])

  ― ‹Step 4: Flatten tuple for $B$›
  ultimately have "bij_betw (λ(b,c,d,e). b(β1:=c, β2:=d, β3:=e)) ?S_B (B →⇩_E C)"
    by (auto
      elim: bij_betw_trans[simplified comp_def, elim_format]
      simp: case_prod_beta')

  ― ‹2. Lift to $A$›
  with bij_lift_value assms
  have bij_lift: "bij_betw (λ(a, (b,c,d,e)). a(α := b(β1:=c, β2:=d, β3:=e)))
                  ((A - {α} →⇩_E B →⇩_E C) × ?S_B) (A →⇩_E B →⇩_E C)"
    by (simp add: case_prod_beta')

  ― ‹3. Fix tuple associativity›
  have bij_assoc: "bij_betw (λ(a,b,c,d,e). (a,(b,c,d,e)))
                   ((A - {α} →⇩_E B →⇩_E C) × ?S_B)
                   ((A - {α} →⇩_E B →⇩_E C) × ?S_B)"
    by (auto intro: bij_betwI [where g="λ(a, (b,c,d,e)). (a,b,c,d,e)"])

  with bij_betw_trans[OF bij_assoc bij_lift] show ?thesis
    by (simp add: case_prod_beta)
qed

lemma remove2_remove2'remove1:
  assumes "α1 ≠ α2" "β1 ≠ β2" "α1 ∈ A" "α2 ∈ A" "β1 ∈ B" "β2 ∈ B" "β3 ∈ B"
          "finite A" "finite B" "finite C" "C ≠ {}"
  shows "bij_betw
      (λ(a,b,c,d,e,f). a(α1 := b(β1 := d, β2 := e), α2 := c(β3 := f)))
      ((A - {α1, α2} →⇩_E B →⇩_E C) × (B - {β1, β2} →⇩_E C) × (B - {β3} →⇩_E C) × C × C × C)
      (A →⇩_E B →⇩_E C)"
proof -
  let ?DomA = "A - {α1, α2} →⇩_E B →⇩_E C"
  let ?DomB1 = "B - {β1, β2} →⇩_E C"
  let ?DomB2 = "B - {β3} →⇩_E C"

  have "bij_betw (λ(b,d,e). ((b,d),e)) (?DomB1 × C × C) ((?DomB1 × C) × C)"
    by (auto intro: bij_betwI[where g="λ((b,d),e). (b,d,e)"])

  moreover from assms bij_absorb_point[of "β2" "B"] bij_PiE_remove_point[of "β1" "B - {β2}"]
  have "bij_betw (λ((b,d),e). b(β1:=d, β2:=e)) ((?DomB1 × C) × C) (B →⇩_E C)"
    by (auto simp flip: Diff_insert simp: case_prod_beta')

  ― ‹1. Val 1: @{term "b(β1 := d, β2 := e)"}›
  ultimately have "bij_betw (λ(b, d, e). b(β1 := d, β2 := e)) (?DomB1 × C × C) (B →⇩_E C)"
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')

  ― ‹2. Val 2: @{term "c(β3 := f)"}›
  moreover from bij_PiE_remove_point[of "β3" B] assms
  have "bij_betw (λ(c, f). c(β3 := f)) (?DomB2 × C) (B →⇩_E C)" by auto

  ― ‹3. Combine values (Product Step)›
  ― ‹We explicitly form the tuple structure $((B1,C,C), (B2,C))$›
  ultimately have bij_vals: "bij_betw
      (λ((b,d,e), (c,f)). (b(β1:=d, β2:=e), c(β3:=f)))
      ((?DomB1 × C × C) × (?DomB2 × C))
      ((B →⇩_E C) × (B →⇩_E C))"
    by (fastforce dest: bij_betw_map_prod simp add: map_prod_def id_def case_prod_beta')

  ― ‹4. Lift to Parallel›
  from bij_betw_map_prod[OF bij_betw_id bij_vals] have bij_parallel:
    "bij_betw
      (λ(a, rest). (a, (λ((b,d,e), (c,f)). (b(β1:=d, β2:=e), c(β3:=f))) rest))
      (?DomA × ((?DomB1 × C × C) × (?DomB2 × C)))
      (?DomA × ((B →⇩_E C) × (B →⇩_E C)))"
    by (auto simp: map_prod_def id_def case_prod_beta')

  have bij_A_step1: "bij_betw (λ((a, v1), v2). (a(α1 := v1), v2))
                 ((?DomA × (B →⇩_E C)) × (B →⇩_E C)) ((A - {α2} →⇩_E B →⇩_E C) × (B →⇩_E C))"
    using assms bij_lift_value[of "α1" "A - {α2}"]
    by (force
      intro: bij_betw_map_prod simp flip: map_prod_def id_def Diff_insert
      simp: case_prod_beta')

  have bij_A_step2: "bij_betw (λ(a, v2). a(α2 := v2))
                 ((A - {α2} →⇩_E B →⇩_E C) × (B →⇩_E C)) (A →⇩_E B →⇩_E C)"
    using assms bij_PiE_remove_point[of "α2" "A"] by (auto simp: case_prod_beta')

  ― ‹5. Explicit Shuffle›
  have bij_shuffle: "bij_betw (λ(a, b, c, d, e, f). (a, (b, d, e), (c, f)))
      (?DomA × ?DomB1 × ?DomB2 × C × C × C)
      (?DomA × (?DomB1 × C × C) × (?DomB2 × C))"
    by (auto intro: bij_betwI[where g="λ(a, (b, d, e), (c, f)). (a, b, c, d, e, f)"])

  ― ‹6. Final Chain - broken into steps to avoid unification issues›
  have "bij_betw (λ(a, (b,d,e), (c,f)). ((a, b(β1:=d, β2:=e)), c(β3:=f)))
        (?DomA × (?DomB1 × C × C) × (?DomB2 × C))
        ((?DomA × (B →⇩_E C)) × (B →⇩_E C))"
  proof -
    have "bij_betw (λ(a, v1, v2). ((a, v1), v2))
          (?DomA × (B →⇩_E C) × (B →⇩_E C)) ((?DomA × (B →⇩_E C)) × (B →⇩_E C))"
      by (rule bij_betwI [where g="λ((a,v1),v2). (a,v1,v2)"]) auto
    with bij_parallel show ?thesis
      by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')
  qed

  with bij_shuffle have
    "bij_betw (λ(a, b, c, d, e, f). ((a, b(β1:=d, β2:=e)), c(β3:=f)))
      (?DomA × ?DomB1 × ?DomB2 × C × C × C)
      ((?DomA × (B →⇩_E C)) × (B →⇩_E C))"
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')

  moreover from bij_A_step1 bij_A_step2 have
    "bij_betw (λ((a, v1), v2). a(α1 := v1, α2 := v2))
      ((?DomA × (B →⇩_E C)) × (B →⇩_E C))
      (A →⇩_E B →⇩_E C)"
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')

  ultimately show ?thesis
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')
qed

end (* locale bij_prod *)

section ‹Definitions›

locale simple_tabulation_hashing =
  fixes n :: "'n :: {index1, zero} itself" ― ‹key length, i.e. 4 for 4 'fragments›
  fixes r :: "'result :: {abel_group_xor, finite} itself" ― ‹bool or any type that supports xor›
  fixes q :: "'q :: index1 itself" ― ‹digest length, i.e. 3›
  fixes d :: "'fragment :: finite itself" ― ‹domain of input key fragments›
  assumes n: "n = TYPE('n)"
begin

text ‹
In tabulation hashing, keys are split into \emph{n} fragments, each piece hashed individually,
before being combined into a final digest with the XOR operator.
  i.e. $x \equiv [x_0,x_1,x_2]$, $h(x) \equiv h_0 x_0 \oplus h_1 x_1 \oplus h_2 x_2$

Therefore, the type variable 'n, CARD('n) and n must reflect the number of parts in a key. In the
example above, 'n = 3 (which is an alias for num1 bit1)

The final digest length is indicated by the type variable 'q, and 'result is any type that supports
xor. For example, 'result = bool. When using 'result = bool and 'q = 3, it can be interpreted as
using a 3-bit word as the output, which can encode $2^3=8$ possibilities, e.g. [True, False, True]›

abbreviation cardn :: "'n itself ⇒ nat" where "cardn _ ≡ CARD('n)"

abbreviation N :: "'n itself ⇒ nat set" where "N type ≡ {..<cardn type}"

abbreviation D :: "'fragment set" where "D ≡ UNIV"

abbreviation Dn :: "('fragment, 'n) vec set" (‹D⇧ⁿ›) where
  ― ‹domain of all possible input keys s.t. each key is composed of exactly *n* fragments›
  "D⇧ⁿ ≡ UNIV"

abbreviation R :: "('result, 'q) vec set" where
  ― ‹range of all possible outputs, s.t. each output is composed of exactly *q* bits›
  "R ≡ UNIV"

abbreviation H :: "'n itself ⇒ (nat ⇒ 'fragment ⇒ ('result, 'q) vec) set" where
  ― ‹set of all possible assignments and combinations for every column-item pair›
  "H type ≡ N type →⇩_E D →⇩_E R"

definition tb_S :: "'n itself ⇒ (nat ⇒ 'fragment ⇒ ('result, 'q) vec) pmf" where
  ― ‹Generator of instances of H›
  "tb_S _ ≡ pmf_of_set (H n)"

definition tb_H :: "('fragment, 'n) vec ⇒ (nat ⇒ 'fragment ⇒ ('result, 'q) vec) ⇒ ('result, 'q) vec" where
  ― ‹Applies the tabulation hashing algorithm on *k* using a hash function from H›
  "tb_H k h ≡ xor_fold (map (λi. h i (k $ to_index i)) ([0..<cardn n]))"

lemma tb_S_alt_def:
  shows "tb_S n = prod_pmf (N n) (λ_. prod_pmf UNIV (λ_. pmf_of_set R))"
  unfolding tb_S_def
  by (simp add: Pi_pmf_of_set PiE_defaut_undefined_eq)

lemma tb_H_absorb:
  assumes "i < CARD('n)"
  shows "tb_H x h ⊕ c = tb_H x (h(i := (h i)(x $ to_index i := h i (x $ to_index i) ⊕ c)))"
  unfolding tb_H_def
  by (auto intro!: arg_cong[where f = xor_fold] simp: fold_absorb[of i] assms)

lemma tb_H_absorb':
  shows "tb_H x h ⊕ c =
    tb_H x (h(from_index i := (h (from_index i))(x $ i := h (from_index i) (x $ i) ⊕ c)))"
  by (metis from_index_bound from_to_index tb_H_absorb)

lemma tb_H_extract:
  assumes "i < CARD('n)"
  shows "tb_H x (h(i := g(x $ to_index i := α))) = tb_H x (h(i := g(x $ to_index i := 0))) ⊕ α"
  by (subst tb_H_absorb[OF assms]) simp

lemma tb_H_extract':
  shows "tb_H x (h(from_index i := g(x $ i := α))) = tb_H x (h(from_index i := g(x $ i := 0))) ⊕ α"
  by (metis from_index_bound from_to_index tb_H_extract)

lemma tb_H_exch:
  assumes "i < CARD('n)"
  shows "tb_H x (a(i := aa(x $ to_index i := b))) = α ⟷
     b = tb_H x (a(i := aa(x $ to_index i := α)))"
  apply (subst (1 2) tb_H_extract[OF assms])
  by (metis (no_types, lifting) assoc abel_group_xor_class.eq_iff)

lemma (*tag:important*) tb_H_exch':
  shows "tb_H x (a(from_index i := aa(x $ i := b))) = α ⟷
     b = tb_H x (a(from_index i := aa(x $ i := α)))"
  by (metis from_index_bound from_to_index tb_H_exch)

lemma (*tag:important*) tb_H_discard:
  assumes "x $ i ≠ y $ i"
  shows "tb_H x (a(from_index i := b(y $ i := c))) = tb_H x (a(from_index i := b))"
  unfolding tb_H_def
  apply (intro arg_cong[where ?f = "xor_fold"])
  by (simp_all add: assms)

section ‹Proofs›

subsection ‹Proof of 1-independence and uniformity›

lemma (*tag:important*) prob_tb_H_bin:
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α} = 1 / real (card R)" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"
  let ?x0 = "x $ to_index 0"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  have bij:
    "bij_betw (λ(a, b, c). a(0 := b(?x0 := c)))
      ((?N - {0} →⇩_E D →⇩_E R) × (D - {?x0} →⇩_E R) × R)
      ?H"
    by (fastforce intro!: bij_betw_funcsetE.remove1_remove1)

  let ?destructure =
    "pair_pmf (pmf_of_set (?N - {0} →⇩_E D →⇩_E R))
      (pair_pmf (pmf_of_set (D - {?x0} →⇩_E R))
        (pmf_of_set R))"

  have asmap: "?tb_S =
     map_pmf (λ(a, b, c). a(0 := b(?x0 := c))) ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw del: PiE_UNIV)

  have "?lhs =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λZ.
          {tb_H x (h(0 := Z(?x0 := α)))})))"
    unfolding asmap measure_map_pmf by (clarsimp intro!: measure_pmf_cong tb_H_exch)

  also have "… = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

theorem uniform:
  shows "prob_space.uniform_on (tb_S n) (tb_H key) R"
  using prob_tb_H_bin by (auto intro!: measure_pmf.uniform_onI)

subsection ‹Proof of two-independence›

lemma prob_tb_H_bin1_bin2:
  assumes "x ≠ y"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α ∧ tb_H y h = β} = 1 / real (card R * card R)"
  (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  obtain i where i: "x $ i ≠ y $ i" by (meson assms vec_cong)

  let ?xi = "x $ i"
  let ?yi = "y $ i"

  have bij:
    "bij_betw (λ(a, b, c, d). a(from_index i := b(?xi := c, ?yi := d)))
      ((?N - {from_index i} →⇩_E D →⇩_E R) × (D - {?xi, ?yi} →⇩_E R) × R × R)
      ?H"
    using i by (intro bij_betw_funcsetE.remove1_remove2) simp_all

  let ?destructure =
    "pair_pmf (pmf_of_set (?N - {from_index i} →⇩_E D →⇩_E R))
      (pair_pmf (pmf_of_set (D - {?xi, ?yi} →⇩_E R))
        (pmf_of_set (R × R)))"

  have asmap: "?tb_S =
     map_pmf (λ(a, b, c, d). a(from_index i := b(?xi := c, ?yi := d))) ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          {(xi',yi'). tb_H x (h(from_index i := I(?xi := xi', ?yi := yi'))) = α ∧
                      tb_H y (h(from_index i := I(?xi := xi', ?yi := yi'))) = β })))"
    unfolding asmap measure_map_pmf by (auto intro: measure_pmf_cong)

  also from i have "… =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          {(tb_H x (h(from_index i := I(?xi := α, ?yi := β))),
            tb_H y (h(from_index i := I(?xi := α, ?yi := β))))})))"
    apply (clarsimp intro!: measure_pmf_cong simp: tb_H_discard tb_H_exch')
    by (smt (verit, ccfv_threshold) fun_upd_twist tb_H_discard)

  also have "… = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

subsection ‹Proof of 3-independence›

lemma prob_3same_conv:
  assumes "x $ i ≠ y $ i" "y $ i ≠ z $ i" "z $ i ≠ x $ i"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α ∧ tb_H y h = β ∧ tb_H z h = γ} =
    1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"
  let ?xi = "x $ i"
  let ?yi = "y $ i"
  let ?zi = "z $ i"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  from assms have bij:
    "bij_betw (λ(a, b, c, d, e). a(from_index i := b(?xi := c, ?yi := d, ?zi := e)))
      ((?N - {from_index i} →⇩_E D →⇩_E R) × (D - {?xi, ?yi, ?zi} →⇩_E R) × R × R × R)
      ?H"
    by (fastforce intro: bij_betw_funcsetE.remove1_remove3)

  let ?destructure =
    "(pair_pmf (pmf_of_set (?N - {from_index i} →⇩_E D →⇩_E R))
       (pair_pmf (pmf_of_set (D - {?xi, ?yi, ?zi} →⇩_E R))
         (pmf_of_set (R × R × R))))"

  have asmap: "?tb_S =
     map_pmf (λ(a, b, c, d, e). a(from_index i := b(?xi := c, ?yi := d, ?zi := e))) ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
          Sigma UNIV (λI.
            {(xi',yi',zi').
              tb_H x (h(from_index i := I(?xi := xi', ?yi := yi', ?zi := zi'))) = α ∧
              tb_H y (h(from_index i := I(?xi := xi', ?yi := yi', ?zi := zi'))) = β ∧
              tb_H z (h(from_index i := I(?xi := xi', ?yi := yi', ?zi := zi'))) = γ})))"
    unfolding asmap measure_map_pmf by (auto intro: measure_pmf_cong)

  also from assms have "… =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
          Sigma UNIV (λI.
            {(xi',yi',zi').
              tb_H x (h(from_index i := I(?xi := xi'))) = α ∧
              tb_H y (h(from_index i := I(?yi := yi'))) = β ∧
              tb_H z (h(from_index i := I(?zi := zi'))) = γ})))"
    unfolding asmap measure_map_pmf
    apply (clarsimp intro!: measure_pmf_cong simp: tb_H_discard)
    by (smt (verit, best) fun_upd_twist tb_H_discard)

  also have "… =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          {let xi' = tb_H x (h(from_index i := I(?xi := α)));
               yi' = tb_H y (h(from_index i := I(?yi := β)));
               zi' = tb_H z (h(from_index i := I(?zi := γ)))
           in (xi',yi',zi')}))))"
    by (auto simp add: tb_H_exch' intro: measure_pmf_cong conj_cong)

  also have "… = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

lemma prob_2same_conv:
  assumes "x $ i ≠ y $ i" "z $ j ≠ x $ j" "i ≠ j"
      and "z $ i = x $ i"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α ∧ tb_H y h = β ∧ tb_H z h = γ} =
    1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"

  let ?xi = "x $ i"
  let ?yi = "y $ i"
  let ?zi = "z $ i"
  let ?xj = "x $ j"
  let ?yj = "y $ j"
  let ?zj = "z $ j"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  from assms have bij:
    "bij_betw
      (λ(a, b, c, d, e, f). a(from_index i := b(?xi := d, ?yi := e), from_index j := c(?zj := f)))
      ((?N - {from_index i, from_index j} →⇩_E D →⇩_E R) ×
        (D - {?xi, ?yi} →⇩_E R) ×
        (D - {?zj} →⇩_E R) ×
        R × R × R)
      ?H"
    by (fastforce intro: bij_betw_funcsetE.remove2_remove2'remove1)

  let ?destructure = "
    (pair_pmf (pmf_of_set (?N - {from_index i, from_index j} →⇩_E D →⇩_E R))
      (pair_pmf (pmf_of_set (D - {?xi, ?yi} →⇩_E R))
        (pair_pmf (pmf_of_set (D - {?zj} →⇩_E R))
          (pmf_of_set (R × R × R)))))"

  have asmap: "?tb_S =
    map_pmf
      (λ(a, b, c, d, e, f). a(from_index i := b(?xi := d, ?yi := e), from_index j := c(?zj := f)))
      ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          Sigma UNIV (λJ.
            {(xi',yi',zj').
   tb_H x (h(from_index i := I(?xi := xi', ?yi := yi'), from_index j := J(?zj := zj'))) = α ∧
   tb_H y (h(from_index i := I(?xi := xi', ?yi := yi'), from_index j := J(?zj := zj'))) = β ∧
   tb_H z (h(from_index i := I(?xi := xi', ?yi := yi'), from_index j := J(?zj := zj'))) = γ
            })))))"
    unfolding asmap measure_map_pmf by (auto intro: measure_pmf_cong)

   also from assms have "… =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          Sigma UNIV (λJ.
            {(xi',yi',zj').
   tb_H x (h(from_index j := J, from_index i := I(?xi := xi'))) = α ∧
   tb_H y (h(from_index j := J(?zj := zj'), from_index i := I(?yi := yi'))) = β ∧
   tb_H z (h(from_index i := I(?xi := xi'), from_index j := J(?zj := zj'))) = γ
            })))))"
    unfolding asmap measure_map_pmf
    apply (clarsimp intro!: measure_pmf_cong dest!: from_index_neq simp: tb_H_discard)
    by (smt (verit, best) fun_upd_twist tb_H_discard)

  also have "… =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          Sigma UNIV (λJ.
            {let
    xi' = tb_H x (h(from_index j := J, from_index i := I(?xi := α)));
    zj' = tb_H z (h(from_index i := I(?xi := xi'), from_index j := J(?zj := γ)));
    yi' = tb_H y (h(from_index j := J(?zj := zj'), from_index i := I(?yi := β)))
            in (xi',yi',zj')}))))"
    by (auto simp add: Let_unfold tb_H_exch' intro: measure_pmf_cong)

  also have "… = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

lemma prob_tb_H_bin1_bin2_bin3:
  assumes "x ≠ y" "y ≠ z" "z ≠ x"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α ∧ tb_H y h = β ∧ tb_H z h = γ} =
    1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  obtain i where i: "x $ i ≠ y $ i" using assms(1) vec_cong by blast

  let ?xi = "x $ i"
  let ?yi = "y $ i"
  let ?zi = "z $ i"

  consider "?zi ≠ ?xi ∧ ?zi ≠ ?yi" | "?zi = ?xi" | "?zi = ?yi" by blast
  then show ?thesis
  proof cases
    case 2
    moreover from assms vec_cong obtain j where "z $ j ≠ x $ j" by blast
    ultimately show ?thesis using i assms by (auto intro: prob_2same_conv)
  next
    case 3
    moreover obtain j where "z $ j ≠ y $ j" using assms(2) vec_cong by blast
    ultimately show ?thesis
      using i assms
      by (subst prob_2same_conv[where y = x, symmetric]) (auto simp: ac_simps)
  qed (auto intro!: prob_3same_conv simp: assms i)
qed

subsection ‹Proof of 3-universal›

lemma three_indep_vars:
  shows "prob_space.k_wise_indep_vars (tb_S n) 3 (λ_. count_space R) tb_H D⇧ⁿ"
proof (rule prob_space.k_wise_indep_varsI)
  show "prob_space (measure_pmf (tb_S n))" using measure_pmf.prob_space_axioms by blast
  show "measure_pmf (tb_S n) = measure_pmf (tb_S n)" by (rule refl)
next
  fix J :: "('fragment, 'n) vec set" assume "card J ≤ 3" "finite J" (* a k-subset of D⇧ⁿ *)
  fix f
  consider (0) "card J = 0"
         | (1) "card J = 1"
         | (2) "card J = 2"
         | (3) "card J = 3" using ‹card J ≤ 3› by linarith
  then show "measure_pmf.prob (tb_S n) {h. ∀key∈J. tb_H key h = f key} =
    (∏key∈J. measure_pmf.prob (tb_S n) {h. tb_H key h = f key})"
    (is "?lhs = ?rhs")
  proof cases
    case 0
    then have "J = {}" using ‹finite J› card_0_eq by blast
    then have "?lhs = 1" by simp
    also have  "… = ?rhs" unfolding ‹J = {}› prod.empty ..
    finally show ?thesis .
  next
    case 1
    then have "∃key. J = {key}" by (meson card_1_singletonE)
    then obtain key where [simp]: "J = {key}" by blast
    ― ‹Since there is only one value inhabiting J, we can erase the quantifier›
    then have "?lhs = measure_pmf.prob (tb_S n) {h. tb_H key h = f key}" by simp
    also have "… = ?rhs" by simp
    finally show ?thesis .
  next
    case 2
    then have "∃k1 k2. J = {k1,k2} ∧ k1 ≠ k2" by (meson card_2_iff)
    then obtain k1 k2 where [simp]: "J = {k1,k2}" "k1 ≠ k2" by blast+

    have "?lhs = measure_pmf.prob (tb_S n) {h. tb_H k1 h = f k1 ∧ tb_H k2 h = f k2}" by simp
    also have "… = 1 / (real (card R) * real (card R))" using prob_tb_H_bin1_bin2 by simp
    also have "… = (∏key∈J. indicat_real R (f key) / real (card R))" by simp
    also have "… = ?rhs" using prob_tb_H_bin by fastforce
    finally show ?thesis .
  next
    case 3
    then have "∃k1 k2 k3. J = {k1,k2,k3} ∧ k1 ≠ k2 ∧ k2 ≠ k3 ∧ k3 ≠ k1" by (metis card_3_iff)
    then obtain k1 k2 k3 where [simp]: "J = {k1,k2,k3}" "k1 ≠ k2" "k2 ≠ k3" "k3 ≠ k1" "k1 ≠ k3"
       by blast+

    have "?lhs = measure_pmf.prob (tb_S n)
      {h. tb_H k1 h = f k1 ∧ tb_H k2 h = f k2 ∧ tb_H k3 h = f k3}" by simp
    also have "… = 1 / (real (card R) * real (card R) * real (card R))"
      using prob_tb_H_bin1_bin2_bin3 by simp
    also have "… = (∏key∈J. indicat_real R (f key) / real (card R))" by simp
    also have "… = ?rhs" using prob_tb_H_bin by fastforce
    finally show ?thesis .
  qed
qed

theorem three_universal:
  shows "prob_space.k_universal (tb_S n) 3 tb_H D⇧ⁿ R"
  using prob_space.k_universal_def measure_pmf.prob_space_axioms three_indep_vars uniform by blast

subsection ‹Proof of non-4-universal›

lemma prob_4_conv:
  fixes u v :: 'fragment
  assumes "W ⊕ X ⊕ Y ⊕ Z = 0" "⋀h. P (h(0 := undefined, Suc 0 := undefined)) = P h"
          "CARD('n) > Suc 0" "u ≠ v"
  shows "measure_pmf.prob (tb_S n) {h::nat ⇒ 'fragment ⇒ ('result, 'q) vec.
     h 0 u ⊕ h (Suc 0) u ⊕ P h = W ∧
     h 0 u ⊕ h (Suc 0) v ⊕ P h = X ∧
     h 0 v ⊕ h (Suc 0) u ⊕ P h = Y ∧
     h 0 v ⊕ h (Suc 0) v ⊕ P h = Z }
    = 1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"

  note assms(1,2) [simp]
  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  define W' X' Y' Z' where
    "W' ≡ λh. W ⊕ P h"
    "X' ≡ λh. X ⊕ P h"
    "Y' ≡ λh. Y ⊕ P h"
    "Z' ≡ λh. Z ⊕ P h"

  have xor_sum:  "W' h ⊕ X' h ⊕ Y' h ⊕ Z' h = 0" for h
    by (simp add: W'_X'_Y'_Z'_def commute left_commute)

  have bij:
    "bij_betw
      (λ(a, b, c, d, e, f). a(Suc 0 := b(u := d, v := e), 0 := c(v := f)))
      ((?N - {Suc 0, 0} →⇩_E D →⇩_E R) ×
        (D - {u, v} →⇩_E R) ×
        (D - {v} →⇩_E R) ×
        R × R × R)
      ?H"
    using assms by (intro bij_betw_funcsetE.remove2_remove2'remove1) simp_all

  let ?destructure =
    "(pair_pmf (pmf_of_set (?N - {Suc 0, 0} →⇩_E D →⇩_E R))
      (pair_pmf (pmf_of_set (D - {u, v} →⇩_E R))
        (pair_pmf (pmf_of_set (D - {v} →⇩_E R))
          (pmf_of_set (R × R × R)))))"

  have asmap: "?tb_S =
    map_pmf
      (λ(a, b, c, d, e, f). a(1 := b(u := d, v := e), 0 := c(v := f)))
      ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI1.
          Sigma UNIV (λI0.
            {(u1,v1,v0).
              I0 u ⊕ u1 ⊕ P h = W ∧
              I0 u ⊕ v1 ⊕ P h = X ∧
              v0   ⊕ u1 ⊕ P h = Y ∧
              v0   ⊕ v1 ⊕ P h = Z})))))"
    unfolding asmap measure_map_pmf
    apply (clarsimp intro!: measure_pmf_cong simp add: assms(4))
    by (smt (z3) array_rules(5) assms(2) fun_upd_twist)

  (* note: it helps the solver when we group ‹W ⊕ P h› into ‹W' h› *)
  also have "… =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI1.
          Sigma UNIV (λI0.
            {(u1,v1,v0).
              I0 u ⊕ u1 = W' h ∧
              I0 u ⊕ v1 = X' h ∧
              v0   ⊕ u1 = Y' h ∧
              v0   ⊕ v1 = Z' h }))))"
    apply (clarsimp intro!: measure_pmf_cong)
    unfolding W'_X'_Y'_Z'_def
    by (smt (verit, best) assoc right_neutral self_inv)

  also have "… =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI1.
          Sigma UNIV (λI0.
            {(I0 u ⊕ W' h,
              I0 u ⊕ X' h,
              I0 u ⊕ W' h ⊕ Y' h)}))))"
    apply (clarsimp intro!: measure_pmf_cong)
    by (smt (z3) xor_sum assoc abel_group_xor_class.eq_iff)

  also have "… = ?rhs"
    apply (subst measure_pmf_prob_dependent_product_bound_eq'[where r = "?rhs"])+
    by simp_all

  finally show ?thesis .
qed

lemma not_four_indep:
  assumes "CARD('n) > 1" ― ‹if n = 1, then tabulation hashing is 4-independent›
          "card D ≥ 2" ― ‹if D = {} or D = {x}, then it is impossible to obtain 4 distinct keys›
          "card R > 1" ― ‹tabulation hashing is 4-independent otherwise›
  shows "¬ prob_space.k_wise_indep_vars (tb_S n) 4 (λ_. count_space R) tb_H D⇧ⁿ"
proof -
  let ?tb_S = "tb_S n"
  fix f :: "('fragment, 'n) vec ⇒ ('result, 'q) vec"

  obtain u v :: "'fragment" where pq [simp]: "u ≠ v" by (meson assms(2) UNIV_I card_2_iff' ex_card)
  define u_n where "u_n ≡ replicate (CARD('n) - 2) u"

  let ?w = "u # u # u_n"
  let ?x = "u # v # u_n"
  let ?y = "v # u # u_n"
  let ?z = "v # v # u_n"

  define w x y z :: "('fragment, 'n) vec" where wxyz:
    "w = vec_of_list ?w" "x = vec_of_list ?x" "y = vec_of_list ?y" "z = vec_of_list ?z"
  define J where [simp]: "J = {w,x,y,z}"

  (* Preliminaries start *)
  have N: "a # b # u_n ∈ {xs. length xs = CARD('n)}" for a b
    unfolding u_n_def using assms(1) by simp

  have distinct [simp]: "w ≠ x" "w ≠ y" "w ≠ z" "x ≠ y" "x ≠ z" "y ≠ z"
    using N by (simp_all add: wxyz vec_of_list_inject)

  have [simp]: "card J = 4" by simp

  have [simp]:
    "w $ to_index 0 = u" "w $ to_index (Suc 0) = u"
    "x $ to_index 0 = u" "x $ to_index (Suc 0) = v"
    "y $ to_index 0 = v" "y $ to_index (Suc 0) = u"
    "z $ to_index 0 = v" "z $ to_index (Suc 0) = v"
    using N by (simp_all add: wxyz nth_vec.rep_eq vec_of_list_inverse to_from_index)

  (* unpack the definition of tb_H w | tb_H x | tb_H y | tb_H z *)
  have rw: "
    xor_fold (map (λi. h i ((a # b # u_n) ! from_index (to_index i :: 'n))) [0..<CARD('n)]) =
      h 0 a ⊕
      h (Suc 0) b ⊕
      xor_fold (map (λi. h i u) [Suc (Suc 0)..<CARD('n)])"
    (is "xor_fold ?a = _") for a b h
  proof -
    have "?a = h 0 a # h (Suc 0) b # map (λi. h i u) [Suc (Suc 0) ..< CARD('n)]"
      using assms(1) by (auto simp: upt_conv_Cons to_from_index u_n_def)
    then show ?thesis by simp
  qed

  (* Preliminaries end *)

  have "¬ prob_space.indep_vars ?tb_S (λ_. count_space UNIV) tb_H J"
  proof -
    have "measure_pmf.prob ?tb_S {h. ∀key∈J. tb_H key h = f key}
      ≠ 1/(card R * card R * card R * card R)" (is "?lhs ≠ …")
    proof (cases "f w ⊕ f x ⊕ f y ⊕ f z = 0")
      case True

      text ‹In this case, we want to show that the probability is $\frac{1}{R^3}$ and not $\frac{1}{R^4}$›

      let ?a = "λh. xor_fold (map (λi. h i u) [Suc (Suc 0)..<CARD('n)])"

      have "?lhs = measure_pmf.prob ?tb_S {h.
        h 0 u ⊕ h (Suc 0) u ⊕ ?a h = f w ∧
        h 0 u ⊕ h (Suc 0) v ⊕ ?a h = f x ∧
        h 0 v ⊕ h (Suc 0) u ⊕ ?a h = f y ∧
        h 0 v ⊕ h (Suc 0) v ⊕ ?a h = f z}"
        using N by (simp add: tb_H_def wxyz nth_vec.rep_eq vec_of_list_inverse rw to_from_index)

      also have "… = 1 / real (card R * card R * card R)"
        using assms(1) True by (auto intro!: prob_4_conv arg_cong[where f = xor_fold])

      finally show ?thesis using assms(3) by simp
    next
      case False
      (* In this case, we want to show that the probability is 0 because it is impossible *)
      have False if "tb_H w h = f w" "tb_H x h = f x" "tb_H y h = f y" "tb_H z h = f z" for h
      proof -
        have "tb_H w h ⊕ tb_H x h ⊕ tb_H y h ⊕ tb_H z h = 0"
          using N by (simp add: tb_H_def wxyz nth_vec.rep_eq vec_of_list_inverse rw to_from_index ac_simps)
        then show False using that False by simp
      qed
      then have "?lhs = 0" by (auto simp: measure_pmf_zero_iff)
      then show ?thesis by simp
    qed

    moreover have "… = (∏key∈J. measure_pmf.prob ?tb_S {h. tb_H key h = f key})"
      using prob_tb_H_bin by fastforce

    ultimately show ?thesis
      by (intro measure_pmf.indep_vars_pmf_contrapos[where P = "λ x y. y = f x"]) simp_all
  qed

  then show ?thesis by (force simp: prob_space.k_wise_indep_vars_def prob_space_measure_pmf)
qed

theorem not_four_universal:
  assumes "CARD('n) > 1" ― ‹if n = 1, then tabulation hashing is 4-independent›
          "card D ≥ 2" ― ‹if D = {} or D = {x}, then it is impossible to obtain 4 distinct keys›
          "card R > 1" ― ‹tabulation hashing is 4-independent otherwise›
  shows "¬ prob_space.k_universal (tb_S n) 4 tb_H D⇧ⁿ R"
  using not_four_indep[OF assms]
  by (simp add: prob_space.k_universal_def measure_pmf.prob_space_axioms)

end (* locale simple_tabulation_hashing *)

section ‹Appendix›
subsection ‹Utility›

context prob_space
begin

context
  fixes K D k p
  assumes assms': "K ⊆ D" "card K ≤ k" "finite K" "M = measure_pmf p"
begin

(* useful downstream lemma *)
lemma k_wise_indep_vars_probD:
  assumes "k_wise_indep_vars k (λ_. count_space UNIV) X D"
  shows "prob {ω. ∀x∈K. P x (X x ω)} = (∏x∈K. prob {ω. P x (X x ω)})"
  using assms' assms unfolding k_wise_indep_vars_def by (subst split_indep_events[of _ X K]) auto

(* useful downstream lemma *)
lemma
  assumes "k_universal k X D R"
  shows
    k_universal_probD:
      "⋀ P. prob {ω. ∀x∈K. P x (X x ω)} = (∏x∈K. prob {ω. P x (X x ω)})"
      (is "PROP ?thesis_0") and
    k_universal_probD':
      "prob {ω. ∀x∈K. X x ω = P x} = (∏x∈K. indicat_real R (P x) / real (card R))"
      (is ?thesis_1)
proof -
  from assms k_wise_indep_vars_probD show "PROP ?thesis_0"
    unfolding k_universal_def by fastforce
  from this[of "λ x y. y = P x"] uniform_onD[where A = R and B = "{P _}"] assms' assms
  show ?thesis_1
    unfolding k_universal_def
    by (auto intro!: prod.cong split: split_indicator simp: prob_space_measure_pmf)
qed

end
end (* prob_space *)

(* wrapper around simple_tabulation_hashing *)
locale simple_tabulation_hashing' = simple_tabulation_hashing +
  assumes r: "r = TYPE('result::{finite,abel_group_xor})"
  assumes q: "q = TYPE('q::index1)"
  assumes d: "d = TYPE('fragment::finite)"
begin

end (*locale simple_tabulation_hashing'*)

subsection ‹Alternate proof of non-4-universal›

subsubsection ‹Preliminaries›

lemma (in prob_space) finite_if_k_universal:
  assumes "k_universal k X D R" "D ≠ {}"
  shows "finite R"
  using assms by (auto simp add: k_universal_def uniform_on_def)

(* we settle on using functions to represent a vector *)
lemma xor_sum_uniform:
  fixes α :: "'a :: {finite,abel_group_xor}"
  defines "R ≡ UNIV"
  assumes [simp]: "finite J" "J ≠ {}"
  shows "measure_pmf.prob (pmf_of_set (J →⇩_E R)) {f. (⨁x∈J. f x) = α} = 1 / real CARD('a)"
  (is "?lhs = ?rhs")
proof -
  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  obtain a where a: "a ∈ J" and bij: "bij_betw (λ(f, v). f(a := v)) ((J - {a} →⇩_E R) × R) (J →⇩_E R)"
    using bij_betw_funcsetE.bij_PiE_remove_point[of _ J R] assms(3) by blast

  have [simp]: "R ≠ {}" using assms(1) by blast

  have [simp]: "set_pmf (pmf_of_set (J →⇩_E R)) = J →⇩_E R"
               "set_pmf (pmf_of_set (J - {a} →⇩_E R)) = J - {a} →⇩_E R"
               "set_pmf (pmf_of_set R) = R"
    by (auto intro!: set_pmf_of_set)

  let ?destructure =
    "(pair_pmf (pmf_of_set (J - {a} →⇩_E R))
       (pmf_of_set R))"

  have asmap: "pmf_of_set (J →⇩_E R) = map_pmf (λ(f, v). f(a := v)) ?destructure"
    using assms bij by (simp flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
                              del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs = measure_pmf.prob (pmf_of_set (J →⇩_E R)) {f∈J →⇩_E R. (⨁x∈J. f x) = α}"
    apply (subst measure_Int_set_pmf[symmetric])
    by (auto intro!: arg_cong[where ?f = "measure_pmf.prob _"])

  also have "… =
    measure_pmf.prob ?destructure
      (Sigma (J - {a} →⇩_E R) (λf. {x. x ⊕ (⨁x∈J - {a}. f x) = α}))"
    using PiE_fun_upd a by (auto simp add: asmap xor_sum.remove[where ?x = a, OF assms(2) a]
                                 intro!: measure_pmf_cong)

  also have "… =
    measure_pmf.prob ?destructure
      (Sigma (J - {a} →⇩_E R) (λf. {α ⊕ (⨁x∈J- {a}. f x)}))"
    by (fastforce intro: measure_pmf_cong simp add: ac_simps)

  also have "… = ?rhs"
    by (simp add: assms(1) measure_pmf_prob_dependent_product_bound_eq' measure_pmf_of_set)

  finally show "?lhs = ?rhs" .
qed

(* we used ‹D ≠ {}› instead of ‹J ≠ {}› because it is more general *)
(* card J ≠ 0 implies finite J as well *)
lemma k_universal_conv_pmf_of_set:
  defines "R ≡ UNIV" (* to figure out if we can allow R ⊆ UNIV *)
  assumes "prob_space.k_universal (measure_pmf p) k X D R"
      and "J ⊆ D" "card J ≤ k" "finite J" "D ≠ {}"
  shows "map_pmf (λω. λx∈J. X x ω) p = pmf_of_set (J →⇩_E R)"
proof (intro pmf_eqI)
  fix P
  from assms(2) have fR: "finite R" by (simp add: assms(6) measure_pmf.finite_if_k_universal)

  {
    assume P: "P ∈ J →⇩_E R"

    have "pmf (map_pmf (λω. λx∈J. X x ω) p) P = measure_pmf.prob p {ω. (λx∈J. X x ω) = P}"
      by (simp add: pmf_map vimage_def)

    also have "… = measure_pmf.prob p {ω. ∀x∈J. X x ω = P x}"
      using P by (fastforce intro: arg_cong[where ?f = "measure_pmf.prob _"])

    also have "… = (∏x∈J. indicat_real R (P x) / real (card R))"
      using assms by (simp add: measure_pmf.k_universal_probD')

    also have "… = (1 / real (card R)) ^ card J"
      apply (subst prod.cong[where ?B= J and ?h = "λ_. 1 / real (card R)"]; simp)
      by (metis P indicator_simps(1) PiE_E)

    also have "… = indicat_real (J →⇩_E R) P / real (card (J →⇩_E R))"
      by (simp add: P assms(5) card_funcsetE power_one_over)

    also have "… = pmf (pmf_of_set (J →⇩_E R)) P"
      using P by (subst pmf_of_set) (auto simp add: assms(5) fR finite_PiE)

    finally have "pmf (map_pmf (λω. λx∈J. X x ω) p) P = pmf (pmf_of_set (J →⇩_E R)) P" .
  }

  moreover {
    assume P: "P ∉ J →⇩_E R"

    have "pmf (map_pmf (λω. λx∈J. X x ω) p) P = measure_pmf.prob p {ω. (λx∈J. X x ω) = P}"
      by (simp add: pmf_map vimage_def)

    also have "… = 0"
    proof (subst measure_pmf_zero_iff)
      have "{ω. (λx∈J. X x ω) = P} = {}"
        using P assms(1) (* using R ≡ UNIV for now *)
        by (auto simp add: measure_pmf.prob_space_axioms prob_space.k_universal_def)

      then show "set_pmf p ∩ {ω. (λx∈J. X x ω) = P} = {}" by blast
    qed

    also have "… = pmf (pmf_of_set (J →⇩_E R)) P"
      apply (rule sym)
      apply (subst pmf_eq_0_set_pmf)
      apply (subst set_pmf_of_set)
      subgoal by (simp add: PiE_eq_empty_iff assms(1))
      subgoal by (simp add: assms(5) fR finite_PiE)
      using P .

    finally have "pmf (map_pmf (λω. λx∈J. X x ω) p) P = pmf (pmf_of_set (J →⇩_E R)) P" .
  }

  ultimately show "pmf (map_pmf (λω. λx∈J. X x ω) p) P = pmf (pmf_of_set (J →⇩_E R)) P" by blast
qed

lemma (*tag:important*) k_universal_imp_xor_sum_uniform:
  fixes α :: "'a :: {finite,abel_group_xor}"
  defines "R ≡ UNIV"
  assumes "prob_space.k_universal (measure_pmf p) k X D R"
      and "J ⊆ D" "card J ≤ k" "finite J" "J ≠ {}"
  shows "measure_pmf.prob p {ω. (⨁x∈J. X x ω) = α} = 1 / real (card R)"
  (is "?lhs = ?rhs")
proof -
  have "?lhs = measure_pmf.prob (map_pmf (λω. λx∈J. X x ω) p) {f. (⨁x∈J. f x) = α}" by simp
  also have "… = measure_pmf.prob (pmf_of_set (J →⇩_E R)) {f. (⨁x∈J. f x) = α}"
    using assms by (intro arg_cong2[where ?f = "measure_pmf.prob"])
                   (auto intro!: k_universal_conv_pmf_of_set)
  also have "… = ?rhs" using assms by (auto intro!: xor_sum_uniform)
  finally show "?lhs = ?rhs" .
qed

subsubsection ‹Proofs›

context simple_tabulation_hashing begin

lemma k_wise_indep_vars_imp_xor_sum_uniform:
  assumes "prob_space.k_wise_indep_vars (tb_S n) k (λ_. count_space R) tb_H D⇧ⁿ"
          "J ≠ {}" "card J ≤ k"
  shows "measure_pmf.prob (tb_S n) {h. (⨁x∈J. tb_H x h) = α} = 1 / real (card R)"
  using assms
  by (auto
    intro: k_universal_imp_xor_sum_uniform[where ?k = k and ?D = "D⇧ⁿ"]
    simp: prob_space.k_universal_def prob_space_measure_pmf uniform)

lemma not_k_wise_indep_vars_by_xor_sum:
  assumes "measure_pmf.prob (tb_S n) {h. (⨁x∈J. tb_H x h) = α} ≠ 1 / real (card R)"
          "J ≠ {}" "card J ≤ k"
  shows "¬ prob_space.k_wise_indep_vars (tb_S n) k (λ_. count_space R) tb_H D⇧ⁿ"
  using assms(1) apply (rule contrapos_nn)
  using assms by (simp add: k_wise_indep_vars_imp_xor_sum_uniform)

lemma not_four_indep':
  assumes "CARD('n) > 1" ― ‹if n = 1, then tabulation hashing is 4-independent›
          "card D ≥ 2" ― ‹if D = {} or D = {x}, then it is impossible to obtain 4 distinct keys›
          "card R > 1" ― ‹tabulation hashing is 4-independent otherwise›
  shows "¬ prob_space.k_wise_indep_vars (tb_S n) 4 (λ_. count_space R) tb_H D⇧ⁿ"
proof -
  obtain u v :: "'fragment" where pq [simp]: "u ≠ v" by (meson assms(2) UNIV_I card_2_iff' ex_card)
  define u_n where "u_n ≡ replicate (CARD('n) - 2) u"
  let ?w = "u # u # u_n"
  let ?x = "u # v # u_n"
  let ?y = "v # u # u_n"
  let ?z = "v # v # u_n"

  define w x y z :: "('fragment, 'n) vec" where wxyz:
    "w = vec_of_list ?w" "x = vec_of_list ?x" "y = vec_of_list ?y" "z = vec_of_list ?z"
  define J where [simp]: "J = {w,x,y,z}"

  have N: "a # b # u_n ∈ {xs. length xs = CARD('n)}" for a b
    using assms(1) by (simp add: u_n_def)

  have [simp]: "w ≠ x" "w ≠ y" "w ≠ z" "x ≠ y" "x ≠ z" "y ≠ z"
    using N by (simp_all add: vec_of_list_inject wxyz)

  have [simp]: "card J = 4" by simp

  (* unpack the definition of tb_H w | tb_H x | tb_H y | tb_H z *)
  have rw: "
    xor_fold (map (λi. h i ((a # b # u_n) ! from_index (to_index i :: 'n))) [0..<CARD('n)]) =
      h 0 a ⊕
      h (Suc 0) b ⊕
      xor_fold (map (λi. h i u) [Suc (Suc 0)..<CARD('n)])"
    (is "xor_fold ?a = _") for a b h
  proof -
    have "?a = h 0 a # h (Suc 0) b # map (λi. h i u) [Suc (Suc 0) ..< CARD('n)]"
      using assms(1) by (auto simp: upt_conv_Cons to_from_index u_n_def)
    then show ?thesis by simp
  qed

  from N have "measure_pmf.prob (tb_S n) {h. (⨁x∈J. tb_H x h) = 0} = 1"
    apply (simp add: tb_H_def nth_vec.rep_eq del: eq_iff)
    by (simp add: wxyz nth_vec.rep_eq vec_of_list_inverse rw ac_simps)

  then show ?thesis
    using assms(3) by (intro not_k_wise_indep_vars_by_xor_sum[of J 0]) simp_all
qed
end (* context simple_tabulation_hashing *)
end (* theory *)