Theory Simple_Tabulation_Hashing

theory Simple_Tabulation_Hashing
imports
  Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
  Dependent_Product
  Xor_Inst
  Vec_Extras
begin

unbundle Xor.xor_syntax
unbundle Fixed_Length_Vector.vec_syntax
unbundle no Finite_Cartesian_Product.vec_syntax (* imported from HOL-Analysis *)
hide_type Finite_Cartesian_Product.vec

subsection ‹Intro rules, contrapositives and bijections›
lemma (in prob_space) k_wise_indep_varsI:
  assumes "a J.  J  I; card J  k; finite J  
     prob {ω. iJ. X i ω = a i} = (iJ. prob {ω. X i ω = a i})"
          "M = measure_pmf p"
  shows "k_wise_indep_vars k (λ_. count_space UNIV) X I"
  apply (simp only: k_wise_indep_vars_def prob_space_axioms)
  apply (clarsimp, rule indep_vars_pmf, rule assms(2))
  by (metis (no_types, lifting) assms(1) card_mono order_trans_rules(23))

(* used for the proof of non-4-independence *)
lemma (in prob_space) indep_vars_pmf_contrapos:
  assumes "prob {ω. xJ. P x (X' x ω)}  (xJ. prob {ω. P x (X' x ω)})"
          "finite J" "M = measure_pmf p"
  shows "¬ indep_vars (λ_. count_space UNIV) X' J"
  apply (rule contrapos_nn[OF assms(1)])
  using assms by (intro split_indep_events; force)

locale (*tag:important*) bij_betw_funcsetE begin

text ‹These lemmas prove bijections between sets of 2-arity functions and their decompositions

For example, a function $f: [0,100] \rightharpoonup \{A,B,C\} \rightharpoonup \{True,False\}$ can be decomposed into 3 parts:

$\begin{gathered}
[0,100]\rightharpoonup\{A,B,C\}\rightharpoonup\{True,False\} \ \\
\rightleftharpoons [1,100]\rightharpoonup\{A,B,C\}\rightharpoonup\{True,False\} \times \\
\{A,C\}\rightharpoonup\{True,False\} \times \\
\{True,False\} \
\end{gathered}$

\begin{itemize}
  \item base: $[1,100] \rightharpoonup \{A,B,C\} \rightharpoonup \{True,False\}$ (split the 0-index, leaving 1-100 intact)
  \item $z_0$: $\{A,C\} \rightharpoonup \{True,False\}$ (represents the 0-index, but further split the B-index)
  \item $z_b$: $B \in \{True,False\}$ (decides the value for $f\ 0\ B$)
\end{itemize}

$f$ is equivalent to its reconstruction, $f = \mathop{base}(0 := z_0(B := z_b))$›

context
  fixes α and A B :: "_ set"
  assumes α_in_A: "α  A"
begin

lemma bij_PiE_remove_point:
  "bij_betw (λ (f, v). f(α := v)) ((A - {α} E B) × B) (A E B)"
proof (intro bij_betw_imageI inj_onI, goal_cases "inj" "surj")
  case (inj _ _)
  then show ?case
    apply (simp add: case_prod_beta' image_iff prod_eq_iff set_eq_iff mem_Times_iff fun_eq_iff)
    by (metis Diff_iff PiE_arb insertCI)
next
  case surj
  then show ?case
  proof (intro equalityI subsetI)
    fix h assume "h  A E B"
    with α_in_A have "h α  B" "h(α := undefined)  A - {α} E B"
      by (auto simp: PiE_def extensional_def)
    then show "h  (λ (f, v). f(α := v)) ` ((A - {α} E B) × B)"
      apply (simp add: case_prod_beta' image_iff)
      by (metis fun_upd_upd fun_upd_triv)
  next
    fix h assume "h  (λ(f, v). f(α := v)) ` ((A - {α} E B) × B)"
    with α_in_A show "h  A E B"
      apply (simp add: case_prod_beta' image_iff)
      by (metis insert_Diff PiE_fun_upd)
  qed
qed

text ‹
  Helper: "Absorb" a point into a function space bijection.
  This allows chaining: if we have a bijection $F$ for a smaller domain $S \rightarrow (A-\{x\}\rightarrow B)$,
  we can automatically get a bijection for $S\ x\ B \rightarrow (A\rightarrow B)$.
›
lemma bij_absorb_point:
  assumes "bij_betw F S (A - {α} E B)"
  shows "bij_betw (λ (s, v). (F s)(α := v)) (S × B) (A E B)"
proof -
  ― ‹1. Combine $F$ with Identity on $B$›
  from assms have "bij_betw (map_prod F id) (S × B) ((A - {α} E B) × B)"
    by (blast intro: bij_betw_map_prod)
  ― ‹2. Apply the base removal lemma›
  with bij_PiE_remove_point show ?thesis
    by (auto
      elim: bij_betw_trans[simplified comp_def, elim_format]
      simp: map_prod_def case_prod_beta')
qed

text ‹
  Helper 2: "Lift Value"
  Attaches a computed value $V$ to the function at point $x$.
  Used for the outer function (e.g., @{term "a(α := V)"}).
›
lemma bij_lift_value:
  assumes "bij_betw V S B"
  shows "bij_betw (λ(f, s). f(α := V s)) ((A - {α} E B) × S) (A E B)"
proof -
  have "bij_betw (map_prod id V) ((A - {α} E B) × S) ((A - {α} E B) × B)"
    using assms by (simp add: bij_betw_map_prod)
  with bij_PiE_remove_point show ?thesis
    by (auto
      elim: bij_betw_trans[simplified comp_def, elim_format]
      simp: map_prod_def case_prod_beta')
qed

end

text ‹Tuple Association Helper: $((A, B), C) \longleftrightarrow (A, B, C)$›
lemma bij_assoc_right_to_left:
  "bij_betw (λ (a, b, c). ((a, b), c)) (A × B × C) ((A × B) × C)"
  by (auto intro: bij_betwI[where g="λ ((a, b), c). (a, b, c)"])

lemma bij_assoc_left_to_right:
  "bij_betw (λ((a, b), c). (a, b, c)) ((A × B) × C) (A × B × C)"
  by (auto intro: bij_betwI[where g="λ(a, b, c). ((a, b), c)"])

lemma bij_betw_combine_right:
  assumes "bij_betw f B C"
  shows "bij_betw (λ(a, b). (a, f b)) (A × B) (A × C)"
  using assms by (simp flip: map_prod_def id_def add: case_prod_beta' bij_betw_map_prod)

subsubsection ‹Application›

lemma remove1_remove1:
  assumes "α  A" "β  B" "finite A" "finite B" "finite C" "C  {}"
  shows "bij_betw
      (λ(a,b,c). a(α := b(β := c)))
      ((A - {α} E B E C) × (B - {β} E C) × C)
      (A E B E C)"
proof -
  ― ‹1. Establish bijection for the value: @{term "b(β := c)"}
  have bij_val: "bij_betw (λ(b, c). b(β := c)) ((B - {β} E C) × C) (B E C)"
    using bij_PiE_remove_point[OF `β  B`] by auto

  ― ‹2. Lift this value to $A$›
  have bij_lift: "bij_betw (λ(a, (b,c)). a(α := b(β := c)))
                  ((A - {α} E B E C) × ((B - {β} E C) × C))
                  (A E B E C)"
    using bij_lift_value[OF `α  A` bij_val] by (auto simp: case_prod_beta')

  ― ‹3. Fix tuple associativity $(a,b,c) \rightarrow (a,(b,c))$›
  have bij_assoc: "bij_betw (λ(a,b,c). (a,(b,c)))
                   ((A - {α} E B E C) × (B - {β} E C) × C)
                   ((A - {α} E B E C) × ((B - {β} E C) × C))"
    by (simp add: bij_betwI[where g="λ(a, (b, c)). (a, b, c)"])

  show ?thesis
    using bij_betw_trans[OF bij_assoc bij_lift] by (simp add: case_prod_beta)
qed

lemma remove1_remove2:
  assumes "β1  β2" "α  A" "β1  B" "β2  B"
          "finite A" "finite B" "finite C" "C  {}"
  shows "bij_betw
      (λ(a,b,c,d). a(α := b(β1 := c, β2 := d)))
      ((A - {α} E B E C) × (B - {β1,β2} E C) × C × C)
      (A E B E C)"
proof -
  let ?S_B = "(B - {β1,β2} E C) × C × C"

  ― ‹1. Build bijection for value: @{term "b(β1 := c, β2 := d)"}
  ― ‹We use @{thm [source] bij_absorb_point} recursively›

  have "β1  B - {β2}" using assms by simp
  ― ‹Start with remove\_point for @{term "β1"}
  have bij_B_start: "bij_betw (λ(b, c). b(β1 := c))
                     ((B - {β1,β2} E C) × C) (B - {β2} E C)"
    using bij_PiE_remove_point[OF `β1  B - {β2}`] by (metis Diff_insert)

  ― ‹Absorb @{term "β2"}
  have bij_B_next: "bij_betw (λ((b, c), d). b(β1 := c, β2 := d))
                    (((B - {β1,β2} E C) × C) × C) (B E C)"
    using bij_absorb_point[OF `β2  B` bij_B_start] by (simp add: case_prod_beta')

  ― ‹Flatten tuple for $B$›
  have bij_B_val: "bij_betw (λ(b,c,d). b(β1 := c, β2 := d)) ?S_B (B E C)"
    using bij_betw_trans[OF bij_assoc_right_to_left bij_B_next]
    by (simp add: case_prod_beta' comp_def)

  ― ‹2. Lift to $A$›
  have bij_lift: "bij_betw (λ(a, (b,c,d)). a(α := b(β1 := c, β2 := d)))
                  ((A - {α} E B E C) × ?S_B) (A E B E C)"
    using bij_lift_value[OF `α  A` bij_B_val] by (simp add: case_prod_beta' comp_def)

  ― ‹3. Fix tuple associativity›
  have bij_assoc: "bij_betw (λ(a,b,c,d). (a,(b,c,d)))
                   ((A - {α} E B E C) × ?S_B)
                   ((A - {α} E B E C) × ?S_B)"
    by (simp add: bij_betwI [where g="λ(a, (b,c,d)). (a,b,c,d)"])

  show ?thesis
    using bij_betw_trans[OF bij_assoc bij_lift]  by (simp add: case_prod_beta)
qed

lemma remove1_remove3:
  assumes "β1  β2" "β2  β3" "β3  β1" "α  A" "β1  B" "β2  B" "β3  B"
          "finite A" "finite B" "finite C" "C  {}"
  shows "bij_betw
      (λ(a,b,c,d,e). a(α := b(β1 := c, β2 := d, β3 := e)))
      ((A - {α} E B E C) × (B - {β1,β2,β3} E C) × C × C × C)
      (A E B E C)"
proof -
  let ?DomB3 = "B - {β1,β2,β3} E C"
  let ?S_B = "?DomB3 × C × C × C"

  ― ‹1. Build bijection for value $B$ recursively›

  ― ‹Step 1: Base @{term "β1"}
  from assms bij_PiE_remove_point[of "β1" "B - {β3, β2}"]
  have "bij_betw (λ(b,c). b(β1:=c)) (?DomB3 × C) (B - {β3, β2} E C)"
    by (auto simp flip: Diff_insert simp: insert_commute)

  ― ‹Step 2: Absorb @{term "β2"}
  with assms bij_absorb_point[of "β2" "B - {β3}"]
  have "bij_betw (λ((b,c),d). b(β1:=c, β2:=d)) ((?DomB3 × C) × C) (B - {β3} E C)"
    by (auto simp flip: Diff_insert simp: case_prod_beta' insert_commute)

  ― ‹Step 3: Absorb @{term "β3"}
  with assms bij_absorb_point[of "β3" B]
  have bij_3: "bij_betw (λ(((b,c),d),e). b(β1:=c, β2:=d, β3:=e)) (((?DomB3 × C) × C) × C) (B E C)"
    by (auto simp: case_prod_beta' insert_commute)

  moreover have "bij_betw (λ(b,c,d,e). (((b,c),d),e)) ?S_B (((?DomB3 × C) × C) × C)"
    by (auto intro: bij_betwI[where g="λ(((b,c),d),e). (b,c,d,e)"])

  ― ‹Step 4: Flatten tuple for $B$›
  ultimately have "bij_betw (λ(b,c,d,e). b(β1:=c, β2:=d, β3:=e)) ?S_B (B E C)"
    by (auto
      elim: bij_betw_trans[simplified comp_def, elim_format]
      simp: case_prod_beta')

  ― ‹2. Lift to $A$›
  with bij_lift_value assms
  have bij_lift: "bij_betw (λ(a, (b,c,d,e)). a(α := b(β1:=c, β2:=d, β3:=e)))
                  ((A - {α} E B E C) × ?S_B) (A E B E C)"
    by (simp add: case_prod_beta')

  ― ‹3. Fix tuple associativity›
  have bij_assoc: "bij_betw (λ(a,b,c,d,e). (a,(b,c,d,e)))
                   ((A - {α} E B E C) × ?S_B)
                   ((A - {α} E B E C) × ?S_B)"
    by (auto intro: bij_betwI [where g="λ(a, (b,c,d,e)). (a,b,c,d,e)"])

  with bij_betw_trans[OF bij_assoc bij_lift] show ?thesis
    by (simp add: case_prod_beta)
qed

lemma remove2_remove2'remove1:
  assumes "α1  α2" "β1  β2" "α1  A" "α2  A" "β1  B" "β2  B" "β3  B"
          "finite A" "finite B" "finite C" "C  {}"
  shows "bij_betw
      (λ(a,b,c,d,e,f). a(α1 := b(β1 := d, β2 := e), α2 := c(β3 := f)))
      ((A - {α1, α2} E B E C) × (B - {β1, β2} E C) × (B - {β3} E C) × C × C × C)
      (A E B E C)"
proof -
  let ?DomA = "A - {α1, α2} E B E C"
  let ?DomB1 = "B - {β1, β2} E C"
  let ?DomB2 = "B - {β3} E C"

  have "bij_betw (λ(b,d,e). ((b,d),e)) (?DomB1 × C × C) ((?DomB1 × C) × C)"
    by (auto intro: bij_betwI[where g="λ((b,d),e). (b,d,e)"])

  moreover from assms bij_absorb_point[of "β2" "B"] bij_PiE_remove_point[of "β1" "B - {β2}"]
  have "bij_betw (λ((b,d),e). b(β1:=d, β2:=e)) ((?DomB1 × C) × C) (B E C)"
    by (auto simp flip: Diff_insert simp: case_prod_beta')

  ― ‹1. Val 1: @{term "b(β1 := d, β2 := e)"}
  ultimately have "bij_betw (λ(b, d, e). b(β1 := d, β2 := e)) (?DomB1 × C × C) (B E C)"
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')

  ― ‹2. Val 2: @{term "c(β3 := f)"}
  moreover from bij_PiE_remove_point[of "β3" B] assms
  have "bij_betw (λ(c, f). c(β3 := f)) (?DomB2 × C) (B E C)" by auto

  ― ‹3. Combine values (Product Step)›
  ― ‹We explicitly form the tuple structure $((B1,C,C), (B2,C))$›
  ultimately have bij_vals: "bij_betw
      (λ((b,d,e), (c,f)). (b(β1:=d, β2:=e), c(β3:=f)))
      ((?DomB1 × C × C) × (?DomB2 × C))
      ((B E C) × (B E C))"
    by (fastforce dest: bij_betw_map_prod simp add: map_prod_def id_def case_prod_beta')

  ― ‹4. Lift to Parallel›
  from bij_betw_map_prod[OF bij_betw_id bij_vals] have bij_parallel:
    "bij_betw
      (λ(a, rest). (a, (λ((b,d,e), (c,f)). (b(β1:=d, β2:=e), c(β3:=f))) rest))
      (?DomA × ((?DomB1 × C × C) × (?DomB2 × C)))
      (?DomA × ((B E C) × (B E C)))"
    by (auto simp: map_prod_def id_def case_prod_beta')

  have bij_A_step1: "bij_betw (λ((a, v1), v2). (a(α1 := v1), v2))
                 ((?DomA × (B E C)) × (B E C)) ((A - {α2} E B E C) × (B E C))"
    using assms bij_lift_value[of "α1" "A - {α2}"]
    by (force
      intro: bij_betw_map_prod simp flip: map_prod_def id_def Diff_insert
      simp: case_prod_beta')

  have bij_A_step2: "bij_betw (λ(a, v2). a(α2 := v2))
                 ((A - {α2} E B E C) × (B E C)) (A E B E C)"
    using assms bij_PiE_remove_point[of "α2" "A"] by (auto simp: case_prod_beta')

  ― ‹5. Explicit Shuffle›
  have bij_shuffle: "bij_betw (λ(a, b, c, d, e, f). (a, (b, d, e), (c, f)))
      (?DomA × ?DomB1 × ?DomB2 × C × C × C)
      (?DomA × (?DomB1 × C × C) × (?DomB2 × C))"
    by (auto intro: bij_betwI[where g="λ(a, (b, d, e), (c, f)). (a, b, c, d, e, f)"])

  ― ‹6. Final Chain - broken into steps to avoid unification issues›
  have "bij_betw (λ(a, (b,d,e), (c,f)). ((a, b(β1:=d, β2:=e)), c(β3:=f)))
        (?DomA × (?DomB1 × C × C) × (?DomB2 × C))
        ((?DomA × (B E C)) × (B E C))"
  proof -
    have "bij_betw (λ(a, v1, v2). ((a, v1), v2))
          (?DomA × (B E C) × (B E C)) ((?DomA × (B E C)) × (B E C))"
      by (rule bij_betwI [where g="λ((a,v1),v2). (a,v1,v2)"]) auto
    with bij_parallel show ?thesis
      by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')
  qed

  with bij_shuffle have
    "bij_betw (λ(a, b, c, d, e, f). ((a, b(β1:=d, β2:=e)), c(β3:=f)))
      (?DomA × ?DomB1 × ?DomB2 × C × C × C)
      ((?DomA × (B E C)) × (B E C))"
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')

  moreover from bij_A_step1 bij_A_step2 have
    "bij_betw (λ((a, v1), v2). a(α1 := v1, α2 := v2))
      ((?DomA × (B E C)) × (B E C))
      (A E B E C)"
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')

  ultimately show ?thesis
    by (auto elim: bij_betw_trans[simplified comp_def, elim_format] simp: case_prod_beta')
qed

end (* locale bij_prod *)

section ‹Definitions›

locale simple_tabulation_hashing =
  fixes n :: "'n :: {index1, zero} itself" ― ‹key length, i.e. 4 for 4 'fragments›
  fixes r :: "'result :: {abel_group_xor, finite} itself" ― ‹bool or any type that supports xor›
  fixes q :: "'q :: index1 itself" ― ‹digest length, i.e. 3›
  fixes d :: "'fragment :: finite itself" ― ‹domain of input key fragments›
  assumes n: "n = TYPE('n)"
begin

text ‹
In tabulation hashing, keys are split into \emph{n} fragments, each piece hashed individually,
before being combined into a final digest with the XOR operator.
  i.e. $x \equiv [x_0,x_1,x_2]$, $h(x) \equiv h_0 x_0 \oplus h_1 x_1 \oplus h_2 x_2$

Therefore, the type variable 'n, CARD('n) and n must reflect the number of parts in a key. In the
example above, 'n = 3 (which is an alias for num1 bit1)

The final digest length is indicated by the type variable 'q, and 'result is any type that supports
xor. For example, 'result = bool. When using 'result = bool and 'q = 3, it can be interpreted as
using a 3-bit word as the output, which can encode $2^3=8$ possibilities, e.g. [True, False, True]›

abbreviation cardn :: "'n itself  nat" where "cardn _  CARD('n)"

abbreviation N :: "'n itself  nat set" where "N type  {..<cardn type}"

abbreviation D :: "'fragment set" where "D  UNIV"

abbreviation Dn :: "('fragment, 'n) vec set" (Dn) where
  ― ‹domain of all possible input keys s.t. each key is composed of exactly *n* fragments›
  "Dn  UNIV"

abbreviation R :: "('result, 'q) vec set" where
  ― ‹range of all possible outputs, s.t. each output is composed of exactly *q* bits›
  "R  UNIV"

abbreviation H :: "'n itself  (nat  'fragment  ('result, 'q) vec) set" where
  ― ‹set of all possible assignments and combinations for every column-item pair›
  "H type  N type E D E R"

definition tb_S :: "'n itself  (nat  'fragment  ('result, 'q) vec) pmf" where
  ― ‹Generator of instances of H›
  "tb_S _  pmf_of_set (H n)"

definition tb_H :: "('fragment, 'n) vec  (nat  'fragment  ('result, 'q) vec)  ('result, 'q) vec" where
  ― ‹Applies the tabulation hashing algorithm on *k* using a hash function from H›
  "tb_H k h  xor_fold (map (λi. h i (k $ to_index i)) ([0..<cardn n]))"

lemma tb_S_alt_def:
  shows "tb_S n = prod_pmf (N n) (λ_. prod_pmf UNIV (λ_. pmf_of_set R))"
  unfolding tb_S_def
  by (simp add: Pi_pmf_of_set PiE_defaut_undefined_eq)

lemma tb_H_absorb:
  assumes "i < CARD('n)"
  shows "tb_H x h  c = tb_H x (h(i := (h i)(x $ to_index i := h i (x $ to_index i)  c)))"
  unfolding tb_H_def
  by (auto intro!: arg_cong[where f = xor_fold] simp: fold_absorb[of i] assms)

lemma tb_H_absorb':
  shows "tb_H x h  c =
    tb_H x (h(from_index i := (h (from_index i))(x $ i := h (from_index i) (x $ i)  c)))"
  by (metis from_index_bound from_to_index tb_H_absorb)

lemma tb_H_extract:
  assumes "i < CARD('n)"
  shows "tb_H x (h(i := g(x $ to_index i := α))) = tb_H x (h(i := g(x $ to_index i := 0)))  α"
  by (subst tb_H_absorb[OF assms]) simp

lemma tb_H_extract':
  shows "tb_H x (h(from_index i := g(x $ i := α))) = tb_H x (h(from_index i := g(x $ i := 0)))  α"
  by (metis from_index_bound from_to_index tb_H_extract)

lemma tb_H_exch:
  assumes "i < CARD('n)"
  shows "tb_H x (a(i := aa(x $ to_index i := b))) = α 
     b = tb_H x (a(i := aa(x $ to_index i := α)))"
  apply (subst (1 2) tb_H_extract[OF assms])
  by (metis (no_types, lifting) assoc abel_group_xor_class.eq_iff)

lemma (*tag:important*) tb_H_exch':
  shows "tb_H x (a(from_index i := aa(x $ i := b))) = α 
     b = tb_H x (a(from_index i := aa(x $ i := α)))"
  by (metis from_index_bound from_to_index tb_H_exch)

lemma (*tag:important*) tb_H_discard:
  assumes "x $ i  y $ i"
  shows "tb_H x (a(from_index i := b(y $ i := c))) = tb_H x (a(from_index i := b))"
  unfolding tb_H_def
  apply (intro arg_cong[where ?f = "xor_fold"])
  by (simp_all add: assms)

section ‹Proofs›

subsection ‹Proof of 1-independence and uniformity›

lemma (*tag:important*) prob_tb_H_bin:
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α} = 1 / real (card R)" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"
  let ?x0 = "x $ to_index 0"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  have bij:
    "bij_betw (λ(a, b, c). a(0 := b(?x0 := c)))
      ((?N - {0} E D E R) × (D - {?x0} E R) × R)
      ?H"
    by (fastforce intro!: bij_betw_funcsetE.remove1_remove1)

  let ?destructure =
    "pair_pmf (pmf_of_set (?N - {0} E D E R))
      (pair_pmf (pmf_of_set (D - {?x0} E R))
        (pmf_of_set R))"

  have asmap: "?tb_S =
     map_pmf (λ(a, b, c). a(0 := b(?x0 := c))) ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw del: PiE_UNIV)

  have "?lhs =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λZ.
          {tb_H x (h(0 := Z(?x0 := α)))})))"
    unfolding asmap measure_map_pmf by (clarsimp intro!: measure_pmf_cong tb_H_exch)

  also have " = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

theorem uniform:
  shows "prob_space.uniform_on (tb_S n) (tb_H key) R"
  using prob_tb_H_bin by (auto intro!: measure_pmf.uniform_onI)

subsection ‹Proof of two-independence›

lemma prob_tb_H_bin1_bin2:
  assumes "x  y"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α  tb_H y h = β} = 1 / real (card R * card R)"
  (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  obtain i where i: "x $ i  y $ i" by (meson assms vec_cong)

  let ?xi = "x $ i"
  let ?yi = "y $ i"

  have bij:
    "bij_betw (λ(a, b, c, d). a(from_index i := b(?xi := c, ?yi := d)))
      ((?N - {from_index i} E D E R) × (D - {?xi, ?yi} E R) × R × R)
      ?H"
    using i by (intro bij_betw_funcsetE.remove1_remove2) simp_all

  let ?destructure =
    "pair_pmf (pmf_of_set (?N - {from_index i} E D E R))
      (pair_pmf (pmf_of_set (D - {?xi, ?yi} E R))
        (pmf_of_set (R × R)))"

  have asmap: "?tb_S =
     map_pmf (λ(a, b, c, d). a(from_index i := b(?xi := c, ?yi := d))) ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          {(xi',yi'). tb_H x (h(from_index i := I(?xi := xi', ?yi := yi'))) = α 
                      tb_H y (h(from_index i := I(?xi := xi', ?yi := yi'))) = β })))"
    unfolding asmap measure_map_pmf by (auto intro: measure_pmf_cong)

  also from i have " =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          {(tb_H x (h(from_index i := I(?xi := α, ?yi := β))),
            tb_H y (h(from_index i := I(?xi := α, ?yi := β))))})))"
    apply (clarsimp intro!: measure_pmf_cong simp: tb_H_discard tb_H_exch')
    by (smt (verit, ccfv_threshold) fun_upd_twist tb_H_discard)

  also have " = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

subsection ‹Proof of 3-independence›

lemma prob_3same_conv:
  assumes "x $ i  y $ i" "y $ i  z $ i" "z $ i  x $ i"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α  tb_H y h = β  tb_H z h = γ} =
    1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"
  let ?xi = "x $ i"
  let ?yi = "y $ i"
  let ?zi = "z $ i"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  from assms have bij:
    "bij_betw (λ(a, b, c, d, e). a(from_index i := b(?xi := c, ?yi := d, ?zi := e)))
      ((?N - {from_index i} E D E R) × (D - {?xi, ?yi, ?zi} E R) × R × R × R)
      ?H"
    by (fastforce intro: bij_betw_funcsetE.remove1_remove3)

  let ?destructure =
    "(pair_pmf (pmf_of_set (?N - {from_index i} E D E R))
       (pair_pmf (pmf_of_set (D - {?xi, ?yi, ?zi} E R))
         (pmf_of_set (R × R × R))))"

  have asmap: "?tb_S =
     map_pmf (λ(a, b, c, d, e). a(from_index i := b(?xi := c, ?yi := d, ?zi := e))) ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
          Sigma UNIV (λI.
            {(xi',yi',zi').
              tb_H x (h(from_index i := I(?xi := xi', ?yi := yi', ?zi := zi'))) = α 
              tb_H y (h(from_index i := I(?xi := xi', ?yi := yi', ?zi := zi'))) = β 
              tb_H z (h(from_index i := I(?xi := xi', ?yi := yi', ?zi := zi'))) = γ})))"
    unfolding asmap measure_map_pmf by (auto intro: measure_pmf_cong)

  also from assms have " =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
          Sigma UNIV (λI.
            {(xi',yi',zi').
              tb_H x (h(from_index i := I(?xi := xi'))) = α 
              tb_H y (h(from_index i := I(?yi := yi'))) = β 
              tb_H z (h(from_index i := I(?zi := zi'))) = γ})))"
    unfolding asmap measure_map_pmf
    apply (clarsimp intro!: measure_pmf_cong simp: tb_H_discard)
    by (smt (verit, best) fun_upd_twist tb_H_discard)

  also have " =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          {let xi' = tb_H x (h(from_index i := I(?xi := α)));
               yi' = tb_H y (h(from_index i := I(?yi := β)));
               zi' = tb_H z (h(from_index i := I(?zi := γ)))
           in (xi',yi',zi')}))))"
    by (auto simp add: tb_H_exch' intro: measure_pmf_cong conj_cong)

  also have " = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

lemma prob_2same_conv:
  assumes "x $ i  y $ i" "z $ j  x $ j" "i  j"
      and "z $ i = x $ i"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α  tb_H y h = β  tb_H z h = γ} =
    1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"

  let ?xi = "x $ i"
  let ?yi = "y $ i"
  let ?zi = "z $ i"
  let ?xj = "x $ j"
  let ?yj = "y $ j"
  let ?zj = "z $ j"

  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  from assms have bij:
    "bij_betw
      (λ(a, b, c, d, e, f). a(from_index i := b(?xi := d, ?yi := e), from_index j := c(?zj := f)))
      ((?N - {from_index i, from_index j} E D E R) ×
        (D - {?xi, ?yi} E R) ×
        (D - {?zj} E R) ×
        R × R × R)
      ?H"
    by (fastforce intro: bij_betw_funcsetE.remove2_remove2'remove1)

  let ?destructure = "
    (pair_pmf (pmf_of_set (?N - {from_index i, from_index j} E D E R))
      (pair_pmf (pmf_of_set (D - {?xi, ?yi} E R))
        (pair_pmf (pmf_of_set (D - {?zj} E R))
          (pmf_of_set (R × R × R)))))"

  have asmap: "?tb_S =
    map_pmf
      (λ(a, b, c, d, e, f). a(from_index i := b(?xi := d, ?yi := e), from_index j := c(?zj := f)))
      ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          Sigma UNIV (λJ.
            {(xi',yi',zj').
   tb_H x (h(from_index i := I(?xi := xi', ?yi := yi'), from_index j := J(?zj := zj'))) = α 
   tb_H y (h(from_index i := I(?xi := xi', ?yi := yi'), from_index j := J(?zj := zj'))) = β 
   tb_H z (h(from_index i := I(?xi := xi', ?yi := yi'), from_index j := J(?zj := zj'))) = γ
            })))))"
    unfolding asmap measure_map_pmf by (auto intro: measure_pmf_cong)

   also from assms have " =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          Sigma UNIV (λJ.
            {(xi',yi',zj').
   tb_H x (h(from_index j := J, from_index i := I(?xi := xi'))) = α 
   tb_H y (h(from_index j := J(?zj := zj'), from_index i := I(?yi := yi'))) = β 
   tb_H z (h(from_index i := I(?xi := xi'), from_index j := J(?zj := zj'))) = γ
            })))))"
    unfolding asmap measure_map_pmf
    apply (clarsimp intro!: measure_pmf_cong dest!: from_index_neq simp: tb_H_discard)
    by (smt (verit, best) fun_upd_twist tb_H_discard)

  also have " =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI.
          Sigma UNIV (λJ.
            {let
    xi' = tb_H x (h(from_index j := J, from_index i := I(?xi := α)));
    zj' = tb_H z (h(from_index i := I(?xi := xi'), from_index j := J(?zj := γ)));
    yi' = tb_H y (h(from_index j := J(?zj := zj'), from_index i := I(?yi := β)))
            in (xi',yi',zj')}))))"
    by (auto simp add: Let_unfold tb_H_exch' intro: measure_pmf_cong)

  also have " = ?rhs" by (simp add: measure_pmf_prob_dependent_product_bound_eq')

  finally show ?thesis .
qed

lemma prob_tb_H_bin1_bin2_bin3:
  assumes "x  y" "y  z" "z  x"
  shows "measure_pmf.prob (tb_S n) {h. tb_H x h = α  tb_H y h = β  tb_H z h = γ} =
    1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  obtain i where i: "x $ i  y $ i" using assms(1) vec_cong by blast

  let ?xi = "x $ i"
  let ?yi = "y $ i"
  let ?zi = "z $ i"

  consider "?zi  ?xi  ?zi  ?yi" | "?zi = ?xi" | "?zi = ?yi" by blast
  then show ?thesis
  proof cases
    case 2
    moreover from assms vec_cong obtain j where "z $ j  x $ j" by blast
    ultimately show ?thesis using i assms by (auto intro: prob_2same_conv)
  next
    case 3
    moreover obtain j where "z $ j  y $ j" using assms(2) vec_cong by blast
    ultimately show ?thesis
      using i assms
      by (subst prob_2same_conv[where y = x, symmetric]) (auto simp: ac_simps)
  qed (auto intro!: prob_3same_conv simp: assms i)
qed

subsection ‹Proof of 3-universal›

lemma three_indep_vars:
  shows "prob_space.k_wise_indep_vars (tb_S n) 3 (λ_. count_space R) tb_H Dn"
proof (rule prob_space.k_wise_indep_varsI)
  show "prob_space (measure_pmf (tb_S n))" using measure_pmf.prob_space_axioms by blast
  show "measure_pmf (tb_S n) = measure_pmf (tb_S n)" by (rule refl)
next
  fix J :: "('fragment, 'n) vec set" assume "card J  3" "finite J" (* a k-subset of Dn *)
  fix f
  consider (0) "card J = 0"
         | (1) "card J = 1"
         | (2) "card J = 2"
         | (3) "card J = 3" using card J  3 by linarith
  then show "measure_pmf.prob (tb_S n) {h. keyJ. tb_H key h = f key} =
    (keyJ. measure_pmf.prob (tb_S n) {h. tb_H key h = f key})"
    (is "?lhs = ?rhs")
  proof cases
    case 0
    then have "J = {}" using finite J card_0_eq by blast
    then have "?lhs = 1" by simp
    also have  " = ?rhs" unfolding J = {} prod.empty ..
    finally show ?thesis .
  next
    case 1
    then have "key. J = {key}" by (meson card_1_singletonE)
    then obtain key where [simp]: "J = {key}" by blast
    ― ‹Since there is only one value inhabiting J, we can erase the quantifier›
    then have "?lhs = measure_pmf.prob (tb_S n) {h. tb_H key h = f key}" by simp
    also have " = ?rhs" by simp
    finally show ?thesis .
  next
    case 2
    then have "k1 k2. J = {k1,k2}  k1  k2" by (meson card_2_iff)
    then obtain k1 k2 where [simp]: "J = {k1,k2}" "k1  k2" by blast+

    have "?lhs = measure_pmf.prob (tb_S n) {h. tb_H k1 h = f k1  tb_H k2 h = f k2}" by simp
    also have " = 1 / (real (card R) * real (card R))" using prob_tb_H_bin1_bin2 by simp
    also have " = (keyJ. indicat_real R (f key) / real (card R))" by simp
    also have " = ?rhs" using prob_tb_H_bin by fastforce
    finally show ?thesis .
  next
    case 3
    then have "k1 k2 k3. J = {k1,k2,k3}  k1  k2  k2  k3  k3  k1" by (metis card_3_iff)
    then obtain k1 k2 k3 where [simp]: "J = {k1,k2,k3}" "k1  k2" "k2  k3" "k3  k1" "k1  k3"
       by blast+

    have "?lhs = measure_pmf.prob (tb_S n)
      {h. tb_H k1 h = f k1  tb_H k2 h = f k2  tb_H k3 h = f k3}" by simp
    also have " = 1 / (real (card R) * real (card R) * real (card R))"
      using prob_tb_H_bin1_bin2_bin3 by simp
    also have " = (keyJ. indicat_real R (f key) / real (card R))" by simp
    also have " = ?rhs" using prob_tb_H_bin by fastforce
    finally show ?thesis .
  qed
qed

theorem three_universal:
  shows "prob_space.k_universal (tb_S n) 3 tb_H Dn R"
  using prob_space.k_universal_def measure_pmf.prob_space_axioms three_indep_vars uniform by blast

subsection ‹Proof of non-4-universal›

lemma prob_4_conv:
  fixes u v :: 'fragment
  assumes "W  X  Y  Z = 0" "h. P (h(0 := undefined, Suc 0 := undefined)) = P h"
          "CARD('n) > Suc 0" "u  v"
  shows "measure_pmf.prob (tb_S n) {h::nat  'fragment  ('result, 'q) vec.
     h 0 u  h (Suc 0) u  P h = W 
     h 0 u  h (Suc 0) v  P h = X 
     h 0 v  h (Suc 0) u  P h = Y 
     h 0 v  h (Suc 0) v  P h = Z }
    = 1 / (real (card R) * real (card R) * real (card R))" (is "?lhs = ?rhs")
proof -
  let ?N = "N n"
  let ?H = "H n"
  let ?tb_S = "tb_S n"

  note assms(1,2) [simp]
  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  define W' X' Y' Z' where
    "W'  λh. W  P h"
    "X'  λh. X  P h"
    "Y'  λh. Y  P h"
    "Z'  λh. Z  P h"

  have xor_sum:  "W' h  X' h  Y' h  Z' h = 0" for h
    by (simp add: W'_X'_Y'_Z'_def commute left_commute)

  have bij:
    "bij_betw
      (λ(a, b, c, d, e, f). a(Suc 0 := b(u := d, v := e), 0 := c(v := f)))
      ((?N - {Suc 0, 0} E D E R) ×
        (D - {u, v} E R) ×
        (D - {v} E R) ×
        R × R × R)
      ?H"
    using assms by (intro bij_betw_funcsetE.remove2_remove2'remove1) simp_all

  let ?destructure =
    "(pair_pmf (pmf_of_set (?N - {Suc 0, 0} E D E R))
      (pair_pmf (pmf_of_set (D - {u, v} E R))
        (pair_pmf (pmf_of_set (D - {v} E R))
          (pmf_of_set (R × R × R)))))"

  have asmap: "?tb_S =
    map_pmf
      (λ(a, b, c, d, e, f). a(1 := b(u := d, v := e), 0 := c(v := f)))
      ?destructure"
    unfolding tb_S_def
    by (simp add: bij flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
        del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs =
    (measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI1.
          Sigma UNIV (λI0.
            {(u1,v1,v0).
              I0 u  u1  P h = W 
              I0 u  v1  P h = X 
              v0    u1  P h = Y 
              v0    v1  P h = Z})))))"
    unfolding asmap measure_map_pmf
    apply (clarsimp intro!: measure_pmf_cong simp add: assms(4))
    by (smt (z3) array_rules(5) assms(2) fun_upd_twist)

  (* note: it helps the solver when we group ‹W ⊕ P h› into ‹W' h› *)
  also have " =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI1.
          Sigma UNIV (λI0.
            {(u1,v1,v0).
              I0 u  u1 = W' h 
              I0 u  v1 = X' h 
              v0    u1 = Y' h 
              v0    v1 = Z' h }))))"
    apply (clarsimp intro!: measure_pmf_cong)
    unfolding W'_X'_Y'_Z'_def
    by (smt (verit, best) assoc right_neutral self_inv)

  also have " =
    measure_pmf.prob ?destructure
      (Sigma UNIV (λh.
        Sigma UNIV (λI1.
          Sigma UNIV (λI0.
            {(I0 u  W' h,
              I0 u  X' h,
              I0 u  W' h  Y' h)}))))"
    apply (clarsimp intro!: measure_pmf_cong)
    by (smt (z3) xor_sum assoc abel_group_xor_class.eq_iff)

  also have " = ?rhs"
    apply (subst measure_pmf_prob_dependent_product_bound_eq'[where r = "?rhs"])+
    by simp_all

  finally show ?thesis .
qed

lemma not_four_indep:
  assumes "CARD('n) > 1" ― ‹if n = 1, then tabulation hashing is 4-independent›
          "card D  2" ― ‹if D = {} or D = {x}, then it is impossible to obtain 4 distinct keys›
          "card R > 1" ― ‹tabulation hashing is 4-independent otherwise›
  shows "¬ prob_space.k_wise_indep_vars (tb_S n) 4 (λ_. count_space R) tb_H Dn"
proof -
  let ?tb_S = "tb_S n"
  fix f :: "('fragment, 'n) vec  ('result, 'q) vec"

  obtain u v :: "'fragment" where pq [simp]: "u  v" by (meson assms(2) UNIV_I card_2_iff' ex_card)
  define u_n where "u_n  replicate (CARD('n) - 2) u"

  let ?w = "u # u # u_n"
  let ?x = "u # v # u_n"
  let ?y = "v # u # u_n"
  let ?z = "v # v # u_n"

  define w x y z :: "('fragment, 'n) vec" where wxyz:
    "w = vec_of_list ?w" "x = vec_of_list ?x" "y = vec_of_list ?y" "z = vec_of_list ?z"
  define J where [simp]: "J = {w,x,y,z}"

  (* Preliminaries start *)
  have N: "a # b # u_n  {xs. length xs = CARD('n)}" for a b
    unfolding u_n_def using assms(1) by simp

  have distinct [simp]: "w  x" "w  y" "w  z" "x  y" "x  z" "y  z"
    using N by (simp_all add: wxyz vec_of_list_inject)

  have [simp]: "card J = 4" by simp

  have [simp]:
    "w $ to_index 0 = u" "w $ to_index (Suc 0) = u"
    "x $ to_index 0 = u" "x $ to_index (Suc 0) = v"
    "y $ to_index 0 = v" "y $ to_index (Suc 0) = u"
    "z $ to_index 0 = v" "z $ to_index (Suc 0) = v"
    using N by (simp_all add: wxyz nth_vec.rep_eq vec_of_list_inverse to_from_index)

  (* unpack the definition of tb_H w | tb_H x | tb_H y | tb_H z *)
  have rw: "
    xor_fold (map (λi. h i ((a # b # u_n) ! from_index (to_index i :: 'n))) [0..<CARD('n)]) =
      h 0 a 
      h (Suc 0) b 
      xor_fold (map (λi. h i u) [Suc (Suc 0)..<CARD('n)])"
    (is "xor_fold ?a = _") for a b h
  proof -
    have "?a = h 0 a # h (Suc 0) b # map (λi. h i u) [Suc (Suc 0) ..< CARD('n)]"
      using assms(1) by (auto simp: upt_conv_Cons to_from_index u_n_def)
    then show ?thesis by simp
  qed

  (* Preliminaries end *)

  have "¬ prob_space.indep_vars ?tb_S (λ_. count_space UNIV) tb_H J"
  proof -
    have "measure_pmf.prob ?tb_S {h. keyJ. tb_H key h = f key}
       1/(card R * card R * card R * card R)" (is "?lhs  ")
    proof (cases "f w  f x  f y  f z = 0")
      case True

      text ‹In this case, we want to show that the probability is $\frac{1}{R^3}$ and not $\frac{1}{R^4}$›

      let ?a = "λh. xor_fold (map (λi. h i u) [Suc (Suc 0)..<CARD('n)])"

      have "?lhs = measure_pmf.prob ?tb_S {h.
        h 0 u  h (Suc 0) u  ?a h = f w 
        h 0 u  h (Suc 0) v  ?a h = f x 
        h 0 v  h (Suc 0) u  ?a h = f y 
        h 0 v  h (Suc 0) v  ?a h = f z}"
        using N by (simp add: tb_H_def wxyz nth_vec.rep_eq vec_of_list_inverse rw to_from_index)

      also have " = 1 / real (card R * card R * card R)"
        using assms(1) True by (auto intro!: prob_4_conv arg_cong[where f = xor_fold])

      finally show ?thesis using assms(3) by simp
    next
      case False
      (* In this case, we want to show that the probability is 0 because it is impossible *)
      have False if "tb_H w h = f w" "tb_H x h = f x" "tb_H y h = f y" "tb_H z h = f z" for h
      proof -
        have "tb_H w h  tb_H x h  tb_H y h  tb_H z h = 0"
          using N by (simp add: tb_H_def wxyz nth_vec.rep_eq vec_of_list_inverse rw to_from_index ac_simps)
        then show False using that False by simp
      qed
      then have "?lhs = 0" by (auto simp: measure_pmf_zero_iff)
      then show ?thesis by simp
    qed

    moreover have " = (keyJ. measure_pmf.prob ?tb_S {h. tb_H key h = f key})"
      using prob_tb_H_bin by fastforce

    ultimately show ?thesis
      by (intro measure_pmf.indep_vars_pmf_contrapos[where P = "λ x y. y = f x"]) simp_all
  qed

  then show ?thesis by (force simp: prob_space.k_wise_indep_vars_def prob_space_measure_pmf)
qed

theorem not_four_universal:
  assumes "CARD('n) > 1" ― ‹if n = 1, then tabulation hashing is 4-independent›
          "card D  2" ― ‹if D = {} or D = {x}, then it is impossible to obtain 4 distinct keys›
          "card R > 1" ― ‹tabulation hashing is 4-independent otherwise›
  shows "¬ prob_space.k_universal (tb_S n) 4 tb_H Dn R"
  using not_four_indep[OF assms]
  by (simp add: prob_space.k_universal_def measure_pmf.prob_space_axioms)

end (* locale simple_tabulation_hashing *)

section ‹Appendix›
subsection ‹Utility›

context prob_space
begin

context
  fixes K D k p
  assumes assms': "K  D" "card K  k" "finite K" "M = measure_pmf p"
begin

(* useful downstream lemma *)
lemma k_wise_indep_vars_probD:
  assumes "k_wise_indep_vars k (λ_. count_space UNIV) X D"
  shows "prob {ω. xK. P x (X x ω)} = (xK. prob {ω. P x (X x ω)})"
  using assms' assms unfolding k_wise_indep_vars_def by (subst split_indep_events[of _ X K]) auto

(* useful downstream lemma *)
lemma
  assumes "k_universal k X D R"
  shows
    k_universal_probD:
      " P. prob {ω. xK. P x (X x ω)} = (xK. prob {ω. P x (X x ω)})"
      (is "PROP ?thesis_0") and
    k_universal_probD':
      "prob {ω. xK. X x ω = P x} = (xK. indicat_real R (P x) / real (card R))"
      (is ?thesis_1)
proof -
  from assms k_wise_indep_vars_probD show "PROP ?thesis_0"
    unfolding k_universal_def by fastforce
  from this[of "λ x y. y = P x"] uniform_onD[where A = R and B = "{P _}"] assms' assms
  show ?thesis_1
    unfolding k_universal_def
    by (auto intro!: prod.cong split: split_indicator simp: prob_space_measure_pmf)
qed

end
end (* prob_space *)

(* wrapper around simple_tabulation_hashing *)
locale simple_tabulation_hashing' = simple_tabulation_hashing +
  assumes r: "r = TYPE('result::{finite,abel_group_xor})"
  assumes q: "q = TYPE('q::index1)"
  assumes d: "d = TYPE('fragment::finite)"
begin

end (*locale simple_tabulation_hashing'*)

subsection ‹Alternate proof of non-4-universal›

subsubsection ‹Preliminaries›

lemma (in prob_space) finite_if_k_universal:
  assumes "k_universal k X D R" "D  {}"
  shows "finite R"
  using assms by (auto simp add: k_universal_def uniform_on_def)

(* we settle on using functions to represent a vector *)
lemma xor_sum_uniform:
  fixes α :: "'a :: {finite,abel_group_xor}"
  defines "R  UNIV"
  assumes [simp]: "finite J" "J  {}"
  shows "measure_pmf.prob (pmf_of_set (J E R)) {f. (xJ. f x) = α} = 1 / real CARD('a)"
  (is "?lhs = ?rhs")
proof -
  note finite_PiE[simp] PiE_eq_empty_iff[simp] measure_pmf_single[simp]

  obtain a where a: "a  J" and bij: "bij_betw (λ(f, v). f(a := v)) ((J - {a} E R) × R) (J E R)"
    using bij_betw_funcsetE.bij_PiE_remove_point[of _ J R] assms(3) by blast

  have [simp]: "R  {}" using assms(1) by blast

  have [simp]: "set_pmf (pmf_of_set (J E R)) = J E R"
               "set_pmf (pmf_of_set (J - {a} E R)) = J - {a} E R"
               "set_pmf (pmf_of_set R) = R"
    by (auto intro!: set_pmf_of_set)

  let ?destructure =
    "(pair_pmf (pmf_of_set (J - {a} E R))
       (pmf_of_set R))"

  have asmap: "pmf_of_set (J E R) = map_pmf (λ(f, v). f(a := v)) ?destructure"
    using assms bij by (simp flip: pmf_of_set_prod_eq map_pmf_of_set_bij_betw
                              del: PiE_UNIV UNIV_Times_UNIV)

  have "?lhs = measure_pmf.prob (pmf_of_set (J E R)) {fJ E R. (xJ. f x) = α}"
    apply (subst measure_Int_set_pmf[symmetric])
    by (auto intro!: arg_cong[where ?f = "measure_pmf.prob _"])

  also have " =
    measure_pmf.prob ?destructure
      (Sigma (J - {a} E R) (λf. {x. x  (xJ - {a}. f x) = α}))"
    using PiE_fun_upd a by (auto simp add: asmap xor_sum.remove[where ?x = a, OF assms(2) a]
                                 intro!: measure_pmf_cong)

  also have " =
    measure_pmf.prob ?destructure
      (Sigma (J - {a} E R) (λf. {α  (xJ- {a}. f x)}))"
    by (fastforce intro: measure_pmf_cong simp add: ac_simps)

  also have " = ?rhs"
    by (simp add: assms(1) measure_pmf_prob_dependent_product_bound_eq' measure_pmf_of_set)

  finally show "?lhs = ?rhs" .
qed

(* we used ‹D ≠ {}› instead of ‹J ≠ {}› because it is more general *)
(* card J ≠ 0 implies finite J as well *)
lemma k_universal_conv_pmf_of_set:
  defines "R  UNIV" (* to figure out if we can allow R ⊆ UNIV *)
  assumes "prob_space.k_universal (measure_pmf p) k X D R"
      and "J  D" "card J  k" "finite J" "D  {}"
  shows "map_pmf (λω. λxJ. X x ω) p = pmf_of_set (J E R)"
proof (intro pmf_eqI)
  fix P
  from assms(2) have fR: "finite R" by (simp add: assms(6) measure_pmf.finite_if_k_universal)

  {
    assume P: "P  J E R"

    have "pmf (map_pmf (λω. λxJ. X x ω) p) P = measure_pmf.prob p {ω. (λxJ. X x ω) = P}"
      by (simp add: pmf_map vimage_def)

    also have " = measure_pmf.prob p {ω. xJ. X x ω = P x}"
      using P by (fastforce intro: arg_cong[where ?f = "measure_pmf.prob _"])

    also have " = (xJ. indicat_real R (P x) / real (card R))"
      using assms by (simp add: measure_pmf.k_universal_probD')

    also have " = (1 / real (card R)) ^ card J"
      apply (subst prod.cong[where ?B= J and ?h = "λ_. 1 / real (card R)"]; simp)
      by (metis P indicator_simps(1) PiE_E)

    also have " = indicat_real (J E R) P / real (card (J E R))"
      by (simp add: P assms(5) card_funcsetE power_one_over)

    also have " = pmf (pmf_of_set (J E R)) P"
      using P by (subst pmf_of_set) (auto simp add: assms(5) fR finite_PiE)

    finally have "pmf (map_pmf (λω. λxJ. X x ω) p) P = pmf (pmf_of_set (J E R)) P" .
  }

  moreover {
    assume P: "P  J E R"

    have "pmf (map_pmf (λω. λxJ. X x ω) p) P = measure_pmf.prob p {ω. (λxJ. X x ω) = P}"
      by (simp add: pmf_map vimage_def)

    also have " = 0"
    proof (subst measure_pmf_zero_iff)
      have "{ω. (λxJ. X x ω) = P} = {}"
        using P assms(1) (* using R ≡ UNIV for now *)
        by (auto simp add: measure_pmf.prob_space_axioms prob_space.k_universal_def)

      then show "set_pmf p  {ω. (λxJ. X x ω) = P} = {}" by blast
    qed

    also have " = pmf (pmf_of_set (J E R)) P"
      apply (rule sym)
      apply (subst pmf_eq_0_set_pmf)
      apply (subst set_pmf_of_set)
      subgoal by (simp add: PiE_eq_empty_iff assms(1))
      subgoal by (simp add: assms(5) fR finite_PiE)
      using P .

    finally have "pmf (map_pmf (λω. λxJ. X x ω) p) P = pmf (pmf_of_set (J E R)) P" .
  }

  ultimately show "pmf (map_pmf (λω. λxJ. X x ω) p) P = pmf (pmf_of_set (J E R)) P" by blast
qed

lemma (*tag:important*) k_universal_imp_xor_sum_uniform:
  fixes α :: "'a :: {finite,abel_group_xor}"
  defines "R  UNIV"
  assumes "prob_space.k_universal (measure_pmf p) k X D R"
      and "J  D" "card J  k" "finite J" "J  {}"
  shows "measure_pmf.prob p {ω. (xJ. X x ω) = α} = 1 / real (card R)"
  (is "?lhs = ?rhs")
proof -
  have "?lhs = measure_pmf.prob (map_pmf (λω. λxJ. X x ω) p) {f. (xJ. f x) = α}" by simp
  also have " = measure_pmf.prob (pmf_of_set (J E R)) {f. (xJ. f x) = α}"
    using assms by (intro arg_cong2[where ?f = "measure_pmf.prob"])
                   (auto intro!: k_universal_conv_pmf_of_set)
  also have " = ?rhs" using assms by (auto intro!: xor_sum_uniform)
  finally show "?lhs = ?rhs" .
qed

subsubsection ‹Proofs›

context simple_tabulation_hashing begin

lemma k_wise_indep_vars_imp_xor_sum_uniform:
  assumes "prob_space.k_wise_indep_vars (tb_S n) k (λ_. count_space R) tb_H Dn"
          "J  {}" "card J  k"
  shows "measure_pmf.prob (tb_S n) {h. (xJ. tb_H x h) = α} = 1 / real (card R)"
  using assms
  by (auto
    intro: k_universal_imp_xor_sum_uniform[where ?k = k and ?D = "Dn"]
    simp: prob_space.k_universal_def prob_space_measure_pmf uniform)

lemma not_k_wise_indep_vars_by_xor_sum:
  assumes "measure_pmf.prob (tb_S n) {h. (xJ. tb_H x h) = α}  1 / real (card R)"
          "J  {}" "card J  k"
  shows "¬ prob_space.k_wise_indep_vars (tb_S n) k (λ_. count_space R) tb_H Dn"
  using assms(1) apply (rule contrapos_nn)
  using assms by (simp add: k_wise_indep_vars_imp_xor_sum_uniform)

lemma not_four_indep':
  assumes "CARD('n) > 1" ― ‹if n = 1, then tabulation hashing is 4-independent›
          "card D  2" ― ‹if D = {} or D = {x}, then it is impossible to obtain 4 distinct keys›
          "card R > 1" ― ‹tabulation hashing is 4-independent otherwise›
  shows "¬ prob_space.k_wise_indep_vars (tb_S n) 4 (λ_. count_space R) tb_H Dn"
proof -
  obtain u v :: "'fragment" where pq [simp]: "u  v" by (meson assms(2) UNIV_I card_2_iff' ex_card)
  define u_n where "u_n  replicate (CARD('n) - 2) u"
  let ?w = "u # u # u_n"
  let ?x = "u # v # u_n"
  let ?y = "v # u # u_n"
  let ?z = "v # v # u_n"

  define w x y z :: "('fragment, 'n) vec" where wxyz:
    "w = vec_of_list ?w" "x = vec_of_list ?x" "y = vec_of_list ?y" "z = vec_of_list ?z"
  define J where [simp]: "J = {w,x,y,z}"

  have N: "a # b # u_n  {xs. length xs = CARD('n)}" for a b
    using assms(1) by (simp add: u_n_def)

  have [simp]: "w  x" "w  y" "w  z" "x  y" "x  z" "y  z"
    using N by (simp_all add: vec_of_list_inject wxyz)

  have [simp]: "card J = 4" by simp

  (* unpack the definition of tb_H w | tb_H x | tb_H y | tb_H z *)
  have rw: "
    xor_fold (map (λi. h i ((a # b # u_n) ! from_index (to_index i :: 'n))) [0..<CARD('n)]) =
      h 0 a 
      h (Suc 0) b 
      xor_fold (map (λi. h i u) [Suc (Suc 0)..<CARD('n)])"
    (is "xor_fold ?a = _") for a b h
  proof -
    have "?a = h 0 a # h (Suc 0) b # map (λi. h i u) [Suc (Suc 0) ..< CARD('n)]"
      using assms(1) by (auto simp: upt_conv_Cons to_from_index u_n_def)
    then show ?thesis by simp
  qed

  from N have "measure_pmf.prob (tb_S n) {h. (xJ. tb_H x h) = 0} = 1"
    apply (simp add: tb_H_def nth_vec.rep_eq del: eq_iff)
    by (simp add: wxyz nth_vec.rep_eq vec_of_list_inverse rw ac_simps)

  then show ?thesis
    using assms(3) by (intro not_k_wise_indep_vars_by_xor_sum[of J 0]) simp_all
qed
end (* context simple_tabulation_hashing *)
end (* theory *)