Theory CVM_Abstract_Algorithm

section ‹Abstract Algorithm\label{sec:cvm_abs}›

text ‹This section verifies an abstract version of the CVM algorithm, where the subsampling step
can be an arbitrary randomized algorithm fulfilling an expectation invariant.

The abstract algorithm is presented in Algorithm~\ref{alg:cvm_abs}.

\begin{algorithm}[h!]
	\caption{Abstract CVM algorithm.\label{alg:cvm_abs}}
	\begin{algorithmic}[1]
  \Require Stream elements $a_1,\ldots,a_l$, $0 < \varepsilon$, $0 < \delta < 1$, $\frac{1/2} \leq f < 1$
  \Ensure An estimate $R$, s.t., $\prob \left( | R - |A| | > \varepsilon |A| \right) \leq \delta$ where $A := \{a_1,\ldots,a_l\}.$
  \State $\chi \gets \{\}, p \gets 1, n \geq \left\lceil \frac{12}{\varepsilon^2} \ln(\frac{3l}{\delta}) \right\rceil$
  \For{$i \gets 1$ to $l$}
    \State $b \getsr \Ber(p)$ \Comment insert $a_i$ with probability $p$ (and remove it otherwise)
    \If{$b$}
      \State $\chi \gets \chi \cup \{a_i\}$
    \Else
      \State $\chi \gets \chi - \{a_i\}$
    \EndIf
    \If{$|\chi| = n$}
      \State $\chi \getsr \mathrm{subsample}(\chi)$ \Comment abstract subsampling step
      \State $p \gets p f$
    \EndIf
  \EndFor
  \State \Return $\frac{|\chi|}{p}$ \Comment estimate cardinality of $A$
\end{algorithmic}
\end{algorithm}

For the subsampling step we assume that it fulfills the following inequality:

\begin{equation}
\label{eq:subsample_condition}
\int_{\mathrm{subsample}(\chi)} \left(\prod_{i \in S} g(i \in \omega) \right)\, d \omega \leq
  \prod_{i \in S} \left(\int_{Ber(f)} g(\omega)\, d\omega\right)
\end{equation}
for all non-negative functions $g$ and $S \subseteq \chi$,
where $\mathrm{Ber}(p)$ denotes the Bernoulli-distribution.

The original CVM algorithm uses a subsampling step where each element of $\chi$ is retained
independently with probability $f$. It is straightforward to see that this fulfills the above
condition (with equality).

The new CVM algorithm variant proposed in this work uses a subsampling step where a random
$nf$-sized subset of $\chi$ is kept. This also fulfills the above inequality, although this is
harder to prove and will be explained in more detail in Section~\ref{sec:cvm_new}.

In this section, we will verify that the above abstract algorithm indeed fulfills the desired
conditions on its estimate, as well as unbiasedness, i.e., that: $\expect [R] = |A|$.
The part that is not going to be verified in this section, is the fact that the algorithm keeps at
most $n$ elements in the state $\chi$, because it is not unconditionally true, but will be ensured
(by different means) for the concrete instantiations in the following sections.

An informal version of this proof is presented in Appendix~\ref{apx:informal_proof}.
For important lemmas and theorems, we include a reference to the corresponding statement in the
appendix.›

theory CVM_Abstract_Algorithm

imports
  "HOL-Decision_Procs.Approximation"
  CVM_Preliminary
  Finite_Fields.Finite_Fields_More_PMF
  Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
begin

unbundle no_vec_syntax

datatype 'a state = State (state_χ: 'a set) (state_p: real)

datatype 'a run_state = FinalState 'a list | IntermState 'a list 'a

lemma run_state_induct:
  assumes P (FinalState [])
  assumes xs x. P (FinalState xs)  P (IntermState xs x)
  assumes xs x. P (IntermState xs x)  P (FinalState (xs@[x]))
  shows P result
proof -
  have P (FinalState xs)  P (IntermState xs x) for xs x
    using assms by (induction xs rule:rev_induct) auto
  thus ?thesis by (cases result) auto
qed

locale cvm_algo_abstract =
  fixes n :: nat and f :: real and subsample :: 'a set  'a set pmf
  assumes n_gt_0: n > 0
  assumes f_range: f  {1/2..<1}
  assumes subsample:
    U x. card U = n  x  set_pmf (subsample U)  x  U
  assumes subsample_inequality:
    g U S. S  U
       card U = n
       range g  {0::real..}
       (ω. (sS. g(s  ω)) subsample U)  (sS. (ω. g ω bernoulli_pmf f))
begin

text ‹Line 1 of Algorithm~\ref{alg:cvm_abs}:›
definition initial_state :: 'a state where
  initial_state  State {} 1

text ‹Lines 3--7:›
fun step_1 :: 'a  'a state  'a state pmf where
  step_1 a (State χ p) =
    do {
      b  bernoulli_pmf p;
      let χ = (if b then χ  {a} else χ - {a});

      return_pmf (State χ p)
    }

text ‹Lines 8--10:›
fun step_2 :: 'a state  'a state pmf where
  step_2 (State χ p) = do {
    if card χ = n
    then do {
      χ  subsample χ;
      return_pmf (State χ (p*f))
    } else do {
      return_pmf (State χ p)
    }
  }

schematic_goal step_1_def: step_1 x σ = ?x
  by (subst state.collapse[symmetric], subst step_1.simps, rule refl)

schematic_goal step_2_def: step_2 σ = ?x
  by (subst state.collapse[symmetric], subst step_2.simps, rule refl)

text ‹Lines 1--10:›
definition run_steps :: 'a list  'a state pmf where
  run_steps xs  foldM_pmf (λx σ. step_1 x σ  step_2) xs initial_state

text ‹Line 11:›
definition estimate :: 'a state  real where
  estimate σ = card (state_χ σ) / state_p σ

lemma run_steps_snoc: run_steps (xs @[x]) = run_steps xs  step_1 x  step_2
  unfolding run_steps_def foldM_pmf_snoc by (simp add:bind_assoc_pmf)

fun run_state_pmf where
  run_state_pmf (FinalState xs) = run_steps xs |
  run_state_pmf (IntermState xs x) = run_steps xs  step_1 x

fun len_run_state where
  len_run_state (FinalState xs) = length xs |
  len_run_state (IntermState xs x) = length xs

fun run_state_set where
  run_state_set (FinalState xs) = set xs |
  run_state_set (IntermState xs x) = set xs  {x}

lemma finite_run_state_set[simp]: finite (run_state_set σ) by (cases σ) auto

lemma subsample_finite_pmf:
  assumes card U = n
  shows finite (set_pmf (subsample U))
proof-
  have finite (Pow U) unfolding finite_Pow_iff using assms n_gt_0 card_gt_0_iff by blast
  from finite_subset[OF _ this] show ?thesis using subsample[OF assms] by auto
qed

lemma finite_run_state_pmf: finite (set_pmf (run_state_pmf ρ))
proof (induction ρ rule:run_state_induct)
  case 1 thus ?case by (simp add:run_steps_def)
next
  case (2 xs x) thus ?case by (simp add:step_1_def Let_def)
next
  case (3 xs x)
  have a: run_state_pmf (FinalState (xs@[x])) = run_state_pmf (IntermState xs x)  step_2
    by (simp add:run_steps_snoc)
  show ?case unfolding a using 3 subsample_finite_pmf
    by (auto simp:step_2_def simp del:run_state_pmf.simps)
qed

lemma state_χ_run_state_pmf: AE σ in run_state_pmf ρ. state_χ σ  run_state_set ρ
proof (induction ρ rule:run_state_induct)
  case 1 thus ?case by (simp add:run_steps_def AE_measure_pmf_iff initial_state_def)
next
  case (2 xs x)
  then show ?case by (simp add:AE_measure_pmf_iff Let_def step_1_def) auto
next
  case (3 xs x)
  let ?p = run_state_pmf (IntermState xs x)
  have b: run_state_pmf (FinalState (xs@[x])) = ?p  step_2
    by (simp add:run_steps_snoc)
  show ?case unfolding b using subsample 3 by (simp add:AE_measure_pmf_iff step_2_def Let_def) blast
qed

lemma state_χ_finite: AE σ in run_state_pmf ρ. finite (state_χ σ)
  using finite_subset[OF _ finite_run_state_set]
  by (intro AE_mp[OF state_χ_run_state_pmf AE_I2]) auto

lemma state_p_range: AE σ in run_state_pmf ρ. state_p σ  {0<..1}
proof (induction ρ rule:run_state_induct)
  case 1 thus ?case by (simp add:run_steps_def AE_measure_pmf_iff initial_state_def)
next
  case (2 xs x)
  then show ?case by (simp add:AE_measure_pmf_iff Let_def step_1_def)
next
  case (3 xs x)
  let ?p = run_state_pmf (IntermState xs x)
  have b: run_state_pmf (FinalState (xs@[x])) = ?p  step_2
    by (simp add:run_steps_snoc)
  have x * f  1 if x  {0<..1} for x using f_range that by (intro mult_le_one) auto
  thus ?case unfolding b using 3 f_range by (simp add:AE_measure_pmf_iff step_2_def Let_def)
qed

text ‹Lemma~\ref{le:prob_invariant}:›
lemma run_steps_preserves_expectation_le:
  fixes φ :: real  bool  real
  assumes phi :
     x b. 0 < x; x  1  φ x b  0
     p x. 0 < p; p  1; 0 < x; x  1  (ω. φ x ω bernoulli_pmf p)  φ (x / p) True
    mono_on {0..1} (λx. φ x False)
  defines aux  λ S σ. ( x  S. φ (state_p σ) (x  state_χ σ))
  assumes S  run_state_set ρ
  shows measure_pmf.expectation (run_state_pmf ρ) (aux S)  φ 1 True ^ card S
  using assms(5)
proof (induction ρ arbitrary: S rule: run_state_induct)
  case 1 thus ?case by (simp add:aux_def)
next
  case (2 xs x)

  have finite (set_pmf (run_steps xs))
    using finite_run_state_pmf[where ρ=FinalState xs] by simp
  note [simp] = integrable_measure_pmf_finite[OF this]

  have fin_S: finite S using finite_run_state_set 2(2) finite_subset by auto

  have a: aux S ω = aux (S - {x}) ω * aux (S  {x}) ω for ω :: 'a state
    unfolding aux_def using fin_S by (subst prod.union_disjoint[symmetric]) (auto intro:prod.cong)

  have b: finite (set_pmf (run_steps xs  step_1 x))
    using finite_run_state_pmf[where ρ=IntermState xs x] by simp

  have c:(u. aux (S{x}) u step_1 x τ)  φ 1 True^(card (S  {x}))  (is ?L  ?R)
    if τ  set_pmf (run_steps xs) for τ
  proof(cases x  S)
    case True
    have p_range: state_p τ  {0<..1}
      using state_p_range[where ρ=FinalState xs] that by (auto simp: AE_measure_pmf_iff)
    have ?L = measure_pmf.expectation (bernoulli_pmf (state_p τ)) (λx. φ (state_p τ) x)
      unfolding step_1_def Let_def map_pmf_def[symmetric] integral_map_pmf aux_def using True
      by (intro integral_cong_AE AE_pmfI) simp_all
    also have   φ (state_p τ / state_p τ) True using p_range by (intro phi(2)) auto
    also have   φ 1 True using p_range by simp
    also have  = φ 1 True ^ card (S  {x}) using True by simp
    finally show ?thesis by simp
  next
    case False thus ?thesis by (simp add:aux_def)
  qed

  have d: aux (S-{x}) τ  0 if τ  set_pmf (run_steps xs) for τ
    using state_p_range[where ρ=FinalState xs] that unfolding aux_def
    by (intro prod_nonneg phi(1) power_le_one) (auto simp: AE_measure_pmf_iff)

  have (τ. aux S τ (bind_pmf (run_steps xs) (step_1 x))) =
    (τ. (u. aux (S - {x}) τ * aux (S  {x}) u step_1 x τ) run_steps xs)
    unfolding integral_bind_pmf[OF bounded_finite[OF b]] a
    by (intro integral_cong_AE AE_pmfI arg_cong2[where f=(*)] prod.cong)
      (auto simp:step_1_def aux_def Let_def)
  also have  = (τ. aux (S-{x}) τ * (u. aux (S{x}) u step_1 x τ) run_steps xs)
    by simp
  also have   (τ. aux (S-{x}) τ * (φ 1 True)^ card (S{x}) run_steps xs)
    by (intro integral_mono_AE AE_pmfI mult_left_mono c d) auto
  also have  = (φ 1 True) ^ card (S{x}) * (τ. aux (S-{x}) τ (run_state_pmf (FinalState xs)))
    by simp
  also have   (φ 1 True) ^ card (S{x}) * (φ 1 True)^ card (S - {x})
    using phi(1) 2(2) by (intro mult_left_mono 2(1)) auto
  also have  = (φ 1 True) ^ (card ((S{x})  (S-{x})))
    using fin_S by (subst card_Un_disjoint) (auto simp add:power_add)
  also have  = (φ 1 True) ^ card S by (auto intro!:arg_cong2[where f=λx y. x ^ (card y)])
  finally show ?case by simp
next
  case (3 xs x)
  define p where  p = run_state_pmf (IntermState xs x)
  have a: run_state_pmf (FinalState (xs@[x])) = p  step_2
    by (simp add:run_steps_snoc p_def)

  have fin_S: finite S using finite_run_state_set 3(2) finite_subset by auto

  have finite (set_pmf p)
    using finite_run_state_pmf[where ρ=IntermState xs x] by (simp add:p_def)
  note [simp] = integrable_measure_pmf_finite[OF this]

  have b:finite (set_pmf (p  step_2))
    using finite_run_state_pmf[where ρ=FinalState (xs@[x])] a by simp

  have c: (u. (sS. φ (f * state_p τ) (s  u)) subsample (state_χ τ))  aux S τ
    (is ?L  ?R) if that': card(state_χ τ) = n τ  set_pmf p for τ
  proof -
    let ?q = subsample (state_χ τ)
    let ?U = state_χ τ

    define p' where p' = f * state_p τ

    have d: y  state_χ τ if y  set_pmf (subsample (state_χ τ)) for y
      using subsample[OF that'(1)] that by auto

    have e: state_p τ  {0<..1}
      using that(2) unfolding p_def using state_p_range by (meson AE_measure_pmf_iff)
    hence f: p'  {0<..1} unfolding p'_def using f_range by (auto intro:mult_le_one)

    have ?L = (u. (s(S-?U)  (S?U). φ p' (su)) ?q)
      using fin_S p'_def by (intro integral_cong_AE prod.cong AE_pmfI) auto
    also have  = (u. (sS-?U. φ p' (su)) * (s(S?U). φ p' (su)) ?q)
      using fin_S by (subst prod.union_disjoint) auto
    also have  = (u. (sS-?U. φ p' False) * (s(S?U). φ p' (su)) ?q)
      using d by (intro integral_cong_AE AE_pmfI arg_cong2[where f=(*)] prod.cong
          arg_cong2[where f=φ]) auto
    also have  = (sS-?U. φ p' False) * (u. (sS?U. φ p' (su)) ?q)
      by simp
    also have   (sS-?U. φ p' False) * (sS?U. (u. φ p' u bernoulli_pmf f))
      using that f by (intro mult_left_mono subsample_inequality prod_nonneg) (auto intro!:phi(1))
    also have   (sS-?U. φ p' False) * (sS?U. φ (p'/f) True)
      using f f_range
      by (intro mult_left_mono prod_mono phi(2) conjI integral_nonneg_AE AE_pmfI phi(1) prod_nonneg)
         auto
    also have   (sS-?U. φ (state_p τ) False) * (sS?U. φ (state_p τ) True)
      using e f f_range unfolding p'_def
      by (intro mult_mono prod_mono conjI phi(1) mono_onD[OF phi(3)] prod_nonneg power_le_one) auto
    also have  = (sS-?U. φ(state_p τ) (s  ?U)) * (sS?U. φ(state_p τ) (s  ?U))
      by simp
    also have  = (s(S-?U)(S?U). φ(state_p τ) (s  ?U))
      using fin_S by (subst prod.union_disjoint[symmetric]) (auto)
    also have  = aux S τ unfolding aux_def by (intro prod.cong) auto
    finally show ?thesis by simp
  qed

  have (τ. aux S τ run_state_pmf (FinalState (xs@[x]))) = (τ. aux S τ bind_pmf p step_2)
    unfolding a by simp
  also have  = (τ. (u. aux S u step_2 τ) p) by (intro integral_bind_pmf bounded_finite b)
  also have  = (τ. (if card(state_χ τ) = n then
    (u. (sS. φ (f * state_p τ) (s  u)) subsample (state_χ τ)) else aux S τ) p)
    unfolding step_2_def map_pmf_def[symmetric] Let_def aux_def
    by (simp add:if_distrib if_distribR ac_simps cong:if_cong)
  also have   (τ. (if card(state_χ τ) < n then aux S τ else aux S τ) p)
    using c by (intro integral_mono_AE AE_pmfI) auto
  also have  = (τ. aux S τ p) by simp
  also have   φ 1 True ^ card S using 3(2) unfolding p_def by (intro 3(1)) auto
  finally show ?case by simp
qed

text ‹Lemma~\ref{le:prob_invariant_simple}:›
lemma run_steps_preserves_expectation_le' :
  fixes q :: real and h :: real  real
  assumes h:
    0 < q q  1
    concave_on {0 .. 1 / q} h
     x. 0  x; x * q  1  h x  0
  defines
    aux  λS σ. (x  S. of_bool (state_p σ  q) * h (of_bool (x  state_χ σ) / state_p σ))

  assumes S  run_state_set ρ
  shows (τ. aux S τ run_state_pmf ρ)  (h 1) ^ card S (is ?L  ?R)
proof -
  define φ where φ = (λp e. of_bool (q  p) * h(of_bool e / p))
  have φ_1: φ x b  0 if x > 0 x  1 for x b
    using h(1,4) that unfolding φ_def by simp
  have φ_2: φ x True * p + φ x False * (1 - p)  φ (x / p) True if x  {0<..1} p  {0<..1}
    for x p
  proof (cases 1 / x  {0..1 / q})
    case True
    hence a:q  x using that(1) h(1) by (simp add:divide_simps)
    also have   x/p using that by (auto simp add:divide_simps)
    finally have b:q  x / p by simp
    show ?thesis
      unfolding φ_def using that concave_onD[OF h(3) _ _ _ True, where t=p and x=0] a b h(1)
      by (auto simp:algebra_simps)
  next
    case False
    hence a:q > x using that h(1) by (auto simp add:divide_simps)
    hence q  x/p  0  h (p / x) using that by (intro h(4)) (auto simp:field_simps)
    thus ?thesis using a by (simp add:φ_def)
  qed
  have φ_3: φ x False  φ y False if x  y for x y
     using that by (auto intro:h(4) simp add:φ_def)

  have ?L = (τ. (xS. φ (state_p τ) (x  state_χ τ)) run_state_pmf ρ)
    unfolding φ_def by (simp add:state_p_def aux_def)
  also have   φ 1 True^ card S using φ_1 φ_2 φ_3
    by (intro run_steps_preserves_expectation_le assms ) (auto intro:mono_onI)
  also have  = h 1 ^ card S using h unfolding φ_def by simp
  finally show ?thesis by simp
qed

text ‹Analysis of the case where @{term card (set xs)  n}:›

context
  fixes xs :: 'a list
begin

private abbreviation A  real (card (set xs))

context
  assumes set_larger_than_n: card (set xs)  n
begin

private definition q = real n / (4 * card (set xs))

lemma q_range: q  {0<..1/4}
  using set_larger_than_n n_gt_0 unfolding q_def by auto

lemma mono_nonnegI:
  assumes x. x  I  h' x  0
  assumes x. x  I  (h has_real_derivative (h' x)) (at x)
  assumes x  I  {0..} convex I 0  I h 0  0
  shows h x  0
proof -
  have h_mono: h x  h y if that': x  y x  I y  I for x y
  proof -
    have {x..y}  I unfolding closed_segment_eq_real_ivl1[OF that(1),symmetric]
      by (intro closed_segment_subset assms that)
    from subsetD[OF this]
    show ?thesis using assms(1,2) by (intro DERIV_nonneg_imp_nondecreasing[OF that(1)]) auto
  qed

  have 0  h 0 using assms by simp
  also have   h x using assms by (intro h_mono) auto
  finally show ?thesis by simp
qed

lemma upper_tail_bound_helper:
  assumes x  {0<..1::real}
  defines h  (λx. - q * x2 / 3 - ln (1 + q * x) + q * ln (1 + x) * (1 + x))
  shows h x  0
proof -
  define h' where h' x = -2*x*q/3 - q/(1+q*x) + q*ln(1+x) + q for x :: real

  have a: (h has_real_derivative (h' x)) (at x) if x  0 x  1 for x
  proof -
    have 0 < (1::real) + 0 by simp
    also have   1 + q * x using that q_range by (intro add_mono mult_nonneg_nonneg) auto
    finally have 0 < 1 + q * x by simp
    thus ?thesis
      using that q_range unfolding h_def h'_def power2_eq_square
      by (auto intro!:derivative_eq_intros)
  qed

  have b: h' x  0 if x  0 x  1 for x
  proof -
    have exp(2*x/3) = exp( (1-x) *R 0 + x *R (2/3)) by simp
    also have   (1-x) * exp 0 + x * exp(2/3)
      using that by (intro convex_onD[OF exp_convex]) auto
    also have  = 1 + x * (exp (2/3)-exp 0) by (simp add:algebra_simps)
    also have   1 + x * 1 by (intro that add_mono order.refl mult_left_mono) (approximation 5)
    finally have exp(2*x/3)  exp (ln (1+x)) using that by simp
    hence 0  ln (1+x) - 2 * x / 3 by simp
    also have  = ln (1+x) + 1 - 2*x/3 - 1 by simp
    also have   ln (1+x) + 1 - 2*x/3 - 1/(1+q*x)
      using q_range that by (intro add_mono diff_mono) (auto simp:divide_simps)
    finally have c: 0  ln (1+x) + 1 - 2*x/3 - 1/(1+q*x) by simp

    have h' x = q * (-2*x/3 - 1/(1+q*x) + ln (1+x) + 1)
      unfolding h'_def by (simp add:algebra_simps)
    also have   0 using c q_range by (intro mult_nonneg_nonneg) auto
    finally show ?thesis by simp
  qed

  show ?thesis by (rule mono_nonnegI[where I={0..1}, OF b a]) (use assms(1) h_def in simp_all)
qed

private definition θ where θ t x = 1 + q * x * (exp (t / q) - 1)

lemma θ_concave: concave_on {0..1 / q} (θ t)
  unfolding θ_def by (intro concave_on_linorderI) (auto simp:algebra_simps)

lemma θ_ge_exp_1:
  assumes x  {0..1/q}
  shows exp (t * x)  θ t x
proof -
  have exp (t * x) = exp ((1-q*x) *R 0 + (q*x) *R (t/q)) using q_range by simp
  also have   (1-q*x) * exp 0 + (q*x) * exp (t/q) using assms q_range
    by (intro convex_onD[OF exp_convex]) (auto simp:field_simps)
  also have  = θ t x unfolding θ_def by (simp add:algebra_simps)
  finally show ?thesis by simp
qed

lemma θ_ge_exp:
  assumes y  q
  shows exp (t / y)  θ t (1 / y)
  using assms θ_ge_exp_1[where x=1/y and t=t] q_range by (auto simp:field_simps)

lemma θ_nonneg:
  assumes x  {0..1/q}
  shows θ t x  0 θ t x > 0
proof -
  have 0 < exp (t * x) by simp
  also have   θ t x by (intro θ_ge_exp_1 assms)
  finally show θ t x > 0 by simp
  thus θ t x  0 by simp
qed

lemma θ_0: θ t 0 = 1 unfolding θ_def by simp

lemma tail_bound_aux:
  assumes run_state_set ρ  set xs c > 0
  defines A'  real (card (run_state_set ρ))
  shows measure (run_state_pmf ρ) {ω. exp (t * estimate ω)  c  state_p ω  q}  θ t 1 powr A'/c
    (is ?L  ?R)
proof -
  let ?p = run_state_pmf ρ
  note [simp] = integrable_measure_pmf_finite[OF finite_run_state_pmf]

  let ?A' = run_state_set ρ
  let ?X = λi ω. of_bool (i  state_χ ω) / state_p ω

  have a: 0 < θ t 1 using q_range by (intro θ_nonneg) auto

  have ?L  𝒫(ω in ?p. of_bool(state_p ω  q) * exp (t*estimate ω)  c)
    by (intro pmf_mono) auto
  also have   (ω. of_bool(state_p ω  q) * exp (t*estimate ω) ?p) / c
    by (intro integral_Markov_inequality_measure[where A={}] assms(2)) simp_all
  also have  = (ω. of_bool(state_p ω  q) * exp((i?A'. t * ?X i ω)) ?p)/c
    using state_χ_run_state_pmf[where ρ=ρ] Int_absorb1
    unfolding sum_divide_distrib[symmetric] sum_distrib_left[symmetric] estimate_def
    by (intro integral_cong_AE arg_cong2[where f=(/)])
      (auto simp:AE_measure_pmf_iff intro!:arg_cong[where f=card])
  also have   (ω. (i  ?A'. of_bool(state_p ω  q) * exp(t * ?X i ω)) ?p) / c
    unfolding exp_sum[OF finite_run_state_set] prod.distrib using assms(2)
    by (intro divide_right_mono integral_mono_AE AE_pmfI)
      (auto intro!:mult_nonneg_nonneg prod_nonneg)
  also have   (ω. (i  ?A'. of_bool(state_p ω  q) * θ t (?X i ω)) ?p) / c
    using assms(2) θ_ge_exp θ_0 by (intro divide_right_mono integral_mono_AE AE_pmfI prod_mono) auto
  also have   θ t 1 ^ card ?A' / c using q_range θ_concave assms(2)
    by (intro divide_right_mono run_steps_preserves_expectation_le' θ_nonneg)
     (auto intro!:θ_nonneg simp:field_simps)
  also have   ?R
    unfolding A'_def using card_mono[OF _ assms(1)] assms(2) a
    by (subst powr_realpow) (auto intro!:power_increasing divide_right_mono)
  finally show ?thesis by simp
qed

text ‹Lemma \ref{le:upper_tail}:›
lemma upper_tail_bound:
  assumes ε  {0<..1::real}
  assumes run_state_set ρ  set xs
  shows measure (run_state_pmf ρ) {ω. estimate ω  (1+ε)*A  state_p ω  q}  exp(-real n/12*ε2)
    (is ?L  ?R)
proof -
  let ?p = run_state_pmf ρ
  define t where t = q * ln (1+ε)

  have t_gt_0: t > 0 unfolding t_def using q_range assms(1) by auto

  have mono_exp_t: strict_mono (λ(x::real). exp (t * x))
    using t_gt_0 by (intro strict_monoI) auto

  have a: θ t 1 = 1 + q * ε using assms(1) unfolding θ_def t_def by simp
  have b: θ t 1  1 unfolding a using q_range assms(1) by auto

  have c: ln (θ t 1) - t * (1 + ε)  - q * ε^2 / 3
    using upper_tail_bound_helper[OF assms(1)]
    unfolding a unfolding t_def by (simp add:algebra_simps)

  have ?L = measure ?p {ω. exp (t * estimate ω)  exp (t*((1+ε)* A))state_p ω q}
    by (subst strict_mono_less_eq[OF mono_exp_t]) simp
  also have   θ t 1 powr real (card (run_state_set ρ)) / exp (t * ((1+ε)* A))
    by (intro tail_bound_aux assms) auto
  also have   θ t 1 powr A / exp (t * ((1+ε)* A))
    using card_mono[OF finite_set assms(2)] b
    by (intro powr_mono divide_right_mono) auto
  also have  = exp (A * (ln (θ t 1) - t * (1 + ε)))
    using b unfolding powr_def by (simp add:algebra_simps exp_diff)
  also have   exp (A * (-q * ε^2/3))
    by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono c) simp
  also have  = ?R using set_larger_than_n n_gt_0 unfolding q_def by auto
  finally show ?thesis by simp
qed

text ‹Lemma~\ref{le:low_p}:›
lemma low_p:
  shows measure (run_steps xs) {σ. state_p σ < q}  real (length xs) * exp(-real n/12)
    (is ?L  ?R)
proof -
  define ρ where ρ = FinalState xs

  have ih: run_state_set ρ  set xs unfolding ρ_def by simp

  have ?L = measure (run_state_pmf ρ) {ω. state_p ω < q}
    unfolding ρ_def run_state_pmf.simps by simp
  also have   real (len_run_state ρ) * exp(- real n/12)
    using ih
  proof (induction ρ rule:run_state_induct)
    case 1
    then show ?case using q_range by (simp add:run_steps_def initial_state_def)
  next
    case (2 ys x)
    let ?pmf = run_state_pmf (IntermState ys x)
    have a:run_state_set (FinalState ys)  set xs using 2(2) by auto

    have measure ?pmf {ω. state_p ω < q} = (σ. of_bool (state_p σ < q) run_steps ys)
      unfolding run_state_pmf.simps step_1_def Let_def by (simp add:measure_bind_pmf indicator_def)
    also have  = (σ. indicator {ω. (state_p ω < q)} σ run_steps ys)
      by (intro integral_cong_AE AE_pmfI) simp_all
    also have  = measure (run_steps ys) {ω. (state_p ω < q)} by simp
    also have   real (len_run_state (IntermState ys x)) * exp (- real n / 12)
      using 2(1)[OF a] by simp
    finally show ?case by simp
  next
    case (3 ys x)
    define p where p = run_state_pmf (IntermState ys x)

    have finite (set_pmf p) unfolding p_def by (intro finite_run_state_pmf)
    note [simp] = integrable_measure_pmf_finite[OF this]

    have a: run_state_pmf (FinalState (ys@[x])) = p  step_2 (is ?pmf= _)
      by (simp add:run_steps_snoc p_def)

    have b: run_state_set (IntermState ys x)  set xs
      using 3(2) by simp

    have c:measure (step_2 σ) {σ. state_p σ < q} 
      indicator {σ. state_p σ < q  (card (state_χ σ) = n  state_p σ  {q..<q/f}) } σ
      for σ :: 'a state
      using f_range
      by (simp add:step_2_def Let_def indicator_def map_pmf_def[symmetric] divide_simps)

    have d: 2 * real (card (set xs))  real n / α if α  {q..<q / f} for α
    proof -
      have α  q * (1/f) using that by simp
      also have   q * 2 using q_range f_range by (intro mult_left_mono) (auto simp:divide_simps)
      finally have α  2*q by simp
      hence α  real n / (2 * real (card (set xs)))
        using set_larger_than_n n_gt_0 unfolding q_def by (simp add:divide_simps)
      thus ?thesis
        using set_larger_than_n n_gt_0 that q_range by (simp add:field_simps)
    qed

    hence measure p {σ. card (state_χ σ) = n  state_p σ  {q..<q/f}} 
      measure p {σ. (1+1) * A  estimate σ  q  state_p σ}
      unfolding estimate_def by (intro pmf_mono) (simp add:estimate_def)
    also have   exp (- real n / 12 * 1^2)
      unfolding p_def by (intro upper_tail_bound b) simp
    finally have e:
      measure p {σ. card (state_χ σ) = n  state_p σ  {q..<q/f}}  exp (- real n / 12)
      by simp

    have measure (run_state_pmf (FinalState (ys @ [x]))) {ω. state_p ω < q} =
      (s. measure (step_2 s) {ω. state_p ω < q} p)
      unfolding a by (simp add:measure_bind_pmf)
    also have   (s. indicator {ω. state_p ω < q  card (state_χ ω) = n  state_p ω  {q..<q/f}} s p)
      by (intro integral_mono_AE AE_pmfI c) simp_all
    also have  = measure p {ω. state_p ω < q  card (state_χ ω) = n  state_p ω  {q..<q/f}}
      by simp
    also have  measure p {ω. state_p ω<q}+measure p {ω. card(state_χ ω)=nstate_p ω{q..<q/f}}
      by (intro pmf_add) auto
    also have   length ys * exp (- real n / 12) + exp (- real n / 12)
      using 3(1)[OF b] by (intro add_mono e) (simp add:p_def)
    also have  = real (len_run_state (FinalState (ys @ [x]))) * exp (- real n / 12)
      by (simp add:algebra_simps)
    finally show ?case by simp
  qed
  also have  = real (length xs) * exp(- real n/12) by (simp add:ρ_def)
  finally show ?thesis by simp
qed

lemma lower_tail_bound_helper:
  assumes x  {0<..<1::real}
  defines h  (λx. - q * x2 / 2 - ln (1 - q * x) + q * ln (1 - x) * (1 - x))
  shows h x  0
proof -
  define h' where h' x = -x*q + q/(1-q*x) - q*ln(1-x) - q for x

  have a: (h has_real_derivative (h' x)) (at x) if x  0 x < 1 for x
  proof -
    have q * x  (1/4) * 1 using that q_range by (intro mult_mono) auto
    also have  < 1 by simp
    finally have q * x < 1 by simp
    thus ?thesis
      using that q_range unfolding h_def h'_def power2_eq_square
      by (auto intro!:derivative_eq_intros)
  qed

  have b: h' x  0 if x  0 x < 1 for x
  proof -
    have q * x  (1/4) * 1 using that q_range by (intro mult_mono) auto
    also have  < 1 by simp
    finally have a:q * x < 1 by simp

    have 0  - ln (1 - x) - x using ln_one_minus_pos_upper_bound[OF that] by simp
    also have  = - ln (1 - x) - 1 - x + 1 by simp
    also have   - ln (1 - x) - 1 - x + 1 / (1 - q * x)
      using a q_range that by (intro add_mono diff_mono) (auto simp:divide_simps)
    finally have b: 0  - ln (1 - x) - 1 - x + 1 / (1 - q * x) by simp

    have h' x = q * (-x + 1 / (1 - q * x) - ln (1 - x) - 1)
      unfolding h'_def by (simp add:algebra_simps)
    also have   0  using b q_range by (intro mult_nonneg_nonneg) auto
    finally show ?thesis by simp
  qed

  show ?thesis by (rule mono_nonnegI[where I={0..<1}, OF b a]) (use assms(1) h_def in simp_all)
qed

text ‹Lemma~\ref{le:lower_tail}:›
lemma lower_tail_bound:
  assumes ε  {0<..<1::real}
  shows measure (run_steps xs) {ω. estimate ω  (1-ε) * A  state_p ω  q}  exp(-real n/8*ε2)
    (is ?L  ?R)
proof -
  let ?p = run_state_pmf (FinalState xs)
  define t where t = q * ln (1-ε)

  have t_lt_0: t < 0
    unfolding t_def using q_range assms(1) by (intro mult_pos_neg ln_less_zero) auto

  have mono_exp_t: exp (t * x)  exp (t * y)  y  x for x y using t_lt_0 by auto

  have a: θ t 1 = 1 - q * ε using assms(1) unfolding θ_def t_def by simp
  have ε * (q * 4)  1 * 1 using q_range assms(1) by (intro mult_mono) auto
  hence b: θ t 1  3/4 unfolding a by (auto simp:algebra_simps)

  have c: ln (θ t 1) - t * (1 - ε)  - q * ε^2 / 2
    unfolding a unfolding t_def using lower_tail_bound_helper[OF assms(1)]
    by (simp add:divide_simps)

  have ?L = measure ?p {ω. exp (t * estimate ω)  exp (t * ((1-ε) * A))  state_p ω  q}
    by (subst mono_exp_t) simp
  also have   θ t 1 powr card (run_state_set (FinalState xs)) / exp (t * ((1 - ε) * A))
    by (intro tail_bound_aux assms) auto
  also have   θ t 1 powr A / exp (t * ((1 - ε) * A)) by simp
  also have  = exp (A * (ln (θ t 1) - t * (1 - ε)))
    using b unfolding powr_def by (simp add:algebra_simps exp_add[symmetric] exp_diff)
  also have   exp (A * (- q * ε ^ 2 / 2))
    by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono c) simp
  also have  = ?R using set_larger_than_n n_gt_0 unfolding q_def by auto
  finally show ?thesis by simp
qed

lemma correctness_aux:
  assumes ε  {0<..<1::real} δ  {0<..<1::real}
  assumes real n  12/ε^2 * ln (3*real (length xs) /δ)
  shows measure (run_steps xs) {ω. ¦estimate ω - A¦ > ε*A }  δ
    (is ?L  ?R)
proof -
  let ?pmf = run_steps xs
  let ?pmf' = run_state_pmf (FinalState xs)
  let ?p = state_p
  let ?l = real (length xs)
  have l_gt_0: length xs > 0 using set_larger_than_n n_gt_0 by auto
  hence l_ge_1: ?l  1  by linarith

  have a:ln (3 * real (length xs) / δ) = - ln (δ / (3 * ?l))
    using l_ge_1 assms(2) by (subst (1 2) ln_div) auto

  have exp (- real n / 12 * 1)  exp (- real n / 12 * ε ^ 2)
    using assms(1) by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg power_le_one) auto
  also have    δ / (3 * ?l)
    using assms(1-3) l_ge_1 unfolding a by (subst ln_ge_iff[symmetric]) (auto simp:divide_simps)
  finally have exp (- real n / 12)  δ / (3*?l) by simp
  hence b: ?l * exp (- real n / 12)  δ / 3 using l_gt_0 by (auto simp:field_simps)

  have exp(-real n/12 * ε^2)  δ / (3*?l)
    using assms(1-3) l_ge_1 unfolding a by (subst ln_ge_iff[symmetric]) (auto simp:divide_simps)
  also have   δ / 3 using assms(1-3) l_ge_1 by (intro divide_left_mono) auto
  finally have c: exp(-real n/12 * ε^2)  δ / 3 by simp

  have exp(-real n/8 * ε^2)  exp(-real n/12 * ε^2) by (intro iffD2[OF exp_le_cancel_iff]) auto
  also have   δ/3 using c by simp
  finally have d: exp(-real n/8 * ε^2)  δ / 3 by simp

  have ?L  measure ?pmf {ω. ¦estimate ω - A¦  ε * A} by (intro pmf_mono) auto
  also have   measure ?pmf {ω. ¦estimate ω - A¦  ε*A  ?p ω  q} + measure ?pmf {ω. ?p ω < q}
    by (intro pmf_add) auto
  also have   measure ?pmf {ω. (estimate ω  (1-ε) * A  estimate ω  (1+ε) * A)  ?p ω  q} +
    ?l * exp(-real n/12)
    by (intro pmf_mono add_mono low_p) (auto simp:abs_real_def algebra_simps split:if_split_asm)
  also have   measure ?pmf {ω. estimate ω  (1-ε) * A  state_p ω  q} +
    measure ?pmf' {ω. estimate ω  (1+ε) * A  state_p ω  q} + δ/3
    unfolding run_state_pmf.simps by (intro add_mono pmf_add b) auto
  also have   exp(-real n/8 * ε ^ 2) + exp(-real n/12 * ε ^ 2) + δ / 3
    using assms(1) by (intro upper_tail_bound add_mono lower_tail_bound) auto
  also have   δ / 3 +  δ / 3 + δ / 3 by (intro  add_mono d c) auto
  finally show ?thesis by simp
qed

end

lemma deterministic_phase:
  assumes card (run_state_set σ) < n
  shows run_state_pmf σ = return_pmf (State (run_state_set σ) 1)
  using assms
proof (induction σ rule:run_state_induct)
  case 1 thus ?case by (simp add:run_steps_def initial_state_def)
next
  case (2 xs x)
  have card (set xs) < n using 2(2) by (simp add:card_insert_if) presburger
  moreover have bernoulli_pmf 1 = return_pmf True
    by (intro pmf_eqI) (auto simp:bernoulli_pmf.rep_eq)
  ultimately show ?case using 2(1) by (simp add:step_1_def bind_return_pmf)
next
  case (3 xs x)
  let ?p = run_state_pmf (IntermState xs x)
  have a: card (run_state_set (IntermState xs x)) < n using 3(2) by simp
  have b: run_state_pmf (FinalState (xs@[x])) = ?p  step_2
    by (simp add:run_steps_snoc)
  show ?case
    using 3(2) unfolding b 3(1)[OF a] by (simp add:step_2_def bind_return_pmf Let_def)
qed

text ‹Theorem~\ref{th:concentration}:›
theorem correctness:
  fixes ε δ :: real
  assumes ε  {0<..<1} δ  {0<..<1}
  assumes real n  12 / ε2 * ln (3 * real (length xs) / δ)
  shows measure (run_steps xs) {ω. ¦estimate ω - A¦ > ε * A}  δ
proof (cases card (set xs)  n)
  case True
  show ?thesis by (intro correctness_aux True assms)
next
  case False
  hence run_steps xs = return_pmf (State (set xs) 1)
    using deterministic_phase[where σ=FinalState xs] by simp
  thus ?thesis using assms(1,2) by (simp add:indicator_def estimate_def not_less)
qed

lemma p_pos: M{0<..1}. AE ω in run_steps xs. state_p ω  M
proof -
  have fin:finite (set_pmf (run_steps xs))
    using finite_run_state_pmf[where ρ=FinalState xs] by simp
  define M where M = (MIN σ  set_pmf (run_steps xs). state_p σ)
  have M  state_p ` set_pmf (run_steps xs)
    using fin set_pmf_not_empty unfolding M_def by (intro Min_in) auto
  also have   {0<..1}
    using state_p_range[where ρ=FinalState xs]
    by (intro image_subsetI) (simp add:AE_measure_pmf_iff)
  finally have M  {0<..1} by simp

  moreover have AE ω in run_steps xs. state_p ω  M
    using fin unfolding AE_measure_pmf_iff M_def by (intro ballI Min_le) auto
  ultimately show ?thesis by auto
qed

lemma run_steps_expectation_sing:
  assumes i: i  set xs
  shows measure_pmf.expectation (run_steps xs) (λω. of_bool (i  state_χ ω) / state_p ω) = 1
  (is ?L = _)
proof -
  have finite (set_pmf (run_steps xs))
    using finite_run_state_pmf[where ρ=FinalState xs] by simp
  note int = integrable_measure_pmf_finite[OF this]

  obtain M where *: AE σ in run_steps xs. M  state_p σ and M_range: M  {0<..1}
    using p_pos by blast
  then have ?L = (τ. (x{i}. of_bool (M  state_p τ) * (of_bool (x  state_χ τ) / state_p τ))
      run_state_pmf (FinalState xs))
    by (auto intro!: integral_cong_AE)

  also have   1 ^ card {i}
    using M_range i by (intro run_steps_preserves_expectation_le') (auto simp:concave_on_iff)
  finally have le: ?L  1 by auto

  have concave: concave_on {0..1 / M} ((-) (1 / M + 1))
    unfolding concave_on_iff
    using M_range apply (clarsimp simp add: field_simps)
    by (metis combine_common_factor distrib_right linear mult_1_left)

  have 1 / M + 1 - ?L = (ω. 1 / M + 1 - of_bool (i  state_χ ω) / state_p ω run_steps xs)
    by (auto simp:int)
  also have  = (τ. (x{i}. of_bool (M  state_p τ) *
    (1 / M + 1 - of_bool (x  state_χ τ) / state_p τ)) run_state_pmf (FinalState xs))
    using * by (auto intro!: integral_cong_AE)
  also have   (1 / M + 1 - 1) ^ card {i}
    using i M_range
    by (intro run_steps_preserves_expectation_le'[OF _ _ concave]) (auto simp: field_simps)
  also have  = 1 / M by auto
  finally have ge:-?L  -1 by auto
  show ?thesis using le ge by auto
qed

text ‹Subsection~\ref{sec:unbiasedness}:›
corollary unbiasedness:
  fixes σ :: 'a run_state
  shows measure_pmf.expectation (run_steps xs) estimate = real (card (set xs))
    (is ?L = _)
proof -
  have finite (set_pmf (run_steps xs))
    using finite_run_state_pmf[where ρ=FinalState xs] by simp
  note [simp] = integrable_measure_pmf_finite[OF this]

  have s: AE ω in run_steps xs. state_χ ω  set xs
    by (metis run_state_pmf.simps(1) run_state_set.simps(1) state_χ_run_state_pmf)

  have ?L = (ω. (iset xs. of_bool (i  state_χ ω)) / state_p ω run_steps xs)
    unfolding estimate_def state_p_def[symmetric]
    by (auto intro!: integral_cong_AE intro: AE_mp[OF s] simp add: Int_absorb1)

  also have  = (ω. (iset xs. of_bool (i  state_χ ω) / state_p ω) run_steps xs)
    by (metis (no_types) sum_divide_distrib)
  also have  = (iset xs. (ω. of_bool (i  state_χ ω) / state_p ω run_steps xs))
    by (auto intro: Bochner_Integration.integral_sum)
  also have  = (iset xs. 1)
    using run_steps_expectation_sing by (auto cong:sum.cong)
  finally show ?thesis by auto
qed

end (* fixes xs *)

end (* cvm_algo_abstract *)

end (* theory *)