Theory Randomized_Closest_Pair_Growth

section ‹Growth of Close Points›

text ‹This section verifies a result similar to (but more general than)
Lemma~2 by Rabin~\cite{rabin1976}. Let $N(d)$ denote the number of pairs from the point sequence
$p_1,\ldots,p_n$, with distance less than $d$:

\[
  N(d) := | \{ (i,j) | d(p_i, p_j) < d \wedge 1 \leq i,j \leq n \} |
\]

Obviously, $N(d)$ is monotone. It is possible to show that the growth of
$N(d)$ is bounded.

In particular:
\[
  N(ad) \leq (2a \sqrt{2}+3)^2 N(d)
\]
for all $a > 0$, $d > 0$. As far as we can tell the proof below is new.

\emph{Proof:} Consider a 2D-grid with size $\alpha := \frac{d}{\sqrt{2}}$ and let us denote by $G(x,y)$
the number of points that fall in the cell $(x,y) \in \mathbb Z \times \mathbb Z$, i.e.:

\[
G(x,y) := \left| \left\{ i \middle | \left\lfloor \frac{p_{i,1}}{\alpha} \right\rfloor = x \wedge
  \left\lfloor \frac{p_{i,2}}{\alpha} \right\rfloor = x \right\} \right| \textrm{,}
\]
where $p_{i,1}$ (resp. $p_{i,2}$) denote the first (resp. second) component of point p.

Let also $s := \lceil a \sqrt{2} \rceil$.

Then we can observe that
\begin{eqnarray*}
N(ad) & \leq & \sum_{(x,y)\in\mathbb Z\times\mathbb Z} \sum_{i=-s}^{s} \sum_{j=-s}^{s} G(x,y) G(x+i,y+j) \\
& = & \sum_{i=-s}^{s} \sum_{j=-s}^{s} \sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y) G(x+i,y+j)\\
& \leq & \sum_{i=-s}^{s} \sum_{j=-s}^{s} \left(\left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \right) \left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x+i,y+j)^2\right) \right)^{1/2}\\
& \leq & \sum_{i=-s}^{s} \sum_{j=-s}^{s} \left(\left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \right) \left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2\right) \right)^{1/2}\\
& \leq & (2s+1)^2 \sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \\
& \leq & (2a \sqrt(2)+3)^2 \sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \\
& \leq & (2a \sqrt(2)+3)^2 N(d)
\end{eqnarray*}

The first inequality follows from the fact that if two points are $ad$ close,
their x-coordinates and y-coordinates will differ by at most $ad$. I.e. their grid coordinates will
differ at most by $s$. This means the pair will be accounted for in the right hand side of the
inequality.

The third inequality is an application of the Cauchy--Schwarz inequality.

The last inequality follows from the fact that the largest possible distance of two points
in the same grid cell is $d$. \qed›

theory Randomized_Closest_Pair_Growth
  imports
    "HOL-Library.Sublist"
    Randomized_Closest_Pair_Correct
begin

lemma inj_translate:
  fixes a b :: int
  shows "inj (λx. (fst x + a, snd x + b))"
proof -
  have 0:"(λx. (fst x + a, snd x + b)) = (λx. x + (a,b))" by auto
  show ?thesis unfolding 0 by simp
qed

lemma of_nat_sum':
  "(of_nat (sum' f S) :: ('a :: {semiring_char_0})) = sum' (λx. of_nat (f x)) S"
  unfolding sum.G_def by simp

lemma sum'_nonneg:
  fixes f :: "'a  'b :: {ordered_comm_monoid_add}"
  assumes "x. x  S  f x  0"
  shows "sum' f S  0"
proof -
  have "0  sum f {x  S. f x  0}" using assms by (intro sum_nonneg) auto
  thus ?thesis unfolding sum.G_def by simp
qed

lemma sum'_mono:
  fixes f :: "'a  'b :: {ordered_comm_monoid_add}"
  assumes "x. x  S  f x  g x"
  assumes "finite {x  S. f x  0}"
  assumes "finite {x  S. g x  0}"
  shows "sum' f S  sum' g S" (is "?L  ?R")
proof -
  let ?S = "{i  S. f i  0}  {i  S. g i  0}"

  have "?L = sum' f ?S" by (intro sum.mono_neutral_right') auto
  also have "... = (i  ?S. f i)" using assms by (intro sum.eq_sum) auto
  also have "...  (i  ?S. g i)" using assms by (intro sum_mono) auto
  also have "... = sum' g ?S" using assms by (intro sum.eq_sum[symmetric]) auto
  also have "... = ?R"  by (intro sum.mono_neutral_left') auto
  finally show ?thesis by simp
qed

lemma cauchy_schwarz':
  assumes "finite {i  S. f i  0}"
  assumes "finite {i  S. g i  0}"
  shows "sum' (λi. f i * g i) S  sqrt (sum' (λi. f i^2) S) * sqrt (sum' (λi. g i^2) S)"
    (is "?L  ?R")
proof -
  let ?S = "{i  S. f i  0}  {i  S. g i  0}"

  have "?L = sum' (λi. f i * g i) ?S" by (intro sum.mono_neutral_right') auto
  also have "... = (i  ?S. f i * g i)" using assms by (intro sum.eq_sum) auto
  also have "...  (i  ?S. ¦f i¦ * ¦g i¦)" by (intro sum_mono) (metis abs_ge_self abs_mult)
  also have "...  L2_set f ?S * L2_set g ?S" by (rule L2_set_mult_ineq)
  also have "... = sqrt (sum' (λi. f i^2) ?S) * sqrt (sum' (λi. g i^2) ?S)"
    unfolding L2_set_def using assms sum.eq_sum by simp
  also have "... = ?R"
    by (intro arg_cong2[where f="(λx y. sqrt x * sqrt y)"] sum.mono_neutral_left') auto
  finally show ?thesis by simp
qed

context comm_monoid_set
begin

lemma reindex_bij_betw':
  assumes "bij_betw h S T"
  shows "G (λx. g (h x)) S = G g T"
proof -
  have "h ` {x  S. g (h x)  1} = {x  T. g x  1}"
    using bij_betw_imp_surj_on[OF assms] by auto
  hence 0: "bij_betw h {x  S. g (h x)  1} {x  T. g x  1}"
    by (intro bij_betw_subset[OF assms]) auto
  hence "finite {x  S. g (h x)  1} = finite {x  T. g x  1}"
    using bij_betw_finite by auto
  thus ?thesis unfolding G_def using reindex_bij_betw[OF 0] by simp
qed

end


definition "close_point_size xs d = length (filter (λ(p,q). dist p q < d) (List.product xs xs))"

lemma grid_dist_upper:
  fixes p q :: point
  assumes "d > 0"
  shows "dist p q < sqrt (iUNIV.(d*(¦p$i/d-q$i/d¦+1))^2)"
    (is "?L < ?R")
proof -
  have a:"¦x - y¦ < ¦d * real_of_int (¦x/d - y/d¦ + 1)¦" for x y :: real
  proof -
    have "¦x - y¦ = d * ¦x/d - y/d¦"
      using assms by (simp add: abs_mult_pos' right_diff_distrib)
    also have "... < d * real_of_int (¦x/d - y/d¦ + 1)"
      by (intro mult_strict_left_mono assms) linarith
    also have "... = ¦d * real_of_int (¦x/d - y/d¦ + 1)¦"
      using assms by simp
    finally show ?thesis by simp
  qed
  have "?L =  sqrt (iUNIV. (p $ i - q $ i)2)"
    unfolding dist_vec_def dist_real_def L2_set_def by simp
  also have "... < ?R"
    using assms by (intro real_sqrt_less_mono sum_strict_mono power2_strict_mono a) auto
  finally show ?thesis by simp
qed

lemma grid_dist_upperI:
  fixes p q :: point
  fixes d :: real
  assumes "d > 0"
  assumes "k. ¦p$k/d-q$k/d¦  s"
  shows "dist p q < d * (s+1) * sqrt 2"
proof -
  have s_ge_0: "s  0" using assms(2)[where k="0"] by simp
  have "dist p q < sqrt (i  UNIV. (d*(¦p$i/d-q$i/d¦+1))^2)"
    by (intro grid_dist_upper assms)
  also have "...  sqrt (i  (UNIV::2 set). (d*(s+1))^2)"
    using assms
    by (intro real_sqrt_le_mono sum_mono power_mono mult_left_mono iffD2[OF of_int_le_iff]) auto
  also have "... = sqrt (2 * (d*(s+1))^2)" by simp
  also have "... = sqrt 2 * sqrt ((d*(s+1))^2)" by (simp add:real_sqrt_mult)
  also have "... = sqrt 2 * (d * (s+1))" using assms s_ge_0 by simp
  also have "... =  d * (s+1) * sqrt 2" by simp
  finally show ?thesis by simp
qed

lemma close_point_approx_upper:
  fixes xs :: "point list"
  fixes G :: "int × int  real"
  assumes "d > 0" "e > 0"
  defines "s  d / e"
  defines "G  (λx. real (length (filter (λp. to_grid e p = x)  xs)))"
  shows "close_point_size xs d  (i  {-s..s}×{-s..s}. sum' (λx. G x * G (x+i)) UNIV)"
    (is "?L  ?R")
proof -
  let ?f = "to_grid e"
  let ?pairs = "mset (List.product xs xs)"

  define T where "T = {-s..s} × {-s..s}"

  have "s  1" unfolding s_def using assms by simp
  hence s_ge_0: "s  0" by simp

  have 0: "finite T" unfolding T_def by simp

  have a: "size {#p ∈# ?pairs. ?f (fst p)-?f (snd p) = i #} = sum' (λx. G x * G (x+i)) UNIV"
    (is "?L1 = ?R1") for i
  proof -
    have "?L1 = size {#p ∈# ?pairs. (?f (fst p),?f (snd p))  {(x,y). x - y = i} #}"
      by simp
    also have "... = sum' (λq. size {# p ∈# ?pairs. (?f (fst p), ?f (snd p))= q #} ) {(x,y). x-y=i}"
      unfolding size_filter_mset_decompose' by simp
    also have "... = sum' (λq. size {# p ∈# ?pairs. (?f (fst p), ?f (snd p)) = (q+i,q) #}) UNIV"
      by (intro arg_cong[where f="real"] sum.reindex_bij_betw'[symmetric] bij_betwI[where g="snd"])
        auto
    also have "... =
      sum' (λq. length (filter (λp. ?f (fst p) = q+i  ?f (snd p) = q) (List.product xs xs))) UNIV"
      by (simp flip: size_mset mset_filter conj_commute)
    also have "... = sum' (λx. G (x+i) * G x) UNIV"
      by (subst filter_product)
        (simp add:G_def build_grid_def of_nat_sum' case_prod_beta' prod_eq_iff)
    finally show ?thesis by (simp add:algebra_simps)
  qed

  have b:"f (?f p) - f (?f q)  {-s..s}" if "f = fst  f = snd" "dist p q < d" for p q f
  proof -
    have "¦f (?f p) - f (?f q)¦  dist p q/e"
      using grid_dist[OF assms(2), where p="p" and q="q"] that(1) unfolding to_grid_def by auto
    also have "...  s"
      unfolding s_def using that(2) assms(1,2)
      by (simp add: ceiling_mono divide_le_cancel)
    finally have "¦f (?f p) - f (?f q)¦  s" by simp
    thus ?thesis using s_ge_0 by auto
  qed

  have c:"?f p - ?f q  T" if "dist p q < d" for p q
    unfolding T_def using b[OF _ that] unfolding mem_Times_iff by simp

  have "?L = size (filter_mset (λ(p,q). dist p q < d) ?pairs)"
    unfolding close_point_size_def by (metis mset_filter size_mset)
  also have "...  size (filter_mset (λp. ?f (fst p) - ?f (snd p)  T) ?pairs)"
    using c by (intro size_mset_mono of_nat_mono multiset_filter_mono_2) auto
  also have "... = (i  T. size (filter_mset (λp. ?f (fst p) - ?f (snd p) = i) ?pairs))"
    by (intro size_filter_mset_decompose arg_cong[where f="of_nat"] 0)
  also have "... = (i  T. sum' (λx. G x * G (x+i)) UNIV)"
    unfolding of_nat_sum by (intro sum.cong a refl)
  also have "... = ?R" unfolding T_def by simp
  finally show ?thesis by simp
qed

lemma close_point_approx_lower:
  fixes xs :: "point list"
  fixes G :: "int × int  real"
  fixes d :: real
  assumes "d > 0"
  defines "G  (λx. real (length (filter (λp. to_grid d p = x)  xs)))"
  shows "sum' (λx. G x ^ 2) UNIV  close_point_size xs (d * sqrt 2)"
    (is "?L  ?R")
proof -
  let ?f = "to_grid d"
  let ?pairs = "mset (List.product xs xs)"

  have "?L = sum' (λx. length (filter (λp. ?f p = x) xs)^2) UNIV "
    unfolding build_grid_def G_def by (simp add:of_nat_sum' prod_eq_iff case_prod_beta')
  also have "... = sum'(λx. length(List.product (filter(λp. ?f p=x)xs) (filter(λp. ?f p=x)xs)))UNIV"
    unfolding length_product by (simp add:power2_eq_square)
  also have "... = sum' (λx. length (filter(λp. ?f(fst p)=x?f(snd p)=x)(List.product xs xs))) UNIV"
    by (subst filter_product) simp
  also have "... = sum' (λx. size {# p ∈# ?pairs. ?f (fst p) = x  ?f (snd p) = x #}) UNIV"
    by (intro arg_cong2[where f="sum'"] arg_cong[where f="real"] refl ext)
     (metis (no_types, lifting) mset_filter size_mset)
  also have "... = sum' (λx. size {# p ∈#{# p∈#?pairs. ?f(fst p)=?f(snd p) #}. ?f(fst p)=x #}) UNIV"
    unfolding filter_filter_mset
    by (intro sum.cong' arg_cong[where f="real"] arg_cong[where f="size"] filter_mset_cong) auto
  also have "... = size {# p ∈# {# p ∈# ?pairs. ?f (fst p) = ?f (snd p) #}. ?f (fst p)  UNIV #}"
    by (intro arg_cong[where f="real"] size_filter_mset_decompose'[symmetric])
  also have "...  size {# p ∈# ?pairs. ?f (fst p) = ?f (snd p) #}" by simp
  also have "... = size {# p ∈# ?pairs. k. fst p $ k/d = snd p $ k/d #}"
    unfolding to_grid_def prod.inject
    by (intro arg_cong[where f="size"] arg_cong[where f="of_nat"] filter_mset_cong refl)
     (metis (full_types) exhaust_2 one_neq_zero)
  also have "...  size {# p ∈# ?pairs. dist (fst p) (snd p) < d * of_int (0+1) * sqrt 2 #}"
    by (intro of_nat_mono size_mset_mono multiset_filter_mono_2 grid_dist_upperI[OF assms(1)]) simp
  also have "... = ?R" unfolding close_point_size_def
    by (simp add:case_prod_beta') (metis (no_types, lifting) mset_filter size_mset)
  finally show ?thesis by simp
qed

lemma build_grid_finite:
  assumes "inj f"
  shows "finite {x. filter (λp. to_grid d p = f x) xs  []}"
proof -
  have 0:"finite (to_grid d ` set xs)" by (intro finite_imageI) auto
  have "finite {x. filter (λp. to_grid d p = x) xs  []}"
    unfolding filter_empty_conv by (intro finite_subset[OF _ 0]) blast
  hence "finite (f -` {x. filter (λp. to_grid d p = x) xs  []})" by (intro finite_vimageI assms)
  thus ?thesis by (simp add:vimage_def)
qed

text ‹Main result of this section:›

lemma growth_lemma:
  fixes xs :: "point list"
  assumes "a > 0" "d > 0"
  shows "close_point_size xs (a * d)  (2 * sqrt 2 * a + 3)^2 * close_point_size xs d"
    (is "?L  ?R")
proof -
  let ?s = "a * sqrt 2"
  let ?G = "(λx. real (length (filter (λp. to_grid (d/sqrt 2) p = x)  xs)))"
  let ?I = "{-?s..?s}×{-?s..?s}"

  have "?s  1" using assms by auto
  hence s_ge_0: "?s  0" by simp

  have a: "?s = a * d / (d / sqrt 2)" using assms by simp

  have "?L  (i{-?s..?s}×{-?s..?s}. sum' (λx. ?G x * ?G (x+i)) UNIV)"
    using assms unfolding a by (intro close_point_approx_upper) auto
  also have "...  (i?I. sqrt (sum' (λx. ?G x^2) UNIV) * sqrt (sum' (λx. ?G (x+i)^2) UNIV))"
    by (intro sum_mono cauchy_schwarz') (auto intro: inj_translate build_grid_finite)
  also have "... = (i?I. sqrt (sum' (λx. ?G x^2) UNIV) * sqrt (sum' (λx. ?G x^2) UNIV))"
    by (intro arg_cong2[where f="(λx y. sqrt x * sqrt y)"] sum.cong refl
        sum.reindex_bij_betw' bij_plus_right)
  also have "... = (i?I. ¦sum' (λx. ?G x^2) UNIV¦)" by simp
  also have "... = (2* ?s + 1)^2 * ¦sum' (λx. ?G x^2) UNIV¦"
    using s_ge_0 by (auto simp: power2_eq_square)
  also have "... = (2* ?s + 1)^2 * sum' (λx. ?G x^2) UNIV"
    by (intro arg_cong2[where f="(*)"] refl abs_of_nonneg sum'_nonneg) auto
  also have "...  (2*?s+1)^2 * real (close_point_size xs ((d/sqrt 2)* sqrt 2))"
    using assms by (intro mult_left_mono close_point_approx_lower) auto
  also have "... =  (2 * of_int ?s+1)^2 * real (close_point_size xs d)" by simp
  also have "...  (2 * (a * sqrt 2 + 1) + 1)^2 * real (close_point_size xs d)"
    using s_ge_0 by (intro mult_right_mono power_mono add_mono mult_left_mono) auto
  also have "... = ?R" by (auto simp:algebra_simps)
  finally show ?thesis by simp
qed

end