Theory Probability_Logic

chapter ‹ Probability Logic \label{chapter:probability} ›

theory Probability_Logic
  imports
    "Propositional_Logic_Class.Classical_Connectives"
    HOL.Real
    "HOL-Library.Countable"
begin

unbundle no funcset_syntax

section ‹ Definition of Probability Logic \label{sec:definition-of-probability-logic} ›

text ‹ Probability logic is defined in terms of an operator over
       classical logic obeying certain postulates. Scholars often credit
       George Boole for first conceiving this kind of formulation @{cite booleChapterXVITheory1853}.
       Theodore Hailperin in particular has written extensively on this subject
       @{cite hailperinProbabilityLogic1984
         and hailperinBooleLogicProbability1986
         and hailperinSententialProbabilityLogic1996}. ›

text ‹ The presentation below roughly follows Kolmogorov's axiomatization
       @{cite kolmogoroffChapterElementareWahrscheinlichkeitsrechnung1933}.
       A key difference is that we only require ∗‹finite additivity›, rather
       than ∗‹countable additivity›. Finite additivity is also defined in
       terms of implication \<^term>‹(→)›. ›

class probability_logic = classical_logic +
  fixes 𝒫 :: "'a ⇒ real"
  assumes probability_non_negative: "𝒫 φ ≥ 0"
  assumes probability_unity: "⊢ φ ⟹ 𝒫 φ = 1"
  assumes probability_implicational_additivity:
    "⊢ φ → ψ → ⊥ ⟹ 𝒫 ((φ → ⊥) → ψ) = 𝒫 φ + 𝒫 ψ"

text ‹ A similar axiomatization may be credited to
       Rescher @{cite ‹pg. 185› rescherManyvaluedLogic1969}.
       However, our formulation has fewer axioms.
       While Rescher assumes \<^term>‹⊢ φ ↔ ψ ⟹ 𝒫 φ = 𝒫 ψ›, we
       show this is a lemma in \S\ref{sec:prob-logic-alt-def}. ›

section ‹ Why Finite Additivity? \label{section:why-finite-additivity} ›

text ‹ In this section we touch on why we have chosen to
       employ finite additivity in our axiomatization of
       @{class probability_logic} and deviate from conventional
       probability theory. ›

text ‹ Conventional probability obeys an axiom known as ∗‹countable additivity›.
       Traditionally it states if ‹S› is a countable set of sets which are
       pairwise disjoint, then the limit ‹∑ s ∈ S. 𝒫 s› exists and
       ‹𝒫 (⋃ S) = (∑ s ∈ S. 𝒫 s)›. This is more powerful than our
       finite additivity axiom
       @{lemma ‹⊢ φ → ψ → ⊥ ⟹ 𝒫 ((φ → ⊥) → ψ) = 𝒫 φ + 𝒫 ψ›
          by (metis probability_implicational_additivity) }. ›

text ‹ However, we argue that demanding countable additivity is not practical. ›

text ‹ Historically, the statisticians Bruno de Finetti and Leonard Savage
       gave the most well known critiques.  In @{cite definettiSuiPassaggiLimite1930}
       de Finetti shows various properties which are true for countably additive
       probability measures may not hold for finitely additive measures.
       Savage @{cite savageDifficultiesTheoryPersonal1967}, on the other hand,
       develops probability based on choices prizes in lotteries. ›

text ‹ We instead argue that if we demand countable additivity, then certain
       properties of real world software would no longer be formally verifiable 
       as we demonstrate here. In particular, it prohibits conventional recursive
       data structures for defining propositions. Our argument is derivative of 
       one given by Giangiacomo Gerla @{cite ‹Section 3› gerlaInferencesProbabilityLogic1994}. ›

text ‹ By taking equivalence classes modulo \<^term>‹λ φ ψ . ⊢ φ ↔ ψ›,
       any classical logic instance gives rise to a Boolean algebra known as
       a ∗‹Lindenbaum Algebra›. In the case of @{typ "'a classical_propositional_formula"}
       this Boolean algebra algebra is both countable and ∗‹atomless›.
       A theorem of Horn and Tarski @{cite ‹Theorem 3.2› hornMeasuresBooleanAlgebras1948}
       asserts there can be no countably additive \<^term>‹Pr› for a countable
       atomless Boolean algebra. ›

text ‹ The above argument is not intended as a blanket refutation of conventional
       probability theory. It is simply an impossibility result with respect
       to software implementations of probability logic. Plenty of classic results 
       in probability rely on countable additivity. A nice example, formalized in 
       Isabelle/HOL, is Bouffon's needle @{cite eberlBuffonNeedleProblem2017}. ›

section ‹ Basic Properties of Probability Logic ›

lemma (in probability_logic) probability_additivity:
  assumes "⊢ ∼ (φ ⊓ ψ)"
  shows "𝒫 (φ ⊔ ψ) = 𝒫 φ + 𝒫 ψ"
  using
    assms
  unfolding
    conjunction_def
    disjunction_def
    negation_def
  by (simp add: probability_implicational_additivity)

lemma (in probability_logic) probability_alternate_additivity:
  assumes "⊢ φ → ψ → ⊥"
  shows "𝒫 (φ ⊔ ψ) = 𝒫 φ + 𝒫 ψ"
  using assms
  by (metis
        probability_additivity
        double_negation_converse
        modus_ponens
        conjunction_def
        negation_def)

lemma (in probability_logic) complementation:
  "𝒫 (∼ φ) = 1 - 𝒫 φ"
  by (metis
        probability_alternate_additivity
        probability_unity
        bivalence
        negation_elimination
        add.commute
        add_diff_cancel_left')

lemma (in probability_logic) unity_upper_bound:
  "𝒫 φ ≤ 1"
  by (metis
        (no_types)
        diff_ge_0_iff_ge
        probability_non_negative
        complementation)

section ‹ Alternate Definition of Probability Logic \label{sec:prob-logic-alt-def} ›

text ‹ There is an alternate axiomatization of probability logic,
       due to Brian Gaines @{cite ‹pg. 159, postulates P7, P8, and P8› gainesFuzzyProbabilityUncertainty1978}
       and independently formulated by Brian Weatherson @{cite weathersonClassicalIntuitionisticProbability2003}.
       As Weatherson notes, this axiomatization is suited to formulating
       ∗‹intuitionistic› probability logic. In the case where the underlying
       logic is classical the Gaines/Weatherson axiomatization is equivalent
       to the traditional Kolmogorov axiomatization from
       \S\ref{sec:definition-of-probability-logic}. ›

class gaines_weatherson_probability = classical_logic +
  fixes 𝒫 :: "'a ⇒ real"
  assumes gaines_weatherson_thesis:
    "𝒫 ⊤ = 1"
  assumes gaines_weatherson_antithesis:
    "𝒫 ⊥ = 0"
  assumes gaines_weatherson_monotonicity:
    "⊢ φ → ψ ⟹ 𝒫 φ ≤ 𝒫 ψ"
  assumes gaines_weatherson_sum_rule:
    "𝒫 φ + 𝒫 ψ = 𝒫 (φ ⊓ ψ) + 𝒫 (φ ⊔ ψ)"

sublocale gaines_weatherson_probability ⊆ probability_logic
proof
  fix φ
  have "⊢ ⊥ → φ"
    by (simp add: ex_falso_quodlibet)
  thus "0 ≤ 𝒫 φ"
    using
      gaines_weatherson_antithesis
      gaines_weatherson_monotonicity
    by fastforce
next
  fix φ
  assume "⊢ φ"
  thus "𝒫 φ = 1"
    by (metis
          gaines_weatherson_thesis
          gaines_weatherson_monotonicity
          eq_iff
          axiom_k
          ex_falso_quodlibet
          modus_ponens
          verum_def)
next
  fix φ ψ
  assume "⊢ φ → ψ → ⊥"
  hence "⊢ ∼ (φ ⊓ ψ)"
    by (simp add: conjunction_def negation_def)
  thus "𝒫 ((φ → ⊥) → ψ) = 𝒫 φ + 𝒫 ψ"
    by (metis
          add.commute
          add.right_neutral
          eq_iff
          disjunction_def
          ex_falso_quodlibet
          negation_def
          gaines_weatherson_antithesis
          gaines_weatherson_monotonicity
          gaines_weatherson_sum_rule)
qed

lemma (in probability_logic) monotonicity:
  "⊢ φ → ψ ⟹ 𝒫 φ ≤ 𝒫 ψ"
proof -
  assume "⊢ φ → ψ"
  hence "⊢ ∼ (φ ⊓ ∼ ψ)"
    unfolding negation_def conjunction_def
    by (metis
          conjunction_def
          exclusion_contrapositive_equivalence
          negation_def
          weak_biconditional_weaken)
  hence "𝒫 (φ ⊔ ∼ ψ) = 𝒫 φ + 𝒫 (∼ ψ)"
    by (simp add: probability_additivity)
  hence "𝒫 φ + 𝒫 (∼ ψ) ≤ 1"
    by (metis unity_upper_bound)
  hence "𝒫 φ + 1 - 𝒫 ψ ≤ 1"
    by (simp add: complementation)
  thus ?thesis by linarith
qed

lemma (in probability_logic) biconditional_equivalence:
  "⊢ φ ↔ ψ ⟹ 𝒫 φ = 𝒫 ψ"
  by (meson
        eq_iff
        modus_ponens
        biconditional_left_elimination
        biconditional_right_elimination
        monotonicity)

lemma (in probability_logic) sum_rule:
  "𝒫 (φ ⊔ ψ) + 𝒫 (φ ⊓ ψ) = 𝒫 φ + 𝒫 ψ"
proof -
  have "⊢ (φ ⊔ ψ) ↔ (φ ⊔ ψ ∖ (φ ⊓ ψ))"
  proof -
    have "∀ 𝔐. 𝔐 ⊨⇩_p⇩_r⇩_o⇩_p (❙⟨φ❙⟩ ⊔ ❙⟨ψ❙⟩) ↔ (❙⟨φ❙⟩ ⊔ ❙⟨ψ❙⟩ ∖ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩))"
      unfolding
        classical_logic_class.subtraction_def
        classical_logic_class.negation_def
        classical_logic_class.biconditional_def
        classical_logic_class.conjunction_def
        classical_logic_class.disjunction_def
      by simp
    hence "⊢ ❙⦇ (❙⟨φ❙⟩ ⊔ ❙⟨ψ❙⟩) ↔ (❙⟨φ❙⟩ ⊔ ❙⟨ψ❙⟩ ∖ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩)) ❙⦈"
      using propositional_semantics by blast
    thus ?thesis by simp
  qed
  moreover have "⊢ φ → (ψ ∖ (φ ⊓ ψ)) → ⊥"
  proof -
    have "∀ 𝔐. 𝔐 ⊨⇩_p⇩_r⇩_o⇩_p ❙⟨φ❙⟩ → (❙⟨ψ❙⟩ ∖ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩)) → ⊥"
      unfolding
        classical_logic_class.subtraction_def
        classical_logic_class.negation_def
        classical_logic_class.biconditional_def
        classical_logic_class.conjunction_def
        classical_logic_class.disjunction_def
      by simp
    hence "⊢ ❙⦇ ❙⟨φ❙⟩ → (❙⟨ψ❙⟩ ∖ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩)) → ⊥ ❙⦈"
      using propositional_semantics by blast
    thus ?thesis by simp
  qed
  hence "𝒫 (φ ⊔ ψ) = 𝒫 φ + 𝒫 (ψ ∖ (φ ⊓ ψ))"
    using
      probability_alternate_additivity
      biconditional_equivalence
      calculation
    by auto
  moreover have "⊢ ψ ↔ (ψ ∖ (φ ⊓ ψ) ⊔ (φ ⊓ ψ))"
  proof -
    have "∀ 𝔐. 𝔐 ⊨⇩_p⇩_r⇩_o⇩_p ❙⟨ψ❙⟩ ↔ (❙⟨ψ❙⟩ ∖ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩) ⊔ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩))"
      unfolding
        classical_logic_class.subtraction_def
        classical_logic_class.negation_def
        classical_logic_class.biconditional_def
        classical_logic_class.conjunction_def
        classical_logic_class.disjunction_def
      by auto
    hence "⊢ ❙⦇ ❙⟨ψ❙⟩ ↔ (❙⟨ψ❙⟩ ∖ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩) ⊔ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩)) ❙⦈"
      using propositional_semantics by
      blast
    thus ?thesis by simp
  qed
  moreover have "⊢ (ψ ∖ (φ ⊓ ψ)) → (φ ⊓ ψ) → ⊥"
    unfolding
      subtraction_def
      negation_def
      conjunction_def
    using
      conjunction_def
      conjunction_right_elimination
    by auto
  hence "𝒫 ψ = 𝒫 (ψ ∖ (φ ⊓ ψ)) + 𝒫 (φ ⊓ ψ)"
    using
      probability_alternate_additivity
      biconditional_equivalence
      calculation
    by auto
  ultimately show ?thesis
    by simp
qed

sublocale probability_logic ⊆ gaines_weatherson_probability
proof
  show "𝒫 ⊤ = 1"
    by (simp add: probability_unity)
next
  show "𝒫 ⊥ = 0"
    by (metis
          add_cancel_left_right
          probability_additivity
          ex_falso_quodlibet
          probability_unity
          bivalence
          conjunction_right_elimination
          negation_def)
next
  fix φ ψ
  assume "⊢ φ → ψ"
  thus "𝒫 φ ≤ 𝒫 ψ"
    using monotonicity
    by auto
next
  fix φ ψ
  show "𝒫 φ + 𝒫 ψ = 𝒫 (φ ⊓ ψ) + 𝒫 (φ ⊔ ψ)"
    by (metis sum_rule add.commute)
qed

sublocale probability_logic ⊆ consistent_classical_logic
proof
  show "¬ ⊢ ⊥" using probability_unity gaines_weatherson_antithesis by auto
qed

lemma (in probability_logic) subtraction_identity:
  "𝒫 (φ ∖ ψ) = 𝒫 φ - 𝒫 (φ ⊓ ψ)"
proof -
  have "⊢ φ ↔ ((φ ∖ ψ) ⊔ (φ ⊓ ψ))"
  proof -
    have "∀ 𝔐. 𝔐 ⊨⇩_p⇩_r⇩_o⇩_p ❙⟨φ❙⟩ ↔ ((❙⟨φ❙⟩ ∖ ❙⟨ψ❙⟩) ⊔ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩))"
      unfolding
        classical_logic_class.subtraction_def
        classical_logic_class.negation_def
        classical_logic_class.biconditional_def
        classical_logic_class.conjunction_def
        classical_logic_class.disjunction_def
      by (simp, blast)
    hence "⊢ ❙⦇ ❙⟨φ❙⟩ ↔ ((❙⟨φ❙⟩ ∖ ❙⟨ψ❙⟩) ⊔ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩)) ❙⦈"
      using propositional_semantics by blast
    thus ?thesis by simp
  qed
  hence "𝒫 φ  = 𝒫 ((φ ∖ ψ) ⊔ (φ ⊓ ψ))"
    using biconditional_equivalence
    by simp
  moreover have "⊢ ∼((φ ∖ ψ) ⊓ (φ ⊓ ψ))"
  proof -
    have "∀ 𝔐. 𝔐 ⊨⇩_p⇩_r⇩_o⇩_p ∼((❙⟨φ❙⟩ ∖ ❙⟨ψ❙⟩) ⊓ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩))"
      unfolding
        classical_logic_class.subtraction_def
        classical_logic_class.negation_def
        classical_logic_class.conjunction_def
        classical_logic_class.disjunction_def
      by simp
    hence "⊢ ❙⦇ ∼((❙⟨φ❙⟩ ∖ ❙⟨ψ❙⟩) ⊓ (❙⟨φ❙⟩ ⊓ ❙⟨ψ❙⟩)) ❙⦈"
      using propositional_semantics by blast
    thus ?thesis by simp
  qed
  ultimately show ?thesis
    using probability_additivity
    by auto
qed

section ‹ Basic Probability Logic Inequality Results \label{sec:basic-probability-inequality-results}›

lemma (in probability_logic) disjunction_sum_inequality:
  "𝒫 (φ ⊔ ψ) ≤ 𝒫 φ + 𝒫 ψ"
proof -
  have "𝒫 (φ ⊔ ψ) + 𝒫 (φ ⊓ ψ) = 𝒫 φ + 𝒫 ψ"
       "0 ≤ 𝒫 (φ ⊓ ψ)"
    by (simp add: sum_rule, simp add: probability_non_negative)
  thus ?thesis by linarith
qed

lemma (in probability_logic)
  arbitrary_disjunction_list_summation_inequality:
  "𝒫 (⨆ Φ) ≤ (∑φ←Φ. 𝒫 φ)"
proof (induct Φ)
  case Nil
  then show ?case by (simp add: gaines_weatherson_antithesis)
next
  case (Cons φ Φ)
  have "𝒫 (⨆ (φ # Φ)) ≤ 𝒫 φ + 𝒫 (⨆ Φ)"
    using disjunction_sum_inequality
    by simp
  with Cons have "𝒫 (⨆ (φ # Φ)) ≤ 𝒫 φ + (∑φ←Φ. 𝒫 φ)" by linarith
  then show ?case by simp
qed

lemma (in probability_logic) implication_list_summation_inequality:
  assumes "⊢ φ → ⨆ Ψ"
  shows "𝒫 φ ≤ (∑ψ←Ψ. 𝒫 ψ)"
  using
    assms
    arbitrary_disjunction_list_summation_inequality
    monotonicity
    order_trans
  by blast

lemma (in probability_logic) arbitrary_disjunction_set_summation_inequality:
  "𝒫 (⨆ Φ) ≤ (∑φ ∈ set Φ. 𝒫 φ)"
  by (metis
        arbitrary_disjunction_list_summation_inequality
        arbitrary_disjunction_remdups
        biconditional_equivalence
        sum.set_conv_list)

lemma (in probability_logic) implication_set_summation_inequality:
  assumes "⊢ φ → ⨆ Ψ"
  shows "𝒫 φ ≤ (∑ψ ∈ set Ψ. 𝒫 ψ)"
  using
    assms
    arbitrary_disjunction_set_summation_inequality
    monotonicity
    order_trans
  by blast

section ‹ Dirac Measures \label{sec:dirac-measures}›

text ‹ Before presenting ∗‹Dirac measures› in probability logic, we first
       give the set of all functions satisfying probability logic.›

definition (in classical_logic) probabilities :: "('a ⇒ real) set"
  where "probabilities =
         {𝒫. class.probability_logic (λ φ. ⊢ φ) (→) ⊥ 𝒫 }"

text ‹ Traditionally, a Dirac measure is a function \<^term>‹δ⇩_x› where
       \<^term>‹δ⇩_x(S) = (1::real)› if \<^term>‹x ∈ S› and \<^term>‹δ⇩_x(S) = (0::real)›
       otherwise.  This means that Dirac measures correspond to special
       ultrafilters on their underlying \<^term>‹σ›-algebra which are
       closed under countable unions. ›

text ‹ Probability logic, as discussed in \S\ref{section:why-finite-additivity},
       may not have countable joins in its underlying logic. In the setting
       of probability logic, Dirac measures are simple probability functions
       that are either 0 or 1. ›

definition (in classical_logic) dirac_measures :: "('a ⇒ real) set"
  where "dirac_measures =
         { 𝒫. class.probability_logic (λ φ. ⊢ φ) (→) ⊥ 𝒫
               ∧ (∀x. 𝒫 x = 0 ∨ 𝒫 x = 1) }"

lemma (in classical_logic) dirac_measures_subset:
  "dirac_measures ⊆ probabilities"
  unfolding
    probabilities_def
    dirac_measures_def
  by fastforce

text ‹ Maximally consistent sets correspond to Dirac measures. One direction
       of this correspondence is established below. ›

lemma (in classical_logic) MCS_dirac_measure:
  assumes "MCS Ω"
    shows "(λ χ. if χ∈Ω then (1 :: real) else 0) ∈ dirac_measures"
      (is "?𝒫 ∈ dirac_measures")
proof -
  have "class.probability_logic (λ φ. ⊢ φ) (→) ⊥ ?𝒫"
  proof (standard, simp,
         meson
           assms
           formula_maximally_consistent_set_def_reflection
           maximally_consistent_set_def
           set_deduction_weaken)
    fix φ ψ
    assume "⊢ φ → ψ → ⊥"
    hence "φ ⊓ ψ ∉ Ω"
      by (metis
            assms
            formula_consistent_def
            formula_maximally_consistent_set_def_def
            maximally_consistent_set_def
            conjunction_def
            set_deduction_modus_ponens
            set_deduction_reflection
            set_deduction_weaken)
    hence "φ ∉ Ω ∨ ψ ∉ Ω"
      using
        assms
        formula_maximally_consistent_set_def_reflection
        maximally_consistent_set_def
        conjunction_set_deduction_equivalence
      by meson
    have "φ ⊔ ψ ∈ Ω = (φ ∈ Ω ∨ ψ ∈ Ω)"
      by (metis
            ‹φ ⊓ ψ ∉ Ω›
            assms
            formula_maximally_consistent_set_def_implication
            maximally_consistent_set_def
            conjunction_def
            disjunction_def)
    have "?𝒫 (φ ⊔ ψ) = ?𝒫 φ + ?𝒫 ψ"
    proof (cases "φ ⊔ ψ ∈ Ω")
      case True
      hence ♢: "1 = ?𝒫 (φ ⊔ ψ)" by simp
      show ?thesis
      proof (cases "φ ∈ Ω")
        case True
        hence "ψ ∉ Ω"
          using ‹φ ∉ Ω ∨ ψ ∉ Ω›
          by blast
        have "?𝒫 (φ ⊔ ψ) = (1::real)" using ♢ by simp
        also have "... = 1 + (0::real)" by linarith
        also have "... = ?𝒫 φ + ?𝒫 ψ"
          using ‹ψ ∉ Ω› ‹φ ∈ Ω› by simp
        finally show ?thesis .
      next
        case False
        hence "ψ ∈ Ω"
          using ‹φ ⊔ ψ ∈ Ω› ‹(φ ⊔ ψ ∈ Ω) = (φ ∈ Ω ∨ ψ ∈ Ω)›
          by blast
        have "?𝒫 (φ ⊔ ψ) = (1::real)" using ♢ by simp
        also have "... = (0::real) + 1" by linarith
        also have "... = ?𝒫 φ + ?𝒫 ψ"
          using ‹ψ ∈ Ω› ‹φ ∉ Ω› by simp
        finally show ?thesis .
      qed
    next
      case False
      moreover from this have "φ ∉ Ω" "ψ ∉ Ω"
        using ‹(φ ⊔ ψ ∈ Ω) = (φ ∈ Ω ∨ ψ ∈ Ω)› by blast+
      ultimately show ?thesis by simp
    qed
    thus "?𝒫 ((φ → ⊥) → ψ) = ?𝒫 φ + ?𝒫 ψ"
      unfolding disjunction_def .
  qed
  thus ?thesis
    unfolding dirac_measures_def
    by simp
qed

unbundle funcset_syntax

end