Theory Global_Invariants
theory Global_Invariants
imports
Proofs_Basis
begin
section‹Global Invariants \label{sec:global-invariants}›
subsection‹The valid references invariant›
text‹
The key safety property of a GC is that it does not free objects that
are reachable from mutator roots. The GC also requires that there are
objects for all references reachable from grey objects.
›
definition valid_refs_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"valid_refs_inv = (❙∀m x. mut_m.reachable m x ❙∨ grey_reachable x ❙⟶ valid_ref x)"
text‹
The remainder of the invariants support the inductive argument that
this one holds.
›
subsection‹The strong-tricolour invariant \label{sec:strong-tricolour-invariant} ›
text‹
As the GC algorithm uses both insertion and deletion barriers, it
preserves the \emph{strong tricolour-invariant}:
›
abbreviation points_to_white :: "'ref ⇒ 'ref ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" (infix "points'_to'_white" 51) where
"x points_to_white y ≡ x points_to y ❙∧ white y"
definition strong_tricolour_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"strong_tricolour_inv = (❙∀b w. black b ❙⟶ ❙¬b points_to_white w)"
text‹
Intuitively this invariant says that there are no pointers from
completely processed objects to the unexplored space; i.e., the grey
references properly separate the two. In contrast the weak tricolour
invariant allows such pointers, provided there is a grey reference
that protects the unexplored object.
›
definition has_white_path_to :: "'ref ⇒ 'ref ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" (infix "has'_white'_path'_to" 51) where
"x has_white_path_to y = (λs. (λx y. (x points_to_white y) s)⇧*⇧* x y)"
definition grey_protects_white :: "'ref ⇒ 'ref ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" (infix "grey'_protects'_white" 51) where
"g grey_protects_white w = (grey g ❙∧ g has_white_path_to w)"
definition weak_tricolour_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"weak_tricolour_inv =
(❙∀b w. black b ❙∧ b points_to_white w ❙⟶ (❙∃g. g grey_protects_white w))"
lemma "strong_tricolour_inv s ⟹ weak_tricolour_inv s"
by (clarsimp simp: strong_tricolour_inv_def weak_tricolour_inv_def grey_protects_white_def)
text‹
The key invariant that the mutators establish as they perform ‹get_roots›: they protect their white-reachable references with grey
objects.
›
definition in_snapshot :: "'ref ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" where
"in_snapshot r = (black r ❙∨ (❙∃g. g grey_protects_white r))"
definition (in mut_m) reachable_snapshot_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"reachable_snapshot_inv = (❙∀r. reachable r ❙⟶ in_snapshot r)"
subsection‹Phase invariants \label{sec:phase-invariants}›
text (in mut_m) ‹
The phase structure of this GC algorithm greatly complicates this
safety proof. The following assertions capture this structure in
several relations.
We begin by relating the mutators' @{const
"mut_ghost_hs_phase"} to @{const "sys_ghost_hs_phase"},
which tracks the GC's. Each mutator can be at most one handshake step
behind the GC. If any mutator is behind then the GC is stalled on a
pending handshake. We include the handshake type as
‹get_work› can occur any number of times.
›
definition hp_step_rel :: "(bool × hs_type × hs_phase × hs_phase) set" where
"hp_step_rel =
{ True } × ({ (ht_NOOP, hp, hp) |hp. hp ∈ {hp_Idle, hp_IdleInit, hp_InitMark, hp_Mark} }
∪ { (ht_GetRoots, hp_IdleMarkSweep, hp_IdleMarkSweep)
, (ht_GetWork, hp_IdleMarkSweep, hp_IdleMarkSweep) })
∪ { False } × { (ht_NOOP, hp_Idle, hp_IdleMarkSweep)
, (ht_NOOP, hp_IdleInit, hp_Idle)
, (ht_NOOP, hp_InitMark, hp_IdleInit)
, (ht_NOOP, hp_Mark, hp_InitMark)
, (ht_GetRoots, hp_IdleMarkSweep, hp_Mark)
, (ht_GetWork, hp_IdleMarkSweep, hp_IdleMarkSweep) }"
definition handshake_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"handshake_phase_inv = (❙∀m.
sys_ghost_hs_in_sync m ❙⊗ sys_hs_type ❙⊗ sys_ghost_hs_phase ❙⊗ mut_m.mut_ghost_hs_phase m ❙∈ ⟨hp_step_rel⟩
❙∧ (sys_hs_pending m ❙⟶ ❙¬sys_ghost_hs_in_sync m))"
text ‹
In some phases we need to know that the insertion and deletion
barriers are installed, in order to preserve the snapshot. These can
ignore TSO effects as the process doing the marking holds the TSO lock
until the mark is committed to the shared memory (see
\S\ref{def:valid_W_inv}).
Note that it is not easy to specify precisely when the snapshot (of
objects the GC will retain) is taken due to the raggedness of the
initialisation.
Read the following as ``when mutator ‹m› is past the
specified handshake, and has yet to reach the next one, ... holds.''
›
abbreviation marked_insertion :: "('field, 'payload, 'ref) mem_store_action ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" where
"marked_insertion w ≡ λs. case w of mw_Mutate r f (Some r') ⇒ marked r' s | _ ⇒ True"
abbreviation marked_deletion :: "('field, 'payload, 'ref) mem_store_action ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" where
"marked_deletion w ≡ λs. case w of mw_Mutate r f opt_r' ⇒ obj_at_field_on_heap (λr'. marked r' s) r f s | _ ⇒ True"
context mut_m
begin
definition marked_insertions :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"marked_insertions = (❙∀w. tso_pending_store (mutator m) w ❙⟶ marked_insertion w)"
definition marked_deletions :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"marked_deletions = (❙∀w. tso_pending_store (mutator m) w ❙⟶ marked_deletion w)"
primrec mutator_phase_inv_aux :: "hs_phase ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" where
"mutator_phase_inv_aux hp_Idle = ⟨True⟩"
| "mutator_phase_inv_aux hp_IdleInit = no_black_refs"
| "mutator_phase_inv_aux hp_InitMark = marked_insertions"
| "mutator_phase_inv_aux hp_Mark = (marked_insertions ❙∧ marked_deletions)"
| "mutator_phase_inv_aux hp_IdleMarkSweep = (marked_insertions ❙∧ marked_deletions ❙∧ reachable_snapshot_inv)"
abbreviation mutator_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"mutator_phase_inv ≡ mutator_phase_inv_aux ❙$ mut_ghost_hs_phase"
end
abbreviation mutators_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"mutators_phase_inv ≡ (❙∀m. mut_m.mutator_phase_inv m)"
text‹
This is what the GC guarantees. Read this as ``when the GC is at or
past the specified handshake, ... holds.''
›
primrec sys_phase_inv_aux :: "hs_phase ⇒ ('field, 'mut, 'payload, 'ref) lsts_pred" where
"sys_phase_inv_aux hp_Idle = ( (If sys_fA ❙= sys_fM Then black_heap Else white_heap) ❙∧ no_grey_refs )"
| "sys_phase_inv_aux hp_IdleInit = no_black_refs"
| "sys_phase_inv_aux hp_InitMark = (sys_fA ❙≠ sys_fM ❙⟶ no_black_refs)"
| "sys_phase_inv_aux hp_Mark = ⟨True⟩"
| "sys_phase_inv_aux hp_IdleMarkSweep = ( (sys_phase ❙= ⟨ph_Idle⟩ ❙∨ tso_pending_store gc (mw_Phase ph_Idle)) ❙⟶ no_grey_refs )"
abbreviation sys_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"sys_phase_inv ≡ sys_phase_inv_aux ❙$ sys_ghost_hs_phase"
subsubsection‹Writes to shared GC variables›
text‹
Relate @{const "sys_ghost_hs_phase"}, @{const "gc_phase"},
@{const "sys_phase"} and writes to the phase in the GC's TSO buffer.
The first relation treats the case when the GC's TSO buffer does not
contain any writes to the phase.
The second relation exhibits the data race on the phase variable: we
need to precisely track the possible states of the GC's TSO buffer.
›
definition handshake_phase_rel :: "hs_phase ⇒ bool ⇒ gc_phase ⇒ bool" where
"handshake_phase_rel hp in_sync ph =
(case hp of
hp_Idle ⇒ ph = ph_Idle
| hp_IdleInit ⇒ ph = ph_Idle ∨ (in_sync ∧ ph = ph_Init)
| hp_InitMark ⇒ ph = ph_Init ∨ (in_sync ∧ ph = ph_Mark)
| hp_Mark ⇒ ph = ph_Mark
| hp_IdleMarkSweep ⇒ ph = ph_Mark ∨ (in_sync ∧ ph ∈ { ph_Idle, ph_Sweep }))"
definition phase_rel :: "(bool × hs_phase × gc_phase × gc_phase × ('field, 'payload, 'ref) mem_store_action list) set" where
"phase_rel =
({ (in_sync, hp, ph, ph, []) |in_sync hp ph. handshake_phase_rel hp in_sync ph }
∪ ({True} × { (hp_IdleInit, ph_Init, ph_Idle, [mw_Phase ph_Init]),
(hp_InitMark, ph_Mark, ph_Init, [mw_Phase ph_Mark]),
(hp_IdleMarkSweep, ph_Sweep, ph_Mark, [mw_Phase ph_Sweep]),
(hp_IdleMarkSweep, ph_Idle, ph_Mark, [mw_Phase ph_Sweep, mw_Phase ph_Idle]),
(hp_IdleMarkSweep, ph_Idle, ph_Sweep, [mw_Phase ph_Idle]) }))"
definition phase_rel_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"phase_rel_inv = ((❙∀m. sys_ghost_hs_in_sync m) ❙⊗ sys_ghost_hs_phase ❙⊗ gc_phase ❙⊗ sys_phase ❙⊗ tso_pending_phase gc ❙∈ ⟨phase_rel⟩)"
text‹
Similarly we track the validity of @{const "sys_fM"} (respectively,
@{const "sys_fA"}) wrt @{const "gc_fM"} (@{const "sys_fA"}) and the
handshake phase. We also include the TSO lock to rule out the GC
having any pending marks during the @{const "hp_Idle"} handshake
phase.
›
definition fM_rel :: "(bool × hs_phase × gc_mark × gc_mark × ('field, 'payload, 'ref) mem_store_action list × bool) set" where
"fM_rel =
{ (in_sync, hp, fM, fM, [], l) |fM hp in_sync l. hp = hp_Idle ⟶ ¬in_sync }
∪ { (in_sync, hp_Idle, fM, fM', [], l) |fM fM' in_sync l. in_sync }
∪ { (in_sync, hp_Idle, ¬fM, fM, [mw_fM (¬fM)], False) |fM in_sync. in_sync }"
definition fM_rel_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"fM_rel_inv = ((❙∀m. sys_ghost_hs_in_sync m) ❙⊗ sys_ghost_hs_phase ❙⊗ gc_fM ❙⊗ sys_fM ❙⊗ tso_pending_fM gc ❙⊗ (sys_mem_lock ❙= ⟨Some gc⟩) ❙∈ ⟨fM_rel⟩)"
definition fA_rel :: "(bool × hs_phase × gc_mark × gc_mark × ('field, 'payload, 'ref) mem_store_action list) set" where
"fA_rel =
{ (in_sync, hp_Idle, fA, fM, []) |fA fM in_sync. ¬in_sync ⟶ fA = fM }
∪ { (in_sync, hp_IdleInit, fA, ¬fA, []) |fA in_sync. True }
∪ { (in_sync, hp_InitMark, fA, ¬fA, [mw_fA (¬fA)]) |fA in_sync. in_sync }
∪ { (in_sync, hp_InitMark, fA, fM, []) |fA fM in_sync. ¬in_sync ⟶ fA ≠ fM }
∪ { (in_sync, hp_Mark, fA, fA, []) |fA in_sync. True }
∪ { (in_sync, hp_IdleMarkSweep, fA, fA, []) |fA in_sync. True }"
definition fA_rel_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"fA_rel_inv = ((❙∀m. sys_ghost_hs_in_sync m) ❙⊗ sys_ghost_hs_phase ❙⊗ sys_fA ❙⊗ gc_fM ❙⊗ tso_pending_fA gc ❙∈ ⟨fA_rel⟩)"
subsection‹Worklist invariants \label{def:valid_W_inv}›
text‹
The worklists track the grey objects. The following invariant asserts
that grey objects are marked on the heap except for a few steps near
the end of @{const "mark_object_fn"}, the processes' worklists and
@{const "ghost_honorary_grey"}s are disjoint, and that pending marks
are sensible.
The safety of the collector does not to depend on disjointness; we
include it as proof that the single-threading of grey objects in the
implementation is sound.
Note that the phase invariants of \S\ref{sec:phase-invariants} limit
the scope of this invariant.
›
definition valid_W_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"valid_W_inv =
((❙∀p r. r in_W p ❙∨ (sys_mem_lock ❙≠ ⟨Some p⟩ ❙∧ r in_ghost_honorary_grey p) ❙⟶ marked r)
❙∧ (❙∀p q. ⟨p ≠ q⟩ ❙⟶ WL p ❙∩ WL q ❙= ⟨{}⟩)
❙∧ (❙∀p q r. ❙¬(r in_ghost_honorary_grey p ❙∧ r in_W q))
❙∧ (EMPTY sys_ghost_honorary_grey)
❙∧ (❙∀p r fl. tso_pending_store p (mw_Mark r fl)
❙⟶ ⟨fl⟩ ❙= sys_fM
❙∧ r in_ghost_honorary_grey p
❙∧ tso_locked_by p
❙∧ white r
❙∧ tso_pending_mark p ❙= ⟨[mw_Mark r fl]⟩ ))"
subsection‹Coarse invariants about the stores a process can issue›
abbreviation gc_writes :: "('field, 'payload, 'ref) mem_store_action ⇒ bool" where
"gc_writes w ≡ case w of mw_Mark _ _ ⇒ True | mw_Phase _ ⇒ True | mw_fM _ ⇒ True | mw_fA _ ⇒ True | _ ⇒ False"
abbreviation mut_writes :: "('field, 'payload, 'ref) mem_store_action ⇒ bool" where
"mut_writes w ≡ case w of mw_Mutate _ _ _ ⇒ True | mw_Mark _ _ ⇒ True | _ ⇒ False"
definition tso_store_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"tso_store_inv =
((❙∀w. tso_pending_store gc w ❙⟶ ⟨gc_writes w⟩)
❙∧ (❙∀m w. tso_pending_store (mutator m) w ❙⟶ ⟨mut_writes w⟩))"
subsection‹The global invariants collected›
definition invs :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"invs =
(handshake_phase_inv
❙∧ phase_rel_inv
❙∧ strong_tricolour_inv
❙∧ sys_phase_inv
❙∧ tso_store_inv
❙∧ valid_refs_inv
❙∧ valid_W_inv
❙∧ mutators_phase_inv
❙∧ fA_rel_inv ❙∧ fM_rel_inv)"
subsection‹Initial conditions \label{sec:initial-conditions}›
text‹
We ask that the GC and system initially agree on some things:
\begin{itemize}
\item All objects on the heap are marked (have their flags equal to
@{const "sys_fM"}, and there are no grey references, i.e. the heap
is uniformly black.
\item The GC and system have the same values for @{term "fA"}, @{term
"fM"}, etc. and the phase is @{term "Idle"}.
\item No process holds the TSO lock and all write buffers are empty.
\item All root-reachable references are backed by objects.
\end{itemize}
Note that these are merely sufficient initial conditions and can be
weakened.
›
locale gc_system =
fixes initial_mark :: gc_mark
begin
definition gc_initial_state :: "('field, 'mut, 'payload, 'ref) lst_pred" where
"gc_initial_state s =
(fM s = initial_mark
∧ phase s = ph_Idle
∧ ghost_honorary_grey s = {}
∧ W s = {})"
definition mut_initial_state :: "('field, 'mut, 'payload, 'ref) lst_pred" where
"mut_initial_state s =
(ghost_hs_phase s = hp_IdleMarkSweep
∧ ghost_honorary_grey s = {}
∧ ghost_honorary_root s = {}
∧ W s = {})"
definition sys_initial_state :: "('field, 'mut, 'payload, 'ref) lst_pred" where
"sys_initial_state s =
((∀m. ¬hs_pending s m ∧ ghost_hs_in_sync s m)
∧ ghost_hs_phase s = hp_IdleMarkSweep ∧ hs_type s = ht_GetRoots
∧ obj_mark ` ran (heap s) ⊆ {initial_mark}
∧ fA s = initial_mark
∧ fM s = initial_mark
∧ phase s = ph_Idle
∧ ghost_honorary_grey s = {}
∧ W s = {}
∧ (∀p. mem_store_buffers s p = [])
∧ mem_lock s = None)"
abbreviation
"root_reachable y ≡ ❙∃m x. ⟨x⟩ ❙∈ mut_m.mut_roots m ❙∧ x reaches y"
definition valid_refs :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"valid_refs = (❙∀y. root_reachable y ❙⟶ valid_ref y)"
definition gc_system_init :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
"gc_system_init =
((λs. gc_initial_state (s gc))
❙∧ (λs. ∀m. mut_initial_state (s (mutator m)))
❙∧ (λs. sys_initial_state (s sys))
❙∧ valid_refs)"
text‹
The system consists of the programs and these constraints on the initial state.
›
abbreviation gc_system :: "('field, 'mut, 'payload, 'ref) gc_system" where
"gc_system ≡ ⦇PGMs = gc_coms, INIT = gc_system_init, FAIR = ⟨True⟩⦈"
end
end