Theory JVMCFG
chapter ‹A Control Flow Graph for Jinja Byte Code›
section ‹Formalizing the CFG›
theory JVMCFG imports "../Basic/BasicDefs" Jinja.BVExample begin
declare lesub_list_impl_same_size [simp del]
declare nlistsE_length [simp del]
subsection ‹Type definitions›
subsubsection ‹Wellformed Programs›
definition "wf_jvmprog = {(P, Phi). wf_jvm_prog⇘Phi⇙ P}"
typedef wf_jvmprog = wf_jvmprog
proof
show "(E, Phi) ∈ wf_jvmprog"
unfolding wf_jvmprog_def by (auto intro: wf_prog)
qed
hide_const Phi E
abbreviation rep_jvmprog_jvm_prog :: "wf_jvmprog ⇒ jvm_prog"
("_⇘wf⇙")
where "P⇘wf⇙ ≡ fst(Rep_wf_jvmprog(P))"
abbreviation rep_jvmprog_phi :: "wf_jvmprog ⇒ ty⇩P"
("_⇘Φ⇙")
where "P⇘Φ⇙ ≡ snd(Rep_wf_jvmprog(P))"
lemma wf_jvmprog_is_wf: "wf_jvm_prog⇘P⇘Φ⇙⇙ (P⇘wf⇙)"
using Rep_wf_jvmprog [of P]
by (auto simp: wf_jvmprog_def split_beta)
subsubsection ‹Basic Types›
text ‹
We consider a program to be a well-formed Jinja program,
together with a given base class and a main method
›
type_synonym jvmprog = "wf_jvmprog × cname × mname"
type_synonym callstack = "(cname × mname × pc) list"
text ‹
The state is modeled as $\textrm{heap} \times \textrm{stack-variables} \times \textrm{local-variables}$
stack and local variables are modeled as pairs of natural numbers. The first number
gives the position in the call stack (i.e. the method in which the variable is used),
the second the position in the method's stack or array of local variables resp.
The stack variables are numbered from bottom up (which is the reverse order of the
array for the stack in Jinja's state representation), whereas local variables are identified
by their position in the array of local variables of Jinja's state representation.
›
type_synonym state = "heap × ((nat × nat) ⇒ val) × ((nat × nat) ⇒ val)"
abbreviation heap_of :: "state ⇒ heap"
where
"heap_of s ≡ fst(s)"
abbreviation stk_of :: "state ⇒ ((nat × nat) ⇒ val)"
where
"stk_of s ≡ fst(snd(s))"
abbreviation loc_of :: "state ⇒ ((nat × nat) ⇒ val)"
where
"loc_of s ≡ snd(snd(s))"
subsection ‹Basic Definitions›
subsubsection ‹State update (instruction execution)›
text ‹
This function models instruction execution for our state representation.
Additional parameters are the call depth of the current program point,
the stack length of the current program point,
the length of the stack in the underlying call frame (needed for {\sc Return}),
and (for {\sc Invoke}) the length of the array of local variables of the invoked method.
Exception handling is not covered by this function.
›
fun exec_instr :: "instr ⇒ wf_jvmprog ⇒ state ⇒ nat ⇒ nat ⇒ nat ⇒ nat ⇒ state"
where
exec_instr_Load:
"exec_instr (Load n) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s
in (h, stk((calldepth,stk_length):=loc(calldepth,n)), loc))"
| exec_instr_Store:
"exec_instr (Store n) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s
in (h, stk, loc((calldepth,n):=stk(calldepth,stk_length - 1))))"
| exec_instr_Push:
"exec_instr (Push v) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s
in (h, stk((calldepth,stk_length):=v), loc))"
| exec_instr_New:
"exec_instr (New C) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s;
a = the(new_Addr h)
in (h(a ↦ (blank (P⇘wf⇙) C)), stk((calldepth,stk_length):=Addr a), loc))"
| exec_instr_Getfield:
"exec_instr (Getfield F C) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s;
a = the_Addr (stk (calldepth,stk_length - 1));
(D,fs) = the(h a)
in (h, stk((calldepth,stk_length - 1) := the(fs(F,C))), loc))"
| exec_instr_Putfield:
"exec_instr (Putfield F C) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s;
v = stk (calldepth,stk_length - 1);
a = the_Addr (stk (calldepth,stk_length - 2));
(D,fs) = the(h a)
in (h(a ↦ (D,fs((F,C) ↦ v))), stk, loc))"
| exec_instr_Checkcast:
"exec_instr (Checkcast C) P s calldepth stk_length rs ill = s"
| exec_instr_Pop:
"exec_instr (Pop) P s calldepth stk_length rs ill = s"
| exec_instr_IAdd:
"exec_instr (IAdd) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s;
i⇩1 = the_Intg (stk (calldepth, stk_length - 1));
i⇩2 = the_Intg (stk (calldepth, stk_length - 2))
in (h, stk((calldepth, stk_length - 2) := Intg (i⇩1 + i⇩2)), loc))"
| exec_instr_IfFalse:
"exec_instr (IfFalse b) P s calldepth stk_length rs ill = s"
| exec_instr_CmpEq:
"exec_instr (CmpEq) P s calldepth stk_length rs ill =
(let (h,stk,loc) = s;
v⇩1 = stk (calldepth, stk_length - 1);
v⇩2 = stk (calldepth, stk_length - 2)
in (h, stk((calldepth, stk_length - 2) := Bool (v⇩1 = v⇩2)), loc))"
| exec_instr_Goto:
"exec_instr (Goto i) P s calldepth stk_length rs ill = s"
| exec_instr_Throw:
"exec_instr (Throw) P s calldepth stk_length rs ill = s"
| exec_instr_Invoke:
"exec_instr (Invoke M n) P s calldepth stk_length rs invoke_loc_length =
(let (h,stk,loc) = s;
loc' = (λ(a,b). if (a ≠ Suc calldepth ∨ b ≥ invoke_loc_length) then loc(a,b) else
(if (b ≤ n) then stk(calldepth, stk_length - (Suc n - b)) else arbitrary))
in (h,stk,loc'))"
| exec_instr_Return:
"exec_instr (Return) P s calldepth stk_length ret_stk_length ill =
(if (calldepth = 0)
then s
else
(let (h,stk,loc) = s;
v = stk(calldepth, stk_length - 1)
in (h,stk((calldepth - 1, ret_stk_length - 1) := v),loc))
)"
subsubsection ‹length of stack and local variables›
text ‹The following terms extract the stack length at a given program point
from the well-typing of the given program›
abbreviation stkLength :: "wf_jvmprog ⇒ cname ⇒ mname ⇒ pc ⇒ nat"
where
"stkLength P C M pc ≡ length (fst(the(((P⇘Φ⇙) C M)!pc)))"
abbreviation locLength :: "wf_jvmprog ⇒ cname ⇒ mname ⇒ pc ⇒ nat"
where
"locLength P C M pc ≡ length (snd(the(((P⇘Φ⇙) C M)!pc)))"
subsubsection ‹Conversion functions›
text ‹
This function takes a natural number n and a function f with domain ‹nat›
and creates the array [f 0, f 1, f 2, ..., f (n - 1)].
This is used for extracting the array of local variables
›
abbreviation locs :: "nat ⇒ (nat ⇒ 'a) ⇒ 'a list"
where "locs n loc ≡ map loc [0..<n]"
text ‹
This function takes a natural number n and a function f with domain ‹nat›
and creates the array [f (n - 1), ..., f 1, f 0].
This is used for extracting the stack as a list
›
abbreviation stks :: "nat ⇒ (nat ⇒ 'a) ⇒ 'a list"
where "stks n stk ≡ map stk (rev [0..<n])"
text ‹
This function creates a list of the arrays for local variables from the given state
corresponding to the given callstack
›
fun locss :: "wf_jvmprog ⇒ callstack ⇒ ((nat × nat) ⇒ 'a) ⇒ 'a list list"
where
"locss P [] loc = []"
| "locss P ((C,M,pc)#cs) loc =
(locs (locLength P C M pc) (λa. loc (length cs, a)))#(locss P cs loc)"
text ‹
This function creates a list of the (methods') stacks from the given state
corresponding to the given callstack
›
fun stkss :: "wf_jvmprog ⇒ callstack ⇒ ((nat × nat) ⇒ 'a) ⇒ 'a list list"
where
"stkss P [] stk = []"
| "stkss P ((C,M,pc)#cs) stk =
(stks (stkLength P C M pc) (λa. stk (length cs, a)))#(stkss P cs stk)"
text ‹Given a callstack and a state, this abbreviation converts the state
to Jinja's state representation
›
abbreviation state_to_jvm_state :: "wf_jvmprog ⇒ callstack ⇒ state ⇒ jvm_state"
where "state_to_jvm_state P cs s ≡
(None, heap_of s, zip (stkss P cs (stk_of s)) (zip (locss P cs (loc_of s)) cs))"
text ‹This function extracts the call stack from a given frame stack (as it is given
by Jinja's state representation)
›
definition framestack_to_callstack :: "frame list ⇒ callstack"
where "framestack_to_callstack frs ≡ map snd (map snd frs)"
subsubsection ‹State Conformance›
text ‹Now we lift byte code verifier conformance to our state representation›
definition bv_conform :: "wf_jvmprog ⇒ callstack ⇒ state ⇒ bool"
("_,_ ⊢⇘BV⇙ _ √")
where "P,cs ⊢⇘BV⇙ s √ ≡ correct_state (P⇘wf⇙) (P⇘Φ⇙) (state_to_jvm_state P cs s)"
subsubsection ‹Statically determine catch-block›
text ‹This function is equivalent to Jinja's ‹find_handler› function›
fun find_handler_for :: "wf_jvmprog ⇒ cname ⇒ callstack ⇒ callstack"
where
"find_handler_for P C [] = []"
| "find_handler_for P C (c#cs) = (let (C',M',pc') = c in
(case match_ex_table (P⇘wf⇙) C pc' (ex_table_of (P⇘wf⇙) C' M') of
None ⇒ find_handler_for P C cs
| Some pc_d ⇒ (C', M', fst pc_d)#cs))"
subsection ‹Simplification lemmas›
lemma find_handler_decr [simp]: "find_handler_for P Exc cs ≠ c#cs"
proof
assume "find_handler_for P Exc cs = c#cs"
hence "length cs < length (find_handler_for P Exc cs)" by simp
thus False by (induct cs, auto)
qed
lemma stkss_length [simp]: "length (stkss P cs stk) = length cs"
by (induct cs) auto
lemma locss_length [simp]: "length (locss P cs loc) = length cs"
by (induct cs) auto
lemma nth_stkss:
"⟦ a < length cs; b < length (stkss P cs stk ! (length cs - Suc a)) ⟧
⟹ stkss P cs stk ! (length cs - Suc a) !
(length (stkss P cs stk ! (length cs - Suc a)) - Suc b) = stk (a,b)"
proof (induct cs)
case Nil
thus ?case by (simp add: nth_Cons')
next
case (Cons aa cs)
thus ?case
by (cases aa, auto simp add: nth_Cons' rev_nth less_Suc_eq)
qed
lemma nth_locss:
"⟦ a < length cs; b < length (locss P cs loc ! (length cs - Suc a)) ⟧
⟹ locss P cs loc ! (length cs - Suc a) ! b = loc (a,b)"
proof (induct cs)
case Nil
thus ?case by (simp add: nth_Cons')
next
case (Cons aa cs)
thus ?case
by (cases aa, auto simp: nth_Cons' less_Suc_eq)
qed
lemma hd_stks [simp]: "n ≠ 0 ⟹ hd (stks n stk) = stk(n - 1)"
by (cases n, simp_all)
lemma hd_tl_stks: "n > 1 ⟹ hd (tl (stks n stk)) = stk(n - 2)"
by (cases n, auto)
lemma stkss_purge:
"length cs ≤ a ⟹ stkss P cs (stk((a,b) := c)) = stkss P cs stk"
by (induct cs, auto )
lemma stkss_purge':
"length cs ≤ a ⟹ stkss P cs (λs. if s = (a, b) then c else stk s) = stkss P cs stk"
by (fold fun_upd_def, simp only: stkss_purge)
lemma locss_purge:
"length cs ≤ a ⟹ locss P cs (loc((a,b) := c)) = locss P cs loc"
by (induct cs, auto )
lemma locss_purge':
"length cs ≤ a ⟹ locss P cs (λs. if s = (a, b) then c else loc s) = locss P cs loc"
by (fold fun_upd_def, simp only: locss_purge)
lemma locs_pullout [simp]:
"locs b (loc(n := e)) = (locs b loc) [n := e]"
proof (induct b)
case 0
thus ?case by simp
next
case (Suc b)
thus ?case
by (cases "n - b", auto simp: list_update_append not_less_eq less_Suc_eq)
qed
lemma locs_pullout' [simp]:
"locs b (λa. if a = n then e else loc (c, a)) = (locs b (λa. loc (c, a))) [n := e]"
by (fold fun_upd_def) simp
lemma stks_pullout:
"n < b ⟹ stks b (stk(n := e)) = (stks b stk) [b - Suc n := e]"
proof (induct b)
case 0
thus ?case by simp
next
case (Suc b)
thus ?case
proof (cases "b = n")
case True
with Suc show ?thesis
by auto
next
case False
with Suc show ?thesis
by (cases "b - n") (auto intro!: nth_equalityI simp: nth_list_update)
qed
qed
lemma nth_tl : "xs ≠ [] ⟹ tl xs ! n = xs ! (Suc n)"
by (cases xs, simp_all)
lemma f2c_Nil [simp]: "framestack_to_callstack [] = []"
by (simp add: framestack_to_callstack_def)
lemma f2c_Cons [simp]:
"framestack_to_callstack ((stk,loc,C,M,pc)#frs) = (C,M,pc)#(framestack_to_callstack frs)"
by (simp add: framestack_to_callstack_def)
lemma f2c_length [simp]:
"length (framestack_to_callstack frs) = length frs"
by (simp add: framestack_to_callstack_def)
lemma f2c_s2jvm_id [simp]:
"framestack_to_callstack
(snd(snd(state_to_jvm_state P cs s))) =
cs"
by (cases s, simp add: framestack_to_callstack_def)
lemma f2c_s2jvm_id' [simp]:
"framestack_to_callstack
(zip (stkss P cs stk) (zip (locss P cs loc) cs)) = cs"
by (simp add: framestack_to_callstack_def)
lemma f2c_append [simp]:
"framestack_to_callstack (frs @ frs') =
(framestack_to_callstack frs) @ (framestack_to_callstack frs')"
by (simp add: framestack_to_callstack_def)
subsection ‹CFG construction›
subsection ‹Datatypes›
text ‹Nodes are labeled with a callstack and an optional tuple (consisting of
a callstack and a flag).
The first callstack determines the current program point (i.e. the next statement
to execute). If the second parameter is not None, we are at an intermediate state,
where the target of the instruction is determined (the second callstack)
and the flag is set to whether an exception is thrown or not.
›