Theory CTranslationInfrastructure

(*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *)


chapter ‹C-Translation Infrastructure \label{chap:CTranslationInfrastructure}›

theory CTranslationInfrastructure
  imports
  CTranslation
begin

term guarded_spec_body
section ‹Local Variables›

text ‹
Local variables in the SIMPL outcome of the C-Parser are represented in typlocals.
 The natural number in the domain encodes the canonical position
in the scope of the C-function: input arguments, then return variable, then local variables.
The byte list in the range of the function is the encoding of the value according to the
UMM (unified memory model) classc_type. The typing of variables is maintained as data in
@{ML_structure "CLocals"} and a typed view is achieved via constclookup and constcupdate.

In previous attempts local variables were represented first as a @{command record} and more 
recently as @{command statespace}. 
The representation as typlocals tries to combine the best of all previous approaches:
 Uniform (program independent) representation of local variables.
 Positional 'naming' enables a canonical parameter passing for function calls, 
  even if the names of parameters are unknown, as in the case of function-pointers.
 Lightweight "typing" via ML infrastructure instead of the 'fixes' of @{command statespace}. 
  Unfortunately, the more flexible @{command statespace} representation for local variables turned
  out to reveal some performance bottle neck in the locale infrastructure, which itself boils down to
  the resources consumed by the omnipresent instantiation of types and terms. 

›

declare [[ML_print_depth = 1000]]

subsection ‹Basic ML primitives›

text ‹
To define new variables the basic primitive is @{ML CLocals.define_locals} which takes a
scope (currently a two element list of qualifiers: program-name followed by function-name) and 
the list of local variable declarations (name, type and kind). 
Note that the order is relevant as it represents the canonical positional ordering.
›

setup CLocals.define_locals ["prog", "myfun"] [
  ("x", @{typ 32 word}, NameGeneration.Loc), 
  ("p", @{typ 32 word ptr}, NameGeneration.Loc), 
  ("z", @{typ 64 word}, NameGeneration.Loc)] 

ML CLocals.Data.get (Context.Proof @{context})

text ‹
For each local variable a constant is defined with the position of the variable. The constant
name encodes the original name of the local variable. Whenever available we use this symbolic name
in favour of the plain literal number. However, at some places we use the plain numbers, in 
particular for parameter passing. There is some simplifier setup which we explain later that
allows to use either representation.

To enable syntax translations from short names to the symbolic constants together with their typing
one has to enter the correct scope, e.g.:
›

local_setup Local_Theory.declaration {pervasive=false, syntax=true, pos = } (fn _ =>  
  CLocals.scope_map (K ["prog", "myfun"]))

term prog.myfun.p_'
thm prog.myfun.p_'_def

text ‹
Some remarks on the naming scheme for those symbolic constants. 
The qualifiers for both program-name and function-name are mandatory.
The name mangling with the suffix serves two purposes:
 Avoid name clashes with user input via variable names in the C sources (in particular 
  for the artificial return variable)
 Avoid name clashes for bound or free variables (or new constants) in HOL. Note that
  even a constant with mandatory qualifier e.g. foo.c› would force the pretty printer to avoid
  a bound variable being named just as c›. The pretty printer only takes 
  the @{ML Long_Name.base_name} into account.

The general suffix _'› avoids clashes with C variable names as the prime is not
a legal character for C identifiers. Syntax translations will truncate this suffix.
The artificial return variable ret› that the C-Parser introduces is internally ret'_'› to
avoid a potential conflict with a local variable named ret›.
›

ML CLocals.Data.get (Context.Proof @{context})

subsection "Syntax"

text ‹Plain lookup and update in locals.›
term l  p›
ML_val terml  p›

term sx := y
term sx := y, p := k

ML_val termsx := y
ML_val termsx := y, p := k


text ‹Lookup and update in a complete Simpl state, selecting the constlocals

term s  p›
ML_val @{term s  p›}


term sx := y
term sx := y, p := k


subsection "Simplifier setup"

text ‹
The simplifier is setup to be able to deal with symbolic as well as literal numerals. Symbolic
ones are expanded on the fly employing the infrastructure form the code generator.
The trigger for this expansion is via a simplification procedure that. Conditional rules can be
configured via the attribute:
›
ML @{attributes [code_simproc]}

text ‹
The simplification procedure first checks wheter the precondition evaluates to true, via 
the code-generator. In case of success the simplifier is invoked to replay that proof and
trigger the rule. @{ML Code_Simproc.code_simp_prems}, @{ML Code_Simproc.code_prove}.

Note that the attribute also consumes an optional name to index the simprocs. This name is
taken from @{command named_theorems}.

The relevant setup of the rules can be found in 🗏‹CLocals.thy›.
›

schematic_goal (clookup 2 (cupdate prog.myfun.z_' (λ_. 2::64 word) l)) = (?X::64 word)
  apply simp
  done

schematic_goal (clookup prog.myfun.z_' (cupdate prog.myfun.z_' (λ_. 2::64 word) l)) = (?X::64 word)
  apply simp
  done
schematic_goal "sx := λ_. 2  x = ?X"
  apply simp
  done

schematic_goal "sx := λ_. 2  p = ?X"
  apply simp
  done

install_C_file "ex.c"

thm size_td_simps
text ‹The root locale storing the global content @{locale ex_global_addresses}. This is also
the locale ware the bodies of the imported functions are defined.›

context ex_global_addresses
begin
text ‹
The usage of global variables is analysed and three cases are distinguesed:
 static variables do not change there value. So they are defined as HOL constant with their
  initial value. In case no initialiser is given they are zero initialised.
 'ordinary' global variables that are also updated somewhere in the code are stored in
  the record of global variables typglobals. They are accessed by the lookup and update function
  of the @{command record} package. 
 In case the address of a global variabe is taken somewhere in the code, we declare a
  global constant for a pointer to that variable. Lookup and update is then indirectly via the
  heap. Note that the pointer is only declared. There is no defining equation. Currently there
  are also no assumptions maintained about distinctness of global variable pointers. This is
  up to the user.
›
thm global_const_defs
term g_static
term g_ordinary_' term g_ordinary_'_update
term g_addressed_'
thm main_body_def
thm globals.typing.get_upd
thm state.typing.get_upd
text ‹
Functions also result in a declaration of a constant representing the global function pointer.
@{const ex.add}. They have the type @{typ "unit ptr"}.
›
term ex.add

text ‹When the code actually uses function pointers to perform indirect calls, some more
infrastructure is provided,  cf 🗏‹../doc/fnptr.thy›

text ‹
When looking into the function bodies the symbolic names for the positional local variables
are displayed, fully qualified with program name as well as function name.
›
thm add_body_def

text ‹To have short names printed, one can either enter the scope of the function by activating
the corresponding variables› bundle, or use the option to always display short names.›
context includes add_variables
begin
thm add_body_def
end

declare [[clocals_short_names]]
thm  add_body_def
end

text ‹The canonical locale to do verification of a procedure is the impl› locale. This
activates the scope of the function and also stores the equation that the lookup of the
function pointer in the environment constΓ retrieves the expected body.›

context add_impl
begin
thm add_body_def
thm add_impl

text ‹
The symbolic names can be folded and unfolded by an attribute.
›
thm add_body_def [unfold_locals]
thm add_body_def [unfold_locals, fold_locals]

text ‹These attributes can also take qualifier in case an alternative scope should be added›
thm call_add_body_def
thm call_add_body_def [unfold_locals] ― ‹nothing happens as we are in the wrong scope›
thm call_add_body_def [unfold_locals call_add] ― ‹now they are expanded as we qualify the scope›

end


context call_add_impl
begin
thm call_add_body_def
declare [[hoare_use_call_tr' = false]]
thm call_add_body_def
end

text ‹All the implementations of the program are gathered in the simpl› locale ›

context ex_simpl
begin
thm add_impl
thm call_add_impl
end

section "Infrastructure for states" 

type_synonym state = "(globals, locals, 32 signed word) CProof.state"

context add_impl
begin

text ‹As a final part of the verification generator generator terms containing local and global
variables are generalised to a 'splitted' view, where each component becomes has a bound variable.›


lemma "
  s::state. s  n = 3"
  apply (tactic HPInter.GENERALISE @{context} 1)
  by simp

lemma "
  s::state. s  n = 3"
  apply (tactic HPInter.GENERALISE @{context} 1)
  by simp

lemma "
  s::state. s  n = 3  s  m = 3"
  apply (tactic HPInter.GENERALISE @{context} 1)
  by simp

lemma "
  s::state. s  n = 3  s  m = 3  g_' (globals s) = 4  global_exn_var'_' s = Break"
  apply (tactic HPInter.GENERALISE @{context} 1)
  by simp



text ‹Simpl syntax›

ML @{term ´n = x }
term ´n = x 
ML @{term ´n :== x}
term ´n :== x


term ´n :== CALL add(x, y)
term ´n :== CALL ex.add(x, y)
ML @{term ´n :== CALL add(x, y)}


ML @{term ´ret' :== PROC add(2, 3)} 

term ´ret' :== PROC add(2, 3)

end

section ‹Cached simproc examples›

text ‹Some more explanation on this is in 🗏‹../doc/AutoCorresInfrastructure.thy›


schematic_goal "TYPE(addr_bitsize word) τ TYPE(unpacked_C[12]) "
  apply simp
  done


lemma "TYPE(8 word) τ TYPE(array_C)"
  supply [[verbose=3]]
  apply simp
  done

lemma "TYPE(8 word) <τ TYPE(array_C)"
  supply [[verbose=3]]
  apply simp
  done

lemma "TYPE(addr_bitsize word) τ TYPE(matrix_C)"
  supply [[verbose=3]]
  apply simp
  done

lemma "TYPE(8 word) τ TYPE(matrix_C)"
  apply simp
  done

ML Cached_Theory_Simproc.trace_cache @{context}

schematic_goal field_names_no_padding (typ_info_t TYPE(outer_C)) (export_uinfo (typ_info_t TYPE(inner_C))) = ?XX
  supply [[simp_trace=true, verbose=5]]
  apply simp
  done
ML Cached_Theory_Simproc.trace_cache @{context}

schematic_goal set (field_names_no_padding (typ_info_t TYPE(outer_C)) (export_uinfo (typ_info_t TYPE(inner_C)))) = ?XX
  supply [[simp_trace=true, verbose=5]]
  apply simp
  done
ML Cached_Theory_Simproc.trace_cache @{context}


lemma "export_uinfo (typ_info_t TYPE(unpacked_C)) = export_uinfo (typ_info_t (TYPE(32 word)))"
  supply [[simp_trace=true,verbose=3]]
  apply simp
  oops

lemma "typ_uinfo_t TYPE(unpacked_C) = export_uinfo (typ_info_t (TYPE(32 word)))"
  apply simp
  oops

lemma "typ_uinfo_t TYPE(32 signed word) = export_uinfo (typ_info_t (TYPE(32 word)))"
  supply [[simp_trace=true,verbose=3]]
  apply simp
  done


lemma "typ_uinfo_t TYPE(32 signed word[3]) = export_uinfo (typ_info_t (TYPE(32 word[3])))"
  supply [[simp_trace=true,verbose=3]]
  apply simp
  done

schematic_goal set (field_names_no_padding (typ_info_t TYPE(unpacked_C)) (export_uinfo (typ_info_t TYPE(8 word)))) = ?XX
  supply [[simp_trace=true, verbose=3]]
  apply simp
  done

lemma "export_uinfo (typ_info_t TYPE(unpacked_C)) = export_uinfo (typ_info_t (TYPE(unpacked_C)))"
  supply [[simp_trace=true,verbose=3]]
  apply simp
  done

ML Cached_Theory_Simproc.trace_cache @{context}
lemma "TYPE (32 word) τ TYPE(32 word[256])" 
  supply [[simp_trace=true,verbose=3]]
  apply simp
  done

lemma "TYPE (32 word) τ TYPE(32 word[256])" 
  supply [[simp_trace=true,verbose=3]]
  apply simp
  done
ML Cached_Theory_Simproc.trace_cache @{context}

schematic_goal "n < 12  field_lookup (typ_info_t TYPE(array_C)) [''elements_C'', replicate n CHR ''1'', ''chr_C''] 0 = ?X"
  supply [[simp_trace=true, simp_trace_depth_limit=1, verbose=5]]
  apply simp
  done

schematic_goal "n < 12  field_lookup (typ_uinfo_t TYPE(array_C)) [''elements_C'', replicate n CHR ''1'', ''chr_C''] 0 = ?X"
  supply [[simp_trace=true, simp_trace_depth_limit=1, verbose=5]]
  apply simp
  done

schematic_goal "n < 12  field_ti TYPE(array_C) [''elements_C'', replicate n CHR ''1'', ''chr_C''] = ?X"
  supply [[simp_trace=false, simp_trace_depth_limit=1, verbose=3]]
  apply simp
  done

schematic_goal "n < 12  field_ti TYPE(array_C) [''elements_C'', replicate n CHR ''1'', ''chr_C''] = ?X"
  supply [[simp_trace=false, simp_trace_depth_limit=1, verbose=3]]
  apply simp
  done

lemma "TYPE(unpacked_C[12]) τ TYPE(array_C[2])"
  supply [[simp_trace=false, simp_trace_depth_limit=1, verbose=3]]
  apply simp
  done

lemma "¬ TYPE(unpacked_C[12]) τ TYPE(unpacked_C[24])"
  supply [[simp_trace=false, simp_trace_depth_limit=2, verbose=3]]
  apply simp
  done

lemma "TYPE(unpacked_C[12]) τ TYPE(array_C)"
  apply simp
  done

lemma "typ_uinfo_t TYPE(unit ptr)  typ_uinfo_t TYPE(32 word ptr)"
  apply simp
  done

text ‹Matching of the interpreteted locale @{locale stack_heap_raw_state}

ML_val val cterm_match = With_Fresh_Stack_Ptr.cterm_match @{context}
  val {n=n1, init=init1, c=c1, instantiate=inst1, ...} = cterm_match @{cterm "state.With_Fresh_Stack_Ptr n i1 c1"}
  val {n=n2, init=init2, c=c2, instantiate=inst2, ...} = cterm_match @{cterm "state.With_Fresh_Stack_Ptr n i2 c2"}
  val t = inst1 {n=n2, init = init2, c= c2}

ML_val val match = With_Fresh_Stack_Ptr.match @{context}
  val {n = n1, init=init1, c=c1, instantiate=inst1, ...} = match @{term "globals.With_Fresh_Stack_Ptr n i1 c1"}
  val {n = n2, init=init2, c=c2, instantiate=inst2, ...} = match @{term "globals.With_Fresh_Stack_Ptr n i2 c2"}
  val t = inst1 {n=n2, init = init2, c= c2}


thm zero_simps
thm make_zero
thm size_simps
thm size_align_simps

thm h_val_fields
thm heap_update_fields
thm fg_cons_simps
thm fl_ti_simps
thm fl_Some_simps

end