RTL function inlining
Require Import Coqlib.
Require Import Wfsimpl.
Require Import Errors.
Require Import Maps.
Require Import AST.
Require Import Integers.
Require Import Values.
Require Import Op.
Require Import Registers.
Require Import RTL.
Ltac xomega :=
unfold Plt,
Ple in *;
zify;
omega.
Environment of inlinable functions
We maintain a mapping from function names to their definitions.
In this mapping, we only include functions that are eligible for
inlining, as determined by the external heuristic
should_inline.
Definition funenv :
Type :=
PTree.t function.
Definition size_fenv (
fenv:
funenv) :=
PTree_Properties.cardinal fenv.
Parameter should_inline:
ident ->
function ->
bool.
Definition add_fundef (
fenv:
funenv) (
idf:
ident *
fundef) :
funenv :=
match idf with
| (
id,
External ef) =>
PTree.remove id fenv
| (
id,
Internal f) =>
if should_inline id f
then PTree.set id f fenv
else PTree.remove id fenv
end.
Definition remove_vardef (
fenv:
funenv) (
idv:
ident *
globvar unit) :
funenv :=
PTree.remove (
fst idv)
fenv.
Definition funenv_program (
p:
program) :
funenv :=
List.fold_left remove_vardef p.(
prog_vars)
(
List.fold_left add_fundef p.(
prog_funct) (
PTree.empty function)).
Resources used by a function.
Maximum PC (node number) in the CFG of a function. All nodes of
the CFG of f are between 1 and max_pc_function f (inclusive).
Definition max_pc_function (
f:
function) :=
PTree.fold (
fun m pc i =>
Pmax m pc)
f.(
fn_code) 1%
positive.
Maximum pseudo-register defined in a function. All results of
an instruction of f, as well as all parameters of f, are between
1 and max_def_function (inclusive).
Definition max_def_instr (
i:
instruction) :=
match i with
|
Iop op args res s =>
res
|
Iload chunk addr args dst s =>
dst
|
Icall sg ros args res s =>
res
|
Ibuiltin ef args res s =>
res
|
_ => 1%
positive
end.
Definition max_def_function (
f:
function) :=
Pmax
(
PTree.fold (
fun m pc i =>
Pmax m (
max_def_instr i))
f.(
fn_code) 1%
positive)
(
List.fold_left (
fun m r =>
Pmax m r)
f.(
fn_params) 1%
positive).
State monad
To construct incrementally the CFG of a function after inlining,
we use a state monad similar to that used in module RTLgen.
It records the current state of the CFG, plus counters to generate
fresh pseudo-registers and fresh CFG nodes. It also records the
stack size needed for the inlined function.
Record state :
Type :=
mkstate {
st_nextreg:
positive;
(* last used pseudo-register *)
st_nextnode:
positive;
(* last used CFG node *)
st_code:
code;
(* current CFG *)
st_stksize:
Z (* current stack size *)
}.
Monotone evolution of the state.
Inductive sincr (
s1 s2:
state) :
Prop :=
Sincr (
NEXTREG:
Ple s1.(
st_nextreg)
s2.(
st_nextreg))
(
NEXTNODE:
Ple s1.(
st_nextnode)
s2.(
st_nextnode))
(
STKSIZE:
s1.(
st_stksize) <=
s2.(
st_stksize)).
Remark sincr_refl:
forall s,
sincr s s.
Proof.
intros; constructor; xomega.
Qed.
Lemma sincr_trans:
forall s1 s2 s3,
sincr s1 s2 ->
sincr s2 s3 ->
sincr s1 s3.
Proof.
intros. inv H; inv H0. constructor; xomega.
Qed.
Dependently-typed state monad, ensuring that the final state is
greater or equal (in the sense of predicate sincr above) than
the initial state.
Inductive res {
A:
Type} {
s:
state}:
Type :=
R (
x:
A) (
s':
state) (
I:
sincr s s').
Definition mon (
A:
Type) :
Type :=
forall (
s:
state), @
res A s.
Operations on this monad.
Definition ret {
A:
Type} (
x:
A):
mon A :=
fun s =>
R x s (
sincr_refl s).
Definition bind {
A B:
Type} (
x:
mon A) (
f:
A ->
mon B):
mon B :=
fun s1 =>
match x s1 with R vx s2 I1 =>
match f vx s2 with R vy s3 I2 =>
R vy s3 (
sincr_trans s1 s2 s3 I1 I2)
end
end.
Notation "'
do'
X <-
A ;
B" := (
bind A (
fun X =>
B))
(
at level 200,
X ident,
A at level 100,
B at level 200).
Definition initstate :=
mkstate 1%
positive 1%
positive (
PTree.empty instruction) 0.
Program Definition set_instr (
pc:
node) (
i:
instruction):
mon unit :=
fun s =>
R tt
(
mkstate s.(
st_nextreg)
s.(
st_nextnode) (
PTree.set pc i s.(
st_code))
s.(
st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition add_instr (
i:
instruction):
mon node :=
fun s =>
let pc :=
Psucc s.(
st_nextnode)
in
R pc
(
mkstate s.(
st_nextreg)
pc (
PTree.set pc i s.(
st_code))
s.(
st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition reserve_nodes (
numnodes:
positive):
mon positive :=
fun s =>
R s.(
st_nextnode)
(
mkstate s.(
st_nextreg) (
Pplus s.(
st_nextnode)
numnodes)
s.(
st_code)
s.(
st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition reserve_regs (
numregs:
positive):
mon positive :=
fun s =>
R s.(
st_nextreg)
(
mkstate (
Pplus s.(
st_nextreg)
numregs)
s.(
st_nextnode)
s.(
st_code)
s.(
st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition request_stack (
sz:
Z):
mon unit :=
fun s =>
R tt
(
mkstate s.(
st_nextreg)
s.(
st_nextnode)
s.(
st_code) (
Zmax s.(
st_stksize)
sz))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Fixpoint mlist_iter2 {
A B:
Type} (
f:
A ->
B ->
mon unit) (
l:
list (
A*
B)):
mon unit :=
match l with
|
nil =>
ret tt
| (
x,
y) ::
l' =>
do z <-
f x y;
mlist_iter2 f l'
end.
Inlining contexts
A context describes how to insert the CFG for a source function into
the CFG for the function after inlining:
-
a source instruction at PC n is relocated to PC n + ctx.(dpc);
-
all pseudo-registers of this instruction are shifted by ctx.(dreg);
-
all stack references are shifted by ctx.(dstk);
-
"return" instructions are transformed into "return" or "move" instructions
as governed by ctx.(retinfo).
Record context:
Type :=
mkcontext {
dpc:
positive;
(* offset for PCs *)
dreg:
positive;
(* offset for pseudo-regs *)
dstk:
Z;
(* offset for stack references *)
mreg:
positive;
(* max pseudo-reg number *)
mstk:
Z;
(* original stack block size *)
retinfo:
option(
node *
reg)
(* where to branch on return *)
}.
The following functions "shift" (relocate) PCs, registers, operations, etc.
Definition spc (
ctx:
context) (
pc:
node) :=
Pplus pc ctx.(
dpc).
Definition sreg (
ctx:
context) (
r:
reg) :=
Pplus r ctx.(
dreg).
Definition sregs (
ctx:
context) (
rl:
list reg) :=
List.map (
sreg ctx)
rl.
Definition sros (
ctx:
context) (
ros:
reg +
ident) :=
sum_left_map (
sreg ctx)
ros.
Definition sop (
ctx:
context) (
op:
operation) :=
shift_stack_operation (
Int.repr ctx.(
dstk))
op.
Definition saddr (
ctx:
context) (
addr:
addressing) :=
shift_stack_addressing (
Int.repr ctx.(
dstk))
addr.
The initial context, used to copy the CFG of a toplevel function.
Definition initcontext (
dpc dreg nreg:
positive) (
sz:
Z) :=
{|
dpc :=
dpc;
dreg :=
dreg;
dstk := 0;
mreg :=
nreg;
mstk :=
Zmax sz 0;
retinfo :=
None |}.
The context used to inline a call to another function.
Definition min_alignment (
sz:
Z) :=
if zle sz 1
then 1
else if zle sz 2
then 2
else if zle sz 4
then 4
else 8.
Definition callcontext (
ctx:
context)
(
dpc dreg nreg:
positive) (
sz:
Z)
(
retpc:
node) (
retreg:
reg) :=
{|
dpc :=
dpc;
dreg :=
dreg;
dstk :=
align (
ctx.(
dstk) +
ctx.(
mstk)) (
min_alignment sz);
mreg :=
nreg;
mstk :=
Zmax sz 0;
retinfo :=
Some (
spc ctx retpc,
sreg ctx retreg) |}.
The context used to inline a tail call to another function.
Definition tailcontext (
ctx:
context) (
dpc dreg nreg:
positive) (
sz:
Z) :=
{|
dpc :=
dpc;
dreg :=
dreg;
dstk :=
align ctx.(
dstk) (
min_alignment sz);
mreg :=
nreg;
mstk :=
Zmax sz 0;
retinfo :=
ctx.(
retinfo) |}.
Recursive expansion and copying of a CFG
Insert "move" instructions to copy the arguments of an inlined
function into its parameters.
Fixpoint add_moves (
srcs dsts:
list reg) (
succ:
node):
mon node :=
match srcs,
dsts with
|
s1 ::
sl,
d1 ::
dl =>
do n <-
add_instr (
Iop Omove (
s1 ::
nil)
d1 succ);
add_moves sl dl n
|
_,
_ =>
ret succ
end.
To prevent infinite inlining of a recursive function, when we
inline the body of a function f, this function is removed from the
environment of inlinable functions and therefore becomes ineligible
for inlining. This decreases the size (number of entries) of the
environment and guarantees termination. Inlining is, therefore,
presented as a well-founded recursion over the size of the environment.
Section EXPAND_CFG.
Variable fenv:
funenv.
The rec parameter is the recursor: rec fenv' P ctx f copies
the body of function f, with inline expansion within, as governed
by context ctx. It can only be called for function environments
fenv' strictly smaller than the current environment fenv.
Variable rec:
forall fenv', (
size_fenv fenv' <
size_fenv fenv)%
nat ->
context ->
function ->
mon unit.
Given a register-or-symbol ros, can we inline the corresponding call?
Inductive inline_decision (
ros:
reg +
ident) :
Type :=
|
Cannot_inline
|
Can_inline (
id:
ident) (
f:
function) (
P:
ros =
inr reg id) (
Q:
fenv!
id =
Some f).
Program Definition can_inline (
ros:
reg +
ident):
inline_decision ros :=
match ros with
|
inl r =>
Cannot_inline _
|
inr id =>
match fenv!
id with Some f =>
Can_inline _ id f _ _ |
None =>
Cannot_inline _ end
end.
Inlining of a call to function f. An appropriate context is
created, then the CFG of f is recursively copied, then moves
are inserted to copy the arguments of the call to the parameters of f.
Definition inline_function (
ctx:
context) (
id:
ident) (
f:
function)
(
P:
PTree.get id fenv =
Some f)
(
args:
list reg)
(
retpc:
node) (
retreg:
reg) :
mon node :=
let npc :=
max_pc_function f in
let nreg :=
max_def_function f in
do dpc <-
reserve_nodes npc;
do dreg <-
reserve_regs nreg;
let ctx' :=
callcontext ctx dpc dreg nreg f.(
fn_stacksize)
retpc retreg in
do x <-
rec (
PTree.remove id fenv) (
PTree_Properties.cardinal_remove P)
ctx'
f;
add_moves (
sregs ctx args) (
sregs ctx'
f.(
fn_params)) (
spc ctx'
f.(
fn_entrypoint)).
Inlining of a tail call to function f. Similar to inline_function,
but the new context is different.
Definition inline_tail_function (
ctx:
context) (
id:
ident) (
f:
function)
(
P:
PTree.get id fenv =
Some f)
(
args:
list reg):
mon node :=
let npc :=
max_pc_function f in
let nreg :=
max_def_function f in
do dpc <-
reserve_nodes npc;
do dreg <-
reserve_regs nreg;
let ctx' :=
tailcontext ctx dpc dreg nreg f.(
fn_stacksize)
in
do x <-
rec (
PTree.remove id fenv) (
PTree_Properties.cardinal_remove P)
ctx'
f;
add_moves (
sregs ctx args) (
sregs ctx'
f.(
fn_params)) (
spc ctx'
f.(
fn_entrypoint)).
The instruction generated for a Ireturn instruction found in an
inlined function body.
Definition inline_return (
ctx:
context) (
or:
option reg) (
retinfo:
node *
reg) :=
match retinfo,
or with
| (
retpc,
retreg),
Some r =>
Iop Omove (
sreg ctx r ::
nil)
retreg retpc
| (
retpc,
retreg),
None =>
Inop retpc
end.
Expansion and copying of an instruction. For most instructions,
its registers and successor PC are shifted as per the context ctx,
then the instruction is inserted in the final CFG at its final position
spc ctx pc.
Icall instructions are either replaced by a "goto" to the expansion
of the called function, or shifted as described above.
Itailcall instructions are similar, with one additional case. If
the Itailcall occurs in the body of an inlined function, and
cannot be inlined itself, it must be turned into an Icall
instruction that branches to the return point of the inlined
function.
Finally, Ireturn instructions within an inlined function are
turned into a "move" or "goto" that stores the result, if any,
into the destination register, then branches back to the successor
of the inlined call.
Definition expand_instr (
ctx:
context) (
pc:
node) (
i:
instruction):
mon unit :=
match i with
|
Inop s =>
set_instr (
spc ctx pc) (
Inop (
spc ctx s))
|
Iop op args res s =>
set_instr (
spc ctx pc)
(
Iop (
sop ctx op) (
sregs ctx args) (
sreg ctx res) (
spc ctx s))
|
Iload chunk addr args dst s =>
set_instr (
spc ctx pc)
(
Iload chunk (
saddr ctx addr) (
sregs ctx args) (
sreg ctx dst) (
spc ctx s))
|
Istore chunk addr args src s =>
set_instr (
spc ctx pc)
(
Istore chunk (
saddr ctx addr) (
sregs ctx args) (
sreg ctx src) (
spc ctx s))
|
Icall sg ros args res s =>
match can_inline ros with
|
Cannot_inline =>
set_instr (
spc ctx pc)
(
Icall sg (
sros ctx ros) (
sregs ctx args) (
sreg ctx res) (
spc ctx s))
|
Can_inline id f P Q =>
do n <-
inline_function ctx id f Q args s res;
set_instr (
spc ctx pc) (
Inop n)
end
|
Itailcall sg ros args =>
match can_inline ros with
|
Cannot_inline =>
match ctx.(
retinfo)
with
|
None =>
set_instr (
spc ctx pc)
(
Itailcall sg (
sros ctx ros) (
sregs ctx args))
|
Some(
rpc,
rreg) =>
set_instr (
spc ctx pc)
(
Icall sg (
sros ctx ros) (
sregs ctx args)
rreg rpc)
end
|
Can_inline id f P Q =>
do n <-
inline_tail_function ctx id f Q args;
set_instr (
spc ctx pc) (
Inop n)
end
|
Ibuiltin ef args res s =>
set_instr (
spc ctx pc)
(
Ibuiltin ef (
sregs ctx args) (
sreg ctx res) (
spc ctx s))
|
Icond cond args s1 s2 =>
set_instr (
spc ctx pc)
(
Icond cond (
sregs ctx args) (
spc ctx s1) (
spc ctx s2))
|
Ijumptable r tbl =>
set_instr (
spc ctx pc)
(
Ijumptable (
sreg ctx r) (
List.map (
spc ctx)
tbl))
|
Ireturn or =>
match ctx.(
retinfo)
with
|
None =>
set_instr (
spc ctx pc) (
Ireturn (
option_map (
sreg ctx)
or))
|
Some rinfo =>
set_instr (
spc ctx pc) (
inline_return ctx or rinfo)
end
end.
The expansion of a function f iteratively expands all its
instructions, after recording how much stack it needs.
Definition expand_cfg_rec (
ctx:
context) (
f:
function):
mon unit :=
do x <-
request_stack (
ctx.(
dstk) +
ctx.(
mstk));
mlist_iter2 (
expand_instr ctx) (
PTree.elements f.(
fn_code)).
End EXPAND_CFG.
Here we "tie the knot" of the recursion, taking the fixpoint
of expand_cfg_rec.
Definition expand_cfg :=
Fixm size_fenv expand_cfg_rec.
Start of the recursion: copy and inline function f in the
initial context.
Definition expand_function (
fenv:
funenv) (
f:
function):
mon context :=
let npc :=
max_pc_function f in
let nreg :=
max_def_function f in
do dpc <-
reserve_nodes npc;
do dreg <-
reserve_regs nreg;
let ctx :=
initcontext dpc dreg nreg f.(
fn_stacksize)
in
do x <-
expand_cfg fenv ctx f;
ret ctx.
Inlining in functions and whole programs.
Local Open Scope string_scope.
Inlining can increase the size of the function's stack block. We must
make sure that the new size does not exceed Int.max_unsigned, otherwise
address computations within the stack would overflow and produce incorrect
results.
Definition transf_function (
fenv:
funenv) (
f:
function) :
Errors.res function :=
let '(
R ctx s _) :=
expand_function fenv f initstate in
if zle s.(
st_stksize)
Int.max_unsigned then
OK (
mkfunction f.(
fn_sig)
(
sregs ctx f.(
fn_params))
s.(
st_stksize)
s.(
st_code)
(
spc ctx f.(
fn_entrypoint)))
else
Error(
msg "
Inlining:
stack too big").
Definition transf_fundef (
fenv:
funenv) (
fd:
fundef) :
Errors.res fundef :=
AST.transf_partial_fundef (
transf_function fenv)
fd.
Definition transf_program (
p:
program):
Errors.res program :=
let fenv :=
funenv_program p in
AST.transform_partial_program (
transf_fundef fenv)
p.