Abstract syntax for the Compcert C language
Require Import Coqlib.
Require Import Integers.
Require Import Floats.
Require Import Values.
Require Import AST.
Require Import Errors.
Require Import Ctypes.
Require Import Cop.
Expressions
Compcert C expressions are almost identical to those of C.
The only omission is string literals. Some operators are treated
as derived forms: array indexing, pre-increment, pre-decrement, and
the && and || operators. All expressions are annotated with
their types.
Inductive expr :
Type :=
|
Eval (
v:
val) (
ty:
type)
(* constant *)
|
Evar (
x:
ident) (
ty:
type)
(* variable *)
|
Efield (
l:
expr) (
f:
ident) (
ty:
type)
|
Evalof (
l:
expr) (
ty:
type)
(* l-value used as a r-value *)
|
Ederef (
r:
expr) (
ty:
type)
(* pointer dereference (unary *) *)
|
Eaddrof (
l:
expr) (
ty:
type)
(* address-of operators (&) *)
|
Eunop (
op:
unary_operation) (
r:
expr) (
ty:
type)
|
Ebinop (
op:
binary_operation) (
r1 r2:
expr) (
ty:
type)
|
Ecast (
r:
expr) (
ty:
type)
(* type cast (ty)r *)
|
Eseqand (
r1 r2:
expr) (
ty:
type)
(* sequential "and" r1 && r2 *)
|
Eseqor (
r1 r2:
expr) (
ty:
type)
(* sequential "or" r1 && r2 *)
|
Econdition (
r1 r2 r3:
expr) (
ty:
type)
(* conditional r1 ? r2 : r3 *)
|
Esizeof (
ty':
type) (
ty:
type)
(* size of a type *)
|
Ealignof (
ty':
type) (
ty:
type)
(* natural alignment of a type *)
|
Eassign (
l:
expr) (
r:
expr) (
ty:
type)
(* assignment l = r *)
|
Eassignop (
op:
binary_operation) (
l:
expr) (
r:
expr) (
tyres ty:
type)
|
Epostincr (
id:
incr_or_decr) (
l:
expr) (
ty:
type)
|
Ecomma (
r1 r2:
expr) (
ty:
type)
(* sequence expression r1, r2 *)
|
Ecall (
r1:
expr) (
rargs:
exprlist) (
ty:
type)
|
Ebuiltin (
ef:
external_function) (
tyargs:
typelist) (
rargs:
exprlist) (
ty:
type)
|
Eloc (
b:
block) (
ofs:
int) (
ty:
type)
|
Eparen (
r:
expr) (
tycast:
type) (
ty:
type)
(* marked subexpression *)
with exprlist :
Type :=
|
Enil
|
Econs (
r1:
expr) (
rl:
exprlist).
Expressions are implicitly classified into l-values and r-values,
ranged over by
l and
r, respectively, in the grammar above.
L-values are those expressions that can occur to the left of an assignment.
They denote memory locations. (Indeed, the reduction semantics for
expression reduces them to
Eloc b ofs expressions.) L-values are
variables (
Evar), pointer dereferences (
Ederef), field accesses (
Efield).
R-values are all other expressions. They denote values, and the reduction
semantics reduces them to
Eval v expressions.
A l-value can be used in a r-value context, but this use must be marked
explicitly with the
Evalof operator, which is not materialized in the
concrete syntax of C but denotes a read from the location corresponding to
the l-value
l argument of
Evalof l.
The grammar above contains some forms that cannot appear in source terms
but appear during reduction. These forms are:
-
Eval v where v is a pointer or Vundef. (Eval of an integer or
float value can occur in a source term and represents a numeric literal.)
-
Eloc b ofs, which appears during reduction of l-values.
-
Eparen r tycast ty, which appears during reduction of conditionals
r1 ? r2 : r3 as well as sequential `and' / `or'.
Some C expressions are derived forms. Array access
r1[r2] is expressed
as
*(r1 + r2).
Definition Eindex (
r1 r2:
expr) (
ty:
type) :=
Ederef (
Ebinop Oadd r1 r2 (
Tpointer ty noattr))
ty.
Pre-increment ++l and pre-decrement --l are expressed as
l += 1 and l -= 1, respectively.
Definition Epreincr (
id:
incr_or_decr) (
l:
expr) (
ty:
type) :=
Eassignop (
match id with Incr =>
Oadd |
Decr =>
Osub end)
l (
Eval (
Vint Int.one)
type_int32s) (
typeconv ty)
ty.
Extract the type part of a type-annotated expression.
Definition typeof (
a:
expr) :
type :=
match a with
|
Eloc _ _ ty =>
ty
|
Evar _ ty =>
ty
|
Ederef _ ty =>
ty
|
Efield _ _ ty =>
ty
|
Eval _ ty =>
ty
|
Evalof _ ty =>
ty
|
Eaddrof _ ty =>
ty
|
Eunop _ _ ty =>
ty
|
Ebinop _ _ _ ty =>
ty
|
Ecast _ ty =>
ty
|
Econdition _ _ _ ty =>
ty
|
Eseqand _ _ ty =>
ty
|
Eseqor _ _ ty =>
ty
|
Esizeof _ ty =>
ty
|
Ealignof _ ty =>
ty
|
Eassign _ _ ty =>
ty
|
Eassignop _ _ _ _ ty =>
ty
|
Epostincr _ _ ty =>
ty
|
Ecomma _ _ ty =>
ty
|
Ecall _ _ ty =>
ty
|
Ebuiltin _ _ _ ty =>
ty
|
Eparen _ _ ty =>
ty
end.
Statements
Compcert C statements are very much like those of C and include:
-
empty statement Sskip
-
evaluation of an expression for its side-effects Sdo r
-
conditional if (...) { ... } else { ... }
-
the three loops while(...) { ... } and do { ... } while (...)
and for(..., ..., ...) { ... }
-
the switch construct
-
break, continue, return (with and without argument)
-
goto and labeled statements.
Only structured forms of
switch are supported; moreover,
the
default case must occur last. Blocks and block-scoped declarations
are not supported.
Definition label :=
ident.
Inductive statement :
Type :=
|
Sskip :
statement (* do nothing *)
|
Sdo :
expr ->
statement (* evaluate expression for side effects *)
|
Ssequence :
statement ->
statement ->
statement (* sequence *)
|
Sifthenelse :
expr ->
statement ->
statement ->
statement (* conditional *)
|
Swhile :
expr ->
statement ->
statement (* while loop *)
|
Sdowhile :
expr ->
statement ->
statement (* do loop *)
|
Sfor:
statement ->
expr ->
statement ->
statement ->
statement (* for loop *)
|
Sbreak :
statement (* break statement *)
|
Scontinue :
statement (* continue statement *)
|
Sreturn :
option expr ->
statement (* return statement *)
|
Sswitch :
expr ->
labeled_statements ->
statement (* switch statement *)
|
Slabel :
label ->
statement ->
statement
|
Sgoto :
label ->
statement
with labeled_statements :
Type :=
(* cases of a switch *)
|
LSnil:
labeled_statements
|
LScons:
option Z ->
statement ->
labeled_statements ->
labeled_statements.
Functions
A function definition is composed of its return type (fn_return),
the names and types of its parameters (fn_params), the names
and types of its local variables (fn_vars), and the body of the
function (a statement, fn_body).
Record function :
Type :=
mkfunction {
fn_return:
type;
fn_callconv:
calling_convention;
fn_params:
list (
ident *
type);
fn_vars:
list (
ident *
type);
fn_body:
statement
}.
Definition var_names (
vars:
list(
ident *
type)) :
list ident :=
List.map (@
fst ident type)
vars.
Functions can either be defined (Internal) or declared as
external functions (External).
Inductive fundef :
Type :=
|
Internal:
function ->
fundef
|
External:
external_function ->
typelist ->
type ->
calling_convention ->
fundef.
The type of a function definition.
Definition type_of_function (
f:
function) :
type :=
Tfunction (
type_of_params (
fn_params f)) (
fn_return f) (
fn_callconv f).
Definition type_of_fundef (
f:
fundef) :
type :=
match f with
|
Internal fd =>
type_of_function fd
|
External id args res cc =>
Tfunction args res cc
end.
Programs
A "pre-program", as produced by the elaborator is composed of:
-
a list of definitions of functions and global variables;
-
the names of functions and global variables that are public (not static);
-
the name of the function that acts as entry point ("main" function).
-
a list of definitions for structure and union names
A program is composed of the same information, plus the corresponding
composite environment, and a proof that this environment is consistent
with the composite definitions.
Record pre_program :
Type := {
pprog_defs:
list (
ident *
globdef fundef type);
pprog_public:
list ident;
pprog_main:
ident;
pprog_types:
list composite_definition
}.
Record program :
Type := {
prog_defs:
list (
ident *
globdef fundef type);
prog_public:
list ident;
prog_main:
ident;
prog_types:
list composite_definition;
prog_comp_env:
composite_env;
prog_comp_env_eq:
build_composite_env prog_types =
OK prog_comp_env
}.
Definition program_of_program (
p:
program) :
AST.program fundef type :=
{|
AST.prog_defs :=
p.(
prog_defs);
AST.prog_public :=
p.(
prog_public);
AST.prog_main :=
p.(
prog_main) |}.
Coercion program_of_program:
program >->
AST.program.
Program Definition program_of_pre_program (
p:
pre_program) :
res program :=
match build_composite_env p.(
pprog_types)
with
|
Error e =>
Error e
|
OK ce =>
OK {|
prog_defs :=
p.(
pprog_defs);
prog_public :=
p.(
pprog_public);
prog_main :=
p.(
pprog_main);
prog_types :=
p.(
pprog_types);
prog_comp_env :=
ce;
prog_comp_env_eq :=
_ |}
end.