overview.tex 15.2 KB
Newer Older
1
%!TEX root = atva2021.tex
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321

\section{Motivating example and overview}
\label{sec:over-hls}

This section illustrates the use of the logic $\slah$ for the specification 
	of programs manipulating heap lists and
give an overview of the ingredients used by the decision procedures
	we propose.

Figure~\ref{fig:search} presents the motivating example.
The function \lst{search} scans 
	a heap list between the addresses \lst{hbeg} (included) 
						and \lst{hend} (excluded)
	to find a chunk of size greater than the one given as parameter. 
%
A heap list is a block of memory divided into \emph{chunks},
such that each chunk stores its size at its start address.
For readability, we consider that the size counts 
the number of machine integers (and not bytes).
Scanning the list requires pointer arithmetics to
compute the start address of the next chunk.
After the size, the chunk may include additional information 
about the chunk in so-called  \emph{chunk's header}.
For readability, we consider %in this section 
that the header contains only the size information.
%In our example, 
%the second integer includes a flag storing the status 
%(1--free or 0--used) of the chunk.
The data carried by the chunk, called \emph{chunk's body},
starts after the chunk header
and ends before the header of the next chunk.

%% Example search size
%\input{fig-ex-rsz}

\begin{figure}[t]
{\small
\centering
\begin{minipage}[t]{0.33\textwidth}
\vspace{6mm}% Hack!! But does do the job
\lstset{language=C,numbers=left,stepnumber=1,fontadjust=true}
\begin{lstlisting}
int* hbeg; // heap start
int* hend; // heap end
int* search(int rsz) {
  int* t = hbeg;
  while (t < hend) {
    int sz = *t;
    if (rsz <= sz)
      return t;
    t = t+sz;
  }
  return NULL;
}
\end{lstlisting}
\end{minipage}
\begin{minipage}[t]{0.62\textwidth}
\vspace{0pt}% Hack!! But does do the job
\begin{eqnarray}\label{eq:over-sat}
\texttt{pre} \label{eq:over-pre}
        & \triangleq & 0 < b < e : \hls{}(b,e) 
\\[2mm]
\texttt{path}_{\texttt{4-9}} \label{eq:over-path-once}
	    & \triangleq & 0 < b \land b = t \land t < e \land sz_0 < rsz
\\
	    & \land & sz_0 = t_1 - t \land 2 \le t_1 - t  
\nonumber \\
        & :     & t \pto sz_0 \sepc \blk(t+1, t_1) \sepc \hls{}(t_1,e)
\nonumber \\[2mm]
I \label{eq:over-inv}
	    & \triangleq & 0 < b < e 
	    \\
	    \nonumber
	    & : & \hls{}(b,t; rsz-1) \sepc \hls{}(t,e)
\\[2mm]
\texttt{post}_{\texttt{5-10}}(I) \label{eq:over-inv-once}
        & \triangleq &  0 < b < e \land t_0 < e \land sz_0 < rsz \\
		& \land & 2 \le sz_0 \land t - t_0=sz_0 
\nonumber \\
		& :     & \hls{}(b,t_0;rsz-1) \sepc t_0 \pto sz_0 
\nonumber \\
		&       & \sepc\ \blk(t_0+1,t) \sepc \hls{}(t,e)
\nonumber
\end{eqnarray}
\end{minipage}
\vspace{-2eX}
\caption{Searching a chunk in a heap list 
and some of its specifications in \slah}
\label{fig:search}
}
\vspace{-4mm}
\end{figure}


The right part of Figure~\ref{fig:search} provides 
several formulas in the logic \slah\ defined in Section~\ref{sec:logic-hls}.
The formula \texttt{pre} specifies the precondition of \lst{search}, 
i.e., there is a heap list from \lst{hbeg} to \lst{hend}
represented by the logic variables $b$ resp. $e$.
The pure part of \texttt{pre} (left side of ':') is a linear constraint
on the addresses, while the spatial part (right side of ':') employs the
predicate $\hls{}$ defined inductively by the following two rules:
% Equations~(\ref{eq:hls-0})--(\ref{eq:hls-rec}).
\begin{align}
\label{eq:hlsv-emp}
\hls{}(x, y; v) & \Leftarrow x=y : \emp  \\
\label{eq:hlsv-rec}
\hls{}(x, y; v) & \Leftarrow \exists z \cdot 2 \le z - x \le v : 
	x \pto z-x \sepc \blk(x+1,z) \sepc \hls{}(z,y; v)  \\
\label{eq:hlsv-infty}
\hls{}(x, y) & \equiv \hls{}(x,y;\infty) \mbox{ i.e., no upper bound on chunks' sizes}
\end{align}
stating that a heap list from $x$ to $y$ is 
	either an empty heap if $x$ and $y$ are aliased,
    or a \emph{chunk} starting at address $x$ and ending at
    address $z$ followed by a heap list from $z$ to $y$.
    The chunk stores its size $z-x$ in the header
		(atom $x\pto z-x$).
	The chunk's body is specified by a memory block atom, $\blk(x+1,z)$, starting
		after the header and ending before the next chunk.
%This predicate belongs to the class  
%	defined in Section~\ref{sec:logic-hls} (Equations~(\ref{eq:hlsv-emp})--(\ref{eq:hlsv-rec})),
%	which allows predicates to specify 
%	an upper bound on the size of all chunks in the heap list
%	as a third parameter, i.e., $\hls{}(x,y;v)$;
%therefore $\hls{}(x,y)$ is a shorthand for $\hls{}(x,y;\infty)$.
The parameter $v$ is an upper bound on the size of all chunks in the list.
%By convention, $\hls{}(x,y)$ is a shorthand for $\hls{}(x,y;\infty)$, i.e, 
%the chunks' sizes are unconstrained from above.

The formula $\texttt{path}_{\texttt{4-9}}$ is generated by 
	the symbolic execution of \lst{search} 
	starting from \texttt{pre} and 
	executing the statements from line 4 to 9. 
It's satisfiability means that the line 9 is reachable from 
a state satisfying \texttt{pre}.
%	(We use the SSA representation for the program and 
%	 extend it with the logic variables introduced by 
%	 the unfolding of the $\hls{}$ predicate):
%	Bounded reachability techniques generate such formulas
%	and query their satisfiability.
%the bounded reachability which generates satisfiability queries,
  
The decision procedure for the satisfiability of \slah\
	in Section~\ref{sec:sat-hls} is based
on the translation of a \slah\ formula $\varphi$ into 
an equi-satisfiable Presburger arithmetic (\PbA) formula $\varphi^P$. 
The delicate point 
with respect to the previous work, e.g., \cite{DBLP:journals/corr/abs-1802-05935},  
is to compute a summary in \PbA\ for the $\hls{}$ atoms.
% what happens if over/under-approximation?
% what happens if conditions are not satisfied? undecidability?
The summary computed for the atom $\hls{}(x,y;v)$ 
when the heap it denotes is not empty
is
$(v=2 \land \exists k\cdot k > 0 \land 2k = y - x) \lor (2 < v \land 2 < y-x)$,
i.e., either all chunks have size 2 and the heap-list has an even size or
$v$ and the size of the heap-list are strictly greater than 2.
For the empty case, the summary is trivially $x=y$.
%For $v=\infty$, the summary is $2 < y-x$.
The other spatial atoms $a$ (e.g., $x \pto v$ and $\blk(x,y)$) 
are summarized by constraints on 
their start address denoted by $\atomhead(a)$ (e.g., $x$) and 
their end address $\atomtail(a)$ (e.g., $x+1$ resp. $y$).
For the points-to atom, this constraint is true, 
but for the $\blk(x,y)$ atom, the constraint is 
$x = \atomhead(a) < \atomtail(a) = y$.
Therefore, the spatial part of $\texttt{path}_{\texttt{4-9}}$ is translated 
into the \PbA\ formula $\texttt{pb}^\Sigma_{\texttt{4-9}}$:
%
\begin{align*}
\underbrace{t+1 < t_1}_{\blk{}(t+1,t_1)} & \land 
  \underbrace{(t_1 = e \lor 2 < e-t_1)}_{\hls{}(t_1,e)}.
\end{align*}
%
Alone, $\texttt{pb}^\Sigma_{\texttt{4-9}}$ does not capture the semantics of 
	the separating conjunction in the spatial part.
For that, we add a set of constraints 
	expressing the disjointness of memory blocks occupied by the spatial atoms.
For our example, these constraints are 
	$\texttt{pb}^\sepc_{\texttt{4-9}} \equiv t < t_1$.
By conjoining the pure part of $\texttt{path}_{\texttt{4-9}}$ with 
formulas $\texttt{pb}^\Sigma_{\texttt{4-9}}$ and 
         $\texttt{pb}^\sepc_{\texttt{4-9}}$,
we obtain an equi-satisfiable existentially quantified {\PbA} formula
 whose satisfiability is NP-complete.

\smallskip
\noindent
The {\PbA} abstraction is also used to decide the validity of entailments 
in \slah\ in combination with a matching procedure between spatial parts.
To illustrate this decision procedure presented in Section~\ref{sec:ent},
we consider the verification conditions generated by the proof of 
 the invariant $I$ from Equation~(\ref{eq:over-inv}) for the \lst{search}'s loop. 
%Consider the invariant $I$ from Equation~(\ref{eq:over-inv}) in Figure~\ref{fig:search}.
It states that \lst{t} splits the heap list in two parts. 
%The following entailment states the initial condition for $I$:
%\begin{eqnarray}\label{eq:over-ent-0}
%0 < b < e \land\ t = b : \hls{}(b,e) & \limp & I
%\end{eqnarray}
%The decision procedure first checks that the antecedent is satisfiable
%(otherwise the entailment is trivially valid).
%If it is the case, %\todo{ESOP 21: difficult to follow, MS explicit}
%    the procedure explore a set of cases
%    defining all total orders between the equivalence classes among
%    location terms used as start address of a spatial atom 
%    in the antecedent and the consequent.
%For our example, the cases to be explored are $t=b$, $t<b$ or $t>b$.
%	However, if the constraint defining the case is not implied 
%	by the PA abstraction of the antecedent, the case's exploration stops.
%	In our example, only one case is explored, i.e., $0 < b = t < e$. 
%Given the total order and the equivalence classes of terms of the explored case,
%    the spatial part of the antecedent and consequent formulas are transformed
%	by removing empty heap list atoms (i.e., $\hls{}(x,y;v)$ for which $x=y$)
%	and ordering the atoms by their start location. 
%In our example, only the consequent changes its spatial part to $\hls{}(b,e)$.
%Finally, the ordered spatial parts are matched in the context of the 
%	relation on locations given by the antecedent's \PbA\ abstraction.
%	In our example, the two spatial atoms are syntactically the same, 
%	so the matching succeeds trivially.
%
%To illustrate a non trivial case of the matching procedure,
%	we consider the verification condition (VC)
%	for the inductiveness of $I$.
%
To illustrate a non-trivial case of the matching procedure used
	in the decision procedure for entailment,
	we consider the verification condition (VC)
	for the inductiveness of $I$.
	The antecedent of the VC is the formula 
	$\texttt{post}_{\texttt{5-10}}(I)$ in Figure~\ref{fig:search},
	obtained by symbolically executing the path including the statements
	at lines 5--7 and 9 starting from $I$.
The \PbA\ abstraction of $\texttt{post}_{\texttt{5-10}}(I)$ is satisfiable
and entails the following constraint on the terms used by the spatial atoms:
$0 < b \le t_0 < t_0+1 < t \le e$.
The spatial atoms used in the antecedent and consequent 
are ordered using the order given by this contraint as follows:
\[\begin{array}{lcc}
\textrm{antecedent:} & \hls{}(b,t_0;rsz-1) \sepc\ t_0 \pto sz_0 \sepc\ \blk(t_0+1,t) 
				     & \sepc\ \hls{}(t,e) \\
\textrm{consequent:} & \hls{}(b,t;rsz-1) & \sepc\ \hls{}(t,e)
\end{array}\]
%
The matching procedure starts by searching 
	a prefix of the sequence of atoms in the antecedent
	that matches the first atom in the consequent, $\hls{}(b,t;rsz-1)$, such that
	the start and end addresses of the sequence are respectively $b$ and $t$.
The sequence found is $\hls{}(b,t_0;rsz-1) \sepc t_0 \pto sz_0 \sepc \blk(t_0+1,t)$
	which also satisfies the condition (encoded in \PbA)
		that it defines a contiguous memory block between $b$ and $t$.
The algorithm continues by trying to prove the matching found
	using a composition lemma $\hls{}(b,t_0;rsz-1) \sepc \hls{}(t_0,t;rsz-1) \models \hls{}(b,t;rsz-1)$
	and the unfolding of the atom $\hls{}(t_0,t;rsz-1)$.
	%ESOP 21: "one unfolding" what determines that one makes *one* unfolding here, in terms of the (automatic) procedure?
The \PbA\ abstraction of the antecedent is used to ensure that 
    $sz_0$ is the size of the heap list starting at $t_0$, i.e., $t-t_0$
    and the constraint $sz_0 \le rsz-1$ is satisfied.
	For this ordering, the algorithm is able to prove the matching. Since this ordering is the only possible ordering compatible with the {\PbA} abstraction of the antecedent, we conclude that the entailment is valid.
%The procedure continues with the remaining cases, until 
%    either a matching fails (the entailment is invalid) 
%    or cases led to a matching (the entailment is valid).
    





%Memory allocators manage the heap part allocated to the process.
%They usually organize the heap into a set of non-overlapping chunks and some memory used to manage this set. 
%\begin{wrapfigure}{R}{0.45\textwidth}
%\centering
%\vspace{-4eX}
%\includegraphics[trim=2cm 2cm 2cm 1cm,scale=0.4]{../figs/fig_dma_chk.pdf}
%\caption{heap list in DMA}
%\label{fig:heap}
%\vspace{-4eX}
%\end{wrapfigure}
%In this work, we consider the organization of the set as a heap list, i.e., a list stored in a raw array of bytes, where elements are in sequence in this block and the successor relation is given by the element size, see Figure~\ref{fig:heap}.
%To obtain the heap list data structures, DMA managers split each chunk 
%in two parts:
%(i) the header containing management information, at least the chunk size including the header and its status -- occupied by user data or free to be allocated,
%(ii) the body intended to store user's data (payload).
%
%The heap list data structure may be seen as a continuous block of values (suppose that values are all integers) with some properties about its content, for example:
%(i) the size of the block is the sum of values contained at some positions in the block,
%(ii) the status of two adjacent chunks are not both free (early coalescing policy).
%Therefore, to capture these properties, we cannot use only a predicate $\blk(x;y)$ denoting a sequence of values stating at location $x$ and ending just before location $y$, i.e., at $y-1$. We propose to use a special predicate called $\hls{}$ defined inductively by the first two rules below  as a list segment of chunks, each chunk being specified by the predicate $\hck$:
%\begin{eqnarray*}
%\hls{}(x;y) & \Leftarrow & x=y : \emp \label{id:hls-emp}
%\\
%\hls{}(x;y) & \Leftarrow & 
%\exists w, st, nx \cdot  w - x > 3 \land 0\leq st \leq 1: x \pto w-x \sepc x+1\pto st   \\
%		& & \sepc\ x+2\pto nx  \sepc\ \blk(x+3; w)  \sepc \hls{}(w; y)
% %
%%\hls{}(x;y) & \Leftarrow & \exists w\cdot \hck(x;w)\sepc\hls{}(w;y) \label{id:hls-rec}
%%\\
%%\hck(x;y) & \Leftarrow & \exists sz,st,nx\cdot\begin{array}[t]{l}
%% 		sz > 3 \land y=x+sz \land 0\leq st \leq 1: \\
% %		x\pto sz \sepc x+1\pto st \sepc x+2\pto nx 
%%		\sepc\ \blk(x+3;x+sz)  %%\blk(x,3,sz)
%%		\end{array}\label{id:hck}
%\end{eqnarray*}
%The chunk is specified using $\blk$ predicate to denote the body of the chunk, and points-to atoms $t\pto t'$ to obtain the value stored in the header. Notice the use of pointer arithmetics to access the chunk fields and to obtain the start location of the next chunk in the heap.
%%%\zhilin{Is $\blk(x,3,sz)$ really better than $\blk(x+3, sz)$ ?}
%%%MS: is  better than $\blk(x+3, x+sz)$
%
%
%The heap lists with no successive free chunks used in early coalescing policy are captured by an ID which employs the integer parameters $z$ and $z'$ to keep track of the status of chunks before the heap list segment boundaries $x$ and $y$:
%\begin{eqnarray}
%\hls{c}(x,z;y,z') & \Leftarrow & x=y \land z=z' : \emp \label{id:hlsc-emp}\\
%\hls{c}(x,z;y,z') & \Leftarrow & \exists w,z''\cdot \begin{array}[t]{l}
%				z\neq\fif(x) \land z''=\fif(x) : \\
%				\hck(x;w) \sepc\ \hls{c}(w,z'';y,z') 
%				\end{array} \label{id:hlsc-rec}
%\end{eqnarray}
%where the term $\fif(x)$ denotes the value of the status field (at offset $+1$ from $x$), which may be 0 (for occupied chunk) or 1 (for free chunk).
%Similar definitions are used to capture other properties of allocators' configurations that don't use a special list for the set of free chunks. We provide such predicates in Section~\ref{sec:slid-hls} to specify allocation policies, \eg, best-fit or first-fit.