overview.tex 15.2 KB
 Mihaela SIGHIREANU committed Apr 26, 2021 1 %!TEX root = atva2021.tex  Mihaela SIGHIREANU committed Apr 22, 2021 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321  \section{Motivating example and overview} \label{sec:over-hls} This section illustrates the use of the logic $\slah$ for the specification of programs manipulating heap lists and give an overview of the ingredients used by the decision procedures we propose. Figure~\ref{fig:search} presents the motivating example. The function \lst{search} scans a heap list between the addresses \lst{hbeg} (included) and \lst{hend} (excluded) to find a chunk of size greater than the one given as parameter. % A heap list is a block of memory divided into \emph{chunks}, such that each chunk stores its size at its start address. For readability, we consider that the size counts the number of machine integers (and not bytes). Scanning the list requires pointer arithmetics to compute the start address of the next chunk. After the size, the chunk may include additional information about the chunk in so-called \emph{chunk's header}. For readability, we consider %in this section that the header contains only the size information. %In our example, %the second integer includes a flag storing the status %(1--free or 0--used) of the chunk. The data carried by the chunk, called \emph{chunk's body}, starts after the chunk header and ends before the header of the next chunk. %% Example search size %\input{fig-ex-rsz} \begin{figure}[t] {\small \centering \begin{minipage}[t]{0.33\textwidth} \vspace{6mm}% Hack!! But does do the job \lstset{language=C,numbers=left,stepnumber=1,fontadjust=true} \begin{lstlisting} int* hbeg; // heap start int* hend; // heap end int* search(int rsz) { int* t = hbeg; while (t < hend) { int sz = *t; if (rsz <= sz) return t; t = t+sz; } return NULL; } \end{lstlisting} \end{minipage} \begin{minipage}[t]{0.62\textwidth} \vspace{0pt}% Hack!! But does do the job \begin{eqnarray}\label{eq:over-sat} \texttt{pre} \label{eq:over-pre} & \triangleq & 0 < b < e : \hls{}(b,e) \2mm] \texttt{path}_{\texttt{4-9}} \label{eq:over-path-once} & \triangleq & 0 < b \land b = t \land t < e \land sz_0 < rsz \\ & \land & sz_0 = t_1 - t \land 2 \le t_1 - t \nonumber \\ & : & t \pto sz_0 \sepc \blk(t+1, t_1) \sepc \hls{}(t_1,e) \nonumber \\[2mm] I \label{eq:over-inv} & \triangleq & 0 < b < e \\ \nonumber & : & \hls{}(b,t; rsz-1) \sepc \hls{}(t,e) \\[2mm] \texttt{post}_{\texttt{5-10}}(I) \label{eq:over-inv-once} & \triangleq & 0 < b < e \land t_0 < e \land sz_0 < rsz \\ & \land & 2 \le sz_0 \land t - t_0=sz_0 \nonumber \\ & : & \hls{}(b,t_0;rsz-1) \sepc t_0 \pto sz_0 \nonumber \\ & & \sepc\ \blk(t_0+1,t) \sepc \hls{}(t,e) \nonumber \end{eqnarray} \end{minipage} \vspace{-2eX} \caption{Searching a chunk in a heap list and some of its specifications in \slah} \label{fig:search} } \vspace{-4mm} \end{figure} The right part of Figure~\ref{fig:search} provides several formulas in the logic \slah\ defined in Section~\ref{sec:logic-hls}. The formula \texttt{pre} specifies the precondition of \lst{search}, i.e., there is a heap list from \lst{hbeg} to \lst{hend} represented by the logic variables b resp. e. The pure part of \texttt{pre} (left side of ':') is a linear constraint on the addresses, while the spatial part (right side of ':') employs the predicate \hls{} defined inductively by the following two rules: % Equations~(\ref{eq:hls-0})--(\ref{eq:hls-rec}). \begin{align} \label{eq:hlsv-emp} \hls{}(x, y; v) & \Leftarrow x=y : \emp \\ \label{eq:hlsv-rec} \hls{}(x, y; v) & \Leftarrow \exists z \cdot 2 \le z - x \le v : x \pto z-x \sepc \blk(x+1,z) \sepc \hls{}(z,y; v) \\ \label{eq:hlsv-infty} \hls{}(x, y) & \equiv \hls{}(x,y;\infty) \mbox{ i.e., no upper bound on chunks' sizes} \end{align} stating that a heap list from x to y is either an empty heap if x and y are aliased, or a \emph{chunk} starting at address x and ending at address z followed by a heap list from z to y. The chunk stores its size z-x in the header (atom x\pto z-x). The chunk's body is specified by a memory block atom, \blk(x+1,z), starting after the header and ending before the next chunk. %This predicate belongs to the class % defined in Section~\ref{sec:logic-hls} (Equations~(\ref{eq:hlsv-emp})--(\ref{eq:hlsv-rec})), % which allows predicates to specify % an upper bound on the size of all chunks in the heap list % as a third parameter, i.e., \hls{}(x,y;v); %therefore \hls{}(x,y) is a shorthand for \hls{}(x,y;\infty). The parameter v is an upper bound on the size of all chunks in the list. %By convention, \hls{}(x,y) is a shorthand for \hls{}(x,y;\infty), i.e, %the chunks' sizes are unconstrained from above. The formula \texttt{path}_{\texttt{4-9}} is generated by the symbolic execution of \lst{search} starting from \texttt{pre} and executing the statements from line 4 to 9. It's satisfiability means that the line 9 is reachable from a state satisfying \texttt{pre}. % (We use the SSA representation for the program and % extend it with the logic variables introduced by % the unfolding of the \hls{} predicate): % Bounded reachability techniques generate such formulas % and query their satisfiability. %the bounded reachability which generates satisfiability queries, The decision procedure for the satisfiability of \slah\ in Section~\ref{sec:sat-hls} is based on the translation of a \slah\ formula \varphi into an equi-satisfiable Presburger arithmetic (\PbA) formula \varphi^P. The delicate point with respect to the previous work, e.g., \cite{DBLP:journals/corr/abs-1802-05935}, is to compute a summary in \PbA\ for the \hls{} atoms. % what happens if over/under-approximation? % what happens if conditions are not satisfied? undecidability? The summary computed for the atom \hls{}(x,y;v) when the heap it denotes is not empty is (v=2 \land \exists k\cdot k > 0 \land 2k = y - x) \lor (2 < v \land 2 < y-x), i.e., either all chunks have size 2 and the heap-list has an even size or v and the size of the heap-list are strictly greater than 2. For the empty case, the summary is trivially x=y. %For v=\infty, the summary is 2 < y-x. The other spatial atoms a (e.g., x \pto v and \blk(x,y)) are summarized by constraints on their start address denoted by \atomhead(a) (e.g., x) and their end address \atomtail(a) (e.g., x+1 resp. y). For the points-to atom, this constraint is true, but for the \blk(x,y) atom, the constraint is x = \atomhead(a) < \atomtail(a) = y. Therefore, the spatial part of \texttt{path}_{\texttt{4-9}} is translated into the \PbA\ formula \texttt{pb}^\Sigma_{\texttt{4-9}}: % \begin{align*} \underbrace{t+1 < t_1}_{\blk{}(t+1,t_1)} & \land \underbrace{(t_1 = e \lor 2 < e-t_1)}_{\hls{}(t_1,e)}. \end{align*} % Alone, \texttt{pb}^\Sigma_{\texttt{4-9}} does not capture the semantics of the separating conjunction in the spatial part. For that, we add a set of constraints expressing the disjointness of memory blocks occupied by the spatial atoms. For our example, these constraints are \texttt{pb}^\sepc_{\texttt{4-9}} \equiv t < t_1. By conjoining the pure part of \texttt{path}_{\texttt{4-9}} with formulas \texttt{pb}^\Sigma_{\texttt{4-9}} and \texttt{pb}^\sepc_{\texttt{4-9}}, we obtain an equi-satisfiable existentially quantified {\PbA} formula whose satisfiability is NP-complete. \smallskip \noindent The {\PbA} abstraction is also used to decide the validity of entailments in \slah\ in combination with a matching procedure between spatial parts. To illustrate this decision procedure presented in Section~\ref{sec:ent}, we consider the verification conditions generated by the proof of the invariant I from Equation~(\ref{eq:over-inv}) for the \lst{search}'s loop. %Consider the invariant I from Equation~(\ref{eq:over-inv}) in Figure~\ref{fig:search}. It states that \lst{t} splits the heap list in two parts. %The following entailment states the initial condition for I: %\begin{eqnarray}\label{eq:over-ent-0} %0 < b < e \land\ t = b : \hls{}(b,e) & \limp & I %\end{eqnarray} %The decision procedure first checks that the antecedent is satisfiable %(otherwise the entailment is trivially valid). %If it is the case, %\todo{ESOP 21: difficult to follow, MS explicit} % the procedure explore a set of cases % defining all total orders between the equivalence classes among % location terms used as start address of a spatial atom % in the antecedent and the consequent. %For our example, the cases to be explored are t=b, tb. % However, if the constraint defining the case is not implied % by the PA abstraction of the antecedent, the case's exploration stops. % In our example, only one case is explored, i.e., 0 < b = t < e. %Given the total order and the equivalence classes of terms of the explored case, % the spatial part of the antecedent and consequent formulas are transformed % by removing empty heap list atoms (i.e., \hls{}(x,y;v) for which x=y) % and ordering the atoms by their start location. %In our example, only the consequent changes its spatial part to \hls{}(b,e). %Finally, the ordered spatial parts are matched in the context of the % relation on locations given by the antecedent's \PbA\ abstraction. % In our example, the two spatial atoms are syntactically the same, % so the matching succeeds trivially. % %To illustrate a non trivial case of the matching procedure, % we consider the verification condition (VC) % for the inductiveness of I. % To illustrate a non-trivial case of the matching procedure used in the decision procedure for entailment, we consider the verification condition (VC) for the inductiveness of I. The antecedent of the VC is the formula \texttt{post}_{\texttt{5-10}}(I) in Figure~\ref{fig:search}, obtained by symbolically executing the path including the statements at lines 5--7 and 9 starting from I. The \PbA\ abstraction of \texttt{post}_{\texttt{5-10}}(I) is satisfiable and entails the following constraint on the terms used by the spatial atoms: 0 < b \le t_0 < t_0+1 < t \le e. The spatial atoms used in the antecedent and consequent are ordered using the order given by this contraint as follows: \[\begin{array}{lcc} \textrm{antecedent:} & \hls{}(b,t_0;rsz-1) \sepc\ t_0 \pto sz_0 \sepc\ \blk(t_0+1,t) & \sepc\ \hls{}(t,e) \\ \textrm{consequent:} & \hls{}(b,t;rsz-1) & \sepc\ \hls{}(t,e) \end{array} % The matching procedure starts by searching a prefix of the sequence of atoms in the antecedent that matches the first atom in the consequent, $\hls{}(b,t;rsz-1)$, such that the start and end addresses of the sequence are respectively $b$ and $t$. The sequence found is $\hls{}(b,t_0;rsz-1) \sepc t_0 \pto sz_0 \sepc \blk(t_0+1,t)$ which also satisfies the condition (encoded in \PbA) that it defines a contiguous memory block between $b$ and $t$. The algorithm continues by trying to prove the matching found using a composition lemma $\hls{}(b,t_0;rsz-1) \sepc \hls{}(t_0,t;rsz-1) \models \hls{}(b,t;rsz-1)$ and the unfolding of the atom $\hls{}(t_0,t;rsz-1)$. %ESOP 21: "one unfolding" what determines that one makes *one* unfolding here, in terms of the (automatic) procedure? The \PbA\ abstraction of the antecedent is used to ensure that $sz_0$ is the size of the heap list starting at $t_0$, i.e., $t-t_0$ and the constraint $sz_0 \le rsz-1$ is satisfied. For this ordering, the algorithm is able to prove the matching. Since this ordering is the only possible ordering compatible with the {\PbA} abstraction of the antecedent, we conclude that the entailment is valid. %The procedure continues with the remaining cases, until % either a matching fails (the entailment is invalid) % or cases led to a matching (the entailment is valid). %Memory allocators manage the heap part allocated to the process. %They usually organize the heap into a set of non-overlapping chunks and some memory used to manage this set. %\begin{wrapfigure}{R}{0.45\textwidth} %\centering %\vspace{-4eX} %\includegraphics[trim=2cm 2cm 2cm 1cm,scale=0.4]{../figs/fig_dma_chk.pdf} %\caption{heap list in DMA} %\label{fig:heap} %\vspace{-4eX} %\end{wrapfigure} %In this work, we consider the organization of the set as a heap list, i.e., a list stored in a raw array of bytes, where elements are in sequence in this block and the successor relation is given by the element size, see Figure~\ref{fig:heap}. %To obtain the heap list data structures, DMA managers split each chunk %in two parts: %(i) the header containing management information, at least the chunk size including the header and its status -- occupied by user data or free to be allocated, %(ii) the body intended to store user's data (payload). % %The heap list data structure may be seen as a continuous block of values (suppose that values are all integers) with some properties about its content, for example: %(i) the size of the block is the sum of values contained at some positions in the block, %(ii) the status of two adjacent chunks are not both free (early coalescing policy). %Therefore, to capture these properties, we cannot use only a predicate $\blk(x;y)$ denoting a sequence of values stating at location $x$ and ending just before location $y$, i.e., at $y-1$. We propose to use a special predicate called $\hls{}$ defined inductively by the first two rules below as a list segment of chunks, each chunk being specified by the predicate $\hck$: %\begin{eqnarray*} %\hls{}(x;y) & \Leftarrow & x=y : \emp \label{id:hls-emp} %\\ %\hls{}(x;y) & \Leftarrow & %\exists w, st, nx \cdot w - x > 3 \land 0\leq st \leq 1: x \pto w-x \sepc x+1\pto st \\ % & & \sepc\ x+2\pto nx \sepc\ \blk(x+3; w) \sepc \hls{}(w; y) % % %%\hls{}(x;y) & \Leftarrow & \exists w\cdot \hck(x;w)\sepc\hls{}(w;y) \label{id:hls-rec} %%\\ %%\hck(x;y) & \Leftarrow & \exists sz,st,nx\cdot\begin{array}[t]{l} %% sz > 3 \land y=x+sz \land 0\leq st \leq 1: \\ % % x\pto sz \sepc x+1\pto st \sepc x+2\pto nx %% \sepc\ \blk(x+3;x+sz) %%\blk(x,3,sz) %% \end{array}\label{id:hck} %\end{eqnarray*} %The chunk is specified using $\blk$ predicate to denote the body of the chunk, and points-to atoms $t\pto t'$ to obtain the value stored in the header. Notice the use of pointer arithmetics to access the chunk fields and to obtain the start location of the next chunk in the heap. %%%\zhilin{Is $\blk(x,3,sz)$ really better than $\blk(x+3, sz)$ ?} %%%MS: is better than $\blk(x+3, x+sz)$ % % %The heap lists with no successive free chunks used in early coalescing policy are captured by an ID which employs the integer parameters $z$ and $z'$ to keep track of the status of chunks before the heap list segment boundaries $x$ and $y$: %\begin{eqnarray} %\hls{c}(x,z;y,z') & \Leftarrow & x=y \land z=z' : \emp \label{id:hlsc-emp}\\ %\hls{c}(x,z;y,z') & \Leftarrow & \exists w,z''\cdot \begin{array}[t]{l} % z\neq\fif(x) \land z''=\fif(x) : \\ % \hck(x;w) \sepc\ \hls{c}(w,z'';y,z') % \end{array} \label{id:hlsc-rec} %\end{eqnarray} %where the term $\fif(x)$ denotes the value of the status field (at offset $+1$ from $x$), which may be 0 (for occupied chunk) or 1 (for free chunk). %Similar definitions are used to capture other properties of allocators' configurations that don't use a special list for the set of free chunks. We provide such predicates in Section~\ref{sec:slid-hls} to specify allocation policies, \eg, best-fit or first-fit.