evmil/il/
compiler.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5//    http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12use crate::il::{BinOp, Region, Term};
13use crate::bytecode::{Assembly,Builder,Instruction,StructuredSection};
14use crate::bytecode::Instruction::*;
15use crate::util::*;
16
17type Result = std::result::Result<(), CompilerError>;
18
19// ============================================================================
20// Errors
21// ============================================================================
22
23#[derive(Debug)]
24pub enum CompilerError {
25    /// An integer (or hex) literal is too large (i.e. exceeds `2^256`).
26    LiteralOverflow,
27    /// Attempt to read from an invalid memory region.
28    InvalidMemoryAccess,
29    /// Attempt to write something which doesn't exist, or is not an lval.
30    InvalidLVal,
31}
32
33// ============================================================================
34// Compiler
35// ============================================================================
36
37pub struct Compiler {
38    /// Instructions being constructed by this compiler.
39    builder: Builder,
40    /// Counts the number of labels in use
41    labels: usize
42}
43
44impl Compiler {
45    pub fn new() -> Self {
46        Self {
47            builder: Builder::new(),
48            labels: 0
49        }
50    }
51
52    pub fn to_assembly(self) -> Assembly {
53        let insns = self.builder.to_insns();
54        let code = StructuredSection::Code(insns);
55        Assembly::new(vec![code])        
56    }
57
58    pub fn translate(&mut self, term: &Term) -> Result {
59        match term {
60            // Statements
61            Term::Assert(e) => self.translate_assert(e),
62            Term::Assignment(e1, e2) => self.translate_assignment(e1, e2),
63            Term::Fail => self.translate_fail(),
64            Term::Goto(l) => self.translate_goto(l),
65            Term::IfGoto(e, l) => self.translate_ifgoto(e, l),
66            Term::Label(l) => self.translate_label(l),
67            Term::Return(es) => self.translate_return(es),
68            Term::Revert(es) => self.translate_revert(es),
69            Term::Succeed(es) => self.translate_succeed(es),
70            Term::Stop => self.translate_stop(),
71            // Expressions
72            Term::Binary(bop, e1, e2) => self.translate_binary(*bop, e1, e2),
73            Term::Call(n,es) => self.translate_call(n,es),
74            Term::ArrayAccess(src, index) => self.translate_array_access(src, index),
75            Term::MemoryAccess(_) => Err(CompilerError::InvalidMemoryAccess),
76            // Values
77            Term::Int(bytes) => self.translate_literal(bytes, 10),
78            Term::Hex(bytes) => self.translate_literal(bytes, 16),
79            //
80        }
81    }
82
83    fn fresh_label(&mut self) -> String {
84        let lab = format!("lab{}",self.labels);
85        self.labels += 1;
86        lab
87    }
88
89    // ============================================================================
90    // Statements
91    // ============================================================================
92
93    fn translate_assert(&mut self, expr: &Term) -> Result {
94        // Allocate labels for true/false outcomes
95        let lab = self.fresh_label();
96        // Translate conditional branch
97        self.translate_conditional(expr, Some(&lab), None)?;
98        // False branch
99        self.builder.push(PUSH(vec![0x00]));
100        self.builder.push(PUSH(vec![0x00]));        
101        self.builder.push(REVERT);
102        // True branch
103        self.builder.mark_label(&lab).unwrap();
104        self.builder.push(JUMPDEST);
105        //
106        Ok(())
107    }
108
109    fn translate_assignment(&mut self, lhs: &Term, rhs: &Term) -> Result {
110        // Translate value being assigned
111        self.translate(rhs)?;
112        // Translate assignent itself
113        match lhs {
114            Term::ArrayAccess(src, idx) => {
115                self.translate_assignment_array(src, idx)?;
116            }
117            _ => {
118                return Err(CompilerError::InvalidLVal);
119            }
120        }
121        //
122        Ok(())
123    }
124
125    fn translate_assignment_array(&mut self, src: &Term, index: &Term) -> Result {
126        match src {
127            Term::MemoryAccess(r) => self.translate_assignment_memory(*r, index),
128            _ => Err(CompilerError::InvalidMemoryAccess),
129        }
130    }
131
132    fn translate_assignment_memory(&mut self, region: Region, address: &Term) -> Result {
133        // Translate index expression
134        self.translate(address)?;
135        // Dispatch based on region
136        match region {
137            Region::Memory => self.builder.push(MSTORE),
138            Region::Storage => self.builder.push(SSTORE),
139            _ => {
140                return Err(CompilerError::InvalidMemoryAccess);
141            }
142        };
143        //
144        Ok(())
145    }
146
147    fn translate_call(&mut self, name: &str, exprs: &[Term]) -> Result {
148        let retlab = self.fresh_label();
149        let retlab_index = self.builder.get_label(&retlab);
150        let name_index = self.builder.get_label(name);
151        // Translate arguments
152        for e in exprs { self.translate(e)?; }
153        // Push return address (as a label)
154        self.builder.push_labeled(PUSH(label_bytes(retlab_index)));
155        // Push function address (as a label)
156        self.builder.push_labeled(PUSH(label_bytes(name_index)));
157        // Perform jump
158        self.builder.push(JUMP);
159        // Identify return point
160        self.builder.mark_label(&retlab).unwrap();
161        self.builder.push(JUMPDEST);
162        Ok(())
163    }
164
165    fn translate_fail(&mut self) -> Result {
166        self.builder.push(PUSH(vec![0x00]));
167        self.builder.push(PUSH(vec![0x00]));        
168        self.builder.push(REVERT);
169        Ok(())
170    }
171
172    fn translate_goto(&mut self, label: &str) -> Result {
173        let label_index = self.builder.get_label(label);        
174        // Translate unconditional branch
175        self.builder.push_labeled(PUSH(label_bytes(label_index)));
176        self.builder.push(JUMP);
177        //
178        Ok(())
179    }
180
181    fn translate_ifgoto(&mut self, expr: &Term, label: &str) -> Result {
182        // Translate conditional branch
183        self.translate_conditional(expr, Some(label), None)
184    }
185
186    fn translate_label(&mut self, label: &str) -> Result {
187        // Mark the label
188        self.builder.mark_label(label).unwrap();
189        self.builder.push(JUMPDEST);
190        // Done
191        Ok(())
192    }
193
194    fn translate_return(&mut self, exprs: &[Term]) -> Result {
195        if !exprs.is_empty() {
196            // Translate each expression (except first)
197            for e in exprs.iter().skip(1) { self.translate(e)?; }
198            // Translate first expression
199            self.translate(&exprs[0])?;
200            // Swap with returna address
201            self.builder.push(SWAP(exprs.len() as u8));
202        }
203        // A return statement is just an unconditional jump
204        self.builder.push(JUMP);
205        //
206        Ok(())
207    }
208
209    fn translate_revert(&mut self, exprs: &[Term]) -> Result {
210        self.translate_succeed_revert(REVERT, exprs)
211    }
212
213    fn translate_succeed(&mut self, exprs: &[Term]) -> Result {
214        if exprs.is_empty() {
215            self.builder.push(STOP);
216            Ok(())
217        } else {
218            self.translate_succeed_revert(RETURN, exprs)
219        }
220    }
221
222    fn translate_succeed_revert(&mut self, insn: Instruction, exprs: &[Term]) -> Result {
223        if exprs.is_empty() {
224            self.builder.push(PUSH(vec![0]));
225            self.builder.push(PUSH(vec![0]));
226        } else {
227            for (i,e) in exprs.iter().enumerate() {
228                let addr = (i * 0x20) as u128;
229                self.translate(e)?;
230                self.builder.push(make_push(addr)?);
231                self.builder.push(MSTORE);
232            }
233            let len = (exprs.len() * 0x20) as u128;
234            self.builder.push(PUSH(vec![0]));
235            self.builder.push(make_push(len)?);
236        }
237        self.builder.push(insn);
238        Ok(())
239    }
240
241    fn translate_stop(&mut self) -> Result {
242        self.builder.push(STOP);
243        Ok(())
244    }
245
246    // ============================================================================
247    // Conditional Expressions
248    // ============================================================================
249
250    /// Translate a conditional expression using _short circuiting_
251    /// semantics.  Since implementing short circuiting requires
252    /// branching, we can exploit this to optimise other translations
253    /// and reduce the number of branches required.  To do that, this
254    /// method requires either a `true` target or a `false` target.
255    /// If a `true` target is provided then, if the condition
256    /// evaluates to something other than `0` (i.e. is `true`),
257    /// control is transfered to this target.  Likewise, if the
258    /// condition evaluates to `0` (i.e. `false`) the control is
259    /// transfered to the `false` target.
260    fn translate_conditional(&mut self, expr: &Term, true_lab: Option<&str>, false_lab: Option<&str>) -> Result {
261        match expr {
262            Term::Binary(BinOp::LogicalAnd, l, r) => {
263                self.translate_conditional_conjunct(l, r, true_lab, false_lab)
264            }
265            Term::Binary(BinOp::LogicalOr, l, r) => {
266                self.translate_conditional_disjunct(l, r, true_lab, false_lab)
267            }
268            _ => self.translate_conditional_other(expr, true_lab, false_lab),
269        }
270    }
271
272    /// Translate a logical conjunction as a conditional. Since
273    /// such connectives require short circuiting, these must be
274    /// implementing using branches.
275    fn translate_conditional_conjunct(&mut self, lhs: &Term, rhs: &Term, true_lab: Option<&str>, false_lab: Option<&str>) -> Result {
276        match (true_lab, false_lab) {
277            (Some(_), None) => {
278                // Harder case
279                let lab = self.fresh_label();
280                self.translate_conditional(lhs, None, Some(&lab))?;
281                self.translate_conditional(rhs, true_lab, None)?;
282                self.builder.mark_label(&lab).unwrap();
283                self.builder.push(JUMPDEST);
284            }
285            (None, Some(_)) => {
286                // Easy case
287                self.translate_conditional(lhs, None, false_lab)?;
288                self.translate_conditional(rhs, true_lab, false_lab)?;
289            }
290            (_, _) => unreachable!(),
291        }
292        // Done
293        Ok(())
294    }
295
296    /// Translate a logical disjunction as a conditional. Since
297    /// such connectives require short circuiting, these must be
298    /// implementing using branches.
299    fn translate_conditional_disjunct(&mut self, lhs: &Term, rhs: &Term, true_lab: Option<&str>, false_lab: Option<&str>) -> Result {
300        match (true_lab, false_lab) {
301            (None, Some(_)) => {
302                // Harder case
303                let lab = self.fresh_label();
304                self.translate_conditional(lhs, Some(&lab), None)?;
305                self.translate_conditional(rhs, None, false_lab)?;
306                self.builder.mark_label(&lab).unwrap();
307                self.builder.push(JUMPDEST);
308            }
309            (Some(_), None) => {
310                // Easy case
311                self.translate_conditional(lhs, true_lab, None)?;
312                self.translate_conditional(rhs, true_lab, false_lab)?;
313            }
314            (_, _) => unreachable!(),
315        }
316        // Done
317        Ok(())
318    }
319
320    /// Translate a conditional expression which cannot be translated
321    /// by exploiting branches.  In such case, we have to generate the
322    /// boolean value and dispatch based on that.
323    fn translate_conditional_other(&mut self, expr: &Term, true_lab: Option<&str>, false_lab: Option<&str>) -> Result {
324        // Translate conditional expression
325        self.translate(expr)?;
326        //
327        match (true_lab, false_lab) {
328            (Some(lab), None) => {
329                let label_index = self.builder.get_label(lab);
330                self.builder.push_labeled(PUSH(label_bytes(label_index)));
331                self.builder.push(JUMPI);
332            }
333            (None, Some(lab)) => {
334                let label_index = self.builder.get_label(lab);
335                self.builder.push(ISZERO);
336                self.builder.push_labeled(PUSH(label_bytes(label_index)));
337                self.builder.push(JUMPI);
338            }
339            (_, _) => {
340                unreachable!("")
341            }
342        }
343        //
344        Ok(())
345    }
346
347    // ============================================================================
348    // Binary Expressions
349    // ============================================================================
350
351    /// Translate a binary operation.  Observe that logical operations
352    /// exhibit _short-circuit behaviour_.
353    fn translate_binary(&mut self, bop: BinOp, lhs: &Term, rhs: &Term) -> Result {
354        match bop {
355            BinOp::LogicalAnd | BinOp::LogicalOr => {
356                self.translate_logical_connective(bop, lhs, rhs)
357            }
358            _ => self.translate_binary_arithmetic(bop, lhs, rhs),
359        }
360    }
361
362    /// Translate one of the logical connectives (e.g. `&&` or `||`).
363    /// These are more challenging than standard binary operators because
364    /// they exhibit _short circuiting behaviour_.
365    fn translate_logical_connective(&mut self, bop: BinOp, lhs: &Term, rhs: &Term) -> Result {
366        self.translate(lhs)?;
367        self.builder.push(DUP(1));
368        if bop == BinOp::LogicalAnd {
369            self.builder.push(ISZERO);
370        }
371        // Allocate fresh label
372        let lab = self.fresh_label();
373        let lab_index = self.builder.get_label(&lab);
374        self.builder.push_labeled(PUSH(label_bytes(lab_index)));
375        self.builder.push(JUMPI);
376        self.builder.push(POP);
377        self.translate(rhs)?;
378        self.builder.mark_label(&lab).unwrap();
379        self.builder.push(JUMPDEST);
380        // Done
381        Ok(())
382    }
383
384    /// Translate a binary arithmetic operation or comparison.  This is
385    /// pretty straightforward, as we just load items on the stack and
386    /// perform the op.  Observe that the right-hand side is loaded onto
387    /// the stack first.
388    fn translate_binary_arithmetic(&mut self, bop: BinOp, lhs: &Term, rhs: &Term) -> Result {
389        self.translate(rhs)?;
390        self.translate(lhs)?;
391        //
392        match bop {
393            // standard
394            BinOp::Add => self.builder.push(ADD),
395            BinOp::Subtract => self.builder.push(SUB),
396            BinOp::Divide => self.builder.push(DIV),
397            BinOp::Multiply => self.builder.push(MUL),
398            BinOp::Remainder => self.builder.push(MOD),
399            BinOp::Equals => self.builder.push(EQ),
400            BinOp::LessThan => self.builder.push(LT),
401            BinOp::GreaterThan => self.builder.push(GT),
402            // non-standard
403            BinOp::NotEquals => {
404                self.builder.push(EQ);
405                self.builder.push(ISZERO);
406            }
407            BinOp::LessThanOrEquals => {
408                self.builder.push(GT);
409                self.builder.push(ISZERO);
410            }
411            BinOp::GreaterThanOrEquals => {
412                self.builder.push(LT);
413                self.builder.push(ISZERO);
414            }
415            _ => {
416                unreachable!();
417            }
418        }
419        //
420        Ok(())
421    }
422
423    // ============================================================================
424    // Array Access Expressions
425    // ============================================================================
426
427    /// Translate an array access of the form `src[index]`.  The actual
428    /// form of the translation depends on whether its a direct access
429    /// (e.g. to storage or memory), or indirect (e.g. via a pointer to
430    /// memory).
431    fn translate_array_access(&mut self, src: &Term, index: &Term) -> Result {
432        match src {
433            Term::MemoryAccess(r) => self.translate_memory_access(*r, index),
434            _ => Err(CompilerError::InvalidMemoryAccess),
435        }
436    }
437
438    fn translate_memory_access(&mut self, region: Region, index: &Term) -> Result {
439        // Translate index expression
440        self.translate(index)?;
441        // Dispatch based on region
442        match region {
443            Region::Memory => {
444                self.builder.push(MLOAD);
445            }
446            Region::Storage => {
447                self.builder.push(SLOAD);
448            }
449            Region::CallData => {
450                self.builder.push(CALLDATALOAD);
451            }
452        }
453        //
454        Ok(())
455    }
456
457    // ============================================================================
458    // Values
459    // ============================================================================
460
461    fn translate_literal(&mut self, digits: &[u8], radix: u32) -> Result {
462        let val = from_be_digits(digits, radix);
463        self.builder.push(make_push(val)?);
464        Ok(())
465    }
466}
467
468// ===================================================================
469// Traits
470// ===================================================================
471
472/// Translate a sequence of IL statements into EVM bytecode, or fail
473/// with an error.
474impl TryFrom<&[Term]> for Assembly {
475    type Error = CompilerError;
476
477    fn try_from(terms: &[Term]) -> std::result::Result<Self, Self::Error> {
478        try_from(terms)
479    }
480}
481
482/// Translate a sequence of IL statements into EVM bytecode, or fail
483/// with an error.
484impl<const N: usize> TryFrom<&[Term; N]> for Assembly {
485    type Error = crate::il::CompilerError;
486
487    fn try_from(terms: &[Term; N]) -> std::result::Result<Self, Self::Error> {
488        try_from(terms)
489    }
490}
491
492// ===================================================================
493// Helpers
494// ===================================================================
495
496fn label_bytes(index: usize) -> Vec<u8> {
497    // Always generate a push2 instruction
498    vec![(index / 256) as u8, (index % 256) as u8]
499}
500
501fn try_from(terms: &[Term]) -> std::result::Result<Assembly, CompilerError> {
502    let mut compiler = Compiler::new();
503    // Translate statements one-by-one
504    for t in terms {
505        compiler.translate(t)?;
506    }
507    // Done
508    Ok(compiler.to_assembly())
509}
510
511/// Construct a push instruction from a value.
512fn make_push(val: u128) -> std::result::Result<Instruction, CompilerError> {
513    let bytes = to_be_bytes(val);
514    // Sanity check size of literal
515    if bytes.len() > 32 {
516        // Too big!!
517        Err(CompilerError::LiteralOverflow)
518    } else {
519        Ok(PUSH(bytes))
520    }
521}