squid/backends/clang/
backend.rs

1use std::{
2    collections::{
3        BTreeMap,
4        HashMap,
5    },
6    hash::{
7        BuildHasher,
8        Hash,
9        Hasher,
10    },
11    path::{Path, PathBuf, Component},
12};
13
14use ahash::RandomState;
15use thiserror::Error;
16
17use crate::{
18    backends::{
19        clang::{
20            codegen::CLifterError,
21            concretize,
22            get_entrypoint_address,
23            insert_entrypoint,
24            insert_guard_pages,
25            populate_stack,
26            symbol::create_symbol_store,
27            AddressLayouter,
28            CLifter,
29            ClangRuntime,
30            EventChannel,
31            Memory,
32            Registers,
33            VariableStorage,
34        },
35        Backend,
36    },
37    event::EventPool,
38    frontend::ProcessImage,
39    riscv::register::GpRegister,
40    Logger,
41};
42
/// The ClangBackendBuilder configures the [`ClangBackend`] with the values
/// that you provide.
/// Use the [`ClangBackend::builder`] method to create this builder.
///
/// Every setter takes the builder by value and hands it back, so the returned
/// value has to be kept; dropping it discards the configuration.
#[derive(Debug, Clone)]
#[must_use = "builder methods consume the builder and return the updated one"]
pub struct ClangBackendBuilder {
    /// File the AOT-code is stored into; `None` until a source file has been set.
    source_file: Option<PathBuf>,
    /// Size of the guest heap in bytes.
    heap_size: usize,
    /// Size of the guest stack in bytes; `None` until a stack size has been set.
    stack_size: Option<usize>,
    /// Environment variables of the guest.
    env: BTreeMap<String, String>,
    /// The argv of the guest; the first element is the program name.
    args: Vec<String>,
    /// Whether the runtime gets a symbol table with the names from the process image.
    build_symbol_table: bool,
    /// Whether the emitted code updates the pc with the last executed basic block.
    update_pc: bool,
    /// Whether the emitted code records the last executed RISC-V instruction.
    update_last_instr: bool,
    /// Number of RISC-V instructions after which a timeout fault is generated.
    timeout: usize,
    /// Whether the emitted code counts the executed RISC-V instructions.
    count_instructions: bool,
    /// Extra flags passed to the C compiler.
    cflags: Vec<String>,
    /// The C compiler used for AOT-compiling the code.
    cc: String,
    /// Whether stackframe contents are marked as uninitialized on (de)allocation.
    uninit_stack: bool,
    /// Whether a division by zero yields 0 instead of an error.
    allow_div_by_zero: bool,
}
62
63impl ClangBackendBuilder {
64    /// Do not throw an error when dividing by zero and set the result to 0 instead (default: `false`)
65    pub fn allow_div_by_zero(mut self, flag: bool) -> Self {
66        self.allow_div_by_zero = flag;
67        self
68    }
69
70    /// Whenever a stackframe is allocated or deallocated, mark its contents as uninitialized (default: `true`)
71    pub fn enable_uninit_stack(mut self, flag: bool) -> Self {
72        self.uninit_stack = flag;
73        self
74    }
75
76    /// Set the compiler to use for compiling the AOT-code
77    pub fn cc<S: Into<String>>(mut self, cc: S) -> Self {
78        self.cc = cc.into();
79        self
80    }
81
82    /// Pass this flag to the c compiler when AOT-compiling the code
83    pub fn cflag<S: Into<String>>(mut self, arg: S) -> Self {
84        self.cflags.push(arg.into());
85        self
86    }
87
88    /// Store the AOT-code into this file
89    pub fn source_file<P: Into<PathBuf>>(mut self, source_file: P) -> Self {
90        let mut source_file = source_file.into();
91        
92        match source_file.components().next() {
93            None => unreachable!(),
94            Some(Component::Normal(_)) => {
95                source_file = Path::new(".").join(source_file);
96            },
97            _ => {},
98        }
99        
100        self.source_file = Some(source_file);
101        self
102    }
103
104    /// Generate a [`ClangRuntimeFault::Timeout`](crate::backends::clang::ClangRuntimeFault::Timeout) error after the given number of RISC-V instructions
105    pub fn timeout(mut self, timeout: usize) -> Self {
106        self.timeout = timeout;
107        self
108    }
109
110    /// If this is set to true, the backend emits code that tracks how many RISC-V instructions were executed each run.
111    /// The number of instructions can be access via [`ClangRuntime::get_executed_instructions`](crate::backends::clang::ClangRuntime::get_executed_instructions).
112    pub fn count_instructions(mut self, flag: bool) -> Self {
113        self.count_instructions = flag;
114        self
115    }
116
117    /// If this is set to true, build a symbol table in the runtime with all the names from the process image
118    pub fn build_symbol_table(mut self, flag: bool) -> Self {
119        self.build_symbol_table = flag;
120        self
121    }
122
123    /// If this is set to true, the backend emits code that updates the pc with the address of the basic block that has been executed last
124    pub fn update_pc(mut self, flag: bool) -> Self {
125        self.update_pc = flag;
126        self
127    }
128
129    /// If this is set to true, the backend emits code that stores which RISC-V instruction was executed last. Note that this is the virtual address
130    /// of the RISC-V instruction inside the ELF file and has nothing to do with the virtual address the ClangRuntime uses.
131    /// You can access this value in the runtime via the [`ClangRuntime::get_last_instruction`](crate::backends::clang::ClangRuntime::get_last_instruction) method.
132    pub fn update_last_instruction(mut self, flag: bool) -> Self {
133        self.update_last_instr = flag;
134        self
135    }
136
137    /// Set the size of the heap in bytes
138    pub fn heap_size(mut self, heap_size: usize) -> Self {
139        self.heap_size = heap_size;
140        self
141    }
142
143    /// Set the size of the stack in bytes
144    pub fn stack_size(mut self, stack_size: usize) -> Self {
145        self.stack_size = Some(stack_size);
146        self
147    }
148
149    /// Insert an environment variable into the environment of the guest
150    pub fn env<K, V>(mut self, key: K, value: V) -> Self
151    where
152        K: Into<String>,
153        V: Into<String>,
154    {
155        self.env.insert(key.into(), value.into());
156        self
157    }
158
159    /// Add the argument to the argv of the guest
160    pub fn arg<S>(mut self, arg: S) -> Self
161    where
162        S: Into<String>,
163    {
164        self.args.push(arg.into());
165        self
166    }
167
168    /// Add multiple args to the argv of the guest
169    pub fn args<I, S>(mut self, args: I) -> Self
170    where
171        I: IntoIterator<Item = S>,
172        S: Into<String>,
173    {
174        for arg in args {
175            self.args.push(arg.into());
176        }
177        self
178    }
179
180    /// Set argv\[0\] of the guest to the given name
181    pub fn progname<S>(mut self, progname: S) -> Self
182    where
183        S: Into<String>,
184    {
185        if let Some(arg) = self.args.get_mut(0) {
186            *arg = progname.into();
187        } else {
188            self.args.push(progname.into());
189        }
190        self
191    }
192
193    /// Create the [`ClangBackend`]
194    pub fn build(self) -> Result<ClangBackend, &'static str> {
195        let source_file = self.source_file.ok_or("Source file was not set")?;
196        let stack_size = self.stack_size.ok_or("Stack size was not set")?;
197
198        Ok(ClangBackend {
199            source_file,
200            heap_size: self.heap_size,
201            stack_size,
202            env: self.env,
203            args: self.args,
204            symbol_store: self.build_symbol_table,
205            update_pc: self.update_pc,
206            update_last_instr: self.update_last_instr,
207            timeout: self.timeout,
208            count_instructions: self.count_instructions,
209            cflags: self.cflags,
210            cc: self.cc,
211            uninit_stack: self.uninit_stack,
212            allow_div_by_zero: self.allow_div_by_zero,
213        })
214    }
215}
216
217/// This error shows everything that can go wrong during the operations of the ClangBackend.
218#[allow(missing_docs)]
219#[derive(Error, Debug)]
220pub enum ClangBackendError {
221    #[error("Codegen failed: {0}")]
222    CodegenError(#[from] CLifterError),
223
224    #[error("Could not populate stack (not enough memory?)")]
225    StackError,
226}
227
/// The ClangBackend generates C code from the code in the process image and compiles that
/// with clang for optimal codegen. It constructs the [`ClangRuntime`].
#[derive(Debug, Clone)]
pub struct ClangBackend {
    /// File the generated AOT-code is stored into.
    source_file: PathBuf,
    /// Size of the guest heap in bytes.
    heap_size: usize,
    /// Size of the guest stack in bytes.
    stack_size: usize,
    /// Environment variables placed on the guest stack.
    env: BTreeMap<String, String>,
    /// The argv placed on the guest stack.
    args: Vec<String>,
    /// Whether a symbol store is built from the process image for the runtime.
    symbol_store: bool,
    /// Whether the emitted code updates the pc with the last executed basic block.
    update_pc: bool,
    /// Whether the emitted code records the last executed RISC-V instruction.
    update_last_instr: bool,
    /// Number of RISC-V instructions after which a timeout fault is generated.
    timeout: usize,
    /// Whether the emitted code counts the executed RISC-V instructions.
    count_instructions: bool,
    /// Extra flags passed to the C compiler.
    cflags: Vec<String>,
    /// The C compiler used for AOT-compiling the code.
    cc: String,
    /// Whether stackframe contents are marked as uninitialized on (de)allocation.
    uninit_stack: bool,
    /// Whether a division by zero yields 0 instead of an error.
    allow_div_by_zero: bool,
}
246
247impl ClangBackend {
248    /// Create a [`ClangBackendBuilder`] that can configure this backend.
249    pub fn builder() -> ClangBackendBuilder {
250        ClangBackendBuilder {
251            source_file: None,
252            heap_size: 0,
253            stack_size: None,
254            env: BTreeMap::new(),
255            args: Vec::new(),
256            build_symbol_table: true,
257            update_pc: true,
258            update_last_instr: true,
259            timeout: 800_000_000 * 60,
260            count_instructions: true,
261            cflags: Vec::new(),
262            cc: "clang".to_string(),
263            uninit_stack: true,
264            allow_div_by_zero: false,
265        }
266    }
267}
268
269impl ClangBackend {
270    fn config_hash(&self, image: &ProcessImage) -> u64 {
271        let mut hasher = RandomState::with_seeds(1, 1, 1, 1).build_hasher();
272        hasher.write_usize(self.heap_size);
273        hasher.write_usize(self.stack_size);
274        hasher.write_u8(self.update_pc as u8);
275        hasher.write_u8(self.update_last_instr as u8);
276        hasher.write_usize(self.timeout);
277        hasher.write_u8(self.count_instructions as u8);
278        hasher.write_u8(self.uninit_stack as u8);
279        hasher.write_u8(self.allow_div_by_zero as u8);
280        for cflag in &self.cflags {
281            hasher.write_usize(cflag.len());
282            hasher.write(cflag.as_bytes());
283        }
284        hasher.write_usize(self.cc.len());
285        hasher.write(self.cc.as_bytes());
286        image.hash(&mut hasher);
287        hasher.finish()
288    }
289}
290
291impl Backend for ClangBackend {
292    type Runtime = ClangRuntime;
293    type Error = ClangBackendError;
294
295    fn name(&self) -> String {
296        "ClangBackend".to_string()
297    }
298
299    fn create_runtime(
300        &mut self,
301        mut image: ProcessImage,
302        event_pool: EventPool,
303        logger: &Logger,
304    ) -> Result<Self::Runtime, Self::Error> {
305        /* Add missing things to progam image */
306        insert_entrypoint(&mut image, &event_pool);
307        insert_guard_pages(&mut image);
308
309        /* Assign new virtual addresses to the elements in the process image */
310        let mut layouter = AddressLayouter::new();
311        layouter.layout(&mut image);
312
313        /* Concretize symbolic pointers */
314        concretize(&mut image);
315
316        /* Create the event channel */
317        let event_channel = EventChannel::new(&image);
318
319        /* Create the registers */
320        let mut registers = Registers::new();
321
322        /* Build memory for runtime */
323        let mut memory = Memory::new(&image, layouter.globals_size(), self.heap_size, self.stack_size);
324
325        /* Create variable storage */
326        let varstore = VariableStorage::new(&image);
327
328        /* Compile the code */
329        let config_hash = self.config_hash(&image);
330        let mut clifter = CLifter::new(
331            self.source_file.clone(),
332            self.update_pc,
333            self.update_last_instr,
334            self.timeout,
335            self.count_instructions,
336            config_hash,
337            layouter.code_size(),
338            self.uninit_stack,
339            self.allow_div_by_zero,
340        );
341        let executor = clifter.lift(&image, &memory, &varstore, logger, &self.cflags, &self.cc)?;
342
343        /* Print some stats */
344        logger.info(format!("Size of memory: {} bytes", memory.size()));
345        logger.info(format!("Size of static variable storage: {}", varstore.num_variables() * 8));
346        logger.info(format!("Size of event channel: {}", event_channel.capacity()));
347
348        /* Get entrypoint */
349        let entrypoint = get_entrypoint_address(&image);
350        registers.set_pc(entrypoint);
351
352        /* Create stack */
353        let sp = populate_stack(&mut memory, &self.args, &self.env).ok_or(ClangBackendError::StackError)?;
354        registers.set_gp(GpRegister::sp as usize, sp);
355        memory.clear_dirty_stack();
356
357        /* Create the symbol store */
358        let symbols = if self.symbol_store { create_symbol_store(&image) } else { HashMap::default() };
359
360        Ok(ClangRuntime::new(
361            memory,
362            event_channel,
363            registers,
364            executor,
365            entrypoint,
366            symbols,
367            vec![0; varstore.num_variables()],
368        ))
369    }
370}