bpf_script/compiler/
script.rs

1use crate::compiler::Helpers;
2use crate::error::{Error, Result as InternalResult, SemanticsErrorContext};
3use crate::optimizer::optimize;
4use crate::types::*;
5
6use bpf_ins::{Instruction, MemoryOpLoadType, Register};
7use peginator::PegParser;
8use peginator_macro::peginate;
9
10use std::collections::HashMap;
11use std::str::FromStr;
12
13peginate!(
14    "
15@export
16ScriptDef = input:InputLine {NewLine exprs:Expression}$;
17
18InputLine = 'fn' '(' [args:TypedArgument {',' args:TypedArgument}] ')';
19TypedArgument = name:Ident ':' type_name:TypeDecl;
20TypeDecl = [is_ref:ReferencePrefix] name:Ident;
21
22Expression = @:Assignment | @:FunctionCall | @:Return;
23
24Assignment = left:LValue [':' type_name:TypeDecl] '=' right:RValue;
25FunctionCall = name:Ident '(' [args:RValue {',' args:RValue}] ')';
26Return = 'return' [value:RValue];
27
28Condition = left:LValue WhiteSpace op:Comparator WhiteSpace right:RValue;
29
30RValue = @:FunctionCall | @:Immediate | @:LValue;
31LValue = [prefix:Prefix] name:Ident {derefs:DeReference};
32
33DeReference = @:FieldAccess | @:ArrayIndex;
34
35FieldAccess = '.' name:Ident;
36ArrayIndex = '[' element:Immediate ']';
37
38@string
39Immediate = {'0'..'9'}+;
40
41Comparator = @:Equals | @:NotEquals | @:LessThan | @:GreaterThan | @:LessOrEqual | @:GreaterOrEqual;
42Equals = '==';
43NotEquals = '!=';
44LessThan = '<';
45GreaterThan = '>';
46LessOrEqual = '<=';
47GreaterOrEqual = '>=';
48ReferencePrefix = '&';
49DeReferencePrefix = '*';
50
51Prefix = @:ReferencePrefix | @:DeReferencePrefix;
52
53@string
54@no_skip_ws
55Ident = {'a'..'z' | 'A'..'Z' | '_' | '0'..'9'}+;
56
57@string
58@no_skip_ws
59WhiteSpace = {' ' | '\t'};
60
61@string
62@no_skip_ws
63NewLine = {'\r' | '\n' | '\r\n'};
64"
65);
66
/// Returns early from the enclosing function with an `Error::Semantics`.
///
/// The first argument is stored in the error's `line` field; all remaining
/// arguments are forwarded to `format!` to build the error's `message`.
macro_rules! semantics_bail {
    ($line: expr, $($message:expr),+) => {
        return Err(Error::Semantics {
            line: $line,
            message: format!($($message),+)
        });
    };
}
75
/// Describes where the value of a script variable lives.
#[derive(Clone, Copy, Debug)]
enum VariableLocation {
    /// A value captured through `Compiler::capture`; it is kept as a 32-bit
    /// immediate instead of occupying stack memory.
    SpecialImmediate(u32),
    /// A stack slot, expressed as an offset that is added to register `R10`.
    Stack(i16),
}
81
/// Everything the compiler tracks about a named variable.
#[derive(Clone, Debug)]
struct VariableInfo {
    /// The type of the value held by the variable.
    var_type: Type,
    /// Where the value is stored (captured immediate or stack slot).
    location: VariableLocation,
}
87
/// Compiles scripts into BPF instructions, resolving type names against a
/// borrowed `TypeDatabase`.
pub struct Compiler<'a> {
    /// Type database used to resolve type names and type ids.
    types: &'a TypeDatabase,
    /// Variables known so far (captures and stack variables), keyed by name.
    variables: HashMap<String, VariableInfo>,
    /// Instructions emitted so far, in emission order.
    instructions: Vec<Instruction>,
    /// Number of stack bytes reserved so far.
    stack: u32,
    /// Number reported as the `line` of semantic errors; starts at 1.
    expr_num: u32,
}
95
96impl<'a> Compiler<'a> {
    /// Upper bound, in bytes, on the total stack space `push_stack` will reserve.
    ///
    /// NOTE(review): the Linux BPF verifier limits a program's stack to 512
    /// bytes (`MAX_BPF_STACK`) — confirm that 4096 is intended here.
    const MAX_STACK_SIZE: u32 = 4096;
98
99    /// Create a new compiler instance.
100    ///
101    /// # Arguments
102    ///
103    /// * `types` - The BTF type library to use when resolving types.
104    ///
105    /// # Example
106    /// ```
107    /// use bpf_script::compiler::Compiler;
108    /// use bpf_script::types::TypeDatabase;
109    ///
110    /// let mut database = TypeDatabase::default();
111    /// let mut compiler = Compiler::create(&database);
112    /// ```
113    pub fn create(types: &'a TypeDatabase) -> Self {
114        Self {
115            types,
116            variables: HashMap::new(),
117            instructions: vec![],
118            stack: 0,
119            expr_num: 1,
120        }
121    }
122
123    /// Used to capture variables from the outer scope into the BPF
124    /// program being compiled. This is mostly used to capture map
125    /// identifers to pass to BPF helpers and for other integer values
126    /// that need to be captured. In the future, this will be extended
127    /// to capture arbitrary types making sharing between Rust and BPF
128    /// more seamless.
129    ///
130    /// # Arguments
131    ///
132    /// `name` - The name of the variable when referenced from the script.
133    /// `value` - The value of the variable.
134    ///
135    /// # Example
136    /// ```
137    /// use bpf_script::compiler::Compiler;
138    /// use bpf_script::types::TypeDatabase;
139    ///
140    /// let mut database = TypeDatabase::default();
141    /// let mut compiler = Compiler::create(&database);
142    /// compiler.capture("outer", 0xdeadbeef);
143    /// compiler.compile(r#"
144    ///     fn()
145    ///         return outer
146    /// "#).expect("Failed to compile.");
147    /// ```
148    pub fn capture(&mut self, name: &str, value: i64) {
149        let info = VariableInfo {
150            var_type: BaseType::Integer(Integer {
151                used_bits: 64,
152                bits: 64,
153                is_signed: false,
154            })
155            .into(),
156            location: VariableLocation::SpecialImmediate(value as u32),
157        };
158        self.variables.insert(name.to_string(), info);
159    }
160
161    /// Helper function for resolving a type by `TypeDecl` and printing an error
162    /// with line information, if it's not found.
163    ///
164    /// # Arguments
165    ///
166    /// * `decl` - The type declaration from the parsed ast.
167    fn type_from_decl(&mut self, decl: &TypeDecl) -> InternalResult<Type> {
168        let mut ty = self
169            .types
170            .get_type_by_name(&decl.name)
171            .context(
172                self.expr_num,
173                &format!("Type with name \"{}\" doesn't exist", decl.name),
174            )?
175            .clone();
176
177        if matches!(decl.is_ref, Some(ReferencePrefix)) {
178            ty.num_refs += 1;
179        }
180        Ok(ty)
181    }
182
183    /// Helper function for finding a scoped variable by name and printing an error
184    /// with line information, if it's not found.
185    ///
186    /// # Arguments
187    ///
188    /// * `name` - The name of the variable to retrieve.
189    fn get_variable_by_name(&mut self, name: &str) -> InternalResult<VariableInfo> {
190        if let Some(info) = self.variables.get(name) {
191            return Ok(info.clone());
192        }
193
194        semantics_bail!(self.expr_num, "No variable with name \"{}\"", name);
195    }
196
197    /// Helper function for parsing an immediate value and printin an error with line
198    /// information, if it's not found.
199    ///
200    /// # Arguments
201    ///
202    /// * `s` - The string representation of the immediate value.
203    fn parse_immediate<T: FromStr>(&mut self, s: &str) -> InternalResult<T> {
204        if let Ok(imm) = s.parse::<T>() {
205            return Ok(imm);
206        }
207
208        semantics_bail!(self.expr_num, "Failed to parse immediate value \"{}\"", s);
209    }
210
211    /// Get the current stack offset.
212    fn get_stack(&self) -> i16 {
213        -(self.stack as i16)
214    }
215
216    /// Push the stack value by a given size and return the new offset. Verifies the
217    /// new location doesn't overflow the stack and returns and error with line information,
218    /// if it does.
219    ///
220    /// # Arguments
221    ///
222    /// * `size` - The number of bytes to push the stack.
223    fn push_stack(&mut self, size: u32) -> InternalResult<i16> {
224        if self.stack + size > Self::MAX_STACK_SIZE {
225            semantics_bail!(
226                self.expr_num,
227                "Stack size exceeded {} bytes with this assignment",
228                Self::MAX_STACK_SIZE
229            );
230        }
231
232        self.stack += size;
233        Ok(self.get_stack())
234    }
235
236    /// Emits instructions to initialize a portion of the stack, works like an
237    /// abstract memset.
238    ///
239    /// # Arguments
240    ///
241    /// * `offset` - The offset to begin initializing.
242    /// * `value` - The value to initialize _each byte_.
243    /// * `size` - The number of bytes to initialize.
244    fn emit_init_stack_range(&mut self, mut offset: i16, value: i8, mut size: u32) {
245        let value = value as i64;
246        let v64 = value
247            | value << 8
248            | value << 16
249            | value << 24
250            | value << 32
251            | value << 40
252            | value << 48
253            | value << 56;
254        let mut remaining = size;
255        for _ in 0..size / 8 {
256            self.instructions
257                .push(Instruction::store64(Register::R10, offset, v64));
258            remaining -= 8;
259            offset += 8;
260        }
261        size = remaining;
262
263        for _ in 0..size / 4 {
264            self.instructions
265                .push(Instruction::store32(Register::R10, offset, v64 as i32));
266            remaining -= 4;
267            offset += 4;
268        }
269        size = remaining;
270
271        for _ in 0..size / 2 {
272            self.instructions
273                .push(Instruction::store16(Register::R10, offset, v64 as i16));
274            remaining -= 2;
275            offset += 2;
276        }
277        size = remaining;
278
279        for _ in 0..size {
280            self.instructions
281                .push(Instruction::store8(Register::R10, offset, v64 as i8));
282            remaining -= 1;
283            offset += 1;
284        }
285    }
286
287    /// Emits instructions that push the immediate value to the stack as the given type.
288    ///
289    /// # Arguments
290    ///
291    /// * `imm_str` - The string representation of the immediate value.
292    /// * `cast_type` - The destination type.
293    /// * `use_offset` - An optional offset at which the value is placed.
294    fn emit_push_immediate(
295        &mut self,
296        imm_str: &str,
297        cast_type: &Type,
298        use_offset: Option<i16>,
299    ) -> InternalResult<(i16, Type)> {
300        let size = cast_type.get_size();
301        if size == 0 && !matches!(cast_type.base_type, BaseType::Void) {
302            semantics_bail!(self.expr_num, "Can't assign to zero-sized type");
303        }
304
305        let offset = match use_offset {
306            Some(off) => off,
307            None => self.push_stack(size)?,
308        };
309
310        if cast_type.is_pointer() {
311            let imm = self.parse_immediate::<u8>(imm_str)?;
312            self.instructions
313                .push(Instruction::store8(Register::R10, offset, imm as i8));
314            return Ok((offset, cast_type.clone()));
315        }
316
317        // No type was given so a 64-bit unsigned integer is inferred
318        if matches!(cast_type.base_type, BaseType::Void) {
319            let imm = self.parse_immediate::<i64>(imm_str)?;
320            self.instructions
321                .push(Instruction::store64(Register::R10, offset, imm));
322            let new_type = BaseType::Integer(Integer {
323                used_bits: 64,
324                bits: 64,
325                is_signed: false,
326            });
327            return Ok((offset, new_type.into()));
328        }
329
330        if let BaseType::Integer(integer) = &cast_type.base_type {
331            match (size, integer.is_signed) {
332                (1, false) => {
333                    let imm = self.parse_immediate::<u8>(imm_str)?;
334                    self.instructions
335                        .push(Instruction::store8(Register::R10, offset, imm as i8));
336                }
337                (1, true) => {
338                    let imm = self.parse_immediate::<i8>(imm_str)?;
339                    self.instructions
340                        .push(Instruction::store8(Register::R10, offset, imm));
341                }
342                (2, false) => {
343                    let imm = self.parse_immediate::<u16>(imm_str)?;
344                    self.instructions
345                        .push(Instruction::store16(Register::R10, offset, imm as i16));
346                }
347                (2, true) => {
348                    let imm = self.parse_immediate::<i16>(imm_str)?;
349                    self.instructions
350                        .push(Instruction::store16(Register::R10, offset, imm));
351                }
352                (4, false) => {
353                    let imm = self.parse_immediate::<u32>(imm_str)?;
354                    self.instructions
355                        .push(Instruction::store32(Register::R10, offset, imm as i32));
356                }
357                (4, true) => {
358                    let imm = self.parse_immediate::<i32>(imm_str)?;
359                    self.instructions
360                        .push(Instruction::store32(Register::R10, offset, imm));
361                }
362                (8, false) => {
363                    let imm = self.parse_immediate::<u64>(imm_str)?;
364                    self.instructions
365                        .push(Instruction::store64(Register::R10, offset, imm as i64));
366                }
367                (8, true) => {
368                    let imm = self.parse_immediate::<i64>(imm_str)?;
369                    self.instructions
370                        .push(Instruction::store64(Register::R10, offset, imm));
371                }
372                (bits, _) => {
373                    semantics_bail!(self.expr_num, "{}-bit integers not supported", bits);
374                }
375            };
376        } else {
377            let imm = self.parse_immediate::<i8>(imm_str)?;
378            self.emit_init_stack_range(offset, imm, size);
379        }
380
381        Ok((offset, cast_type.clone()))
382    }
383
384    /// Emits instructions that push a register to the stack. If an offset is given,
385    /// the register is pushed to that offset.
386    ///
387    /// # Arguments
388    ///
389    /// * `reg` - The register to for which a push is emitted.
390    /// * `offset` - The stack offset to which the register is pushed.
391    fn emit_push_register(&mut self, reg: Register, offset: Option<i16>) -> InternalResult<i16> {
392        let offset = if let Some(offset) = offset {
393            offset
394        } else {
395            self.push_stack(8)?
396        };
397
398        self.instructions
399            .push(Instruction::storex64(Register::R10, offset, reg));
400        Ok(offset)
401    }
402
403    /// Emits instructions that dereference a register to the stack using its
404    /// currently held type. This always emits a `bpf_probe_read` call because
405    /// only certain memory can be directly dereferenced by BPF instructions but
406    /// all memory can be read through the helper.
407    ///
408    /// # Arguments
409    ///
410    /// * `reg` - The register holding the address to dereference.
411    /// * `deref_type` - The type of dereference.
412    /// * `offset` - The offset in the stack to which the value is copied.
413    fn emit_deref_register_to_stack(&mut self, reg: Register, deref_type: &Type, offset: i16) {
414        self.instructions
415            .push(Instruction::movx64(Register::R1, Register::R10));
416        self.instructions
417            .push(Instruction::add64(Register::R1, offset.into()));
418        self.instructions.push(Instruction::mov64(
419            Register::R2,
420            deref_type.get_size() as i32,
421        ));
422        self.instructions
423            .push(Instruction::movx64(Register::R3, reg));
424        self.instructions
425            .push(Instruction::call(Helpers::ProbeRead as u32));
426    }
427
428    /// Emits instructions that push an lvalue to the stack. Lvalues in this
429    /// language are anything that occurs on the left side of an assignment.
430    /// Currently, this is just stored variables.
431    ///
432    /// # Arguments
433    ///
434    /// * `lval` - The lvalue description.
435    /// * `cast_type` - The destination type, this can differ on re-assignments.
436    /// * `use_offset` - The (optional) offset at which the value should be stored.
437    fn emit_push_lvalue(
438        &mut self,
439        lval: &LValue,
440        cast_type: &Type,
441        use_offset: Option<i16>,
442    ) -> InternalResult<(i16, Type)> {
443        // This emits instructions to set R6 to a pointer to the lvalue, the type
444        // of the lvalue is returned by the function into `var_type`.
445        let var_type = self.emit_set_register_to_lvalue_addr(Register::R6, lval)?;
446
447        // If the cast type is `void` we "deduce" the type to be the type of the lvalue.
448        let mut real_type = if matches!(cast_type.base_type, BaseType::Void) {
449            var_type.clone()
450        } else {
451            cast_type.clone()
452        };
453
454        // The effective type must match the type of the lvalue in size.
455        if real_type.get_size() != var_type.get_size() {
456            semantics_bail!(self.expr_num, "Cannot assign two types of different sizes");
457        }
458
459        // Makes enough space on the stack to hold the value.
460        let offset = match use_offset {
461            Some(off) => off,
462            None => self.push_stack(real_type.get_size())?,
463        };
464
465        // Lastly, handle the prefix, either reference (&), dereference (*), or nothing.
466        match lval.prefix {
467            None => self.emit_deref_register_to_stack(Register::R6, &real_type, offset),
468            Some(Prefix::DeReferencePrefix(_)) => {
469                semantics_bail!(self.expr_num, "Dereferencing is not currently supported");
470            }
471            Some(Prefix::ReferencePrefix(_)) => {
472                real_type.num_refs += 1;
473                self.instructions
474                    .push(Instruction::storex64(Register::R10, offset, Register::R6));
475            }
476        }
477
478        Ok((offset, real_type.clone()))
479    }
480
481    /// Emits instructions that push an rvalue to the stack. RValues in this language
482    /// are anything that occur on the right hand side of an assignment: immediates,
483    /// lvalues, function calls, etc.
484    ///
485    /// # Arguments
486    ///
487    /// * `rval` - The rvalue to be pushed to the stack.
488    /// * `cast_type` - The type of the value, this can be different when casting.
489    /// * `use_offset` - An optional offset to which the value is pushed.
490    fn emit_push_rvalue(
491        &mut self,
492        rval: &RValue,
493        cast_type: &Type,
494        use_offset: Option<i16>,
495    ) -> InternalResult<(i16, Type)> {
496        match rval {
497            RValue::Immediate(imm_str) => self.emit_push_immediate(imm_str, cast_type, use_offset),
498            RValue::LValue(lval) => self.emit_push_lvalue(lval, cast_type, use_offset),
499            RValue::FunctionCall(call) => {
500                if let BaseType::Integer(integer) = &cast_type.base_type {
501                    if integer.get_size() != 8 {
502                        semantics_bail!(
503                            self.expr_num,
504                            "Function return values can only be stored in 64-bit types"
505                        );
506                    }
507
508                    self.emit_call(call)?;
509                    let offset = self.emit_push_register(Register::R0, use_offset)?;
510                    Ok((offset, cast_type.clone()))
511                } else {
512                    semantics_bail!(
513                        self.expr_num,
514                        "Function return values can only be stored in integer types"
515                    );
516                }
517            }
518        }
519    }
520
521    /// Returns the offset and type from a structure and field name.
522    ///
523    /// # Arguments
524    ///
525    /// * `structure` - The structure to access.
526    /// * `field_name` - The field within the structure.
527    fn get_field_access(
528        &mut self,
529        structure: &Type,
530        field_name: &str,
531    ) -> InternalResult<(u32, Type)> {
532        let structure = if let BaseType::Struct(structure) = &structure.base_type {
533            structure
534        } else {
535            semantics_bail!(self.expr_num, "Can't field-deref a non-structure type");
536        };
537
538        let field = structure.fields.get(field_name).context(
539            self.expr_num,
540            &format!("Field \"{}\" doesn't exist on type", field_name),
541        )?;
542
543        if field.offset % 8 != 0 {
544            semantics_bail!(self.expr_num, "Bit-field accesses not supported");
545        }
546
547        let field_type = self
548            .types
549            .get_type_by_id(field.type_id)
550            .context(self.expr_num, "Internal error; type id invalid")?;
551        Ok((field.offset / 8, field_type.clone()))
552    }
553
554    /// Returns the offset and type given an array and index.
555    ///
556    /// # Arguments
557    ///
558    /// * `array` - The array to access.
559    /// * `index` - The index into the array.
560    fn get_array_index(&mut self, array: &Type, index: &str) -> InternalResult<(u32, Type)> {
561        let array = if let BaseType::Array(array) = &array.base_type {
562            array
563        } else {
564            semantics_bail!(self.expr_num, "Can't array-deref a non-array type");
565        };
566
567        let index = self.parse_immediate::<u32>(index)?;
568        if index > array.num_elements {
569            semantics_bail!(
570                self.expr_num,
571                "Out-of-bounds array access {}/{}",
572                index,
573                array.num_elements
574            );
575        }
576
577        let element_type = self
578            .types
579            .get_type_by_id(array.element_type_id)
580            .context(self.expr_num, "Internal error; type id invalid")?;
581
582        let offset = element_type.get_size() * index;
583        Ok((offset, element_type.clone()))
584    }
585
586    /// Given a type and deref slice, returns the offset of the deref and its type.
587    ///
588    /// # Arguments
589    ///
590    /// * `ty` - The type being dereferenced.
591    /// * `derefs` - The list of derefs to apply to the type.
592    fn get_deref_offset(
593        &mut self,
594        ty: &Type,
595        derefs: &[DeReference],
596    ) -> InternalResult<(i16, Type)> {
597        let mut offset = 0;
598        let mut cur_type = ty.clone();
599        for deref in derefs.iter() {
600            if cur_type.is_pointer() {
601                semantics_bail!(
602                    self.expr_num,
603                    "Can't deref an offset through an indirection"
604                );
605            }
606
607            let (off, ty) = match deref {
608                DeReference::FieldAccess(ma) => self.get_field_access(&cur_type, &ma.name)?,
609                DeReference::ArrayIndex(ai) => self.get_array_index(&cur_type, &ai.element)?,
610            };
611
612            offset += off;
613            cur_type = ty;
614        }
615
616        let offset: i16 = offset
617            .try_into()
618            .context(self.expr_num, "Type is too large to deref")?;
619        Ok((offset, cur_type))
620    }
621
622    /// Emit instructions for an assignment expression.
623    ///
624    /// # Arguments
625    ///
626    /// * `assign` - Information about the assignment.
627    fn emit_assign(&mut self, assign: &Assignment) -> InternalResult<()> {
628        let mut new_variable = true;
629        let (cast_type, use_offset) =
630            if let Ok(info) = &self.get_variable_by_name(&assign.left.name) {
631                if assign.type_name.is_some() {
632                    semantics_bail!(
633                        self.expr_num,
634                        "Can't re-type \"{}\" after first assignment",
635                        assign.left.name
636                    );
637                } else if let VariableLocation::Stack(off) = info.location {
638                    let (rel_off, offset_type) =
639                        self.get_deref_offset(&info.var_type, &assign.left.derefs)?;
640                    new_variable = false;
641                    (offset_type, Some(off + rel_off))
642                } else {
643                    semantics_bail!(
644                        self.expr_num,
645                        "Variable \"{}\" cannot be re-assigned",
646                        assign.left.name
647                    );
648                }
649            } else if let Some(type_name) = &assign.type_name {
650                let assign_type = self.type_from_decl(type_name)?;
651                (assign_type, None)
652            } else {
653                (Default::default(), None)
654            };
655
656        let (offset, new_type) = self.emit_push_rvalue(&assign.right, &cast_type, use_offset)?;
657
658        if new_variable {
659            self.variables.insert(
660                assign.left.name.clone(),
661                VariableInfo {
662                    var_type: new_type,
663                    location: VariableLocation::Stack(offset),
664                },
665            );
666        }
667
668        Ok(())
669    }
670
671    /// From an address held in a register and a structure type, emits instructions that set
672    /// the register value to the address of the field being accessed.
673    ///
674    /// # Arguments
675    ///
676    /// * `reg` - The register holding the address of the structure.
677    /// * `structure` - The structure's type.
678    /// * `field_access` - Information about the field being accessed.
679    fn emit_field_access(
680        &mut self,
681        reg: Register,
682        structure: &Type,
683        field_access: &FieldAccess,
684    ) -> InternalResult<Type> {
685        let (offset, field_type) = self.get_field_access(structure, &field_access.name)?;
686        if offset > 0 {
687            self.instructions
688                .push(Instruction::add64(reg, offset as i32));
689        }
690        Ok(field_type)
691    }
692
693    /// From an address held in a register and an array type, emits instructions that set
694    /// the register value to the address of the element being accessed.
695    ///
696    /// # Arguments
697    ///
698    /// * `reg` - The register holding the address of the array.
699    /// * `array` - The array's type.
700    /// * `index` - Information about the index being accessed.
701    fn emit_index_array(
702        &mut self,
703        reg: Register,
704        array: &Type,
705        index: &ArrayIndex,
706    ) -> InternalResult<Type> {
707        let (offset, element_type) = self.get_array_index(array, &index.element)?;
708        if offset > 0 {
709            self.instructions
710                .push(Instruction::add64(reg, offset as i32));
711        }
712        Ok(element_type)
713    }
714
715    /// Given a register holding a `var_type` address, and a list of derefs, emits instructions
716    /// that apply these derefs to the register. After the instructions are executed, `reg` will
717    /// hold the address to the deref.
718    ///
719    /// # Arguments
720    ///
721    /// * `reg` - The register holding the address to be dereferenced.
722    /// * `var_type` - The type of variable being pointed to by `reg`.
723    /// * `derefs` - A list of derefs to apply.
724    fn emit_apply_derefs_to_reg(
725        &mut self,
726        reg: Register,
727        var_type: &Type,
728        derefs: &[DeReference],
729    ) -> InternalResult<Type> {
730        if derefs.is_empty() {
731            return Ok(var_type.clone());
732        }
733
734        // If the current var_type is a pointer then this deref is through a pointer.
735        // Before emiting instructions to access the structure or field, the address
736        // needs to be loaded into the register.
737        if var_type.is_pointer() {
738            self.instructions.push(Instruction::loadx64(reg, reg, 0));
739        }
740
741        let next_type = match &derefs[0] {
742            DeReference::FieldAccess(ma) => self.emit_field_access(reg, var_type, ma)?,
743            DeReference::ArrayIndex(ai) => self.emit_index_array(reg, var_type, ai)?,
744        };
745
746        self.emit_apply_derefs_to_reg(reg, &next_type, &derefs[1..])
747    }
748
749    /// Given a register and lvalue information, emits instructions that set the
750    /// register to the address of the lvalue being accessed. On success, the final
751    /// type of the lval access is returned and `reg` will contain the address pointing
752    /// to this type.
753    ///
754    /// # Arguments
755    ///
756    /// * `reg` - The register to be set.
757    /// * `lval` - The lvalue information.
758    fn emit_set_register_to_lvalue_addr(
759        &mut self,
760        reg: Register,
761        lval: &LValue,
762    ) -> InternalResult<Type> {
763        let info = self.get_variable_by_name(&lval.name)?;
764
765        match info.location {
766            VariableLocation::SpecialImmediate(_) => {
767                semantics_bail!(
768                    self.expr_num,
769                    "Variable \"{}\" is a capture; captures can't be assigned to",
770                    lval.name
771                );
772            }
773            VariableLocation::Stack(o) => {
774                self.instructions
775                    .push(Instruction::movx64(reg, Register::R10));
776                self.instructions.push(Instruction::add64(reg, o.into()));
777            }
778        }
779
780        self.emit_apply_derefs_to_reg(reg, &info.var_type, &lval.derefs)
781    }
782
783    /// Given a register and lvalue information, emits instructions that set the
784    /// register to the value of this lvalue access. This is different from
785    /// `emit_set_register_to_lvalue_addr` in that the register receives the final
786    /// dereferenced type, _not_ an address pointing to it.
787    ///
788    /// # Arguments
789    ///
790    /// * `reg` - The register that receives the value.
791    /// * `lval` - The lvalue information.
792    /// * `load_type` - The BPF load type to use when setting the register value.
793    fn emit_set_register_from_lvalue(
794        &mut self,
795        reg: Register,
796        lval: &LValue,
797        load_type: Option<MemoryOpLoadType>,
798    ) -> InternalResult<()> {
799        let info = self.get_variable_by_name(&lval.name)?;
800        if let VariableLocation::SpecialImmediate(v) = info.location {
801            if !lval.derefs.is_empty() {
802                semantics_bail!(
803                    self.expr_num,
804                    "Can't dereference \"{}\"; it's a capture",
805                    lval.name
806                );
807            }
808
809            let load_type = load_type.unwrap_or(MemoryOpLoadType::Void);
810            self.instructions
811                .push(Instruction::loadtype(reg, v.into(), load_type));
812            return Ok(());
813        }
814
815        let var_type = self.emit_set_register_to_lvalue_addr(reg, lval)?;
816
817        /*
818         * the register is already holding a pointer to the lvalue so, if a reference
819         * was specified, nothing else needs to be done.
820         */
821        if matches!(lval.prefix, Some(Prefix::ReferencePrefix(_))) {
822            return Ok(());
823        }
824
825        /*
826         * register is pointing to a value of type `var_type`, load it into the register,
827         * if it fits.
828         */
829        match var_type.get_size() {
830            1 => self.instructions.push(Instruction::loadx8(reg, reg, 0)),
831            2 => self.instructions.push(Instruction::loadx16(reg, reg, 0)),
832            4 => self.instructions.push(Instruction::loadx32(reg, reg, 0)),
833            8 => self.instructions.push(Instruction::loadx64(reg, reg, 0)),
834            size => {
835                semantics_bail!(
836                    self.expr_num,
837                    "The variable \"{}\" is {} bytes and is too large to be passed in a register",
838                    lval.name,
839                    size
840                );
841            }
842        }
843
844        /*
845         * the register is now holding `var_type`. if another dereference was requested
846         * then make sure the type being held by the register is a pointer.
847         */
848        if matches!(lval.prefix, Some(Prefix::DeReferencePrefix(_))) {
849            if !var_type.is_pointer() {
850                semantics_bail!(self.expr_num, "Cannot dereference a non-pointer type");
851            }
852
853            self.instructions.push(Instruction::loadx64(reg, reg, 0));
854        }
855
856        Ok(())
857    }
858
859    /// Given a register and rvalue information, emits instructions that set the
860    /// register to the value of this lvalue access. This can either be an lvalue,
861    /// in which case `emit_set_register_from_lvalue` is called, an immediate, or
862    /// a function call.
863    ///
864    /// # Arguments
865    ///
866    /// * `reg` - The register that receives the value.
867    /// * `lval` - The lvalue information.
868    /// * `load_type` - The BPF load type to use when setting the register value.
869    fn emit_set_register_from_rvalue(
870        &mut self,
871        reg: Register,
872        rval: &RValue,
873        load_type: Option<MemoryOpLoadType>,
874    ) -> InternalResult<()> {
875        match rval {
876            RValue::Immediate(imm_str) => {
877                if let Some(load_type) = load_type {
878                    let imm = self.parse_immediate(imm_str)?;
879                    self.instructions
880                        .push(Instruction::loadtype(reg, imm, load_type));
881                } else {
882                    let imm = self.parse_immediate(imm_str)?;
883                    self.instructions.push(Instruction::mov64(reg, imm));
884                }
885            }
886            RValue::LValue(lval) => {
887                self.emit_set_register_from_lvalue(reg, lval, load_type)?;
888            }
889            RValue::FunctionCall(call) => {
890                self.emit_call(call)?;
891                if !matches!(reg, Register::R0) {
892                    self.instructions
893                        .push(Instruction::movx64(reg, Register::R0));
894                }
895            }
896        }
897
898        Ok(())
899    }
900
901    /// Emits instructions that perform a call.
902    ///
903    /// # Arguments
904    ///
905    /// * `call` - Information about the call.
906    fn emit_call(&mut self, call: &FunctionCall) -> InternalResult<()> {
907        let helper = match Helpers::from_string(&call.name) {
908            Some(helper) => helper,
909            None => {
910                semantics_bail!(self.expr_num, "Unknown function \"{}\"", call.name);
911            }
912        };
913
914        let types = helper.get_arg_types();
915
916        for (i, arg) in call.args.iter().enumerate() {
917            match i {
918                0 => self.emit_set_register_from_rvalue(Register::R1, arg, Some(types[i]))?,
919                1 => self.emit_set_register_from_rvalue(Register::R2, arg, Some(types[i]))?,
920                2 => self.emit_set_register_from_rvalue(Register::R3, arg, Some(types[i]))?,
921                3 => self.emit_set_register_from_rvalue(Register::R4, arg, Some(types[i]))?,
922                4 => self.emit_set_register_from_rvalue(Register::R5, arg, Some(types[i]))?,
923                _ => {
924                    semantics_bail!(self.expr_num, "Function call exceeds 5 arguments");
925                }
926            };
927        }
928        self.instructions.push(Instruction::call(helper as u32));
929
930        Ok(())
931    }
932
933    /// Emits instructions that perform a return.
934    ///
935    /// # Arguments
936    ///
937    /// * `ret` - Information about the return.
938    fn emit_return(&mut self, ret: &Return) -> InternalResult<()> {
939        match &ret.value {
940            None => {
941                self.instructions.push(Instruction::mov64(Register::R0, 0));
942                self.instructions.push(Instruction::exit());
943            }
944            Some(value) => {
945                self.emit_set_register_from_rvalue(Register::R0, value, None)?;
946                self.instructions.push(Instruction::exit());
947            }
948        }
949
950        Ok(())
951    }
952
953    /// Emits instructions that setup the function. Pushes arguments to the
954    /// stack, sets their types, etc.
955    ///
956    /// # Arguments
957    ///
958    /// * `input` - Information about the function's input.
959    fn emit_prologue(&mut self, input: &InputLine) -> InternalResult<()> {
960        /*
961         * BPF limits the number of function arguments to 5 (R1 to R5).
962         */
963        if input.args.len() > 5 {
964            semantics_bail!(self.expr_num, "Function exceeds 5 arguments");
965        }
966
967        /*
968         * Push all input arguments to the stack and create variables entries for them.
969         */
970        for (i, arg) in input.args.iter().enumerate() {
971            let register = Register::from_num((i + 1) as u8).expect("too many args");
972            let arg_type = self.type_from_decl(&arg.type_name)?;
973            let offset = self.emit_push_register(register, None)?;
974            self.variables.insert(
975                arg.name.clone(),
976                VariableInfo {
977                    var_type: arg_type,
978                    location: VariableLocation::Stack(offset),
979                },
980            );
981        }
982
983        Ok(())
984    }
985
986    /// Emits instructions for the list of expressions given.
987    ///
988    /// # Arguments
989    ///
990    /// * `exprs` - The expressions in the body.
991    fn emit_body(&mut self, exprs: &[Expression]) -> InternalResult<()> {
992        for expr in exprs {
993            self.expr_num += 1;
994
995            match expr {
996                Expression::Assignment(assign) => {
997                    self.emit_assign(assign)?;
998                }
999                Expression::FunctionCall(call) => {
1000                    self.emit_call(call)?;
1001                }
1002                Expression::Return(ret) => {
1003                    self.emit_return(ret)?;
1004                }
1005            }
1006        }
1007
1008        /*
1009         * Programs implicitly return 0 when no return statement is specified.
1010         */
1011        let last = exprs.last();
1012        if matches!(last, None) || !matches!(last, Some(Expression::Return(_))) {
1013            self.emit_return(&Return { value: None })?;
1014        }
1015
1016        Ok(())
1017    }
1018
1019    /// Compile a given script.
1020    ///
1021    /// # Arguments
1022    ///
1023    /// * `script_text` - The script to compile, as a string.
1024    ///
1025    /// # Example
1026    /// ```
1027    /// use bpf_script::compiler::Compiler;
1028    /// use bpf_script::types::TypeDatabase;
1029    ///
1030    /// let mut database = TypeDatabase::default();
1031    /// database.add_integer(Some("u32"), 4, false);
1032    /// let mut compiler = Compiler::create(&database);
1033    /// compiler.compile(r#"
1034    ///     fn(a: u32)
1035    ///         return a
1036    /// "#).expect("Failed to compile.");
1037    /// ```
1038    pub fn compile(&mut self, script_text: &str) -> InternalResult<()> {
1039        let ast = ScriptDef::parse(script_text)?;
1040        self.emit_prologue(&ast.input)?;
1041        self.emit_body(&ast.exprs)?;
1042
1043        self.instructions = optimize(&self.instructions);
1044
1045        Ok(())
1046    }
1047
1048    /// Returns the internally held instructions after `compile` has been called.
1049    ///
1050    /// # Example
1051    /// ```
1052    /// use bpf_script::compiler::Compiler;
1053    /// use bpf_script::types::TypeDatabase;
1054    ///
1055    /// let mut database = TypeDatabase::default();
1056    /// database.add_integer(Some("u32"), 4, false);
1057    /// let mut compiler = Compiler::create(&database);
1058    /// compiler.compile(r#"
1059    ///     fn(a: u32)
1060    ///         return a
1061    /// "#).expect("Failed to compile.");
1062    /// for ins in compiler.get_instructions() {
1063    ///     println!("{}", ins);
1064    /// }
1065    /// ```
1066    pub fn get_instructions(&self) -> &[Instruction] {
1067        &self.instructions
1068    }
1069
1070    /// Returns the bytecode of a program after `compile` has been called. These
1071    /// are the raw instructions that make up a BPF program that can be passed
1072    /// directly to the kernel.
1073    ///
1074    /// # Example
1075    /// ```
1076    /// use bpf_script::compiler::Compiler;
1077    /// use bpf_script::types::TypeDatabase;
1078    ///
1079    /// let mut database = TypeDatabase::default();
1080    /// database.add_integer(Some("u32"), 4, false);
1081    /// let mut compiler = Compiler::create(&database);
1082    /// compiler.compile(r#"
1083    ///     fn(a: u32)
1084    ///         return a
1085    /// "#).expect("Failed to compile.");
1086    /// for ins in compiler.get_bytecode() {
1087    ///     println!("{}", ins);
1088    /// }
1089    /// ```
1090    pub fn get_bytecode(&self) -> Vec<u64> {
1091        let mut bytecode = vec![];
1092        for instruction in &self.instructions {
1093            let (n, x) = instruction.encode();
1094            bytecode.push(n);
1095            if let Some(x) = x {
1096                bytecode.push(x);
1097            }
1098        }
1099
1100        bytecode
1101    }
1102}