Skip to main content

sbpf_assembler/
lib.rs

1use {anyhow::Result, codespan::Files};
2
3// Parser
4pub mod parser;
5
6// Error handling and diagnostics
7pub mod errors;
8pub mod macros;
9
10// Intermediate Representation
11pub mod ast;
12pub mod astnode;
13pub mod dynsym;
14
15// ELF header, program, section
16pub mod header;
17pub mod program;
18pub mod section;
19
20// Debug info
21pub mod debug;
22
23// WASM bindings
24#[cfg(target_arch = "wasm32")]
25pub mod wasm;
26
27pub use self::{
28    astnode::ASTNode,
29    debug::DebugData,
30    errors::CompileError,
31    parser::{ParseResult, Token, parse},
32    program::Program,
33};
34
/// Target sBPF architecture version the assembler produces code for.
///
/// [`SbpfArch::V0`] is the default variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum SbpfArch {
    /// Version 0 (the default).
    #[default]
    V0,
    /// Version 3.
    V3,
}

impl SbpfArch {
    /// Returns `true` only when this architecture is [`SbpfArch::V3`].
    pub fn is_v3(&self) -> bool {
        *self == Self::V3
    }

    /// Returns the numeric flags value for this architecture:
    /// `0` for `V0` and `3` for `V3`.
    ///
    /// NOTE(review): presumably written into the ELF header's `e_flags`
    /// field — confirm against the header emitter.
    pub fn e_flags(&self) -> u32 {
        match *self {
            Self::V0 => 0,
            Self::V3 => 3,
        }
    }
}
55
/// Settings that switch on debug-info generation in the assembler.
///
/// Both fields are copied verbatim into the generated debug data.
#[derive(Clone, Debug)]
pub struct DebugMode {
    /// Name of the source file to record in the debug info.
    pub filename: String,
    /// Directory of the source file to record in the debug info.
    pub directory: String,
}
64
65/// Options for the assembler
66#[derive(Debug, Clone, Default)]
67pub struct AssemblerOption {
68    /// sBPF target architecture
69    pub arch: SbpfArch,
70    /// Optional debug mode configuration
71    pub debug_mode: Option<DebugMode>,
72}
73
74/// Assembler for SBPF assembly code
75#[derive(Debug, Clone)]
76pub struct Assembler {
77    options: AssemblerOption,
78}
79
80impl Assembler {
81    /// Create a new Assembler with the given options
82    pub fn new(options: AssemblerOption) -> Self {
83        Self { options }
84    }
85
86    pub fn assemble(&self, source: &str) -> Result<Vec<u8>, Vec<CompileError>> {
87        let parse_result = match parse(source, self.options.arch) {
88            Ok(result) => result,
89            Err(errors) => {
90                return Err(errors);
91            }
92        };
93
94        // Build debug data if debug mode is enabled
95        let debug_data = if let Some(ref debug_mode) = self.options.debug_mode {
96            let (lines, labels) = collect_line_and_label_entries(source, &parse_result);
97            let code_end = parse_result.code_section.get_size();
98
99            Some(DebugData {
100                filename: debug_mode.filename.clone(),
101                directory: debug_mode.directory.clone(),
102                lines,
103                labels,
104                code_start: 0,
105                code_end,
106            })
107        } else {
108            None
109        };
110
111        let program = Program::from_parse_result(parse_result, debug_data);
112        let bytecode = program.emit_bytecode();
113        Ok(bytecode)
114    }
115}
116
117type LineEntry = (u64, u32); // (offset, line)
118type LabelEntry = (String, u64, u32); // (label, offset, line)
119
120/// Helper function to collect line and label entries
121fn collect_line_and_label_entries(
122    source: &str,
123    parse_result: &ParseResult,
124) -> (Vec<LineEntry>, Vec<LabelEntry>) {
125    let mut files: Files<&str> = Files::new();
126    let file_id = files.add("source", source);
127
128    let mut line_entries = Vec::new();
129    let mut label_entries = Vec::new();
130
131    for node in parse_result.code_section.get_nodes() {
132        match node {
133            ASTNode::Instruction {
134                instruction,
135                offset,
136            } => {
137                let line_index = files.line_index(file_id, instruction.span.start as u32);
138                let line_number = (line_index.to_usize() + 1) as u32;
139                line_entries.push((*offset, line_number));
140            }
141            ASTNode::Label { label, offset } => {
142                let line_index = files.line_index(file_id, label.span.start as u32);
143                let line_number = (line_index.to_usize() + 1) as u32;
144                label_entries.push((label.name.clone(), *offset, line_number));
145            }
146            _ => {}
147        }
148    }
149
150    for node in parse_result.data_section.get_nodes() {
151        if let ASTNode::ROData { rodata, offset } = node {
152            let line_index = files.line_index(file_id, rodata.span.start as u32);
153            let line_number = (line_index.to_usize() + 1) as u32;
154            label_entries.push((rodata.name.clone(), *offset, line_number));
155        }
156    }
157
158    (line_entries, label_entries)
159}
160
/// Test-only shortcut: assembles `source` with the default
/// [`AssemblerOption`] (no debug mode).
#[cfg(test)]
pub fn assemble(source: &str) -> Result<Vec<u8>, Vec<CompileError>> {
    Assembler::new(AssemblerOption::default()).assemble(source)
}
167
/// Test-only shortcut: assembles `source` for [`SbpfArch::V0`] with debug
/// mode enabled, recording `filename` and `directory` in the debug data.
#[cfg(test)]
pub fn assemble_with_debug_data(
    source: &str,
    filename: &str,
    directory: &str,
) -> Result<Vec<u8>, Vec<CompileError>> {
    let debug_mode = DebugMode {
        filename: filename.to_owned(),
        directory: directory.to_owned(),
    };

    Assembler::new(AssemblerOption {
        arch: SbpfArch::V0,
        debug_mode: Some(debug_mode),
    })
    .assemble(source)
}
184
/// End-to-end tests that drive the assembler through the test-only
/// `assemble` / `assemble_with_debug_data` helpers.
///
/// Success-path tests use `expect` so that a failure prints the compile
/// errors instead of a bare "assertion failed".
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` if `needle` occurs as a contiguous run inside `haystack`.
    ///
    /// `needle` must be non-empty (`slice::windows` panics on a zero size).
    fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|window| window == needle)
    }

    #[test]
    fn test_assemble_success() {
        let bytecode = assemble("exit").expect("a lone `exit` should assemble");
        assert!(!bytecode.is_empty());
    }

    #[test]
    fn test_assemble_parse_error() {
        let source = "invalid_xyz";
        let result = assemble(source);
        assert!(result.is_err());
    }

    #[test]
    fn test_assemble_with_equ_directive() {
        let source = r#"
        .globl entrypoint
        .equ MY_CONST, 42
        entrypoint:
            mov64 r1, MY_CONST
            exit
        "#;
        assemble(source).expect(".equ constant should assemble");
    }

    #[test]
    fn test_assemble_duplicate_label_error() {
        let source = r#"
        .globl entrypoint
        entrypoint:
            mov64 r1, 1
        entrypoint:
            exit
        "#;
        let errors = assemble(source).expect_err("duplicate label should be rejected");
        assert!(!errors.is_empty());
    }

    #[test]
    fn test_assemble_extern_directive() {
        let source = r#"
        .globl entrypoint
        .extern my_extern_symbol
        entrypoint:
            exit
        "#;
        assemble(source).expect(".extern directive should assemble");
    }

    #[test]
    fn test_assemble_rodata_section() {
        let source = r#"
        .globl entrypoint
        .rodata
        my_data: .ascii "hello"
        .text
        entrypoint:
            exit
        "#;
        assemble(source).expect(".rodata with .ascii should assemble");
    }

    #[test]
    fn test_assemble_rodata_byte() {
        let source = r#"
        .globl entrypoint
        .rodata
        my_byte: .byte 0x42
        .text
        entrypoint:
            exit
        "#;
        assemble(source).expect(".rodata with a single .byte should assemble");
    }

    #[test]
    fn test_assemble_rodata_multiple_bytes() {
        let source = r#"
        .globl entrypoint
        .rodata
        my_bytes: .byte 0x01, 0x02, 0x03, 0x04
        .text
        entrypoint:
            exit
        "#;
        assemble(source).expect(".rodata with several .byte values should assemble");
    }

    #[test]
    fn test_assemble_rodata_mixed() {
        let source = r#"
        .globl entrypoint
        .rodata
        data1: .byte 0x42
        data2: .ascii "test"
        .text
        entrypoint:
            exit
        "#;
        assemble(source).expect(".rodata mixing .byte and .ascii should assemble");
    }

    #[test]
    fn test_assemble_jump_operations() {
        let source = r#"
        .globl entrypoint
        entrypoint:
            jeq r1, 0, +1
            ja +2
        target:
            jne r1, r2, target
            exit
        "#;
        assemble(source).expect("jump instructions should assemble");
    }

    #[test]
    fn test_assemble_offset_expression() {
        let source = r#"
        .globl entrypoint
        .equ BASE, 100
        entrypoint:
            mov64 r1, BASE+10
            exit
        "#;
        assemble(source).expect("constant offset expression should assemble");
    }

    #[test]
    fn test_assemble_equ_expression() {
        let source = r#"
        .globl entrypoint
        .equ BASE, 100
        .equ OFFSET, 20
        .equ COMPUTED, BASE
        entrypoint:
            mov64 r1, BASE
            mov64 r2, OFFSET
            mov64 r3, COMPUTED
            exit
        "#;
        assemble(source).expect(".equ referring to another constant should assemble");
    }

    #[test]
    fn test_assemble_with_debug_data() {
        let source = r#".equ MSG_LEN, 14

.globl entrypoint
entrypoint:
  lddw r1, message
  mov64 r2, MSG_LEN
  call sol_log_
  exit
.rodata
  message: .ascii "Hello, Solana!"
"#;
        let bytecode = assemble_with_debug_data(source, "hello_solana.s", "/tmp")
            .expect("debug-mode assembly should succeed");

        // Verify the output names every debug section. Each name is matched
        // together with its NUL terminator: a plain substring search for
        // ".debug_line" would also be satisfied by ".debug_line_str" and so
        // could not detect a missing `.debug_line` section on its own.
        for name in &[
            ".debug_abbrev",
            ".debug_info",
            ".debug_line",
            ".debug_line_str",
        ] {
            let mut terminated = name.as_bytes().to_vec();
            terminated.push(0);
            assert!(
                contains_bytes(&bytecode, &terminated),
                "Missing {} section",
                name
            );
        }
    }
}