sbpf_assembler/
astnode.rs

1use crate::debuginfo::{DebugInfo, RegisterHint, RegisterType};
2use crate::errors::CompileError;
3use crate::instruction::Instruction;
4use crate::lexer::{ImmediateValue, Token};
5use sbpf_common::opcode::Opcode;
6use std::collections::HashMap;
7use std::ops::Range;
8
9#[derive(Debug, Clone)]
10pub enum ASTNode {
11    // only present in the AST
12    Directive {
13        directive: Directive,
14    },
15    GlobalDecl {
16        global_decl: GlobalDecl,
17    },
18    EquDecl {
19        equ_decl: EquDecl,
20    },
21    ExternDecl {
22        extern_decl: ExternDecl,
23    },
24    RodataDecl {
25        rodata_decl: RodataDecl,
26    },
27    Label {
28        label: Label,
29        offset: u64,
30    },
31    // present in the bytecode
32    ROData {
33        rodata: ROData,
34        offset: u64,
35    },
36    Instruction {
37        instruction: Instruction,
38        offset: u64,
39    },
40}
41
42#[derive(Debug, Clone)]
43pub struct Directive {
44    pub name: String,
45    pub args: Vec<Token>,
46    pub span: Range<usize>,
47}
48
/// A global declaration naming the program's entry label.
#[derive(Debug, Clone)]
pub struct GlobalDecl {
    /// Name of the label declared as the entry point.
    pub entry_label: String,
    /// Location of the declaration in the source text (presumably byte
    /// offsets — confirm against the lexer).
    pub span: Range<usize>,
}

impl GlobalDecl {
    /// Returns an owned copy of the entry label name.
    pub fn get_entry_label(&self) -> String {
        self.entry_label.to_owned()
    }
}
60
61#[derive(Debug, Clone)]
62pub struct EquDecl {
63    pub name: String,
64    pub value: Token,
65    pub span: Range<usize>,
66}
67
68impl EquDecl {
69    pub fn get_name(&self) -> String {
70        self.name.clone()
71    }
72    pub fn get_val(&self) -> ImmediateValue {
73        match &self.value {
74            Token::ImmediateValue(val, _) => val.clone(),
75            _ => panic!("Invalid Equ declaration"),
76        }
77    }
78}
79
80#[derive(Debug, Clone)]
81pub struct ExternDecl {
82    pub args: Vec<Token>,
83    pub span: Range<usize>,
84}
85
/// A rodata declaration marker; it carries nothing but its source location.
#[derive(Debug, Clone)]
pub struct RodataDecl {
    /// Location of the declaration in the source text (presumably byte
    /// offsets — confirm against the lexer).
    pub span: Range<usize>,
}
90
/// A named label in the assembly source.
#[derive(Debug, Clone)]
pub struct Label {
    /// Name of the label.
    pub name: String,
    /// Location of the label in the source text (presumably byte offsets —
    /// confirm against the lexer).
    pub span: Range<usize>,
}
96
97#[derive(Debug, Clone)]
98pub struct ROData {
99    pub name: String,
100    pub args: Vec<Token>,
101    pub span: Range<usize>,
102}
103
104impl ROData {
105    /// Validates that an immediate value is within the specified range
106    fn validate_immediate_range(
107        value: &ImmediateValue,
108        min: i64,
109        max: i64,
110        span: Range<usize>,
111    ) -> Result<(), CompileError> {
112        match value {
113            ImmediateValue::Int(val) => {
114                if *val < min || *val > max {
115                    return Err(CompileError::OutOfRangeLiteral {
116                        span,
117                        custom_label: None,
118                    });
119                }
120            }
121            ImmediateValue::Addr(val) => {
122                if *val < min || *val > max {
123                    return Err(CompileError::OutOfRangeLiteral {
124                        span,
125                        custom_label: None,
126                    });
127                }
128            }
129        }
130        Ok(())
131    }
132
133    pub fn get_size(&self) -> u64 {
134        let size: u64;
135        match (&self.args[0], &self.args[1]) {
136            (Token::Directive(_, _), Token::StringLiteral(s, _)) => {
137                size = s.len() as u64;
138            }
139            (Token::Directive(directive, _), Token::VectorLiteral(values, _)) => {
140                match directive.as_str() {
141                    "byte" => {
142                        size = values.len() as u64;
143                    }
144                    "short" => {
145                        size = values.len() as u64 * 2;
146                    }
147                    "int" | "long" => {
148                        size = values.len() as u64 * 4;
149                    }
150                    "quad" => {
151                        size = values.len() as u64 * 8;
152                    }
153                    _ => panic!("Invalid ROData declaration"),
154                }
155            }
156            _ => panic!("Invalid ROData declaration"),
157        }
158        size
159    }
160    pub fn verify(&self) -> Result<(), CompileError> {
161        match (&self.args[0], &self.args[1]) {
162            (Token::Directive(directive, directive_span), Token::StringLiteral(_, _)) => {
163                if directive.as_str() != "ascii" {
164                    return Err(CompileError::InvalidRODataDirective {
165                        span: directive_span.clone(),
166                        custom_label: None,
167                    });
168                }
169            }
170            (
171                Token::Directive(directive, directive_span),
172                Token::VectorLiteral(values, vector_literal_span),
173            ) => match directive.as_str() {
174                "byte" => {
175                    for value in values {
176                        Self::validate_immediate_range(
177                            value,
178                            i8::MIN as i64,
179                            i8::MAX as i64,
180                            vector_literal_span.clone(),
181                        )?;
182                    }
183                }
184                "short" => {
185                    for value in values {
186                        Self::validate_immediate_range(
187                            value,
188                            i16::MIN as i64,
189                            i16::MAX as i64,
190                            vector_literal_span.clone(),
191                        )?;
192                    }
193                }
194                "int" | "long" => {
195                    for value in values {
196                        Self::validate_immediate_range(
197                            value,
198                            i32::MIN as i64,
199                            i32::MAX as i64,
200                            vector_literal_span.clone(),
201                        )?;
202                    }
203                }
204                "quad" => {
205                    for value in values {
206                        Self::validate_immediate_range(
207                            value,
208                            i64::MIN,
209                            i64::MAX,
210                            vector_literal_span.clone(),
211                        )?;
212                    }
213                }
214                _ => {
215                    return Err(CompileError::InvalidRODataDirective {
216                        span: directive_span.clone(),
217                        custom_label: None,
218                    });
219                }
220            },
221            _ => {
222                return Err(CompileError::InvalidRodataDecl {
223                    span: self.span.clone(),
224                    custom_label: None,
225                });
226            }
227        }
228        Ok(())
229    }
230}
231
232impl ASTNode {
233    pub fn bytecode_with_debug_map(&self) -> Option<(Vec<u8>, HashMap<u64, DebugInfo>)> {
234        match self {
235            ASTNode::Instruction {
236                instruction:
237                    Instruction {
238                        opcode,
239                        operands,
240                        span,
241                    },
242                offset,
243            } => {
244                let mut bytes = Vec::new();
245                let mut debug_map = HashMap::new();
246                let mut debug_info = DebugInfo::new(span.clone());
247                bytes.push(opcode.to_bytecode()); // 1 byte opcode
248
249                if *opcode == Opcode::Call {
250                    bytes.extend_from_slice(&[0x10, 0x00, 0x00]);
251                    if let Some(Token::ImmediateValue(imm, _)) = operands.last() {
252                        let imm32 = match imm {
253                            ImmediateValue::Int(val) => *val as i32,
254                            ImmediateValue::Addr(val) => *val as i32,
255                        };
256                        bytes.extend_from_slice(&imm32.to_le_bytes());
257                    } else {
258                        // external calls
259                        bytes.extend_from_slice(&[0xFF, 0xFF, 0xFF, 0xFF]);
260                    }
261                } else if *opcode == Opcode::Lddw {
262                    if let [Token::Register(reg, _), Token::ImmediateValue(imm, _)] = &operands[..]
263                    {
264                        // 1 byte register number (strip 'r' prefix)
265                        bytes.push(*reg);
266
267                        // 2 bytes of zeros (offset/reserved)
268                        bytes.extend_from_slice(&[0, 0]);
269
270                        // 8 bytes immediate value in little-endian
271                        let imm64 = match imm {
272                            ImmediateValue::Int(val) => *val,
273                            ImmediateValue::Addr(val) => *val,
274                        };
275                        bytes.extend_from_slice(&imm64.to_le_bytes()[..4]);
276                        bytes.extend_from_slice(&[0, 0, 0, 0]);
277                        bytes.extend_from_slice(&imm64.to_le_bytes()[4..8]);
278                    }
279                } else {
280                    match &operands[..] {
281                        [Token::ImmediateValue(imm, _)] => {
282                            // 1 byte of zeros (no register)
283                            bytes.push(0);
284
285                            if *opcode == Opcode::Ja {
286                                // 2 bytes immediate value in little-endian for 'ja'
287                                let imm16 = match imm {
288                                    ImmediateValue::Int(val) => *val as i16,
289                                    ImmediateValue::Addr(val) => *val as i16,
290                                };
291                                bytes.extend_from_slice(&imm16.to_le_bytes());
292                            } else {
293                                // 4 bytes immediate value in little-endian
294                                let imm32 = match imm {
295                                    ImmediateValue::Int(val) => *val as i32,
296                                    ImmediateValue::Addr(val) => *val as i32,
297                                };
298                                bytes.extend_from_slice(&imm32.to_le_bytes());
299                            }
300                        }
301
302                        [Token::Register(reg, _)] => {
303                            if *opcode == Opcode::Callx {
304                                bytes.push(0);
305                                bytes.extend_from_slice(&[0, 0]);
306                                bytes.extend_from_slice(&[*reg, 0, 0, 0]);
307                            } else {
308                                bytes.push(*reg);
309                                bytes.extend_from_slice(&[0, 0, 0, 0, 0, 0]);
310                            }
311                        }
312
313                        [Token::Register(reg, _), Token::ImmediateValue(imm, _)] => {
314                            // 1 byte register number (strip 'r' prefix)
315                            bytes.push(*reg);
316
317                            // 2 bytes of zeros (offset/reserved)
318                            bytes.extend_from_slice(&[0, 0]);
319
320                            // 4 bytes immediate value in little-endian
321                            let imm32 = match imm {
322                                ImmediateValue::Int(val) => *val as i32,
323                                ImmediateValue::Addr(val) => {
324                                    debug_info.register_hint = RegisterHint {
325                                        register: *reg as usize,
326                                        register_type: RegisterType::Addr,
327                                    };
328                                    *val as i32
329                                }
330                            };
331                            bytes.extend_from_slice(&imm32.to_le_bytes());
332                        }
333
334                        [
335                            Token::Register(reg, _),
336                            Token::ImmediateValue(imm, _),
337                            Token::ImmediateValue(offset, _),
338                        ] => {
339                            // 1 byte register number (strip 'r' prefix)
340                            bytes.push(*reg);
341
342                            // 2 bytes of offset in little-endian
343                            let offset16 = match offset {
344                                ImmediateValue::Int(val) => *val as u16,
345                                ImmediateValue::Addr(val) => *val as u16,
346                            };
347                            bytes.extend_from_slice(&offset16.to_le_bytes());
348
349                            // 4 bytes immediate value in little-endianß
350                            let imm32 = match imm {
351                                ImmediateValue::Int(val) => *val as i32,
352                                ImmediateValue::Addr(val) => {
353                                    debug_info.register_hint = RegisterHint {
354                                        register: *reg as usize,
355                                        register_type: RegisterType::Addr,
356                                    };
357                                    *val as i32
358                                }
359                            };
360                            bytes.extend_from_slice(&imm32.to_le_bytes());
361                        }
362
363                        [Token::Register(dst, _), Token::Register(src, _)] => {
364                            // Convert register strings to numbers
365                            let dst_num = dst;
366                            let src_num = src;
367
368                            // Combine src and dst into a single byte (src in high nibble, dst in low nibble)
369                            let reg_byte = (src_num << 4) | dst_num;
370                            bytes.push(reg_byte);
371                        }
372                        [
373                            Token::Register(dst, _),
374                            Token::Register(reg, _),
375                            Token::ImmediateValue(offset, _),
376                        ] => {
377                            // Combine base register and destination register into a single byte
378                            let reg_byte = (reg << 4) | dst;
379                            bytes.push(reg_byte);
380
381                            // Add the offset as a 16-bit value in little-endian
382                            let offset16 = match offset {
383                                ImmediateValue::Int(val) => *val as u16,
384                                ImmediateValue::Addr(val) => *val as u16,
385                            };
386                            bytes.extend_from_slice(&offset16.to_le_bytes());
387                        }
388                        [
389                            Token::Register(reg, _),
390                            Token::ImmediateValue(offset, _),
391                            Token::Register(dst, _),
392                        ] => {
393                            // Combine base register and destination register into a single byte
394                            let reg_byte = (dst << 4) | reg;
395                            bytes.push(reg_byte);
396
397                            // Add the offset as a 16-bit value in little-endian
398                            let offset16 = match offset {
399                                ImmediateValue::Int(val) => *val as u16,
400                                ImmediateValue::Addr(val) => *val as u16,
401                            };
402                            bytes.extend_from_slice(&offset16.to_le_bytes());
403                        }
404
405                        _ => {}
406                    }
407                }
408
409                // Add padding to make it 8 or 16 bytes depending on opcode
410                let target_len = if *opcode == Opcode::Lddw { 16 } else { 8 };
411                while bytes.len() < target_len {
412                    bytes.push(0);
413                }
414
415                debug_map.insert(*offset, debug_info);
416
417                Some((bytes, debug_map))
418            }
419            ASTNode::ROData {
420                rodata: ROData { name: _, args, .. },
421                ..
422            } => {
423                let mut bytes = Vec::new();
424                let debug_map = HashMap::<u64, DebugInfo>::new();
425                match (&args[0], &args[1]) {
426                    (Token::Directive(_, _), Token::StringLiteral(str_literal, _)) => {
427                        let str_bytes = str_literal.as_bytes().to_vec();
428                        bytes.extend(str_bytes);
429                    }
430                    (Token::Directive(directive, _), Token::VectorLiteral(values, _)) => {
431                        if directive == "byte" {
432                            for value in values {
433                                let imm8 = match value {
434                                    ImmediateValue::Int(val) => *val as i8,
435                                    ImmediateValue::Addr(val) => *val as i8,
436                                };
437                                bytes.extend(imm8.to_le_bytes());
438                            }
439                        } else if directive == "short" {
440                            for value in values {
441                                let imm16 = match value {
442                                    ImmediateValue::Int(val) => *val as i16,
443                                    ImmediateValue::Addr(val) => *val as i16,
444                                };
445                                bytes.extend(imm16.to_le_bytes());
446                            }
447                        } else if directive == "int" || directive == "long" {
448                            for value in values {
449                                let imm32 = match value {
450                                    ImmediateValue::Int(val) => *val as i32,
451                                    ImmediateValue::Addr(val) => *val as i32,
452                                };
453                                bytes.extend(imm32.to_le_bytes());
454                            }
455                        } else if directive == "quad" {
456                            for value in values {
457                                let imm64 = match value {
458                                    ImmediateValue::Int(val) => *val,
459                                    ImmediateValue::Addr(val) => *val,
460                                };
461                                bytes.extend(imm64.to_le_bytes());
462                            }
463                        } else {
464                            panic!("Invalid ROData declaration");
465                        }
466                    }
467
468                    _ => panic!("Invalid ROData declaration"),
469                }
470                Some((bytes, debug_map))
471            }
472            _ => None,
473        }
474    }
475
476    // Keep the old bytecode method for backward compatibility
477    pub fn bytecode(&self) -> Option<Vec<u8>> {
478        self.bytecode_with_debug_map().map(|(bytes, _)| bytes)
479    }
480}