sbpf_assembler/
parser.rs

1use {
2    crate::{
3        ast::AST,
4        astnode::{ASTNode, ExternDecl, GlobalDecl, Label, ROData, RodataDecl},
5        dynsym::{DynamicSymbolMap, RelDynMap},
6        errors::CompileError,
7        section::{CodeSection, DataSection},
8    },
9    either::Either,
10    pest::{Parser, iterators::Pair},
11    pest_derive::Parser,
12    sbpf_common::{
13        inst_param::{Number, Register},
14        instruction::Instruction,
15        opcode::Opcode,
16    },
17    std::{collections::HashMap, str::FromStr},
18};
/// Parser type for sBPF assembly source.
///
/// The `Parser` implementation (and the `Rule` enum used throughout this
/// module) is generated by `pest_derive` from the grammar file `sbpf.pest`.
#[derive(Parser)]
#[grammar = "sbpf.pest"]
pub struct SbpfParser;
22
/// Context containing all mutable state during parsing
///
/// One instance is created per call to `parse` and threaded through the
/// statement/directive handlers.
struct ParseContext<'a> {
    /// AST being populated; handlers push text and rodata nodes into it.
    ast: &'a mut AST,
    /// Named constants defined by `.equ` directives, keyed by identifier.
    const_map: &'a mut HashMap<String, Number>,
    /// Span of each label seen so far, keyed by label name; used to report
    /// duplicate label definitions.
    label_spans: &'a mut HashMap<String, std::ops::Range<usize>>,
    /// Diagnostics accumulated while processing statements.
    errors: Vec<CompileError>,
    /// `true` after a `rodata` section directive, `false` after a `text` one.
    rodata_phase: bool,
    /// Running offset of the next instruction; advanced by each emitted
    /// instruction's size.
    text_offset: u64,
    /// Running offset of the next rodata entry; advanced by each entry's size.
    rodata_offset: u64,
    /// Set once a `MissingTextDirective` error has been reported so that it is
    /// emitted at most once.
    missing_text_directive: bool,
}
34
/// BPF_X flag: Converts immediate variant opcodes to register variant opcodes
///
/// The ALU and jump handlers OR this bit into the opcode byte obtained from the
/// mnemonic before converting the byte back into an `Opcode`.
const BPF_X: u8 = 0x08;
37
/// Token types used in the AST
///
/// Every variant carries the byte range of the source text it was built from.
#[derive(Debug, Clone)]
pub enum Token {
    /// Directive name (stored without a leading dot, e.g. `ascii`, `byte`).
    Directive(String, std::ops::Range<usize>),
    /// Symbol name, e.g. one of the symbols listed in an extern declaration.
    Identifier(String, std::ops::Range<usize>),
    /// A single numeric value.
    ImmediateValue(Number, std::ops::Range<usize>),
    /// Content of a string literal, e.g. the payload of an `ascii` directive.
    StringLiteral(String, std::ops::Range<usize>),
    /// List of numeric values, e.g. the operands of a `byte`/`quad` directive.
    VectorLiteral(Vec<Number>, std::ops::Range<usize>),
}
47
/// Successful output of [`parse`], as produced by `AST::build_program`.
pub struct ParseResult {
    // TODO: parse result is basically 1. static part 2. dynamic part of the program
    /// Code section of the assembled program.
    pub code_section: CodeSection,

    /// Data section of the assembled program.
    pub data_section: DataSection,

    /// Dynamic symbol table.
    pub dynamic_symbols: DynamicSymbolMap,

    /// Dynamic relocation entries.
    pub relocation_data: RelDynMap,

    // TODO: this can be removed and dynamic-ness should just be
    // determined by if there's any dynamic symbol
    /// Whether the program is static (see the TODO above: presumably this
    /// mirrors "no dynamic symbols" — confirm against `build_program`).
    pub prog_is_static: bool,
}
62
63pub fn parse(source: &str) -> Result<ParseResult, Vec<CompileError>> {
64    let pairs = SbpfParser::parse(Rule::program, source).map_err(|e| {
65        vec![CompileError::ParseError {
66            error: e.to_string(),
67            span: 0..source.len(),
68            custom_label: None,
69        }]
70    })?;
71
72    let mut ast = AST::new();
73    let mut const_map = HashMap::<String, Number>::new();
74    let mut label_spans = HashMap::<String, std::ops::Range<usize>>::new();
75
76    let (text_offset, rodata_offset, errors) = {
77        let mut ctx = ParseContext {
78            ast: &mut ast,
79            const_map: &mut const_map,
80            label_spans: &mut label_spans,
81            errors: Vec::new(),
82            rodata_phase: false,
83            text_offset: 0,
84            rodata_offset: 0,
85            missing_text_directive: false,
86        };
87
88        for pair in pairs {
89            if pair.as_rule() == Rule::program {
90                for statement in pair.into_inner() {
91                    if statement.as_rule() == Rule::EOI {
92                        continue;
93                    }
94
95                    process_statement(statement, &mut ctx);
96                }
97            }
98        }
99
100        (ctx.text_offset, ctx.rodata_offset, ctx.errors)
101    };
102
103    if !errors.is_empty() {
104        return Err(errors);
105    }
106
107    ast.set_text_size(text_offset);
108    ast.set_rodata_size(rodata_offset);
109
110    ast.build_program()
111}
112
113fn process_statement(pair: Pair<Rule>, ctx: &mut ParseContext) {
114    for inner in pair.into_inner() {
115        match inner.as_rule() {
116            Rule::label => {
117                let mut label_opt = None;
118                let mut directive_opt = None;
119                let mut instruction_opt = None;
120
121                for item in inner.into_inner() {
122                    match item.as_rule() {
123                        Rule::identifier | Rule::numeric_label => {
124                            match extract_label_from_pair(item) {
125                                Ok(label) => label_opt = Some(label),
126                                Err(e) => ctx.errors.push(e),
127                            }
128                        }
129                        Rule::directive_inner => {
130                            directive_opt = Some(item);
131                        }
132                        Rule::instruction => {
133                            instruction_opt = Some(item);
134                        }
135                        _ => {}
136                    }
137                }
138
139                if let Some((label_name, label_span)) = label_opt {
140                    // Check for duplicate labels
141                    if let Some(original_span) = ctx.label_spans.get(&label_name) {
142                        ctx.errors.push(CompileError::DuplicateLabel {
143                            label: label_name,
144                            span: label_span,
145                            original_span: original_span.clone(),
146                            custom_label: Some("Label already defined".to_string()),
147                        });
148                        continue;
149                    }
150                    ctx.label_spans
151                        .insert(label_name.clone(), label_span.clone());
152
153                    if ctx.rodata_phase {
154                        // Handle rodata label with directive
155                        if let Some(dir_pair) = directive_opt {
156                            match process_rodata_directive(
157                                label_name.clone(),
158                                label_span.clone(),
159                                dir_pair,
160                            ) {
161                                Ok(rodata) => {
162                                    let size = rodata.get_size();
163                                    ctx.ast.rodata_nodes.push(ASTNode::ROData {
164                                        rodata,
165                                        offset: ctx.rodata_offset,
166                                    });
167                                    ctx.rodata_offset += size;
168                                }
169                                Err(e) => ctx.errors.push(e),
170                            }
171                        } else if let Some(inst_pair) = instruction_opt {
172                            if let Err(e) = process_instruction(inst_pair, ctx.const_map) {
173                                ctx.errors.push(e);
174                            }
175                            if !ctx.missing_text_directive {
176                                ctx.missing_text_directive = true;
177                                ctx.errors.push(CompileError::MissingTextDirective {
178                                    span: label_span,
179                                    custom_label: None,
180                                });
181                            }
182                        }
183                    } else {
184                        ctx.ast.nodes.push(ASTNode::Label {
185                            label: Label {
186                                name: label_name,
187                                span: label_span,
188                            },
189                            offset: ctx.text_offset,
190                        });
191
192                        if let Some(inst_pair) = instruction_opt {
193                            match process_instruction(inst_pair, ctx.const_map) {
194                                Ok(instruction) => {
195                                    let size = instruction.get_size();
196                                    ctx.ast.nodes.push(ASTNode::Instruction {
197                                        instruction,
198                                        offset: ctx.text_offset,
199                                    });
200                                    ctx.text_offset += size;
201                                }
202                                Err(e) => ctx.errors.push(e),
203                            }
204                        }
205                    }
206                }
207            }
208            Rule::directive => {
209                process_directive_statement(inner, ctx);
210            }
211            Rule::instruction => {
212                let span = inner.as_span();
213                let span_range = span.start()..span.end();
214
215                match process_instruction(inner, ctx.const_map) {
216                    Ok(instruction) => {
217                        if !ctx.rodata_phase {
218                            let size = instruction.get_size();
219                            ctx.ast.nodes.push(ASTNode::Instruction {
220                                instruction,
221                                offset: ctx.text_offset,
222                            });
223                            ctx.text_offset += size;
224                        }
225                    }
226                    Err(e) => ctx.errors.push(e),
227                }
228
229                if ctx.rodata_phase && !ctx.missing_text_directive {
230                    ctx.missing_text_directive = true;
231                    ctx.errors.push(CompileError::MissingTextDirective {
232                        span: span_range,
233                        custom_label: None,
234                    });
235                }
236            }
237            _ => {}
238        }
239    }
240}
241
242fn extract_label_from_pair(
243    pair: Pair<Rule>,
244) -> Result<(String, std::ops::Range<usize>), CompileError> {
245    let span = pair.as_span();
246    Ok((pair.as_str().to_string(), span.start()..span.end()))
247}
248
249fn process_directive_statement(pair: Pair<Rule>, ctx: &mut ParseContext) {
250    for directive_inner_pair in pair.into_inner() {
251        process_directive_inner(directive_inner_pair, ctx);
252    }
253}
254
255fn process_directive_inner(pair: Pair<Rule>, ctx: &mut ParseContext) {
256    for inner in pair.into_inner() {
257        match inner.as_rule() {
258            Rule::directive_globl => {
259                let span = inner.as_span();
260                for globl_inner in inner.into_inner() {
261                    if globl_inner.as_rule() == Rule::globl_symbol {
262                        let entry_label = globl_inner.as_str().to_string();
263                        ctx.ast.entry_label = Some(entry_label.clone());
264                        ctx.ast.nodes.push(ASTNode::GlobalDecl {
265                            global_decl: GlobalDecl {
266                                entry_label,
267                                span: span.start()..span.end(),
268                            },
269                        });
270                    }
271                }
272            }
273            Rule::directive_extern => {
274                let span = inner.as_span();
275                let mut symbols = Vec::new();
276                for extern_inner in inner.into_inner() {
277                    if extern_inner.as_rule() == Rule::symbol {
278                        let symbol_span = extern_inner.as_span();
279                        symbols.push(Token::Identifier(
280                            extern_inner.as_str().to_string(),
281                            symbol_span.start()..symbol_span.end(),
282                        ));
283                    }
284                }
285                ctx.ast.nodes.push(ASTNode::ExternDecl {
286                    extern_decl: ExternDecl {
287                        args: symbols,
288                        span: span.start()..span.end(),
289                    },
290                });
291            }
292            Rule::directive_equ => {
293                let mut ident = None;
294                let mut value = None;
295
296                for equ_inner in inner.into_inner() {
297                    match equ_inner.as_rule() {
298                        Rule::identifier => {
299                            ident = Some(equ_inner.as_str().to_string());
300                        }
301                        Rule::expression => match eval_expression(equ_inner, ctx.const_map) {
302                            Ok(v) => value = Some(v),
303                            Err(e) => ctx.errors.push(e),
304                        },
305                        _ => {}
306                    }
307                }
308
309                if let (Some(name), Some(val)) = (ident, value) {
310                    ctx.const_map.insert(name, val);
311                }
312            }
313            Rule::directive_section => {
314                let section_name = inner.as_str().trim_start_matches('.');
315                match section_name {
316                    "text" => ctx.rodata_phase = false,
317                    "rodata" => {
318                        ctx.rodata_phase = true;
319                        let span = inner.as_span();
320                        ctx.ast.nodes.push(ASTNode::RodataDecl {
321                            rodata_decl: RodataDecl {
322                                span: span.start()..span.end(),
323                            },
324                        });
325                    }
326                    _ => {}
327                }
328            }
329            _ => {}
330        }
331    }
332}
333
334fn process_rodata_directive(
335    label_name: String,
336    label_span: std::ops::Range<usize>,
337    pair: Pair<Rule>,
338) -> Result<ROData, CompileError> {
339    let inner_pair = if pair.as_rule() == Rule::directive_inner {
340        pair
341    } else {
342        pair.into_inner()
343            .next()
344            .ok_or_else(|| CompileError::ParseError {
345                error: "No directive content found".to_string(),
346                span: label_span.clone(),
347                custom_label: None,
348            })?
349    };
350
351    for inner in inner_pair.into_inner() {
352        let directive_span = inner.as_span();
353
354        match inner.as_rule() {
355            Rule::directive_ascii => {
356                for ascii_inner in inner.into_inner() {
357                    if ascii_inner.as_rule() == Rule::string_literal {
358                        for content_inner in ascii_inner.into_inner() {
359                            if content_inner.as_rule() == Rule::string_content {
360                                let content = content_inner.as_str().to_string();
361                                let content_span = content_inner.as_span();
362                                return Ok(ROData {
363                                    name: label_name,
364                                    args: vec![
365                                        Token::Directive(
366                                            "ascii".to_string(),
367                                            directive_span.start()..directive_span.end(),
368                                        ),
369                                        Token::StringLiteral(
370                                            content,
371                                            content_span.start()..content_span.end(),
372                                        ),
373                                    ],
374                                    span: label_span,
375                                });
376                            }
377                        }
378                    }
379                }
380            }
381            Rule::directive_byte
382            | Rule::directive_short
383            | Rule::directive_word
384            | Rule::directive_int
385            | Rule::directive_long
386            | Rule::directive_quad => {
387                let directive_name = match inner.as_rule() {
388                    Rule::directive_byte => "byte",
389                    Rule::directive_short => "short",
390                    Rule::directive_word => "word",
391                    Rule::directive_int => "int",
392                    Rule::directive_long => "long",
393                    Rule::directive_quad => "quad",
394                    _ => "byte",
395                };
396
397                let mut values = Vec::new();
398                for byte_inner in inner.into_inner() {
399                    if byte_inner.as_rule() == Rule::number {
400                        values.push(parse_number(byte_inner)?);
401                    }
402                }
403
404                let values_span = directive_span.start()..directive_span.end();
405                return Ok(ROData {
406                    name: label_name,
407                    args: vec![
408                        Token::Directive(
409                            directive_name.to_string(),
410                            directive_span.start()..directive_span.end(),
411                        ),
412                        Token::VectorLiteral(values, values_span),
413                    ],
414                    span: label_span,
415                });
416            }
417            _ => {}
418        }
419    }
420
421    Err(CompileError::InvalidRodataDecl {
422        span: label_span,
423        custom_label: None,
424    })
425}
426
427fn process_instruction(
428    pair: Pair<Rule>,
429    const_map: &HashMap<String, Number>,
430) -> Result<Instruction, CompileError> {
431    let outer_span = pair.as_span();
432    let outer_span_range = outer_span.start()..outer_span.end();
433
434    for inner in pair.into_inner() {
435        let span = inner.as_span();
436        let span_range = span.start()..span.end();
437
438        match inner.as_rule() {
439            Rule::instr_exit => {
440                return Ok(Instruction {
441                    opcode: Opcode::Exit,
442                    dst: None,
443                    src: None,
444                    off: None,
445                    imm: None,
446                    span: span_range,
447                });
448            }
449            Rule::instr_lddw => return process_lddw(inner, const_map, span_range),
450            Rule::instr_call => return process_call(inner, const_map, span_range),
451            Rule::instr_callx => return process_callx(inner, span_range),
452            Rule::instr_neg32 => return process_neg32(inner, span_range),
453            Rule::instr_neg64 => return process_neg64(inner, span_range),
454            Rule::instr_alu64_imm | Rule::instr_alu32_imm => {
455                return process_alu_imm(inner, const_map, span_range);
456            }
457            Rule::instr_alu64_reg | Rule::instr_alu32_reg => {
458                return process_alu_reg(inner, span_range);
459            }
460            Rule::instr_load => return process_load(inner, const_map, span_range),
461            Rule::instr_store_imm => return process_store_imm(inner, const_map, span_range),
462            Rule::instr_store_reg => return process_store_reg(inner, const_map, span_range),
463            Rule::instr_jump_imm => return process_jump_imm(inner, const_map, span_range),
464            Rule::instr_jump_reg => return process_jump_reg(inner, span_range),
465            Rule::instr_jump_uncond => return process_jump_uncond(inner, const_map, span_range),
466            Rule::instr_endian => return process_endian(inner, span_range),
467            _ => {}
468        }
469    }
470
471    Err(CompileError::ParseError {
472        error: "Invalid instruction".to_string(),
473        span: outer_span_range,
474        custom_label: None,
475    })
476}
477
478fn process_lddw(
479    pair: Pair<Rule>,
480    const_map: &HashMap<String, Number>,
481    span: std::ops::Range<usize>,
482) -> Result<Instruction, CompileError> {
483    let mut dst = None;
484    let mut imm = None;
485
486    for inner in pair.into_inner() {
487        match inner.as_rule() {
488            Rule::register => dst = Some(parse_register(inner)?),
489            Rule::operand => imm = Some(parse_operand(inner, const_map)?),
490            _ => {}
491        }
492    }
493
494    Ok(Instruction {
495        opcode: Opcode::Lddw,
496        dst,
497        src: None,
498        off: None,
499        imm,
500        span,
501    })
502}
503
504fn process_load(
505    pair: Pair<Rule>,
506    const_map: &HashMap<String, Number>,
507    span: std::ops::Range<usize>,
508) -> Result<Instruction, CompileError> {
509    let mut opcode = None;
510    let mut dst = None;
511    let mut src = None;
512    let mut off = None;
513
514    for inner in pair.into_inner() {
515        match inner.as_rule() {
516            Rule::load_op => opcode = Opcode::from_str(inner.as_str()).ok(),
517            Rule::register => dst = Some(parse_register(inner)?),
518            Rule::memory_ref => {
519                let (s, o) = parse_memory_ref(inner, const_map)?;
520                src = Some(s);
521                off = Some(o);
522            }
523            _ => {}
524        }
525    }
526
527    Ok(Instruction {
528        opcode: opcode.unwrap_or(Opcode::Exit),
529        dst,
530        src,
531        off,
532        imm: None,
533        span,
534    })
535}
536
537fn process_store_imm(
538    pair: Pair<Rule>,
539    const_map: &HashMap<String, Number>,
540    span: std::ops::Range<usize>,
541) -> Result<Instruction, CompileError> {
542    let mut opcode = None;
543    let mut dst = None;
544    let mut off = None;
545    let mut imm = None;
546
547    for inner in pair.into_inner() {
548        match inner.as_rule() {
549            Rule::store_op => opcode = Opcode::from_str(inner.as_str()).ok(),
550            Rule::memory_ref => {
551                let (d, o) = parse_memory_ref(inner, const_map)?;
552                dst = Some(d);
553                off = Some(o);
554            }
555            Rule::operand => imm = Some(parse_operand(inner, const_map)?),
556            _ => {}
557        }
558    }
559
560    Ok(Instruction {
561        opcode: opcode.unwrap_or(Opcode::Exit),
562        dst,
563        src: None,
564        off,
565        imm,
566        span,
567    })
568}
569
570fn process_store_reg(
571    pair: Pair<Rule>,
572    const_map: &HashMap<String, Number>,
573    span: std::ops::Range<usize>,
574) -> Result<Instruction, CompileError> {
575    let mut opcode = None;
576    let mut dst = None;
577    let mut src = None;
578    let mut off = None;
579
580    for inner in pair.into_inner() {
581        match inner.as_rule() {
582            Rule::store_op => opcode = Opcode::from_str(inner.as_str()).ok(),
583            Rule::memory_ref => {
584                let (d, o) = parse_memory_ref(inner, const_map)?;
585                dst = Some(d);
586                off = Some(o);
587            }
588            Rule::register => src = Some(parse_register(inner)?),
589            _ => {}
590        }
591    }
592
593    Ok(Instruction {
594        opcode: opcode.unwrap_or(Opcode::Exit),
595        dst,
596        src,
597        off,
598        imm: None,
599        span,
600    })
601}
602
603fn process_alu_imm(
604    pair: Pair<Rule>,
605    const_map: &HashMap<String, Number>,
606    span: std::ops::Range<usize>,
607) -> Result<Instruction, CompileError> {
608    let mut opcode = None;
609    let mut dst = None;
610    let mut imm = None;
611
612    for inner in pair.into_inner() {
613        match inner.as_rule() {
614            Rule::alu_64_op | Rule::alu_32_op => opcode = Opcode::from_str(inner.as_str()).ok(),
615            Rule::register => dst = Some(parse_register(inner)?),
616            Rule::operand => imm = Some(parse_operand(inner, const_map)?),
617            _ => {}
618        }
619    }
620
621    Ok(Instruction {
622        opcode: opcode.unwrap_or(Opcode::Exit),
623        dst,
624        src: None,
625        off: None,
626        imm,
627        span,
628    })
629}
630
631fn process_alu_reg(
632    pair: Pair<Rule>,
633    span: std::ops::Range<usize>,
634) -> Result<Instruction, CompileError> {
635    let mut opcode = None;
636    let mut dst = None;
637    let mut src = None;
638
639    for inner in pair.into_inner() {
640        match inner.as_rule() {
641            Rule::alu_64_op | Rule::alu_32_op => {
642                let op_str = inner.as_str();
643                let inner_span = inner.as_span();
644                if let Ok(opc) = Opcode::from_str(op_str) {
645                    // Convert to register variant using BPF_X flag
646                    let reg_opcode = Into::<u8>::into(opc) | BPF_X;
647                    opcode =
648                        Some(
649                            reg_opcode
650                                .try_into()
651                                .map_err(|e| CompileError::BytecodeError {
652                                    error: format!("Invalid opcode 0x{:02x}: {}", reg_opcode, e),
653                                    span: inner_span.start()..inner_span.end(),
654                                    custom_label: None,
655                                })?,
656                        );
657                }
658            }
659            Rule::register => {
660                if dst.is_none() {
661                    dst = Some(parse_register(inner)?);
662                } else {
663                    src = Some(parse_register(inner)?);
664                }
665            }
666            _ => {}
667        }
668    }
669
670    Ok(Instruction {
671        opcode: opcode.unwrap_or(Opcode::Exit),
672        dst,
673        src,
674        off: None,
675        imm: None,
676        span,
677    })
678}
679
680fn process_jump_imm(
681    pair: Pair<Rule>,
682    const_map: &HashMap<String, Number>,
683    span: std::ops::Range<usize>,
684) -> Result<Instruction, CompileError> {
685    let mut opcode = None;
686    let mut dst = None;
687    let mut imm = None;
688    let mut off = None;
689
690    for inner in pair.into_inner() {
691        match inner.as_rule() {
692            Rule::jump_op => opcode = Opcode::from_str(inner.as_str()).ok(),
693            Rule::register => dst = Some(parse_register(inner)?),
694            Rule::operand => imm = Some(parse_operand(inner, const_map)?),
695            Rule::jump_target => off = Some(parse_jump_target(inner, const_map)?),
696            _ => {}
697        }
698    }
699
700    Ok(Instruction {
701        opcode: opcode.unwrap_or(Opcode::Exit),
702        dst,
703        src: None,
704        off,
705        imm,
706        span,
707    })
708}
709
710fn process_jump_reg(
711    pair: Pair<Rule>,
712    span: std::ops::Range<usize>,
713) -> Result<Instruction, CompileError> {
714    let mut opcode = None;
715    let mut dst = None;
716    let mut src = None;
717    let mut off = None;
718
719    for inner in pair.into_inner() {
720        match inner.as_rule() {
721            Rule::jump_op => {
722                let op_str = inner.as_str();
723                let inner_span = inner.as_span();
724                if let Ok(opc) = Opcode::from_str(op_str) {
725                    // Convert Imm variant to Reg variant using BPF_X flag
726                    let reg_opcode = Into::<u8>::into(opc) | BPF_X;
727                    opcode =
728                        Some(
729                            reg_opcode
730                                .try_into()
731                                .map_err(|e| CompileError::BytecodeError {
732                                    error: format!("Invalid opcode 0x{:02x}: {}", reg_opcode, e),
733                                    span: inner_span.start()..inner_span.end(),
734                                    custom_label: None,
735                                })?,
736                        );
737                }
738            }
739            Rule::register => {
740                if dst.is_none() {
741                    dst = Some(parse_register(inner)?);
742                } else {
743                    src = Some(parse_register(inner)?);
744                }
745            }
746            Rule::jump_target => off = Some(parse_jump_target(inner, &HashMap::new())?),
747            _ => {}
748        }
749    }
750
751    Ok(Instruction {
752        opcode: opcode.unwrap_or(Opcode::Exit),
753        dst,
754        src,
755        off,
756        imm: None,
757        span,
758    })
759}
760
761fn process_jump_uncond(
762    pair: Pair<Rule>,
763    const_map: &HashMap<String, Number>,
764    span: std::ops::Range<usize>,
765) -> Result<Instruction, CompileError> {
766    let mut off = None;
767
768    for inner in pair.into_inner() {
769        if inner.as_rule() == Rule::jump_target {
770            off = Some(parse_jump_target(inner, const_map)?);
771        }
772    }
773
774    Ok(Instruction {
775        opcode: Opcode::Ja,
776        dst: None,
777        src: None,
778        off,
779        imm: None,
780        span,
781    })
782}
783
784fn process_call(
785    pair: Pair<Rule>,
786    const_map: &HashMap<String, Number>,
787    span: std::ops::Range<usize>,
788) -> Result<Instruction, CompileError> {
789    let mut imm = None;
790
791    for inner in pair.into_inner() {
792        if inner.as_rule() == Rule::symbol {
793            if let Some(symbol) = const_map.get(inner.as_str()) {
794                imm = Some(Either::Right(symbol.to_owned()));
795            } else {
796                imm = Some(Either::Left(inner.as_str().to_string()));
797            }
798        }
799    }
800
801    Ok(Instruction {
802        opcode: Opcode::Call,
803        dst: None,
804        src: None,
805        off: None,
806        imm,
807        span,
808    })
809}
810
811fn process_callx(
812    pair: Pair<Rule>,
813    span: std::ops::Range<usize>,
814) -> Result<Instruction, CompileError> {
815    let mut dst = None;
816
817    for inner in pair.into_inner() {
818        if inner.as_rule() == Rule::register {
819            dst = Some(parse_register(inner)?);
820        }
821    }
822
823    Ok(Instruction {
824        opcode: Opcode::Callx,
825        dst,
826        src: None,
827        off: None,
828        imm: None,
829        span,
830    })
831}
832
833fn process_neg32(
834    pair: Pair<Rule>,
835    span: std::ops::Range<usize>,
836) -> Result<Instruction, CompileError> {
837    let mut dst = None;
838
839    for inner in pair.into_inner() {
840        if inner.as_rule() == Rule::register {
841            dst = Some(parse_register(inner)?);
842        }
843    }
844
845    Ok(Instruction {
846        opcode: Opcode::Neg32,
847        dst,
848        src: None,
849        off: None,
850        imm: None,
851        span,
852    })
853}
854
855fn process_neg64(
856    pair: Pair<Rule>,
857    span: std::ops::Range<usize>,
858) -> Result<Instruction, CompileError> {
859    let mut dst = None;
860
861    for inner in pair.into_inner() {
862        if inner.as_rule() == Rule::register {
863            dst = Some(parse_register(inner)?);
864        }
865    }
866
867    Ok(Instruction {
868        opcode: Opcode::Neg64,
869        dst,
870        src: None,
871        off: None,
872        imm: None,
873        span,
874    })
875}
876
877fn process_endian(
878    pair: Pair<Rule>,
879    span: std::ops::Range<usize>,
880) -> Result<Instruction, CompileError> {
881    let mut opcode = None;
882    let mut dst = None;
883    let mut imm = None;
884
885    for inner in pair.into_inner() {
886        match inner.as_rule() {
887            Rule::endian_op => {
888                let op_str = inner.as_str();
889                let inner_span = inner.as_span();
890                // Extract opcode and size from instruction (example: "be16" = be opcode, 16 bits)
891                let (opc, size) = if let Some(size_str) = op_str.strip_prefix("be") {
892                    let size = size_str
893                        .parse::<i64>()
894                        .map_err(|_| CompileError::ParseError {
895                            error: format!("Invalid endian size in '{}'", op_str),
896                            span: inner_span.start()..inner_span.end(),
897                            custom_label: None,
898                        })?;
899                    (Opcode::Be, size)
900                } else if let Some(size_str) = op_str.strip_prefix("le") {
901                    let size = size_str
902                        .parse::<i64>()
903                        .map_err(|_| CompileError::ParseError {
904                            error: format!("Invalid endian size in '{}'", op_str),
905                            span: inner_span.start()..inner_span.end(),
906                            custom_label: None,
907                        })?;
908                    (Opcode::Le, size)
909                } else {
910                    return Err(CompileError::ParseError {
911                        error: format!("Invalid endian operation '{}'", op_str),
912                        span: inner_span.start()..inner_span.end(),
913                        custom_label: None,
914                    });
915                };
916                opcode = Some(opc);
917                imm = Some(Either::Right(Number::Int(size)));
918            }
919            Rule::register => dst = Some(parse_register(inner)?),
920            _ => {}
921        }
922    }
923
924    Ok(Instruction {
925        opcode: opcode.unwrap_or(Opcode::Exit),
926        dst,
927        src: None,
928        off: None,
929        imm,
930        span,
931    })
932}
933
934fn parse_register(pair: Pair<Rule>) -> Result<Register, CompileError> {
935    let reg_str = pair.as_str();
936    let span = pair.as_span();
937
938    if let Ok(n) = reg_str[1..].parse::<u8>() {
939        Ok(Register { n })
940    } else {
941        Err(CompileError::InvalidRegister {
942            register: reg_str.to_string(),
943            span: span.start()..span.end(),
944            custom_label: None,
945        })
946    }
947}
948
949fn parse_operand(
950    pair: Pair<Rule>,
951    const_map: &HashMap<String, Number>,
952) -> Result<Either<String, Number>, CompileError> {
953    let span = pair.as_span();
954    let span_range = span.start()..span.end();
955
956    for inner in pair.into_inner() {
957        match inner.as_rule() {
958            Rule::number => return Ok(Either::Right(parse_number(inner)?)),
959            Rule::symbol => {
960                let name = inner.as_str().to_string();
961                if let Some(value) = const_map.get(&name) {
962                    return Ok(Either::Right(value.clone()));
963                }
964                return Ok(Either::Left(name));
965            }
966            Rule::operand_expr => {
967                let mut sym_name = None;
968                let mut num_value = None;
969
970                for expr_inner in inner.into_inner() {
971                    match expr_inner.as_rule() {
972                        Rule::symbol => sym_name = Some(expr_inner.as_str().to_string()),
973                        Rule::number => num_value = Some(parse_number(expr_inner)?),
974                        _ => {}
975                    }
976                }
977
978                if let (Some(sym), Some(num)) = (sym_name, num_value) {
979                    if let Some(base_value) = const_map.get(&sym) {
980                        let result = base_value.clone() + num;
981                        return Ok(Either::Right(result));
982                    } else {
983                        return Ok(Either::Left(sym));
984                    }
985                }
986            }
987            _ => {}
988        }
989    }
990
991    Err(CompileError::ParseError {
992        error: "Invalid operand".to_string(),
993        span: span_range,
994        custom_label: None,
995    })
996}
997
998fn parse_jump_target(
999    pair: Pair<Rule>,
1000    _const_map: &HashMap<String, Number>,
1001) -> Result<Either<String, i16>, CompileError> {
1002    let span = pair.as_span();
1003    let span_range = span.start()..span.end();
1004
1005    for inner in pair.into_inner() {
1006        match inner.as_rule() {
1007            Rule::symbol | Rule::numeric_label_ref => {
1008                return Ok(Either::Left(inner.as_str().to_string()));
1009            }
1010            Rule::number => {
1011                let num = parse_number(inner)?;
1012                return Ok(Either::Right(num.to_i16()));
1013            }
1014            _ => {}
1015        }
1016    }
1017
1018    Err(CompileError::ParseError {
1019        error: "Invalid jump target".to_string(),
1020        span: span_range,
1021        custom_label: None,
1022    })
1023}
1024
1025fn parse_memory_ref(
1026    pair: Pair<Rule>,
1027    const_map: &HashMap<String, Number>,
1028) -> Result<(Register, Either<String, i16>), CompileError> {
1029    let mut reg = None;
1030    let mut accumulated_offset: i16 = 0;
1031    let mut unresolved_symbol: Option<String> = None;
1032    let mut sign: i16 = 1;
1033
1034    for inner in pair.into_inner() {
1035        match inner.as_rule() {
1036            Rule::register => {
1037                reg = Some(parse_register(inner)?);
1038            }
1039            Rule::memory_op => {
1040                sign = if inner.as_str() == "+" { 1 } else { -1 };
1041            }
1042            Rule::memory_offset => {
1043                for offset_inner in inner.into_inner() {
1044                    match offset_inner.as_rule() {
1045                        Rule::number => {
1046                            let num = parse_number(offset_inner)?;
1047                            accumulated_offset =
1048                                accumulated_offset.wrapping_add(sign * num.to_i16());
1049                        }
1050                        Rule::symbol => {
1051                            let name = offset_inner.as_str().to_string();
1052                            if let Some(value) = const_map.get(&name) {
1053                                accumulated_offset =
1054                                    accumulated_offset.wrapping_add(sign * value.to_i16());
1055                            } else if unresolved_symbol.is_none() {
1056                                unresolved_symbol = Some(name);
1057                            }
1058                        }
1059                        _ => {}
1060                    }
1061                }
1062            }
1063            _ => {}
1064        }
1065    }
1066
1067    let offset = if let Some(sym) = unresolved_symbol {
1068        Either::Left(sym)
1069    } else {
1070        Either::Right(accumulated_offset)
1071    };
1072
1073    Ok((reg.unwrap_or(Register { n: 0 }), offset))
1074}
1075
1076fn parse_number(pair: Pair<Rule>) -> Result<Number, CompileError> {
1077    let span = pair.as_span();
1078    let span_range = span.start()..span.end();
1079    let number_str = pair.as_str().replace('_', "");
1080
1081    // Try parsing as i64 first
1082    if let Ok(value) = number_str.parse::<i64>() {
1083        return Ok(Number::Int(value));
1084    }
1085
1086    let mut sign: i64 = 1;
1087    let value = if number_str.starts_with('-') {
1088        sign = -1;
1089        number_str.strip_prefix('-').unwrap()
1090    } else {
1091        number_str.as_str()
1092    };
1093
1094    if value.starts_with("0x") {
1095        let hex_str = value.trim_start_matches("0x");
1096        if let Ok(value) = u64::from_str_radix(hex_str, 16) {
1097            return Ok(Number::Addr(sign * (value as i64)));
1098        }
1099    }
1100
1101    Err(CompileError::InvalidNumber {
1102        number: number_str,
1103        span: span_range,
1104        custom_label: None,
1105    })
1106}
1107
1108fn eval_expression(
1109    pair: Pair<Rule>,
1110    const_map: &HashMap<String, Number>,
1111) -> Result<Number, CompileError> {
1112    let span = pair.as_span();
1113    let span_range = span.start()..span.end();
1114
1115    let mut stack = Vec::new();
1116    let mut op_stack = Vec::new();
1117
1118    for inner in pair.into_inner() {
1119        match inner.as_rule() {
1120            Rule::term => {
1121                let val = eval_term(inner, const_map)?;
1122                stack.push(val);
1123            }
1124            Rule::bin_op => {
1125                op_stack.push(inner.as_str());
1126            }
1127            _ => {}
1128        }
1129    }
1130
1131    // Apply operators
1132    while let Some(op) = op_stack.pop() {
1133        if stack.len() >= 2 {
1134            let b = stack.pop().unwrap();
1135            let a = stack.pop().unwrap();
1136            let result = match op {
1137                "+" => a + b,
1138                "-" => a - b,
1139                "*" => a * b,
1140                "/" => a / b,
1141                _ => a,
1142            };
1143            stack.push(result);
1144        }
1145    }
1146
1147    stack.pop().ok_or_else(|| CompileError::ParseError {
1148        error: "Invalid expression".to_string(),
1149        span: span_range,
1150        custom_label: None,
1151    })
1152}
1153
1154fn eval_term(
1155    pair: Pair<Rule>,
1156    const_map: &HashMap<String, Number>,
1157) -> Result<Number, CompileError> {
1158    let span = pair.as_span();
1159    let span_range = span.start()..span.end();
1160
1161    for inner in pair.into_inner() {
1162        match inner.as_rule() {
1163            Rule::expression => {
1164                return eval_expression(inner, const_map);
1165            }
1166            Rule::number => {
1167                return parse_number(inner);
1168            }
1169            Rule::symbol => {
1170                let name = inner.as_str().to_string();
1171                if let Some(value) = const_map.get(&name) {
1172                    return Ok(value.clone());
1173                }
1174                return Err(CompileError::ParseError {
1175                    error: format!("Undefined constant: {}", name),
1176                    span: inner.as_span().start()..inner.as_span().end(),
1177                    custom_label: None,
1178                });
1179            }
1180            _ => {}
1181        }
1182    }
1183
1184    Err(CompileError::ParseError {
1185        error: "Invalid term".to_string(),
1186        span: span_range,
1187        custom_label: None,
1188    })
1189}