Skip to main content

asm_rs/
parser.rs

1//! Multi-architecture assembly parser.
2//!
3//! Converts a stream of `Token`s from the lexer into a `Statement` list.
4//! Handles instructions, labels, directives, memory operands, size hints, and prefixes.
5//! Architecture-aware register parsing resolves naming conflicts (e.g. `r8` is
6//! x86-64 R8 vs ARM R8) based on the target `Arch`.
7
8use alloc::boxed::Box;
9use alloc::collections::BTreeMap;
10use alloc::string::String;
11use alloc::string::ToString;
12use alloc::vec;
13use alloc::vec::Vec;
14
15use crate::error::{AsmError, Span};
16use crate::ir::*;
17use crate::lexer::{Token, TokenKind};
18
/// Zero-allocation ASCII-lowercase into a caller-provided stack buffer.
///
/// Copies `s` into `buf`, lowercases the ASCII letters in place, and returns
/// the lowered text as a `&str` borrowed from `buf`. Non-ASCII bytes are
/// copied unchanged.
///
/// Inputs longer than `buf` are truncated. Truncation always lands on a UTF-8
/// character boundary, so a multi-byte character that straddles the end of
/// `buf` is dropped whole instead of invalidating the entire result.
#[inline]
fn to_lower_buf<'b>(s: &str, buf: &'b mut [u8]) -> &'b str {
    let mut len = s.len().min(buf.len());
    // Cutting in the middle of a multi-byte sequence would leave invalid
    // UTF-8 in `buf`; back off to the previous character boundary instead.
    // Index 0 is always a boundary, so this loop terminates.
    while !s.is_char_boundary(len) {
        len -= 1;
    }
    buf[..len].copy_from_slice(&s.as_bytes()[..len]);
    buf[..len].make_ascii_lowercase();
    // The copied prefix ends on a character boundary and ASCII lowercasing
    // never touches non-ASCII bytes, so this conversion cannot fail; the
    // fallback only exists to avoid a panic path.
    core::str::from_utf8(&buf[..len]).unwrap_or("")
}
30
31/// Parse a token stream into a list of IR statements.
32///
33/// # Errors
34///
35/// Returns `Err(AsmError)` if the token stream contains an unexpected token,
36/// a malformed directive, or an invalid instruction syntax.
37pub fn parse(tokens: &[Token<'_>]) -> Result<Vec<Statement>, AsmError> {
38    parse_with_arch(tokens, Arch::X86_64)
39}
40
41/// Parse with explicit architecture for register-name disambiguation.
42pub fn parse_with_arch(tokens: &[Token<'_>], arch: Arch) -> Result<Vec<Statement>, AsmError> {
43    parse_with_syntax(tokens, arch, Syntax::Intel)
44}
45
46/// Parse with explicit architecture and syntax dialect.
47pub fn parse_with_syntax(
48    tokens: &[Token<'_>],
49    arch: Arch,
50    syntax: Syntax,
51) -> Result<Vec<Statement>, AsmError> {
52    let mut parser = Parser::new(tokens, arch, syntax);
53    parser.parse_program()
54}
55
/// Parser state over a borrowed token slice.
///
/// A new `Parser` is built by `parse_with_syntax` for each parse request.
struct Parser<'a> {
    /// Token stream being parsed (borrowed from the caller).
    tokens: &'a [Token<'a>],
    /// Index into `tokens` of the next token to consume.
    pos: usize,
    /// Target architecture — controls register name resolution.
    arch: Arch,
    /// Syntax dialect — Intel (default) or AT&T/GAS. Updated when a
    /// `.syntax att` / `.syntax intel` directive is parsed.
    syntax: Syntax,
    /// Constants defined so far (via `.equ` or `NAME = expr`) — available to
    /// `parse_const_expr` so that directive arguments can reference previously
    /// defined constants.
    constants: BTreeMap<String, i128>,
}
68
69impl<'a> Parser<'a> {
70    fn new(tokens: &'a [Token<'a>], arch: Arch, syntax: Syntax) -> Self {
71        Self {
72            tokens,
73            pos: 0,
74            arch,
75            syntax,
76            constants: BTreeMap::new(),
77        }
78    }
79
80    #[inline]
81    fn peek(&self) -> &Token<'a> {
82        &self.tokens[self.pos.min(self.tokens.len() - 1)]
83    }
84
85    #[inline]
86    fn advance(&mut self) -> &Token<'a> {
87        let tok = &self.tokens[self.pos.min(self.tokens.len() - 1)];
88        if self.pos < self.tokens.len() {
89            self.pos += 1;
90        }
91        tok
92    }
93
94    #[inline]
95    fn at_end(&self) -> bool {
96        self.pos >= self.tokens.len() || self.peek().kind == TokenKind::Eof
97    }
98
99    fn expect_ident(&mut self) -> Result<(String, Span), AsmError> {
100        let tok = self.advance();
101        if tok.kind == TokenKind::Ident {
102            Ok((tok.text.to_string(), tok.span))
103        } else {
104            Err(AsmError::Syntax {
105                msg: alloc::format!("expected identifier, found '{}'", tok.text),
106                span: tok.span,
107            })
108        }
109    }
110
111    #[inline]
112    fn skip_newlines(&mut self) {
113        while !self.at_end() && self.peek().kind == TokenKind::Newline {
114            self.advance();
115        }
116    }
117
118    fn parse_program(&mut self) -> Result<Vec<Statement>, AsmError> {
119        // Heuristic: ~3 tokens per statement on average.
120        let mut stmts = Vec::with_capacity(self.tokens.len() / 3 + 1);
121        self.skip_newlines();
122        while !self.at_end() {
123            if let Some(stmt) = self.parse_statement()? {
124                stmts.push(stmt);
125            }
126            self.skip_newlines();
127        }
128        Ok(stmts)
129    }
130
131    fn parse_statement(&mut self) -> Result<Option<Statement>, AsmError> {
132        let tok = self.peek().clone();
133
134        match &tok.kind {
135            TokenKind::Eof => Ok(None),
136            TokenKind::Newline => {
137                self.advance();
138                Ok(None)
139            }
140
141            // Label definition
142            TokenKind::LabelDef => {
143                self.advance();
144                Ok(Some(Statement::Label(tok.text.to_string(), tok.span)))
145            }
146
147            // Numeric label definition
148            TokenKind::NumericLabelDef(n) => {
149                self.advance();
150                let name = alloc::format!("{}", n);
151                Ok(Some(Statement::Label(name, tok.span)))
152            }
153
154            // Directive
155            TokenKind::Directive => self.parse_directive(),
156
157            // Instruction or prefix
158            TokenKind::Ident => self.parse_instruction_or_prefix(),
159
160            _ => Err(AsmError::Syntax {
161                msg: alloc::format!("unexpected token '{}'", tok.text),
162                span: tok.span,
163            }),
164        }
165    }
166
167    fn parse_directive(&mut self) -> Result<Option<Statement>, AsmError> {
168        let tok = self.advance().clone();
169        let mut dir_buf = [0u8; 32];
170        let dir = to_lower_buf(&tok.text, &mut dir_buf);
171        let span = tok.span;
172
173        match dir {
174            // Data directives
175            ".byte" | ".db" => self.parse_data_directive(DataSize::Byte, span),
176            ".word" | ".dw" | ".short" => self.parse_data_directive(DataSize::Word, span),
177            ".long" | ".dd" | ".int" => self.parse_data_directive(DataSize::Long, span),
178            ".quad" | ".dq" => self.parse_data_directive(DataSize::Quad, span),
179
180            // String directives
181            ".ascii" => self.parse_string_directive(false, span),
182            ".asciz" | ".string" => self.parse_string_directive(true, span),
183
184            // Constant
185            ".equ" | ".set" => self.parse_equ_directive(span),
186
187            // Alignment
188            ".align" | ".balign" | ".p2align" => {
189                let is_p2 = dir == ".p2align";
190                self.parse_align_directive(is_p2, span)
191            }
192
193            // Fill
194            ".fill" => self.parse_fill_directive(span),
195
196            // Space/skip
197            ".space" | ".skip" => self.parse_space_directive(span),
198
199            // Org
200            ".org" => self.parse_org_directive(span),
201
202            // Global/extern (accepted but ignored for pure code generation)
203            ".global" | ".globl" | ".extern" => {
204                // Consume the symbol name
205                if !self.at_end() && self.peek().kind == TokenKind::Ident {
206                    self.advance();
207                }
208                Ok(None)
209            }
210
211            // Section directives (accepted but ignored)
212            ".text" | ".data" | ".bss" | ".rodata" | ".section" => {
213                // Skip to end of line
214                while !self.at_end()
215                    && self.peek().kind != TokenKind::Newline
216                    && self.peek().kind != TokenKind::Eof
217                {
218                    self.advance();
219                }
220                Ok(None)
221            }
222
223            // Code mode switching
224            ".code16" => Ok(Some(Statement::CodeMode(crate::ir::X86Mode::Mode16, span))),
225            ".code32" => Ok(Some(Statement::CodeMode(crate::ir::X86Mode::Mode32, span))),
226            ".code64" => Ok(Some(Statement::CodeMode(crate::ir::X86Mode::Mode64, span))),
227
228            // Literal pool flush
229            ".ltorg" | ".pool" => Ok(Some(Statement::Ltorg(span))),
230
231            // ARM/Thumb mode switching
232            ".thumb" => Ok(Some(Statement::ThumbMode(true, span))),
233            ".arm" => Ok(Some(Statement::ThumbMode(false, span))),
234            ".thumb_func" => Ok(Some(Statement::ThumbFunc(span))),
235
236            // Syntax switching: .syntax att / .syntax intel
237            ".syntax" => {
238                let next = self.peek().clone();
239                if next.kind == TokenKind::Ident {
240                    self.advance();
241                    if next.text.eq_ignore_ascii_case("att") {
242                        self.syntax = Syntax::Att;
243                        Ok(None)
244                    } else if next.text.eq_ignore_ascii_case("intel") {
245                        self.syntax = Syntax::Intel;
246                        Ok(None)
247                    } else {
248                        Err(AsmError::Syntax {
249                            msg: alloc::format!(
250                                "unknown syntax '{}' (expected 'att' or 'intel')",
251                                next.text
252                            ),
253                            span: next.span,
254                        })
255                    }
256                } else {
257                    Err(AsmError::Syntax {
258                        msg: String::from("expected 'att' or 'intel' after .syntax"),
259                        span: next.span,
260                    })
261                }
262            }
263
264            // RISC-V options: .option rvc / .option norvc
265            ".option" => {
266                let next = self.peek().clone();
267                if next.kind == TokenKind::Ident {
268                    self.advance();
269                    if next.text.eq_ignore_ascii_case("rvc") {
270                        Ok(Some(Statement::OptionRvc(true, span)))
271                    } else if next.text.eq_ignore_ascii_case("norvc") {
272                        Ok(Some(Statement::OptionRvc(false, span)))
273                    } else {
274                        Err(AsmError::Syntax {
275                            msg: alloc::format!(
276                                "unknown option '{}' (expected 'rvc' or 'norvc')",
277                                next.text
278                            ),
279                            span: next.span,
280                        })
281                    }
282                } else {
283                    Err(AsmError::Syntax {
284                        msg: String::from("expected 'rvc' or 'norvc' after .option"),
285                        span: next.span,
286                    })
287                }
288            }
289
290            _ => Err(AsmError::Syntax {
291                msg: alloc::format!("unknown directive '{}'", dir),
292                span,
293            }),
294        }
295    }
296
297    fn parse_data_directive(
298        &mut self,
299        size: DataSize,
300        span: Span,
301    ) -> Result<Option<Statement>, AsmError> {
302        let mut values = Vec::new();
303        loop {
304            let val = self.parse_data_value()?;
305            values.push(val);
306            if self.peek().kind == TokenKind::Comma {
307                self.advance();
308            } else {
309                break;
310            }
311        }
312        Ok(Some(Statement::Data(DataDecl { size, values, span })))
313    }
314
315    fn parse_data_value(&mut self) -> Result<DataValue, AsmError> {
316        let tok = self.peek().clone();
317        match &tok.kind {
318            TokenKind::Number(n) => {
319                self.advance();
320                Ok(DataValue::Integer(*n))
321            }
322            TokenKind::CharLit(ch) => {
323                self.advance();
324                Ok(DataValue::Integer(*ch as i128))
325            }
326            TokenKind::Ident => {
327                self.advance();
328                let label = tok.text.to_string();
329                // Parse optional addend: label + N or label - N
330                let addend = if self.peek().kind == TokenKind::Plus {
331                    self.advance();
332                    let n = self.parse_const_expr()?;
333                    n as i64
334                } else if self.peek().kind == TokenKind::Minus {
335                    self.advance();
336                    let n = self.parse_const_expr()?;
337                    -(n as i64)
338                } else {
339                    0
340                };
341                Ok(DataValue::Label(label, addend))
342            }
343            TokenKind::Minus => {
344                self.advance();
345                let next = self.peek().clone();
346                if let TokenKind::Number(n) = next.kind {
347                    self.advance();
348                    Ok(DataValue::Integer(-n))
349                } else {
350                    Err(AsmError::Syntax {
351                        msg: String::from("expected number after '-'"),
352                        span: tok.span,
353                    })
354                }
355            }
356            _ => Err(AsmError::Syntax {
357                msg: alloc::format!("expected data value, found '{}'", tok.text),
358                span: tok.span,
359            }),
360        }
361    }
362
363    fn parse_string_directive(
364        &mut self,
365        null_terminate: bool,
366        span: Span,
367    ) -> Result<Option<Statement>, AsmError> {
368        let tok = self.advance().clone();
369        if tok.kind != TokenKind::StringLit {
370            return Err(AsmError::Syntax {
371                msg: String::from("expected string literal"),
372                span: tok.span,
373            });
374        }
375        let mut bytes: Vec<u8> = tok.text.as_bytes().to_vec();
376        if null_terminate {
377            bytes.push(0);
378        }
379        Ok(Some(Statement::Data(DataDecl {
380            size: DataSize::Byte,
381            values: vec![DataValue::Bytes(bytes)],
382            span,
383        })))
384    }
385
386    fn parse_equ_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
387        let (name, _) = self.expect_ident()?;
388        // Expect comma
389        if self.peek().kind == TokenKind::Comma {
390            self.advance();
391        }
392        let value = self.parse_const_expr()?;
393        self.constants.insert(name.clone(), value);
394        Ok(Some(Statement::Const(ConstDef { name, value, span })))
395    }
396
397    fn parse_align_directive(
398        &mut self,
399        is_p2: bool,
400        span: Span,
401    ) -> Result<Option<Statement>, AsmError> {
402        let raw = self.parse_const_expr()? as u32;
403        let alignment = if is_p2 { 1u32 << raw } else { raw };
404
405        // Validate alignment is a power of two (0 and 1 are no-ops)
406        if alignment > 1 && !alignment.is_power_of_two() {
407            return Err(AsmError::Syntax {
408                msg: alloc::format!("alignment must be a power of 2, got {alignment}"),
409                span,
410            });
411        }
412
413        let fill = if self.peek().kind == TokenKind::Comma {
414            self.advance();
415            // Try to parse a constant expression for the fill byte
416            if matches!(
417                self.peek().kind,
418                TokenKind::Number(_) | TokenKind::Minus | TokenKind::Ident
419            ) {
420                Some(self.parse_const_expr()? as u8)
421            } else {
422                None
423            }
424        } else {
425            None
426        };
427
428        let max_skip = if self.peek().kind == TokenKind::Comma {
429            self.advance();
430            if matches!(
431                self.peek().kind,
432                TokenKind::Number(_) | TokenKind::Minus | TokenKind::Ident
433            ) {
434                Some(self.parse_const_expr()? as u32)
435            } else {
436                None
437            }
438        } else {
439            None
440        };
441
442        Ok(Some(Statement::Align(AlignDirective {
443            alignment,
444            fill,
445            max_skip,
446            span,
447        })))
448    }
449
450    fn parse_fill_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
451        let count = self.parse_const_expr()? as u32;
452        let mut size = 1u8;
453        let mut value = 0i64;
454        if self.peek().kind == TokenKind::Comma {
455            self.advance();
456            size = self.parse_const_expr()? as u8;
457            if self.peek().kind == TokenKind::Comma {
458                self.advance();
459                value = self.parse_const_expr()? as i64;
460            }
461        }
462        Ok(Some(Statement::Fill(FillDirective {
463            count,
464            size,
465            value,
466            span,
467        })))
468    }
469
470    fn parse_space_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
471        let size = self.parse_const_expr()? as u32;
472        let fill = if self.peek().kind == TokenKind::Comma {
473            self.advance();
474            self.parse_const_expr()? as u8
475        } else {
476            0
477        };
478        Ok(Some(Statement::Space(SpaceDirective { size, fill, span })))
479    }
480
481    fn parse_org_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
482        let offset = self.parse_const_expr()? as u64;
483        // Optional fill byte: .org offset, fill
484        let fill = if self.peek().kind == TokenKind::Comma {
485            self.advance(); // skip comma
486            self.parse_const_expr()? as u8
487        } else {
488            0x00
489        };
490        Ok(Some(Statement::Org(OrgDirective { offset, fill, span })))
491    }
492
    /// Parse a constant expression with full operator support.
    ///
    /// Entry point of the precedence-climbing chain; it simply delegates to
    /// the lowest-precedence level.
    ///
    /// Supports (from lowest to highest precedence):
    /// - `|` (bitwise OR)
    /// - `^` (bitwise XOR)
    /// - `&` (bitwise AND)
    /// - `<<`, `>>` (shifts)
    /// - `+`, `-` (additive)
    /// - `*`, `/`, `%` (multiplicative)
    /// - Unary: `-`, `~` (negate, complement)
    /// - Atoms: numbers, identifiers (constants), parenthesized sub-expressions
    ///
    /// # Errors
    ///
    /// Returns `AsmError::Syntax` for an undefined identifier, a missing `)`,
    /// division or modulo by zero, or a token that cannot start an expression.
    fn parse_const_expr(&mut self) -> Result<i128, AsmError> {
        self.const_expr_or()
    }
507
508    // ── Precedence levels ──────────────────────────────────
509
510    /// Bitwise OR: `a | b`
511    fn const_expr_or(&mut self) -> Result<i128, AsmError> {
512        let mut val = self.const_expr_xor()?;
513        while self.peek().kind == TokenKind::Pipe {
514            self.advance();
515            val |= self.const_expr_xor()?;
516        }
517        Ok(val)
518    }
519
520    /// Bitwise XOR: `a ^ b`
521    fn const_expr_xor(&mut self) -> Result<i128, AsmError> {
522        let mut val = self.const_expr_and()?;
523        while self.peek().kind == TokenKind::Caret {
524            self.advance();
525            val ^= self.const_expr_and()?;
526        }
527        Ok(val)
528    }
529
530    /// Bitwise AND: `a & b`
531    fn const_expr_and(&mut self) -> Result<i128, AsmError> {
532        let mut val = self.const_expr_shift()?;
533        while self.peek().kind == TokenKind::Ampersand {
534            self.advance();
535            val &= self.const_expr_shift()?;
536        }
537        Ok(val)
538    }
539
540    /// Shifts: `a << b`, `a >> b`
541    fn const_expr_shift(&mut self) -> Result<i128, AsmError> {
542        let mut val = self.const_expr_add()?;
543        loop {
544            match self.peek().kind {
545                TokenKind::LShift => {
546                    self.advance();
547                    let rhs = self.const_expr_add()?;
548                    val = val.wrapping_shl(rhs as u32);
549                }
550                TokenKind::RShift => {
551                    self.advance();
552                    let rhs = self.const_expr_add()?;
553                    val = val.wrapping_shr(rhs as u32);
554                }
555                _ => break,
556            }
557        }
558        Ok(val)
559    }
560
561    /// Addition/subtraction: `a + b`, `a - b`
562    fn const_expr_add(&mut self) -> Result<i128, AsmError> {
563        let mut val = self.const_expr_mul()?;
564        loop {
565            match self.peek().kind {
566                TokenKind::Plus => {
567                    self.advance();
568                    val = val.wrapping_add(self.const_expr_mul()?);
569                }
570                TokenKind::Minus => {
571                    self.advance();
572                    val = val.wrapping_sub(self.const_expr_mul()?);
573                }
574                _ => break,
575            }
576        }
577        Ok(val)
578    }
579
580    /// Multiplication/division/modulo: `a * b`, `a / b`, `a % b`
581    fn const_expr_mul(&mut self) -> Result<i128, AsmError> {
582        let mut val = self.const_expr_unary()?;
583        loop {
584            match self.peek().kind {
585                TokenKind::Star => {
586                    self.advance();
587                    val = val.wrapping_mul(self.const_expr_unary()?);
588                }
589                TokenKind::Slash => {
590                    let span = self.peek().span;
591                    self.advance();
592                    let rhs = self.const_expr_unary()?;
593                    if rhs == 0 {
594                        return Err(AsmError::Syntax {
595                            msg: String::from("division by zero in constant expression"),
596                            span,
597                        });
598                    }
599                    val /= rhs;
600                }
601                TokenKind::Percent => {
602                    let span = self.peek().span;
603                    self.advance();
604                    let rhs = self.const_expr_unary()?;
605                    if rhs == 0 {
606                        return Err(AsmError::Syntax {
607                            msg: String::from("modulo by zero in constant expression"),
608                            span,
609                        });
610                    }
611                    val %= rhs;
612                }
613                _ => break,
614            }
615        }
616        Ok(val)
617    }
618
619    /// Unary operators: `-x`, `~x`
620    fn const_expr_unary(&mut self) -> Result<i128, AsmError> {
621        match self.peek().kind {
622            TokenKind::Minus => {
623                self.advance();
624                Ok(-self.const_expr_unary()?)
625            }
626            TokenKind::Tilde => {
627                self.advance();
628                Ok(!self.const_expr_unary()?)
629            }
630            _ => self.const_expr_atom(),
631        }
632    }
633
634    /// Atoms: numbers, identifiers (constant lookup), and `(expr)`.
635    fn const_expr_atom(&mut self) -> Result<i128, AsmError> {
636        let tok = self.peek().clone();
637        match &tok.kind {
638            TokenKind::Number(n) => {
639                self.advance();
640                Ok(*n)
641            }
642            TokenKind::Ident => {
643                if let Some(&val) = self.constants.get(&*tok.text) {
644                    self.advance();
645                    Ok(val)
646                } else {
647                    Err(AsmError::Syntax {
648                        msg: alloc::format!(
649                            "expected constant expression, found undefined identifier '{}'",
650                            tok.text
651                        ),
652                        span: tok.span,
653                    })
654                }
655            }
656            TokenKind::OpenParen => {
657                self.advance(); // skip '('
658                let val = self.parse_const_expr()?;
659                if self.peek().kind != TokenKind::CloseParen {
660                    return Err(AsmError::Syntax {
661                        msg: String::from("expected ')' in constant expression"),
662                        span: self.peek().span,
663                    });
664                }
665                self.advance(); // skip ')'
666                Ok(val)
667            }
668            _ => Err(AsmError::Syntax {
669                msg: alloc::format!("expected constant expression, found '{}'", tok.text),
670                span: tok.span,
671            }),
672        }
673    }
674
    /// Parse a statement that begins with an identifier.
    ///
    /// Three shapes are recognised, in this order:
    /// 1. `NAME = expr` — a constant assignment; the value is stored in
    ///    `self.constants` and returned as `Statement::Const`.
    /// 2. One or more prefixes (`lock`, `rep`/`repe`/`repz`, `repne`/`repnz`)
    ///    not followed by an identifier — returned as an operand-less
    ///    instruction whose mnemonic is the last prefix's lowered text.
    /// 3. `[prefixes] mnemonic [operand {, operand}]` — a regular instruction.
    ///
    /// In AT&T syntax the mnemonic's size suffix is stripped into the size
    /// hint, and the operand list is reversed when it has two or more entries.
    ///
    /// # Errors
    ///
    /// Propagates any error from constant-expression, operand, or EVEX
    /// decorator parsing.
    fn parse_instruction_or_prefix(&mut self) -> Result<Option<Statement>, AsmError> {
        let tok = self.peek().clone();

        // Check for `name = expression` constant assignment syntax
        if self.pos + 1 < self.tokens.len() && self.tokens[self.pos + 1].kind == TokenKind::Equals {
            let name = tok.text.to_string();
            let span = tok.span;
            self.advance(); // consume name
            self.advance(); // consume '='
            let value = self.parse_const_expr()?;
            // Record the constant so later expressions can reference it.
            self.constants.insert(name.clone(), value);
            return Ok(Some(Statement::Const(ConstDef { name, value, span })));
        }

        // Check for prefixes (case-insensitive, zero extra allocations).
        // We track the token position so we can lowercase the final mnemonic
        // only once, after the prefix loop.
        let mut prefixes = PrefixList::new();
        let mut mnemonic_pos = self.pos;
        let mut current_span = tok.span;

        loop {
            // Classify the token at `mnemonic_pos`; `None` means it is the
            // mnemonic itself rather than a prefix.
            let prefix = {
                let text = &*self.tokens[mnemonic_pos].text;
                if text.eq_ignore_ascii_case("lock") {
                    Some(Prefix::Lock)
                } else if text.eq_ignore_ascii_case("rep")
                    || text.eq_ignore_ascii_case("repe")
                    || text.eq_ignore_ascii_case("repz")
                {
                    Some(Prefix::Rep)
                } else if text.eq_ignore_ascii_case("repne") || text.eq_ignore_ascii_case("repnz") {
                    Some(Prefix::Repne)
                } else {
                    None
                }
            };
            match prefix {
                Some(p) => {
                    prefixes.push(p);
                    self.advance();
                }
                None => break,
            }
            if self.at_end() || self.peek().kind != TokenKind::Ident {
                // Standalone prefix — treat as instruction with no operands.
                // `mnemonic_pos` still points at the prefix token here, so the
                // prefix text becomes the mnemonic (and is also in `prefixes`).
                return Ok(Some(Statement::Instruction(Instruction {
                    mnemonic: {
                        let mut lbuf = [0u8; 32];
                        Mnemonic::from(to_lower_buf(&self.tokens[mnemonic_pos].text, &mut lbuf))
                    },
                    operands: OperandList::new(),
                    size_hint: None,
                    prefixes,
                    opmask: None,
                    zeroing: false,
                    broadcast: None,
                    span: current_span,
                })));
            }
            // The next identifier is either another prefix or the mnemonic.
            mnemonic_pos = self.pos;
            current_span = self.tokens[mnemonic_pos].span;
        }

        // Now lowercase the mnemonic into stack-allocated Mnemonic (zero alloc).
        let mut mnemonic = {
            let mut lbuf = [0u8; 32];
            Mnemonic::from(to_lower_buf(&self.tokens[mnemonic_pos].text, &mut lbuf))
        };
        let mnemonic_span = current_span;
        self.advance(); // consume mnemonic

        // Parse operands
        let mut operands = OperandList::new();
        let mut size_hint = None;
        let mut opmask: Option<Register> = None;
        let mut zeroing = false;
        let mut broadcast: Option<BroadcastMode> = None;

        // AT&T syntax: strip size suffix from mnemonic (movq→mov, addl→add, etc.)
        if self.syntax == Syntax::Att {
            if let Some((base, sz)) = strip_att_suffix(&mnemonic) {
                mnemonic = base;
                size_hint = Some(sz);
            }
        }

        if !self.at_end() && !self.is_statement_end() {
            let (op, hint) = self.parse_operand()?;
            // ARM writeback: `R0!` after a register → wrap as Memory(PreIndex, base=R0)
            let op = if self.peek().kind == TokenKind::Bang {
                if let Operand::Register(r) = &op {
                    if r.is_arm() || r.is_aarch64() {
                        self.advance(); // consume '!'
                        Operand::Memory(Box::new(MemoryOperand {
                            base: Some(*r),
                            index: None,
                            scale: 1,
                            disp: 0,
                            disp_label: None,
                            segment: None,
                            size: None,
                            addr_mode: AddrMode::PreIndex,
                            index_subtract: false,
                        }))
                    } else {
                        // Not an ARM/AArch64 register: leave the '!' unconsumed.
                        op
                    }
                } else {
                    op
                }
            } else {
                op
            };
            operands.push(op);
            // The first size hint seen wins (an AT&T suffix takes priority).
            if hint.is_some() && size_hint.is_none() {
                size_hint = hint;
            }

            // ── AVX-512 decorators after first operand: {k1}, {k1}{z} ──
            if self.arch == Arch::X86_64 || self.arch == Arch::X86 {
                self.parse_evex_decorators(&mut opmask, &mut zeroing, &mut broadcast)?;
            }

            while self.peek().kind == TokenKind::Comma {
                self.advance(); // skip comma
                let (op, hint) = self.parse_operand()?;
                operands.push(op);
                if hint.is_some() && size_hint.is_none() {
                    size_hint = hint;
                }
                // ── AVX-512 decorators after subsequent operands: {1to8} etc. ──
                if self.arch == Arch::X86_64 || self.arch == Arch::X86 {
                    self.parse_evex_decorators(&mut opmask, &mut zeroing, &mut broadcast)?;
                }
            }
        }

        // AT&T syntax: reverse operand order (src, dst → dst, src).
        // Applied whenever there are at least two operands; with 0 or 1
        // operand there is nothing to reorder.
        if self.syntax == Syntax::Att && operands.len() >= 2 {
            operands.reverse();
        }

        Ok(Some(Statement::Instruction(Instruction {
            mnemonic,
            operands,
            size_hint,
            prefixes,
            opmask,
            zeroing,
            broadcast,
            span: mnemonic_span,
        })))
    }
831
832    fn is_statement_end(&self) -> bool {
833        matches!(self.peek().kind, TokenKind::Newline | TokenKind::Eof)
834    }
835
836    /// Parse one operand. Returns the operand and optional size hint.
837    /// Parse a single atom in an expression after `+` or `-`:
838    /// either a numeric literal or an identifier (label / constant name).
839    fn parse_expr_atom(&mut self, ctx_tok: &Token<'a>) -> Result<Expr, AsmError> {
840        let next = self.peek().clone();
841        match &next.kind {
842            TokenKind::Number(n) => {
843                self.advance();
844                Ok(Expr::Num(*n))
845            }
846            TokenKind::Ident => {
847                self.advance();
848                // If identifier is a known constant, substitute immediately
849                if let Some(&val) = self.constants.get(&*next.text) {
850                    Ok(Expr::Num(val))
851                } else {
852                    Ok(Expr::Label(next.text.to_string()))
853                }
854            }
855            _ => Err(AsmError::Syntax {
856                msg: alloc::format!(
857                    "expected number or identifier after '+'/'-' near '{}'",
858                    ctx_tok.text
859                ),
860                span: next.span,
861            }),
862        }
863    }
864
865    /// Parse AVX-512 EVEX decorators: `{k1}`, `{k1}{z}`, `{1to2}`, `{1to4}`, `{1to8}`, `{1to16}`.
866    ///
867    /// Called after each operand to pick up decorators attached to that operand.
868    /// Opmask and zeroing typically follow the destination, broadcast follows a memory operand.
869    fn parse_evex_decorators(
870        &mut self,
871        opmask: &mut Option<Register>,
872        zeroing: &mut bool,
873        broadcast: &mut Option<BroadcastMode>,
874    ) -> Result<(), AsmError> {
875        while self.peek().kind == TokenKind::OpenBrace {
876            let brace_span = self.peek().span;
877            self.advance(); // skip '{'
878            let tok = self.peek().clone();
879            match tok.kind {
880                TokenKind::Ident => {
881                    let mut lbuf = [0u8; 32];
882                    let lower = to_lower_buf(&tok.text, &mut lbuf);
883                    if lower == "z" {
884                        *zeroing = true;
885                        self.advance();
886                    } else if let Some(kreg) = parse_register_lower(lower, Arch::X86_64) {
887                        if kreg.is_opmask() {
888                            *opmask = Some(kreg);
889                            self.advance();
890                        } else {
891                            return Err(AsmError::Syntax {
892                                msg: String::from("expected opmask register k0-k7"),
893                                span: tok.span,
894                            });
895                        }
896                    } else {
897                        return Err(AsmError::Syntax {
898                            msg: String::from("unexpected identifier in AVX-512 decorator"),
899                            span: tok.span,
900                        });
901                    }
902                }
903                TokenKind::Number(_) => {
904                    // {1to2}, {1to4}, {1to8}, {1to16}
905                    if tok.text == "1" {
906                        self.advance(); // skip '1'
907                                        // Expect identifier "to2", "to4", "to8", or "to16"
908                        let next = self.peek().clone();
909                        if next.kind == TokenKind::Ident {
910                            let mut lbuf = [0u8; 32];
911                            let nlower = to_lower_buf(&next.text, &mut lbuf);
912                            let mode = match nlower {
913                                "to2" => Some(BroadcastMode::OneToTwo),
914                                "to4" => Some(BroadcastMode::OneToFour),
915                                "to8" => Some(BroadcastMode::OneToEight),
916                                "to16" => Some(BroadcastMode::OneToSixteen),
917                                _ => None,
918                            };
919                            if let Some(m) = mode {
920                                *broadcast = Some(m);
921                                self.advance();
922                            } else {
923                                return Err(AsmError::Syntax {
924                                    msg: String::from("expected 1to2, 1to4, 1to8, or 1to16"),
925                                    span: next.span,
926                                });
927                            }
928                        } else {
929                            return Err(AsmError::Syntax {
930                                msg: String::from("expected broadcast specifier (1to2/4/8/16)"),
931                                span: next.span,
932                            });
933                        }
934                    } else {
935                        return Err(AsmError::Syntax {
936                            msg: String::from("unexpected number in AVX-512 decorator"),
937                            span: tok.span,
938                        });
939                    }
940                }
941                _ => {
942                    // Not an AVX-512 decorator — this is a regular brace (ARM register list etc.)
943                    // Put back by not advancing and returning. But we already consumed '{'.
944                    // This shouldn't happen in normal x86 flow. Return error.
945                    return Err(AsmError::Syntax {
946                        msg: String::from("unexpected token in AVX-512 decorator"),
947                        span: brace_span,
948                    });
949                }
950            }
951            // Expect closing '}'
952            if self.peek().kind == TokenKind::CloseBrace {
953                self.advance();
954            } else {
955                return Err(AsmError::Syntax {
956                    msg: String::from("expected '}' after AVX-512 decorator"),
957                    span: self.peek().span,
958                });
959            }
960        }
961        Ok(())
962    }
963
964    fn parse_operand(&mut self) -> Result<(Operand, Option<OperandSize>), AsmError> {
965        let tok = self.peek().clone();
966
967        // Check for size hint: byte/word/dword/qword [ptr]
968        if tok.kind == TokenKind::Ident {
969            let mut lbuf = [0u8; 32];
970            let lower = to_lower_buf(&tok.text, &mut lbuf);
971            if let Some(sz) = self.try_parse_size_hint(lower) {
972                // Must be followed by a memory operand or ptr keyword
973                if self.peek().kind == TokenKind::Ident
974                    && self.peek().text.eq_ignore_ascii_case("ptr")
975                {
976                    self.advance(); // skip "ptr"
977                }
978                let (op, _) = self.parse_operand_inner()?;
979                return Ok((op, Some(sz)));
980            }
981        }
982
983        self.parse_operand_inner()
984    }
985
986    /// Case-insensitive size hint parsing (zero allocations).
987    fn try_parse_size_hint(&mut self, ident: &str) -> Option<OperandSize> {
988        if ident.eq_ignore_ascii_case("byte") {
989            self.advance();
990            Some(OperandSize::Byte)
991        } else if ident.eq_ignore_ascii_case("word") {
992            self.advance();
993            Some(OperandSize::Word)
994        } else if ident.eq_ignore_ascii_case("dword") {
995            self.advance();
996            Some(OperandSize::Dword)
997        } else if ident.eq_ignore_ascii_case("qword") {
998            self.advance();
999            Some(OperandSize::Qword)
1000        } else if ident.eq_ignore_ascii_case("xmmword") || ident.eq_ignore_ascii_case("oword") {
1001            self.advance();
1002            Some(OperandSize::Xmmword)
1003        } else if ident.eq_ignore_ascii_case("ymmword") {
1004            self.advance();
1005            Some(OperandSize::Ymmword)
1006        } else if ident.eq_ignore_ascii_case("zmmword") {
1007            self.advance();
1008            Some(OperandSize::Zmmword)
1009        } else {
1010            None
1011        }
1012    }
1013
1014    fn parse_operand_inner(&mut self) -> Result<(Operand, Option<OperandSize>), AsmError> {
1015        // AT&T syntax: dispatch to AT&T operand parser
1016        if self.syntax == Syntax::Att {
1017            return self.parse_att_operand();
1018        }
1019
1020        let tok = self.peek().clone();
1021
1022        match &tok.kind {
1023            // ARM register list: {R0, R1, R4, LR}
1024            // SVE braced vector register: {z0.s}
1025            // NEON braced vector register: {v0.4s}
1026            TokenKind::OpenBrace => {
1027                self.advance(); // consume '{'
1028                                // Check for SVE/NEON braced vector register: {z0.s}, {v0.4s}
1029                let first = self.peek().clone();
1030                if let TokenKind::Ident = &first.kind {
1031                    let mut lbuf = [0u8; 32];
1032                    let lower = to_lower_buf(&first.text, &mut lbuf);
1033                    if let Some(dot_pos) = lower.find('.') {
1034                        let reg_part = &lower[..dot_pos];
1035                        let arr_part = &lower[dot_pos + 1..];
1036                        if let Some(reg) = parse_register_lower(reg_part, self.arch) {
1037                            if reg.is_a64_sve_z() || reg.is_a64_vector() {
1038                                if let Some(arr) = VectorArrangement::parse(arr_part) {
1039                                    self.advance(); // consume 'z0.s' / 'v0.4s'
1040                                    if self.peek().kind != TokenKind::CloseBrace {
1041                                        return Err(AsmError::Syntax {
1042                                            msg: String::from("expected '}' after vector register"),
1043                                            span: self.peek().span,
1044                                        });
1045                                    }
1046                                    self.advance(); // consume '}'
1047                                    return Ok((Operand::VectorRegister(reg, arr), None));
1048                                }
1049                            }
1050                        }
1051                    }
1052                }
1053                // Fall through to regular register list parsing
1054                let mut regs = Vec::new();
1055                loop {
1056                    let rtok = self.peek().clone();
1057                    if rtok.kind == TokenKind::CloseBrace {
1058                        self.advance();
1059                        break;
1060                    }
1061                    if rtok.kind == TokenKind::Comma {
1062                        self.advance();
1063                        continue;
1064                    }
1065                    if let TokenKind::Ident = &rtok.kind {
1066                        let mut lbuf = [0u8; 32];
1067                        let lower = to_lower_buf(&rtok.text, &mut lbuf);
1068                        if let Some(reg) = parse_register_lower(lower, self.arch) {
1069                            self.advance();
1070                            regs.push(reg);
1071                            continue;
1072                        }
1073                    }
1074                    return Err(AsmError::Syntax {
1075                        msg: alloc::format!(
1076                            "expected register in register list, found '{}'",
1077                            rtok.text
1078                        ),
1079                        span: rtok.span,
1080                    });
1081                }
1082                Ok((Operand::RegisterList(regs), None))
1083            }
1084
1085            // Memory operand
1086            TokenKind::OpenBracket => {
1087                let mem = self.parse_memory_operand()?;
1088                Ok((Operand::Memory(Box::new(mem)), None))
1089            }
1090
1091            // Literal pool value: =imm (ARM/AArch64 LDR Rn, =value)
1092            TokenKind::Equals => {
1093                self.advance(); // consume '='
1094                let next = self.peek().clone();
1095                match next.kind {
1096                    TokenKind::Number(n) => {
1097                        self.advance();
1098                        Ok((Operand::LiteralPoolValue(n), None))
1099                    }
1100                    TokenKind::Minus => {
1101                        self.advance();
1102                        if let TokenKind::Number(n) = self.peek().kind {
1103                            self.advance();
1104                            Ok((Operand::LiteralPoolValue(-n), None))
1105                        } else {
1106                            Err(AsmError::Syntax {
1107                                msg: String::from("expected number after '=-'"),
1108                                span: next.span,
1109                            })
1110                        }
1111                    }
1112                    _ => Err(AsmError::Syntax {
1113                        msg: alloc::format!("expected number after '=', found '{}'", next.text),
1114                        span: next.span,
1115                    }),
1116                }
1117            }
1118
1119            // RISC-V bare (reg) memory operand — equivalent to 0(reg)
1120            TokenKind::OpenParen if matches!(self.arch, Arch::Rv32 | Arch::Rv64) => {
1121                self.parse_riscv_mem_operand(0)
1122            }
1123
1124            // Immediate — or RISC-V memory operand: offset(reg)
1125            TokenKind::Number(n) => {
1126                let val = *n;
1127                self.advance();
1128                // RISC-V: 0(sp), 8(a0), etc.
1129                if matches!(self.arch, Arch::Rv32 | Arch::Rv64)
1130                    && self.peek().kind == TokenKind::OpenParen
1131                {
1132                    return self.parse_riscv_mem_operand(val);
1133                }
1134                Ok((Operand::Immediate(val), None))
1135            }
1136
1137            // Negative immediate — or RISC-V memory operand: -offset(reg)
1138            TokenKind::Minus => {
1139                self.advance();
1140                let next = self.peek().clone();
1141                if let TokenKind::Number(n) = next.kind {
1142                    self.advance();
1143                    let val = -n;
1144                    // RISC-V: -4(sp), -8(s0), etc.
1145                    if matches!(self.arch, Arch::Rv32 | Arch::Rv64)
1146                        && self.peek().kind == TokenKind::OpenParen
1147                    {
1148                        return self.parse_riscv_mem_operand(val);
1149                    }
1150                    Ok((Operand::Immediate(val), None))
1151                } else {
1152                    Err(AsmError::Syntax {
1153                        msg: String::from("expected number after '-'"),
1154                        span: tok.span,
1155                    })
1156                }
1157            }
1158
1159            // Character literal as immediate
1160            TokenKind::CharLit(ch) => {
1161                self.advance();
1162                Ok((Operand::Immediate(*ch as i128), None))
1163            }
1164
1165            // Identifier: register or label reference
1166            TokenKind::Ident => {
1167                let mut lbuf = [0u8; 32];
1168                let lower = to_lower_buf(&tok.text, &mut lbuf);
1169
1170                // Check for segment:memory pair (e.g., fs:[rax])
1171                if is_segment_name(lower) {
1172                    let seg = match parse_segment(lower) {
1173                        Some(s) => s,
1174                        None => {
1175                            return Err(AsmError::Syntax {
1176                                msg: alloc::format!("unknown segment register: {}", lower),
1177                                span: tok.span,
1178                            });
1179                        }
1180                    };
1181                    // Check if followed by colon and bracket
1182                    if self.pos + 1 < self.tokens.len()
1183                        && self.tokens[self.pos + 1].kind == TokenKind::Colon
1184                    {
1185                        self.advance(); // consume seg name
1186                        self.advance(); // consume ':'
1187
1188                        if self.peek().kind == TokenKind::OpenBracket {
1189                            let mut mem = self.parse_memory_operand()?;
1190                            mem.segment = Some(seg);
1191                            return Ok((Operand::Memory(Box::new(mem)), None));
1192                        }
1193                    }
1194                }
1195
1196                // Try register (possibly with vector arrangement: v0.4s, z0.s, p0.b)
1197                if let Some(dot_pos) = lower.find('.') {
1198                    // Check for vector register with arrangement specifier
1199                    let reg_part = &lower[..dot_pos];
1200                    let arr_part = &lower[dot_pos + 1..];
1201                    if let Some(reg) = parse_register_lower(reg_part, self.arch) {
1202                        if reg.is_a64_vector() || reg.is_a64_sve_z() || reg.is_a64_sve_p() {
1203                            if let Some(arr) = VectorArrangement::parse(arr_part) {
1204                                self.advance();
1205                                return Ok((Operand::VectorRegister(reg, arr), None));
1206                            }
1207                        }
1208                        // RISC-V V extension: v0.t → mask operand (bare register)
1209                        if reg.is_riscv_vec() && arr_part == "t" {
1210                            self.advance();
1211                            return Ok((Operand::Register(reg), None));
1212                        }
1213                    }
1214                }
1215
1216                if let Some(reg) = parse_register_lower(lower, self.arch) {
1217                    self.advance();
1218                    // SVE predicate qualifier: p0/m or p0/z
1219                    if reg.is_a64_sve_p() && self.peek().kind == TokenKind::Slash {
1220                        let next_pos = self.pos + 1;
1221                        if next_pos < self.tokens.len() {
1222                            let qual_text = &self.tokens[next_pos].text;
1223                            let qual = if qual_text.eq_ignore_ascii_case("m") {
1224                                Some(SvePredQual::Merging)
1225                            } else if qual_text.eq_ignore_ascii_case("z") {
1226                                Some(SvePredQual::Zeroing)
1227                            } else {
1228                                None
1229                            };
1230                            if let Some(q) = qual {
1231                                self.advance(); // consume '/'
1232                                self.advance(); // consume 'm' or 'z'
1233                                return Ok((Operand::SvePredicate(reg, q), None));
1234                            }
1235                        }
1236                    }
1237                    return Ok((Operand::Register(reg), None));
1238                }
1239
1240                // Check if it's a previously-defined constant (bare identifier → Immediate)
1241                if let Some(&val) = self.constants.get(&*tok.text) {
1242                    self.advance();
1243                    // Check for trailing +/- chains (e.g., CONST + 5)
1244                    let mut result = val;
1245                    loop {
1246                        if self.peek().kind == TokenKind::Plus {
1247                            self.advance();
1248                            let next = self.peek().clone();
1249                            match &next.kind {
1250                                TokenKind::Number(n) => {
1251                                    self.advance();
1252                                    result += n;
1253                                }
1254                                TokenKind::Ident => {
1255                                    if let Some(&v) = self.constants.get(&*next.text) {
1256                                        self.advance();
1257                                        result += v;
1258                                    } else {
1259                                        break;
1260                                    }
1261                                }
1262                                _ => break,
1263                            }
1264                        } else if self.peek().kind == TokenKind::Minus {
1265                            self.advance();
1266                            let next = self.peek().clone();
1267                            match &next.kind {
1268                                TokenKind::Number(n) => {
1269                                    self.advance();
1270                                    result -= n;
1271                                }
1272                                TokenKind::Ident => {
1273                                    if let Some(&v) = self.constants.get(&*next.text) {
1274                                        self.advance();
1275                                        result -= v;
1276                                    } else {
1277                                        break;
1278                                    }
1279                                }
1280                                _ => break,
1281                            }
1282                        } else {
1283                            break;
1284                        }
1285                    }
1286                    return Ok((Operand::Immediate(result), None));
1287                }
1288
1289                // Label reference
1290                self.advance();
1291                // Build an expression tree for label+offset, label-offset,
1292                // label+ident, etc.
1293                let mut expr: Expr = Expr::Label(tok.text.to_string());
1294                let mut is_expression = false;
1295                loop {
1296                    if self.peek().kind == TokenKind::Plus {
1297                        self.advance();
1298                        let rhs = self.parse_expr_atom(&tok)?;
1299                        expr = Expr::Add(Box::new(expr), Box::new(rhs));
1300                        is_expression = true;
1301                    } else if self.peek().kind == TokenKind::Minus {
1302                        self.advance();
1303                        let rhs = self.parse_expr_atom(&tok)?;
1304                        expr = Expr::Sub(Box::new(expr), Box::new(rhs));
1305                        is_expression = true;
1306                    } else {
1307                        break;
1308                    }
1309                }
1310
1311                if is_expression {
1312                    // Try to resolve all-constant expressions eagerly
1313                    expr.resolve_constants(|name| self.constants.get(name).copied());
1314                    if let Some(val) = expr.eval() {
1315                        return Ok((Operand::Immediate(val), None));
1316                    }
1317                    return Ok((Operand::Expression(expr), None));
1318                }
1319
1320                Ok((Operand::Label(tok.text.to_string()), None))
1321            }
1322
1323            // Numeric label references
1324            TokenKind::NumericLabelFwd(n) => {
1325                self.advance();
1326                Ok((Operand::Label(alloc::format!("{}f", n)), None))
1327            }
1328            TokenKind::NumericLabelBwd(n) => {
1329                self.advance();
1330                Ok((Operand::Label(alloc::format!("{}b", n)), None))
1331            }
1332
1333            _ => Err(AsmError::Syntax {
1334                msg: alloc::format!("expected operand, found '{}'", tok.text),
1335                span: tok.span,
1336            }),
1337        }
1338    }
1339
1340    // ── AT&T / GAS syntax operand parsing ────────────────────────────────
1341
1342    /// Parse a single AT&T-syntax operand.
1343    ///
1344    /// Forms:
1345    /// - `$imm` or `$-imm` or `$label` — immediate / label reference
1346    /// - `%reg` — register
1347    /// - `%seg:disp(%base, %index, scale)` — memory with segment override
1348    /// - `disp(%base, %index, scale)` — memory
1349    /// - `(%base)` — memory (zero displacement)
1350    /// - `label` or `label+offset` — label reference (for branches)
1351    fn parse_att_operand(&mut self) -> Result<(Operand, Option<OperandSize>), AsmError> {
1352        let tok = self.peek().clone();
1353
1354        match &tok.kind {
1355            // $imm — immediate
1356            TokenKind::Dollar => {
1357                self.advance(); // consume '$'
1358                let next = self.peek().clone();
1359                match &next.kind {
1360                    TokenKind::Number(n) => {
1361                        let val = *n;
1362                        self.advance();
1363                        Ok((Operand::Immediate(val), None))
1364                    }
1365                    TokenKind::Minus => {
1366                        self.advance(); // consume '-'
1367                        let num_tok = self.peek().clone();
1368                        if let TokenKind::Number(n) = num_tok.kind {
1369                            self.advance();
1370                            Ok((Operand::Immediate(-n), None))
1371                        } else {
1372                            Err(AsmError::Syntax {
1373                                msg: String::from("expected number after '$-'"),
1374                                span: num_tok.span,
1375                            })
1376                        }
1377                    }
1378                    TokenKind::Ident => {
1379                        let name = next.text.to_string();
1380                        self.advance();
1381                        // Check if it's a known constant
1382                        if let Some(&val) = self.constants.get(&name) {
1383                            Ok((Operand::Immediate(val), None))
1384                        } else {
1385                            // Label reference as immediate
1386                            Ok((Operand::Label(name), None))
1387                        }
1388                    }
1389                    _ => Err(AsmError::Syntax {
1390                        msg: alloc::format!(
1391                            "expected number or identifier after '$', found '{}'",
1392                            next.text
1393                        ),
1394                        span: next.span,
1395                    }),
1396                }
1397            }
1398
1399            // %reg or %seg:... — register or segment-prefixed memory
1400            TokenKind::Percent => {
1401                self.advance(); // consume '%'
1402                let reg_tok = self.peek().clone();
1403                if reg_tok.kind != TokenKind::Ident {
1404                    return Err(AsmError::Syntax {
1405                        msg: alloc::format!(
1406                            "expected register name after '%', found '{}'",
1407                            reg_tok.text
1408                        ),
1409                        span: reg_tok.span,
1410                    });
1411                }
1412                let mut lbuf = [0u8; 32];
1413                let lower = to_lower_buf(&reg_tok.text, &mut lbuf);
1414
1415                // Check for segment register followed by ':'
1416                if is_segment_name(lower) {
1417                    // SAFETY: is_segment_name() matches the exact same set as
1418                    // parse_segment(), so unwrap is unreachable.
1419                    let seg = parse_segment(lower).unwrap();
1420                    // Peek ahead: if next is ':', this is a segment-prefixed memory
1421                    if self.pos + 1 < self.tokens.len()
1422                        && self.tokens[self.pos + 1].kind == TokenKind::Colon
1423                    {
1424                        self.advance(); // consume segment name
1425                        self.advance(); // consume ':'
1426                                        // Parse the rest as memory: could be disp(%base,...), (%base,...), or bare disp
1427                        let (seg_disp, seg_disp_label) = match self.peek().kind {
1428                            TokenKind::Number(n) => {
1429                                let val = n;
1430                                self.advance();
1431                                (val as i64, None)
1432                            }
1433                            TokenKind::Minus => {
1434                                self.advance();
1435                                if let TokenKind::Number(n) = self.peek().kind {
1436                                    let val = n;
1437                                    self.advance();
1438                                    (-(val as i64), None)
1439                                } else {
1440                                    (0, None)
1441                                }
1442                            }
1443                            _ => (0, None),
1444                        };
1445                        let mut mem = self.parse_att_memory_operand(seg_disp, seg_disp_label)?;
1446                        mem.segment = Some(seg);
1447                        return Ok((Operand::Memory(Box::new(mem)), None));
1448                    }
1449                }
1450
1451                // Regular register
1452                if let Some(reg) = parse_register_lower(lower, self.arch) {
1453                    self.advance();
1454                    Ok((Operand::Register(reg), None))
1455                } else {
1456                    Err(AsmError::Syntax {
1457                        msg: alloc::format!("unknown register: %{}", lower),
1458                        span: reg_tok.span,
1459                    })
1460                }
1461            }
1462
1463            // (%base, ...) — memory with zero displacement
1464            TokenKind::OpenParen => {
1465                let mem = self.parse_att_memory_operand(0, None)?;
1466                Ok((Operand::Memory(Box::new(mem)), None))
1467            }
1468
1469            // number — could be displacement for memory or standalone immediate (for branches)
1470            TokenKind::Number(n) => {
1471                let val = *n;
1472                self.advance();
1473                // If followed by '(', this is a memory operand: disp(%base, %index, scale)
1474                if self.peek().kind == TokenKind::OpenParen {
1475                    let mem = self.parse_att_memory_operand(val as i64, None)?;
1476                    Ok((Operand::Memory(Box::new(mem)), None))
1477                } else {
1478                    // Standalone number (e.g., for branch targets, port numbers)
1479                    Ok((Operand::Immediate(val), None))
1480                }
1481            }
1482
1483            // -number — negative displacement for memory
1484            TokenKind::Minus => {
1485                self.advance(); // consume '-'
1486                let next = self.peek().clone();
1487                if let TokenKind::Number(n) = next.kind {
1488                    self.advance();
1489                    let val = -n;
1490                    if self.peek().kind == TokenKind::OpenParen {
1491                        let mem = self.parse_att_memory_operand(val as i64, None)?;
1492                        Ok((Operand::Memory(Box::new(mem)), None))
1493                    } else {
1494                        Ok((Operand::Immediate(val), None))
1495                    }
1496                } else {
1497                    Err(AsmError::Syntax {
1498                        msg: String::from("expected number after '-' in AT&T operand"),
1499                        span: tok.span,
1500                    })
1501                }
1502            }
1503
1504            // Identifier — label reference (for branches: jmp label, call label)
1505            TokenKind::Ident => {
1506                let name = tok.text.to_string();
1507                self.advance();
1508                // Check if it's a known constant
1509                if let Some(&val) = self.constants.get(&name) {
1510                    // If followed by '(', treat as memory displacement
1511                    if self.peek().kind == TokenKind::OpenParen {
1512                        let mem = self.parse_att_memory_operand(val as i64, None)?;
1513                        return Ok((Operand::Memory(Box::new(mem)), None));
1514                    }
1515                    return Ok((Operand::Immediate(val), None));
1516                }
1517                // Build expression with optional +/- offset
1518                let mut expr = Expr::Label(name.clone());
1519                let mut has_offset = false;
1520                loop {
1521                    if self.peek().kind == TokenKind::Plus {
1522                        self.advance();
1523                        let atom = self.parse_expr_atom(&tok)?;
1524                        expr = Expr::Add(Box::new(expr), Box::new(atom));
1525                        has_offset = true;
1526                    } else if self.peek().kind == TokenKind::Minus {
1527                        self.advance();
1528                        let atom = self.parse_expr_atom(&tok)?;
1529                        expr = Expr::Sub(Box::new(expr), Box::new(atom));
1530                        has_offset = true;
1531                    } else {
1532                        break;
1533                    }
1534                }
1535                if has_offset {
1536                    Ok((Operand::Expression(expr), None))
1537                } else {
1538                    Ok((Operand::Label(name), None))
1539                }
1540            }
1541
1542            // Numeric label forward/backward references
1543            TokenKind::NumericLabelFwd(n) => {
1544                let n = *n;
1545                self.advance();
1546                Ok((Operand::Label(alloc::format!("{}f", n)), None))
1547            }
1548            TokenKind::NumericLabelBwd(n) => {
1549                let n = *n;
1550                self.advance();
1551                Ok((Operand::Label(alloc::format!("{}b", n)), None))
1552            }
1553
1554            // Star — indirect jump/call: *%rax, *(%rax), *label
1555            TokenKind::Star => {
1556                self.advance(); // consume '*'
1557                                // The suboperand is the actual target — parse recursively
1558                self.parse_att_operand()
1559            }
1560
1561            _ => Err(AsmError::Syntax {
1562                msg: alloc::format!("unexpected token in AT&T operand: '{}'", tok.text),
1563                span: tok.span,
1564            }),
1565        }
1566    }
1567
1568    /// Parse AT&T memory operand: `(%base)`, `(%base, %index)`,
1569    /// `(%base, %index, scale)`, `disp(%base, ...)`.
1570    /// `disp` has already been parsed; it's passed as the `disp` param.
1571    fn parse_att_memory_operand(
1572        &mut self,
1573        disp: i64,
1574        disp_label: Option<String>,
1575    ) -> Result<MemoryOperand, AsmError> {
1576        let open = self.peek().clone();
1577        if open.kind != TokenKind::OpenParen {
1578            return Err(AsmError::Syntax {
1579                msg: alloc::format!("expected '(' in AT&T memory operand, found '{}'", open.text),
1580                span: open.span,
1581            });
1582        }
1583        self.advance(); // consume '('
1584
1585        let mut base = None;
1586        let mut index = None;
1587        let mut scale: u8 = 1;
1588
1589        // Parse base register: %reg
1590        if self.peek().kind == TokenKind::Percent {
1591            self.advance(); // consume '%'
1592            let reg_tok = self.peek().clone();
1593            let mut lbuf = [0u8; 32];
1594            let lower = to_lower_buf(&reg_tok.text, &mut lbuf);
1595            base =
1596                Some(
1597                    parse_register_lower(lower, self.arch).ok_or_else(|| AsmError::Syntax {
1598                        msg: alloc::format!("unknown register: %{}", lower),
1599                        span: reg_tok.span,
1600                    })?,
1601                );
1602            self.advance();
1603        }
1604
1605        // Comma → index register
1606        if self.peek().kind == TokenKind::Comma {
1607            self.advance(); // consume ','
1608            if self.peek().kind == TokenKind::Percent {
1609                self.advance(); // consume '%'
1610                let reg_tok = self.peek().clone();
1611                let mut lbuf = [0u8; 32];
1612                let lower = to_lower_buf(&reg_tok.text, &mut lbuf);
1613                index = Some(parse_register_lower(lower, self.arch).ok_or_else(|| {
1614                    AsmError::Syntax {
1615                        msg: alloc::format!("unknown register: %{}", lower),
1616                        span: reg_tok.span,
1617                    }
1618                })?);
1619                self.advance();
1620            }
1621
1622            // Comma → scale factor
1623            if self.peek().kind == TokenKind::Comma {
1624                self.advance(); // consume ','
1625                let scale_tok = self.peek().clone();
1626                if let TokenKind::Number(n) = scale_tok.kind {
1627                    scale = n as u8;
1628                    self.advance();
1629                } else {
1630                    return Err(AsmError::Syntax {
1631                        msg: alloc::format!(
1632                            "expected scale factor (1,2,4,8), found '{}'",
1633                            scale_tok.text
1634                        ),
1635                        span: scale_tok.span,
1636                    });
1637                }
1638            }
1639        }
1640
1641        // Expect closing ')'
1642        let close = self.peek().clone();
1643        if close.kind != TokenKind::CloseParen {
1644            return Err(AsmError::Syntax {
1645                msg: alloc::format!(
1646                    "expected ')' in AT&T memory operand, found '{}'",
1647                    close.text
1648                ),
1649                span: close.span,
1650            });
1651        }
1652        self.advance();
1653
1654        Ok(MemoryOperand {
1655            base,
1656            index,
1657            scale,
1658            disp,
1659            disp_label,
1660            segment: None,
1661            size: None,
1662            addr_mode: AddrMode::Offset,
1663            index_subtract: false,
1664        })
1665    }
1666
1667    /// Parse a RISC-V memory operand: `offset(reg)`.
1668    /// Called after the offset has been consumed. Expects `(` reg `)`.
1669    fn parse_riscv_mem_operand(
1670        &mut self,
1671        offset: i128,
1672    ) -> Result<(Operand, Option<OperandSize>), AsmError> {
1673        let open_tok = self.advance().clone(); // consume '('
1674        debug_assert_eq!(open_tok.kind, TokenKind::OpenParen);
1675
1676        let reg_tok = self.peek().clone();
1677        let mut lbuf = [0u8; 32];
1678        let lower = to_lower_buf(&reg_tok.text, &mut lbuf);
1679        let reg = if let Some(r) = parse_register_lower(lower, self.arch) {
1680            self.advance();
1681            r
1682        } else {
1683            return Err(AsmError::Syntax {
1684                msg: alloc::format!(
1685                    "expected register in memory operand, found '{}'",
1686                    reg_tok.text
1687                ),
1688                span: reg_tok.span,
1689            });
1690        };
1691
1692        // Expect closing ')'
1693        let close = self.peek().clone();
1694        if close.kind != TokenKind::CloseParen {
1695            return Err(AsmError::Syntax {
1696                msg: alloc::format!("expected ')' after register, found '{}'", close.text),
1697                span: close.span,
1698            });
1699        }
1700        self.advance();
1701
1702        let mem = MemoryOperand {
1703            base: Some(reg),
1704            disp: offset as i64,
1705            ..Default::default()
1706        };
1707        Ok((Operand::Memory(Box::new(mem)), None))
1708    }
1709
1710    /// Parse a memory operand: `[base + index*scale + disp]`
1711    fn parse_memory_operand(&mut self) -> Result<MemoryOperand, AsmError> {
1712        let open = self.advance().clone(); // consume '['
1713        debug_assert_eq!(open.kind, TokenKind::OpenBracket);
1714
1715        let mut mem = MemoryOperand::default();
1716        let mut _expect_term = true;
1717        let mut sign: i64 = 1;
1718
1719        while self.peek().kind != TokenKind::CloseBracket {
1720            if self.at_end() {
1721                return Err(AsmError::Syntax {
1722                    msg: String::from("unterminated memory operand, expected ']'"),
1723                    span: open.span,
1724                });
1725            }
1726
1727            let tok = self.peek().clone();
1728
1729            match &tok.kind {
1730                TokenKind::Plus | TokenKind::Comma => {
1731                    // Comma inside brackets is ARM/AArch64 syntax: [Rn, #offset]
1732                    self.advance();
1733                    sign = 1;
1734                    _expect_term = true;
1735                    continue;
1736                }
1737                TokenKind::Minus => {
1738                    self.advance();
1739                    sign = -1;
1740                    _expect_term = true;
1741                    continue;
1742                }
1743                TokenKind::Ident => {
1744                    let mut lbuf = [0u8; 32];
1745                    let lower = to_lower_buf(&tok.text, &mut lbuf);
1746                    if let Some(reg) = parse_register_lower(lower, self.arch) {
1747                        self.advance();
1748                        // Check if this register is multiplied by a scale
1749                        if self.peek().kind == TokenKind::Star {
1750                            self.advance(); // consume '*'
1751                            let scale_tok = self.peek().clone();
1752                            if let TokenKind::Number(s) = scale_tok.kind {
1753                                if !matches!(s, 1 | 2 | 4 | 8) {
1754                                    return Err(AsmError::Syntax {
1755                                        msg: String::from("scale factor must be 1, 2, 4, or 8"),
1756                                        span: scale_tok.span,
1757                                    });
1758                                }
1759                                self.advance();
1760                                mem.index = Some(reg);
1761                                mem.scale = s as u8;
1762                                mem.index_subtract = sign < 0;
1763                            } else {
1764                                return Err(AsmError::Syntax {
1765                                    msg: String::from("expected scale factor (1, 2, 4, or 8)"),
1766                                    span: scale_tok.span,
1767                                });
1768                            }
1769                        } else if mem.base.is_none() {
1770                            mem.base = Some(reg);
1771                        } else if mem.index.is_none() {
1772                            mem.index = Some(reg);
1773                            mem.scale = 1;
1774                            mem.index_subtract = sign < 0;
1775                        } else {
1776                            return Err(AsmError::Syntax {
1777                                msg: String::from("too many registers in memory operand"),
1778                                span: tok.span,
1779                            });
1780                        }
1781                    } else {
1782                        // Label in memory operand
1783                        self.advance();
1784                        mem.disp_label = Some(tok.text.to_string());
1785                    }
1786                    _expect_term = false;
1787                }
1788                TokenKind::Number(n) => {
1789                    self.advance();
1790                    // Check if number*register (e.g., 4*rbx)
1791                    if self.peek().kind == TokenKind::Star {
1792                        self.advance(); // consume '*'
1793                        let reg_tok = self.peek().clone();
1794                        if reg_tok.kind == TokenKind::Ident {
1795                            let mut lbuf = [0u8; 32];
1796                            let lower = to_lower_buf(&reg_tok.text, &mut lbuf);
1797                            if let Some(reg) = parse_register_lower(lower, self.arch) {
1798                                if !matches!(*n, 1 | 2 | 4 | 8) {
1799                                    return Err(AsmError::Syntax {
1800                                        msg: String::from("scale factor must be 1, 2, 4, or 8"),
1801                                        span: tok.span,
1802                                    });
1803                                }
1804                                self.advance();
1805                                mem.index = Some(reg);
1806                                mem.scale = *n as u8;
1807                                mem.index_subtract = sign < 0;
1808                                _expect_term = false;
1809                                continue;
1810                            }
1811                        }
1812                        return Err(AsmError::Syntax {
1813                            msg: String::from("expected register after scale factor"),
1814                            span: reg_tok.span,
1815                        });
1816                    }
1817                    mem.disp = mem.disp.wrapping_add(sign * (*n as i64));
1818                    _expect_term = false;
1819                }
1820                _ => {
1821                    return Err(AsmError::Syntax {
1822                        msg: alloc::format!("unexpected token '{}' in memory operand", tok.text),
1823                        span: tok.span,
1824                    });
1825                }
1826            }
1827        }
1828
1829        self.advance(); // consume ']'
1830
1831        // ARM/AArch64 writeback: [Rn, #imm]!  → pre-index
1832        if self.peek().kind == TokenKind::Bang {
1833            self.advance(); // consume '!'
1834            mem.addr_mode = AddrMode::PreIndex;
1835        }
1836
1837        // Validate: RSP/ESP/SP cannot be used as a SIB index register.
1838        // In the SIB byte, index code 0b100 (RSP's base_code) means "no index."
1839        // R12 (base_code 4 + REX.X) IS valid because REX.X distinguishes it.
1840        if let Some(idx) = mem.index {
1841            if idx.base_code() == 4 && !idx.is_extended() {
1842                return Err(AsmError::Syntax {
1843                    msg: String::from("RSP/ESP/SP cannot be used as a SIB index register"),
1844                    span: open.span,
1845                });
1846            }
1847        }
1848
1849        Ok(mem)
1850    }
1851}
1852
1853/// Parse a register name — **case-insensitive**, zero heap allocations.
1854///
1855/// Architecture-aware: conflicting names like `r8`-`r15`, `sp` are resolved
1856/// based on the target architecture.
1857pub fn parse_register(name: &str, arch: Arch) -> Option<Register> {
1858    // Stack-based lowercase (register names are at most ~5 chars; 16 is plenty).
1859    let mut buf = [0u8; 16];
1860    let name = to_lower_buf(name, &mut buf);
1861    parse_register_lower(name, arch)
1862}
1863
1864/// Inner register parser — expects **already-lowered** input.
1865fn parse_register_lower(name: &str, arch: Arch) -> Option<Register> {
1866    use Register::*;
1867
1868    // Architecture-specific fast path for ARM / AArch64 / RISC-V
1869    match arch {
1870        Arch::Arm | Arch::Thumb => return parse_register_arm(name),
1871        Arch::Aarch64 => return parse_register_aarch64(name),
1872        Arch::Rv32 | Arch::Rv64 => return parse_register_riscv(name),
1873        _ => {}
1874    }
1875
1876    // x86 / x86-64 register names
1877    match name {
1878        // 64-bit GP
1879        "rax" => Some(Rax),
1880        "rcx" => Some(Rcx),
1881        "rdx" => Some(Rdx),
1882        "rbx" => Some(Rbx),
1883        "rsp" => Some(Rsp),
1884        "rbp" => Some(Rbp),
1885        "rsi" => Some(Rsi),
1886        "rdi" => Some(Rdi),
1887        "r8" => Some(R8),
1888        "r9" => Some(R9),
1889        "r10" => Some(R10),
1890        "r11" => Some(R11),
1891        "r12" => Some(R12),
1892        "r13" => Some(R13),
1893        "r14" => Some(R14),
1894        "r15" => Some(R15),
1895        // 32-bit GP
1896        "eax" => Some(Eax),
1897        "ecx" => Some(Ecx),
1898        "edx" => Some(Edx),
1899        "ebx" => Some(Ebx),
1900        "esp" => Some(Esp),
1901        "ebp" => Some(Ebp),
1902        "esi" => Some(Esi),
1903        "edi" => Some(Edi),
1904        "r8d" => Some(R8d),
1905        "r9d" => Some(R9d),
1906        "r10d" => Some(R10d),
1907        "r11d" => Some(R11d),
1908        "r12d" => Some(R12d),
1909        "r13d" => Some(R13d),
1910        "r14d" => Some(R14d),
1911        "r15d" => Some(R15d),
1912        // 16-bit GP
1913        "ax" => Some(Ax),
1914        "cx" => Some(Cx),
1915        "dx" => Some(Dx),
1916        "bx" => Some(Bx),
1917        "sp" => Some(Sp),
1918        "bp" => Some(Bp),
1919        "si" => Some(Si),
1920        "di" => Some(Di),
1921        "r8w" => Some(R8w),
1922        "r9w" => Some(R9w),
1923        "r10w" => Some(R10w),
1924        "r11w" => Some(R11w),
1925        "r12w" => Some(R12w),
1926        "r13w" => Some(R13w),
1927        "r14w" => Some(R14w),
1928        "r15w" => Some(R15w),
1929        // 8-bit GP
1930        "al" => Some(Al),
1931        "cl" => Some(Cl),
1932        "dl" => Some(Dl),
1933        "bl" => Some(Bl),
1934        "spl" => Some(Spl),
1935        "bpl" => Some(Bpl),
1936        "sil" => Some(Sil),
1937        "dil" => Some(Dil),
1938        "ah" => Some(Ah),
1939        "ch" => Some(Ch),
1940        "dh" => Some(Dh),
1941        "bh" => Some(Bh),
1942        "r8b" => Some(R8b),
1943        "r9b" => Some(R9b),
1944        "r10b" => Some(R10b),
1945        "r11b" => Some(R11b),
1946        "r12b" => Some(R12b),
1947        "r13b" => Some(R13b),
1948        "r14b" => Some(R14b),
1949        "r15b" => Some(R15b),
1950        // Special
1951        "rip" => Some(Rip),
1952        "eip" => Some(Eip),
1953        // Segment
1954        "cs" => Some(Cs),
1955        "ds" => Some(Ds),
1956        "es" => Some(Es),
1957        "fs" => Some(Fs),
1958        "gs" => Some(Gs),
1959        "ss" => Some(Ss),
1960        // XMM
1961        "xmm0" => Some(Xmm0),
1962        "xmm1" => Some(Xmm1),
1963        "xmm2" => Some(Xmm2),
1964        "xmm3" => Some(Xmm3),
1965        "xmm4" => Some(Xmm4),
1966        "xmm5" => Some(Xmm5),
1967        "xmm6" => Some(Xmm6),
1968        "xmm7" => Some(Xmm7),
1969        "xmm8" => Some(Xmm8),
1970        "xmm9" => Some(Xmm9),
1971        "xmm10" => Some(Xmm10),
1972        "xmm11" => Some(Xmm11),
1973        "xmm12" => Some(Xmm12),
1974        "xmm13" => Some(Xmm13),
1975        "xmm14" => Some(Xmm14),
1976        "xmm15" => Some(Xmm15),
1977        // YMM
1978        "ymm0" => Some(Ymm0),
1979        "ymm1" => Some(Ymm1),
1980        "ymm2" => Some(Ymm2),
1981        "ymm3" => Some(Ymm3),
1982        "ymm4" => Some(Ymm4),
1983        "ymm5" => Some(Ymm5),
1984        "ymm6" => Some(Ymm6),
1985        "ymm7" => Some(Ymm7),
1986        "ymm8" => Some(Ymm8),
1987        "ymm9" => Some(Ymm9),
1988        "ymm10" => Some(Ymm10),
1989        "ymm11" => Some(Ymm11),
1990        "ymm12" => Some(Ymm12),
1991        "ymm13" => Some(Ymm13),
1992        "ymm14" => Some(Ymm14),
1993        "ymm15" => Some(Ymm15),
1994        // ZMM
1995        "zmm0" => Some(Zmm0),
1996        "zmm1" => Some(Zmm1),
1997        "zmm2" => Some(Zmm2),
1998        "zmm3" => Some(Zmm3),
1999        "zmm4" => Some(Zmm4),
2000        "zmm5" => Some(Zmm5),
2001        "zmm6" => Some(Zmm6),
2002        "zmm7" => Some(Zmm7),
2003        "zmm8" => Some(Zmm8),
2004        "zmm9" => Some(Zmm9),
2005        "zmm10" => Some(Zmm10),
2006        "zmm11" => Some(Zmm11),
2007        "zmm12" => Some(Zmm12),
2008        "zmm13" => Some(Zmm13),
2009        "zmm14" => Some(Zmm14),
2010        "zmm15" => Some(Zmm15),
2011        "zmm16" => Some(Zmm16),
2012        "zmm17" => Some(Zmm17),
2013        "zmm18" => Some(Zmm18),
2014        "zmm19" => Some(Zmm19),
2015        "zmm20" => Some(Zmm20),
2016        "zmm21" => Some(Zmm21),
2017        "zmm22" => Some(Zmm22),
2018        "zmm23" => Some(Zmm23),
2019        "zmm24" => Some(Zmm24),
2020        "zmm25" => Some(Zmm25),
2021        "zmm26" => Some(Zmm26),
2022        "zmm27" => Some(Zmm27),
2023        "zmm28" => Some(Zmm28),
2024        "zmm29" => Some(Zmm29),
2025        "zmm30" => Some(Zmm30),
2026        "zmm31" => Some(Zmm31),
2027        // Opmask
2028        "k0" => Some(K0),
2029        "k1" => Some(K1),
2030        "k2" => Some(K2),
2031        "k3" => Some(K3),
2032        "k4" => Some(K4),
2033        "k5" => Some(K5),
2034        "k6" => Some(K6),
2035        "k7" => Some(K7),
2036        _ => None,
2037    }
2038}
2039
/// Report whether `name` is one of the segment-register names `cs`, `ds`,
/// `es`, `fs`, `gs` or `ss`, compared ASCII case-insensitively.
fn is_segment_name(name: &str) -> bool {
    ["cs", "ds", "es", "fs", "gs", "ss"]
        .iter()
        .any(|seg| name.eq_ignore_ascii_case(seg))
}
2048
2049fn parse_segment(name: &str) -> Option<Register> {
2050    if name.eq_ignore_ascii_case("cs") {
2051        Some(Register::Cs)
2052    } else if name.eq_ignore_ascii_case("ds") {
2053        Some(Register::Ds)
2054    } else if name.eq_ignore_ascii_case("es") {
2055        Some(Register::Es)
2056    } else if name.eq_ignore_ascii_case("fs") {
2057        Some(Register::Fs)
2058    } else if name.eq_ignore_ascii_case("gs") {
2059        Some(Register::Gs)
2060    } else if name.eq_ignore_ascii_case("ss") {
2061        Some(Register::Ss)
2062    } else {
2063        None
2064    }
2065}
2066
2067/// ARM32 register name parser.
2068fn parse_register_arm(name: &str) -> Option<Register> {
2069    use Register::*;
2070    match name {
2071        "r0" => Some(ArmR0),
2072        "r1" => Some(ArmR1),
2073        "r2" => Some(ArmR2),
2074        "r3" => Some(ArmR3),
2075        "r4" => Some(ArmR4),
2076        "r5" => Some(ArmR5),
2077        "r6" => Some(ArmR6),
2078        "r7" => Some(ArmR7),
2079        "r8" => Some(ArmR8),
2080        "r9" => Some(ArmR9),
2081        "r10" => Some(ArmR10),
2082        "r11" | "fp" => Some(ArmR11),
2083        "r12" | "ip" => Some(ArmR12),
2084        "r13" | "sp" => Some(ArmSp),
2085        "r14" | "lr" => Some(ArmLr),
2086        "r15" | "pc" => Some(ArmPc),
2087        "cpsr" => Some(ArmCpsr),
2088        _ => None,
2089    }
2090}
2091
2092/// AArch64 register name parser.
2093fn parse_register_aarch64(name: &str) -> Option<Register> {
2094    use Register::*;
2095    match name {
2096        "x0" => Some(A64X0),
2097        "x1" => Some(A64X1),
2098        "x2" => Some(A64X2),
2099        "x3" => Some(A64X3),
2100        "x4" => Some(A64X4),
2101        "x5" => Some(A64X5),
2102        "x6" => Some(A64X6),
2103        "x7" => Some(A64X7),
2104        "x8" => Some(A64X8),
2105        "x9" => Some(A64X9),
2106        "x10" => Some(A64X10),
2107        "x11" => Some(A64X11),
2108        "x12" => Some(A64X12),
2109        "x13" => Some(A64X13),
2110        "x14" => Some(A64X14),
2111        "x15" => Some(A64X15),
2112        "x16" => Some(A64X16),
2113        "x17" => Some(A64X17),
2114        "x18" => Some(A64X18),
2115        "x19" => Some(A64X19),
2116        "x20" => Some(A64X20),
2117        "x21" => Some(A64X21),
2118        "x22" => Some(A64X22),
2119        "x23" => Some(A64X23),
2120        "x24" => Some(A64X24),
2121        "x25" => Some(A64X25),
2122        "x26" => Some(A64X26),
2123        "x27" => Some(A64X27),
2124        "x28" => Some(A64X28),
2125        "x29" => Some(A64X29),
2126        "x30" => Some(A64X30),
2127        "fp" => Some(A64X29),
2128        "lr" => Some(A64X30),
2129        "sp" => Some(A64Sp),
2130        "xzr" => Some(A64Xzr),
2131        "w0" => Some(A64W0),
2132        "w1" => Some(A64W1),
2133        "w2" => Some(A64W2),
2134        "w3" => Some(A64W3),
2135        "w4" => Some(A64W4),
2136        "w5" => Some(A64W5),
2137        "w6" => Some(A64W6),
2138        "w7" => Some(A64W7),
2139        "w8" => Some(A64W8),
2140        "w9" => Some(A64W9),
2141        "w10" => Some(A64W10),
2142        "w11" => Some(A64W11),
2143        "w12" => Some(A64W12),
2144        "w13" => Some(A64W13),
2145        "w14" => Some(A64W14),
2146        "w15" => Some(A64W15),
2147        "w16" => Some(A64W16),
2148        "w17" => Some(A64W17),
2149        "w18" => Some(A64W18),
2150        "w19" => Some(A64W19),
2151        "w20" => Some(A64W20),
2152        "w21" => Some(A64W21),
2153        "w22" => Some(A64W22),
2154        "w23" => Some(A64W23),
2155        "w24" => Some(A64W24),
2156        "w25" => Some(A64W25),
2157        "w26" => Some(A64W26),
2158        "w27" => Some(A64W27),
2159        "w28" => Some(A64W28),
2160        "w29" => Some(A64W29),
2161        "w30" => Some(A64W30),
2162        "wzr" => Some(A64Wzr),
2163        // SIMD/FP vector registers (V0–V31)
2164        "v0" => Some(A64V0),
2165        "v1" => Some(A64V1),
2166        "v2" => Some(A64V2),
2167        "v3" => Some(A64V3),
2168        "v4" => Some(A64V4),
2169        "v5" => Some(A64V5),
2170        "v6" => Some(A64V6),
2171        "v7" => Some(A64V7),
2172        "v8" => Some(A64V8),
2173        "v9" => Some(A64V9),
2174        "v10" => Some(A64V10),
2175        "v11" => Some(A64V11),
2176        "v12" => Some(A64V12),
2177        "v13" => Some(A64V13),
2178        "v14" => Some(A64V14),
2179        "v15" => Some(A64V15),
2180        "v16" => Some(A64V16),
2181        "v17" => Some(A64V17),
2182        "v18" => Some(A64V18),
2183        "v19" => Some(A64V19),
2184        "v20" => Some(A64V20),
2185        "v21" => Some(A64V21),
2186        "v22" => Some(A64V22),
2187        "v23" => Some(A64V23),
2188        "v24" => Some(A64V24),
2189        "v25" => Some(A64V25),
2190        "v26" => Some(A64V26),
2191        "v27" => Some(A64V27),
2192        "v28" => Some(A64V28),
2193        "v29" => Some(A64V29),
2194        "v30" => Some(A64V30),
2195        "v31" => Some(A64V31),
2196        // SIMD/FP scalar quad registers (Q0–Q31)
2197        "q0" => Some(A64Q0),
2198        "q1" => Some(A64Q1),
2199        "q2" => Some(A64Q2),
2200        "q3" => Some(A64Q3),
2201        "q4" => Some(A64Q4),
2202        "q5" => Some(A64Q5),
2203        "q6" => Some(A64Q6),
2204        "q7" => Some(A64Q7),
2205        "q8" => Some(A64Q8),
2206        "q9" => Some(A64Q9),
2207        "q10" => Some(A64Q10),
2208        "q11" => Some(A64Q11),
2209        "q12" => Some(A64Q12),
2210        "q13" => Some(A64Q13),
2211        "q14" => Some(A64Q14),
2212        "q15" => Some(A64Q15),
2213        "q16" => Some(A64Q16),
2214        "q17" => Some(A64Q17),
2215        "q18" => Some(A64Q18),
2216        "q19" => Some(A64Q19),
2217        "q20" => Some(A64Q20),
2218        "q21" => Some(A64Q21),
2219        "q22" => Some(A64Q22),
2220        "q23" => Some(A64Q23),
2221        "q24" => Some(A64Q24),
2222        "q25" => Some(A64Q25),
2223        "q26" => Some(A64Q26),
2224        "q27" => Some(A64Q27),
2225        "q28" => Some(A64Q28),
2226        "q29" => Some(A64Q29),
2227        "q30" => Some(A64Q30),
2228        "q31" => Some(A64Q31),
2229        // SIMD/FP scalar double registers (D0–D31)
2230        "d0" => Some(A64D0),
2231        "d1" => Some(A64D1),
2232        "d2" => Some(A64D2),
2233        "d3" => Some(A64D3),
2234        "d4" => Some(A64D4),
2235        "d5" => Some(A64D5),
2236        "d6" => Some(A64D6),
2237        "d7" => Some(A64D7),
2238        "d8" => Some(A64D8),
2239        "d9" => Some(A64D9),
2240        "d10" => Some(A64D10),
2241        "d11" => Some(A64D11),
2242        "d12" => Some(A64D12),
2243        "d13" => Some(A64D13),
2244        "d14" => Some(A64D14),
2245        "d15" => Some(A64D15),
2246        "d16" => Some(A64D16),
2247        "d17" => Some(A64D17),
2248        "d18" => Some(A64D18),
2249        "d19" => Some(A64D19),
2250        "d20" => Some(A64D20),
2251        "d21" => Some(A64D21),
2252        "d22" => Some(A64D22),
2253        "d23" => Some(A64D23),
2254        "d24" => Some(A64D24),
2255        "d25" => Some(A64D25),
2256        "d26" => Some(A64D26),
2257        "d27" => Some(A64D27),
2258        "d28" => Some(A64D28),
2259        "d29" => Some(A64D29),
2260        "d30" => Some(A64D30),
2261        "d31" => Some(A64D31),
2262        // SIMD/FP scalar single registers (S0–S31)
2263        "s0" => Some(A64S0),
2264        "s1" => Some(A64S1),
2265        "s2" => Some(A64S2),
2266        "s3" => Some(A64S3),
2267        "s4" => Some(A64S4),
2268        "s5" => Some(A64S5),
2269        "s6" => Some(A64S6),
2270        "s7" => Some(A64S7),
2271        "s8" => Some(A64S8),
2272        "s9" => Some(A64S9),
2273        "s10" => Some(A64S10),
2274        "s11" => Some(A64S11),
2275        "s12" => Some(A64S12),
2276        "s13" => Some(A64S13),
2277        "s14" => Some(A64S14),
2278        "s15" => Some(A64S15),
2279        "s16" => Some(A64S16),
2280        "s17" => Some(A64S17),
2281        "s18" => Some(A64S18),
2282        "s19" => Some(A64S19),
2283        "s20" => Some(A64S20),
2284        "s21" => Some(A64S21),
2285        "s22" => Some(A64S22),
2286        "s23" => Some(A64S23),
2287        "s24" => Some(A64S24),
2288        "s25" => Some(A64S25),
2289        "s26" => Some(A64S26),
2290        "s27" => Some(A64S27),
2291        "s28" => Some(A64S28),
2292        "s29" => Some(A64S29),
2293        "s30" => Some(A64S30),
2294        "s31" => Some(A64S31),
2295        // SIMD/FP scalar half registers (H0–H31)
2296        "h0" => Some(A64H0),
2297        "h1" => Some(A64H1),
2298        "h2" => Some(A64H2),
2299        "h3" => Some(A64H3),
2300        "h4" => Some(A64H4),
2301        "h5" => Some(A64H5),
2302        "h6" => Some(A64H6),
2303        "h7" => Some(A64H7),
2304        "h8" => Some(A64H8),
2305        "h9" => Some(A64H9),
2306        "h10" => Some(A64H10),
2307        "h11" => Some(A64H11),
2308        "h12" => Some(A64H12),
2309        "h13" => Some(A64H13),
2310        "h14" => Some(A64H14),
2311        "h15" => Some(A64H15),
2312        "h16" => Some(A64H16),
2313        "h17" => Some(A64H17),
2314        "h18" => Some(A64H18),
2315        "h19" => Some(A64H19),
2316        "h20" => Some(A64H20),
2317        "h21" => Some(A64H21),
2318        "h22" => Some(A64H22),
2319        "h23" => Some(A64H23),
2320        "h24" => Some(A64H24),
2321        "h25" => Some(A64H25),
2322        "h26" => Some(A64H26),
2323        "h27" => Some(A64H27),
2324        "h28" => Some(A64H28),
2325        "h29" => Some(A64H29),
2326        "h30" => Some(A64H30),
2327        "h31" => Some(A64H31),
2328        // SIMD/FP scalar byte registers (B0–B31)
2329        "b0" => Some(A64B0),
2330        "b1" => Some(A64B1),
2331        "b2" => Some(A64B2),
2332        "b3" => Some(A64B3),
2333        "b4" => Some(A64B4),
2334        "b5" => Some(A64B5),
2335        "b6" => Some(A64B6),
2336        "b7" => Some(A64B7),
2337        "b8" => Some(A64B8),
2338        "b9" => Some(A64B9),
2339        "b10" => Some(A64B10),
2340        "b11" => Some(A64B11),
2341        "b12" => Some(A64B12),
2342        "b13" => Some(A64B13),
2343        "b14" => Some(A64B14),
2344        "b15" => Some(A64B15),
2345        "b16" => Some(A64B16),
2346        "b17" => Some(A64B17),
2347        "b18" => Some(A64B18),
2348        "b19" => Some(A64B19),
2349        "b20" => Some(A64B20),
2350        "b21" => Some(A64B21),
2351        "b22" => Some(A64B22),
2352        "b23" => Some(A64B23),
2353        "b24" => Some(A64B24),
2354        "b25" => Some(A64B25),
2355        "b26" => Some(A64B26),
2356        "b27" => Some(A64B27),
2357        "b28" => Some(A64B28),
2358        "b29" => Some(A64B29),
2359        "b30" => Some(A64B30),
2360        "b31" => Some(A64B31),
2361        // SVE scalable vector registers (Z0–Z31)
2362        "z0" => Some(A64Z0),
2363        "z1" => Some(A64Z1),
2364        "z2" => Some(A64Z2),
2365        "z3" => Some(A64Z3),
2366        "z4" => Some(A64Z4),
2367        "z5" => Some(A64Z5),
2368        "z6" => Some(A64Z6),
2369        "z7" => Some(A64Z7),
2370        "z8" => Some(A64Z8),
2371        "z9" => Some(A64Z9),
2372        "z10" => Some(A64Z10),
2373        "z11" => Some(A64Z11),
2374        "z12" => Some(A64Z12),
2375        "z13" => Some(A64Z13),
2376        "z14" => Some(A64Z14),
2377        "z15" => Some(A64Z15),
2378        "z16" => Some(A64Z16),
2379        "z17" => Some(A64Z17),
2380        "z18" => Some(A64Z18),
2381        "z19" => Some(A64Z19),
2382        "z20" => Some(A64Z20),
2383        "z21" => Some(A64Z21),
2384        "z22" => Some(A64Z22),
2385        "z23" => Some(A64Z23),
2386        "z24" => Some(A64Z24),
2387        "z25" => Some(A64Z25),
2388        "z26" => Some(A64Z26),
2389        "z27" => Some(A64Z27),
2390        "z28" => Some(A64Z28),
2391        "z29" => Some(A64Z29),
2392        "z30" => Some(A64Z30),
2393        "z31" => Some(A64Z31),
2394        // SVE predicate registers (P0–P15)
2395        "p0" => Some(A64P0),
2396        "p1" => Some(A64P1),
2397        "p2" => Some(A64P2),
2398        "p3" => Some(A64P3),
2399        "p4" => Some(A64P4),
2400        "p5" => Some(A64P5),
2401        "p6" => Some(A64P6),
2402        "p7" => Some(A64P7),
2403        "p8" => Some(A64P8),
2404        "p9" => Some(A64P9),
2405        "p10" => Some(A64P10),
2406        "p11" => Some(A64P11),
2407        "p12" => Some(A64P12),
2408        "p13" => Some(A64P13),
2409        "p14" => Some(A64P14),
2410        "p15" => Some(A64P15),
2411        _ => None,
2412    }
2413}
2414
2415/// Parse a RISC-V register name, supporting both hardware names (x0–x31) and
2416/// ABI names (zero, ra, sp, gp, tp, t0–t6, s0–s11, a0–a7, fp).
2417fn parse_register_riscv(name: &str) -> Option<Register> {
2418    use Register::*;
2419    match name {
2420        // Hardware names
2421        "x0" => Some(RvX0),
2422        "x1" => Some(RvX1),
2423        "x2" => Some(RvX2),
2424        "x3" => Some(RvX3),
2425        "x4" => Some(RvX4),
2426        "x5" => Some(RvX5),
2427        "x6" => Some(RvX6),
2428        "x7" => Some(RvX7),
2429        "x8" => Some(RvX8),
2430        "x9" => Some(RvX9),
2431        "x10" => Some(RvX10),
2432        "x11" => Some(RvX11),
2433        "x12" => Some(RvX12),
2434        "x13" => Some(RvX13),
2435        "x14" => Some(RvX14),
2436        "x15" => Some(RvX15),
2437        "x16" => Some(RvX16),
2438        "x17" => Some(RvX17),
2439        "x18" => Some(RvX18),
2440        "x19" => Some(RvX19),
2441        "x20" => Some(RvX20),
2442        "x21" => Some(RvX21),
2443        "x22" => Some(RvX22),
2444        "x23" => Some(RvX23),
2445        "x24" => Some(RvX24),
2446        "x25" => Some(RvX25),
2447        "x26" => Some(RvX26),
2448        "x27" => Some(RvX27),
2449        "x28" => Some(RvX28),
2450        "x29" => Some(RvX29),
2451        "x30" => Some(RvX30),
2452        "x31" => Some(RvX31),
2453        // ABI names
2454        "zero" => Some(RvX0),
2455        "ra" => Some(RvX1),
2456        "sp" => Some(RvX2),
2457        "gp" => Some(RvX3),
2458        "tp" => Some(RvX4),
2459        "t0" => Some(RvX5),
2460        "t1" => Some(RvX6),
2461        "t2" => Some(RvX7),
2462        "s0" => Some(RvX8),
2463        "fp" => Some(RvX8), // fp is alias for s0
2464        "s1" => Some(RvX9),
2465        "a0" => Some(RvX10),
2466        "a1" => Some(RvX11),
2467        "a2" => Some(RvX12),
2468        "a3" => Some(RvX13),
2469        "a4" => Some(RvX14),
2470        "a5" => Some(RvX15),
2471        "a6" => Some(RvX16),
2472        "a7" => Some(RvX17),
2473        "s2" => Some(RvX18),
2474        "s3" => Some(RvX19),
2475        "s4" => Some(RvX20),
2476        "s5" => Some(RvX21),
2477        "s6" => Some(RvX22),
2478        "s7" => Some(RvX23),
2479        "s8" => Some(RvX24),
2480        "s9" => Some(RvX25),
2481        "s10" => Some(RvX26),
2482        "s11" => Some(RvX27),
2483        "t3" => Some(RvX28),
2484        "t4" => Some(RvX29),
2485        "t5" => Some(RvX30),
2486        "t6" => Some(RvX31),
2487        // FP hardware names
2488        "f0" => Some(RvF0),
2489        "f1" => Some(RvF1),
2490        "f2" => Some(RvF2),
2491        "f3" => Some(RvF3),
2492        "f4" => Some(RvF4),
2493        "f5" => Some(RvF5),
2494        "f6" => Some(RvF6),
2495        "f7" => Some(RvF7),
2496        "f8" => Some(RvF8),
2497        "f9" => Some(RvF9),
2498        "f10" => Some(RvF10),
2499        "f11" => Some(RvF11),
2500        "f12" => Some(RvF12),
2501        "f13" => Some(RvF13),
2502        "f14" => Some(RvF14),
2503        "f15" => Some(RvF15),
2504        "f16" => Some(RvF16),
2505        "f17" => Some(RvF17),
2506        "f18" => Some(RvF18),
2507        "f19" => Some(RvF19),
2508        "f20" => Some(RvF20),
2509        "f21" => Some(RvF21),
2510        "f22" => Some(RvF22),
2511        "f23" => Some(RvF23),
2512        "f24" => Some(RvF24),
2513        "f25" => Some(RvF25),
2514        "f26" => Some(RvF26),
2515        "f27" => Some(RvF27),
2516        "f28" => Some(RvF28),
2517        "f29" => Some(RvF29),
2518        "f30" => Some(RvF30),
2519        "f31" => Some(RvF31),
2520        // FP ABI names
2521        "ft0" => Some(RvF0),
2522        "ft1" => Some(RvF1),
2523        "ft2" => Some(RvF2),
2524        "ft3" => Some(RvF3),
2525        "ft4" => Some(RvF4),
2526        "ft5" => Some(RvF5),
2527        "ft6" => Some(RvF6),
2528        "ft7" => Some(RvF7),
2529        "fs0" => Some(RvF8),
2530        "fs1" => Some(RvF9),
2531        "fa0" => Some(RvF10),
2532        "fa1" => Some(RvF11),
2533        "fa2" => Some(RvF12),
2534        "fa3" => Some(RvF13),
2535        "fa4" => Some(RvF14),
2536        "fa5" => Some(RvF15),
2537        "fa6" => Some(RvF16),
2538        "fa7" => Some(RvF17),
2539        "fs2" => Some(RvF18),
2540        "fs3" => Some(RvF19),
2541        "fs4" => Some(RvF20),
2542        "fs5" => Some(RvF21),
2543        "fs6" => Some(RvF22),
2544        "fs7" => Some(RvF23),
2545        "fs8" => Some(RvF24),
2546        "fs9" => Some(RvF25),
2547        "fs10" => Some(RvF26),
2548        "fs11" => Some(RvF27),
2549        "ft8" => Some(RvF28),
2550        "ft9" => Some(RvF29),
2551        "ft10" => Some(RvF30),
2552        "ft11" => Some(RvF31),
2553        // Vector registers (V extension) — hardware names
2554        "v0" => Some(RvV0),
2555        "v1" => Some(RvV1),
2556        "v2" => Some(RvV2),
2557        "v3" => Some(RvV3),
2558        "v4" => Some(RvV4),
2559        "v5" => Some(RvV5),
2560        "v6" => Some(RvV6),
2561        "v7" => Some(RvV7),
2562        "v8" => Some(RvV8),
2563        "v9" => Some(RvV9),
2564        "v10" => Some(RvV10),
2565        "v11" => Some(RvV11),
2566        "v12" => Some(RvV12),
2567        "v13" => Some(RvV13),
2568        "v14" => Some(RvV14),
2569        "v15" => Some(RvV15),
2570        "v16" => Some(RvV16),
2571        "v17" => Some(RvV17),
2572        "v18" => Some(RvV18),
2573        "v19" => Some(RvV19),
2574        "v20" => Some(RvV20),
2575        "v21" => Some(RvV21),
2576        "v22" => Some(RvV22),
2577        "v23" => Some(RvV23),
2578        "v24" => Some(RvV24),
2579        "v25" => Some(RvV25),
2580        "v26" => Some(RvV26),
2581        "v27" => Some(RvV27),
2582        "v28" => Some(RvV28),
2583        "v29" => Some(RvV29),
2584        "v30" => Some(RvV30),
2585        "v31" => Some(RvV31),
2586        _ => None,
2587    }
2588}
2589
2590/// Convenience: parse assembly text directly into statements.
2591pub fn parse_str(source: &str) -> Result<Vec<Statement>, AsmError> {
2592    let tokens = crate::lexer::tokenize(source)?;
2593    parse(&tokens)
2594}
2595
2596/// Strip AT&T mnemonic size suffix and return (base_mnemonic, size_hint).
2597///
2598/// GAS convention: `movq` → `mov` + Qword, `addl` → `add` + Dword, etc.
2599/// Also handles AT&T-specific mnemonics that have no Intel equivalent via
2600/// simple suffix stripping (e.g., `movzbl` → `movzx`, `cltq` → `cdqe`).
2601fn strip_att_suffix(mnemonic: &str) -> Option<(Mnemonic, OperandSize)> {
2602    // AT&T-specific mnemonic translations — these have completely different
2603    // naming conventions from Intel and cannot be derived by suffix stripping.
2604    let att_translations: &[(&str, &str, OperandSize)] = &[
2605        // movzx variants: movz{src_size}{dst_size}
2606        ("movzbl", "movzx", OperandSize::Dword),
2607        ("movzbw", "movzx", OperandSize::Word),
2608        ("movzbq", "movzx", OperandSize::Qword),
2609        ("movzwl", "movzx", OperandSize::Dword),
2610        ("movzwq", "movzx", OperandSize::Qword),
2611        // movsx variants: movs{src_size}{dst_size}
2612        ("movsbl", "movsx", OperandSize::Dword),
2613        ("movsbw", "movsx", OperandSize::Word),
2614        ("movsbq", "movsx", OperandSize::Qword),
2615        ("movswl", "movsx", OperandSize::Dword),
2616        ("movswq", "movsx", OperandSize::Qword),
2617        ("movslq", "movsxd", OperandSize::Qword),
2618        // Sign/zero-extend accumulator
2619        ("cbtw", "cbw", OperandSize::Word),
2620        ("cwtl", "cwde", OperandSize::Dword),
2621        ("cwtd", "cwd", OperandSize::Word),
2622        ("cltd", "cdq", OperandSize::Dword),
2623        ("cltq", "cdqe", OperandSize::Qword),
2624        ("cqto", "cqo", OperandSize::Qword),
2625    ];
2626
2627    for &(att, intel, size) in att_translations {
2628        if mnemonic == att {
2629            return Some((Mnemonic::from(intel), size));
2630        }
2631    }
2632
2633    if mnemonic.len() < 2 {
2634        return None;
2635    }
2636
2637    // Mnemonics that should NOT be stripped — they end in b/w/l/q naturally
2638    // and are not size-suffixed GAS mnemonics.
2639    let no_strip = [
2640        "call",
2641        "jmp",
2642        "ret",
2643        "nop",
2644        "hlt",
2645        "int",
2646        "syscall",
2647        "sysenter",
2648        "sysexit",
2649        "cpuid",
2650        "rdtsc",
2651        "rdtscp",
2652        "ud2",
2653        "leave",
2654        "enter",
2655        "pushf",
2656        "popf",
2657        "pushfq",
2658        "popfq",
2659        "lahf",
2660        "sahf",
2661        "clc",
2662        "stc",
2663        "cmc",
2664        "cld",
2665        "std",
2666        "cli",
2667        "sti",
2668        "rep",
2669        "repe",
2670        "repne",
2671        "repz",
2672        "repnz",
2673        "lock",
2674        "pause",
2675        "mfence",
2676        "lfence",
2677        "sfence",
2678        "endbr64",
2679        "endbr32",
2680        "iretq",
2681        "cdq",
2682        "cqo",
2683        "cbw",
2684        "cwde",
2685        "cdqe",
2686        "cwd",
2687        "xlat",
2688        "xlatb",
2689        "swapgs",
2690        "wrmsr",
2691        "rdmsr",
2692        "invd",
2693        "wbinvd",
2694        "clts",
2695        "monitor",
2696        "mwait",
2697        "rdrand",
2698        "rdseed",
2699        "xtest",
2700        "xend",
2701        "vzeroall",
2702        "vzeroupper",
2703        "int3",
2704        // setcc and cmovcc mnemonics — they end in condition codes, not size suffixes
2705        "setal",
2706        "setbl",
2707        "setcl",
2708        "setgl",
2709        "setol",
2710        "setnl",
2711        "setpl",
2712        // condition code endings
2713        "jal",
2714        "jbl",
2715        "jcl",
2716        "jgl",
2717        "jol",
2718        "jnl",
2719        "jpl",
2720        // string ops — movsb/w/d/q, stosb/w/d/q, lodsb/w/d/q, scasb/w/d/q, cmpsb/w/d/q
2721        "movsb",
2722        "movsw",
2723        "movsd",
2724        "movsq",
2725        "stosb",
2726        "stosw",
2727        "stosd",
2728        "stosq",
2729        "lodsb",
2730        "lodsw",
2731        "lodsd",
2732        "lodsq",
2733        "scasb",
2734        "scasw",
2735        "scasd",
2736        "scasq",
2737        "cmpsb",
2738        "cmpsw",
2739        "cmpsd",
2740        "cmpsq",
2741        "insb",
2742        "insw",
2743        "insd",
2744        "outsb",
2745        "outsw",
2746        "outsd",
2747        // IN/OUT with size are their own mnemonics (inb, outb, etc.)
2748        "inb",
2749        "inw",
2750        "inl",
2751        "outb",
2752        "outw",
2753        "outl",
2754        // Loop family
2755        "loop",
2756        "loope",
2757        "loopne",
2758        "loopz",
2759        "loopnz",
2760        "jecxz",
2761        "jrcxz",
2762        // cmpxchg family
2763        "cmpxchg8b",
2764        "cmpxchg16b",
2765        // bswap, xchg, cmpxchg, xadd
2766        "bswap",
2767    ];
2768
2769    let suffix = mnemonic.as_bytes()[mnemonic.len() - 1];
2770    let size = match suffix {
2771        b'b' => OperandSize::Byte,
2772        b'w' => OperandSize::Word,
2773        b'l' => OperandSize::Dword,
2774        b'q' => OperandSize::Qword,
2775        _ => return None,
2776    };
2777
2778    if no_strip.contains(&mnemonic) {
2779        return None;
2780    }
2781
2782    let base = &mnemonic[..mnemonic.len() - 1];
2783
2784    // Don't strip if the base would be empty
2785    if base.is_empty() {
2786        return None;
2787    }
2788
2789    // Known base mnemonics that commonly accept size suffixes
2790    let known_bases = [
2791        "mov", "add", "sub", "adc", "sbb", "and", "or", "xor", "cmp", "test", "push", "pop", "inc",
2792        "dec", "neg", "not", "mul", "imul", "div", "idiv", "lea", "xchg", "cmpxchg", "xadd",
2793        "movzx", "movsx", "movsxd", "shl", "shr", "sar", "rol", "ror", "rcl", "rcr", "bt", "bts",
2794        "btr", "btc", "bsf", "bsr", "set", "cmov", // + condition code suffix
2795        "in", "out", "movabs",
2796    ];
2797
2798    // Direct match or prefix match for setcc/cmovcc
2799    if known_bases.contains(&base) {
2800        return Some((Mnemonic::from(base), size));
2801    }
2802
2803    // cmovcc: cmovnel → cmovne (base = cmovne, suffix was 'l')
2804    // setcc: setnel → setne (won't happen typically, but handle it)
2805    // Jcc: jnel → jne (won't happen due to no_strip, but be safe)
2806    if base.starts_with("cmov") || base.starts_with("set") || base.starts_with('j') {
2807        return Some((Mnemonic::from(base), size));
2808    }
2809
2810    // Fallback: strip and return — GAS accepts suffixes on most mnemonics
2811    Some((Mnemonic::from(base), size))
2812}
2813
2814#[cfg(test)]
2815mod tests {
2816    use super::*;
2817
2818    fn parse_one(src: &str) -> Statement {
2819        let stmts = parse_str(src).unwrap();
2820        assert_eq!(
2821            stmts.len(),
2822            1,
2823            "expected 1 statement, got {}: {:?}",
2824            stmts.len(),
2825            stmts
2826        );
2827        stmts.into_iter().next().unwrap()
2828    }
2829
2830    fn parse_instr(src: &str) -> Instruction {
2831        match parse_one(src) {
2832            Statement::Instruction(i) => i,
2833            s => panic!("expected instruction, got {:?}", s),
2834        }
2835    }
2836
2837    // === Basic Instructions ===
2838
/// A bare `nop` keeps its mnemonic and has no operands.
#[test]
fn parse_nop() {
    let instr = parse_instr("nop");
    assert_eq!(instr.mnemonic, "nop");
    assert!(instr.operands.is_empty());
}
2845
/// `ret` parses to an instruction with the mnemonic `ret`.
#[test]
fn parse_ret() {
    assert_eq!(parse_instr("ret").mnemonic, "ret");
}
2851
/// `syscall` parses to an instruction with the mnemonic `syscall`.
#[test]
fn parse_syscall() {
    assert_eq!(parse_instr("syscall").mnemonic, "syscall");
}
2857
2858    // === Register-Register ===
2859
/// Two 64-bit register operands are parsed in source order.
#[test]
fn parse_mov_reg_reg() {
    let dst = Operand::Register(Register::Rax);
    let src = Operand::Register(Register::Rbx);

    let instr = parse_instr("mov rax, rbx");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.operands.len(), 2);
    assert_eq!(instr.operands[0], dst);
    assert_eq!(instr.operands[1], src);
}
2868
/// 32-bit register names are recognised as operands of `add`.
#[test]
fn parse_add_r32() {
    let dst = Operand::Register(Register::Eax);
    let src = Operand::Register(Register::Ecx);

    let instr = parse_instr("add eax, ecx");
    assert_eq!(instr.mnemonic, "add");
    assert_eq!(instr.operands[0], dst);
    assert_eq!(instr.operands[1], src);
}
2876
/// 8-bit register names are recognised as operands.
#[test]
fn parse_xor_r8() {
    let dst = Operand::Register(Register::Al);
    let src = Operand::Register(Register::Bl);

    let instr = parse_instr("xor al, bl");
    assert_eq!(instr.operands[0], dst);
    assert_eq!(instr.operands[1], src);
}
2883
2884    // === Register-Immediate ===
2885
/// A decimal literal becomes an immediate operand.
#[test]
fn parse_mov_reg_imm() {
    let instr = parse_instr("mov rax, 42");
    let dst = &instr.operands[0];
    let src = &instr.operands[1];
    assert_eq!(*dst, Operand::Register(Register::Rax));
    assert_eq!(*src, Operand::Immediate(42));
}
2892
/// A `0x`-prefixed literal becomes an immediate with the same value.
#[test]
fn parse_mov_reg_hex() {
    let instr = parse_instr("mov rdi, 0xDEAD");
    let expected = Operand::Immediate(0xDEAD);
    assert_eq!(instr.operands[1], expected);
}
2898
/// A leading minus sign produces a negative immediate.
#[test]
fn parse_add_imm_negative() {
    let instr = parse_instr("add rsp, -8");
    let expected = Operand::Immediate(-8);
    assert_eq!(instr.operands[1], expected);
}
2904
/// A character literal becomes an immediate holding its code (`'A'` is 65).
#[test]
fn parse_char_immediate() {
    let instr = parse_instr("mov al, 'A'");
    let expected = Operand::Immediate(65);
    assert_eq!(instr.operands[1], expected);
}
2910
2911    // === Memory Operands ===
2912
/// `[rbx]` yields a memory operand with only a base register set.
#[test]
fn parse_mem_base() {
    let instr = parse_instr("mov rax, [rbx]");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));

    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rbx));
    assert_eq!(mem.index, None);
    assert_eq!(mem.disp, 0);
}
2926
/// `[rbp + 8]` yields base `rbp` with displacement 8.
#[test]
fn parse_mem_base_disp() {
    let instr = parse_instr("mov rax, [rbp + 8]");
    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rbp));
    assert_eq!(mem.disp, 8);
}
2938
/// `[rbp - 0x10]` yields base `rbp` with displacement -16.
#[test]
fn parse_mem_base_neg_disp() {
    let instr = parse_instr("mov rax, [rbp - 0x10]");
    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rbp));
    assert_eq!(mem.disp, -16);
}
2950
/// `[rbx + rcx]` yields base `rbx`, index `rcx` and a scale of 1.
#[test]
fn parse_mem_base_index() {
    let instr = parse_instr("mov rax, [rbx + rcx]");
    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rbx));
    assert_eq!(mem.index, Some(Register::Rcx));
    assert_eq!(mem.scale, 1);
}
2963
/// `[rbx + rcx*8]` yields base `rbx`, index `rcx` and a scale of 8.
#[test]
fn parse_mem_base_index_scale() {
    let instr = parse_instr("lea rax, [rbx + rcx*8]");
    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rbx));
    assert_eq!(mem.index, Some(Register::Rcx));
    assert_eq!(mem.scale, 8);
}
2976
/// Base, scaled index and displacement are all captured from one operand.
#[test]
fn parse_mem_full() {
    let instr = parse_instr("mov rax, [rbx + rcx*4 + 16]");
    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rbx));
    assert_eq!(mem.index, Some(Register::Rcx));
    assert_eq!(mem.scale, 4);
    assert_eq!(mem.disp, 16);
}
2990
/// `[0x1000]` yields a memory operand with no base and displacement 0x1000.
#[test]
fn parse_mem_disp_only() {
    let instr = parse_instr("mov rax, [0x1000]");
    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, None);
    assert_eq!(mem.disp, 0x1000);
}
3002
3003    // === Size Hints ===
3004
/// `byte ptr` sets a Byte size hint and leaves the memory operand intact.
#[test]
fn parse_byte_ptr() {
    let instr = parse_instr("mov byte ptr [rax], 0");
    assert_eq!(instr.size_hint, Some(OperandSize::Byte));

    let mem = match &instr.operands[0] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory operand"),
    };
    assert_eq!(mem.base, Some(Register::Rax));
}
3014
/// A size keyword without `ptr` still sets the size hint.
#[test]
fn parse_qword_no_ptr() {
    let hint = parse_instr("mov qword [rax], 0").size_hint;
    assert_eq!(hint, Some(OperandSize::Qword));
}
3020
/// `dword ptr` sets a Dword size hint.
#[test]
fn parse_dword_ptr() {
    let hint = parse_instr("add dword ptr [rbp - 4], 1").size_hint;
    assert_eq!(hint, Some(OperandSize::Dword));
}
3026
3027    // === Labels ===
3028
/// `start:` parses to a label statement named `start`.
#[test]
fn parse_label_def() {
    let label = match parse_one("start:") {
        Statement::Label(label, _) => label,
        _ => panic!("expected label"),
    };
    assert_eq!(label, "start");
}
3037
/// An identifier operand of `jmp` becomes a label operand.
#[test]
fn parse_label_ref() {
    let target = Operand::Label(String::from("loop"));
    let instr = parse_instr("jmp loop");
    assert_eq!(instr.operands[0], target);
}
3043
/// An identifier operand of `call` becomes a label operand.
#[test]
fn parse_call_label() {
    let target = Operand::Label(String::from("printf"));
    let instr = parse_instr("call printf");
    assert_eq!(instr.operands[0], target);
}
3049
/// `data + 4` becomes an expression operand adding a label and a number.
#[test]
fn parse_label_with_offset() {
    let instr = parse_instr("lea rax, data + 4");
    let (lhs, rhs) = match &instr.operands[1] {
        Operand::Expression(Expr::Add(lhs, rhs)) => (lhs, rhs),
        _ => panic!("expected expression operand"),
    };
    assert_eq!(**lhs, Expr::Label(String::from("data")));
    assert_eq!(**rhs, Expr::Num(4));
}
3061
3062    // === Prefixes ===
3063
/// `lock` is recorded as a prefix and the following word is the mnemonic.
#[test]
fn parse_lock_prefix() {
    let instr = parse_instr("lock add [rax], 1");
    let expected_prefixes = vec![Prefix::Lock];
    assert_eq!(instr.prefixes, expected_prefixes);
    assert_eq!(instr.mnemonic, "add");
}
3070
/// `rep` is recorded as a prefix and `movsb` stays the mnemonic.
#[test]
fn parse_rep_prefix() {
    let instr = parse_instr("rep movsb");
    let expected_prefixes = vec![Prefix::Rep];
    assert_eq!(instr.prefixes, expected_prefixes);
    assert_eq!(instr.mnemonic, "movsb");
}
3077
3078    // === Directives ===
3079
/// `.byte` produces byte-sized data with one integer per listed value.
#[test]
fn parse_byte_directive() {
    let data = match parse_one(".byte 0x90, 0xCC") {
        Statement::Data(data) => data,
        _ => panic!("expected data"),
    };
    assert_eq!(data.size, DataSize::Byte);
    assert_eq!(
        data.values,
        vec![DataValue::Integer(0x90), DataValue::Integer(0xCC)]
    );
}
3094
/// `.word` produces word-sized data holding the given integer.
#[test]
fn parse_word_directive() {
    let data = match parse_one(".word 0x1234") {
        Statement::Data(data) => data,
        _ => panic!("expected data"),
    };
    assert_eq!(data.size, DataSize::Word);
    assert_eq!(data.values, vec![DataValue::Integer(0x1234)]);
}
3106
/// `.ascii` produces byte-sized data holding the raw string bytes.
#[test]
fn parse_ascii_directive() {
    let data = match parse_one(".ascii \"hello\"") {
        Statement::Data(data) => data,
        _ => panic!("expected data"),
    };
    assert_eq!(data.size, DataSize::Byte);
    assert_eq!(data.values, vec![DataValue::Bytes(b"hello".to_vec())]);
}
3118
/// `.asciz` appends a trailing NUL byte to the string bytes.
#[test]
fn parse_asciz_null_terminates() {
    let data = match parse_one(".asciz \"ok\"") {
        Statement::Data(data) => data,
        _ => panic!("expected data"),
    };
    assert_eq!(data.values, vec![DataValue::Bytes(b"ok\0".to_vec())]);
}
3129
/// `.equ NAME, value` produces a constant with that name and value.
#[test]
fn parse_equ_directive() {
    let constant = match parse_one(".equ SYS_WRITE, 1") {
        Statement::Const(constant) => constant,
        _ => panic!("expected const"),
    };
    assert_eq!(constant.name, "SYS_WRITE");
    assert_eq!(constant.value, 1);
}
3141
/// `.align 16` produces an alignment of 16 with no fill value.
#[test]
fn parse_align_directive() {
    let align = match parse_one(".align 16") {
        Statement::Align(align) => align,
        _ => panic!("expected align"),
    };
    assert_eq!(align.alignment, 16);
    assert_eq!(align.fill, None);
}
3153
/// `.p2align 4` is a power-of-two alignment: 2^4 = 16 bytes.
#[test]
fn parse_p2align_directive() {
    let align = match parse_one(".p2align 4") {
        Statement::Align(align) => align,
        _ => panic!("expected align"),
    };
    assert_eq!(align.alignment, 16);
}
3164
/// `.fill count, size, value` captures all three arguments.
#[test]
fn parse_fill_directive() {
    let fill = match parse_one(".fill 10, 1, 0x90") {
        Statement::Fill(fill) => fill,
        _ => panic!("expected fill"),
    };
    assert_eq!(fill.count, 10);
    assert_eq!(fill.size, 1);
    assert_eq!(fill.value, 0x90);
}
3177
/// `.space 64` reserves 64 bytes with a fill of 0.
#[test]
fn parse_space_directive() {
    let space = match parse_one(".space 64") {
        Statement::Space(space) => space,
        _ => panic!("expected space"),
    };
    assert_eq!(space.size, 64);
    assert_eq!(space.fill, 0);
}
3189
/// `.org` with a single argument sets the offset and uses a fill of 0.
#[test]
fn parse_org_directive() {
    let org = match parse_one(".org 0x1000") {
        Statement::Org(org) => org,
        _ => panic!("expected org"),
    };
    assert_eq!(org.offset, 0x1000);
    assert_eq!(org.fill, 0x00);
}
3201
/// `.org offset, fill` captures the explicit fill byte.
#[test]
fn parse_org_with_fill() {
    let org = match parse_one(".org 0x100, 0xFF") {
        Statement::Org(org) => org,
        _ => panic!("expected org"),
    };
    assert_eq!(org.offset, 0x100);
    assert_eq!(org.fill, 0xFF);
}
3213
3214    // === Multi-statement ===
3215
/// Newline-separated instructions become separate statements in order.
#[test]
fn parse_multi_line() {
    let stmts = parse_str("nop\nret").unwrap();
    assert_eq!(stmts.len(), 2);

    let (first, second) = match (&stmts[0], &stmts[1]) {
        (Statement::Instruction(first), Statement::Instruction(second)) => (first, second),
        _ => panic!("expected two instructions"),
    };
    assert_eq!(first.mnemonic, "nop");
    assert_eq!(second.mnemonic, "ret");
}
3228
/// A label line followed by an instruction line yields two statements.
#[test]
fn parse_label_and_instruction() {
    let source = "start:\n  mov rax, 1";
    let stmts = parse_str(source).unwrap();
    assert_eq!(stmts.len(), 2);
    assert!(matches!(&stmts[0], Statement::Label(name, _) if name == "start"));
    assert!(matches!(&stmts[1], Statement::Instruction(_)));
}
3236
/// A semicolon separates two statements on one line.
#[test]
fn parse_semicolon_separated() {
    let count = parse_str("nop; ret").unwrap().len();
    assert_eq!(count, 2);
}
3242
3243    // === Case Insensitivity ===
3244
/// Upper-case input is normalised: the mnemonic comes back lowercase and
/// the register is still recognised.
#[test]
fn case_insensitive_mnemonic() {
    let instr = parse_instr("MOV RAX, RBX");
    let dst = Operand::Register(Register::Rax);
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.operands[0], dst);
}
3251
/// `EAX` and `eax` resolve to the same register.
#[test]
fn case_insensitive_register() {
    let instr = parse_instr("xor EAX, eax");
    let eax = Operand::Register(Register::Eax);
    assert_eq!(instr.operands[0], eax);
    assert_eq!(instr.operands[1], eax);
}
3258
3259    // === Extended Registers ===
3260
/// The extended 64-bit registers `r8` and `r15` are recognised.
#[test]
fn parse_extended_reg() {
    let dst = Operand::Register(Register::R8);
    let src = Operand::Register(Register::R15);

    let instr = parse_instr("mov r8, r15");
    assert_eq!(instr.operands[0], dst);
    assert_eq!(instr.operands[1], src);
}
3267
/// The 32-bit views `r8d` and `r15d` of the extended registers are recognised.
#[test]
fn parse_extended_reg_dword() {
    let dst = Operand::Register(Register::R8d);
    let src = Operand::Register(Register::R15d);

    let instr = parse_instr("mov r8d, r15d");
    assert_eq!(instr.operands[0], dst);
    assert_eq!(instr.operands[1], src);
}
3274
3275    // === Edge Cases ===
3276
/// `push` and `pop` each parse to their own mnemonic with one register operand.
///
/// The `pop` half was previously missing even though the test name covers
/// both instructions.
#[test]
fn parse_push_pop() {
    let push = parse_instr("push rbp");
    assert_eq!(push.mnemonic, "push");
    assert_eq!(push.operands[0], Operand::Register(Register::Rbp));

    let pop = parse_instr("pop rbp");
    assert_eq!(pop.mnemonic, "pop");
    assert_eq!(pop.operands[0], Operand::Register(Register::Rbp));
}
3283
/// A RIP-relative operand yields base `rip` with the given displacement.
#[test]
fn parse_lea() {
    let instr = parse_instr("lea rdi, [rip + 0x10]");
    assert_eq!(instr.mnemonic, "lea");

    let mem = match &instr.operands[1] {
        Operand::Memory(mem) => mem,
        _ => panic!("expected memory"),
    };
    assert_eq!(mem.base, Some(Register::Rip));
    assert_eq!(mem.disp, 0x10);
}
3296
/// Three comma-separated operands are all kept, in source order.
#[test]
fn parse_three_operand_imul() {
    let instr = parse_instr("imul rax, rbx, 10");
    let ops = &instr.operands;
    assert_eq!(ops.len(), 3);
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Register(Register::Rbx));
    assert_eq!(ops[2], Operand::Immediate(10));
}
3305
/// `.global` produces no statement, so only the instruction remains.
#[test]
fn global_directive_ignored() {
    let source = ".global main\nmov rax, 1";
    let count = parse_str(source).unwrap().len();
    assert_eq!(count, 1);
}
3312
/// `.section` produces no statement, so only the instruction remains.
#[test]
fn section_directive_ignored() {
    let source = ".section .text\nnop";
    let count = parse_str(source).unwrap().len();
    assert_eq!(count, 1);
}
3318
/// Empty source parses successfully to an empty statement list.
#[test]
fn empty_input() {
    let source = "";
    let stmts = parse_str(source).unwrap();
    assert!(stmts.is_empty());
}
3324
/// Source consisting solely of labels yields one label statement per line.
#[test]
fn only_labels() {
    let source = "start:\nend:";
    let stmts = parse_str(source).unwrap();
    assert_eq!(stmts.len(), 2);
    assert!(matches!(&stmts[0], Statement::Label(n, _) if n == "start"));
    assert!(matches!(&stmts[1], Statement::Label(n, _) if n == "end"));
}
3332
/// A bare label inside brackets yields a memory operand with no base
/// register and the label recorded in `disp_label`.
#[test]
fn mem_with_label() {
    let instr = parse_instr("mov rax, [msg]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, None);
        assert_eq!(mem.disp_label, Some(String::from("msg")));
    } else {
        panic!("expected memory operand with label");
    }
}
3344
/// `xmm0` and `xmm1` parse to their corresponding register operands.
#[test]
fn xmm_registers() {
    let instr = parse_instr("movaps xmm0, xmm1");
    let dst = &instr.operands[0];
    let src = &instr.operands[1];
    assert_eq!(*dst, Operand::Register(Register::Xmm0));
    assert_eq!(*src, Operand::Register(Register::Xmm1));
}
3351
/// An `fs:` prefix on a bracketed operand is recorded as the memory
/// operand's segment, alongside the displacement.
#[test]
fn segment_override_mem() {
    let instr = parse_instr("mov rax, fs:[0x28]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.segment, Some(Register::Fs));
        assert_eq!(mem.disp, 0x28);
    } else {
        panic!("expected segment memory operand");
    }
}
3363
3364    // === name = expression syntax ===
3365
/// `NAME = value` with a decimal literal produces a `Const` statement
/// carrying that name and value.
#[test]
fn parse_name_equals_constant() {
    let parsed = parse_one("EXIT = 60");
    if let Statement::Const(def) = &parsed {
        assert_eq!(def.name, "EXIT");
        assert_eq!(def.value, 60);
    } else {
        panic!("expected const, got {:?}", parsed);
    }
}
3377
/// `NAME = value` accepts a hexadecimal literal on the right-hand side.
#[test]
fn parse_name_equals_hex() {
    let parsed = parse_one("MAGIC = 0xDEAD");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "MAGIC");
        assert_eq!(def.value, 0xDEAD);
    } else {
        panic!("expected const");
    }
}
3389
/// `NAME = value` accepts a negative literal on the right-hand side.
#[test]
fn parse_name_equals_negative() {
    let parsed = parse_one("NEG = -1");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "NEG");
        assert_eq!(def.value, -1);
    } else {
        panic!("expected const");
    }
}
3401
/// `.set NAME, value` produces the same `Const` statement as `NAME = value`.
#[test]
fn parse_set_directive() {
    let parsed = parse_one(".set COUNT, 42");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "COUNT");
        assert_eq!(def.value, 42);
    } else {
        panic!("expected const");
    }
}
3413
/// A constant definition followed by an instruction that uses it yields a
/// `Const` statement and then an `Instruction` statement.
#[test]
fn name_equals_used_in_program() {
    let parsed = parse_str("EXIT = 60\nmov eax, EXIT").unwrap();
    assert_eq!(parsed.len(), 2);
    let (definition, usage) = (&parsed[0], &parsed[1]);
    assert!(matches!(definition, Statement::Const(_)));
    assert!(matches!(usage, Statement::Instruction(_)));
}
3421
/// A previously defined constant can be used as the count of `.fill`.
#[test]
fn parse_const_expr_with_identifier() {
    // `SIZE = 10` followed by `.fill SIZE, 1, 0`
    let parsed = parse_str("SIZE = 10\n.fill SIZE, 1, 0").unwrap();
    assert_eq!(parsed.len(), 2);
    if let Statement::Fill(fill) = &parsed[1] {
        assert_eq!(fill.count, 10);
    } else {
        panic!("expected Fill");
    }
}
3432
/// A constant defined in terms of an earlier constant is usable as the
/// size of `.space`.
#[test]
fn parse_const_chain() {
    // `B` is defined as `A + 3`, so `.space B` should reserve 5 + 3 bytes.
    let parsed = parse_str("A = 5\nB = A + 3\n.space B, 0").unwrap();
    assert_eq!(parsed.len(), 3);
    if let Statement::Space(space) = &parsed[2] {
        assert_eq!(space.size, 8);
    } else {
        panic!("expected Space");
    }
}
3443
/// An `.equ` expression may reference a constant from an earlier `.equ`.
#[test]
fn parse_equ_identifier_in_const_expr() {
    let parsed = parse_str(".equ BASE, 100\n.equ TOTAL, BASE + 50").unwrap();
    if let Statement::Const(total) = &parsed[1] {
        assert_eq!(total.value, 150);
    } else {
        panic!("expected Const");
    }
}
3452
/// `label + CONST` stays an expression operand, with the constant already
/// replaced by its numeric value.
#[test]
fn parse_label_plus_identifier_expression() {
    // `OFF` is a known constant while `data` is not, so only the right-hand
    // side can be folded to a number at parse time.
    let parsed = parse_str("OFF = 8\nmov rax, data + OFF").unwrap();
    let instr = if let Statement::Instruction(instr) = &parsed[1] {
        instr
    } else {
        panic!("expected Instruction")
    };
    match &instr.operands[1] {
        // OFF resolves to 8, leaving Expression(label + 8).
        Operand::Expression(Expr::Add(lhs, rhs)) => {
            assert_eq!(**lhs, Expr::Label(String::from("data")));
            assert_eq!(**rhs, Expr::Num(8));
        }
        other => panic!("expected Expression, got {:?}", other),
    }
}
3472
/// An operand expression built only from known constants folds to a single
/// immediate.
#[test]
fn parse_all_constants_resolve_to_immediate() {
    // BASE + OFF = 100 + 8, with nothing left symbolic.
    let parsed = parse_str("BASE = 100\nOFF = 8\nmov eax, BASE + OFF").unwrap();
    if let Statement::Instruction(instr) = &parsed[2] {
        assert_eq!(instr.operands[1], Operand::Immediate(108));
    } else {
        panic!("expected Instruction");
    }
}
3484
/// `.align` accepts a previously defined constant as its alignment.
#[test]
fn parse_align_with_constant() {
    let parsed = parse_str("ALIGN_VAL = 8\n.align ALIGN_VAL").unwrap();
    if let Statement::Align(align) = &parsed[1] {
        assert_eq!(align.alignment, 8);
    } else {
        panic!("expected Align");
    }
}
3493
/// `label - N` as a jump target parses to a `Sub` expression of the label
/// and the number.
#[test]
fn parse_label_minus_offset() {
    let instr = parse_instr("jmp target - 8");
    if let Operand::Expression(Expr::Sub(lhs, rhs)) = &instr.operands[0] {
        assert_eq!(**lhs, Expr::Label(String::from("target")));
        assert_eq!(**rhs, Expr::Num(8));
    } else {
        panic!("expected Sub expression");
    }
}
3505
/// Unary minus binds tighter than binary plus in constant expressions.
#[test]
fn parse_const_negation_precedence() {
    // `-A + B` must be read as `(-A) + B`, never as `-(A + B)`.
    let parsed = parse_str("A = 10\nB = 3\nX = -A + B\nmov eax, X").unwrap();
    if let Statement::Instruction(instr) = &parsed[3] {
        let src = &instr.operands[1];
        assert_eq!(
            *src,
            Operand::Immediate(-7),
            "-10 + 3 should be -7, not -13"
        );
    } else {
        panic!("expected Instruction");
    }
}
3521
/// A constant defined as the negation of another constant resolves to the
/// negated value.
#[test]
fn parse_const_negation_only() {
    // With A = 10, `X = -A` makes X equal to -10.
    let parsed = parse_str("A = 10\nX = -A\nmov eax, X").unwrap();
    if let Statement::Instruction(instr) = &parsed[2] {
        assert_eq!(instr.operands[1], Operand::Immediate(-10));
    } else {
        panic!("expected Instruction");
    }
}
3533
/// Unary minus followed by a binary subtraction evaluates left to right.
#[test]
fn parse_const_negation_sub_chain() {
    // `-A - B` is `(-A) - B`, i.e. -10 - 3 = -13.
    let parsed = parse_str("A = 10\nB = 3\nX = -A - B\nmov eax, X").unwrap();
    if let Statement::Instruction(instr) = &parsed[3] {
        let src = &instr.operands[1];
        assert_eq!(
            *src,
            Operand::Immediate(-13),
            "-10 - 3 should be -13"
        );
    } else {
        panic!("expected Instruction");
    }
}
3549
/// `.align 3` is an error, and the error's debug output mentions
/// "power of 2".
#[test]
fn parse_align_rejects_non_power_of_2() {
    let outcome = crate::parser::parse_str(".align 3");
    assert!(outcome.is_err(), ".align 3 should be rejected");
    let err = outcome.unwrap_err();
    let msg = alloc::format!("{err:?}");
    let mentions_power = msg.contains("power of 2");
    assert!(
        mentions_power,
        "error should mention power of 2, got: {msg}"
    );
}
3561
/// `.align` accepts each of a sample of power-of-two alignments.
#[test]
fn parse_align_accepts_power_of_2() {
    let powers = ["1", "2", "4", "8", "16", "32", "64", "4096"];
    // Every entry must parse without error.
    powers.iter().for_each(|val| {
        let directive = alloc::format!(".align {val}");
        let result = crate::parser::parse_str(&directive);
        assert!(
            result.is_ok(),
            ".align {val} should be accepted, got: {result:?}"
        );
    });
}
3574
3575    // === 8th Audit: RSP/ESP as SIB index rejection ===
3576
/// A memory operand that scales RSP as its index register is a parse error.
#[test]
fn parse_rsp_as_index_rejects() {
    // RSP is not encodable as a SIB index, so `[rbx + rsp*2]` must fail.
    let rejected = crate::parser::parse_str("mov rax, [rbx + rsp*2]").is_err();
    assert!(rejected, "RSP as SIB index should be rejected");
}
3583
/// A memory operand that uses ESP as its index register is a parse error.
#[test]
fn parse_esp_as_index_rejects() {
    // ESP is not encodable as a SIB index, so `[ebx + esp*1]` must fail.
    let rejected = crate::parser::parse_str("mov eax, [ebx + esp*1]").is_err();
    assert!(rejected, "ESP as SIB index should be rejected");
}
3590
/// R12 is accepted as a scaled index register.
#[test]
fn parse_r12_as_index_accepts() {
    // Unlike RSP, R12 is a legal SIB index (encoded with REX.X), so
    // `[rbx + r12*2]` must parse.
    let source = "mov rax, [rbx + r12*2]";
    let result = crate::parser::parse_str(source);
    let accepted = result.is_ok();
    assert!(
        accepted,
        "R12 as SIB index should be accepted, got: {result:?}"
    );
}
3600
3601    // === AT&T / GAS Syntax ===
3602
3603    fn parse_att(src: &str) -> Vec<Statement> {
3604        let tokens = crate::lexer::tokenize(src).unwrap();
3605        parse_with_syntax(&tokens, Arch::X86_64, Syntax::Att).unwrap()
3606    }
3607
3608    fn parse_att_instr(src: &str) -> Instruction {
3609        let stmts = parse_att(src);
3610        assert_eq!(stmts.len(), 1, "expected 1 statement, got {:?}", stmts);
3611        match stmts.into_iter().next().unwrap() {
3612            Statement::Instruction(i) => i,
3613            s => panic!("expected instruction, got {s:?}"),
3614        }
3615    }
3616
/// AT&T mode parses an operand-less instruction and a `%`-prefixed register
/// operand.
#[test]
fn att_register_operand() {
    // An instruction without operands yields an empty operand list.
    let i = parse_att_instr("nop");
    assert_eq!(i.mnemonic, "nop");
    assert!(i.operands.is_empty());

    // The test name is about register operands, so exercise one: `%rax`
    // must come back as a plain register operand.
    let i = parse_att_instr("pushq %rax");
    assert_eq!(i.mnemonic, "push");
    assert_eq!(i.operands.len(), 1);
    assert_eq!(i.operands[0], Operand::Register(Register::Rax));
}
3623
/// `movq $imm, %reg` strips the `q` suffix into a qword size hint and
/// returns the operands destination-first.
#[test]
fn att_mov_imm_to_reg() {
    let instr = parse_att_instr("movq $42, %rax");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Qword));
    // AT&T writes source then destination; the parsed order is the reverse.
    let ops = &instr.operands;
    assert_eq!(ops.len(), 2);
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Immediate(42));
}
3634
/// `movl %src, %dst` gets a dword size hint and destination-first operands.
#[test]
fn att_mov_reg_to_reg() {
    let instr = parse_att_instr("movl %eax, %ecx");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Dword));
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Ecx));
    assert_eq!(ops[1], Operand::Register(Register::Eax));
}
3643
/// `addl $hex, %reg` gets a dword size hint, with the register first and the
/// hexadecimal immediate second.
#[test]
fn att_add_imm_to_reg() {
    let instr = parse_att_instr("addl $0x10, %eax");
    assert_eq!(instr.mnemonic, "add");
    assert_eq!(instr.size_hint, Some(OperandSize::Dword));
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Immediate(0x10));
}
3652
/// A `$`-prefixed negative number parses to a negative immediate.
#[test]
fn att_negative_immediate() {
    let instr = parse_att_instr("addq $-1, %rax");
    assert_eq!(instr.mnemonic, "add");
    let src = &instr.operands[1];
    assert_eq!(*src, Operand::Immediate(-1));
}
3659
/// The `b` mnemonic suffix is stripped and recorded as a byte size hint.
#[test]
fn att_byte_suffix() {
    let instr = parse_att_instr("movb $0x41, %al");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Byte));
    let dst = &instr.operands[0];
    assert_eq!(*dst, Operand::Register(Register::Al));
}
3667
/// The `w` mnemonic suffix is stripped and recorded as a word size hint.
#[test]
fn att_word_suffix() {
    let instr = parse_att_instr("movw $0x1234, %ax");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Word));
    let dst = &instr.operands[0];
    assert_eq!(*dst, Operand::Register(Register::Ax));
}
3675
/// `(%reg)` parses to a memory operand with only a base register.
#[test]
fn att_memory_base_only() {
    let instr = parse_att_instr("movq (%rax), %rbx");
    assert_eq!(instr.mnemonic, "mov");
    // Destination-first order: operand 0 is %rbx, operand 1 is (%rax).
    assert_eq!(instr.operands[0], Operand::Register(Register::Rbx));
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.disp, 0);
            assert!(mem.index.is_none());
        }
        _ => panic!("expected memory operand"),
    }
}
3690
/// `disp(%reg)` parses to a memory operand with a base and a displacement.
#[test]
fn att_memory_disp_base() {
    let instr = parse_att_instr("movl 8(%rsp), %eax");
    assert_eq!(instr.mnemonic, "mov");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rsp));
            assert_eq!(mem.disp, 8);
        }
        _ => panic!("expected memory operand"),
    }
}
3702
/// A negative displacement in front of `(%reg)` is preserved with its sign.
#[test]
fn att_memory_negative_disp() {
    let instr = parse_att_instr("movq -16(%rbp), %rax");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rbp));
            assert_eq!(mem.disp, -16);
        }
        _ => panic!("expected memory operand"),
    }
}
3713
/// `(%base, %index)` parses to base plus index with an implicit scale of 1.
#[test]
fn att_memory_base_index() {
    let instr = parse_att_instr("movl (%rax, %rcx), %edx");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.index, Some(Register::Rcx));
            assert_eq!(mem.scale, 1);
        }
        _ => panic!("expected memory operand"),
    }
}
3725
/// `(%base, %index, scale)` parses to base, index and explicit scale with a
/// zero displacement.
#[test]
fn att_memory_base_index_scale() {
    let instr = parse_att_instr("movq (%rax, %rcx, 4), %rdx");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.index, Some(Register::Rcx));
            assert_eq!(mem.scale, 4);
            assert_eq!(mem.disp, 0);
        }
        _ => panic!("expected memory operand"),
    }
}
3738
/// The full `disp(%base, %index, scale)` form populates all four fields of
/// the memory operand.
#[test]
fn att_memory_disp_base_index_scale() {
    let instr = parse_att_instr("movl 16(%rbx, %rsi, 8), %eax");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rbx));
            assert_eq!(mem.index, Some(Register::Rsi));
            assert_eq!(mem.scale, 8);
            assert_eq!(mem.disp, 16);
        }
        _ => panic!("expected memory operand"),
    }
}
3751
/// A `%seg:` prefix in front of `disp(%base)` is recorded as the memory
/// operand's segment.
#[test]
fn att_segment_override() {
    let instr = parse_att_instr("movq %fs:0x28(%rax), %rbx");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.segment, Some(Register::Fs));
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.disp, 0x28);
        }
        _ => panic!("expected memory operand"),
    }
}
3763
/// `pushq` and `popq` lose their suffix and keep the register operand.
#[test]
fn att_push_pop() {
    let push = parse_att_instr("pushq %rbp");
    let pushed = &push.operands[0];
    assert_eq!(push.mnemonic, "push");
    assert_eq!(*pushed, Operand::Register(Register::Rbp));

    let pop = parse_att_instr("popq %rbp");
    let popped = &pop.operands[0];
    assert_eq!(pop.mnemonic, "pop");
    assert_eq!(*popped, Operand::Register(Register::Rbp));
}
3774
/// `xorl %eax, %eax` parses to `xor` with EAX in both operand slots.
#[test]
fn att_xor_reg_reg() {
    let instr = parse_att_instr("xorl %eax, %eax");
    assert_eq!(instr.mnemonic, "xor");
    // Both sides name the same register, so reversal leaves EAX in each slot.
    let eax = Operand::Register(Register::Eax);
    assert_eq!(instr.operands[0], eax);
    assert_eq!(instr.operands[1], eax);
}
3783
/// A bare identifier after `call` parses to a label operand.
#[test]
fn att_call_label() {
    let instr = parse_att_instr("call func");
    let target = Operand::Label(String::from("func"));
    assert_eq!(instr.mnemonic, "call");
    assert_eq!(instr.operands[0], target);
}
3790
/// A bare identifier after `jmp` parses to a label operand.
#[test]
fn att_jmp_label() {
    let instr = parse_att_instr("jmp done");
    let target = Operand::Label(String::from("done"));
    assert_eq!(instr.mnemonic, "jmp");
    assert_eq!(instr.operands[0], target);
}
3797
/// A conditional jump keeps its mnemonic and takes a label operand.
#[test]
fn att_jcc_label() {
    let instr = parse_att_instr("jne loop");
    let target = Operand::Label(String::from("loop"));
    assert_eq!(instr.mnemonic, "jne");
    assert_eq!(instr.operands[0], target);
}
3804
/// `ret` parses to an instruction without operands.
#[test]
fn att_ret() {
    let instr = parse_att_instr("ret");
    assert_eq!(instr.mnemonic, "ret");
    assert_eq!(instr.operands.len(), 0);
}
3811
/// `syscall` keeps its full mnemonic in AT&T mode.
#[test]
fn att_syscall() {
    let instr = parse_att_instr("syscall");
    assert_eq!(instr.mnemonic, "syscall");
}
3817
/// A leading `lock` is recorded as a prefix and the suffixed mnemonic after
/// it is still normalised.
#[test]
fn att_lock_prefix() {
    let instr = parse_att_instr("lock xchgl %eax, (%rbx)");
    assert_eq!(instr.mnemonic, "xchg");
    let has_lock = instr.prefixes.contains(&Prefix::Lock);
    assert!(has_lock);
}
3824
/// `leaq disp(%base), %dst` parses to `lea` with the register first and the
/// memory operand second.
#[test]
fn att_lea() {
    let instr = parse_att_instr("leaq 8(%rsp), %rax");
    assert_eq!(instr.mnemonic, "lea");
    // Destination-first order: operand 0 is %rax, operand 1 is 8(%rsp).
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rsp));
            assert_eq!(mem.disp, 8);
        }
        _ => panic!("expected memory operand"),
    }
}
3838
/// `$name` with an identifier parses to a label operand.
#[test]
fn att_imm_label_ref() {
    let instr = parse_att_instr("movq $myvar, %rax");
    let symbol = Operand::Label(String::from("myvar"));
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.operands[1], symbol);
}
3845
/// A mnemonic without a size suffix carries no size hint.
#[test]
fn att_no_suffix_no_size_hint() {
    // `nop` has nothing to strip, so the hint must stay unset.
    let instr = parse_att_instr("nop");
    assert_eq!(instr.size_hint, None);
}
3852
/// `int` is kept whole rather than being split into `in` plus a size suffix.
#[test]
fn att_int_not_stripped() {
    // The trailing `t` of `int` must not be treated as a suffix on `in`.
    let instr = parse_att_instr("int $0x80");
    assert_eq!(instr.mnemonic, "int");
    let vector = &instr.operands[0];
    assert_eq!(*vector, Operand::Immediate(0x80));
}
3860
/// String instructions keep the size letter that is part of their mnemonic.
#[test]
fn att_string_ops_not_stripped() {
    let byte_move = parse_att_instr("movsb");
    assert_eq!(byte_move.mnemonic, "movsb");
    let qword_store = parse_att_instr("stosq");
    assert_eq!(qword_store.mnemonic, "stosq");
}
3868
/// A leading `rep` is recorded as a prefix on the following string
/// instruction.
#[test]
fn att_rep_prefix() {
    let instr = parse_att_instr("rep movsb");
    assert_eq!(instr.mnemonic, "movsb");
    let has_rep = instr.prefixes.contains(&Prefix::Rep);
    assert!(has_rep);
}
3875
/// `cmp` operands are reversed like any other two-operand instruction.
#[test]
fn att_cmp_operand_order() {
    // AT&T `cmpl $0, %eax` corresponds to Intel `cmp eax, 0`.
    let instr = parse_att_instr("cmpl $0, %eax");
    assert_eq!(instr.mnemonic, "cmp");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Immediate(0));
}
3884
/// `testl %eax, %eax` parses to `test` with EAX in both operand slots.
#[test]
fn att_test_operand_order() {
    // AT&T `testl %eax, %eax` corresponds to Intel `test eax, eax`.
    let instr = parse_att_instr("testl %eax, %eax");
    assert_eq!(instr.mnemonic, "test");
    let eax = Operand::Register(Register::Eax);
    assert_eq!(instr.operands[0], eax);
    assert_eq!(instr.operands[1], eax);
}
3893
/// `subq disp(%base), %dst` puts the register first and the memory source
/// second.
#[test]
fn att_sub_mem_to_reg() {
    let instr = parse_att_instr("subq 8(%rbp), %rax");
    assert_eq!(instr.mnemonic, "sub");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rbp));
            assert_eq!(mem.disp, 8);
        }
        _ => panic!("expected memory operand"),
    }
}
3906
/// `pushq $imm` parses to `push` with an immediate operand.
#[test]
fn att_push_immediate() {
    let instr = parse_att_instr("pushq $42");
    assert_eq!(instr.mnemonic, "push");
    let value = &instr.operands[0];
    assert_eq!(*value, Operand::Immediate(42));
}
3913
/// A forward numeric-label reference (`1f`) is kept verbatim as a label
/// operand.
#[test]
fn att_numeric_label_fwd() {
    let instr = parse_att_instr("jmp 1f");
    let target = Operand::Label(String::from("1f"));
    assert_eq!(instr.mnemonic, "jmp");
    assert_eq!(instr.operands[0], target);
}
3920
/// A backward numeric-label reference (`1b`) is kept verbatim as a label
/// operand.
#[test]
fn att_numeric_label_bwd() {
    let instr = parse_att_instr("jne 1b");
    let target = Operand::Label(String::from("1b"));
    assert_eq!(instr.mnemonic, "jne");
    assert_eq!(instr.operands[0], target);
}
3927
/// `.syntax att` switches a parser that started in Intel mode over to AT&T
/// for the lines that follow.
#[test]
fn att_syntax_directive_switches_mode() {
    // The parser is created in Intel mode; the directive on the first line
    // is what makes the second line legal.
    let tokens = crate::lexer::tokenize(".syntax att\nmovq $1, %rax").unwrap();
    let parsed = parse_with_syntax(&tokens, Arch::X86_64, Syntax::Intel).unwrap();
    // Take the first instruction, whatever else the directive may have emitted.
    let instr = parsed
        .iter()
        .find_map(|stmt| match stmt {
            Statement::Instruction(found) => Some(found),
            _ => None,
        })
        .expect("no instruction found");
    assert_eq!(instr.mnemonic, "mov");
    // AT&T handling shows up as destination-first operand order.
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Immediate(1));
}
3950
/// `*%reg` as a jump target parses to a plain register operand.
#[test]
fn att_star_indirect_reg() {
    let instr = parse_att_instr("jmp *%rax");
    assert_eq!(instr.mnemonic, "jmp");
    let target = &instr.operands[0];
    assert_eq!(*target, Operand::Register(Register::Rax));
}
3957
/// `*(%reg)` as a call target parses to a memory operand based on that
/// register.
#[test]
fn att_star_indirect_mem() {
    let instr = parse_att_instr("call *(%rax)");
    assert_eq!(instr.mnemonic, "call");
    match &instr.operands[0] {
        Operand::Memory(mem) => assert_eq!(mem.base, Some(Register::Rax)),
        _ => panic!("expected memory operand"),
    }
}
3968
3969    // ── Literal pool parsing ────────────────────────────────
3970
3971    fn parse_aarch64(src: &str) -> Vec<Statement> {
3972        let tokens = crate::lexer::tokenize(src).unwrap();
3973        parse_with_syntax(&tokens, Arch::Aarch64, Syntax::Ual).unwrap()
3974    }
3975
/// `ldr xN, =value` parses to `ldr` with an X register and a literal-pool
/// value operand.
#[test]
fn parse_ldr_literal_pool_x_reg() {
    let parsed = parse_aarch64("ldr x0, =0x12345678");
    assert_eq!(parsed.len(), 1);
    match &parsed[0] {
        Statement::Instruction(instr) => {
            assert_eq!(instr.mnemonic, "ldr");
            assert_eq!(instr.operands.len(), 2);
            let (dst, literal) = (&instr.operands[0], &instr.operands[1]);
            assert!(matches!(dst, Operand::Register(Register::A64X0)));
            assert!(matches!(literal, Operand::LiteralPoolValue(0x12345678)));
        }
        _ => panic!("expected instruction"),
    }
}
3995
/// `ldr wN, =value` also produces a literal-pool value operand.
#[test]
fn parse_ldr_literal_pool_w_reg() {
    let parsed = parse_aarch64("ldr w5, =42");
    assert_eq!(parsed.len(), 1);
    match &parsed[0] {
        Statement::Instruction(instr) => {
            assert_eq!(instr.mnemonic, "ldr");
            let literal = &instr.operands[1];
            assert!(matches!(literal, Operand::LiteralPoolValue(42)));
        }
        _ => panic!("expected instruction"),
    }
}
4007
/// A negative number after `=` is kept as a negative literal-pool value.
#[test]
fn parse_ldr_literal_pool_negative() {
    let parsed = parse_aarch64("ldr x1, =-1");
    match &parsed[0] {
        Statement::Instruction(instr) => {
            let literal = &instr.operands[1];
            assert!(matches!(literal, Operand::LiteralPoolValue(-1)));
        }
        _ => panic!("expected instruction"),
    }
}
4017
/// A 64-bit hexadecimal literal with the top bit set is stored as the same
/// positive value in the literal-pool operand.
#[test]
fn parse_ldr_literal_pool_hex_large() {
    let parsed = parse_aarch64("ldr x0, =0xDEADBEEFCAFEBABE");
    let instr = match &parsed[0] {
        Statement::Instruction(instr) => instr,
        _ => panic!("expected instruction"),
    };
    match &instr.operands[1] {
        // Widening from u64 keeps the value positive.
        Operand::LiteralPoolValue(value) => {
            assert_eq!(*value, i128::from(0xDEADBEEFCAFEBABEu64));
        }
        _ => panic!("expected LiteralPoolValue"),
    }
}
4031
/// `.ltorg` produces an `Ltorg` statement.
#[test]
fn parse_ltorg_directive() {
    let parsed = parse_aarch64("ldr x0, =1\n.ltorg");
    assert_eq!(parsed.len(), 2);
    let directive = &parsed[1];
    assert!(matches!(directive, Statement::Ltorg(_)));
}
4038
/// `.pool` produces the same `Ltorg` statement as `.ltorg`.
#[test]
fn parse_pool_directive() {
    let parsed = parse_aarch64("ldr x0, =1\n.pool");
    assert_eq!(parsed.len(), 2);
    let directive = &parsed[1];
    assert!(matches!(directive, Statement::Ltorg(_)));
}
4045
4046    // ── SIMD/FP register parsing ────────────────────────────
4047
/// Every `v0`–`v31` name parses to a SIMD/FP register with the matching
/// number and a 128-bit width.
#[test]
fn parse_simd_v_registers() {
    // V0–V31 should parse as AArch64 vector registers
    for i in 0u8..32 {
        let src = alloc::format!("fmov v{}, v0", i);
        let stmts = parse_aarch64(&src);
        assert_eq!(stmts.len(), 1, "parsing 'fmov v{}, v0' failed", i);
        // Fail loudly on an unexpected statement kind instead of skipping
        // the register checks.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for v{}, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::Register(r) => {
                assert!(r.is_a64_simd_fp(), "v{} should be SIMD/FP", i);
                assert_eq!(r.a64_reg_num(), i, "v{} reg num", i);
                assert_eq!(r.a64_simd_fp_bits(), 128, "v{} should be 128 bits", i);
            }
            _ => panic!("expected register for v{}", i),
        }
    }
}
4066
/// Every `q0`–`q31` name parses to a SIMD/FP register with the matching
/// number and a 128-bit width.
#[test]
fn parse_simd_q_registers() {
    for i in 0u8..32 {
        let src = alloc::format!("mov q{}, q0", i);
        let stmts = parse_aarch64(&src);
        // Any shape other than instruction + register operand is a failure;
        // otherwise the assertions below would be skipped silently.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for q{}, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::Register(r) => {
                assert!(r.is_a64_simd_fp(), "q{} should be SIMD/FP", i);
                assert_eq!(r.a64_reg_num(), i);
                assert_eq!(r.a64_simd_fp_bits(), 128);
            }
            other => panic!("expected register for q{}, got {:?}", i, other),
        }
    }
}
4081
/// Every `d0`–`d31` name parses to a SIMD/FP register with the matching
/// number and a 64-bit width.
#[test]
fn parse_simd_d_registers() {
    for i in 0u8..32 {
        let src = alloc::format!("fmov d{}, d0", i);
        let stmts = parse_aarch64(&src);
        // Any shape other than instruction + register operand is a failure;
        // otherwise the assertions below would be skipped silently.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for d{}, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::Register(r) => {
                assert!(r.is_a64_simd_fp(), "d{} should be SIMD/FP", i);
                assert_eq!(r.a64_reg_num(), i);
                assert_eq!(r.a64_simd_fp_bits(), 64);
            }
            other => panic!("expected register for d{}, got {:?}", i, other),
        }
    }
}
4096
/// Every `s0`–`s31` name parses to a SIMD/FP register with the matching
/// number and a 32-bit width.
#[test]
fn parse_simd_s_registers() {
    for i in 0u8..32 {
        let src = alloc::format!("fmov s{}, s0", i);
        let stmts = parse_aarch64(&src);
        // Any shape other than instruction + register operand is a failure;
        // otherwise the assertions below would be skipped silently.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for s{}, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::Register(r) => {
                assert!(r.is_a64_simd_fp(), "s{} should be SIMD/FP", i);
                assert_eq!(r.a64_reg_num(), i);
                assert_eq!(r.a64_simd_fp_bits(), 32);
            }
            other => panic!("expected register for s{}, got {:?}", i, other),
        }
    }
}
4111
/// Every `h0`–`h31` name parses to a SIMD/FP register with the matching
/// number and a 16-bit width.
#[test]
fn parse_simd_h_registers() {
    for i in 0u8..32 {
        let src = alloc::format!("fmov h{}, h0", i);
        let stmts = parse_aarch64(&src);
        // Any shape other than instruction + register operand is a failure;
        // otherwise the assertions below would be skipped silently.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for h{}, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::Register(r) => {
                assert!(r.is_a64_simd_fp(), "h{} should be SIMD/FP", i);
                assert_eq!(r.a64_reg_num(), i);
                assert_eq!(r.a64_simd_fp_bits(), 16);
            }
            other => panic!("expected register for h{}, got {:?}", i, other),
        }
    }
}
4126
/// Every `b0`–`b31` name parses to a SIMD/FP register with the matching
/// number and an 8-bit width.
#[test]
fn parse_simd_b_registers() {
    for i in 0u8..32 {
        let src = alloc::format!("fmov b{}, b0", i);
        let stmts = parse_aarch64(&src);
        // Any shape other than instruction + register operand is a failure;
        // otherwise the assertions below would be skipped silently.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for b{}, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::Register(r) => {
                assert!(r.is_a64_simd_fp(), "b{} should be SIMD/FP", i);
                assert_eq!(r.a64_reg_num(), i);
                assert_eq!(r.a64_simd_fp_bits(), 8);
            }
            other => panic!("expected register for b{}, got {:?}", i, other),
        }
    }
}
4141
4142    // ── Vector arrangement specifier parsing ──────────────────────────
/// Each of the eight arrangement suffixes (`.8b` … `.2d`) is recognised on
/// all three operands of a vector `add`.
#[test]
fn parse_vector_arrangement_all_specifiers() {
    let cases = [
        ("add v0.8b, v1.8b, v2.8b", VectorArrangement::B8),
        ("add v0.16b, v1.16b, v2.16b", VectorArrangement::B16),
        ("add v0.4h, v1.4h, v2.4h", VectorArrangement::H4),
        ("add v0.8h, v1.8h, v2.8h", VectorArrangement::H8),
        ("add v0.2s, v1.2s, v2.2s", VectorArrangement::S2),
        ("add v0.4s, v1.4s, v2.4s", VectorArrangement::S4),
        ("add v0.1d, v1.1d, v2.1d", VectorArrangement::D1),
        ("add v0.2d, v1.2d, v2.2d", VectorArrangement::D2),
    ];
    for (src, expected_arr) in cases.iter() {
        let parsed = parse_aarch64(src);
        let instr = match &parsed[0] {
            Statement::Instruction(instr) => instr,
            _ => panic!("expected instruction for source: {}", src),
        };
        assert_eq!(instr.operands.len(), 3, "source: {}", src);
        // All three operands share the arrangement under test.
        for (j, op) in instr.operands.iter().enumerate() {
            if let Operand::VectorRegister(_, arr) = op {
                assert_eq!(arr, expected_arr, "source: {}, operand {}", src, j);
            } else {
                panic!(
                    "expected VectorRegister, got {:?} for source: {}, operand {}",
                    op, src, j
                );
            }
        }
    }
}
4175
/// A sample of register numbers across 0–31 keeps both the number and the
/// `.4s` arrangement when parsed as the first operand.
#[test]
fn parse_vector_arrangement_register_numbers() {
    // Verify that various register numbers parse correctly with arrangement
    for i in [0u8, 1, 7, 15, 16, 31] {
        let src = alloc::format!("add v{}.4s, v0.4s, v0.4s", i);
        let stmts = parse_aarch64(&src);
        // A non-instruction statement must fail the test rather than skip
        // the operand checks.
        let instr = match &stmts[0] {
            Statement::Instruction(instr) => instr,
            other => panic!("expected instruction for v{}.4s, got {:?}", i, other),
        };
        match &instr.operands[0] {
            Operand::VectorRegister(reg, arr) => {
                assert_eq!(reg.a64_reg_num(), i, "v{}.4s reg num", i);
                assert_eq!(*arr, VectorArrangement::S4);
                assert!(reg.is_a64_vector());
            }
            other => panic!("expected VectorRegister, got {:?}", other),
        }
    }
}
4194
4195    #[test]
4196    fn parse_vector_arrangement_case_insensitive() {
4197        // Arrangement specifiers should be case insensitive
4198        let cases = ["add v0.4S, v1.4S, v2.4S", "add V0.4s, V1.4s, V2.4s"];
4199        for src in &cases {
4200            let stmts = parse_aarch64(src);
4201            if let Statement::Instruction(instr) = &stmts[0] {
4202                for op in &instr.operands {
4203                    match op {
4204                        Operand::VectorRegister(_, arr) => {
4205                            assert_eq!(*arr, VectorArrangement::S4, "source: {}", src);
4206                        }
4207                        other => panic!(
4208                            "expected VectorRegister, got {:?} for source: {}",
4209                            other, src
4210                        ),
4211                    }
4212                }
4213            }
4214        }
4215    }
4216
4217    #[test]
4218    fn parse_vector_reg_without_arrangement() {
4219        // V register without arrangement specifier should parse as plain Register
4220        let stmts = parse_aarch64("mov v0, v1");
4221        if let Statement::Instruction(instr) = &stmts[0] {
4222            match &instr.operands[0] {
4223                Operand::Register(reg) => {
4224                    assert!(reg.is_a64_vector());
4225                    assert_eq!(reg.a64_reg_num(), 0);
4226                }
4227                other => panic!("expected Register, got {:?}", other),
4228            }
4229        }
4230    }
4231
4232    #[test]
4233    fn parse_vector_arrangement_display() {
4234        let stmts = parse_aarch64("add v3.2d, v4.2d, v5.2d");
4235        if let Statement::Instruction(instr) = &stmts[0] {
4236            if let Operand::VectorRegister(reg, arr) = &instr.operands[0] {
4237                assert_eq!(reg.a64_reg_num(), 3);
4238                assert_eq!(*arr, VectorArrangement::D2);
4239                let display = alloc::format!("{}", instr.operands[0]);
4240                // Display format is "{register_debug_lower}.{arrangement_display}"
4241                assert!(
4242                    display.contains("2D") || display.contains("2d"),
4243                    "Display should contain arrangement: {}",
4244                    display
4245                );
4246            } else {
4247                panic!("expected VectorRegister");
4248            }
4249        }
4250    }
4251
4252    #[test]
4253    fn parse_vector_arrangement_element_properties() {
4254        // Verify element_bits, total_bits, lane_count through parsed arrangements
4255        let cases = [
4256            ("add v0.8b, v0.8b, v0.8b", 8u32, 64u32, 8u32),
4257            ("add v0.16b, v0.16b, v0.16b", 8, 128, 16),
4258            ("add v0.4h, v0.4h, v0.4h", 16, 64, 4),
4259            ("add v0.8h, v0.8h, v0.8h", 16, 128, 8),
4260            ("add v0.2s, v0.2s, v0.2s", 32, 64, 2),
4261            ("add v0.4s, v0.4s, v0.4s", 32, 128, 4),
4262            ("add v0.1d, v0.1d, v0.1d", 64, 64, 1),
4263            ("add v0.2d, v0.2d, v0.2d", 64, 128, 2),
4264        ];
4265        for (src, elem_bits, total_bits, lanes) in &cases {
4266            let stmts = parse_aarch64(src);
4267            if let Statement::Instruction(instr) = &stmts[0] {
4268                if let Operand::VectorRegister(_, arr) = &instr.operands[0] {
4269                    assert_eq!(arr.element_bits(), *elem_bits, "{}", src);
4270                    assert_eq!(arr.total_bits(), *total_bits, "{}", src);
4271                    assert_eq!(arr.lane_count(), *lanes, "{}", src);
4272                }
4273            }
4274        }
4275    }
4276
4277    // === RISC-V Register Parsing ===
4278
4279    fn parse_rv64(src: &str) -> Vec<Statement> {
4280        let tokens = crate::lexer::tokenize(src).unwrap();
4281        parse_with_syntax(&tokens, Arch::Rv64, Syntax::RiscV).unwrap()
4282    }
4283
4284    fn parse_rv64_instr(src: &str) -> Instruction {
4285        let stmts = parse_rv64(src);
4286        assert_eq!(stmts.len(), 1, "expected 1 statement, got {}", stmts.len());
4287        match stmts.into_iter().next().unwrap() {
4288            Statement::Instruction(i) => i,
4289            s => panic!("expected instruction, got {:?}", s),
4290        }
4291    }
4292
4293    #[test]
4294    fn riscv_fp_register_hardware_names() {
4295        // Verify all hardware names f0–f31 parse correctly
4296        for i in 0u8..32 {
4297            let src = alloc::format!("fadd.d f{}, f{}, f{}", i, i, i);
4298            let instr = parse_rv64_instr(&src);
4299            assert_eq!(instr.mnemonic, "fadd.d");
4300            assert_eq!(instr.operands.len(), 3);
4301            for op in &instr.operands {
4302                if let Operand::Register(reg) = op {
4303                    assert!(reg.is_riscv_fp(), "f{} should be FP register", i);
4304                    assert_eq!(reg.rv_fp_reg_num(), i, "f{} should map to reg {}", i, i);
4305                } else {
4306                    panic!("expected register operand for f{}", i);
4307                }
4308            }
4309        }
4310    }
4311
4312    #[test]
4313    fn riscv_fp_register_abi_ft() {
4314        // ft0–ft7 → f0–f7, ft8–ft11 → f28–f31
4315        let mapping: &[(&str, u8)] = &[
4316            ("ft0", 0),
4317            ("ft1", 1),
4318            ("ft2", 2),
4319            ("ft3", 3),
4320            ("ft4", 4),
4321            ("ft5", 5),
4322            ("ft6", 6),
4323            ("ft7", 7),
4324            ("ft8", 28),
4325            ("ft9", 29),
4326            ("ft10", 30),
4327            ("ft11", 31),
4328        ];
4329        for &(name, expected_num) in mapping {
4330            let src = alloc::format!("fmv.d {}, {}", name, name);
4331            let instr = parse_rv64_instr(&src);
4332            if let Operand::Register(reg) = &instr.operands[0] {
4333                assert!(reg.is_riscv_fp(), "{} should be FP", name);
4334                assert_eq!(
4335                    reg.rv_fp_reg_num(),
4336                    expected_num,
4337                    "{} → f{}",
4338                    name,
4339                    expected_num
4340                );
4341            } else {
4342                panic!("expected register for {}", name);
4343            }
4344        }
4345    }
4346
4347    #[test]
4348    fn riscv_fp_register_abi_fs() {
4349        // fs0–fs1 → f8–f9, fs2–fs11 → f18–f27
4350        let mapping: &[(&str, u8)] = &[
4351            ("fs0", 8),
4352            ("fs1", 9),
4353            ("fs2", 18),
4354            ("fs3", 19),
4355            ("fs4", 20),
4356            ("fs5", 21),
4357            ("fs6", 22),
4358            ("fs7", 23),
4359            ("fs8", 24),
4360            ("fs9", 25),
4361            ("fs10", 26),
4362            ("fs11", 27),
4363        ];
4364        for &(name, expected_num) in mapping {
4365            let src = alloc::format!("fmv.d {}, {}", name, name);
4366            let instr = parse_rv64_instr(&src);
4367            if let Operand::Register(reg) = &instr.operands[0] {
4368                assert!(reg.is_riscv_fp(), "{} should be FP", name);
4369                assert_eq!(
4370                    reg.rv_fp_reg_num(),
4371                    expected_num,
4372                    "{} → f{}",
4373                    name,
4374                    expected_num
4375                );
4376            } else {
4377                panic!("expected register for {}", name);
4378            }
4379        }
4380    }
4381
4382    #[test]
4383    fn riscv_fp_register_abi_fa() {
4384        // fa0–fa7 → f10–f17
4385        let mapping: &[(&str, u8)] = &[
4386            ("fa0", 10),
4387            ("fa1", 11),
4388            ("fa2", 12),
4389            ("fa3", 13),
4390            ("fa4", 14),
4391            ("fa5", 15),
4392            ("fa6", 16),
4393            ("fa7", 17),
4394        ];
4395        for &(name, expected_num) in mapping {
4396            let src = alloc::format!("fmv.d {}, {}", name, name);
4397            let instr = parse_rv64_instr(&src);
4398            if let Operand::Register(reg) = &instr.operands[0] {
4399                assert!(reg.is_riscv_fp(), "{} should be FP", name);
4400                assert_eq!(
4401                    reg.rv_fp_reg_num(),
4402                    expected_num,
4403                    "{} → f{}",
4404                    name,
4405                    expected_num
4406                );
4407            } else {
4408                panic!("expected register for {}", name);
4409            }
4410        }
4411    }
4412
4413    #[test]
4414    fn riscv_fp_mixed_with_integer_regs() {
4415        // FP load: flw ft0, 0(sp)  — FP dest, integer base
4416        let instr = parse_rv64_instr("flw ft0, 0(sp)");
4417        assert_eq!(instr.mnemonic, "flw");
4418        if let Operand::Register(reg) = &instr.operands[0] {
4419            assert!(reg.is_riscv_fp());
4420            assert_eq!(reg.rv_fp_reg_num(), 0);
4421        } else {
4422            panic!("expected FP register");
4423        }
4424    }
4425
4426    #[test]
4427    fn riscv_fp_not_integer() {
4428        // FP registers should not report as integer RISC-V registers
4429        let instr = parse_rv64_instr("fadd.d fa0, fa1, fa2");
4430        for op in &instr.operands {
4431            if let Operand::Register(reg) = op {
4432                assert!(reg.is_riscv_fp());
4433                assert!(!reg.is_riscv());
4434            }
4435        }
4436    }
4437
4438    #[test]
4439    fn riscv_integer_not_fp() {
4440        // Integer registers should not report as FP
4441        let instr = parse_rv64_instr("add a0, a1, a2");
4442        for op in &instr.operands {
4443            if let Operand::Register(reg) = op {
4444                assert!(reg.is_riscv());
4445                assert!(!reg.is_riscv_fp());
4446            }
4447        }
4448    }
4449}