1use alloc::boxed::Box;
9use alloc::collections::BTreeMap;
10use alloc::string::String;
11use alloc::string::ToString;
12use alloc::vec;
13use alloc::vec::Vec;
14
15use crate::error::{AsmError, Span};
16use crate::ir::*;
17use crate::lexer::{Token, TokenKind};
18
/// Copies `s` into `buf`, ASCII-lowercases the copy and returns it as a `&str`.
///
/// When `s` is longer than `buf`, only a prefix of `s` is copied. The cut is
/// moved back to the nearest UTF-8 character boundary, so truncating in the
/// middle of a multi-byte character yields the valid prefix rather than an
/// empty string. Only ASCII letters are changed; other bytes are copied as-is.
#[inline]
fn to_lower_buf<'b>(s: &str, buf: &'b mut [u8]) -> &'b str {
    let mut len = s.len().min(buf.len());
    // Index 0 is always a character boundary, so this loop terminates.
    while !s.is_char_boundary(len) {
        len -= 1;
    }
    buf[..len].copy_from_slice(&s.as_bytes()[..len]);
    buf[..len].make_ascii_lowercase();
    // The copied bytes are a character-aligned prefix of valid UTF-8 and
    // ASCII lowercasing keeps them valid; the fallback is purely defensive.
    core::str::from_utf8(&buf[..len]).unwrap_or("")
}
30
31pub fn parse(tokens: &[Token<'_>]) -> Result<Vec<Statement>, AsmError> {
38 parse_with_arch(tokens, Arch::X86_64)
39}
40
41pub fn parse_with_arch(tokens: &[Token<'_>], arch: Arch) -> Result<Vec<Statement>, AsmError> {
43 parse_with_syntax(tokens, arch, Syntax::Intel)
44}
45
46pub fn parse_with_syntax(
48 tokens: &[Token<'_>],
49 arch: Arch,
50 syntax: Syntax,
51) -> Result<Vec<Statement>, AsmError> {
52 let mut parser = Parser::new(tokens, arch, syntax);
53 parser.parse_program()
54}
55
/// Parser state over a borrowed token slice.
struct Parser<'a> {
    /// Token stream being parsed.
    tokens: &'a [Token<'a>],
    /// Index into `tokens` of the next token to read.
    pos: usize,
    /// Target architecture; consulted when recognising registers and
    /// architecture-specific operand forms.
    arch: Arch,
    /// Active syntax flavour; the `.syntax` directive can change it mid-stream.
    syntax: Syntax,
    /// Constants defined so far (via `.equ`/`.set` or `name = expr`), by name.
    constants: BTreeMap<String, i128>,
}
68
69impl<'a> Parser<'a> {
70 fn new(tokens: &'a [Token<'a>], arch: Arch, syntax: Syntax) -> Self {
71 Self {
72 tokens,
73 pos: 0,
74 arch,
75 syntax,
76 constants: BTreeMap::new(),
77 }
78 }
79
80 #[inline]
81 fn peek(&self) -> &Token<'a> {
82 &self.tokens[self.pos.min(self.tokens.len() - 1)]
83 }
84
85 #[inline]
86 fn advance(&mut self) -> &Token<'a> {
87 let tok = &self.tokens[self.pos.min(self.tokens.len() - 1)];
88 if self.pos < self.tokens.len() {
89 self.pos += 1;
90 }
91 tok
92 }
93
94 #[inline]
95 fn at_end(&self) -> bool {
96 self.pos >= self.tokens.len() || self.peek().kind == TokenKind::Eof
97 }
98
99 fn expect_ident(&mut self) -> Result<(String, Span), AsmError> {
100 let tok = self.advance();
101 if tok.kind == TokenKind::Ident {
102 Ok((tok.text.to_string(), tok.span))
103 } else {
104 Err(AsmError::Syntax {
105 msg: alloc::format!("expected identifier, found '{}'", tok.text),
106 span: tok.span,
107 })
108 }
109 }
110
111 #[inline]
112 fn skip_newlines(&mut self) {
113 while !self.at_end() && self.peek().kind == TokenKind::Newline {
114 self.advance();
115 }
116 }
117
118 fn parse_program(&mut self) -> Result<Vec<Statement>, AsmError> {
119 let mut stmts = Vec::with_capacity(self.tokens.len() / 3 + 1);
121 self.skip_newlines();
122 while !self.at_end() {
123 if let Some(stmt) = self.parse_statement()? {
124 stmts.push(stmt);
125 }
126 self.skip_newlines();
127 }
128 Ok(stmts)
129 }
130
131 fn parse_statement(&mut self) -> Result<Option<Statement>, AsmError> {
132 let tok = self.peek().clone();
133
134 match &tok.kind {
135 TokenKind::Eof => Ok(None),
136 TokenKind::Newline => {
137 self.advance();
138 Ok(None)
139 }
140
141 TokenKind::LabelDef => {
143 self.advance();
144 Ok(Some(Statement::Label(tok.text.to_string(), tok.span)))
145 }
146
147 TokenKind::NumericLabelDef(n) => {
149 self.advance();
150 let name = alloc::format!("{}", n);
151 Ok(Some(Statement::Label(name, tok.span)))
152 }
153
154 TokenKind::Directive => self.parse_directive(),
156
157 TokenKind::Ident => self.parse_instruction_or_prefix(),
159
160 _ => Err(AsmError::Syntax {
161 msg: alloc::format!("unexpected token '{}'", tok.text),
162 span: tok.span,
163 }),
164 }
165 }
166
167 fn parse_directive(&mut self) -> Result<Option<Statement>, AsmError> {
168 let tok = self.advance().clone();
169 let mut dir_buf = [0u8; 32];
170 let dir = to_lower_buf(&tok.text, &mut dir_buf);
171 let span = tok.span;
172
173 match dir {
174 ".byte" | ".db" => self.parse_data_directive(DataSize::Byte, span),
176 ".word" | ".dw" | ".short" => self.parse_data_directive(DataSize::Word, span),
177 ".long" | ".dd" | ".int" => self.parse_data_directive(DataSize::Long, span),
178 ".quad" | ".dq" => self.parse_data_directive(DataSize::Quad, span),
179
180 ".ascii" => self.parse_string_directive(false, span),
182 ".asciz" | ".string" => self.parse_string_directive(true, span),
183
184 ".equ" | ".set" => self.parse_equ_directive(span),
186
187 ".align" | ".balign" | ".p2align" => {
189 let is_p2 = dir == ".p2align";
190 self.parse_align_directive(is_p2, span)
191 }
192
193 ".fill" => self.parse_fill_directive(span),
195
196 ".space" | ".skip" => self.parse_space_directive(span),
198
199 ".org" => self.parse_org_directive(span),
201
202 ".global" | ".globl" | ".extern" => {
204 if !self.at_end() && self.peek().kind == TokenKind::Ident {
206 self.advance();
207 }
208 Ok(None)
209 }
210
211 ".text" | ".data" | ".bss" | ".rodata" | ".section" => {
213 while !self.at_end()
215 && self.peek().kind != TokenKind::Newline
216 && self.peek().kind != TokenKind::Eof
217 {
218 self.advance();
219 }
220 Ok(None)
221 }
222
223 ".code16" => Ok(Some(Statement::CodeMode(crate::ir::X86Mode::Mode16, span))),
225 ".code32" => Ok(Some(Statement::CodeMode(crate::ir::X86Mode::Mode32, span))),
226 ".code64" => Ok(Some(Statement::CodeMode(crate::ir::X86Mode::Mode64, span))),
227
228 ".ltorg" | ".pool" => Ok(Some(Statement::Ltorg(span))),
230
231 ".thumb" => Ok(Some(Statement::ThumbMode(true, span))),
233 ".arm" => Ok(Some(Statement::ThumbMode(false, span))),
234 ".thumb_func" => Ok(Some(Statement::ThumbFunc(span))),
235
236 ".syntax" => {
238 let next = self.peek().clone();
239 if next.kind == TokenKind::Ident {
240 self.advance();
241 if next.text.eq_ignore_ascii_case("att") {
242 self.syntax = Syntax::Att;
243 Ok(None)
244 } else if next.text.eq_ignore_ascii_case("intel") {
245 self.syntax = Syntax::Intel;
246 Ok(None)
247 } else {
248 Err(AsmError::Syntax {
249 msg: alloc::format!(
250 "unknown syntax '{}' (expected 'att' or 'intel')",
251 next.text
252 ),
253 span: next.span,
254 })
255 }
256 } else {
257 Err(AsmError::Syntax {
258 msg: String::from("expected 'att' or 'intel' after .syntax"),
259 span: next.span,
260 })
261 }
262 }
263
264 ".option" => {
266 let next = self.peek().clone();
267 if next.kind == TokenKind::Ident {
268 self.advance();
269 if next.text.eq_ignore_ascii_case("rvc") {
270 Ok(Some(Statement::OptionRvc(true, span)))
271 } else if next.text.eq_ignore_ascii_case("norvc") {
272 Ok(Some(Statement::OptionRvc(false, span)))
273 } else {
274 Err(AsmError::Syntax {
275 msg: alloc::format!(
276 "unknown option '{}' (expected 'rvc' or 'norvc')",
277 next.text
278 ),
279 span: next.span,
280 })
281 }
282 } else {
283 Err(AsmError::Syntax {
284 msg: String::from("expected 'rvc' or 'norvc' after .option"),
285 span: next.span,
286 })
287 }
288 }
289
290 _ => Err(AsmError::Syntax {
291 msg: alloc::format!("unknown directive '{}'", dir),
292 span,
293 }),
294 }
295 }
296
297 fn parse_data_directive(
298 &mut self,
299 size: DataSize,
300 span: Span,
301 ) -> Result<Option<Statement>, AsmError> {
302 let mut values = Vec::new();
303 loop {
304 let val = self.parse_data_value()?;
305 values.push(val);
306 if self.peek().kind == TokenKind::Comma {
307 self.advance();
308 } else {
309 break;
310 }
311 }
312 Ok(Some(Statement::Data(DataDecl { size, values, span })))
313 }
314
315 fn parse_data_value(&mut self) -> Result<DataValue, AsmError> {
316 let tok = self.peek().clone();
317 match &tok.kind {
318 TokenKind::Number(n) => {
319 self.advance();
320 Ok(DataValue::Integer(*n))
321 }
322 TokenKind::CharLit(ch) => {
323 self.advance();
324 Ok(DataValue::Integer(*ch as i128))
325 }
326 TokenKind::Ident => {
327 self.advance();
328 let label = tok.text.to_string();
329 let addend = if self.peek().kind == TokenKind::Plus {
331 self.advance();
332 let n = self.parse_const_expr()?;
333 n as i64
334 } else if self.peek().kind == TokenKind::Minus {
335 self.advance();
336 let n = self.parse_const_expr()?;
337 -(n as i64)
338 } else {
339 0
340 };
341 Ok(DataValue::Label(label, addend))
342 }
343 TokenKind::Minus => {
344 self.advance();
345 let next = self.peek().clone();
346 if let TokenKind::Number(n) = next.kind {
347 self.advance();
348 Ok(DataValue::Integer(-n))
349 } else {
350 Err(AsmError::Syntax {
351 msg: String::from("expected number after '-'"),
352 span: tok.span,
353 })
354 }
355 }
356 _ => Err(AsmError::Syntax {
357 msg: alloc::format!("expected data value, found '{}'", tok.text),
358 span: tok.span,
359 }),
360 }
361 }
362
363 fn parse_string_directive(
364 &mut self,
365 null_terminate: bool,
366 span: Span,
367 ) -> Result<Option<Statement>, AsmError> {
368 let tok = self.advance().clone();
369 if tok.kind != TokenKind::StringLit {
370 return Err(AsmError::Syntax {
371 msg: String::from("expected string literal"),
372 span: tok.span,
373 });
374 }
375 let mut bytes: Vec<u8> = tok.text.as_bytes().to_vec();
376 if null_terminate {
377 bytes.push(0);
378 }
379 Ok(Some(Statement::Data(DataDecl {
380 size: DataSize::Byte,
381 values: vec![DataValue::Bytes(bytes)],
382 span,
383 })))
384 }
385
386 fn parse_equ_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
387 let (name, _) = self.expect_ident()?;
388 if self.peek().kind == TokenKind::Comma {
390 self.advance();
391 }
392 let value = self.parse_const_expr()?;
393 self.constants.insert(name.clone(), value);
394 Ok(Some(Statement::Const(ConstDef { name, value, span })))
395 }
396
397 fn parse_align_directive(
398 &mut self,
399 is_p2: bool,
400 span: Span,
401 ) -> Result<Option<Statement>, AsmError> {
402 let raw = self.parse_const_expr()? as u32;
403 let alignment = if is_p2 { 1u32 << raw } else { raw };
404
405 if alignment > 1 && !alignment.is_power_of_two() {
407 return Err(AsmError::Syntax {
408 msg: alloc::format!("alignment must be a power of 2, got {alignment}"),
409 span,
410 });
411 }
412
413 let fill = if self.peek().kind == TokenKind::Comma {
414 self.advance();
415 if matches!(
417 self.peek().kind,
418 TokenKind::Number(_) | TokenKind::Minus | TokenKind::Ident
419 ) {
420 Some(self.parse_const_expr()? as u8)
421 } else {
422 None
423 }
424 } else {
425 None
426 };
427
428 let max_skip = if self.peek().kind == TokenKind::Comma {
429 self.advance();
430 if matches!(
431 self.peek().kind,
432 TokenKind::Number(_) | TokenKind::Minus | TokenKind::Ident
433 ) {
434 Some(self.parse_const_expr()? as u32)
435 } else {
436 None
437 }
438 } else {
439 None
440 };
441
442 Ok(Some(Statement::Align(AlignDirective {
443 alignment,
444 fill,
445 max_skip,
446 span,
447 })))
448 }
449
450 fn parse_fill_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
451 let count = self.parse_const_expr()? as u32;
452 let mut size = 1u8;
453 let mut value = 0i64;
454 if self.peek().kind == TokenKind::Comma {
455 self.advance();
456 size = self.parse_const_expr()? as u8;
457 if self.peek().kind == TokenKind::Comma {
458 self.advance();
459 value = self.parse_const_expr()? as i64;
460 }
461 }
462 Ok(Some(Statement::Fill(FillDirective {
463 count,
464 size,
465 value,
466 span,
467 })))
468 }
469
470 fn parse_space_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
471 let size = self.parse_const_expr()? as u32;
472 let fill = if self.peek().kind == TokenKind::Comma {
473 self.advance();
474 self.parse_const_expr()? as u8
475 } else {
476 0
477 };
478 Ok(Some(Statement::Space(SpaceDirective { size, fill, span })))
479 }
480
481 fn parse_org_directive(&mut self, span: Span) -> Result<Option<Statement>, AsmError> {
482 let offset = self.parse_const_expr()? as u64;
483 let fill = if self.peek().kind == TokenKind::Comma {
485 self.advance(); self.parse_const_expr()? as u8
487 } else {
488 0x00
489 };
490 Ok(Some(Statement::Org(OrgDirective { offset, fill, span })))
491 }
492
493 fn parse_const_expr(&mut self) -> Result<i128, AsmError> {
505 self.const_expr_or()
506 }
507
508 fn const_expr_or(&mut self) -> Result<i128, AsmError> {
512 let mut val = self.const_expr_xor()?;
513 while self.peek().kind == TokenKind::Pipe {
514 self.advance();
515 val |= self.const_expr_xor()?;
516 }
517 Ok(val)
518 }
519
520 fn const_expr_xor(&mut self) -> Result<i128, AsmError> {
522 let mut val = self.const_expr_and()?;
523 while self.peek().kind == TokenKind::Caret {
524 self.advance();
525 val ^= self.const_expr_and()?;
526 }
527 Ok(val)
528 }
529
530 fn const_expr_and(&mut self) -> Result<i128, AsmError> {
532 let mut val = self.const_expr_shift()?;
533 while self.peek().kind == TokenKind::Ampersand {
534 self.advance();
535 val &= self.const_expr_shift()?;
536 }
537 Ok(val)
538 }
539
540 fn const_expr_shift(&mut self) -> Result<i128, AsmError> {
542 let mut val = self.const_expr_add()?;
543 loop {
544 match self.peek().kind {
545 TokenKind::LShift => {
546 self.advance();
547 let rhs = self.const_expr_add()?;
548 val = val.wrapping_shl(rhs as u32);
549 }
550 TokenKind::RShift => {
551 self.advance();
552 let rhs = self.const_expr_add()?;
553 val = val.wrapping_shr(rhs as u32);
554 }
555 _ => break,
556 }
557 }
558 Ok(val)
559 }
560
561 fn const_expr_add(&mut self) -> Result<i128, AsmError> {
563 let mut val = self.const_expr_mul()?;
564 loop {
565 match self.peek().kind {
566 TokenKind::Plus => {
567 self.advance();
568 val = val.wrapping_add(self.const_expr_mul()?);
569 }
570 TokenKind::Minus => {
571 self.advance();
572 val = val.wrapping_sub(self.const_expr_mul()?);
573 }
574 _ => break,
575 }
576 }
577 Ok(val)
578 }
579
580 fn const_expr_mul(&mut self) -> Result<i128, AsmError> {
582 let mut val = self.const_expr_unary()?;
583 loop {
584 match self.peek().kind {
585 TokenKind::Star => {
586 self.advance();
587 val = val.wrapping_mul(self.const_expr_unary()?);
588 }
589 TokenKind::Slash => {
590 let span = self.peek().span;
591 self.advance();
592 let rhs = self.const_expr_unary()?;
593 if rhs == 0 {
594 return Err(AsmError::Syntax {
595 msg: String::from("division by zero in constant expression"),
596 span,
597 });
598 }
599 val /= rhs;
600 }
601 TokenKind::Percent => {
602 let span = self.peek().span;
603 self.advance();
604 let rhs = self.const_expr_unary()?;
605 if rhs == 0 {
606 return Err(AsmError::Syntax {
607 msg: String::from("modulo by zero in constant expression"),
608 span,
609 });
610 }
611 val %= rhs;
612 }
613 _ => break,
614 }
615 }
616 Ok(val)
617 }
618
619 fn const_expr_unary(&mut self) -> Result<i128, AsmError> {
621 match self.peek().kind {
622 TokenKind::Minus => {
623 self.advance();
624 Ok(-self.const_expr_unary()?)
625 }
626 TokenKind::Tilde => {
627 self.advance();
628 Ok(!self.const_expr_unary()?)
629 }
630 _ => self.const_expr_atom(),
631 }
632 }
633
634 fn const_expr_atom(&mut self) -> Result<i128, AsmError> {
636 let tok = self.peek().clone();
637 match &tok.kind {
638 TokenKind::Number(n) => {
639 self.advance();
640 Ok(*n)
641 }
642 TokenKind::Ident => {
643 if let Some(&val) = self.constants.get(&*tok.text) {
644 self.advance();
645 Ok(val)
646 } else {
647 Err(AsmError::Syntax {
648 msg: alloc::format!(
649 "expected constant expression, found undefined identifier '{}'",
650 tok.text
651 ),
652 span: tok.span,
653 })
654 }
655 }
656 TokenKind::OpenParen => {
657 self.advance(); let val = self.parse_const_expr()?;
659 if self.peek().kind != TokenKind::CloseParen {
660 return Err(AsmError::Syntax {
661 msg: String::from("expected ')' in constant expression"),
662 span: self.peek().span,
663 });
664 }
665 self.advance(); Ok(val)
667 }
668 _ => Err(AsmError::Syntax {
669 msg: alloc::format!("expected constant expression, found '{}'", tok.text),
670 span: tok.span,
671 }),
672 }
673 }
674
675 fn parse_instruction_or_prefix(&mut self) -> Result<Option<Statement>, AsmError> {
676 let tok = self.peek().clone();
677
678 if self.pos + 1 < self.tokens.len() && self.tokens[self.pos + 1].kind == TokenKind::Equals {
680 let name = tok.text.to_string();
681 let span = tok.span;
682 self.advance(); self.advance(); let value = self.parse_const_expr()?;
685 self.constants.insert(name.clone(), value);
686 return Ok(Some(Statement::Const(ConstDef { name, value, span })));
687 }
688
689 let mut prefixes = PrefixList::new();
693 let mut mnemonic_pos = self.pos;
694 let mut current_span = tok.span;
695
696 loop {
697 let prefix = {
698 let text = &*self.tokens[mnemonic_pos].text;
699 if text.eq_ignore_ascii_case("lock") {
700 Some(Prefix::Lock)
701 } else if text.eq_ignore_ascii_case("rep")
702 || text.eq_ignore_ascii_case("repe")
703 || text.eq_ignore_ascii_case("repz")
704 {
705 Some(Prefix::Rep)
706 } else if text.eq_ignore_ascii_case("repne") || text.eq_ignore_ascii_case("repnz") {
707 Some(Prefix::Repne)
708 } else {
709 None
710 }
711 };
712 match prefix {
713 Some(p) => {
714 prefixes.push(p);
715 self.advance();
716 }
717 None => break,
718 }
719 if self.at_end() || self.peek().kind != TokenKind::Ident {
720 return Ok(Some(Statement::Instruction(Instruction {
722 mnemonic: {
723 let mut lbuf = [0u8; 32];
724 Mnemonic::from(to_lower_buf(&self.tokens[mnemonic_pos].text, &mut lbuf))
725 },
726 operands: OperandList::new(),
727 size_hint: None,
728 prefixes,
729 opmask: None,
730 zeroing: false,
731 broadcast: None,
732 span: current_span,
733 })));
734 }
735 mnemonic_pos = self.pos;
736 current_span = self.tokens[mnemonic_pos].span;
737 }
738
739 let mut mnemonic = {
741 let mut lbuf = [0u8; 32];
742 Mnemonic::from(to_lower_buf(&self.tokens[mnemonic_pos].text, &mut lbuf))
743 };
744 let mnemonic_span = current_span;
745 self.advance(); let mut operands = OperandList::new();
749 let mut size_hint = None;
750 let mut opmask: Option<Register> = None;
751 let mut zeroing = false;
752 let mut broadcast: Option<BroadcastMode> = None;
753
754 if self.syntax == Syntax::Att {
756 if let Some((base, sz)) = strip_att_suffix(&mnemonic) {
757 mnemonic = base;
758 size_hint = Some(sz);
759 }
760 }
761
762 if !self.at_end() && !self.is_statement_end() {
763 let (op, hint) = self.parse_operand()?;
764 let op = if self.peek().kind == TokenKind::Bang {
766 if let Operand::Register(r) = &op {
767 if r.is_arm() || r.is_aarch64() {
768 self.advance(); Operand::Memory(Box::new(MemoryOperand {
770 base: Some(*r),
771 index: None,
772 scale: 1,
773 disp: 0,
774 disp_label: None,
775 segment: None,
776 size: None,
777 addr_mode: AddrMode::PreIndex,
778 index_subtract: false,
779 }))
780 } else {
781 op
782 }
783 } else {
784 op
785 }
786 } else {
787 op
788 };
789 operands.push(op);
790 if hint.is_some() && size_hint.is_none() {
791 size_hint = hint;
792 }
793
794 if self.arch == Arch::X86_64 || self.arch == Arch::X86 {
796 self.parse_evex_decorators(&mut opmask, &mut zeroing, &mut broadcast)?;
797 }
798
799 while self.peek().kind == TokenKind::Comma {
800 self.advance(); let (op, hint) = self.parse_operand()?;
802 operands.push(op);
803 if hint.is_some() && size_hint.is_none() {
804 size_hint = hint;
805 }
806 if self.arch == Arch::X86_64 || self.arch == Arch::X86 {
808 self.parse_evex_decorators(&mut opmask, &mut zeroing, &mut broadcast)?;
809 }
810 }
811 }
812
813 if self.syntax == Syntax::Att && operands.len() >= 2 {
817 operands.reverse();
818 }
819
820 Ok(Some(Statement::Instruction(Instruction {
821 mnemonic,
822 operands,
823 size_hint,
824 prefixes,
825 opmask,
826 zeroing,
827 broadcast,
828 span: mnemonic_span,
829 })))
830 }
831
832 fn is_statement_end(&self) -> bool {
833 matches!(self.peek().kind, TokenKind::Newline | TokenKind::Eof)
834 }
835
836 fn parse_expr_atom(&mut self, ctx_tok: &Token<'a>) -> Result<Expr, AsmError> {
840 let next = self.peek().clone();
841 match &next.kind {
842 TokenKind::Number(n) => {
843 self.advance();
844 Ok(Expr::Num(*n))
845 }
846 TokenKind::Ident => {
847 self.advance();
848 if let Some(&val) = self.constants.get(&*next.text) {
850 Ok(Expr::Num(val))
851 } else {
852 Ok(Expr::Label(next.text.to_string()))
853 }
854 }
855 _ => Err(AsmError::Syntax {
856 msg: alloc::format!(
857 "expected number or identifier after '+'/'-' near '{}'",
858 ctx_tok.text
859 ),
860 span: next.span,
861 }),
862 }
863 }
864
865 fn parse_evex_decorators(
870 &mut self,
871 opmask: &mut Option<Register>,
872 zeroing: &mut bool,
873 broadcast: &mut Option<BroadcastMode>,
874 ) -> Result<(), AsmError> {
875 while self.peek().kind == TokenKind::OpenBrace {
876 let brace_span = self.peek().span;
877 self.advance(); let tok = self.peek().clone();
879 match tok.kind {
880 TokenKind::Ident => {
881 let mut lbuf = [0u8; 32];
882 let lower = to_lower_buf(&tok.text, &mut lbuf);
883 if lower == "z" {
884 *zeroing = true;
885 self.advance();
886 } else if let Some(kreg) = parse_register_lower(lower, Arch::X86_64) {
887 if kreg.is_opmask() {
888 *opmask = Some(kreg);
889 self.advance();
890 } else {
891 return Err(AsmError::Syntax {
892 msg: String::from("expected opmask register k0-k7"),
893 span: tok.span,
894 });
895 }
896 } else {
897 return Err(AsmError::Syntax {
898 msg: String::from("unexpected identifier in AVX-512 decorator"),
899 span: tok.span,
900 });
901 }
902 }
903 TokenKind::Number(_) => {
904 if tok.text == "1" {
906 self.advance(); let next = self.peek().clone();
909 if next.kind == TokenKind::Ident {
910 let mut lbuf = [0u8; 32];
911 let nlower = to_lower_buf(&next.text, &mut lbuf);
912 let mode = match nlower {
913 "to2" => Some(BroadcastMode::OneToTwo),
914 "to4" => Some(BroadcastMode::OneToFour),
915 "to8" => Some(BroadcastMode::OneToEight),
916 "to16" => Some(BroadcastMode::OneToSixteen),
917 _ => None,
918 };
919 if let Some(m) = mode {
920 *broadcast = Some(m);
921 self.advance();
922 } else {
923 return Err(AsmError::Syntax {
924 msg: String::from("expected 1to2, 1to4, 1to8, or 1to16"),
925 span: next.span,
926 });
927 }
928 } else {
929 return Err(AsmError::Syntax {
930 msg: String::from("expected broadcast specifier (1to2/4/8/16)"),
931 span: next.span,
932 });
933 }
934 } else {
935 return Err(AsmError::Syntax {
936 msg: String::from("unexpected number in AVX-512 decorator"),
937 span: tok.span,
938 });
939 }
940 }
941 _ => {
942 return Err(AsmError::Syntax {
946 msg: String::from("unexpected token in AVX-512 decorator"),
947 span: brace_span,
948 });
949 }
950 }
951 if self.peek().kind == TokenKind::CloseBrace {
953 self.advance();
954 } else {
955 return Err(AsmError::Syntax {
956 msg: String::from("expected '}' after AVX-512 decorator"),
957 span: self.peek().span,
958 });
959 }
960 }
961 Ok(())
962 }
963
964 fn parse_operand(&mut self) -> Result<(Operand, Option<OperandSize>), AsmError> {
965 let tok = self.peek().clone();
966
967 if tok.kind == TokenKind::Ident {
969 let mut lbuf = [0u8; 32];
970 let lower = to_lower_buf(&tok.text, &mut lbuf);
971 if let Some(sz) = self.try_parse_size_hint(lower) {
972 if self.peek().kind == TokenKind::Ident
974 && self.peek().text.eq_ignore_ascii_case("ptr")
975 {
976 self.advance(); }
978 let (op, _) = self.parse_operand_inner()?;
979 return Ok((op, Some(sz)));
980 }
981 }
982
983 self.parse_operand_inner()
984 }
985
986 fn try_parse_size_hint(&mut self, ident: &str) -> Option<OperandSize> {
988 if ident.eq_ignore_ascii_case("byte") {
989 self.advance();
990 Some(OperandSize::Byte)
991 } else if ident.eq_ignore_ascii_case("word") {
992 self.advance();
993 Some(OperandSize::Word)
994 } else if ident.eq_ignore_ascii_case("dword") {
995 self.advance();
996 Some(OperandSize::Dword)
997 } else if ident.eq_ignore_ascii_case("qword") {
998 self.advance();
999 Some(OperandSize::Qword)
1000 } else if ident.eq_ignore_ascii_case("xmmword") || ident.eq_ignore_ascii_case("oword") {
1001 self.advance();
1002 Some(OperandSize::Xmmword)
1003 } else if ident.eq_ignore_ascii_case("ymmword") {
1004 self.advance();
1005 Some(OperandSize::Ymmword)
1006 } else if ident.eq_ignore_ascii_case("zmmword") {
1007 self.advance();
1008 Some(OperandSize::Zmmword)
1009 } else {
1010 None
1011 }
1012 }
1013
1014 fn parse_operand_inner(&mut self) -> Result<(Operand, Option<OperandSize>), AsmError> {
1015 if self.syntax == Syntax::Att {
1017 return self.parse_att_operand();
1018 }
1019
1020 let tok = self.peek().clone();
1021
1022 match &tok.kind {
1023 TokenKind::OpenBrace => {
1027 self.advance(); let first = self.peek().clone();
1030 if let TokenKind::Ident = &first.kind {
1031 let mut lbuf = [0u8; 32];
1032 let lower = to_lower_buf(&first.text, &mut lbuf);
1033 if let Some(dot_pos) = lower.find('.') {
1034 let reg_part = &lower[..dot_pos];
1035 let arr_part = &lower[dot_pos + 1..];
1036 if let Some(reg) = parse_register_lower(reg_part, self.arch) {
1037 if reg.is_a64_sve_z() || reg.is_a64_vector() {
1038 if let Some(arr) = VectorArrangement::parse(arr_part) {
1039 self.advance(); if self.peek().kind != TokenKind::CloseBrace {
1041 return Err(AsmError::Syntax {
1042 msg: String::from("expected '}' after vector register"),
1043 span: self.peek().span,
1044 });
1045 }
1046 self.advance(); return Ok((Operand::VectorRegister(reg, arr), None));
1048 }
1049 }
1050 }
1051 }
1052 }
1053 let mut regs = Vec::new();
1055 loop {
1056 let rtok = self.peek().clone();
1057 if rtok.kind == TokenKind::CloseBrace {
1058 self.advance();
1059 break;
1060 }
1061 if rtok.kind == TokenKind::Comma {
1062 self.advance();
1063 continue;
1064 }
1065 if let TokenKind::Ident = &rtok.kind {
1066 let mut lbuf = [0u8; 32];
1067 let lower = to_lower_buf(&rtok.text, &mut lbuf);
1068 if let Some(reg) = parse_register_lower(lower, self.arch) {
1069 self.advance();
1070 regs.push(reg);
1071 continue;
1072 }
1073 }
1074 return Err(AsmError::Syntax {
1075 msg: alloc::format!(
1076 "expected register in register list, found '{}'",
1077 rtok.text
1078 ),
1079 span: rtok.span,
1080 });
1081 }
1082 Ok((Operand::RegisterList(regs), None))
1083 }
1084
1085 TokenKind::OpenBracket => {
1087 let mem = self.parse_memory_operand()?;
1088 Ok((Operand::Memory(Box::new(mem)), None))
1089 }
1090
1091 TokenKind::Equals => {
1093 self.advance(); let next = self.peek().clone();
1095 match next.kind {
1096 TokenKind::Number(n) => {
1097 self.advance();
1098 Ok((Operand::LiteralPoolValue(n), None))
1099 }
1100 TokenKind::Minus => {
1101 self.advance();
1102 if let TokenKind::Number(n) = self.peek().kind {
1103 self.advance();
1104 Ok((Operand::LiteralPoolValue(-n), None))
1105 } else {
1106 Err(AsmError::Syntax {
1107 msg: String::from("expected number after '=-'"),
1108 span: next.span,
1109 })
1110 }
1111 }
1112 _ => Err(AsmError::Syntax {
1113 msg: alloc::format!("expected number after '=', found '{}'", next.text),
1114 span: next.span,
1115 }),
1116 }
1117 }
1118
1119 TokenKind::OpenParen if matches!(self.arch, Arch::Rv32 | Arch::Rv64) => {
1121 self.parse_riscv_mem_operand(0)
1122 }
1123
1124 TokenKind::Number(n) => {
1126 let val = *n;
1127 self.advance();
1128 if matches!(self.arch, Arch::Rv32 | Arch::Rv64)
1130 && self.peek().kind == TokenKind::OpenParen
1131 {
1132 return self.parse_riscv_mem_operand(val);
1133 }
1134 Ok((Operand::Immediate(val), None))
1135 }
1136
1137 TokenKind::Minus => {
1139 self.advance();
1140 let next = self.peek().clone();
1141 if let TokenKind::Number(n) = next.kind {
1142 self.advance();
1143 let val = -n;
1144 if matches!(self.arch, Arch::Rv32 | Arch::Rv64)
1146 && self.peek().kind == TokenKind::OpenParen
1147 {
1148 return self.parse_riscv_mem_operand(val);
1149 }
1150 Ok((Operand::Immediate(val), None))
1151 } else {
1152 Err(AsmError::Syntax {
1153 msg: String::from("expected number after '-'"),
1154 span: tok.span,
1155 })
1156 }
1157 }
1158
1159 TokenKind::CharLit(ch) => {
1161 self.advance();
1162 Ok((Operand::Immediate(*ch as i128), None))
1163 }
1164
1165 TokenKind::Ident => {
1167 let mut lbuf = [0u8; 32];
1168 let lower = to_lower_buf(&tok.text, &mut lbuf);
1169
1170 if is_segment_name(lower) {
1172 let seg = match parse_segment(lower) {
1173 Some(s) => s,
1174 None => {
1175 return Err(AsmError::Syntax {
1176 msg: alloc::format!("unknown segment register: {}", lower),
1177 span: tok.span,
1178 });
1179 }
1180 };
1181 if self.pos + 1 < self.tokens.len()
1183 && self.tokens[self.pos + 1].kind == TokenKind::Colon
1184 {
1185 self.advance(); self.advance(); if self.peek().kind == TokenKind::OpenBracket {
1189 let mut mem = self.parse_memory_operand()?;
1190 mem.segment = Some(seg);
1191 return Ok((Operand::Memory(Box::new(mem)), None));
1192 }
1193 }
1194 }
1195
1196 if let Some(dot_pos) = lower.find('.') {
1198 let reg_part = &lower[..dot_pos];
1200 let arr_part = &lower[dot_pos + 1..];
1201 if let Some(reg) = parse_register_lower(reg_part, self.arch) {
1202 if reg.is_a64_vector() || reg.is_a64_sve_z() || reg.is_a64_sve_p() {
1203 if let Some(arr) = VectorArrangement::parse(arr_part) {
1204 self.advance();
1205 return Ok((Operand::VectorRegister(reg, arr), None));
1206 }
1207 }
1208 if reg.is_riscv_vec() && arr_part == "t" {
1210 self.advance();
1211 return Ok((Operand::Register(reg), None));
1212 }
1213 }
1214 }
1215
1216 if let Some(reg) = parse_register_lower(lower, self.arch) {
1217 self.advance();
1218 if reg.is_a64_sve_p() && self.peek().kind == TokenKind::Slash {
1220 let next_pos = self.pos + 1;
1221 if next_pos < self.tokens.len() {
1222 let qual_text = &self.tokens[next_pos].text;
1223 let qual = if qual_text.eq_ignore_ascii_case("m") {
1224 Some(SvePredQual::Merging)
1225 } else if qual_text.eq_ignore_ascii_case("z") {
1226 Some(SvePredQual::Zeroing)
1227 } else {
1228 None
1229 };
1230 if let Some(q) = qual {
1231 self.advance(); self.advance(); return Ok((Operand::SvePredicate(reg, q), None));
1234 }
1235 }
1236 }
1237 return Ok((Operand::Register(reg), None));
1238 }
1239
1240 if let Some(&val) = self.constants.get(&*tok.text) {
1242 self.advance();
1243 let mut result = val;
1245 loop {
1246 if self.peek().kind == TokenKind::Plus {
1247 self.advance();
1248 let next = self.peek().clone();
1249 match &next.kind {
1250 TokenKind::Number(n) => {
1251 self.advance();
1252 result += n;
1253 }
1254 TokenKind::Ident => {
1255 if let Some(&v) = self.constants.get(&*next.text) {
1256 self.advance();
1257 result += v;
1258 } else {
1259 break;
1260 }
1261 }
1262 _ => break,
1263 }
1264 } else if self.peek().kind == TokenKind::Minus {
1265 self.advance();
1266 let next = self.peek().clone();
1267 match &next.kind {
1268 TokenKind::Number(n) => {
1269 self.advance();
1270 result -= n;
1271 }
1272 TokenKind::Ident => {
1273 if let Some(&v) = self.constants.get(&*next.text) {
1274 self.advance();
1275 result -= v;
1276 } else {
1277 break;
1278 }
1279 }
1280 _ => break,
1281 }
1282 } else {
1283 break;
1284 }
1285 }
1286 return Ok((Operand::Immediate(result), None));
1287 }
1288
1289 self.advance();
1291 let mut expr: Expr = Expr::Label(tok.text.to_string());
1294 let mut is_expression = false;
1295 loop {
1296 if self.peek().kind == TokenKind::Plus {
1297 self.advance();
1298 let rhs = self.parse_expr_atom(&tok)?;
1299 expr = Expr::Add(Box::new(expr), Box::new(rhs));
1300 is_expression = true;
1301 } else if self.peek().kind == TokenKind::Minus {
1302 self.advance();
1303 let rhs = self.parse_expr_atom(&tok)?;
1304 expr = Expr::Sub(Box::new(expr), Box::new(rhs));
1305 is_expression = true;
1306 } else {
1307 break;
1308 }
1309 }
1310
1311 if is_expression {
1312 expr.resolve_constants(|name| self.constants.get(name).copied());
1314 if let Some(val) = expr.eval() {
1315 return Ok((Operand::Immediate(val), None));
1316 }
1317 return Ok((Operand::Expression(expr), None));
1318 }
1319
1320 Ok((Operand::Label(tok.text.to_string()), None))
1321 }
1322
1323 TokenKind::NumericLabelFwd(n) => {
1325 self.advance();
1326 Ok((Operand::Label(alloc::format!("{}f", n)), None))
1327 }
1328 TokenKind::NumericLabelBwd(n) => {
1329 self.advance();
1330 Ok((Operand::Label(alloc::format!("{}b", n)), None))
1331 }
1332
1333 _ => Err(AsmError::Syntax {
1334 msg: alloc::format!("expected operand, found '{}'", tok.text),
1335 span: tok.span,
1336 }),
1337 }
1338 }
1339
1340 fn parse_att_operand(&mut self) -> Result<(Operand, Option<OperandSize>), AsmError> {
1352 let tok = self.peek().clone();
1353
1354 match &tok.kind {
1355 TokenKind::Dollar => {
1357 self.advance(); let next = self.peek().clone();
1359 match &next.kind {
1360 TokenKind::Number(n) => {
1361 let val = *n;
1362 self.advance();
1363 Ok((Operand::Immediate(val), None))
1364 }
1365 TokenKind::Minus => {
1366 self.advance(); let num_tok = self.peek().clone();
1368 if let TokenKind::Number(n) = num_tok.kind {
1369 self.advance();
1370 Ok((Operand::Immediate(-n), None))
1371 } else {
1372 Err(AsmError::Syntax {
1373 msg: String::from("expected number after '$-'"),
1374 span: num_tok.span,
1375 })
1376 }
1377 }
1378 TokenKind::Ident => {
1379 let name = next.text.to_string();
1380 self.advance();
1381 if let Some(&val) = self.constants.get(&name) {
1383 Ok((Operand::Immediate(val), None))
1384 } else {
1385 Ok((Operand::Label(name), None))
1387 }
1388 }
1389 _ => Err(AsmError::Syntax {
1390 msg: alloc::format!(
1391 "expected number or identifier after '$', found '{}'",
1392 next.text
1393 ),
1394 span: next.span,
1395 }),
1396 }
1397 }
1398
1399 TokenKind::Percent => {
1401 self.advance(); let reg_tok = self.peek().clone();
1403 if reg_tok.kind != TokenKind::Ident {
1404 return Err(AsmError::Syntax {
1405 msg: alloc::format!(
1406 "expected register name after '%', found '{}'",
1407 reg_tok.text
1408 ),
1409 span: reg_tok.span,
1410 });
1411 }
1412 let mut lbuf = [0u8; 32];
1413 let lower = to_lower_buf(®_tok.text, &mut lbuf);
1414
1415 if is_segment_name(lower) {
1417 let seg = parse_segment(lower).unwrap();
1420 if self.pos + 1 < self.tokens.len()
1422 && self.tokens[self.pos + 1].kind == TokenKind::Colon
1423 {
1424 self.advance(); self.advance(); let (seg_disp, seg_disp_label) = match self.peek().kind {
1428 TokenKind::Number(n) => {
1429 let val = n;
1430 self.advance();
1431 (val as i64, None)
1432 }
1433 TokenKind::Minus => {
1434 self.advance();
1435 if let TokenKind::Number(n) = self.peek().kind {
1436 let val = n;
1437 self.advance();
1438 (-(val as i64), None)
1439 } else {
1440 (0, None)
1441 }
1442 }
1443 _ => (0, None),
1444 };
1445 let mut mem = self.parse_att_memory_operand(seg_disp, seg_disp_label)?;
1446 mem.segment = Some(seg);
1447 return Ok((Operand::Memory(Box::new(mem)), None));
1448 }
1449 }
1450
1451 if let Some(reg) = parse_register_lower(lower, self.arch) {
1453 self.advance();
1454 Ok((Operand::Register(reg), None))
1455 } else {
1456 Err(AsmError::Syntax {
1457 msg: alloc::format!("unknown register: %{}", lower),
1458 span: reg_tok.span,
1459 })
1460 }
1461 }
1462
1463 TokenKind::OpenParen => {
1465 let mem = self.parse_att_memory_operand(0, None)?;
1466 Ok((Operand::Memory(Box::new(mem)), None))
1467 }
1468
1469 TokenKind::Number(n) => {
1471 let val = *n;
1472 self.advance();
1473 if self.peek().kind == TokenKind::OpenParen {
1475 let mem = self.parse_att_memory_operand(val as i64, None)?;
1476 Ok((Operand::Memory(Box::new(mem)), None))
1477 } else {
1478 Ok((Operand::Immediate(val), None))
1480 }
1481 }
1482
1483 TokenKind::Minus => {
1485 self.advance(); let next = self.peek().clone();
1487 if let TokenKind::Number(n) = next.kind {
1488 self.advance();
1489 let val = -n;
1490 if self.peek().kind == TokenKind::OpenParen {
1491 let mem = self.parse_att_memory_operand(val as i64, None)?;
1492 Ok((Operand::Memory(Box::new(mem)), None))
1493 } else {
1494 Ok((Operand::Immediate(val), None))
1495 }
1496 } else {
1497 Err(AsmError::Syntax {
1498 msg: String::from("expected number after '-' in AT&T operand"),
1499 span: tok.span,
1500 })
1501 }
1502 }
1503
1504 TokenKind::Ident => {
1506 let name = tok.text.to_string();
1507 self.advance();
1508 if let Some(&val) = self.constants.get(&name) {
1510 if self.peek().kind == TokenKind::OpenParen {
1512 let mem = self.parse_att_memory_operand(val as i64, None)?;
1513 return Ok((Operand::Memory(Box::new(mem)), None));
1514 }
1515 return Ok((Operand::Immediate(val), None));
1516 }
1517 let mut expr = Expr::Label(name.clone());
1519 let mut has_offset = false;
1520 loop {
1521 if self.peek().kind == TokenKind::Plus {
1522 self.advance();
1523 let atom = self.parse_expr_atom(&tok)?;
1524 expr = Expr::Add(Box::new(expr), Box::new(atom));
1525 has_offset = true;
1526 } else if self.peek().kind == TokenKind::Minus {
1527 self.advance();
1528 let atom = self.parse_expr_atom(&tok)?;
1529 expr = Expr::Sub(Box::new(expr), Box::new(atom));
1530 has_offset = true;
1531 } else {
1532 break;
1533 }
1534 }
1535 if has_offset {
1536 Ok((Operand::Expression(expr), None))
1537 } else {
1538 Ok((Operand::Label(name), None))
1539 }
1540 }
1541
1542 TokenKind::NumericLabelFwd(n) => {
1544 let n = *n;
1545 self.advance();
1546 Ok((Operand::Label(alloc::format!("{}f", n)), None))
1547 }
1548 TokenKind::NumericLabelBwd(n) => {
1549 let n = *n;
1550 self.advance();
1551 Ok((Operand::Label(alloc::format!("{}b", n)), None))
1552 }
1553
1554 TokenKind::Star => {
1556 self.advance(); self.parse_att_operand()
1559 }
1560
1561 _ => Err(AsmError::Syntax {
1562 msg: alloc::format!("unexpected token in AT&T operand: '{}'", tok.text),
1563 span: tok.span,
1564 }),
1565 }
1566 }
1567
1568 fn parse_att_memory_operand(
1572 &mut self,
1573 disp: i64,
1574 disp_label: Option<String>,
1575 ) -> Result<MemoryOperand, AsmError> {
1576 let open = self.peek().clone();
1577 if open.kind != TokenKind::OpenParen {
1578 return Err(AsmError::Syntax {
1579 msg: alloc::format!("expected '(' in AT&T memory operand, found '{}'", open.text),
1580 span: open.span,
1581 });
1582 }
1583 self.advance(); let mut base = None;
1586 let mut index = None;
1587 let mut scale: u8 = 1;
1588
1589 if self.peek().kind == TokenKind::Percent {
1591 self.advance(); let reg_tok = self.peek().clone();
1593 let mut lbuf = [0u8; 32];
1594 let lower = to_lower_buf(®_tok.text, &mut lbuf);
1595 base =
1596 Some(
1597 parse_register_lower(lower, self.arch).ok_or_else(|| AsmError::Syntax {
1598 msg: alloc::format!("unknown register: %{}", lower),
1599 span: reg_tok.span,
1600 })?,
1601 );
1602 self.advance();
1603 }
1604
1605 if self.peek().kind == TokenKind::Comma {
1607 self.advance(); if self.peek().kind == TokenKind::Percent {
1609 self.advance(); let reg_tok = self.peek().clone();
1611 let mut lbuf = [0u8; 32];
1612 let lower = to_lower_buf(®_tok.text, &mut lbuf);
1613 index = Some(parse_register_lower(lower, self.arch).ok_or_else(|| {
1614 AsmError::Syntax {
1615 msg: alloc::format!("unknown register: %{}", lower),
1616 span: reg_tok.span,
1617 }
1618 })?);
1619 self.advance();
1620 }
1621
1622 if self.peek().kind == TokenKind::Comma {
1624 self.advance(); let scale_tok = self.peek().clone();
1626 if let TokenKind::Number(n) = scale_tok.kind {
1627 scale = n as u8;
1628 self.advance();
1629 } else {
1630 return Err(AsmError::Syntax {
1631 msg: alloc::format!(
1632 "expected scale factor (1,2,4,8), found '{}'",
1633 scale_tok.text
1634 ),
1635 span: scale_tok.span,
1636 });
1637 }
1638 }
1639 }
1640
1641 let close = self.peek().clone();
1643 if close.kind != TokenKind::CloseParen {
1644 return Err(AsmError::Syntax {
1645 msg: alloc::format!(
1646 "expected ')' in AT&T memory operand, found '{}'",
1647 close.text
1648 ),
1649 span: close.span,
1650 });
1651 }
1652 self.advance();
1653
1654 Ok(MemoryOperand {
1655 base,
1656 index,
1657 scale,
1658 disp,
1659 disp_label,
1660 segment: None,
1661 size: None,
1662 addr_mode: AddrMode::Offset,
1663 index_subtract: false,
1664 })
1665 }
1666
1667 fn parse_riscv_mem_operand(
1670 &mut self,
1671 offset: i128,
1672 ) -> Result<(Operand, Option<OperandSize>), AsmError> {
1673 let open_tok = self.advance().clone(); debug_assert_eq!(open_tok.kind, TokenKind::OpenParen);
1675
1676 let reg_tok = self.peek().clone();
1677 let mut lbuf = [0u8; 32];
1678 let lower = to_lower_buf(®_tok.text, &mut lbuf);
1679 let reg = if let Some(r) = parse_register_lower(lower, self.arch) {
1680 self.advance();
1681 r
1682 } else {
1683 return Err(AsmError::Syntax {
1684 msg: alloc::format!(
1685 "expected register in memory operand, found '{}'",
1686 reg_tok.text
1687 ),
1688 span: reg_tok.span,
1689 });
1690 };
1691
1692 let close = self.peek().clone();
1694 if close.kind != TokenKind::CloseParen {
1695 return Err(AsmError::Syntax {
1696 msg: alloc::format!("expected ')' after register, found '{}'", close.text),
1697 span: close.span,
1698 });
1699 }
1700 self.advance();
1701
1702 let mem = MemoryOperand {
1703 base: Some(reg),
1704 disp: offset as i64,
1705 ..Default::default()
1706 };
1707 Ok((Operand::Memory(Box::new(mem)), None))
1708 }
1709
1710 fn parse_memory_operand(&mut self) -> Result<MemoryOperand, AsmError> {
1712 let open = self.advance().clone(); debug_assert_eq!(open.kind, TokenKind::OpenBracket);
1714
1715 let mut mem = MemoryOperand::default();
1716 let mut _expect_term = true;
1717 let mut sign: i64 = 1;
1718
1719 while self.peek().kind != TokenKind::CloseBracket {
1720 if self.at_end() {
1721 return Err(AsmError::Syntax {
1722 msg: String::from("unterminated memory operand, expected ']'"),
1723 span: open.span,
1724 });
1725 }
1726
1727 let tok = self.peek().clone();
1728
1729 match &tok.kind {
1730 TokenKind::Plus | TokenKind::Comma => {
1731 self.advance();
1733 sign = 1;
1734 _expect_term = true;
1735 continue;
1736 }
1737 TokenKind::Minus => {
1738 self.advance();
1739 sign = -1;
1740 _expect_term = true;
1741 continue;
1742 }
1743 TokenKind::Ident => {
1744 let mut lbuf = [0u8; 32];
1745 let lower = to_lower_buf(&tok.text, &mut lbuf);
1746 if let Some(reg) = parse_register_lower(lower, self.arch) {
1747 self.advance();
1748 if self.peek().kind == TokenKind::Star {
1750 self.advance(); let scale_tok = self.peek().clone();
1752 if let TokenKind::Number(s) = scale_tok.kind {
1753 if !matches!(s, 1 | 2 | 4 | 8) {
1754 return Err(AsmError::Syntax {
1755 msg: String::from("scale factor must be 1, 2, 4, or 8"),
1756 span: scale_tok.span,
1757 });
1758 }
1759 self.advance();
1760 mem.index = Some(reg);
1761 mem.scale = s as u8;
1762 mem.index_subtract = sign < 0;
1763 } else {
1764 return Err(AsmError::Syntax {
1765 msg: String::from("expected scale factor (1, 2, 4, or 8)"),
1766 span: scale_tok.span,
1767 });
1768 }
1769 } else if mem.base.is_none() {
1770 mem.base = Some(reg);
1771 } else if mem.index.is_none() {
1772 mem.index = Some(reg);
1773 mem.scale = 1;
1774 mem.index_subtract = sign < 0;
1775 } else {
1776 return Err(AsmError::Syntax {
1777 msg: String::from("too many registers in memory operand"),
1778 span: tok.span,
1779 });
1780 }
1781 } else {
1782 self.advance();
1784 mem.disp_label = Some(tok.text.to_string());
1785 }
1786 _expect_term = false;
1787 }
1788 TokenKind::Number(n) => {
1789 self.advance();
1790 if self.peek().kind == TokenKind::Star {
1792 self.advance(); let reg_tok = self.peek().clone();
1794 if reg_tok.kind == TokenKind::Ident {
1795 let mut lbuf = [0u8; 32];
1796 let lower = to_lower_buf(®_tok.text, &mut lbuf);
1797 if let Some(reg) = parse_register_lower(lower, self.arch) {
1798 if !matches!(*n, 1 | 2 | 4 | 8) {
1799 return Err(AsmError::Syntax {
1800 msg: String::from("scale factor must be 1, 2, 4, or 8"),
1801 span: tok.span,
1802 });
1803 }
1804 self.advance();
1805 mem.index = Some(reg);
1806 mem.scale = *n as u8;
1807 mem.index_subtract = sign < 0;
1808 _expect_term = false;
1809 continue;
1810 }
1811 }
1812 return Err(AsmError::Syntax {
1813 msg: String::from("expected register after scale factor"),
1814 span: reg_tok.span,
1815 });
1816 }
1817 mem.disp = mem.disp.wrapping_add(sign * (*n as i64));
1818 _expect_term = false;
1819 }
1820 _ => {
1821 return Err(AsmError::Syntax {
1822 msg: alloc::format!("unexpected token '{}' in memory operand", tok.text),
1823 span: tok.span,
1824 });
1825 }
1826 }
1827 }
1828
1829 self.advance(); if self.peek().kind == TokenKind::Bang {
1833 self.advance(); mem.addr_mode = AddrMode::PreIndex;
1835 }
1836
1837 if let Some(idx) = mem.index {
1841 if idx.base_code() == 4 && !idx.is_extended() {
1842 return Err(AsmError::Syntax {
1843 msg: String::from("RSP/ESP/SP cannot be used as a SIB index register"),
1844 span: open.span,
1845 });
1846 }
1847 }
1848
1849 Ok(mem)
1850 }
1851}
1852
1853pub fn parse_register(name: &str, arch: Arch) -> Option<Register> {
1858 let mut buf = [0u8; 16];
1860 let name = to_lower_buf(name, &mut buf);
1861 parse_register_lower(name, arch)
1862}
1863
1864fn parse_register_lower(name: &str, arch: Arch) -> Option<Register> {
1866 use Register::*;
1867
1868 match arch {
1870 Arch::Arm | Arch::Thumb => return parse_register_arm(name),
1871 Arch::Aarch64 => return parse_register_aarch64(name),
1872 Arch::Rv32 | Arch::Rv64 => return parse_register_riscv(name),
1873 _ => {}
1874 }
1875
1876 match name {
1878 "rax" => Some(Rax),
1880 "rcx" => Some(Rcx),
1881 "rdx" => Some(Rdx),
1882 "rbx" => Some(Rbx),
1883 "rsp" => Some(Rsp),
1884 "rbp" => Some(Rbp),
1885 "rsi" => Some(Rsi),
1886 "rdi" => Some(Rdi),
1887 "r8" => Some(R8),
1888 "r9" => Some(R9),
1889 "r10" => Some(R10),
1890 "r11" => Some(R11),
1891 "r12" => Some(R12),
1892 "r13" => Some(R13),
1893 "r14" => Some(R14),
1894 "r15" => Some(R15),
1895 "eax" => Some(Eax),
1897 "ecx" => Some(Ecx),
1898 "edx" => Some(Edx),
1899 "ebx" => Some(Ebx),
1900 "esp" => Some(Esp),
1901 "ebp" => Some(Ebp),
1902 "esi" => Some(Esi),
1903 "edi" => Some(Edi),
1904 "r8d" => Some(R8d),
1905 "r9d" => Some(R9d),
1906 "r10d" => Some(R10d),
1907 "r11d" => Some(R11d),
1908 "r12d" => Some(R12d),
1909 "r13d" => Some(R13d),
1910 "r14d" => Some(R14d),
1911 "r15d" => Some(R15d),
1912 "ax" => Some(Ax),
1914 "cx" => Some(Cx),
1915 "dx" => Some(Dx),
1916 "bx" => Some(Bx),
1917 "sp" => Some(Sp),
1918 "bp" => Some(Bp),
1919 "si" => Some(Si),
1920 "di" => Some(Di),
1921 "r8w" => Some(R8w),
1922 "r9w" => Some(R9w),
1923 "r10w" => Some(R10w),
1924 "r11w" => Some(R11w),
1925 "r12w" => Some(R12w),
1926 "r13w" => Some(R13w),
1927 "r14w" => Some(R14w),
1928 "r15w" => Some(R15w),
1929 "al" => Some(Al),
1931 "cl" => Some(Cl),
1932 "dl" => Some(Dl),
1933 "bl" => Some(Bl),
1934 "spl" => Some(Spl),
1935 "bpl" => Some(Bpl),
1936 "sil" => Some(Sil),
1937 "dil" => Some(Dil),
1938 "ah" => Some(Ah),
1939 "ch" => Some(Ch),
1940 "dh" => Some(Dh),
1941 "bh" => Some(Bh),
1942 "r8b" => Some(R8b),
1943 "r9b" => Some(R9b),
1944 "r10b" => Some(R10b),
1945 "r11b" => Some(R11b),
1946 "r12b" => Some(R12b),
1947 "r13b" => Some(R13b),
1948 "r14b" => Some(R14b),
1949 "r15b" => Some(R15b),
1950 "rip" => Some(Rip),
1952 "eip" => Some(Eip),
1953 "cs" => Some(Cs),
1955 "ds" => Some(Ds),
1956 "es" => Some(Es),
1957 "fs" => Some(Fs),
1958 "gs" => Some(Gs),
1959 "ss" => Some(Ss),
1960 "xmm0" => Some(Xmm0),
1962 "xmm1" => Some(Xmm1),
1963 "xmm2" => Some(Xmm2),
1964 "xmm3" => Some(Xmm3),
1965 "xmm4" => Some(Xmm4),
1966 "xmm5" => Some(Xmm5),
1967 "xmm6" => Some(Xmm6),
1968 "xmm7" => Some(Xmm7),
1969 "xmm8" => Some(Xmm8),
1970 "xmm9" => Some(Xmm9),
1971 "xmm10" => Some(Xmm10),
1972 "xmm11" => Some(Xmm11),
1973 "xmm12" => Some(Xmm12),
1974 "xmm13" => Some(Xmm13),
1975 "xmm14" => Some(Xmm14),
1976 "xmm15" => Some(Xmm15),
1977 "ymm0" => Some(Ymm0),
1979 "ymm1" => Some(Ymm1),
1980 "ymm2" => Some(Ymm2),
1981 "ymm3" => Some(Ymm3),
1982 "ymm4" => Some(Ymm4),
1983 "ymm5" => Some(Ymm5),
1984 "ymm6" => Some(Ymm6),
1985 "ymm7" => Some(Ymm7),
1986 "ymm8" => Some(Ymm8),
1987 "ymm9" => Some(Ymm9),
1988 "ymm10" => Some(Ymm10),
1989 "ymm11" => Some(Ymm11),
1990 "ymm12" => Some(Ymm12),
1991 "ymm13" => Some(Ymm13),
1992 "ymm14" => Some(Ymm14),
1993 "ymm15" => Some(Ymm15),
1994 "zmm0" => Some(Zmm0),
1996 "zmm1" => Some(Zmm1),
1997 "zmm2" => Some(Zmm2),
1998 "zmm3" => Some(Zmm3),
1999 "zmm4" => Some(Zmm4),
2000 "zmm5" => Some(Zmm5),
2001 "zmm6" => Some(Zmm6),
2002 "zmm7" => Some(Zmm7),
2003 "zmm8" => Some(Zmm8),
2004 "zmm9" => Some(Zmm9),
2005 "zmm10" => Some(Zmm10),
2006 "zmm11" => Some(Zmm11),
2007 "zmm12" => Some(Zmm12),
2008 "zmm13" => Some(Zmm13),
2009 "zmm14" => Some(Zmm14),
2010 "zmm15" => Some(Zmm15),
2011 "zmm16" => Some(Zmm16),
2012 "zmm17" => Some(Zmm17),
2013 "zmm18" => Some(Zmm18),
2014 "zmm19" => Some(Zmm19),
2015 "zmm20" => Some(Zmm20),
2016 "zmm21" => Some(Zmm21),
2017 "zmm22" => Some(Zmm22),
2018 "zmm23" => Some(Zmm23),
2019 "zmm24" => Some(Zmm24),
2020 "zmm25" => Some(Zmm25),
2021 "zmm26" => Some(Zmm26),
2022 "zmm27" => Some(Zmm27),
2023 "zmm28" => Some(Zmm28),
2024 "zmm29" => Some(Zmm29),
2025 "zmm30" => Some(Zmm30),
2026 "zmm31" => Some(Zmm31),
2027 "k0" => Some(K0),
2029 "k1" => Some(K1),
2030 "k2" => Some(K2),
2031 "k3" => Some(K3),
2032 "k4" => Some(K4),
2033 "k5" => Some(K5),
2034 "k6" => Some(K6),
2035 "k7" => Some(K7),
2036 _ => None,
2037 }
2038}
2039
/// Returns `true` when `name` is one of the segment register names
/// `cs`, `ds`, `es`, `fs`, `gs` or `ss`, compared ASCII case-insensitively.
fn is_segment_name(name: &str) -> bool {
    const SEGMENT_NAMES: [&str; 6] = ["cs", "ds", "es", "fs", "gs", "ss"];
    SEGMENT_NAMES
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
2048
2049fn parse_segment(name: &str) -> Option<Register> {
2050 if name.eq_ignore_ascii_case("cs") {
2051 Some(Register::Cs)
2052 } else if name.eq_ignore_ascii_case("ds") {
2053 Some(Register::Ds)
2054 } else if name.eq_ignore_ascii_case("es") {
2055 Some(Register::Es)
2056 } else if name.eq_ignore_ascii_case("fs") {
2057 Some(Register::Fs)
2058 } else if name.eq_ignore_ascii_case("gs") {
2059 Some(Register::Gs)
2060 } else if name.eq_ignore_ascii_case("ss") {
2061 Some(Register::Ss)
2062 } else {
2063 None
2064 }
2065}
2066
2067fn parse_register_arm(name: &str) -> Option<Register> {
2069 use Register::*;
2070 match name {
2071 "r0" => Some(ArmR0),
2072 "r1" => Some(ArmR1),
2073 "r2" => Some(ArmR2),
2074 "r3" => Some(ArmR3),
2075 "r4" => Some(ArmR4),
2076 "r5" => Some(ArmR5),
2077 "r6" => Some(ArmR6),
2078 "r7" => Some(ArmR7),
2079 "r8" => Some(ArmR8),
2080 "r9" => Some(ArmR9),
2081 "r10" => Some(ArmR10),
2082 "r11" | "fp" => Some(ArmR11),
2083 "r12" | "ip" => Some(ArmR12),
2084 "r13" | "sp" => Some(ArmSp),
2085 "r14" | "lr" => Some(ArmLr),
2086 "r15" | "pc" => Some(ArmPc),
2087 "cpsr" => Some(ArmCpsr),
2088 _ => None,
2089 }
2090}
2091
2092fn parse_register_aarch64(name: &str) -> Option<Register> {
2094 use Register::*;
2095 match name {
2096 "x0" => Some(A64X0),
2097 "x1" => Some(A64X1),
2098 "x2" => Some(A64X2),
2099 "x3" => Some(A64X3),
2100 "x4" => Some(A64X4),
2101 "x5" => Some(A64X5),
2102 "x6" => Some(A64X6),
2103 "x7" => Some(A64X7),
2104 "x8" => Some(A64X8),
2105 "x9" => Some(A64X9),
2106 "x10" => Some(A64X10),
2107 "x11" => Some(A64X11),
2108 "x12" => Some(A64X12),
2109 "x13" => Some(A64X13),
2110 "x14" => Some(A64X14),
2111 "x15" => Some(A64X15),
2112 "x16" => Some(A64X16),
2113 "x17" => Some(A64X17),
2114 "x18" => Some(A64X18),
2115 "x19" => Some(A64X19),
2116 "x20" => Some(A64X20),
2117 "x21" => Some(A64X21),
2118 "x22" => Some(A64X22),
2119 "x23" => Some(A64X23),
2120 "x24" => Some(A64X24),
2121 "x25" => Some(A64X25),
2122 "x26" => Some(A64X26),
2123 "x27" => Some(A64X27),
2124 "x28" => Some(A64X28),
2125 "x29" => Some(A64X29),
2126 "x30" => Some(A64X30),
2127 "fp" => Some(A64X29),
2128 "lr" => Some(A64X30),
2129 "sp" => Some(A64Sp),
2130 "xzr" => Some(A64Xzr),
2131 "w0" => Some(A64W0),
2132 "w1" => Some(A64W1),
2133 "w2" => Some(A64W2),
2134 "w3" => Some(A64W3),
2135 "w4" => Some(A64W4),
2136 "w5" => Some(A64W5),
2137 "w6" => Some(A64W6),
2138 "w7" => Some(A64W7),
2139 "w8" => Some(A64W8),
2140 "w9" => Some(A64W9),
2141 "w10" => Some(A64W10),
2142 "w11" => Some(A64W11),
2143 "w12" => Some(A64W12),
2144 "w13" => Some(A64W13),
2145 "w14" => Some(A64W14),
2146 "w15" => Some(A64W15),
2147 "w16" => Some(A64W16),
2148 "w17" => Some(A64W17),
2149 "w18" => Some(A64W18),
2150 "w19" => Some(A64W19),
2151 "w20" => Some(A64W20),
2152 "w21" => Some(A64W21),
2153 "w22" => Some(A64W22),
2154 "w23" => Some(A64W23),
2155 "w24" => Some(A64W24),
2156 "w25" => Some(A64W25),
2157 "w26" => Some(A64W26),
2158 "w27" => Some(A64W27),
2159 "w28" => Some(A64W28),
2160 "w29" => Some(A64W29),
2161 "w30" => Some(A64W30),
2162 "wzr" => Some(A64Wzr),
2163 "v0" => Some(A64V0),
2165 "v1" => Some(A64V1),
2166 "v2" => Some(A64V2),
2167 "v3" => Some(A64V3),
2168 "v4" => Some(A64V4),
2169 "v5" => Some(A64V5),
2170 "v6" => Some(A64V6),
2171 "v7" => Some(A64V7),
2172 "v8" => Some(A64V8),
2173 "v9" => Some(A64V9),
2174 "v10" => Some(A64V10),
2175 "v11" => Some(A64V11),
2176 "v12" => Some(A64V12),
2177 "v13" => Some(A64V13),
2178 "v14" => Some(A64V14),
2179 "v15" => Some(A64V15),
2180 "v16" => Some(A64V16),
2181 "v17" => Some(A64V17),
2182 "v18" => Some(A64V18),
2183 "v19" => Some(A64V19),
2184 "v20" => Some(A64V20),
2185 "v21" => Some(A64V21),
2186 "v22" => Some(A64V22),
2187 "v23" => Some(A64V23),
2188 "v24" => Some(A64V24),
2189 "v25" => Some(A64V25),
2190 "v26" => Some(A64V26),
2191 "v27" => Some(A64V27),
2192 "v28" => Some(A64V28),
2193 "v29" => Some(A64V29),
2194 "v30" => Some(A64V30),
2195 "v31" => Some(A64V31),
2196 "q0" => Some(A64Q0),
2198 "q1" => Some(A64Q1),
2199 "q2" => Some(A64Q2),
2200 "q3" => Some(A64Q3),
2201 "q4" => Some(A64Q4),
2202 "q5" => Some(A64Q5),
2203 "q6" => Some(A64Q6),
2204 "q7" => Some(A64Q7),
2205 "q8" => Some(A64Q8),
2206 "q9" => Some(A64Q9),
2207 "q10" => Some(A64Q10),
2208 "q11" => Some(A64Q11),
2209 "q12" => Some(A64Q12),
2210 "q13" => Some(A64Q13),
2211 "q14" => Some(A64Q14),
2212 "q15" => Some(A64Q15),
2213 "q16" => Some(A64Q16),
2214 "q17" => Some(A64Q17),
2215 "q18" => Some(A64Q18),
2216 "q19" => Some(A64Q19),
2217 "q20" => Some(A64Q20),
2218 "q21" => Some(A64Q21),
2219 "q22" => Some(A64Q22),
2220 "q23" => Some(A64Q23),
2221 "q24" => Some(A64Q24),
2222 "q25" => Some(A64Q25),
2223 "q26" => Some(A64Q26),
2224 "q27" => Some(A64Q27),
2225 "q28" => Some(A64Q28),
2226 "q29" => Some(A64Q29),
2227 "q30" => Some(A64Q30),
2228 "q31" => Some(A64Q31),
2229 "d0" => Some(A64D0),
2231 "d1" => Some(A64D1),
2232 "d2" => Some(A64D2),
2233 "d3" => Some(A64D3),
2234 "d4" => Some(A64D4),
2235 "d5" => Some(A64D5),
2236 "d6" => Some(A64D6),
2237 "d7" => Some(A64D7),
2238 "d8" => Some(A64D8),
2239 "d9" => Some(A64D9),
2240 "d10" => Some(A64D10),
2241 "d11" => Some(A64D11),
2242 "d12" => Some(A64D12),
2243 "d13" => Some(A64D13),
2244 "d14" => Some(A64D14),
2245 "d15" => Some(A64D15),
2246 "d16" => Some(A64D16),
2247 "d17" => Some(A64D17),
2248 "d18" => Some(A64D18),
2249 "d19" => Some(A64D19),
2250 "d20" => Some(A64D20),
2251 "d21" => Some(A64D21),
2252 "d22" => Some(A64D22),
2253 "d23" => Some(A64D23),
2254 "d24" => Some(A64D24),
2255 "d25" => Some(A64D25),
2256 "d26" => Some(A64D26),
2257 "d27" => Some(A64D27),
2258 "d28" => Some(A64D28),
2259 "d29" => Some(A64D29),
2260 "d30" => Some(A64D30),
2261 "d31" => Some(A64D31),
2262 "s0" => Some(A64S0),
2264 "s1" => Some(A64S1),
2265 "s2" => Some(A64S2),
2266 "s3" => Some(A64S3),
2267 "s4" => Some(A64S4),
2268 "s5" => Some(A64S5),
2269 "s6" => Some(A64S6),
2270 "s7" => Some(A64S7),
2271 "s8" => Some(A64S8),
2272 "s9" => Some(A64S9),
2273 "s10" => Some(A64S10),
2274 "s11" => Some(A64S11),
2275 "s12" => Some(A64S12),
2276 "s13" => Some(A64S13),
2277 "s14" => Some(A64S14),
2278 "s15" => Some(A64S15),
2279 "s16" => Some(A64S16),
2280 "s17" => Some(A64S17),
2281 "s18" => Some(A64S18),
2282 "s19" => Some(A64S19),
2283 "s20" => Some(A64S20),
2284 "s21" => Some(A64S21),
2285 "s22" => Some(A64S22),
2286 "s23" => Some(A64S23),
2287 "s24" => Some(A64S24),
2288 "s25" => Some(A64S25),
2289 "s26" => Some(A64S26),
2290 "s27" => Some(A64S27),
2291 "s28" => Some(A64S28),
2292 "s29" => Some(A64S29),
2293 "s30" => Some(A64S30),
2294 "s31" => Some(A64S31),
2295 "h0" => Some(A64H0),
2297 "h1" => Some(A64H1),
2298 "h2" => Some(A64H2),
2299 "h3" => Some(A64H3),
2300 "h4" => Some(A64H4),
2301 "h5" => Some(A64H5),
2302 "h6" => Some(A64H6),
2303 "h7" => Some(A64H7),
2304 "h8" => Some(A64H8),
2305 "h9" => Some(A64H9),
2306 "h10" => Some(A64H10),
2307 "h11" => Some(A64H11),
2308 "h12" => Some(A64H12),
2309 "h13" => Some(A64H13),
2310 "h14" => Some(A64H14),
2311 "h15" => Some(A64H15),
2312 "h16" => Some(A64H16),
2313 "h17" => Some(A64H17),
2314 "h18" => Some(A64H18),
2315 "h19" => Some(A64H19),
2316 "h20" => Some(A64H20),
2317 "h21" => Some(A64H21),
2318 "h22" => Some(A64H22),
2319 "h23" => Some(A64H23),
2320 "h24" => Some(A64H24),
2321 "h25" => Some(A64H25),
2322 "h26" => Some(A64H26),
2323 "h27" => Some(A64H27),
2324 "h28" => Some(A64H28),
2325 "h29" => Some(A64H29),
2326 "h30" => Some(A64H30),
2327 "h31" => Some(A64H31),
2328 "b0" => Some(A64B0),
2330 "b1" => Some(A64B1),
2331 "b2" => Some(A64B2),
2332 "b3" => Some(A64B3),
2333 "b4" => Some(A64B4),
2334 "b5" => Some(A64B5),
2335 "b6" => Some(A64B6),
2336 "b7" => Some(A64B7),
2337 "b8" => Some(A64B8),
2338 "b9" => Some(A64B9),
2339 "b10" => Some(A64B10),
2340 "b11" => Some(A64B11),
2341 "b12" => Some(A64B12),
2342 "b13" => Some(A64B13),
2343 "b14" => Some(A64B14),
2344 "b15" => Some(A64B15),
2345 "b16" => Some(A64B16),
2346 "b17" => Some(A64B17),
2347 "b18" => Some(A64B18),
2348 "b19" => Some(A64B19),
2349 "b20" => Some(A64B20),
2350 "b21" => Some(A64B21),
2351 "b22" => Some(A64B22),
2352 "b23" => Some(A64B23),
2353 "b24" => Some(A64B24),
2354 "b25" => Some(A64B25),
2355 "b26" => Some(A64B26),
2356 "b27" => Some(A64B27),
2357 "b28" => Some(A64B28),
2358 "b29" => Some(A64B29),
2359 "b30" => Some(A64B30),
2360 "b31" => Some(A64B31),
2361 "z0" => Some(A64Z0),
2363 "z1" => Some(A64Z1),
2364 "z2" => Some(A64Z2),
2365 "z3" => Some(A64Z3),
2366 "z4" => Some(A64Z4),
2367 "z5" => Some(A64Z5),
2368 "z6" => Some(A64Z6),
2369 "z7" => Some(A64Z7),
2370 "z8" => Some(A64Z8),
2371 "z9" => Some(A64Z9),
2372 "z10" => Some(A64Z10),
2373 "z11" => Some(A64Z11),
2374 "z12" => Some(A64Z12),
2375 "z13" => Some(A64Z13),
2376 "z14" => Some(A64Z14),
2377 "z15" => Some(A64Z15),
2378 "z16" => Some(A64Z16),
2379 "z17" => Some(A64Z17),
2380 "z18" => Some(A64Z18),
2381 "z19" => Some(A64Z19),
2382 "z20" => Some(A64Z20),
2383 "z21" => Some(A64Z21),
2384 "z22" => Some(A64Z22),
2385 "z23" => Some(A64Z23),
2386 "z24" => Some(A64Z24),
2387 "z25" => Some(A64Z25),
2388 "z26" => Some(A64Z26),
2389 "z27" => Some(A64Z27),
2390 "z28" => Some(A64Z28),
2391 "z29" => Some(A64Z29),
2392 "z30" => Some(A64Z30),
2393 "z31" => Some(A64Z31),
2394 "p0" => Some(A64P0),
2396 "p1" => Some(A64P1),
2397 "p2" => Some(A64P2),
2398 "p3" => Some(A64P3),
2399 "p4" => Some(A64P4),
2400 "p5" => Some(A64P5),
2401 "p6" => Some(A64P6),
2402 "p7" => Some(A64P7),
2403 "p8" => Some(A64P8),
2404 "p9" => Some(A64P9),
2405 "p10" => Some(A64P10),
2406 "p11" => Some(A64P11),
2407 "p12" => Some(A64P12),
2408 "p13" => Some(A64P13),
2409 "p14" => Some(A64P14),
2410 "p15" => Some(A64P15),
2411 _ => None,
2412 }
2413}
2414
2415fn parse_register_riscv(name: &str) -> Option<Register> {
2418 use Register::*;
2419 match name {
2420 "x0" => Some(RvX0),
2422 "x1" => Some(RvX1),
2423 "x2" => Some(RvX2),
2424 "x3" => Some(RvX3),
2425 "x4" => Some(RvX4),
2426 "x5" => Some(RvX5),
2427 "x6" => Some(RvX6),
2428 "x7" => Some(RvX7),
2429 "x8" => Some(RvX8),
2430 "x9" => Some(RvX9),
2431 "x10" => Some(RvX10),
2432 "x11" => Some(RvX11),
2433 "x12" => Some(RvX12),
2434 "x13" => Some(RvX13),
2435 "x14" => Some(RvX14),
2436 "x15" => Some(RvX15),
2437 "x16" => Some(RvX16),
2438 "x17" => Some(RvX17),
2439 "x18" => Some(RvX18),
2440 "x19" => Some(RvX19),
2441 "x20" => Some(RvX20),
2442 "x21" => Some(RvX21),
2443 "x22" => Some(RvX22),
2444 "x23" => Some(RvX23),
2445 "x24" => Some(RvX24),
2446 "x25" => Some(RvX25),
2447 "x26" => Some(RvX26),
2448 "x27" => Some(RvX27),
2449 "x28" => Some(RvX28),
2450 "x29" => Some(RvX29),
2451 "x30" => Some(RvX30),
2452 "x31" => Some(RvX31),
2453 "zero" => Some(RvX0),
2455 "ra" => Some(RvX1),
2456 "sp" => Some(RvX2),
2457 "gp" => Some(RvX3),
2458 "tp" => Some(RvX4),
2459 "t0" => Some(RvX5),
2460 "t1" => Some(RvX6),
2461 "t2" => Some(RvX7),
2462 "s0" => Some(RvX8),
2463 "fp" => Some(RvX8), "s1" => Some(RvX9),
2465 "a0" => Some(RvX10),
2466 "a1" => Some(RvX11),
2467 "a2" => Some(RvX12),
2468 "a3" => Some(RvX13),
2469 "a4" => Some(RvX14),
2470 "a5" => Some(RvX15),
2471 "a6" => Some(RvX16),
2472 "a7" => Some(RvX17),
2473 "s2" => Some(RvX18),
2474 "s3" => Some(RvX19),
2475 "s4" => Some(RvX20),
2476 "s5" => Some(RvX21),
2477 "s6" => Some(RvX22),
2478 "s7" => Some(RvX23),
2479 "s8" => Some(RvX24),
2480 "s9" => Some(RvX25),
2481 "s10" => Some(RvX26),
2482 "s11" => Some(RvX27),
2483 "t3" => Some(RvX28),
2484 "t4" => Some(RvX29),
2485 "t5" => Some(RvX30),
2486 "t6" => Some(RvX31),
2487 "f0" => Some(RvF0),
2489 "f1" => Some(RvF1),
2490 "f2" => Some(RvF2),
2491 "f3" => Some(RvF3),
2492 "f4" => Some(RvF4),
2493 "f5" => Some(RvF5),
2494 "f6" => Some(RvF6),
2495 "f7" => Some(RvF7),
2496 "f8" => Some(RvF8),
2497 "f9" => Some(RvF9),
2498 "f10" => Some(RvF10),
2499 "f11" => Some(RvF11),
2500 "f12" => Some(RvF12),
2501 "f13" => Some(RvF13),
2502 "f14" => Some(RvF14),
2503 "f15" => Some(RvF15),
2504 "f16" => Some(RvF16),
2505 "f17" => Some(RvF17),
2506 "f18" => Some(RvF18),
2507 "f19" => Some(RvF19),
2508 "f20" => Some(RvF20),
2509 "f21" => Some(RvF21),
2510 "f22" => Some(RvF22),
2511 "f23" => Some(RvF23),
2512 "f24" => Some(RvF24),
2513 "f25" => Some(RvF25),
2514 "f26" => Some(RvF26),
2515 "f27" => Some(RvF27),
2516 "f28" => Some(RvF28),
2517 "f29" => Some(RvF29),
2518 "f30" => Some(RvF30),
2519 "f31" => Some(RvF31),
2520 "ft0" => Some(RvF0),
2522 "ft1" => Some(RvF1),
2523 "ft2" => Some(RvF2),
2524 "ft3" => Some(RvF3),
2525 "ft4" => Some(RvF4),
2526 "ft5" => Some(RvF5),
2527 "ft6" => Some(RvF6),
2528 "ft7" => Some(RvF7),
2529 "fs0" => Some(RvF8),
2530 "fs1" => Some(RvF9),
2531 "fa0" => Some(RvF10),
2532 "fa1" => Some(RvF11),
2533 "fa2" => Some(RvF12),
2534 "fa3" => Some(RvF13),
2535 "fa4" => Some(RvF14),
2536 "fa5" => Some(RvF15),
2537 "fa6" => Some(RvF16),
2538 "fa7" => Some(RvF17),
2539 "fs2" => Some(RvF18),
2540 "fs3" => Some(RvF19),
2541 "fs4" => Some(RvF20),
2542 "fs5" => Some(RvF21),
2543 "fs6" => Some(RvF22),
2544 "fs7" => Some(RvF23),
2545 "fs8" => Some(RvF24),
2546 "fs9" => Some(RvF25),
2547 "fs10" => Some(RvF26),
2548 "fs11" => Some(RvF27),
2549 "ft8" => Some(RvF28),
2550 "ft9" => Some(RvF29),
2551 "ft10" => Some(RvF30),
2552 "ft11" => Some(RvF31),
2553 "v0" => Some(RvV0),
2555 "v1" => Some(RvV1),
2556 "v2" => Some(RvV2),
2557 "v3" => Some(RvV3),
2558 "v4" => Some(RvV4),
2559 "v5" => Some(RvV5),
2560 "v6" => Some(RvV6),
2561 "v7" => Some(RvV7),
2562 "v8" => Some(RvV8),
2563 "v9" => Some(RvV9),
2564 "v10" => Some(RvV10),
2565 "v11" => Some(RvV11),
2566 "v12" => Some(RvV12),
2567 "v13" => Some(RvV13),
2568 "v14" => Some(RvV14),
2569 "v15" => Some(RvV15),
2570 "v16" => Some(RvV16),
2571 "v17" => Some(RvV17),
2572 "v18" => Some(RvV18),
2573 "v19" => Some(RvV19),
2574 "v20" => Some(RvV20),
2575 "v21" => Some(RvV21),
2576 "v22" => Some(RvV22),
2577 "v23" => Some(RvV23),
2578 "v24" => Some(RvV24),
2579 "v25" => Some(RvV25),
2580 "v26" => Some(RvV26),
2581 "v27" => Some(RvV27),
2582 "v28" => Some(RvV28),
2583 "v29" => Some(RvV29),
2584 "v30" => Some(RvV30),
2585 "v31" => Some(RvV31),
2586 _ => None,
2587 }
2588}
2589
2590pub fn parse_str(source: &str) -> Result<Vec<Statement>, AsmError> {
2592 let tokens = crate::lexer::tokenize(source)?;
2593 parse(&tokens)
2594}
2595
2596fn strip_att_suffix(mnemonic: &str) -> Option<(Mnemonic, OperandSize)> {
2602 let att_translations: &[(&str, &str, OperandSize)] = &[
2605 ("movzbl", "movzx", OperandSize::Dword),
2607 ("movzbw", "movzx", OperandSize::Word),
2608 ("movzbq", "movzx", OperandSize::Qword),
2609 ("movzwl", "movzx", OperandSize::Dword),
2610 ("movzwq", "movzx", OperandSize::Qword),
2611 ("movsbl", "movsx", OperandSize::Dword),
2613 ("movsbw", "movsx", OperandSize::Word),
2614 ("movsbq", "movsx", OperandSize::Qword),
2615 ("movswl", "movsx", OperandSize::Dword),
2616 ("movswq", "movsx", OperandSize::Qword),
2617 ("movslq", "movsxd", OperandSize::Qword),
2618 ("cbtw", "cbw", OperandSize::Word),
2620 ("cwtl", "cwde", OperandSize::Dword),
2621 ("cwtd", "cwd", OperandSize::Word),
2622 ("cltd", "cdq", OperandSize::Dword),
2623 ("cltq", "cdqe", OperandSize::Qword),
2624 ("cqto", "cqo", OperandSize::Qword),
2625 ];
2626
2627 for &(att, intel, size) in att_translations {
2628 if mnemonic == att {
2629 return Some((Mnemonic::from(intel), size));
2630 }
2631 }
2632
2633 if mnemonic.len() < 2 {
2634 return None;
2635 }
2636
2637 let no_strip = [
2640 "call",
2641 "jmp",
2642 "ret",
2643 "nop",
2644 "hlt",
2645 "int",
2646 "syscall",
2647 "sysenter",
2648 "sysexit",
2649 "cpuid",
2650 "rdtsc",
2651 "rdtscp",
2652 "ud2",
2653 "leave",
2654 "enter",
2655 "pushf",
2656 "popf",
2657 "pushfq",
2658 "popfq",
2659 "lahf",
2660 "sahf",
2661 "clc",
2662 "stc",
2663 "cmc",
2664 "cld",
2665 "std",
2666 "cli",
2667 "sti",
2668 "rep",
2669 "repe",
2670 "repne",
2671 "repz",
2672 "repnz",
2673 "lock",
2674 "pause",
2675 "mfence",
2676 "lfence",
2677 "sfence",
2678 "endbr64",
2679 "endbr32",
2680 "iretq",
2681 "cdq",
2682 "cqo",
2683 "cbw",
2684 "cwde",
2685 "cdqe",
2686 "cwd",
2687 "xlat",
2688 "xlatb",
2689 "swapgs",
2690 "wrmsr",
2691 "rdmsr",
2692 "invd",
2693 "wbinvd",
2694 "clts",
2695 "monitor",
2696 "mwait",
2697 "rdrand",
2698 "rdseed",
2699 "xtest",
2700 "xend",
2701 "vzeroall",
2702 "vzeroupper",
2703 "int3",
2704 "setal",
2706 "setbl",
2707 "setcl",
2708 "setgl",
2709 "setol",
2710 "setnl",
2711 "setpl",
2712 "jal",
2714 "jbl",
2715 "jcl",
2716 "jgl",
2717 "jol",
2718 "jnl",
2719 "jpl",
2720 "movsb",
2722 "movsw",
2723 "movsd",
2724 "movsq",
2725 "stosb",
2726 "stosw",
2727 "stosd",
2728 "stosq",
2729 "lodsb",
2730 "lodsw",
2731 "lodsd",
2732 "lodsq",
2733 "scasb",
2734 "scasw",
2735 "scasd",
2736 "scasq",
2737 "cmpsb",
2738 "cmpsw",
2739 "cmpsd",
2740 "cmpsq",
2741 "insb",
2742 "insw",
2743 "insd",
2744 "outsb",
2745 "outsw",
2746 "outsd",
2747 "inb",
2749 "inw",
2750 "inl",
2751 "outb",
2752 "outw",
2753 "outl",
2754 "loop",
2756 "loope",
2757 "loopne",
2758 "loopz",
2759 "loopnz",
2760 "jecxz",
2761 "jrcxz",
2762 "cmpxchg8b",
2764 "cmpxchg16b",
2765 "bswap",
2767 ];
2768
2769 let suffix = mnemonic.as_bytes()[mnemonic.len() - 1];
2770 let size = match suffix {
2771 b'b' => OperandSize::Byte,
2772 b'w' => OperandSize::Word,
2773 b'l' => OperandSize::Dword,
2774 b'q' => OperandSize::Qword,
2775 _ => return None,
2776 };
2777
2778 if no_strip.contains(&mnemonic) {
2779 return None;
2780 }
2781
2782 let base = &mnemonic[..mnemonic.len() - 1];
2783
2784 if base.is_empty() {
2786 return None;
2787 }
2788
2789 let known_bases = [
2791 "mov", "add", "sub", "adc", "sbb", "and", "or", "xor", "cmp", "test", "push", "pop", "inc",
2792 "dec", "neg", "not", "mul", "imul", "div", "idiv", "lea", "xchg", "cmpxchg", "xadd",
2793 "movzx", "movsx", "movsxd", "shl", "shr", "sar", "rol", "ror", "rcl", "rcr", "bt", "bts",
2794 "btr", "btc", "bsf", "bsr", "set", "cmov", "in", "out", "movabs",
2796 ];
2797
2798 if known_bases.contains(&base) {
2800 return Some((Mnemonic::from(base), size));
2801 }
2802
2803 if base.starts_with("cmov") || base.starts_with("set") || base.starts_with('j') {
2807 return Some((Mnemonic::from(base), size));
2808 }
2809
2810 Some((Mnemonic::from(base), size))
2812}
2813
2814#[cfg(test)]
2815mod tests {
2816 use super::*;
2817
2818 fn parse_one(src: &str) -> Statement {
2819 let stmts = parse_str(src).unwrap();
2820 assert_eq!(
2821 stmts.len(),
2822 1,
2823 "expected 1 statement, got {}: {:?}",
2824 stmts.len(),
2825 stmts
2826 );
2827 stmts.into_iter().next().unwrap()
2828 }
2829
2830 fn parse_instr(src: &str) -> Instruction {
2831 match parse_one(src) {
2832 Statement::Instruction(i) => i,
2833 s => panic!("expected instruction, got {:?}", s),
2834 }
2835 }
2836
/// `nop` parses to an instruction with no operands.
#[test]
fn parse_nop() {
    let instr = parse_instr("nop");
    assert_eq!(instr.mnemonic, "nop");
    assert!(instr.operands.is_empty());
}
2845
/// `ret` keeps its mnemonic.
#[test]
fn parse_ret() {
    assert_eq!(parse_instr("ret").mnemonic, "ret");
}
2851
/// `syscall` keeps its mnemonic.
#[test]
fn parse_syscall() {
    assert_eq!(parse_instr("syscall").mnemonic, "syscall");
}
2857
/// Register-to-register `mov` yields two register operands in source order.
#[test]
fn parse_mov_reg_reg() {
    let instr = parse_instr("mov rax, rbx");
    assert_eq!(instr.mnemonic, "mov");
    let ops = &instr.operands;
    assert_eq!(ops.len(), 2);
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Register(Register::Rbx));
}
2868
/// 32-bit register names are recognised as operands of `add`.
#[test]
fn parse_add_r32() {
    let instr = parse_instr("add eax, ecx");
    assert_eq!(instr.mnemonic, "add");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Register(Register::Ecx));
}
2876
/// 8-bit register names are recognised as operands.
#[test]
fn parse_xor_r8() {
    let instr = parse_instr("xor al, bl");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Al));
    assert_eq!(ops[1], Operand::Register(Register::Bl));
}
2883
/// A decimal literal becomes an immediate operand.
#[test]
fn parse_mov_reg_imm() {
    let instr = parse_instr("mov rax, 42");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Immediate(42));
}
2892
/// A `0x` literal becomes an immediate with the same value.
#[test]
fn parse_mov_reg_hex() {
    let instr = parse_instr("mov rdi, 0xDEAD");
    assert_eq!(instr.operands[1], Operand::Immediate(0xDEAD));
}
2898
/// A negative literal becomes a negative immediate.
#[test]
fn parse_add_imm_negative() {
    let instr = parse_instr("add rsp, -8");
    assert_eq!(instr.operands[1], Operand::Immediate(-8));
}
2904
/// A character literal becomes an immediate holding its code (`'A'` is 65).
#[test]
fn parse_char_immediate() {
    let instr = parse_instr("mov al, 'A'");
    assert_eq!(instr.operands[1], Operand::Immediate(65));
}
2910
/// `[rbx]` is a memory operand with only a base register.
#[test]
fn parse_mem_base() {
    let instr = parse_instr("mov rax, [rbx]");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rbx));
        assert_eq!(mem.index, None);
        assert_eq!(mem.disp, 0);
    } else {
        panic!("expected memory operand");
    }
}
2926
/// `[rbp + 8]` carries a base register and a positive displacement.
#[test]
fn parse_mem_base_disp() {
    let instr = parse_instr("mov rax, [rbp + 8]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rbp));
        assert_eq!(mem.disp, 8);
    } else {
        panic!("expected memory operand");
    }
}
2938
/// `[rbp - 0x10]` carries a negative displacement of 16.
#[test]
fn parse_mem_base_neg_disp() {
    let instr = parse_instr("mov rax, [rbp - 0x10]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rbp));
        assert_eq!(mem.disp, -16);
    } else {
        panic!("expected memory operand");
    }
}
2950
/// `[rbx + rcx]` has a base, an index and a scale of 1.
#[test]
fn parse_mem_base_index() {
    let instr = parse_instr("mov rax, [rbx + rcx]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rbx));
        assert_eq!(mem.index, Some(Register::Rcx));
        assert_eq!(mem.scale, 1);
    } else {
        panic!("expected memory operand");
    }
}
2963
/// `rcx*8` inside brackets sets the index register and a scale of 8.
#[test]
fn parse_mem_base_index_scale() {
    let instr = parse_instr("lea rax, [rbx + rcx*8]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rbx));
        assert_eq!(mem.index, Some(Register::Rcx));
        assert_eq!(mem.scale, 8);
    } else {
        panic!("expected memory operand");
    }
}
2976
/// Base, scaled index and displacement are all captured together.
#[test]
fn parse_mem_full() {
    let instr = parse_instr("mov rax, [rbx + rcx*4 + 16]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rbx));
        assert_eq!(mem.index, Some(Register::Rcx));
        assert_eq!(mem.scale, 4);
        assert_eq!(mem.disp, 16);
    } else {
        panic!("expected memory operand");
    }
}
2990
/// `[0x1000]` is a memory operand with a displacement and no base register.
#[test]
fn parse_mem_disp_only() {
    let instr = parse_instr("mov rax, [0x1000]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, None);
        assert_eq!(mem.disp, 0x1000);
    } else {
        panic!("expected memory operand");
    }
}
3002
/// `byte ptr` sets a byte size hint and still yields the memory operand.
#[test]
fn parse_byte_ptr() {
    let instr = parse_instr("mov byte ptr [rax], 0");
    assert_eq!(instr.size_hint, Some(OperandSize::Byte));
    if let Operand::Memory(mem) = &instr.operands[0] {
        assert_eq!(mem.base, Some(Register::Rax));
    } else {
        panic!("expected memory operand");
    }
}
3014
/// A size keyword without `ptr` still sets the size hint.
#[test]
fn parse_qword_no_ptr() {
    let instr = parse_instr("mov qword [rax], 0");
    assert_eq!(instr.size_hint, Some(OperandSize::Qword));
}
3020
/// `dword ptr` sets a dword size hint.
#[test]
fn parse_dword_ptr() {
    let instr = parse_instr("add dword ptr [rbp - 4], 1");
    assert_eq!(instr.size_hint, Some(OperandSize::Dword));
}
3026
/// `name:` parses to a label statement carrying the name.
#[test]
fn parse_label_def() {
    let parsed = parse_one("start:");
    if let Statement::Label(name, _) = parsed {
        assert_eq!(name, "start");
    } else {
        panic!("expected label");
    }
}
3037
/// An identifier operand of `jmp` becomes a label operand.
#[test]
fn parse_label_ref() {
    let instr = parse_instr("jmp loop");
    let expected = Operand::Label(String::from("loop"));
    assert_eq!(instr.operands[0], expected);
}
3043
/// An identifier operand of `call` becomes a label operand.
#[test]
fn parse_call_label() {
    let instr = parse_instr("call printf");
    let expected = Operand::Label(String::from("printf"));
    assert_eq!(instr.operands[0], expected);
}
3049
/// `label + number` is kept as an `Add` expression operand.
#[test]
fn parse_label_with_offset() {
    let instr = parse_instr("lea rax, data + 4");
    if let Operand::Expression(Expr::Add(lhs, rhs)) = &instr.operands[1] {
        assert_eq!(**lhs, Expr::Label(String::from("data")));
        assert_eq!(**rhs, Expr::Num(4));
    } else {
        panic!("expected expression operand");
    }
}
3061
/// `lock` is recorded as a prefix, not as the mnemonic.
#[test]
fn parse_lock_prefix() {
    let instr = parse_instr("lock add [rax], 1");
    assert_eq!(instr.prefixes, vec![Prefix::Lock]);
    assert_eq!(instr.mnemonic, "add");
}
3070
/// `rep` is recorded as a prefix of the string instruction that follows.
#[test]
fn parse_rep_prefix() {
    let instr = parse_instr("rep movsb");
    assert_eq!(instr.prefixes, vec![Prefix::Rep]);
    assert_eq!(instr.mnemonic, "movsb");
}
3077
/// `.byte` yields byte-sized data holding every listed integer.
#[test]
fn parse_byte_directive() {
    let parsed = parse_one(".byte 0x90, 0xCC");
    if let Statement::Data(data) = parsed {
        assert_eq!(data.size, DataSize::Byte);
        assert_eq!(
            data.values,
            vec![DataValue::Integer(0x90), DataValue::Integer(0xCC)]
        );
    } else {
        panic!("expected data");
    }
}
3094
/// `.word` yields word-sized data.
#[test]
fn parse_word_directive() {
    let parsed = parse_one(".word 0x1234");
    if let Statement::Data(data) = parsed {
        assert_eq!(data.size, DataSize::Word);
        assert_eq!(data.values, vec![DataValue::Integer(0x1234)]);
    } else {
        panic!("expected data");
    }
}
3106
/// `.ascii` stores the string bytes without a terminator.
#[test]
fn parse_ascii_directive() {
    let parsed = parse_one(".ascii \"hello\"");
    if let Statement::Data(data) = parsed {
        assert_eq!(data.size, DataSize::Byte);
        assert_eq!(data.values, vec![DataValue::Bytes(b"hello".to_vec())]);
    } else {
        panic!("expected data");
    }
}
3118
/// `.asciz` appends a NUL byte to the string bytes.
#[test]
fn parse_asciz_null_terminates() {
    let parsed = parse_one(".asciz \"ok\"");
    if let Statement::Data(data) = parsed {
        assert_eq!(data.values, vec![DataValue::Bytes(b"ok\0".to_vec())]);
    } else {
        panic!("expected data");
    }
}
3129
/// `.equ NAME, value` yields a constant definition.
#[test]
fn parse_equ_directive() {
    let parsed = parse_one(".equ SYS_WRITE, 1");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "SYS_WRITE");
        assert_eq!(def.value, 1);
    } else {
        panic!("expected const");
    }
}
3141
/// `.align N` records the alignment and leaves the fill unset.
#[test]
fn parse_align_directive() {
    let parsed = parse_one(".align 16");
    if let Statement::Align(align) = parsed {
        assert_eq!(align.alignment, 16);
        assert_eq!(align.fill, None);
    } else {
        panic!("expected align");
    }
}
3153
/// `.p2align 4` is stored as an alignment of 16 (2 to the power of 4).
#[test]
fn parse_p2align_directive() {
    let parsed = parse_one(".p2align 4");
    if let Statement::Align(align) = parsed {
        assert_eq!(align.alignment, 16);
    } else {
        panic!("expected align");
    }
}
3164
/// `.fill count, size, value` records all three arguments.
#[test]
fn parse_fill_directive() {
    let parsed = parse_one(".fill 10, 1, 0x90");
    if let Statement::Fill(fill) = parsed {
        assert_eq!(fill.count, 10);
        assert_eq!(fill.size, 1);
        assert_eq!(fill.value, 0x90);
    } else {
        panic!("expected fill");
    }
}
3177
/// `.space N` records the size and a fill of 0.
#[test]
fn parse_space_directive() {
    let parsed = parse_one(".space 64");
    if let Statement::Space(space) = parsed {
        assert_eq!(space.size, 64);
        assert_eq!(space.fill, 0);
    } else {
        panic!("expected space");
    }
}
3189
/// `.org offset` records the offset and a fill of 0.
#[test]
fn parse_org_directive() {
    let parsed = parse_one(".org 0x1000");
    if let Statement::Org(org) = parsed {
        assert_eq!(org.offset, 0x1000);
        assert_eq!(org.fill, 0x00);
    } else {
        panic!("expected org");
    }
}
3201
/// `.org offset, fill` records the explicit fill byte.
#[test]
fn parse_org_with_fill() {
    let parsed = parse_one(".org 0x100, 0xFF");
    if let Statement::Org(org) = parsed {
        assert_eq!(org.offset, 0x100);
        assert_eq!(org.fill, 0xFF);
    } else {
        panic!("expected org");
    }
}
3213
/// Newline-separated instructions become separate statements, in order.
#[test]
fn parse_multi_line() {
    let program = parse_str("nop\nret").unwrap();
    assert_eq!(program.len(), 2);
    if let (Statement::Instruction(first), Statement::Instruction(second)) =
        (&program[0], &program[1])
    {
        assert_eq!(first.mnemonic, "nop");
        assert_eq!(second.mnemonic, "ret");
    } else {
        panic!("expected two instructions");
    }
}
3228
/// A label line followed by an instruction line yields two statements.
#[test]
fn parse_label_and_instruction() {
    let program = parse_str("start:\n mov rax, 1").unwrap();
    assert_eq!(program.len(), 2);
    let (label, instr) = (&program[0], &program[1]);
    assert!(matches!(label, Statement::Label(name, _) if name == "start"));
    assert!(matches!(instr, Statement::Instruction(_)));
}
3236
/// A semicolon separates two statements on one line.
#[test]
fn parse_semicolon_separated() {
    let program = parse_str("nop; ret").unwrap();
    assert_eq!(program.len(), 2);
}
3242
/// Upper-case mnemonics are normalised to lower case.
#[test]
fn case_insensitive_mnemonic() {
    let instr = parse_instr("MOV RAX, RBX");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
}
3251
/// Register names resolve to the same register regardless of case.
#[test]
fn case_insensitive_register() {
    let instr = parse_instr("xor EAX, eax");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Register(Register::Eax));
}
3258
/// The 64-bit extended registers `r8` and `r15` are recognised.
#[test]
fn parse_extended_reg() {
    let instr = parse_instr("mov r8, r15");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::R8));
    assert_eq!(ops[1], Operand::Register(Register::R15));
}
3267
/// The dword forms `r8d` and `r15d` are recognised.
#[test]
fn parse_extended_reg_dword() {
    let instr = parse_instr("mov r8d, r15d");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::R8d));
    assert_eq!(ops[1], Operand::Register(Register::R15d));
}
3274
/// `push` with a register operand.
#[test]
fn parse_push_pop() {
    let instr = parse_instr("push rbp");
    assert_eq!(instr.mnemonic, "push");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rbp));
}
3283
/// `lea` with a RIP-relative memory operand.
#[test]
fn parse_lea() {
    let instr = parse_instr("lea rdi, [rip + 0x10]");
    assert_eq!(instr.mnemonic, "lea");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, Some(Register::Rip));
        assert_eq!(mem.disp, 0x10);
    } else {
        panic!("expected memory");
    }
}
3296
/// Three comma-separated operands are all kept, in order.
#[test]
fn parse_three_operand_imul() {
    let instr = parse_instr("imul rax, rbx, 10");
    let ops = &instr.operands;
    assert_eq!(ops.len(), 3);
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Register(Register::Rbx));
    assert_eq!(ops[2], Operand::Immediate(10));
}
3305
/// `.global` produces no statement; only the instruction remains.
#[test]
fn global_directive_ignored() {
    let program = parse_str(".global main\nmov rax, 1").unwrap();
    assert_eq!(program.len(), 1);
}
3312
/// `.section` produces no statement; only the instruction remains.
#[test]
fn section_directive_ignored() {
    let program = parse_str(".section .text\nnop").unwrap();
    assert_eq!(program.len(), 1);
}
3318
/// Empty source parses successfully to an empty statement list.
#[test]
fn empty_input() {
    let program = parse_str("").unwrap();
    assert!(program.is_empty());
}
3324
/// A source consisting solely of labels yields one label statement per line.
#[test]
fn only_labels() {
    let program = parse_str("start:\nend:").unwrap();
    assert_eq!(program.len(), 2);
    let (first, second) = (&program[0], &program[1]);
    assert!(matches!(first, Statement::Label(n, _) if n == "start"));
    assert!(matches!(second, Statement::Label(n, _) if n == "end"));
}
3332
/// `[label]` is a memory operand whose displacement is a label reference.
#[test]
fn mem_with_label() {
    let instr = parse_instr("mov rax, [msg]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.base, None);
        assert_eq!(mem.disp_label, Some(String::from("msg")));
    } else {
        panic!("expected memory operand with label");
    }
}
3344
/// XMM register names are recognised as operands.
#[test]
fn xmm_registers() {
    let instr = parse_instr("movaps xmm0, xmm1");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Xmm0));
    assert_eq!(ops[1], Operand::Register(Register::Xmm1));
}
3351
/// `fs:[...]` records the segment register on the memory operand.
#[test]
fn segment_override_mem() {
    let instr = parse_instr("mov rax, fs:[0x28]");
    if let Operand::Memory(mem) = &instr.operands[1] {
        assert_eq!(mem.segment, Some(Register::Fs));
        assert_eq!(mem.disp, 0x28);
    } else {
        panic!("expected segment memory operand");
    }
}
3363
/// `NAME = value` yields a constant definition.
#[test]
fn parse_name_equals_constant() {
    let stmt = parse_one("EXIT = 60");
    if let Statement::Const(def) = &stmt {
        assert_eq!(def.name, "EXIT");
        assert_eq!(def.value, 60);
    } else {
        panic!("expected const, got {:?}", stmt);
    }
}
3377
/// `NAME = 0x...` accepts a hexadecimal value.
#[test]
fn parse_name_equals_hex() {
    let parsed = parse_one("MAGIC = 0xDEAD");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "MAGIC");
        assert_eq!(def.value, 0xDEAD);
    } else {
        panic!("expected const");
    }
}
3389
/// `NAME = -1` accepts a negative value.
#[test]
fn parse_name_equals_negative() {
    let parsed = parse_one("NEG = -1");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "NEG");
        assert_eq!(def.value, -1);
    } else {
        panic!("expected const");
    }
}
3401
/// `.set NAME, value` yields a constant definition.
#[test]
fn parse_set_directive() {
    let parsed = parse_one(".set COUNT, 42");
    if let Statement::Const(def) = parsed {
        assert_eq!(def.name, "COUNT");
        assert_eq!(def.value, 42);
    } else {
        panic!("expected const");
    }
}
3413
/// A constant definition followed by an instruction that uses it.
#[test]
fn name_equals_used_in_program() {
    let program = parse_str("EXIT = 60\nmov eax, EXIT").unwrap();
    assert_eq!(program.len(), 2);
    let (definition, usage) = (&program[0], &program[1]);
    assert!(matches!(definition, Statement::Const(_)));
    assert!(matches!(usage, Statement::Instruction(_)));
}
3421
/// A previously defined constant can be used as a `.fill` count.
#[test]
fn parse_const_expr_with_identifier() {
    let program = parse_str("SIZE = 10\n.fill SIZE, 1, 0").unwrap();
    assert_eq!(program.len(), 2);
    if let Statement::Fill(fill) = &program[1] {
        assert_eq!(fill.count, 10);
    } else {
        panic!("expected Fill");
    }
}
3432
/// A constant defined in terms of another constant resolves to a number.
#[test]
fn parse_const_chain() {
    let program = parse_str("A = 5\nB = A + 3\n.space B, 0").unwrap();
    assert_eq!(program.len(), 3);
    if let Statement::Space(space) = &program[2] {
        assert_eq!(space.size, 8);
    } else {
        panic!("expected Space");
    }
}
3443
/// `.equ` values may reference earlier `.equ` constants.
#[test]
fn parse_equ_identifier_in_const_expr() {
    let program = parse_str(".equ BASE, 100\n.equ TOTAL, BASE + 50").unwrap();
    if let Statement::Const(def) = &program[1] {
        assert_eq!(def.value, 150);
    } else {
        panic!("expected Const");
    }
}
3452
/// In `label + CONST` the constant is folded to a number while the label is
/// kept symbolic, giving an `Add` expression operand.
#[test]
fn parse_label_plus_identifier_expression() {
    let program = parse_str("OFF = 8\nmov rax, data + OFF").unwrap();
    if let Statement::Instruction(instr) = &program[1] {
        match &instr.operands[1] {
            Operand::Expression(Expr::Add(lhs, rhs)) => {
                assert_eq!(**lhs, Expr::Label(String::from("data")));
                assert_eq!(**rhs, Expr::Num(8));
            }
            other => panic!("expected Expression, got {:?}", other),
        }
    } else {
        panic!("expected Instruction");
    }
}
3472
/// An expression made only of constants folds to a single immediate.
#[test]
fn parse_all_constants_resolve_to_immediate() {
    let program = parse_str("BASE = 100\nOFF = 8\nmov eax, BASE + OFF").unwrap();
    if let Statement::Instruction(instr) = &program[2] {
        assert_eq!(instr.operands[1], Operand::Immediate(108));
    } else {
        panic!("expected Instruction");
    }
}
3484
/// `.align` accepts a previously defined constant as its argument.
#[test]
fn parse_align_with_constant() {
    let program = parse_str("ALIGN_VAL = 8\n.align ALIGN_VAL").unwrap();
    if let Statement::Align(align) = &program[1] {
        assert_eq!(align.alignment, 8);
    } else {
        panic!("expected Align");
    }
}
3493
/// `label - number` is kept as a `Sub` expression operand.
#[test]
fn parse_label_minus_offset() {
    let instr = parse_instr("jmp target - 8");
    if let Operand::Expression(Expr::Sub(lhs, rhs)) = &instr.operands[0] {
        assert_eq!(**lhs, Expr::Label(String::from("target")));
        assert_eq!(**rhs, Expr::Num(8));
    } else {
        panic!("expected Sub expression");
    }
}
3505
/// Unary minus binds tighter than `+`: `-A + B` is `(-A) + B`.
#[test]
fn parse_const_negation_precedence() {
    let program = parse_str("A = 10\nB = 3\nX = -A + B\nmov eax, X").unwrap();
    if let Statement::Instruction(instr) = &program[3] {
        assert_eq!(
            instr.operands[1],
            Operand::Immediate(-7),
            "-10 + 3 should be -7, not -13"
        );
    } else {
        panic!("expected Instruction");
    }
}
3521
/// A constant defined as the negation of another constant.
#[test]
fn parse_const_negation_only() {
    let program = parse_str("A = 10\nX = -A\nmov eax, X").unwrap();
    if let Statement::Instruction(instr) = &program[2] {
        assert_eq!(instr.operands[1], Operand::Immediate(-10));
    } else {
        panic!("expected Instruction");
    }
}
3533
/// `-A - B` evaluates left to right as `(-A) - B`.
#[test]
fn parse_const_negation_sub_chain() {
    let program = parse_str("A = 10\nB = 3\nX = -A - B\nmov eax, X").unwrap();
    if let Statement::Instruction(instr) = &program[3] {
        assert_eq!(
            instr.operands[1],
            Operand::Immediate(-13),
            "-10 - 3 should be -13"
        );
    } else {
        panic!("expected Instruction");
    }
}
3549
/// `.align 3` is an error whose debug text mentions "power of 2".
#[test]
fn parse_align_rejects_non_power_of_2() {
    let outcome = crate::parser::parse_str(".align 3");
    assert!(outcome.is_err(), ".align 3 should be rejected");
    let err = outcome.unwrap_err();
    let msg = alloc::format!("{err:?}");
    let mentions_rule = msg.contains("power of 2");
    assert!(
        mentions_rule,
        "error should mention power of 2, got: {msg}"
    );
}
3561
/// Every listed power of two is accepted by `.align`.
#[test]
fn parse_align_accepts_power_of_2() {
    let accepted = ["1", "2", "4", "8", "16", "32", "64", "4096"];
    for val in accepted.iter() {
        let result = crate::parser::parse_str(&alloc::format!(".align {val}"));
        assert!(
            result.is_ok(),
            ".align {val} should be accepted, got: {result:?}"
        );
    }
}
3574
/// `rsp` may not be used as a scaled index register.
#[test]
fn parse_rsp_as_index_rejects() {
    let outcome = crate::parser::parse_str("mov rax, [rbx + rsp*2]");
    assert!(outcome.is_err(), "RSP as SIB index should be rejected");
}
3583
/// `esp` may not be used as a scaled index register.
#[test]
fn parse_esp_as_index_rejects() {
    let outcome = crate::parser::parse_str("mov eax, [ebx + esp*1]");
    assert!(outcome.is_err(), "ESP as SIB index should be rejected");
}
3590
/// `r12` is accepted as a scaled index register.
#[test]
fn parse_r12_as_index_accepts() {
    let result = crate::parser::parse_str("mov rax, [rbx + r12*2]");
    let accepted = result.is_ok();
    assert!(
        accepted,
        "R12 as SIB index should be accepted, got: {result:?}"
    );
}
3600
3601 fn parse_att(src: &str) -> Vec<Statement> {
3604 let tokens = crate::lexer::tokenize(src).unwrap();
3605 parse_with_syntax(&tokens, Arch::X86_64, Syntax::Att).unwrap()
3606 }
3607
3608 fn parse_att_instr(src: &str) -> Instruction {
3609 let stmts = parse_att(src);
3610 assert_eq!(stmts.len(), 1, "expected 1 statement, got {:?}", stmts);
3611 match stmts.into_iter().next().unwrap() {
3612 Statement::Instruction(i) => i,
3613 s => panic!("expected instruction, got {s:?}"),
3614 }
3615 }
3616
/// `nop` in AT&T mode parses to an instruction with no operands.
///
/// NOTE(review): the test name suggests a register operand, but the input is
/// a bare `nop`; confirm whether a register case was intended here.
#[test]
fn att_register_operand() {
    let instr = parse_att_instr("nop");
    assert_eq!(instr.mnemonic, "nop");
    assert!(instr.operands.is_empty());
}
3623
/// `movq $imm, %reg`: the suffix becomes the size hint and the operands are
/// stored destination first.
#[test]
fn att_mov_imm_to_reg() {
    let instr = parse_att_instr("movq $42, %rax");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Qword));
    let ops = &instr.operands;
    assert_eq!(ops.len(), 2);
    assert_eq!(ops[0], Operand::Register(Register::Rax));
    assert_eq!(ops[1], Operand::Immediate(42));
}
3634
/// `movl %src, %dst` is stored destination first with a dword size hint.
#[test]
fn att_mov_reg_to_reg() {
    let instr = parse_att_instr("movl %eax, %ecx");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Dword));
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Ecx));
    assert_eq!(ops[1], Operand::Register(Register::Eax));
}
3643
/// `addl $imm, %reg` with a hexadecimal immediate.
#[test]
fn att_add_imm_to_reg() {
    let instr = parse_att_instr("addl $0x10, %eax");
    assert_eq!(instr.mnemonic, "add");
    assert_eq!(instr.size_hint, Some(OperandSize::Dword));
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Immediate(0x10));
}
3652
/// `$-1` is parsed as a negative immediate.
#[test]
fn att_negative_immediate() {
    let instr = parse_att_instr("addq $-1, %rax");
    assert_eq!(instr.mnemonic, "add");
    assert_eq!(instr.operands[1], Operand::Immediate(-1));
}
3659
/// The `b` suffix is stripped and becomes a byte size hint.
#[test]
fn att_byte_suffix() {
    let instr = parse_att_instr("movb $0x41, %al");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Byte));
    assert_eq!(instr.operands[0], Operand::Register(Register::Al));
}
3667
/// The `w` suffix is stripped and becomes a word size hint.
#[test]
fn att_word_suffix() {
    let instr = parse_att_instr("movw $0x1234, %ax");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.size_hint, Some(OperandSize::Word));
    assert_eq!(instr.operands[0], Operand::Register(Register::Ax));
}
3675
/// `(%rax)` is a memory operand with only a base register.
#[test]
fn att_memory_base_only() {
    let instr = parse_att_instr("movq (%rax), %rbx");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rbx));
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.disp, 0);
            assert!(mem.index.is_none());
        }
        _ => panic!("expected memory operand"),
    }
}
3690
/// `8(%rsp)` carries a base register and a displacement.
#[test]
fn att_memory_disp_base() {
    let instr = parse_att_instr("movl 8(%rsp), %eax");
    assert_eq!(instr.mnemonic, "mov");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rsp));
            assert_eq!(mem.disp, 8);
        }
        _ => panic!("expected memory operand"),
    }
}
3702
/// `-16(%rbp)` carries a negative displacement.
#[test]
fn att_memory_negative_disp() {
    let instr = parse_att_instr("movq -16(%rbp), %rax");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rbp));
            assert_eq!(mem.disp, -16);
        }
        _ => panic!("expected memory operand"),
    }
}
3713
/// `(%base, %index)` has a scale of 1.
#[test]
fn att_memory_base_index() {
    let instr = parse_att_instr("movl (%rax, %rcx), %edx");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.index, Some(Register::Rcx));
            assert_eq!(mem.scale, 1);
        }
        _ => panic!("expected memory operand"),
    }
}
3725
/// `(%base, %index, scale)` records the explicit scale and no displacement.
#[test]
fn att_memory_base_index_scale() {
    let instr = parse_att_instr("movq (%rax, %rcx, 4), %rdx");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.index, Some(Register::Rcx));
            assert_eq!(mem.scale, 4);
            assert_eq!(mem.disp, 0);
        }
        _ => panic!("expected memory operand"),
    }
}
3738
/// `disp(%base, %index, scale)` records all four components.
#[test]
fn att_memory_disp_base_index_scale() {
    let instr = parse_att_instr("movl 16(%rbx, %rsi, 8), %eax");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rbx));
            assert_eq!(mem.index, Some(Register::Rsi));
            assert_eq!(mem.scale, 8);
            assert_eq!(mem.disp, 16);
        }
        _ => panic!("expected memory operand"),
    }
}
3751
/// `%fs:disp(%base)` records the segment register on the memory operand.
#[test]
fn att_segment_override() {
    let instr = parse_att_instr("movq %fs:0x28(%rax), %rbx");
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.segment, Some(Register::Fs));
            assert_eq!(mem.base, Some(Register::Rax));
            assert_eq!(mem.disp, 0x28);
        }
        _ => panic!("expected memory operand"),
    }
}
3763
/// `pushq`/`popq` lose their suffix and keep the register operand.
#[test]
fn att_push_pop() {
    let pushed = parse_att_instr("pushq %rbp");
    assert_eq!(pushed.mnemonic, "push");
    assert_eq!(pushed.operands[0], Operand::Register(Register::Rbp));

    let popped = parse_att_instr("popq %rbp");
    assert_eq!(popped.mnemonic, "pop");
    assert_eq!(popped.operands[0], Operand::Register(Register::Rbp));
}
3774
/// `xorl %eax, %eax` yields `xor` with the same register twice.
#[test]
fn att_xor_reg_reg() {
    let instr = parse_att_instr("xorl %eax, %eax");
    assert_eq!(instr.mnemonic, "xor");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Register(Register::Eax));
}
3783
/// `call name` keeps its mnemonic and takes a label operand.
#[test]
fn att_call_label() {
    let instr = parse_att_instr("call func");
    assert_eq!(instr.mnemonic, "call");
    let expected = Operand::Label(String::from("func"));
    assert_eq!(instr.operands[0], expected);
}
3790
/// `jmp name` keeps its mnemonic and takes a label operand.
#[test]
fn att_jmp_label() {
    let instr = parse_att_instr("jmp done");
    assert_eq!(instr.mnemonic, "jmp");
    let expected = Operand::Label(String::from("done"));
    assert_eq!(instr.operands[0], expected);
}
3797
/// A conditional jump keeps its mnemonic and takes a label operand.
#[test]
fn att_jcc_label() {
    let instr = parse_att_instr("jne loop");
    assert_eq!(instr.mnemonic, "jne");
    let expected = Operand::Label(String::from("loop"));
    assert_eq!(instr.operands[0], expected);
}
3804
/// `ret` is unchanged in AT&T mode and has no operands.
#[test]
fn att_ret() {
    let instr = parse_att_instr("ret");
    assert_eq!(instr.mnemonic, "ret");
    assert!(instr.operands.is_empty());
}
3811
/// `syscall` is unchanged in AT&T mode.
#[test]
fn att_syscall() {
    assert_eq!(parse_att_instr("syscall").mnemonic, "syscall");
}
3817
/// `lock` is kept as a prefix while the suffix is stripped from `xchgl`.
#[test]
fn att_lock_prefix() {
    let instr = parse_att_instr("lock xchgl %eax, (%rbx)");
    assert_eq!(instr.mnemonic, "xchg");
    let has_lock = instr.prefixes.contains(&Prefix::Lock);
    assert!(has_lock);
}
3824
/// `leaq disp(%base), %dst` is stored destination first.
#[test]
fn att_lea() {
    let instr = parse_att_instr("leaq 8(%rsp), %rax");
    assert_eq!(instr.mnemonic, "lea");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rsp));
            assert_eq!(mem.disp, 8);
        }
        _ => panic!("expected memory operand"),
    }
}
3838
/// `$name` is parsed as a label operand.
#[test]
fn att_imm_label_ref() {
    let instr = parse_att_instr("movq $myvar, %rax");
    assert_eq!(instr.mnemonic, "mov");
    let expected = Operand::Label(String::from("myvar"));
    assert_eq!(instr.operands[1], expected);
}
3845
/// A mnemonic without a size suffix leaves the size hint unset.
#[test]
fn att_no_suffix_no_size_hint() {
    let instr = parse_att_instr("nop");
    assert!(instr.size_hint.is_none());
}
3852
/// `int` keeps its full mnemonic and takes an immediate operand.
#[test]
fn att_int_not_stripped() {
    let instr = parse_att_instr("int $0x80");
    assert_eq!(instr.mnemonic, "int");
    assert_eq!(instr.operands[0], Operand::Immediate(0x80));
}
3860
/// String instructions keep their trailing size letter.
#[test]
fn att_string_ops_not_stripped() {
    assert_eq!(parse_att_instr("movsb").mnemonic, "movsb");
    assert_eq!(parse_att_instr("stosq").mnemonic, "stosq");
}
3868
/// `rep` is recorded as a prefix in AT&T mode.
#[test]
fn att_rep_prefix() {
    let instr = parse_att_instr("rep movsb");
    assert_eq!(instr.mnemonic, "movsb");
    let has_rep = instr.prefixes.contains(&Prefix::Rep);
    assert!(has_rep);
}
3875
/// `cmpl $imm, %reg` is stored with the register first.
#[test]
fn att_cmp_operand_order() {
    let instr = parse_att_instr("cmpl $0, %eax");
    assert_eq!(instr.mnemonic, "cmp");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Immediate(0));
}
3884
/// `testl %eax, %eax` yields `test` with the same register twice.
#[test]
fn att_test_operand_order() {
    let instr = parse_att_instr("testl %eax, %eax");
    assert_eq!(instr.mnemonic, "test");
    let ops = &instr.operands;
    assert_eq!(ops[0], Operand::Register(Register::Eax));
    assert_eq!(ops[1], Operand::Register(Register::Eax));
}
3893
/// `subq disp(%base), %dst` is stored destination first.
#[test]
fn att_sub_mem_to_reg() {
    let instr = parse_att_instr("subq 8(%rbp), %rax");
    assert_eq!(instr.mnemonic, "sub");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
    match &instr.operands[1] {
        Operand::Memory(mem) => {
            assert_eq!(mem.base, Some(Register::Rbp));
            assert_eq!(mem.disp, 8);
        }
        _ => panic!("expected memory operand"),
    }
}
3906
/// `pushq $imm` takes an immediate operand.
#[test]
fn att_push_immediate() {
    let instr = parse_att_instr("pushq $42");
    assert_eq!(instr.mnemonic, "push");
    assert_eq!(instr.operands[0], Operand::Immediate(42));
}
3913
/// A forward numeric label reference (`1f`) is kept as a label operand.
#[test]
fn att_numeric_label_fwd() {
    let instr = parse_att_instr("jmp 1f");
    assert_eq!(instr.mnemonic, "jmp");
    let expected = Operand::Label(String::from("1f"));
    assert_eq!(instr.operands[0], expected);
}
3920
/// A backward numeric label reference (`1b`) is kept as a label operand.
#[test]
fn att_numeric_label_bwd() {
    let instr = parse_att_instr("jne 1b");
    assert_eq!(instr.mnemonic, "jne");
    let expected = Operand::Label(String::from("1b"));
    assert_eq!(instr.operands[0], expected);
}
3927
/// `.syntax att` switches a parser that started in Intel mode to AT&T mode
/// for the lines that follow.
#[test]
fn att_syntax_directive_switches_mode() {
    let src = ".syntax att\nmovq $1, %rax";
    let lexed = crate::lexer::tokenize(src).unwrap();
    let program = parse_with_syntax(&lexed, Arch::X86_64, Syntax::Intel).unwrap();
    // Take the first instruction, skipping any non-instruction statements.
    let instr = program
        .iter()
        .find_map(|stmt| match stmt {
            Statement::Instruction(found) => Some(found),
            _ => None,
        })
        .expect("no instruction found");
    assert_eq!(instr.mnemonic, "mov");
    assert_eq!(instr.operands[0], Operand::Register(Register::Rax));
    assert_eq!(instr.operands[1], Operand::Immediate(1));
}
3950
3951 #[test]
3952 fn att_star_indirect_reg() {
3953 let i = parse_att_instr("jmp *%rax");
3954 assert_eq!(i.mnemonic, "jmp");
3955 assert_eq!(i.operands[0], Operand::Register(Register::Rax));
3956 }
3957
3958 #[test]
3959 fn att_star_indirect_mem() {
3960 let i = parse_att_instr("call *(%rax)");
3961 assert_eq!(i.mnemonic, "call");
3962 if let Operand::Memory(m) = &i.operands[0] {
3963 assert_eq!(m.base, Some(Register::Rax));
3964 } else {
3965 panic!("expected memory operand");
3966 }
3967 }
3968
3969 fn parse_aarch64(src: &str) -> Vec<Statement> {
3972 let tokens = crate::lexer::tokenize(src).unwrap();
3973 parse_with_syntax(&tokens, Arch::Aarch64, Syntax::Ual).unwrap()
3974 }
3975
3976 #[test]
3977 fn parse_ldr_literal_pool_x_reg() {
3978 let stmts = parse_aarch64("ldr x0, =0x12345678");
3979 assert_eq!(stmts.len(), 1);
3980 if let Statement::Instruction(instr) = &stmts[0] {
3981 assert_eq!(instr.mnemonic, "ldr");
3982 assert_eq!(instr.operands.len(), 2);
3983 assert!(matches!(
3984 &instr.operands[0],
3985 Operand::Register(Register::A64X0)
3986 ));
3987 assert!(matches!(
3988 &instr.operands[1],
3989 Operand::LiteralPoolValue(0x12345678)
3990 ));
3991 } else {
3992 panic!("expected instruction");
3993 }
3994 }
3995
3996 #[test]
3997 fn parse_ldr_literal_pool_w_reg() {
3998 let stmts = parse_aarch64("ldr w5, =42");
3999 assert_eq!(stmts.len(), 1);
4000 if let Statement::Instruction(instr) = &stmts[0] {
4001 assert_eq!(instr.mnemonic, "ldr");
4002 assert!(matches!(&instr.operands[1], Operand::LiteralPoolValue(42)));
4003 } else {
4004 panic!("expected instruction");
4005 }
4006 }
4007
4008 #[test]
4009 fn parse_ldr_literal_pool_negative() {
4010 let stmts = parse_aarch64("ldr x1, =-1");
4011 if let Statement::Instruction(instr) = &stmts[0] {
4012 assert!(matches!(&instr.operands[1], Operand::LiteralPoolValue(-1)));
4013 } else {
4014 panic!("expected instruction");
4015 }
4016 }
4017
4018 #[test]
4019 fn parse_ldr_literal_pool_hex_large() {
4020 let stmts = parse_aarch64("ldr x0, =0xDEADBEEFCAFEBABE");
4021 if let Statement::Instruction(instr) = &stmts[0] {
4022 if let Operand::LiteralPoolValue(v) = &instr.operands[1] {
4023 assert_eq!(*v, 0xDEADBEEFCAFEBABEu64 as i128);
4024 } else {
4025 panic!("expected LiteralPoolValue");
4026 }
4027 } else {
4028 panic!("expected instruction");
4029 }
4030 }
4031
4032 #[test]
4033 fn parse_ltorg_directive() {
4034 let stmts = parse_aarch64("ldr x0, =1\n.ltorg");
4035 assert_eq!(stmts.len(), 2);
4036 assert!(matches!(&stmts[1], Statement::Ltorg(_)));
4037 }
4038
4039 #[test]
4040 fn parse_pool_directive() {
4041 let stmts = parse_aarch64("ldr x0, =1\n.pool");
4042 assert_eq!(stmts.len(), 2);
4043 assert!(matches!(&stmts[1], Statement::Ltorg(_)));
4044 }
4045
4046 #[test]
4049 fn parse_simd_v_registers() {
4050 for i in 0..32 {
4052 let src = alloc::format!("fmov v{}, v0", i);
4053 let stmts = parse_aarch64(&src);
4054 assert_eq!(stmts.len(), 1, "parsing 'fmov v{}, v0' failed", i);
4055 if let Statement::Instruction(instr) = &stmts[0] {
4056 if let Operand::Register(r) = &instr.operands[0] {
4057 assert!(r.is_a64_simd_fp(), "v{} should be SIMD/FP", i);
4058 assert_eq!(r.a64_reg_num(), i as u8, "v{} reg num", i);
4059 assert_eq!(r.a64_simd_fp_bits(), 128, "v{} should be 128 bits", i);
4060 } else {
4061 panic!("expected register for v{}", i);
4062 }
4063 }
4064 }
4065 }
4066
4067 #[test]
4068 fn parse_simd_q_registers() {
4069 for i in 0..32 {
4070 let src = alloc::format!("mov q{}, q0", i);
4071 let stmts = parse_aarch64(&src);
4072 if let Statement::Instruction(instr) = &stmts[0] {
4073 if let Operand::Register(r) = &instr.operands[0] {
4074 assert!(r.is_a64_simd_fp(), "q{} should be SIMD/FP", i);
4075 assert_eq!(r.a64_reg_num(), i as u8);
4076 assert_eq!(r.a64_simd_fp_bits(), 128);
4077 }
4078 }
4079 }
4080 }
4081
4082 #[test]
4083 fn parse_simd_d_registers() {
4084 for i in 0..32 {
4085 let src = alloc::format!("fmov d{}, d0", i);
4086 let stmts = parse_aarch64(&src);
4087 if let Statement::Instruction(instr) = &stmts[0] {
4088 if let Operand::Register(r) = &instr.operands[0] {
4089 assert!(r.is_a64_simd_fp(), "d{} should be SIMD/FP", i);
4090 assert_eq!(r.a64_reg_num(), i as u8);
4091 assert_eq!(r.a64_simd_fp_bits(), 64);
4092 }
4093 }
4094 }
4095 }
4096
4097 #[test]
4098 fn parse_simd_s_registers() {
4099 for i in 0..32 {
4100 let src = alloc::format!("fmov s{}, s0", i);
4101 let stmts = parse_aarch64(&src);
4102 if let Statement::Instruction(instr) = &stmts[0] {
4103 if let Operand::Register(r) = &instr.operands[0] {
4104 assert!(r.is_a64_simd_fp(), "s{} should be SIMD/FP", i);
4105 assert_eq!(r.a64_reg_num(), i as u8);
4106 assert_eq!(r.a64_simd_fp_bits(), 32);
4107 }
4108 }
4109 }
4110 }
4111
4112 #[test]
4113 fn parse_simd_h_registers() {
4114 for i in 0..32 {
4115 let src = alloc::format!("fmov h{}, h0", i);
4116 let stmts = parse_aarch64(&src);
4117 if let Statement::Instruction(instr) = &stmts[0] {
4118 if let Operand::Register(r) = &instr.operands[0] {
4119 assert!(r.is_a64_simd_fp(), "h{} should be SIMD/FP", i);
4120 assert_eq!(r.a64_reg_num(), i as u8);
4121 assert_eq!(r.a64_simd_fp_bits(), 16);
4122 }
4123 }
4124 }
4125 }
4126
4127 #[test]
4128 fn parse_simd_b_registers() {
4129 for i in 0..32 {
4130 let src = alloc::format!("fmov b{}, b0", i);
4131 let stmts = parse_aarch64(&src);
4132 if let Statement::Instruction(instr) = &stmts[0] {
4133 if let Operand::Register(r) = &instr.operands[0] {
4134 assert!(r.is_a64_simd_fp(), "b{} should be SIMD/FP", i);
4135 assert_eq!(r.a64_reg_num(), i as u8);
4136 assert_eq!(r.a64_simd_fp_bits(), 8);
4137 }
4138 }
4139 }
4140 }
4141
4142 #[test]
4144 fn parse_vector_arrangement_all_specifiers() {
4145 let cases = [
4146 ("add v0.8b, v1.8b, v2.8b", VectorArrangement::B8),
4147 ("add v0.16b, v1.16b, v2.16b", VectorArrangement::B16),
4148 ("add v0.4h, v1.4h, v2.4h", VectorArrangement::H4),
4149 ("add v0.8h, v1.8h, v2.8h", VectorArrangement::H8),
4150 ("add v0.2s, v1.2s, v2.2s", VectorArrangement::S2),
4151 ("add v0.4s, v1.4s, v2.4s", VectorArrangement::S4),
4152 ("add v0.1d, v1.1d, v2.1d", VectorArrangement::D1),
4153 ("add v0.2d, v1.2d, v2.2d", VectorArrangement::D2),
4154 ];
4155 for (src, expected_arr) in &cases {
4156 let stmts = parse_aarch64(src);
4157 if let Statement::Instruction(instr) = &stmts[0] {
4158 assert_eq!(instr.operands.len(), 3, "source: {}", src);
4159 for (j, op) in instr.operands.iter().enumerate() {
4160 match op {
4161 Operand::VectorRegister(_, arr) => {
4162 assert_eq!(arr, expected_arr, "source: {}, operand {}", src, j);
4163 }
4164 other => panic!(
4165 "expected VectorRegister, got {:?} for source: {}, operand {}",
4166 other, src, j
4167 ),
4168 }
4169 }
4170 } else {
4171 panic!("expected instruction for source: {}", src);
4172 }
4173 }
4174 }
4175
4176 #[test]
4177 fn parse_vector_arrangement_register_numbers() {
4178 for i in [0u32, 1, 7, 15, 16, 31] {
4180 let src = alloc::format!("add v{}.4s, v0.4s, v0.4s", i);
4181 let stmts = parse_aarch64(&src);
4182 if let Statement::Instruction(instr) = &stmts[0] {
4183 match &instr.operands[0] {
4184 Operand::VectorRegister(reg, arr) => {
4185 assert_eq!(reg.a64_reg_num(), i as u8, "v{}.4s reg num", i);
4186 assert_eq!(*arr, VectorArrangement::S4);
4187 assert!(reg.is_a64_vector());
4188 }
4189 other => panic!("expected VectorRegister, got {:?}", other),
4190 }
4191 }
4192 }
4193 }
4194
4195 #[test]
4196 fn parse_vector_arrangement_case_insensitive() {
4197 let cases = ["add v0.4S, v1.4S, v2.4S", "add V0.4s, V1.4s, V2.4s"];
4199 for src in &cases {
4200 let stmts = parse_aarch64(src);
4201 if let Statement::Instruction(instr) = &stmts[0] {
4202 for op in &instr.operands {
4203 match op {
4204 Operand::VectorRegister(_, arr) => {
4205 assert_eq!(*arr, VectorArrangement::S4, "source: {}", src);
4206 }
4207 other => panic!(
4208 "expected VectorRegister, got {:?} for source: {}",
4209 other, src
4210 ),
4211 }
4212 }
4213 }
4214 }
4215 }
4216
4217 #[test]
4218 fn parse_vector_reg_without_arrangement() {
4219 let stmts = parse_aarch64("mov v0, v1");
4221 if let Statement::Instruction(instr) = &stmts[0] {
4222 match &instr.operands[0] {
4223 Operand::Register(reg) => {
4224 assert!(reg.is_a64_vector());
4225 assert_eq!(reg.a64_reg_num(), 0);
4226 }
4227 other => panic!("expected Register, got {:?}", other),
4228 }
4229 }
4230 }
4231
4232 #[test]
4233 fn parse_vector_arrangement_display() {
4234 let stmts = parse_aarch64("add v3.2d, v4.2d, v5.2d");
4235 if let Statement::Instruction(instr) = &stmts[0] {
4236 if let Operand::VectorRegister(reg, arr) = &instr.operands[0] {
4237 assert_eq!(reg.a64_reg_num(), 3);
4238 assert_eq!(*arr, VectorArrangement::D2);
4239 let display = alloc::format!("{}", instr.operands[0]);
4240 assert!(
4242 display.contains("2D") || display.contains("2d"),
4243 "Display should contain arrangement: {}",
4244 display
4245 );
4246 } else {
4247 panic!("expected VectorRegister");
4248 }
4249 }
4250 }
4251
4252 #[test]
4253 fn parse_vector_arrangement_element_properties() {
4254 let cases = [
4256 ("add v0.8b, v0.8b, v0.8b", 8u32, 64u32, 8u32),
4257 ("add v0.16b, v0.16b, v0.16b", 8, 128, 16),
4258 ("add v0.4h, v0.4h, v0.4h", 16, 64, 4),
4259 ("add v0.8h, v0.8h, v0.8h", 16, 128, 8),
4260 ("add v0.2s, v0.2s, v0.2s", 32, 64, 2),
4261 ("add v0.4s, v0.4s, v0.4s", 32, 128, 4),
4262 ("add v0.1d, v0.1d, v0.1d", 64, 64, 1),
4263 ("add v0.2d, v0.2d, v0.2d", 64, 128, 2),
4264 ];
4265 for (src, elem_bits, total_bits, lanes) in &cases {
4266 let stmts = parse_aarch64(src);
4267 if let Statement::Instruction(instr) = &stmts[0] {
4268 if let Operand::VectorRegister(_, arr) = &instr.operands[0] {
4269 assert_eq!(arr.element_bits(), *elem_bits, "{}", src);
4270 assert_eq!(arr.total_bits(), *total_bits, "{}", src);
4271 assert_eq!(arr.lane_count(), *lanes, "{}", src);
4272 }
4273 }
4274 }
4275 }
4276
4277 fn parse_rv64(src: &str) -> Vec<Statement> {
4280 let tokens = crate::lexer::tokenize(src).unwrap();
4281 parse_with_syntax(&tokens, Arch::Rv64, Syntax::RiscV).unwrap()
4282 }
4283
4284 fn parse_rv64_instr(src: &str) -> Instruction {
4285 let stmts = parse_rv64(src);
4286 assert_eq!(stmts.len(), 1, "expected 1 statement, got {}", stmts.len());
4287 match stmts.into_iter().next().unwrap() {
4288 Statement::Instruction(i) => i,
4289 s => panic!("expected instruction, got {:?}", s),
4290 }
4291 }
4292
4293 #[test]
4294 fn riscv_fp_register_hardware_names() {
4295 for i in 0u8..32 {
4297 let src = alloc::format!("fadd.d f{}, f{}, f{}", i, i, i);
4298 let instr = parse_rv64_instr(&src);
4299 assert_eq!(instr.mnemonic, "fadd.d");
4300 assert_eq!(instr.operands.len(), 3);
4301 for op in &instr.operands {
4302 if let Operand::Register(reg) = op {
4303 assert!(reg.is_riscv_fp(), "f{} should be FP register", i);
4304 assert_eq!(reg.rv_fp_reg_num(), i, "f{} should map to reg {}", i, i);
4305 } else {
4306 panic!("expected register operand for f{}", i);
4307 }
4308 }
4309 }
4310 }
4311
4312 #[test]
4313 fn riscv_fp_register_abi_ft() {
4314 let mapping: &[(&str, u8)] = &[
4316 ("ft0", 0),
4317 ("ft1", 1),
4318 ("ft2", 2),
4319 ("ft3", 3),
4320 ("ft4", 4),
4321 ("ft5", 5),
4322 ("ft6", 6),
4323 ("ft7", 7),
4324 ("ft8", 28),
4325 ("ft9", 29),
4326 ("ft10", 30),
4327 ("ft11", 31),
4328 ];
4329 for &(name, expected_num) in mapping {
4330 let src = alloc::format!("fmv.d {}, {}", name, name);
4331 let instr = parse_rv64_instr(&src);
4332 if let Operand::Register(reg) = &instr.operands[0] {
4333 assert!(reg.is_riscv_fp(), "{} should be FP", name);
4334 assert_eq!(
4335 reg.rv_fp_reg_num(),
4336 expected_num,
4337 "{} → f{}",
4338 name,
4339 expected_num
4340 );
4341 } else {
4342 panic!("expected register for {}", name);
4343 }
4344 }
4345 }
4346
4347 #[test]
4348 fn riscv_fp_register_abi_fs() {
4349 let mapping: &[(&str, u8)] = &[
4351 ("fs0", 8),
4352 ("fs1", 9),
4353 ("fs2", 18),
4354 ("fs3", 19),
4355 ("fs4", 20),
4356 ("fs5", 21),
4357 ("fs6", 22),
4358 ("fs7", 23),
4359 ("fs8", 24),
4360 ("fs9", 25),
4361 ("fs10", 26),
4362 ("fs11", 27),
4363 ];
4364 for &(name, expected_num) in mapping {
4365 let src = alloc::format!("fmv.d {}, {}", name, name);
4366 let instr = parse_rv64_instr(&src);
4367 if let Operand::Register(reg) = &instr.operands[0] {
4368 assert!(reg.is_riscv_fp(), "{} should be FP", name);
4369 assert_eq!(
4370 reg.rv_fp_reg_num(),
4371 expected_num,
4372 "{} → f{}",
4373 name,
4374 expected_num
4375 );
4376 } else {
4377 panic!("expected register for {}", name);
4378 }
4379 }
4380 }
4381
4382 #[test]
4383 fn riscv_fp_register_abi_fa() {
4384 let mapping: &[(&str, u8)] = &[
4386 ("fa0", 10),
4387 ("fa1", 11),
4388 ("fa2", 12),
4389 ("fa3", 13),
4390 ("fa4", 14),
4391 ("fa5", 15),
4392 ("fa6", 16),
4393 ("fa7", 17),
4394 ];
4395 for &(name, expected_num) in mapping {
4396 let src = alloc::format!("fmv.d {}, {}", name, name);
4397 let instr = parse_rv64_instr(&src);
4398 if let Operand::Register(reg) = &instr.operands[0] {
4399 assert!(reg.is_riscv_fp(), "{} should be FP", name);
4400 assert_eq!(
4401 reg.rv_fp_reg_num(),
4402 expected_num,
4403 "{} → f{}",
4404 name,
4405 expected_num
4406 );
4407 } else {
4408 panic!("expected register for {}", name);
4409 }
4410 }
4411 }
4412
4413 #[test]
4414 fn riscv_fp_mixed_with_integer_regs() {
4415 let instr = parse_rv64_instr("flw ft0, 0(sp)");
4417 assert_eq!(instr.mnemonic, "flw");
4418 if let Operand::Register(reg) = &instr.operands[0] {
4419 assert!(reg.is_riscv_fp());
4420 assert_eq!(reg.rv_fp_reg_num(), 0);
4421 } else {
4422 panic!("expected FP register");
4423 }
4424 }
4425
4426 #[test]
4427 fn riscv_fp_not_integer() {
4428 let instr = parse_rv64_instr("fadd.d fa0, fa1, fa2");
4430 for op in &instr.operands {
4431 if let Operand::Register(reg) = op {
4432 assert!(reg.is_riscv_fp());
4433 assert!(!reg.is_riscv());
4434 }
4435 }
4436 }
4437
4438 #[test]
4439 fn riscv_integer_not_fp() {
4440 let instr = parse_rv64_instr("add a0, a1, a2");
4442 for op in &instr.operands {
4443 if let Operand::Register(reg) = op {
4444 assert!(reg.is_riscv());
4445 assert!(!reg.is_riscv_fp());
4446 }
4447 }
4448 }
4449}