1#[allow(unused_imports)]
7use alloc::format;
8use alloc::string::String;
9use alloc::string::ToString;
10#[allow(unused_imports)]
11use alloc::vec;
12use alloc::vec::Vec;
13
14use crate::encoder;
15use crate::error::{AsmError, Span};
16use crate::ir::*;
17use crate::lexer;
18use crate::linker::{AppliedRelocation, Linker};
19use crate::parser;
20use crate::preprocessor::Preprocessor;
21
/// Output of a completed assembly run, as returned by `Assembler::finish`.
///
/// Holds the resolved bytes together with the metadata produced while
/// resolving them: label addresses, applied relocations, the base address and
/// optional source text used by `listing()`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[must_use]
pub struct AssemblyResult {
    /// Resolved output bytes returned by the linker.
    bytes: Vec<u8>,
    /// `(name, address)` pairs returned by the linker. `finish` sets bit 0 of
    /// the address for labels that were recorded as Thumb functions.
    labels: Vec<(String, u64)>,
    /// Relocations reported by the linker during resolution.
    relocations: Vec<AppliedRelocation>,
    /// Base address the linker was configured with when `finish` ran.
    base_address: u64,
    /// `(offset, source text)` pairs built from the fragment annotations;
    /// only populated when listing was enabled on the assembler.
    /// NOTE(review): `listing()` compares these offsets against
    /// `base_address + byte index`, so they appear to be absolute addresses —
    /// confirm against `Linker::resolve`.
    source_annotations: Vec<(u64, String)>,
}
38
39impl AssemblyResult {
40 #[must_use]
54 pub fn bytes(&self) -> &[u8] {
55 &self.bytes
56 }
57
58 #[must_use]
72 pub fn into_bytes(self) -> Vec<u8> {
73 self.bytes
74 }
75
76 #[must_use]
90 pub fn len(&self) -> usize {
91 self.bytes.len()
92 }
93
94 #[must_use]
106 pub fn is_empty(&self) -> bool {
107 self.bytes.is_empty()
108 }
109
110 #[must_use]
126 pub fn labels(&self) -> &[(String, u64)] {
127 &self.labels
128 }
129
130 #[must_use]
146 pub fn label_address(&self, name: &str) -> Option<u64> {
147 self.labels.iter().find(|(n, _)| n == name).map(|(_, a)| *a)
148 }
149
150 #[must_use]
166 pub fn relocations(&self) -> &[AppliedRelocation] {
167 &self.relocations
168 }
169
170 #[must_use]
185 pub fn base_address(&self) -> u64 {
186 self.base_address
187 }
188
189 #[must_use]
202 pub fn listing(&self) -> String {
203 use core::fmt::Write;
204
205 let mut out = String::new();
206 let base = self.base_address;
207
208 let mut sorted_labels = self.labels.clone();
210 sorted_labels.sort_by_key(|(_, addr)| *addr);
211
212 let mut label_at: alloc::collections::BTreeMap<u64, Vec<&str>> =
214 alloc::collections::BTreeMap::new();
215 for (name, addr) in &sorted_labels {
216 label_at.entry(*addr).or_default().push(name);
217 }
218
219 let mut source_at: alloc::collections::BTreeMap<u64, &str> =
221 alloc::collections::BTreeMap::new();
222 for (offset, text) in &self.source_annotations {
223 if !text.is_empty() {
224 source_at.insert(*offset, text);
225 }
226 }
227
228 let mut split_offsets: alloc::collections::BTreeSet<u64> =
230 label_at.keys().copied().collect();
231
232 for &ann_off in source_at.keys() {
234 split_offsets.insert(ann_off);
235 }
236
237 let bytes = &self.bytes;
239 let mut offset: u64 = base;
240 let mut i = 0;
241
242 while i < bytes.len() {
243 if let Some(names) = label_at.get(&offset) {
245 for name in names {
246 let _ = writeln!(out, "{:08X} {}:", offset, name);
247 }
248 }
249
250 let max_end = core::cmp::min(i + 8, bytes.len());
252 let mut chunk_end = max_end;
253
254 let range_end = offset + (max_end - i) as u64;
256 if range_end > offset + 1 {
257 for &split_off in split_offsets.range((offset + 1)..range_end) {
258 let split_at = (split_off - base) as usize;
259 if split_at < chunk_end && split_at > i {
260 chunk_end = split_at;
261 break;
262 }
263 }
264 }
265
266 let chunk = &bytes[i..chunk_end];
267 let hex: String = chunk.iter().fold(String::new(), |mut acc, b| {
268 let _ = write!(acc, "{:02X}", b);
269 acc
270 });
271
272 if let Some(source_text) = source_at.get(&offset) {
274 let _ = writeln!(out, "{:08X} {:<16} {}", offset, hex, source_text);
275 } else {
276 let _ = writeln!(out, "{:08X} {:<16}", offset, hex);
277 }
278
279 let chunk_len = chunk.len();
280 i += chunk_len;
281 offset += chunk_len as u64;
282 }
283
284 if let Some(names) = label_at.get(&offset) {
286 for name in names {
287 let _ = writeln!(out, "{:08X} {}:", offset, name);
288 }
289 }
290
291 out
292 }
293}
294
/// Upper bounds the assembler enforces while processing input.
///
/// Exceeding a bound checked by the assembler itself yields
/// `AsmError::ResourceLimitExceeded`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ResourceLimits {
    /// Maximum number of parsed statements, counted cumulatively across
    /// `emit` calls.
    pub max_statements: usize,
    /// Maximum number of labels, counting both source labels and labels
    /// added through the builder API.
    pub max_labels: usize,
    /// Maximum number of output bytes; checked against a running estimate
    /// while emitting and against the final size in `finish`.
    pub max_output_bytes: usize,
    /// Number of collected encoding errors at which statement processing
    /// stops.
    pub max_errors: usize,
    /// Recursion depth limit forwarded to the preprocessor.
    pub max_recursion_depth: usize,
    /// Maximum length in bytes of the source passed to a single `emit` call.
    pub max_source_bytes: usize,
    /// Iteration limit forwarded to the preprocessor.
    pub max_iterations: usize,
}
341
342impl Default for ResourceLimits {
343 fn default() -> Self {
344 Self {
345 max_statements: 1_000_000,
346 max_labels: 100_000,
347 max_output_bytes: 16 * 1024 * 1024,
348 max_errors: 64,
349 max_recursion_depth: 256,
350 max_source_bytes: 64 * 1024 * 1024,
351 max_iterations: 100_000,
352 }
353 }
354}
355
/// Incremental assembler: accepts source text and builder calls, hands the
/// resulting fragments to a `Linker`, and produces an `AssemblyResult` from
/// `finish`.
#[derive(Debug)]
pub struct Assembler {
    /// Target architecture. Updated by `.code16/.code32/.code64`,
    /// `.thumb/.arm` and `.thumb_func` statements.
    arch: Arch,
    /// Current x86 operating mode; `Mode16` selects the 16-bit encoder when
    /// the `x86` feature is enabled.
    x86_mode: crate::ir::X86Mode,
    /// Syntax dialect handed to the parser.
    syntax: Syntax,
    /// Optimization level; `OptLevel::Size` runs the instruction optimizer
    /// before encoding.
    opt_level: OptLevel,
    /// Collects fragments, labels, constants and externals; resolved in
    /// `finish`.
    linker: Linker,
    /// Expands source text before it is tokenized.
    preprocessor: Preprocessor,
    /// Encoding errors collected while processing statements; reported by
    /// `finish`.
    errors: Vec<AsmError>,
    /// `(fragment index, source line)` pairs recorded when listing is on.
    fragment_annotations: Vec<(usize, String)>,
    /// Whether source lines are recorded for `AssemblyResult::listing`.
    listing_enabled: bool,
    /// Limits currently enforced.
    resource_limits: ResourceLimits,
    /// Running total of statements processed so far.
    statement_count: usize,
    /// Running total of labels defined so far.
    label_count: usize,
    /// Literal-pool entries waiting to be emitted by the next flush.
    literal_pool: Vec<LiteralPoolEntry>,
    /// Counter used to generate unique `.Lpool_N` label names.
    literal_pool_counter: usize,
    /// Whether RISC-V compressed encoding is enabled (`.option rvc`).
    rvc_enabled: bool,
    /// Set by `.thumb_func`; the next label is recorded as a Thumb function.
    thumb_func_pending: bool,
    /// Names of labels whose address gets bit 0 set in `finish`.
    thumb_labels: Vec<String>,
    /// Running estimate of output bytes, maintained by `check_output_limit`.
    estimated_output_bytes: usize,
}
415
/// One pending literal-pool constant, emitted when the pool is flushed.
#[derive(Debug, Clone)]
struct LiteralPoolEntry {
    /// Constant value; truncated to `size` bytes when emitted.
    value: i128,
    /// Width in bytes (4 or 8 as assigned by the assembler).
    size: u8,
    /// Generated label that instructions use to reference this entry.
    label: String,
}
426
427impl Assembler {
428 pub fn new(arch: Arch) -> Self {
430 let syntax = match arch {
431 Arch::Arm | Arch::Thumb | Arch::Aarch64 => Syntax::Ual,
432 Arch::Rv32 | Arch::Rv64 => Syntax::RiscV,
433 _ => Syntax::Intel,
434 };
435 let x86_mode = match arch {
436 Arch::X86 => crate::ir::X86Mode::Mode32,
437 Arch::X86_64 => crate::ir::X86Mode::Mode64,
438 _ => crate::ir::X86Mode::Mode64, };
440 Self {
441 arch,
442 x86_mode,
443 syntax,
444 opt_level: OptLevel::default(),
445 linker: Linker::new(),
446 preprocessor: Preprocessor::new(),
447 errors: Vec::new(),
448 fragment_annotations: Vec::new(),
449 listing_enabled: false,
450 resource_limits: ResourceLimits::default(),
451 statement_count: 0,
452 label_count: 0,
453 literal_pool: Vec::new(),
454 literal_pool_counter: 0,
455 rvc_enabled: false,
456 thumb_func_pending: false,
457 thumb_labels: Vec::new(),
458 estimated_output_bytes: 0,
459 }
460 }
461
462 pub fn limits(&mut self, limits: ResourceLimits) -> &mut Self {
466 self.resource_limits = limits;
467 self.preprocessor
468 .set_max_recursion_depth(limits.max_recursion_depth);
469 self.preprocessor.set_max_iterations(limits.max_iterations);
470 self
471 }
472
473 pub fn syntax(&mut self, syntax: Syntax) -> &mut Self {
478 self.syntax = syntax;
479 self
480 }
481
482 pub fn optimize(&mut self, level: OptLevel) -> &mut Self {
488 self.opt_level = level;
489 self
490 }
491
492 pub fn enable_listing(&mut self) -> &mut Self {
513 self.listing_enabled = true;
514 self
515 }
516
517 pub fn base_address(&mut self, addr: u64) -> &mut Self {
519 self.linker.set_base_address(addr);
520 self
521 }
522
523 pub fn define_external(&mut self, name: &str, addr: u64) -> &mut Self {
538 self.linker.define_external(name, addr);
539 self
540 }
541
542 pub fn define_constant(&mut self, name: &str, value: i128) -> &mut Self {
544 self.linker.define_constant(name, value);
545 self
546 }
547
548 pub fn emit(&mut self, source: &str) -> Result<&mut Self, AsmError> {
555 if source.len() > self.resource_limits.max_source_bytes {
557 return Err(AsmError::ResourceLimitExceeded {
558 resource: String::from("source bytes"),
559 limit: self.resource_limits.max_source_bytes,
560 });
561 }
562 let expanded = self.preprocessor.process(source)?;
564 let mut statements = parse_source(&expanded, self.arch, self.syntax)?;
565 self.process_statements(&mut statements, &expanded)?;
566 Ok(self)
567 }
568
569 pub fn define_preprocessor_symbol(&mut self, name: &str, value: i128) -> &mut Self {
574 self.preprocessor.define_symbol(name, value);
575 self
576 }
577
578 pub fn label(&mut self, name: &str) -> Result<&mut Self, AsmError> {
598 self.label_count += 1;
599 if self.label_count > self.resource_limits.max_labels {
600 return Err(AsmError::ResourceLimitExceeded {
601 resource: String::from("labels"),
602 limit: self.resource_limits.max_labels,
603 });
604 }
605 self.linker.add_label(name, Span::new(0, 0, 0, 0))?;
606 Ok(self)
607 }
608
609 pub fn db(&mut self, bytes: &[u8]) -> Result<&mut Self, AsmError> {
616 self.check_output_limit(bytes.len())?;
617 self.linker.add_bytes(bytes.to_vec(), Span::new(0, 0, 0, 0));
618 Ok(self)
619 }
620
621 pub fn dw(&mut self, value: u16) -> Result<&mut Self, AsmError> {
628 self.check_output_limit(2)?;
629 self.linker
630 .add_bytes(value.to_le_bytes().to_vec(), Span::new(0, 0, 0, 0));
631 Ok(self)
632 }
633
634 pub fn dd(&mut self, value: u32) -> Result<&mut Self, AsmError> {
641 self.check_output_limit(4)?;
642 self.linker
643 .add_bytes(value.to_le_bytes().to_vec(), Span::new(0, 0, 0, 0));
644 Ok(self)
645 }
646
647 pub fn dq(&mut self, value: u64) -> Result<&mut Self, AsmError> {
654 self.check_output_limit(8)?;
655 self.linker
656 .add_bytes(value.to_le_bytes().to_vec(), Span::new(0, 0, 0, 0));
657 Ok(self)
658 }
659
660 pub fn ascii(&mut self, s: &str) -> Result<&mut Self, AsmError> {
667 self.check_output_limit(s.len())?;
668 self.linker
669 .add_bytes(s.as_bytes().to_vec(), Span::new(0, 0, 0, 0));
670 Ok(self)
671 }
672
673 pub fn asciz(&mut self, s: &str) -> Result<&mut Self, AsmError> {
680 self.check_output_limit(s.len() + 1)?;
681 let mut bytes = s.as_bytes().to_vec();
682 bytes.push(0);
683 self.linker.add_bytes(bytes, Span::new(0, 0, 0, 0));
684 Ok(self)
685 }
686
687 pub fn align(&mut self, alignment: u32) -> &mut Self {
691 let use_nop = matches!(self.arch, Arch::X86 | Arch::X86_64);
692 self.linker
693 .add_alignment(alignment, 0x00, None, use_nop, Span::new(0, 0, 0, 0));
694 self
695 }
696
697 pub fn align_with_fill(&mut self, alignment: u32, fill: u8) -> &mut Self {
699 self.linker
700 .add_alignment(alignment, fill, None, false, Span::new(0, 0, 0, 0));
701 self
702 }
703
704 pub fn org(&mut self, target: u64) -> &mut Self {
706 self.linker.add_org(target, 0x00, Span::new(0, 0, 0, 0));
707 self
708 }
709
710 pub fn org_with_fill(&mut self, target: u64, fill: u8) -> &mut Self {
712 self.linker.add_org(target, fill, Span::new(0, 0, 0, 0));
713 self
714 }
715
716 pub fn fill(&mut self, count: u32, size: u8, value: i64) -> Result<&mut Self, AsmError> {
725 let total = count as usize * size as usize;
726 self.check_output_limit(total)?;
727 let mut bytes = Vec::with_capacity(total);
728 let val_bytes = value.to_le_bytes();
730 for _ in 0..count {
731 for &b in val_bytes.iter().take(size as usize) {
732 bytes.push(b);
733 }
734 if (size as usize) > 8 {
736 bytes.resize(bytes.len() + size as usize - 8, 0);
737 }
738 }
739 self.linker.add_bytes(bytes, Span::new(0, 0, 0, 0));
740 Ok(self)
741 }
742
743 pub fn space(&mut self, n: u32) -> Result<&mut Self, AsmError> {
750 self.check_output_limit(n as usize)?;
751 let bytes = alloc::vec![0u8; n as usize];
752 self.linker.add_bytes(bytes, Span::new(0, 0, 0, 0));
753 Ok(self)
754 }
755
756 pub fn current_fragment_count(&self) -> usize {
760 self.linker.fragment_count()
761 }
762
763 pub fn encode_one(&self, source: &str) -> Result<Vec<u8>, AsmError> {
784 use crate::encoder::encode_instruction;
785
786 let tokens = crate::lexer::tokenize(source)?;
787 let stmts = crate::parser::parse_with_syntax(&tokens, self.arch, self.syntax)?;
788 if stmts.is_empty() {
789 return Ok(Vec::new());
790 }
791 match &stmts[0] {
792 crate::ir::Statement::Instruction(instr) => {
793 let mut instr = instr.clone();
795 self.resolve_constants_in_instruction(&mut instr);
796 let encoded = encode_instruction(&instr, self.arch)?;
797 Ok(encoded.bytes.to_vec())
798 }
799 _ => Err(AsmError::Syntax {
800 msg: String::from("expected an instruction"),
801 span: crate::error::Span::new(0, 0, 0, 0),
802 }),
803 }
804 }
805
806 pub fn reset(&mut self) -> &mut Self {
826 self.linker = Linker::new();
827 self.preprocessor = Preprocessor::new();
828 self.errors.clear();
829 self.fragment_annotations.clear();
830 self.statement_count = 0;
832 self.label_count = 0;
833 self.literal_pool.clear();
834 self.literal_pool_counter = 0;
835 self.thumb_func_pending = false;
836 self.thumb_labels.clear();
837 self.estimated_output_bytes = 0;
838 self
841 }
842
843 fn check_output_limit(&mut self, additional: usize) -> Result<(), AsmError> {
848 self.estimated_output_bytes += additional;
849 if self.estimated_output_bytes > self.resource_limits.max_output_bytes {
850 return Err(AsmError::ResourceLimitExceeded {
851 resource: String::from("output bytes"),
852 limit: self.resource_limits.max_output_bytes,
853 });
854 }
855 Ok(())
856 }
857
858 pub fn finish(mut self) -> Result<AssemblyResult, AsmError> {
865 if !self.errors.is_empty() {
866 if self.errors.len() == 1 {
867 return Err(self.errors.remove(0));
868 }
869 return Err(AsmError::Multiple {
870 errors: self.errors,
871 });
872 }
873
874 let base = self.linker.base_address();
875
876 let flush_span = crate::error::Span::new(0, 0, 0, 0);
878 self.flush_literal_pool(flush_span)?;
879
880 let (bytes, mut labels, relocations, offsets) = self.linker.resolve()?;
881
882 for (name, addr) in labels.iter_mut() {
884 if self.thumb_labels.iter().any(|t| t == name) {
885 *addr |= 1;
886 }
887 }
888
889 if bytes.len() > self.resource_limits.max_output_bytes {
891 return Err(AsmError::ResourceLimitExceeded {
892 resource: String::from("output bytes"),
893 limit: self.resource_limits.max_output_bytes,
894 });
895 }
896
897 let source_annotations = self.build_source_annotations(&offsets);
900
901 Ok(AssemblyResult {
902 bytes,
903 labels,
904 relocations,
905 base_address: base,
906 source_annotations,
907 })
908 }
909
910 fn build_source_annotations(&self, offsets: &[u64]) -> Vec<(u64, String)> {
913 let mut annotations = Vec::new();
914 for &(frag_idx, ref text) in &self.fragment_annotations {
915 if frag_idx < offsets.len() {
916 annotations.push((offsets[frag_idx], text.clone()));
917 }
918 }
919 annotations
920 }
921
    /// Feeds parsed `statements` into the linker, updating assembler state
    /// as directives are encountered.
    ///
    /// `source` is the text the statements were parsed from; it is only used
    /// to extract source lines for listing annotations.
    ///
    /// # Errors
    ///
    /// Returns immediately on a resource-limit violation, on a linker error,
    /// and on a directive that is invalid for the current architecture.
    /// Instruction encoding errors are collected in `self.errors` instead;
    /// processing only stops once `max_errors` of them have accumulated.
    fn process_statements(
        &mut self,
        statements: &mut [Statement],
        source: &str,
    ) -> Result<(), AsmError> {
        // The statement count is cumulative across calls.
        self.statement_count += statements.len();
        if self.statement_count > self.resource_limits.max_statements {
            return Err(AsmError::ResourceLimitExceeded {
                resource: String::from("statements"),
                limit: self.resource_limits.max_statements,
            });
        }

        for stmt in statements.iter_mut() {
            match stmt {
                Statement::Label(name, span) => {
                    self.label_count += 1;
                    if self.label_count > self.resource_limits.max_labels {
                        return Err(AsmError::ResourceLimitExceeded {
                            resource: String::from("labels"),
                            limit: self.resource_limits.max_labels,
                        });
                    }
                    self.linker.add_label(name, *span)?;
                    // A preceding `.thumb_func` marks exactly this label.
                    if self.thumb_func_pending {
                        self.thumb_labels.push(name.clone());
                        self.thumb_func_pending = false;
                    }
                }

                Statement::Instruction(instr) => {
                    // Captured before encoding so the annotation points at the
                    // fragment this instruction is about to create.
                    let frag_idx = self.linker.fragment_count();
                    // Substitute known constants, then turn literal-pool
                    // operands into references to pool labels.
                    self.resolve_constants_in_instruction(instr);
                    self.transform_literal_pool_operands(instr);
                    if self.opt_level == OptLevel::Size {
                        crate::optimize::optimize_instruction(instr, self.arch);
                    }
                    // 16-bit mode uses the dedicated encoder when the `x86`
                    // feature is compiled in.
                    let encode_result = if self.x86_mode == crate::ir::X86Mode::Mode16 {
                        #[cfg(feature = "x86")]
                        {
                            encoder::encode_instruction_16(instr)
                        }
                        #[cfg(not(feature = "x86"))]
                        {
                            encoder::encode_instruction(instr, self.arch)
                        }
                    } else {
                        encoder::encode_instruction(instr, self.arch)
                    };
                    // With RVC enabled, try to replace a 4-byte,
                    // relocation-free RISC-V encoding with its compressed
                    // form. Mnemonics already starting with "c." are left
                    // alone.
                    #[cfg(feature = "riscv")]
                    let encode_result = if self.rvc_enabled
                        && matches!(self.arch, Arch::Rv32 | Arch::Rv64)
                        && !instr.mnemonic.starts_with("c.")
                    {
                        match encode_result {
                            Ok(ref enc) if enc.bytes.len() == 4 && enc.relocation.is_none() => {
                                let is_rv64 = self.arch == Arch::Rv64;
                                if let Some(hw) = crate::riscv::try_compress(
                                    &instr.mnemonic,
                                    &instr.operands,
                                    is_rv64,
                                    instr.span,
                                ) {
                                    Ok(crate::riscv::rvc_instr(hw))
                                } else {
                                    encode_result
                                }
                            }
                            _ => encode_result,
                        }
                    } else {
                        encode_result
                    };
                    match encode_result {
                        Ok(encoded) => {
                            self.check_output_limit(encoded.bytes.len())?;
                            self.linker.add_encoded(
                                encoded.bytes,
                                encoded.relocation,
                                encoded.relax,
                                instr.span,
                            )?;
                            self.annotate(frag_idx, source, instr.span);
                        }
                        Err(e) => {
                            // Keep going so several errors can be reported at
                            // once, up to the configured maximum.
                            self.errors.push(e);
                            if self.errors.len() >= self.resource_limits.max_errors {
                                return Err(AsmError::ResourceLimitExceeded {
                                    resource: String::from("errors"),
                                    limit: self.resource_limits.max_errors,
                                });
                            }
                        }
                    }
                }

                Statement::Data(data) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = data.span;
                    self.emit_data(data)?;
                    self.annotate(frag_idx, source, span);
                }

                Statement::Align(align) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = align.span;
                    // NOP padding is only requested on x86/x86-64 and only
                    // when no explicit fill byte was given.
                    let use_nop =
                        align.fill.is_none() && matches!(self.arch, Arch::X86 | Arch::X86_64);
                    self.linker.add_alignment(
                        align.alignment,
                        align.fill.unwrap_or(0x00),
                        align.max_skip,
                        use_nop,
                        align.span,
                    );
                    self.annotate(frag_idx, source, span);
                }

                Statement::Const(c) => {
                    self.linker.define_constant(&c.name, c.value);
                }

                Statement::Fill(fill) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = fill.span;
                    // NOTE(review): this product is computed in `usize` and
                    // could overflow on 32-bit targets if `count` is 32 bits
                    // wide — confirm the field types.
                    let total = fill.count as usize * fill.size as usize;
                    self.check_output_limit(total)?;
                    let mut bytes = Vec::with_capacity(total);
                    let val_bytes = fill.value.to_le_bytes();
                    // Each unit is the low `size` bytes of the value; units
                    // wider than 8 bytes are zero-padded.
                    for _ in 0..fill.count {
                        for &b in val_bytes.iter().take(fill.size as usize) {
                            bytes.push(b);
                        }
                        if (fill.size as usize) > 8 {
                            bytes.resize(bytes.len() + fill.size as usize - 8, 0);
                        }
                    }
                    self.linker.add_bytes(bytes, fill.span);
                    self.annotate(frag_idx, source, span);
                }

                Statement::Space(space) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = space.span;
                    self.check_output_limit(space.size as usize)?;
                    let bytes = alloc::vec![space.fill; space.size as usize];
                    self.linker.add_bytes(bytes, space.span);
                    self.annotate(frag_idx, source, span);
                }

                Statement::Org(org) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = org.span;
                    self.linker.add_org(org.offset, org.fill, org.span);
                    self.annotate(frag_idx, source, span);
                }

                Statement::CodeMode(mode, span) => {
                    if !matches!(self.arch, Arch::X86 | Arch::X86_64) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".code16/.code32/.code64 only valid for x86/x86-64"),
                            span: *span,
                        });
                    }
                    self.x86_mode = *mode;
                    // Keep the architecture in step with the selected mode.
                    match mode {
                        crate::ir::X86Mode::Mode16 | crate::ir::X86Mode::Mode32 => {
                            self.arch = Arch::X86;
                        }
                        crate::ir::X86Mode::Mode64 => {
                            self.arch = Arch::X86_64;
                        }
                    }
                }

                Statement::Ltorg(span) => {
                    let span = *span;
                    // Emit the pending literal pool at this position.
                    self.flush_literal_pool(span)?;
                }

                Statement::OptionRvc(enable, span) => {
                    if !matches!(self.arch, Arch::Rv32 | Arch::Rv64) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".option rvc/norvc is only valid for RISC-V"),
                            span: *span,
                        });
                    }
                    self.rvc_enabled = *enable;
                }

                Statement::ThumbMode(is_thumb, span) => {
                    if !matches!(self.arch, Arch::Arm | Arch::Thumb) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".thumb/.arm only valid for ARM"),
                            span: *span,
                        });
                    }
                    self.arch = if *is_thumb { Arch::Thumb } else { Arch::Arm };
                }

                Statement::ThumbFunc(span) => {
                    if !matches!(self.arch, Arch::Arm | Arch::Thumb) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".thumb_func only valid for ARM/Thumb"),
                            span: *span,
                        });
                    }
                    // Switches to Thumb and flags the next label as a Thumb
                    // function.
                    self.arch = Arch::Thumb;
                    self.thumb_func_pending = true;
                }
            }
        }
        Ok(())
    }
1160
1161 #[inline]
1163 fn annotate(&mut self, frag_idx: usize, source: &str, span: Span) {
1164 if self.listing_enabled {
1165 let src_text = extract_source_line(source, span);
1166 if !src_text.is_empty() {
1167 self.fragment_annotations
1168 .push((frag_idx, src_text.to_string()));
1169 }
1170 }
1171 }
1172
1173 fn transform_literal_pool_operands(&mut self, instr: &mut Instruction) {
1183 let size: u8 = instr
1188 .operands
1189 .iter()
1190 .find_map(|op| {
1191 if let Operand::Register(r) = op {
1192 if r.is_arm() {
1193 return Some(4u8); }
1195 if r.is_aarch64() {
1196 return Some(if r.is_a64_64bit() { 8u8 } else { 4u8 });
1197 }
1198 }
1199 None
1200 })
1201 .unwrap_or(8);
1202
1203 for op in &mut instr.operands {
1204 if let Operand::LiteralPoolValue(val) = op {
1205 let val = *val;
1206
1207 let label = if let Some(existing) = self
1209 .literal_pool
1210 .iter()
1211 .find(|e| e.value == val && e.size == size)
1212 {
1213 existing.label.clone()
1214 } else {
1215 let label = alloc::format!(".Lpool_{}", self.literal_pool_counter);
1216 self.literal_pool_counter += 1;
1217 self.literal_pool.push(LiteralPoolEntry {
1218 value: val,
1219 size,
1220 label: label.clone(),
1221 });
1222 label
1223 };
1224
1225 *op = Operand::Label(label);
1226 }
1227 }
1228 }
1229
1230 fn flush_literal_pool(&mut self, span: Span) -> Result<(), AsmError> {
1235 if self.literal_pool.is_empty() {
1236 return Ok(());
1237 }
1238
1239 let max_align = self
1241 .literal_pool
1242 .iter()
1243 .map(|e| e.size as u32)
1244 .max()
1245 .unwrap_or(4);
1246 self.linker
1247 .add_alignment(max_align, 0x00, None, false, span);
1248
1249 let entries: Vec<LiteralPoolEntry> = core::mem::take(&mut self.literal_pool);
1251 for entry in &entries {
1252 self.linker.add_label(&entry.label, span)?;
1253 let bytes = match entry.size {
1254 4 => (entry.value as u32).to_le_bytes().to_vec(),
1255 8 => (entry.value as u64).to_le_bytes().to_vec(),
1256 _ => (entry.value as u64).to_le_bytes().to_vec(),
1257 };
1258 self.linker.add_bytes(bytes, span);
1259 }
1260
1261 Ok(())
1262 }
1263
1264 fn resolve_constants_in_instruction(&self, instr: &mut Instruction) {
1269 for op in &mut instr.operands {
1270 match op {
1271 Operand::Label(name) => {
1272 if let Some(&value) = self.linker.get_constant(name) {
1273 *op = Operand::Immediate(value);
1274 }
1275 }
1276 Operand::Expression(expr) => {
1277 expr.resolve_constants(|name| self.linker.get_constant(name).copied());
1279 if let Some(val) = expr.eval() {
1281 *op = Operand::Immediate(val);
1282 }
1283 }
1284 Operand::Memory(mem) => {
1285 if let Some(ref label) = mem.disp_label {
1287 if let Some(&value) = self.linker.get_constant(label) {
1288 mem.disp = mem.disp.wrapping_add(value as i64);
1289 mem.disp_label = None;
1290 }
1291 }
1292 }
1293 _ => {}
1294 }
1295 }
1296 }
1297
1298 fn emit_data(&mut self, data: &DataDecl) -> Result<(), AsmError> {
1300 use crate::encoder::Relocation;
1301
1302 let data_item_size: usize = match data.size {
1303 DataSize::Byte => 1,
1304 DataSize::Word => 2,
1305 DataSize::Long => 4,
1306 DataSize::Quad => 8,
1307 };
1308
1309 let mut pending: Vec<u8> = Vec::new();
1311
1312 for value in &data.values {
1313 match value {
1314 DataValue::Integer(n) => match data.size {
1315 DataSize::Byte => pending.push(*n as u8),
1316 DataSize::Word => pending.extend_from_slice(&(*n as u16).to_le_bytes()),
1317 DataSize::Long => pending.extend_from_slice(&(*n as u32).to_le_bytes()),
1318 DataSize::Quad => pending.extend_from_slice(&(*n as u64).to_le_bytes()),
1319 },
1320 DataValue::Bytes(b) => {
1321 pending.extend_from_slice(b);
1322 }
1323 DataValue::Label(name, addend) => {
1324 if let Some(&const_val) = self.linker.get_constant(name) {
1326 let val = const_val.wrapping_add(*addend as i128);
1327 match data.size {
1328 DataSize::Byte => pending.push(val as u8),
1329 DataSize::Word => {
1330 pending.extend_from_slice(&(val as u16).to_le_bytes())
1331 }
1332 DataSize::Long => {
1333 pending.extend_from_slice(&(val as u32).to_le_bytes())
1334 }
1335 DataSize::Quad => {
1336 pending.extend_from_slice(&(val as u64).to_le_bytes())
1337 }
1338 }
1339 continue;
1340 }
1341
1342 if !pending.is_empty() {
1344 self.linker
1345 .add_bytes(core::mem::take(&mut pending), data.span);
1346 }
1347 let mut slot = encoder::InstrBytes::new();
1349 for _ in 0..data_item_size {
1350 slot.push(0);
1351 }
1352 let reloc = Relocation {
1353 offset: 0,
1354 size: data_item_size as u8,
1355 label: alloc::rc::Rc::from(name.as_str()),
1356 kind: encoder::RelocKind::Absolute,
1357 addend: *addend,
1358 trailing_bytes: 0,
1359 };
1360 self.linker
1362 .add_encoded(slot, Some(reloc), None, data.span)?;
1363 }
1364 }
1365 }
1366
1367 if !pending.is_empty() {
1369 self.linker.add_bytes(pending, data.span);
1370 }
1371
1372 Ok(())
1373 }
1374}
1375
1376fn parse_source(source: &str, arch: Arch, syntax: Syntax) -> Result<Vec<Statement>, AsmError> {
1378 let tokens = lexer::tokenize(source)?;
1379 parser::parse_with_syntax(&tokens, arch, syntax)
1380}
1381
1382fn extract_source_line(source: &str, span: Span) -> &str {
1387 let offset = span.offset;
1388 if offset >= source.len() {
1389 return "";
1390 }
1391 let line_start = source[..offset].rfind('\n').map_or(0, |p| p + 1);
1393 let line_end = source[offset..]
1395 .find('\n')
1396 .map_or(source.len(), |p| offset + p);
1397 source[line_start..line_end].trim()
1398}
1399
1400#[cfg(test)]
1401mod tests {
1402 use super::*;
1403
1404 #[test]
1407 fn assemble_nop() {
1408 let mut asm = Assembler::new(Arch::X86_64);
1409 asm.emit("nop").unwrap();
1410 let result = asm.finish().unwrap();
1411 assert_eq!(result.bytes(), &[0x90]);
1412 }
1413
1414 #[test]
1415 fn assemble_ret() {
1416 let mut asm = Assembler::new(Arch::X86_64);
1417 asm.emit("ret").unwrap();
1418 let result = asm.finish().unwrap();
1419 assert_eq!(result.bytes(), &[0xC3]);
1420 }
1421
1422 #[test]
1423 fn assemble_multiple_instructions() {
1424 let mut asm = Assembler::new(Arch::X86_64);
1425 asm.emit("nop\nret").unwrap();
1426 let result = asm.finish().unwrap();
1427 assert_eq!(result.bytes(), &[0x90, 0xC3]);
1428 }
1429
1430 #[test]
1431 fn assemble_push_pop() {
1432 let mut asm = Assembler::new(Arch::X86_64);
1433 asm.emit("push rbp").unwrap();
1434 asm.emit("mov rbp, rsp").unwrap();
1435 asm.emit("pop rbp").unwrap();
1436 asm.emit("ret").unwrap();
1437 let result = asm.finish().unwrap();
1438 let bytes = result.bytes();
1439 assert_eq!(bytes[0], 0x55); assert_eq!(*bytes.last().unwrap(), 0xC3); }
1442
1443 #[test]
1444 fn assemble_with_label() {
1445 let mut asm = Assembler::new(Arch::X86_64);
1446 asm.emit("jmp target\ntarget:\nnop").unwrap();
1447 let result = asm.finish().unwrap();
1448 let bytes = result.bytes();
1449 assert_eq!(bytes[0], 0xEB); assert_eq!(bytes[1], 0x00); assert_eq!(bytes[2], 0x90); }
1454
1455 #[test]
1456 fn assemble_backward_jump() {
1457 let mut asm = Assembler::new(Arch::X86_64);
1458 asm.emit("loop_start:\nnop\njmp loop_start").unwrap();
1459 let result = asm.finish().unwrap();
1460 let bytes = result.bytes();
1461 assert_eq!(bytes[0], 0x90); assert_eq!(bytes[1], 0xEB); assert_eq!(bytes[2], 0xFD);
1466 }
1467
1468 #[test]
1469 fn assemble_conditional_jump() {
1470 let mut asm = Assembler::new(Arch::X86_64);
1471 asm.emit("cmp rax, 0\nje done\nnop\ndone:\nret").unwrap();
1472 let result = asm.finish().unwrap();
1473 let bytes = result.bytes();
1474 assert!(!bytes.is_empty());
1476 assert_eq!(*bytes.last().unwrap(), 0xC3);
1478 }
1479
1480 #[test]
1481 fn assemble_xor_self() {
1482 let mut asm = Assembler::new(Arch::X86_64);
1483 asm.emit("xor eax, eax").unwrap();
1484 let result = asm.finish().unwrap();
1485 assert_eq!(result.bytes(), &[0x31, 0xC0]);
1486 }
1487
1488 #[test]
1489 fn assemble_syscall_stub() {
1490 let mut asm = Assembler::new(Arch::X86_64);
1491 asm.emit("mov eax, 60\nxor edi, edi\nsyscall").unwrap();
1492 let result = asm.finish().unwrap();
1493 let bytes = result.bytes();
1494 assert_eq!(&bytes[0..5], &[0xB8, 0x3C, 0x00, 0x00, 0x00]);
1496 assert_eq!(&bytes[bytes.len() - 2..], &[0x0F, 0x05]);
1498 }
1499
1500 #[test]
1503 fn builder_api() {
1504 let mut asm = Assembler::new(Arch::X86_64);
1505 asm.emit("push rbp").unwrap();
1506 asm.db(&[0xCC]).unwrap(); asm.emit("pop rbp").unwrap();
1508 asm.emit("ret").unwrap();
1509 let result = asm.finish().unwrap();
1510 let bytes = result.bytes();
1511 assert_eq!(bytes[0], 0x55); assert_eq!(bytes[1], 0xCC); }
1514
1515 #[test]
1516 fn builder_label() {
1517 let mut asm = Assembler::new(Arch::X86_64);
1518 asm.emit("jmp target").unwrap();
1519 asm.label("target").unwrap();
1520 asm.emit("ret").unwrap();
1521 let result = asm.finish().unwrap();
1522 let bytes = result.bytes();
1523 assert_eq!(bytes[0], 0xEB);
1525 assert_eq!(*bytes.last().unwrap(), 0xC3);
1526 }
1527
1528 #[test]
1529 fn builder_data_words() {
1530 let mut asm = Assembler::new(Arch::X86_64);
1531 asm.dw(0x1234).unwrap();
1532 asm.dd(0xDEADBEEF).unwrap();
1533 let result = asm.finish().unwrap();
1534 let bytes = result.bytes();
1535 assert_eq!(&bytes[0..2], &[0x34, 0x12]);
1536 assert_eq!(&bytes[2..6], &[0xEF, 0xBE, 0xAD, 0xDE]);
1537 }
1538
1539 #[test]
1542 fn assemble_byte_directive() {
1543 let mut asm = Assembler::new(Arch::X86_64);
1544 asm.emit(".byte 0x90, 0xCC, 0xC3").unwrap();
1545 let result = asm.finish().unwrap();
1546 assert_eq!(result.bytes(), &[0x90, 0xCC, 0xC3]);
1547 }
1548
1549 #[test]
1550 fn assemble_word_directive() {
1551 let mut asm = Assembler::new(Arch::X86_64);
1552 asm.emit(".word 0x1234").unwrap();
1553 let result = asm.finish().unwrap();
1554 assert_eq!(result.bytes(), &[0x34, 0x12]);
1555 }
1556
1557 #[test]
1558 fn assemble_asciz_directive() {
1559 let mut asm = Assembler::new(Arch::X86_64);
1560 asm.emit(".asciz \"hello\"").unwrap();
1561 let result = asm.finish().unwrap();
1562 assert_eq!(result.bytes(), b"hello\0");
1563 }
1564
1565 #[test]
1566 fn assemble_equ_constant() {
1567 let mut asm = Assembler::new(Arch::X86_64);
1568 asm.emit(".equ EXIT, 60\nmov eax, EXIT").unwrap();
1569 let _result = asm.finish();
1571 }
1573
1574 #[test]
1575 fn assemble_fill_directive() {
1576 let mut asm = Assembler::new(Arch::X86_64);
1577 asm.emit(".fill 3, 1, 0x90").unwrap();
1578 let result = asm.finish().unwrap();
1579 assert_eq!(result.bytes(), &[0x90, 0x90, 0x90]);
1580 }
1581
1582 #[test]
1583 fn assemble_space_directive() {
1584 let mut asm = Assembler::new(Arch::X86_64);
1585 asm.emit(".space 4").unwrap();
1586 let result = asm.finish().unwrap();
1587 assert_eq!(result.bytes(), &[0, 0, 0, 0]);
1588 }
1589
1590 #[test]
1593 fn unknown_mnemonic_error() {
1594 let mut asm = Assembler::new(Arch::X86_64);
1595 asm.emit("foobar").unwrap(); let err = asm.finish().unwrap_err();
1597 assert!(matches!(err, AsmError::UnknownMnemonic { .. }));
1598 }
1599
1600 #[test]
1601 fn duplicate_label_error() {
1602 let mut asm = Assembler::new(Arch::X86_64);
1603 let err = asm.emit("foo:\nfoo:").unwrap_err();
1604 assert!(matches!(err, AsmError::DuplicateLabel { .. }));
1605 }
1606
1607 #[test]
1608 fn undefined_label_error() {
1609 let mut asm = Assembler::new(Arch::X86_64);
1610 asm.emit("jmp nowhere").unwrap();
1611 let err = asm.finish().unwrap_err();
1612 assert!(matches!(err, AsmError::UndefinedLabel { .. }));
1613 }
1614
1615 #[test]
1618 fn assemble_with_external() {
1619 let mut asm = Assembler::new(Arch::X86_64);
1620 asm.define_external("printf", 0x400000);
1621 asm.emit("mov rax, printf").unwrap();
1622 let result = asm.finish().unwrap();
1623 let bytes = result.bytes();
1624 assert_eq!(&bytes[bytes.len() - 8..], &0x400000u64.to_le_bytes());
1626 }
1627
1628 #[test]
1631 fn assemble_with_base_address() {
1632 let mut asm = Assembler::new(Arch::X86_64);
1633 asm.base_address(0x1000);
1634 asm.emit("nop").unwrap();
1635 let result = asm.finish().unwrap();
1636 assert_eq!(result.bytes(), &[0x90]);
1637 }
1638
/// Assembles a small countdown loop containing a backward branch to a label
/// and checks that output is produced and that its last byte is 0xC3.
#[test]
fn assemble_loop() {
    let mut asm = Assembler::new(Arch::X86_64);
    asm.emit(
        r#"
        mov ecx, 10
    loop_start:
        dec ecx
        jnz loop_start
        ret
    "#,
    )
    .unwrap();
    let result = asm.finish().unwrap();
    assert!(!result.is_empty());
    // The source ends with `ret`, so the final emitted byte must be 0xC3.
    assert_eq!(*result.bytes().last().unwrap(), 0xC3);
}
1658
/// Assembles a typical frame-pointer prologue/epilogue sequence and checks
/// only the first and last emitted bytes.
#[test]
fn assemble_function_prologue_epilogue() {
    let mut asm = Assembler::new(Arch::X86_64);
    asm.emit(
        r#"
        push rbp
        mov rbp, rsp
        sub rsp, 0x20
        add rsp, 0x20
        pop rbp
        ret
    "#,
    )
    .unwrap();
    let result = asm.finish().unwrap();
    let bytes = result.bytes();
    // First instruction is `push rbp`; its encoding is expected to start with 0x55.
    assert_eq!(bytes[0], 0x55);
    // Last instruction is `ret`, expected to encode as 0xC3.
    assert_eq!(*bytes.last().unwrap(), 0xC3);
}
1678
/// `AssemblyResult::len` reports the number of emitted bytes (three `nop`s
/// yield three bytes).
#[test]
fn result_length() {
    let three_nops = "nop\nnop\nnop";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(three_nops).unwrap();
    let output = assembler.finish().unwrap();
    assert_eq!(output.len(), 3);
}
1686
/// `AssemblyResult::into_bytes` hands back the owned machine code.
#[test]
fn result_into_bytes() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("ret").unwrap();
    let owned = assembler.finish().unwrap().into_bytes();
    assert_eq!(owned, vec![0xC3]);
}
1695
/// Semicolons separate several instructions written on a single line.
#[test]
fn semicolon_separated_instructions() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("nop; nop; ret").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 3] = [0x90, 0x90, 0xC3];
    assert_eq!(output.bytes(), &expected);
}
1705
/// Label addresses are reported in the result: `start` sits at offset 0 and
/// `end` after the two one-byte `nop`s at offset 2.
#[test]
fn labels_returned() {
    let source = "start:\nnop\nnop\nend:\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let start = output.label_address("start");
    let end = output.label_address("end");
    assert_eq!(start, Some(0));
    assert_eq!(end, Some(2));
}
1717
/// Reported label addresses include the configured base address.
#[test]
fn labels_with_base_address() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.base_address(0x400000);
    assembler.emit("entry:\nnop").unwrap();
    let output = assembler.finish().unwrap();
    let entry = output.label_address("entry");
    assert_eq!(entry, Some(0x400000));
}
1726
/// Labels defined through the builder method `label` get the offset at
/// which they were declared (0 before the code, 3 after three `nop`s).
#[test]
fn builder_label_address() {
    let mut assembler = Assembler::new(Arch::X86_64);

    assembler.label("before").unwrap();
    assembler.emit("nop; nop; nop").unwrap();
    assembler.label("after").unwrap();
    assembler.emit("ret").unwrap();

    let output = assembler.finish().unwrap();
    let before = output.label_address("before");
    let after = output.label_address("after");
    assert_eq!(before, Some(0));
    assert_eq!(after, Some(3));
}
1738
/// Selecting Intel syntax and size optimisation through the builder still
/// assembles a plain `nop` to 0x90.
#[test]
fn builder_syntax_and_optimize() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.syntax(Syntax::Intel);
    assembler.optimize(OptLevel::Size);
    assembler.emit("nop").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 1] = [0x90];
    assert_eq!(output.bytes(), &expected);
}
1750
/// A constant registered with `define_constant` can be used as an immediate
/// operand; `EXIT` = 60 appears as the 32-bit immediate 0x3C.
#[test]
fn builder_define_constant() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.define_constant("EXIT", 60);
    assembler.emit("mov eax, EXIT").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 5] = [0xB8, 0x3C, 0x00, 0x00, 0x00];
    assert_eq!(output.bytes(), &expected);
}
1762
/// A conditional jump to the very next instruction is encoded in the
/// two-byte short form with a zero displacement.
#[test]
fn short_branch_uses_rel8() {
    let source = "je done\ndone:\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 3] = [0x74, 0x00, 0xC3];
    assert_eq!(output.bytes(), &expected);
}
1773
/// `.quad <label>` stores the label's absolute address (base 0x1000 plus
/// offset 0) as eight little-endian bytes following the two code bytes.
#[test]
fn quad_label_reference() {
    let source = "func:\nnop\nret\njump_table:\n.quad func";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.base_address(0x1000);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let slot: [u8; 8] = code[2..10].try_into().unwrap();
    assert_eq!(u64::from_le_bytes(slot), 0x1000);
}
1789
/// `.long <label>` stores the label's absolute address (base 0x2000) as four
/// little-endian bytes right after the one-byte `nop`.
#[test]
fn long_label_reference() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.base_address(0x2000);
    assembler.emit("entry:\nnop\n.long entry").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let slot: [u8; 4] = code[1..5].try_into().unwrap();
    assert_eq!(u32::from_le_bytes(slot), 0x2000);
}
1801
/// A `NAME = value` assignment in the source defines a constant usable as an
/// immediate in a later instruction (42 becomes 0x2A).
#[test]
fn name_equals_constant_in_instruction() {
    let source = "ANSWER = 42\nmov eax, ANSWER";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 5] = [0xB8, 0x2A, 0x00, 0x00, 0x00];
    assert_eq!(output.bytes(), &expected);
}
1810
/// With listing enabled, the rendered listing shows the start address, the
/// hex bytes of each instruction and the original source text.
#[test]
fn listing_simple() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit("nop\nret").unwrap();
    let text = assembler.finish().unwrap().listing();

    // Address column.
    assert!(text.contains("00000000"));
    // Hex byte column.
    assert!(text.contains("90"));
    assert!(text.contains("C3"));
    // Source column.
    assert!(text.contains("nop"));
    assert!(text.contains("ret"));
}
1827
/// Labels are rendered in the listing as `name:` alongside the instructions.
#[test]
fn listing_with_labels() {
    let source = "start:\nnop\nend:\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(source).unwrap();
    let text = assembler.finish().unwrap().listing();

    assert!(text.contains("start:"));
    assert!(text.contains("end:"));
    assert!(text.contains("nop"));
    assert!(text.contains("ret"));
}
1841
/// Listing addresses start at the configured base address.
#[test]
fn listing_with_base_address() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.base_address(0x401000);
    assembler.emit("nop").unwrap();
    let text = assembler.finish().unwrap().listing();

    assert!(text.contains("00401000"));
    assert!(text.contains("nop"));
}
1853
/// `AssemblyResult::base_address` echoes the base address configured on the
/// assembler.
#[test]
fn listing_base_address_accessor() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.base_address(0x1000);
    assembler.emit("nop").unwrap();
    let output = assembler.finish().unwrap();
    let reported = output.base_address();
    assert_eq!(reported, 0x1000);
}
1862
/// Instruction bytes appear in the listing as contiguous upper-case hex
/// next to the source text of each instruction.
#[test]
fn listing_hex_format() {
    let source = "push rbp\nmov rbp, rsp";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(source).unwrap();
    let text = assembler.finish().unwrap().listing();

    // Hex columns.
    assert!(text.contains("55"));
    assert!(text.contains("4889E5"));
    // Source columns.
    assert!(text.contains("push rbp"));
    assert!(text.contains("mov rbp, rsp"));
}
1878
/// Every source line is echoed verbatim in the listing when listing is
/// enabled.
#[test]
fn listing_source_annotations() {
    let source = "mov eax, 1\nadd eax, 2\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(source).unwrap();
    let text = assembler.finish().unwrap().listing();

    assert!(text.contains("mov eax, 1"));
    assert!(text.contains("add eax, 2"));
    assert!(text.contains("ret"));
}
1891
/// Data directives are annotated in the listing with their source text.
#[test]
fn listing_data_annotation() {
    let directive = ".byte 0x90, 0xCC";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(directive).unwrap();
    let text = assembler.finish().unwrap().listing();
    assert!(text.contains(".byte 0x90, 0xCC"));
}
1901
/// A jump to a label produces at least one applied relocation, and the first
/// one names the target label.
#[test]
fn relocations_returned() {
    let source = "jmp target\nnop\ntarget:\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let applied = output.relocations();
    assert!(!applied.is_empty());
    assert_eq!(applied[0].label, "target");
}
1912
/// A `call` to a label records a relocation of kind `X86Relative` that
/// names the callee.
#[test]
fn relocations_for_call() {
    use crate::encoder::RelocKind;

    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("call func\nfunc:\nret").unwrap();
    let output = assembler.finish().unwrap();
    let applied = output.relocations();
    assert!(!applied.is_empty());
    let first = &applied[0];
    assert_eq!(first.label, "func");
    assert_eq!(first.kind, RelocKind::X86Relative);
}
1923
/// The `ascii` builder emits the raw characters without a terminator.
#[test]
fn builder_ascii() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.ascii("AB").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 2] = [0x41, 0x42];
    assert_eq!(output.bytes(), &expected);
}
1933
/// The `asciz` builder emits the characters followed by a NUL terminator.
#[test]
fn builder_asciz() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.asciz("Hi").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 3] = [0x48, 0x69, 0x00];
    assert_eq!(output.bytes(), &expected);
}
1941
/// `align(4)` after one byte pads up to offset 4, so the next byte lands at
/// index 4 and the output is five bytes long.
#[test]
fn builder_align() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.db(&[0x90]).unwrap();
    assembler.align(4);
    assembler.db(&[0xCC]).unwrap();

    let output = assembler.finish().unwrap();
    let code = output.bytes();
    assert_eq!(code.len(), 5);
    assert_eq!(code[4], 0xCC);
}
1952
/// `align_with_fill(4, 0xAA)` pads the three bytes between offset 1 and the
/// next 4-byte boundary with the requested fill byte.
#[test]
fn builder_align_with_fill() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.db(&[0x90]).unwrap();
    assembler.align_with_fill(4, 0xAA);
    assembler.db(&[0xCC]).unwrap();

    let output = assembler.finish().unwrap();
    let code = output.bytes();
    for padding_index in 1..=3 {
        assert_eq!(code[padding_index], 0xAA);
    }
}
1964
/// `org(4)` advances the location counter to offset 4, zero-filling the gap.
#[test]
fn builder_org() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.db(&[0x90]).unwrap();
    assembler.org(4);
    assembler.db(&[0xCC]).unwrap();

    let output = assembler.finish().unwrap();
    let expected: [u8; 5] = [0x90, 0x00, 0x00, 0x00, 0xCC];
    assert_eq!(output.bytes(), &expected);
}
1974
/// `org_with_fill(4, 0xFF)` advances to offset 4 and fills the gap with the
/// given byte instead of zeros.
#[test]
fn builder_org_with_fill() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.db(&[0x90]).unwrap();
    assembler.org_with_fill(4, 0xFF);
    assembler.db(&[0xCC]).unwrap();

    let output = assembler.finish().unwrap();
    let expected: [u8; 5] = [0x90, 0xFF, 0xFF, 0xFF, 0xCC];
    assert_eq!(output.bytes(), &expected);
}
1984
/// `fill(3, 2, 0xAB)` emits three two-byte little-endian units holding 0xAB.
#[test]
fn builder_fill() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.fill(3, 2, 0xAB).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 6] = [0xAB, 0x00, 0xAB, 0x00, 0xAB, 0x00];
    assert_eq!(output.bytes(), &expected);
}
1994
/// `fill(4, 1, 0xCC)` emits four single-byte units of 0xCC.
#[test]
fn builder_fill_size_1() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.fill(4, 1, 0xCC).unwrap();
    let output = assembler.finish().unwrap();
    let expected = [0xCCu8; 4];
    assert_eq!(output.bytes(), &expected);
}
2003
/// A single four-byte fill unit stores its value in little-endian order.
#[test]
fn builder_fill_multi_byte_value() {
    let mut assembler = Assembler::new(Arch::X86_64);
    // Widen losslessly from u32 so the value stays positive as an i64.
    let pattern = i64::from(0xDEADBEEFu32);
    assembler.fill(1, 4, pattern).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 4] = [0xEF, 0xBE, 0xAD, 0xDE];
    assert_eq!(output.bytes(), &expected);
}
2012
/// Two two-byte fill units of 0x1234 are each written little-endian.
#[test]
fn builder_fill_16bit_value() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.fill(2, 2, 0x1234).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 4] = [0x34, 0x12, 0x34, 0x12];
    assert_eq!(output.bytes(), &expected);
}
2021
/// The `space` builder reserves the requested number of zero bytes.
#[test]
fn builder_space() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.space(4).unwrap();
    let output = assembler.finish().unwrap();
    let expected = [0x00u8; 4];
    assert_eq!(output.bytes(), &expected);
}
2029
/// A `.fill` directive is echoed in the listing with its source text.
#[test]
fn listing_fill_annotation() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(".fill 2, 1, 0x90").unwrap();
    let text = assembler.finish().unwrap().listing();
    assert!(text.contains(".fill 2, 1, 0x90"));
}
2041
/// A `.space` directive is echoed in the listing with its source text.
#[test]
fn listing_space_annotation() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(".space 4").unwrap();
    let text = assembler.finish().unwrap().listing();
    assert!(text.contains(".space 4"));
}
2051
/// An `.align` directive is echoed in the listing with its source text.
#[test]
fn listing_align_annotation() {
    let source = "nop\n.align 4\nnop";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(source).unwrap();
    let text = assembler.finish().unwrap().listing();
    assert!(text.contains(".align 4"));
}
2061
/// An `.org` directive is echoed in the listing with its source text.
#[test]
fn listing_org_annotation() {
    let source = "nop\n.org 0x10\nnop";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.enable_listing();
    assembler.emit(source).unwrap();
    let text = assembler.finish().unwrap().listing();
    assert!(text.contains(".org 0x10"));
}
2071
/// The textual `.org <offset>, <fill>` form pads the gap with the given
/// fill byte.
#[test]
fn org_with_fill_byte() {
    let source = "nop\n.org 0x04, 0xFF\nnop";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 5] = [0x90, 0xFF, 0xFF, 0xFF, 0x90];
    assert_eq!(output.bytes(), &expected);
}
2082
/// With AT&T syntax selected, `movq $1, %rax` assembles to the five-byte
/// sequence `B8 01 00 00 00`.
#[test]
fn att_syntax_basic() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.syntax(Syntax::Att);
    assembler.emit("movq $1, %rax").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 5] = [0xB8, 0x01, 0x00, 0x00, 0x00];
    assert_eq!(output.bytes(), &expected);
}
2094
/// With `max_statements` set to 3, three statements are accepted and a
/// further `emit` fails with `ResourceLimitExceeded` for "statements".
#[test]
fn resource_limit_max_statements() {
    let capped = ResourceLimits {
        max_statements: 3,
        ..ResourceLimits::default()
    };
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.limits(capped);

    // Exactly at the limit: still fine.
    assembler.emit("nop; nop; nop").unwrap();

    // Anything beyond the limit must be rejected.
    match assembler.emit("nop; nop").unwrap_err() {
        AsmError::ResourceLimitExceeded { resource, limit } => {
            assert_eq!(resource, "statements");
            assert_eq!(limit, 3);
        }
        other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
    }
}
2116
/// With `max_labels` set to 2, the third label defined through the builder
/// fails with `ResourceLimitExceeded` for "labels".
#[test]
fn resource_limit_max_labels() {
    let capped = ResourceLimits {
        max_labels: 2,
        ..ResourceLimits::default()
    };
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.limits(capped);

    assembler.label("a").unwrap();
    assembler.label("b").unwrap();

    match assembler.label("c").unwrap_err() {
        AsmError::ResourceLimitExceeded { resource, limit } => {
            assert_eq!(resource, "labels");
            assert_eq!(limit, 2);
        }
        other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
    }
}
2135
/// The label limit also applies to labels defined in emitted source text:
/// with `max_labels` = 1 the second labelled line is rejected.
#[test]
fn resource_limit_max_labels_via_emit() {
    let capped = ResourceLimits {
        max_labels: 1,
        ..ResourceLimits::default()
    };
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.limits(capped);

    assembler.emit("a: nop").unwrap();

    match assembler.emit("b: nop").unwrap_err() {
        AsmError::ResourceLimitExceeded { resource, limit } => {
            assert_eq!(resource, "labels");
            assert_eq!(limit, 1);
        }
        other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
    }
}
2153
/// `max_output_bytes` is an inclusive bound: four one-byte instructions fit
/// a limit of 4, while the same input under a limit of 3 fails in `emit`.
#[test]
fn resource_limit_max_output_bytes() {
    let four_nops = "nop; nop; nop; nop";

    // Limit equal to the output size: accepted.
    let mut within = Assembler::new(Arch::X86_64);
    within.limits(ResourceLimits {
        max_output_bytes: 4,
        ..ResourceLimits::default()
    });
    within.emit(four_nops).unwrap();
    let finished = within.finish();
    assert!(finished.is_ok());

    // Limit one byte short: rejected while emitting.
    let mut over = Assembler::new(Arch::X86_64);
    over.limits(ResourceLimits {
        max_output_bytes: 3,
        ..ResourceLimits::default()
    });
    match over.emit(four_nops).unwrap_err() {
        AsmError::ResourceLimitExceeded { resource, limit } => {
            assert_eq!(resource, "output bytes");
            assert_eq!(limit, 3);
        }
        other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
    }
}
2180
/// The default limits are generous enough for a 1000-statement input.
#[test]
fn resource_limits_default_does_not_interfere() {
    let mut assembler = Assembler::new(Arch::X86_64);
    // 1000 copies of "nop; " — the same text the statement-per-item form builds.
    let source = "nop; ".repeat(1000);
    assembler.emit(&source).unwrap();
    let output = assembler.finish().unwrap();
    assert_eq!(output.len(), 1000);
}
2191
/// A macro that invokes itself is stopped by `max_recursion_depth`, and the
/// error names the "macro recursion depth" resource.
#[test]
fn resource_limit_max_recursion_depth() {
    let capped = ResourceLimits {
        max_recursion_depth: 3,
        ..ResourceLimits::default()
    };
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.limits(capped);

    // `boom` expands to a call of `boom`, recursing without end.
    let self_recursive = ".macro boom\nboom\n.endm\nboom";
    match assembler.emit(self_recursive).unwrap_err() {
        AsmError::ResourceLimitExceeded { resource, limit } => {
            assert_eq!(resource, "macro recursion depth");
            assert_eq!(limit, 3);
        }
        other => panic!("expected ResourceLimitExceeded, got {:?}", other),
    }
}
2211
/// `encode_one` turns a single `nop` into its one-byte encoding.
#[test]
fn encode_one_nop() {
    let assembler = Assembler::new(Arch::X86_64);
    let encoded = assembler.encode_one("nop").unwrap();
    assert_eq!(encoded, alloc::vec![0x90]);
}
2220
/// `encode_one` turns a single `ret` into its one-byte encoding.
#[test]
fn encode_one_ret() {
    let assembler = Assembler::new(Arch::X86_64);
    let encoded = assembler.encode_one("ret").unwrap();
    assert_eq!(encoded, alloc::vec![0xC3]);
}
2227
/// Empty input is not an error for `encode_one`; it yields no bytes.
#[test]
fn encode_one_empty_input() {
    let assembler = Assembler::new(Arch::X86_64);
    let encoded = assembler.encode_one("").unwrap();
    assert!(encoded.is_empty());
}
2234
/// `encode_one` refuses input that is a label definition.
#[test]
fn encode_one_rejects_label() {
    let assembler = Assembler::new(Arch::X86_64);
    let outcome = assembler.encode_one("foo:");
    assert!(outcome.is_err());
}
2240
/// `encode_one` is a side-effect-free query: after calling it, finishing
/// the assembler still yields an empty result.
#[test]
fn encode_one_does_not_affect_state() {
    let assembler = Assembler::new(Arch::X86_64);
    let _discarded = assembler.encode_one("nop").unwrap();
    let output = assembler.finish().unwrap();
    assert!(output.is_empty());
}
2250
/// A symbol defined through `define_preprocessor_symbol` makes an `.ifdef`
/// block on that symbol active.
#[test]
fn define_preprocessor_symbol_ifdef() {
    let guarded = ".ifdef DEBUG\nnop\n.endif";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.define_preprocessor_symbol("DEBUG", 1);
    assembler.emit(guarded).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 1] = [0x90];
    assert_eq!(output.bytes(), &expected);
}
2261
/// Without the symbol defined, the `.ifdef` body is skipped and only the
/// trailing `ret` is assembled.
#[test]
fn define_preprocessor_symbol_skipped_when_missing() {
    let guarded = ".ifdef DEBUG\nnop\n.endif\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(guarded).unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 1] = [0xC3];
    assert_eq!(output.bytes(), &expected);
}
2270
/// The `dq` builder emits a 64-bit value as eight little-endian bytes.
#[test]
fn builder_dq() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.dq(0xDEAD_BEEF_CAFE_BABE).unwrap();
    let output = assembler.finish().unwrap();
    let expected = 0xDEAD_BEEF_CAFE_BABEu64.to_le_bytes();
    assert_eq!(output.bytes(), &expected);
}
2280
/// After `reset`, previously emitted code is gone: only what is emitted
/// afterwards shows up in the result.
///
/// NOTE(review): no non-default configuration is set here, so the
/// "keeps config" half of the name is not actually exercised.
#[test]
fn reset_clears_state_keeps_config() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("nop").unwrap();
    assembler.reset();
    assembler.emit("ret").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 1] = [0xC3];
    assert_eq!(output.bytes(), &expected);
}
2293
/// An assembler can be reused after `reset`: the second program assembles
/// on its own, without the first program's bytes.
#[test]
fn reset_allows_reuse() {
    let mut assembler = Assembler::new(Arch::X86_64);

    // First program, discarded by the reset.
    assembler.emit("nop").unwrap();
    assembler.reset();

    // Second program.
    assembler.emit("ret").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 1] = [0xC3];
    assert_eq!(output.bytes(), &expected);
}
2304
/// `current_fragment_count` starts at zero and becomes non-zero once
/// something has been emitted.
#[test]
fn current_fragment_count_tracks_emissions() {
    let mut assembler = Assembler::new(Arch::X86_64);
    let before = assembler.current_fragment_count();
    assert_eq!(before, 0);
    assembler.emit("nop").unwrap();
    let after = assembler.current_fragment_count();
    assert!(after > 0);
}
2314
/// Finishing an assembler that never received input yields an empty result
/// according to `is_empty`, `len` and `bytes` alike.
#[test]
fn empty_assembly_result() {
    let output = Assembler::new(Arch::X86_64).finish().unwrap();
    assert!(output.is_empty());
    assert_eq!(output.len(), 0);
    assert!(output.bytes().is_empty());
}
2325
/// `AssemblyResult::labels` exposes every defined label as a
/// `(name, address)` pair.
#[test]
fn labels_slice_access() {
    let source = "start:\nnop\nend:\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let labels = output.labels();
    assert_eq!(labels.len(), 2);

    let has_label = |wanted: &str| labels.iter().any(|(name, _)| name == wanted);
    assert!(has_label("start"));
    assert!(has_label("end"));
}
2340
/// The one-shot `assemble_with` helper resolves external labels: a call
/// from base 0x1000 to 0x2000 encodes `E8` plus a rel32 of
/// 0x2000 - (0x1000 + 5) = 0x0FFB.
#[test]
fn assemble_with_external_labels() {
    let externals = [("target", 0x2000)];
    let code = crate::assemble_with("call target", Arch::X86_64, 0x1000, &externals).unwrap();
    assert_eq!(code[0], 0xE8);
    let displacement: [u8; 4] = code[1..5].try_into().unwrap();
    assert_eq!(i32::from_le_bytes(displacement), 0x0FFB);
}
2354
/// Two bad mnemonics in one input are both collected and reported together
/// as `AsmError::Multiple`.
#[test]
fn multiple_errors_collected() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("badmnem1\nbadmnem2").unwrap();
    let err = assembler.finish().unwrap_err();
    match &err {
        AsmError::Multiple { errors } => assert_eq!(errors.len(), 2),
        _ => panic!("expected Multiple error, got: {err}"),
    }
}
2368
/// On ARM, `mov r0, 0` is emitted as a single unmodified 32-bit
/// instruction word.
#[cfg(feature = "arm")]
#[test]
fn optimizer_noop_for_arm() {
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit("mov r0, 0").unwrap();
    let output = assembler.finish().unwrap();
    assert_eq!(output.len(), 4);
    let expected: [u8; 4] = [0x00, 0x00, 0xA0, 0xE3];
    assert_eq!(output.bytes(), &expected);
}
2382
/// `.org 0x10` in source text moves the second `nop` to offset 0x10 and
/// zero-fills everything in between.
#[test]
fn org_directive_via_emit() {
    let source = "nop\n.org 0x10\nnop";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    assert_eq!(output.len(), 17);

    let code = output.bytes();
    assert_eq!(code[0], 0x90);
    assert_eq!(code[0x10], 0x90);
    // Every byte in the gap must be zero padding.
    for pad in code[1..0x10].iter().copied() {
        assert_eq!(pad, 0x00);
    }
}
2399
/// Even without calling `enable_listing`, the listing shows label names
/// and the hex bytes of the assembled code.
#[test]
fn listing_includes_label_and_hex() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("start:\nnop\nret").unwrap();
    let text = assembler.finish().unwrap().listing();

    let has_label = text.contains("start");
    assert!(has_label, "listing should contain label 'start'");

    let has_nop_byte = text.contains("90");
    assert!(has_nop_byte, "listing should contain '90' for nop");

    // Accept either hex case for the `ret` byte.
    let has_ret_byte = text.contains("C3") || text.contains("c3");
    assert!(has_ret_byte, "listing should contain 'C3' for ret");
}
2423
/// The listing mentions the configured base address, with or without
/// leading zero padding.
#[test]
fn listing_with_base_address_format() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.base_address(0x401000);
    assembler.emit("nop\nret").unwrap();
    let text = assembler.finish().unwrap().listing();

    let shows_base = text.contains("00401000") || text.contains("401000");
    assert!(shows_base, "listing should contain base address");
}
2437
/// The `org` builder method behaves like the directive: a `nop`, padding up
/// to offset 0x10, then another `nop` gives 17 bytes in total.
#[test]
fn org_builder_method() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("nop").unwrap();
    assembler.org(0x10);
    assembler.emit("nop").unwrap();
    let output = assembler.finish().unwrap();
    let total = output.len();
    assert_eq!(total, 17);
}
2449
/// `jecxz` only has an 8-bit displacement form, so a target 200 bytes away
/// must be relaxed into a longer sequence.
///
/// The expected bytes are checked through the public `bytes()` accessor and
/// in a single comparison, so a failure prints the whole emitted prefix
/// rather than only the first mismatching byte.
#[test]
fn jecxz_relaxes_to_long_form() {
    let mut asm = Assembler::new(Arch::X86_64);
    asm.emit("jecxz target").unwrap();
    // Put the target out of reach of a signed 8-bit displacement.
    asm.space(200).unwrap();
    asm.emit("target:\nnop").unwrap();
    let result = asm.finish().unwrap();

    // Presumably: `67 E3 02` is jecxz over the short jump, `EB 05` skips the
    // near jump when ECX != 0, and `E9 C8 00 00 00` is the near jump whose
    // rel32 of 200 covers the `.space` gap — confirm against the encoder.
    let expected: [u8; 10] = [0x67, 0xE3, 0x02, 0xEB, 0x05, 0xE9, 0xC8, 0x00, 0x00, 0x00];
    assert_eq!(&result.bytes()[..10], &expected);
}
2475
/// When the target immediately follows, `jecxz` keeps its compact form with
/// a zero displacement and is followed directly by the `nop` at the target.
///
/// The bytes are read through the public `bytes()` accessor and compared in
/// one assertion so a failure shows the complete emitted prefix.
#[test]
fn jecxz_relaxes_to_short_form_when_near() {
    let mut asm = Assembler::new(Arch::X86_64);
    asm.emit("jecxz target").unwrap();
    asm.emit("target:\nnop").unwrap();
    let result = asm.finish().unwrap();

    // 67 E3 00 = prefixed jecxz with displacement 0; 90 = the `nop` at `target`.
    let expected: [u8; 4] = [0x67, 0xE3, 0x00, 0x90];
    assert_eq!(&result.bytes()[..4], &expected);
}
2490
/// A lone error is returned as itself and not wrapped in
/// `AsmError::Multiple`.
#[test]
fn single_error_not_wrapped_in_multiple() {
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit("badmnem").unwrap();
    let failure = assembler.finish().unwrap_err();
    let is_plain_unknown = matches!(failure, AsmError::UnknownMnemonic { .. });
    assert!(is_plain_unknown);
}
2501
/// One bad mnemonic surrounded by valid instructions still produces a single
/// `UnknownMnemonic` error from `finish`.
#[test]
fn errors_collected_with_valid_instructions() {
    let source = "nop\nbadmnem\nret";
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.emit(source).unwrap();
    let failure = assembler.finish().unwrap_err();
    let is_unknown = matches!(failure, AsmError::UnknownMnemonic { .. });
    assert!(is_unknown);
}
2513
/// Errors accumulate across separate `emit` calls; three bad inputs give a
/// `Multiple` error holding three entries.
#[test]
fn errors_collected_across_emit_calls() {
    let mut assembler = Assembler::new(Arch::X86_64);
    for bad_input in ["bad1", "bad2", "bad3"].iter() {
        assembler.emit(bad_input).unwrap();
    }
    let err = assembler.finish().unwrap_err();
    match &err {
        AsmError::Multiple { errors } => assert_eq!(errors.len(), 3),
        _ => panic!("expected Multiple error with 3 errors, got: {err}"),
    }
}
2528
/// `reset` discards pending errors, so a later valid program assembles
/// cleanly.
#[test]
fn reset_clears_errors() {
    let mut assembler = Assembler::new(Arch::X86_64);

    // Queue up an error, then throw it away.
    assembler.emit("badmnem").unwrap();
    assembler.reset();

    assembler.emit("nop").unwrap();
    let output = assembler.finish().unwrap();
    let expected: [u8; 1] = [0x90];
    assert_eq!(output.bytes(), &expected);
}
2540
/// With `max_errors` set to 2, emitting three bad statements fails in
/// `emit` with `ResourceLimitExceeded`.
#[test]
fn max_errors_limit_enforced() {
    let capped = ResourceLimits {
        max_errors: 2,
        ..ResourceLimits::default()
    };
    let mut assembler = Assembler::new(Arch::X86_64);
    assembler.limits(capped);

    let failure = assembler.emit("bad1\nbad2\nbad3").unwrap_err();
    let is_limit_error = matches!(failure, AsmError::ResourceLimitExceeded { .. });
    assert!(is_limit_error);
}
2556
/// `ldr x0, =imm` on AArch64 places the constant in a literal pool whose
/// final eight bytes hold the value little-endian.
#[test]
fn literal_pool_basic_x_reg() {
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit("ldr x0, =0x12345678").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    assert!(
        code.len() >= 8,
        "expected at least 8 bytes, got {}",
        code.len()
    );
    let entry: [u8; 8] = code[code.len() - 8..].try_into().unwrap();
    let pooled = u64::from_le_bytes(entry);
    assert_eq!(pooled, 0x12345678, "pool should contain the constant");
}
2577
/// `ldr w0, =imm` on AArch64 uses a four-byte pool entry, found at the end
/// of the output.
#[test]
fn literal_pool_basic_w_reg() {
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit("ldr w0, =0x42").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    let pooled = u32::from_le_bytes(entry);
    assert_eq!(pooled, 0x42, "pool should contain the constant");
}
2590
/// An explicit `.ltorg` flushes the AArch64 literal pool; the output is at
/// least 12 bytes and ends with the pooled 64-bit constant.
#[test]
fn literal_pool_with_ltorg() {
    let source = "ldr x0, =0xCAFE\n.ltorg";
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    assert!(code.len() >= 12);
    let entry: [u8; 8] = code[code.len() - 8..].try_into().unwrap();
    assert_eq!(u64::from_le_bytes(entry), 0xCAFE);
}
2605
/// Two loads of the same constant share pool storage, keeping the total
/// output within 24 bytes.
#[test]
fn literal_pool_deduplication() {
    let source = "ldr x0, =0x1234\nldr x1, =0x1234";
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let total = output.bytes().len();
    assert!(
        total <= 24,
        "expected <= 24 bytes with dedup, got {}",
        total
    );
}
2623
/// Two distinct constants both end up in the AArch64 literal pool; the last
/// sixteen bytes hold them in either order.
#[test]
fn literal_pool_multiple_values() {
    let source = "ldr x0, =0xAAAA\nldr x1, =0xBBBB";
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let end = code.len();

    let last: [u8; 8] = code[end - 8..end].try_into().unwrap();
    let second = u64::from_le_bytes(last);
    let previous: [u8; 8] = code[end - 16..end - 8].try_into().unwrap();
    let first = u64::from_le_bytes(previous);

    let in_order = first == 0xAAAA && second == 0xBBBB;
    let swapped = first == 0xBBBB && second == 0xAAAA;
    assert!(
        in_order || swapped,
        "pool should contain both values, got {:#x} and {:#x}",
        first,
        second
    );
}
2642
/// The instruction generated for `ldr x0, =imm` must be an AArch64
/// `LDR (literal)` with a forward (positive) PC-relative offset to the pool.
///
/// The 19-bit offset field is sign-extended before the sign check; a plain
/// mask-and-cast always yields a non-negative number, which would let a
/// backward reference pass as "positive".
#[test]
fn literal_pool_ldr_encodes_pc_relative() {
    let mut asm = Assembler::new(Arch::Aarch64);
    asm.emit("ldr x0, =0xFF").unwrap();
    let result = asm.finish().unwrap();
    let bytes = result.bytes();
    let word = u32::from_le_bytes(bytes[0..4].try_into().unwrap());

    // Bits [31:30]: opc, 01 selects the 64-bit variant.
    assert_eq!((word >> 30) & 0b11, 0b01, "opc should be 01 for 64-bit LDR");
    // Bits [29:24]: fixed pattern of the load-register-literal class.
    assert_eq!(
        (word >> 24) & 0b111111,
        0b011000,
        "should be LDR literal encoding"
    );
    // Bits [4:0]: destination register.
    assert_eq!(word & 0x1F, 0, "Rt should be X0");

    // Bits [23:5]: signed 19-bit offset. Shift the field's sign bit (bit 18)
    // up to bit 31 and arithmetically back down to sign-extend it.
    let raw_imm19 = ((word >> 5) & 0x7FFFF) as i32;
    let imm19 = (raw_imm19 << 13) >> 13;
    assert!(imm19 > 0, "imm19 should be positive (pool is after instr)");
}
2666
/// A full-width 64-bit constant is stored unchanged in the AArch64 pool.
#[test]
fn literal_pool_large_64bit_value() {
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit("ldr x0, =0xDEADBEEFCAFEBABE").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 8] = code[code.len() - 8..].try_into().unwrap();
    assert_eq!(u64::from_le_bytes(entry), 0xDEADBEEFCAFEBABE);
}
2677
/// `=-1` is pooled as its 64-bit two's-complement pattern (all ones).
#[test]
fn literal_pool_negative_value() {
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit("ldr x0, =-1").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 8] = code[code.len() - 8..].try_into().unwrap();
    assert_eq!(u64::from_le_bytes(entry), 0xFFFFFFFFFFFFFFFF);
}
2689
/// `.pool` flushes the AArch64 literal pool, leaving the constant in the
/// final eight bytes.
#[test]
fn literal_pool_pool_directive() {
    let source = "ldr x0, =0xBEEF\n.pool";
    let mut assembler = Assembler::new(Arch::Aarch64);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 8] = code[code.len() - 8..].try_into().unwrap();
    assert_eq!(u64::from_le_bytes(entry), 0xBEEF);
}
2701
/// `reset` drops pending literal-pool entries: a `nop` emitted afterwards is
/// the only thing in the output.
#[test]
fn literal_pool_reset_clears_pool() {
    let mut assembler = Assembler::new(Arch::Aarch64);

    // Queue a pool entry, then discard it.
    assembler.emit("ldr x0, =0x1234").unwrap();
    assembler.reset();

    assembler.emit("nop").unwrap();
    let output = assembler.finish().unwrap();
    let nop_word: [u8; 4] = [0x1F, 0x20, 0x03, 0xD5];
    assert_eq!(output.bytes(), &nop_word);
}
2712
/// `ldr r0, =imm` on ARM emits the load plus a four-byte pool entry holding
/// the constant at the end of the output.
#[test]
fn arm_literal_pool_basic() {
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit("ldr r0, =0x12345678").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    assert!(
        code.len() >= 8,
        "expected at least 8 bytes, got {}",
        code.len()
    );
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    let pooled = u32::from_le_bytes(entry);
    assert_eq!(pooled, 0x12345678, "pool should contain the constant");
}
2733
/// A small decimal constant loaded with `ldr rN, =imm` ends up as the last
/// four bytes of the ARM output.
#[test]
fn arm_literal_pool_small_value() {
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit("ldr r3, =42").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    assert_eq!(u32::from_le_bytes(entry), 42);
}
2745
/// `=-1` is pooled on ARM as its 32-bit two's-complement pattern.
#[test]
fn arm_literal_pool_negative_value() {
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit("ldr r0, =-1").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    assert_eq!(u32::from_le_bytes(entry), 0xFFFFFFFF);
}
2757
/// Two ARM loads of the same constant share pool storage, keeping the total
/// output within 16 bytes.
#[test]
fn arm_literal_pool_deduplication() {
    let source = "ldr r0, =0xAABB\nldr r1, =0xAABB";
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let total = output.bytes().len();
    assert!(
        total <= 16,
        "expected <=16 bytes with dedup, got {}",
        total
    );
}
2773
/// Two distinct constants both end up in the ARM literal pool; the last
/// eight bytes hold them in either order.
#[test]
fn arm_literal_pool_multiple_values() {
    let source = "ldr r0, =0x1111\nldr r1, =0x2222";
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let end = code.len();

    let last: [u8; 4] = code[end - 4..end].try_into().unwrap();
    let second = u32::from_le_bytes(last);
    let previous: [u8; 4] = code[end - 8..end - 4].try_into().unwrap();
    let first = u32::from_le_bytes(previous);

    let in_order = first == 0x1111 && second == 0x2222;
    let swapped = first == 0x2222 && second == 0x1111;
    assert!(
        in_order || swapped,
        "pool should contain both values, got {:#x} and {:#x}",
        first,
        second
    );
}
2792
/// An explicit `.ltorg` flushes the ARM literal pool; the output is at least
/// eight bytes and ends with the pooled constant.
#[test]
fn arm_literal_pool_with_ltorg() {
    let source = "ldr r0, =0xCAFE\n.ltorg";
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    assert!(code.len() >= 8);
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    assert_eq!(u32::from_le_bytes(entry), 0xCAFE);
}
2805
/// `.pool` flushes the ARM literal pool, leaving the constant in the final
/// four bytes.
#[test]
fn arm_literal_pool_pool_directive() {
    let source = "ldr r0, =0xBEEF\n.pool";
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    assert_eq!(u32::from_le_bytes(entry), 0xBEEF);
}
2817
/// The instruction generated for `ldr r0, =imm` on ARM must be a word load
/// into R0 that uses PC (R15) as its base register.
#[test]
fn arm_literal_pool_ldr_encodes_pc_relative() {
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit("ldr r0, =0xFF").unwrap();
    let output = assembler.finish().unwrap();
    let first_word: [u8; 4] = output.bytes()[0..4].try_into().unwrap();
    let insn = u32::from_le_bytes(first_word);

    let class = (insn >> 26) & 0b11;
    let base_reg = (insn >> 16) & 0xF;
    let dest_reg = (insn >> 12) & 0xF;
    let load_bit = (insn >> 20) & 1;

    assert_eq!(class, 0b01, "should be load/store word encoding");
    assert_eq!(base_reg, 15, "Rn should be PC (R15)");
    assert_eq!(dest_reg, 0, "Rd should be R0");
    assert_eq!(load_bit, 1, "should be a load");
}
2840
/// ARM pool entries are four bytes regardless of the destination register:
/// two loads with two distinct constants fit within 16 bytes.
#[test]
fn arm_literal_pool_entry_always_4_bytes() {
    let source = "ldr r0, =0x1\nldr r15, =0x2";
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit(source).unwrap();
    let output = assembler.finish().unwrap();
    let total = output.bytes().len();
    assert!(
        total <= 16,
        "ARM pool entries should be 4 bytes each, got {} total",
        total
    );
}
2856
/// A full-width 32-bit hex constant is stored unchanged in the ARM pool.
#[test]
fn arm_literal_pool_hex_large() {
    let mut assembler = Assembler::new(Arch::Arm);
    assembler.emit("ldr r5, =0xDEADBEEF").unwrap();
    let output = assembler.finish().unwrap();
    let code = output.bytes();
    let entry: [u8; 4] = code[code.len() - 4..].try_into().unwrap();
    assert_eq!(u32::from_le_bytes(entry), 0xDEADBEEF);
}
2867}