1use crate::encoder::traits::ParsedInstruction;
8use crate::error::RasError;
9use std::collections::HashSet;
10
11#[derive(Debug, Clone)]
12pub struct Section {
13 pub name: String,
14 pub flags: SectionFlags,
15}
16
/// Attribute flags carried by a [`Section`].
///
/// The parser in this file sets `alloc` + `exec` for `.text` and
/// `alloc` + `write` for `.data`.
///
/// Derives `PartialEq`/`Eq` so whole flag sets can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionFlags {
    /// Set for both `.text` and `.data`.
    // NOTE(review): presumably "occupies memory at run time" (ELF SHF_ALLOC) — confirm against the object writer.
    pub alloc: bool,
    /// Set for `.text` only.
    pub exec: bool,
    /// Set for `.data` only.
    pub write: bool,
}
23
/// A label definition found in the assembly text.
///
/// Derives `PartialEq`/`Eq` so symbols can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Symbol {
    /// Label text, i.e. everything before the `:` on the defining line.
    pub name: String,
    /// Whether the symbol was declared with `.global` / `.globl`.
    pub global: bool,
    /// Name of the section that was current when the label was defined.
    pub section: String,
}
30
31#[derive(Debug, Clone)]
32pub enum Line {
33 Label(Symbol),
34 Instruction(ParsedInstruction),
35 Data(Vec<u8>),
36}
37
38pub struct ParsedAssembly {
39 pub sections: Vec<Section>,
40 pub symbols: Vec<Symbol>,
41 pub instructions: Vec<ParsedInstruction>,
42 pub lines: Vec<Line>,
43}
44
45pub struct AssemblyParser {
46 current_section: String,
47 sections: Vec<Section>,
48 symbols: Vec<Symbol>,
49 instructions: Vec<ParsedInstruction>,
50 lines: Vec<Line>,
51 pending_globals: HashSet<String>,
52}
53
54impl Default for AssemblyParser {
55 fn default() -> Self {
56 Self::new()
57 }
58}
59
60impl AssemblyParser {
61 pub fn new() -> Self {
62 Self {
63 current_section: ".text".to_string(),
64 sections: Vec::new(),
65 symbols: Vec::new(),
66 instructions: Vec::new(),
67 lines: Vec::new(),
68 pending_globals: HashSet::new(),
69 }
70 }
71
72 pub fn parse(&mut self, text: &str) -> Result<ParsedAssembly, RasError> {
73 for line in text.lines() {
74 let line = line.trim();
75 if line.is_empty() || line.starts_with('#') || line.starts_with("//") {
76 continue;
77 }
78
79 if let Some(colon_pos) = line.find(':') {
82 let before = &line[..colon_pos];
83 if !before.is_empty() && !before.contains(char::is_whitespace) {
84 let label = before.trim();
85 let global = self.pending_globals.remove(label);
86 let sym = Symbol {
87 name: label.to_string(),
88 global,
89 section: self.current_section.clone(),
90 };
91 self.symbols.push(sym.clone());
92 self.lines.push(Line::Label(sym));
93 let rest = line[colon_pos + 1..].trim();
94 if !rest.is_empty() {
95 if rest.starts_with('.') {
96 self.parse_directive(rest)?;
97 } else {
98 self.parse_instruction(rest)?;
99 }
100 }
101 continue;
102 }
103 }
104
105 if line.starts_with('.') {
106 self.parse_directive(line)?;
107 } else {
108 self.parse_instruction(line)?;
109 }
110 }
111
112 Ok(ParsedAssembly {
113 sections: self.sections.clone(),
114 symbols: self.symbols.clone(),
115 instructions: self.instructions.clone(),
116 lines: self.lines.clone(),
117 })
118 }
119
120 fn parse_directive(&mut self, line: &str) -> Result<(), RasError> {
121 let parts: Vec<&str> = line.split_whitespace().collect();
122 if parts.is_empty() {
123 return Ok(());
124 }
125
126 match parts[0] {
127 ".text" => {
128 self.current_section = ".text".to_string();
129 self.sections.push(Section {
130 name: ".text".to_string(),
131 flags: SectionFlags {
132 alloc: true,
133 exec: true,
134 write: false,
135 },
136 });
137 }
138 ".data" => {
139 self.current_section = ".data".to_string();
140 self.sections.push(Section {
141 name: ".data".to_string(),
142 flags: SectionFlags {
143 alloc: true,
144 exec: false,
145 write: true,
146 },
147 });
148 }
149 ".global" | ".globl" => {
150 if parts.len() < 2 {
151 return Err(RasError::ParseError(
152 ".global requires a symbol name".to_string(),
153 ));
154 }
155 for name in &parts[1..] {
156 self.pending_globals.insert((*name).to_string());
157 }
158 }
159 ".asciz" | ".string" => {
160 let rest = line[parts[0].len()..].trim();
161 if let Some(bytes) = parse_quoted_string_bytes(rest, true) {
162 self.lines.push(Line::Data(bytes));
163 }
164 }
165 ".ascii" => {
166 let rest = line[parts[0].len()..].trim();
167 if let Some(bytes) = parse_quoted_string_bytes(rest, false) {
168 self.lines.push(Line::Data(bytes));
169 }
170 }
171 ".section" | ".align" | ".balign" | ".p2align" | ".byte" | ".short" | ".int"
172 | ".long" | ".quad" | ".zero" | ".space"
173 | ".skip" | ".cfi_startproc" | ".cfi_endproc" | ".cfi_def_cfa" | ".size" | ".type"
174 | ".ident" | ".file" => {}
175 _ => {}
176 }
177
178 Ok(())
179 }
180
181 fn parse_instruction(&mut self, line: &str) -> Result<(), RasError> {
182 let parts: Vec<&str> = line.split_whitespace().collect();
183 if parts.is_empty() {
184 return Ok(());
185 }
186
187 let opcode = parts[0].to_string();
188 let operands: Vec<String> = if parts.len() > 1 {
189 parts[1..]
190 .join(" ")
191 .split(',')
192 .map(|s| s.trim().to_string())
193 .collect()
194 } else {
195 Vec::new()
196 };
197
198 let inst = ParsedInstruction { opcode, operands };
199 self.instructions.push(inst.clone());
200 self.lines.push(Line::Instruction(inst));
201 Ok(())
202 }
203}
204
/// Decodes a double-quoted string literal into bytes.
///
/// `s` is trimmed and must then start and end with `"`. Inside the quotes,
/// `\n`, `\r`, `\t` and `\0` produce the corresponding control byte; a
/// backslash before any other character (including `"` and `\`) produces that
/// character itself. When `null_terminate` is true a trailing `0` byte is
/// appended.
///
/// Characters are emitted as their UTF-8 bytes, so non-ASCII text keeps every
/// byte instead of being truncated to a single low byte.
///
/// Returns `None` when the surrounding quotes are missing or the literal ends
/// in a lone backslash.
fn parse_quoted_string_bytes(s: &str, null_terminate: bool) -> Option<Vec<u8>> {
    let inner = s.trim().strip_prefix('"')?.strip_suffix('"')?;

    let mut out = Vec::with_capacity(inner.len() + usize::from(null_terminate));
    let mut utf8 = [0u8; 4];
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        let decoded = if c == '\\' {
            match chars.next()? {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                '0' => '\0',
                other => other,
            }
        } else {
            c
        };
        out.extend_from_slice(decoded.encode_utf8(&mut utf8).as_bytes());
    }

    if null_terminate {
        out.push(0);
    }
    Some(out)
}
230
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh parser over `src`, panicking if parsing fails.
    fn parse(src: &str) -> ParsedAssembly {
        let mut parser = AssemblyParser::new();
        parser.parse(src).expect("parse failed")
    }

    // ----- trivial input -----

    #[test]
    fn empty_input_produces_empty_output() {
        let parsed = parse("");
        assert!(parsed.lines.is_empty());
        assert!(parsed.symbols.is_empty());
        assert!(parsed.instructions.is_empty());
    }

    #[test]
    fn comments_and_blank_lines_are_skipped() {
        let parsed = parse("# this is a comment\n// also a comment\n\n");
        assert!(parsed.lines.is_empty());
    }

    // ----- labels and `.global` -----

    #[test]
    fn label_is_parsed_and_recorded_in_symbols() {
        let parsed = parse("main:");
        assert_eq!(parsed.symbols.len(), 1);

        let symbol = &parsed.symbols[0];
        assert_eq!(symbol.name, "main");
        assert!(!symbol.global);

        let first_is_main_label = match &parsed.lines[0] {
            Line::Label(s) => s.name == "main",
            _ => false,
        };
        assert!(first_is_main_label);
    }

    #[test]
    fn global_directive_marks_following_label_as_global() {
        let parsed = parse(".globl main\nmain:");
        assert_eq!(parsed.symbols.len(), 1);
        assert!(parsed.symbols[0].global, "symbol should be global");
    }

    #[test]
    fn global_without_label_does_not_panic() {
        let parsed = parse(".global _foo");
        assert!(parsed.symbols.is_empty(), "no label defined yet");
    }

    #[test]
    fn global_missing_name_returns_error() {
        let mut parser = AssemblyParser::new();
        assert!(parser.parse(".global").is_err());
    }

    // ----- instructions -----

    #[test]
    fn instruction_with_no_operands() {
        let parsed = parse("ret");
        assert_eq!(parsed.instructions.len(), 1);

        let inst = &parsed.instructions[0];
        assert_eq!(inst.opcode, "ret");
        assert!(inst.operands.is_empty());
    }

    #[test]
    fn instruction_with_two_operands() {
        let parsed = parse("mov rax, rbx");
        assert_eq!(parsed.instructions[0].opcode, "mov");
        assert_eq!(parsed.instructions[0].operands, &["rax", "rbx"]);
    }

    #[test]
    fn instruction_with_three_operands() {
        let parsed = parse("add x0, x1, x2");
        assert_eq!(parsed.instructions[0].operands, &["x0", "x1", "x2"]);
    }

    // ----- sections and directives -----

    #[test]
    fn text_and_data_directives_push_sections() {
        let parsed = parse(".text\n.data");
        assert_eq!(parsed.sections.len(), 2);

        let text = &parsed.sections[0];
        assert_eq!(text.name, ".text");
        assert!(text.flags.exec);
        assert!(!text.flags.write);

        let data = &parsed.sections[1];
        assert_eq!(data.name, ".data");
        assert!(data.flags.write);
        assert!(!data.flags.exec);
    }

    #[test]
    fn label_section_tracks_current_section() {
        let parsed = parse(".data\nvar:");
        assert_eq!(parsed.symbols[0].section, ".data");
    }

    #[test]
    fn known_no_op_directives_are_silently_ignored() {
        let parsed = parse(".align 16\n.size foo, 4\n.type foo, @function");
        assert!(parsed.lines.is_empty());
    }

    #[test]
    fn mixed_labels_and_instructions_preserve_order() {
        let parsed = parse("foo:\n mov rax, 0\n ret");
        assert_eq!(parsed.lines.len(), 3);
        assert!(matches!(&parsed.lines[0], Line::Label(_)));
        assert!(matches!(&parsed.lines[1], Line::Instruction(_)));
        assert!(matches!(&parsed.lines[2], Line::Instruction(_)));
    }

    #[test]
    fn multiple_globals_on_one_directive() {
        let parsed = parse(".global a b\na:\nb:");
        assert!(parsed.symbols.iter().all(|s| s.global));
        assert_eq!(parsed.symbols.len(), 2);
    }

    // ----- string data -----

    #[test]
    fn inline_label_with_asciz_emits_label_and_data() {
        let parsed = parse(".L_fmt: .asciz \"%lld\\n\"");
        assert_eq!(parsed.symbols.len(), 1);
        assert_eq!(parsed.symbols[0].name, ".L_fmt");
        assert!(matches!(&parsed.lines[0], Line::Label(s) if s.name == ".L_fmt"));

        let data_line = &parsed.lines[1];
        if let Line::Data(bytes) = data_line {
            assert_eq!(bytes, b"%lld\n\0");
        } else {
            panic!("expected Line::Data, got {:?}", data_line);
        }
    }

    #[test]
    fn asciz_standalone_emits_data() {
        let parsed = parse(".asciz \"hello\"");
        assert_eq!(parsed.lines.len(), 1);
        if let Line::Data(bytes) = &parsed.lines[0] {
            assert_eq!(bytes, b"hello\0");
        } else {
            panic!("expected Line::Data");
        }
    }
}