1#![allow(clippy::arithmetic_side_effects)]
2use combine::{
14 attempt, between,
15 char::{alpha_num, char, digit, hex_digit, spaces, string},
16 combine_parse_partial, combine_parser_impl,
17 easy::{Error, Errors, Info},
18 eof, many, many1, one_of, optional, parse_mode, parser, sep_by, skip_many,
19 stream::state::{SourcePosition, State},
20 Parser, Stream,
21};
22
/// A single operand of a directive or instruction.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Operand {
    /// A register, holding the number written after the `r` prefix
    /// (`r3` is `Register(3)`).
    Register(i64),
    /// An integer literal (`42`, `+37`, `0x1f`, `-18`).
    Integer(i64),
    /// A bracketed memory reference: the register number followed by the
    /// signed offset (`[r3+5]` is `Memory(3, 5)`; a missing offset is `0`).
    Memory(i64, i64),
    /// An identifier used as an operand.
    Label(String),
}
35
36#[derive(Debug, PartialEq, Eq)]
38pub enum Statement {
39 Label { name: String },
41 Directive {
43 name: String,
44 operands: Vec<Operand>,
45 },
46 Instruction {
48 name: String,
49 operands: Vec<Operand>,
50 },
51}
52
53parser! {
54 fn ident[I]()(I) -> String where [I: Stream<Item=char>] {
55 many1(alpha_num().or(char('_')))
56 }
57}
58
59parser! {
60 fn mnemonic[I]()(I) -> String where [I: Stream<Item=char>] {
61 many1(alpha_num())
62 }
63}
64
65parser! {
66 fn integer[I]()(I) -> i64 where [I: Stream<Item=char>] {
67 let sign = optional(one_of("-+".chars()).skip(skip_many(char(' ')))).map(|x| match x {
68 Some('-') => -1,
69 _ => 1,
70 });
71 let hex = string("0x")
72 .with(many1(hex_digit()))
73 .map(|x: String| u64::from_str_radix(&x, 16).unwrap_or(0) as i64);
74 let dec = many1(digit()).map(|x: String| x.parse::<i64>().unwrap_or(0));
75 (sign, attempt(hex).or(dec))
76 .map(|(s, x)| s * x)
77 }
78}
79
80parser! {
81 fn register[I]()(I) -> i64 where [I: Stream<Item=char>] {
82 char('r')
83 .with(many1(digit()))
84 .map(|x: String| x.parse::<i64>().unwrap_or(0))
85 }
86}
87
88parser! {
89 fn operand[I]()(I) -> Operand where [I: Stream<Item=char>] {
90 let register_operand = register().map(Operand::Register);
91 let immediate = integer().map(Operand::Integer);
92 let memory = between(
93 char('['),
94 char(']'),
95 (register().skip(skip_many(char(' '))), optional(integer())),
96 )
97 .map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
98 let label = ident().map(Operand::Label);
99 register_operand
100 .or(immediate)
101 .or(memory)
102 .or(label)
103 }
104}
105
106parser! {
107 fn label[I]()(I) -> Statement where [I: Stream<Item=char>] {
108 (ident(), char(':'))
109 .map(|t| Statement::Label { name: t.0 })
110 }
111}
112
113parser! {
114 fn directive[I]()(I) -> Statement where [I: Stream<Item=char>] {
115 let operands = sep_by(operand(), char(',').skip(skip_many(char(' '))));
116 (char('.').with(many1(alpha_num())).skip(skip_many(char(' '))), operands)
117 .map(|t| Statement::Directive { name: t.0, operands: t.1 })
118 }
119}
120
121parser! {
122 fn instruction[I]()(I) -> Statement where [I: Stream<Item=char>] {
123 let operands = sep_by(operand(), char(',').skip(skip_many(char(' '))));
124 (mnemonic().skip(skip_many(char(' '))), operands)
125 .map(|t| Statement::Instruction { name: t.0, operands: t.1 })
126 }
127}
128
129fn format_info(info: &Info<char, &str>) -> String {
130 match *info {
131 Info::Token(x) => format!("{x:?}"),
132 Info::Range(x) => format!("{x:?}"),
133 Info::Owned(ref x) => x.clone(),
134 Info::Borrowed(x) => x.to_string(),
135 }
136}
137
138fn format_error(error: &Error<char, &str>) -> String {
139 match *error {
140 Error::Unexpected(ref x) => format!("unexpected {}", format_info(x)),
141 Error::Expected(ref x) => format!("expected {}", format_info(x)),
142 Error::Message(ref x) => format_info(x),
143 Error::Other(ref x) => format!("{x:?}"),
144 }
145}
146
147fn format_parse_error(parse_error: &Errors<char, &str, SourcePosition>) -> String {
148 format!(
149 "Parse error at line {} column {}: {}",
150 parse_error.position.line,
151 parse_error.position.column,
152 parse_error
153 .errors
154 .iter()
155 .map(format_error)
156 .collect::<Vec<String>>()
157 .join(", ")
158 )
159}
160
161pub fn parse(input: &str) -> Result<Vec<Statement>, String> {
165 match spaces()
166 .with(many(
167 attempt(label())
168 .or(directive())
169 .or(instruction())
170 .skip(spaces()),
171 ))
172 .skip(eof())
173 .easy_parse(State::with_positioner(input, SourcePosition::default()))
174 {
175 Ok((insts, _)) => Ok(insts),
176 Err(err) => Err(format_parse_error(&err)),
177 }
178}
179
#[cfg(test)]
mod tests {
    use super::{
        ident, instruction, integer, mnemonic, operand, parse, register,
        Operand::{self, Integer, Memory, Register},
        Statement,
    };
    use combine::Parser;

    /// Builds the `Statement::Instruction` a test expects.
    fn insn(name: &str, operands: Vec<Operand>) -> Statement {
        Statement::Instruction {
            name: name.to_string(),
            operands,
        }
    }

    // The sub-parser tests check both the parsed value and the unconsumed
    // remainder of the input.

    #[test]
    fn test_ident() {
        for (input, name, rest) in [
            ("entrypoint", "entrypoint", ""),
            ("lbb_1", "lbb_1", ""),
            ("exit:", "exit", ":"),
        ] {
            assert_eq!(ident().parse(input), Ok((name.to_string(), rest)));
        }
    }

    #[test]
    fn test_mnemonic() {
        for (input, name, rest) in [
            ("nop", "nop", ""),
            ("add32", "add32", ""),
            ("add32*", "add32", "*"),
        ] {
            assert_eq!(mnemonic().parse(input), Ok((name.to_string(), rest)));
        }
    }

    #[test]
    fn test_integer() {
        for (input, value) in [
            ("0", 0),
            ("42", 42),
            ("+42", 42),
            ("-42", -42),
            ("0x0", 0),
            ("0x123456789abcdef0", 0x123456789abcdef0),
            ("-0x1f", -31),
        ] {
            assert_eq!(integer().parse(input), Ok((value, "")));
        }
    }

    #[test]
    fn test_register() {
        for (input, number) in [("r0", 0), ("r15", 15)] {
            assert_eq!(register().parse(input), Ok((number, "")));
        }
    }

    #[test]
    fn test_operand() {
        for (input, expected) in [
            ("r0", Register(0)),
            ("r15", Register(15)),
            ("0", Integer(0)),
            ("42", Integer(42)),
            ("[r1]", Memory(1, 0)),
            ("[r3+5]", Memory(3, 5)),
            ("[r3+0x1f]", Memory(3, 31)),
            ("[r3-0x1f]", Memory(3, -31)),
            ("[r5 + 3]", Memory(5, 3)),
            ("[r11 - 0x30]", Memory(11, -48)),
        ] {
            assert_eq!(operand().parse(input), Ok((expected, "")));
        }
    }

    #[test]
    fn test_instruction() {
        let cases = [
            ("exit", insn("exit", vec![])),
            ("call 2", insn("call", vec![Integer(2)])),
            ("addi r1, 2", insn("addi", vec![Register(1), Integer(2)])),
            (
                "ldxb r2, [r1+12]",
                insn("ldxb", vec![Register(2), Memory(1, 12)]),
            ),
            ("lsh r3, 0x8", insn("lsh", vec![Register(3), Integer(8)])),
            (
                "jne r3, 0x8, +37",
                insn("jne", vec![Register(3), Integer(8), Integer(37)]),
            ),
            // Spaces after the commas are optional.
            (
                "jne r3,0x8,+37",
                insn("jne", vec![Register(3), Integer(8), Integer(37)]),
            ),
        ];
        for (source, expected) in cases {
            assert_eq!(instruction().parse(source), Ok((expected, "")));
        }
    }

    // The remaining tests go through the public `parse` entry point.

    #[test]
    fn test_empty() {
        assert_eq!(parse(""), Ok(vec![]));
    }

    #[test]
    fn test_exit() {
        assert_eq!(parse("exit"), Ok(vec![insn("exit", vec![])]));
    }

    #[test]
    fn test_lsh() {
        assert_eq!(
            parse("lsh r3, 0x20"),
            Ok(vec![insn("lsh", vec![Register(3), Integer(0x20)])])
        );
    }

    #[test]
    fn test_ja() {
        assert_eq!(parse("ja +1"), Ok(vec![insn("ja", vec![Integer(1)])]));
    }

    #[test]
    fn test_ldxh() {
        assert_eq!(
            parse("ldxh r4, [r1+12]"),
            Ok(vec![insn("ldxh", vec![Register(4), Memory(1, 12)])])
        );
    }

    #[test]
    fn test_tcp_sack() {
        // A longer multi-line program; every line must become one instruction.
        let src = "\
ldxb r2, [r1+12]
ldxb r3, [r1+13]
lsh r3, 0x8
or r3, r2
mov r0, 0x0
jne r3, 0x8, +37
ldxb r2, [r1+23]
jne r2, 0x6, +35
ldxb r2, [r1+14]
add r1, 0xe
and r2, 0xf
lsh r2, 0x2
add r1, r2
mov r0, 0x0
ldxh r4, [r1+12]
add r1, 0x14
rsh r4, 0x2
and r4, 0x3c
mov r2, r4
add r2, 0xffffffec
mov r5, 0x15
mov r3, 0x0
jgt r5, r4, +20
mov r5, r3
lsh r5, 0x20
arsh r5, 0x20
mov r4, r1
add r4, r5
ldxb r5, [r4]
jeq r5, 0x1, +4
jeq r5, 0x0, +12
mov r6, r3
jeq r5, 0x5, +9
ja +2
add r3, 0x1
mov r6, r3
ldxb r3, [r4+1]
add r3, r6
lsh r3, 0x20
arsh r3, 0x20
jsgt r2, r3, -18
ja +1
mov r0, 0x1
exit
";

        let expected = vec![
            insn("ldxb", vec![Register(2), Memory(1, 12)]),
            insn("ldxb", vec![Register(3), Memory(1, 13)]),
            insn("lsh", vec![Register(3), Integer(8)]),
            insn("or", vec![Register(3), Register(2)]),
            insn("mov", vec![Register(0), Integer(0)]),
            insn("jne", vec![Register(3), Integer(8), Integer(37)]),
            insn("ldxb", vec![Register(2), Memory(1, 23)]),
            insn("jne", vec![Register(2), Integer(6), Integer(35)]),
            insn("ldxb", vec![Register(2), Memory(1, 14)]),
            insn("add", vec![Register(1), Integer(14)]),
            insn("and", vec![Register(2), Integer(15)]),
            insn("lsh", vec![Register(2), Integer(2)]),
            insn("add", vec![Register(1), Register(2)]),
            insn("mov", vec![Register(0), Integer(0)]),
            insn("ldxh", vec![Register(4), Memory(1, 12)]),
            insn("add", vec![Register(1), Integer(20)]),
            insn("rsh", vec![Register(4), Integer(2)]),
            insn("and", vec![Register(4), Integer(60)]),
            insn("mov", vec![Register(2), Register(4)]),
            insn("add", vec![Register(2), Integer(4294967276)]),
            insn("mov", vec![Register(5), Integer(21)]),
            insn("mov", vec![Register(3), Integer(0)]),
            insn("jgt", vec![Register(5), Register(4), Integer(20)]),
            insn("mov", vec![Register(5), Register(3)]),
            insn("lsh", vec![Register(5), Integer(32)]),
            insn("arsh", vec![Register(5), Integer(32)]),
            insn("mov", vec![Register(4), Register(1)]),
            insn("add", vec![Register(4), Register(5)]),
            insn("ldxb", vec![Register(5), Memory(4, 0)]),
            insn("jeq", vec![Register(5), Integer(1), Integer(4)]),
            insn("jeq", vec![Register(5), Integer(0), Integer(12)]),
            insn("mov", vec![Register(6), Register(3)]),
            insn("jeq", vec![Register(5), Integer(5), Integer(9)]),
            insn("ja", vec![Integer(2)]),
            insn("add", vec![Register(3), Integer(1)]),
            insn("mov", vec![Register(6), Register(3)]),
            insn("ldxb", vec![Register(3), Memory(4, 1)]),
            insn("add", vec![Register(3), Register(6)]),
            insn("lsh", vec![Register(3), Integer(32)]),
            insn("arsh", vec![Register(3), Integer(32)]),
            insn("jsgt", vec![Register(2), Register(3), Integer(-18)]),
            insn("ja", vec![Integer(1)]),
            insn("mov", vec![Register(0), Integer(1)]),
            insn("exit", vec![]),
        ];

        assert_eq!(parse(src), Ok(expected));
    }

    #[test]
    fn test_error_eof() {
        // `r` must be followed by a digit; the input ends instead.
        let message = "Parse error at line 1 column 6: unexpected end of input, expected digit";
        assert_eq!(parse("lsh r"), Err(message.to_string()));
    }

    #[test]
    fn test_error_unexpected_character() {
        // The error is reported on the second line, after a valid first statement.
        let message = "Parse error at line 2 column 1: unexpected '^', expected letter or digit, expected '_', expected '.', expected whitespaces, expected end of input";
        assert_eq!(parse("exit\n^"), Err(message.to_string()));
    }

    #[test]
    fn test_initial_whitespace() {
        // A leading newline and indentation before the first statement are skipped.
        let src = "
                          exit";
        assert_eq!(parse(src), Ok(vec![insn("exit", vec![])]));
    }
}