lc3_ensemble/
parse.rs

1//! Parsing assembly source code into an AST.
2//! 
3//! This module is used to convert strings (which represent assembly source code)
4//! into abstract syntax trees that maintain all of the information of the source code
5//! in an easier to handle format.
6//! 
7//! The main function to use from this module is [`parse_ast`], 
8//! which parses an assembly code program into an AST.
9//! 
10//! However, if needed, the internals of this module are also available:
11//! - [`lex`]: the implementation of the lexer/tokenizer
12//! - [`Parser`]: the main logic for the parser
13//! - [`Parse`]: the implementation to "parse" an AST component
14
15pub mod lex;
16
17use std::borrow::Cow;
18use std::ops::Range;
19
20use logos::{Logos, Span};
21
22use crate::ast::asm::{AsmInstr, Directive, Stmt, StmtKind};
23use crate::ast::{ImmOrReg, Offset, OffsetNewErr, PCOffset};
24use lex::{Ident, Token};
25use simple::*;
26
27use self::lex::LexErr;
28
29/// Parses an assembly source code string into a `Vec` of statements.
30/// 
31/// # Example
32/// ```
33/// use lc3_ensemble::parse::parse_ast;
34/// 
35/// let src = "
36///     .orig x3000
37///     THIS: ADD R0, R0, #0
38///     IS: ADD R1, R1, #1
39///     A: ADD R2, R2, #2
40///     PROGRAM: ADD R3, R3, #3
41///     .end
42/// ";
43/// 
44/// let ast = parse_ast(src).unwrap();
45/// assert_eq!(ast.len(), 6);
46/// ```
47pub fn parse_ast(s: &str) -> Result<Vec<Stmt>, ParseErr> {
48    let mut parser = Parser::new(s)?;
49    // Horrendous one-liner version of this:
50    // std::iter::from_fn(|| (!parser.is_empty()).then(|| parser.parse())).collect()
51    std::iter::from_fn(|| match parser.is_empty() {
52        true  => None,
53        false => Some(parser.parse()),
54    }).collect::<Result<Vec<_>, _>>()
55}
56
57enum ParseErrKind {
58    OffsetNew(OffsetNewErr),
59    Lex(LexErr),
60    Parse(Cow<'static, str>)
61}
62impl From<LexErr> for ParseErrKind {
63    fn from(value: LexErr) -> Self {
64        Self::Lex(value)
65    }
66}
67impl From<OffsetNewErr> for ParseErrKind {
68    fn from(value: OffsetNewErr) -> Self {
69        Self::OffsetNew(value)
70    }
71}
72/// Any error that occurs during parsing tokens.
73pub struct ParseErr {
74    /// The brief cause of this error.
75    kind: ParseErrKind,
76    /// Some kind of help (if it exists)
77    help: Cow<'static, str>,
78    /// The location of this error.
79    span: Span
80}
81impl ParseErr {
82    fn new<S: Into<Cow<'static, str>>>(msg: S, span: Span) -> Self {
83        Self { kind: ParseErrKind::Parse(msg.into()), help: Cow::Borrowed(""), span }
84    }
85
86    fn wrap<E: Into<ParseErrKind>>(err: E, span: Span) -> Self {
87        Self { kind: err.into(), help: Cow::Borrowed(""), span }
88    }
89
90    fn with_help<S: Into<Cow<'static, str>>>(mut self, help: S) -> Self {
91        self.help = help.into();
92        self
93    }
94}
95impl std::fmt::Debug for ParseErr {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        f.debug_struct("ParseErr")
98            .field("brief", match &self.kind {
99                ParseErrKind::OffsetNew(s) => s,
100                ParseErrKind::Lex(s) => s,
101                ParseErrKind::Parse(s) => s,
102            })
103            .field("span", &self.span)
104            .finish()
105    }
106}
107impl std::fmt::Display for ParseErr {
108    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109        match &self.kind {
110            ParseErrKind::OffsetNew(e) => e.fmt(f),
111            ParseErrKind::Lex(e) => e.fmt(f),
112            ParseErrKind::Parse(s) => s.fmt(f),
113        }
114    }
115}
116impl std::error::Error for ParseErr {
117    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
118        match &self.kind {
119            ParseErrKind::OffsetNew(e) => Some(e),
120            ParseErrKind::Lex(e) => Some(e),
121            ParseErrKind::Parse(_) => None,
122        }
123    }
124}
125impl crate::err::Error for ParseErr {
126    fn span(&self) -> Option<crate::err::ErrSpan> {
127        Some(crate::err::ErrSpan::from(self.span.clone()))
128    }
129        
130    fn help(&self) -> Option<Cow<str>> {
131        match &self.kind {
132            ParseErrKind::OffsetNew(e) => e.help(),
133            ParseErrKind::Lex(e) => e.help(),
134            ParseErrKind::Parse(_) => Some(Cow::Borrowed(&self.help)),
135        }
136    }
137}
138
139/// Components that can be constructed from a sequence of tokens.
140pub trait Parse: Sized {
141    /// Attempt to convert the next sequence of tokens 
142    /// in the parser's state into a component.
143    /// 
144    /// If parsing fails, there are no guarantees about what happens to the input,
145    /// and the parser likely should not be used after an error is raised during parsing.
146    fn parse(parser: &mut Parser) -> Result<Self, ParseErr>;
147}
148
149/// The main parser struct, which holds the main logic for the parser.
150pub struct Parser {
151    tokens: Vec<(Token, Span)>,
152    index: usize,
153    spans: Vec<Span>,
154}
155impl Parser {
156    /// Creates a new parser from a given string.
157    /// 
158    /// In the instantiation process, 
159    /// this function will attempt to tokenize the string into tokens,
160    /// raising an error if it fails.
161    pub fn new(stream: &str) -> Result<Self, ParseErr> {
162        let tokens = Token::lexer(stream).spanned()
163            .map(|(m_token, span)| match m_token {
164                Ok(token) => Ok((token, span)),
165                Err(err)  => Err(ParseErr::wrap(err, span)),
166            })
167            .filter(|t| !matches!(t, Ok((Token::Comment, _)))) // filter comments
168            .collect::<Result<_, _>>()?;
169
170        Ok(Self { tokens, index: 0, spans: vec![] })
171    }
172
173    /// Peeks at the next token to read.
174    pub fn peek(&self) -> Option<&(Token, Span)> {
175        self.tokens[self.index..].first()
176    }
177    /// Advances the parser ahead by one token.
178    pub fn advance(&mut self) {
179        // Append the last token's span to the last span collector.
180        let last_tok_span = self.cursor();
181        if let Some(last_span) = self.spans.last_mut() {
182            last_span.end = last_tok_span.end;
183        }
184
185        self.index += 1;
186        self.index = self.index.min(self.tokens.len());
187    }
188    /// Gets the range of the next token to read (or an EOL range if there are no more tokens to read).
189    pub fn cursor(&self) -> Span {
190        match self.peek().or_else(|| self.tokens.last()) {
191            Some((_, span)) => span.clone(),
192            None => 0..0
193        }
194    }
195
196    /// Parses the current token stream into a component, erroring if not possible.
197    /// 
198    /// If parsing fails, there are no guarantees about what happens to the input,
199    /// and the parser likely should not be used after an error is raised during parsing.
200    pub fn parse<P: Parse>(&mut self) -> Result<P, ParseErr> {
201        P::parse(self)
202    }
203
204    /// Check if the next token matches the given component and consume it if so.
205    /// 
206    /// This function can error if the next token *does* match the given component,
207    /// but an error occurs in trying to convert it to that component.
208    pub fn match_<P: TokenParse>(&mut self) -> Result<Option<P>, ParseErr> {
209        let span = self.cursor();
210        match self.advance_if(P::match_) {
211            Ok(t)  => P::convert(t, span).map(Some),
212            Err(_) => Ok(None),
213        }
214    }
215
216    /// Applies the provided predicate to the next token in the input.
217    /// 
218    /// If an error is raised from the predicate, the parser does not advance its input.
219    pub fn advance_if<T>(&mut self, pred: impl FnOnce(Option<&Token>, Span) -> Result<T, ParseErr>) -> Result<T, ParseErr> {
220        let result = if let Some((tok, span)) = self.peek() {
221            pred(Some(tok), span.clone())
222        } else {
223            pred(None, self.cursor())
224        };
225        if result.is_ok() {
226            self.advance();
227        }
228        result
229    }
230
231    /// Calculates the span of the component created inside this region block.
232    pub fn spanned<T, E>(&mut self, f: impl FnOnce(&mut Parser) -> Result<T, E>) -> Result<(T, Range<usize>), E> {
233        let Range { start, end: _ } = self.cursor();
234        
235        self.spans.push(start..start);
236        let result = f(self);
237
238        // pop span
239        let span = self.spans.pop().unwrap();
240        if let Some(last_span) = self.spans.last_mut() {
241            last_span.end = span.end;
242        }
243
244        Ok((result?, span))
245    }
246
247    /// Checks whether the input for the parser is empty.
248    pub fn is_empty(&self) -> bool {
249        self.tokens[self.index..]
250            .iter()
251            .all(|(t, _)| t.is_whitespace())
252    }
253}
254
255impl<const N: u32> Parse for ImmOrReg<N> {
256    fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
257        match parser.match_()? {
258            Some(Either::Left(imm))  => Ok(ImmOrReg::Imm(imm)),
259            Some(Either::Right(reg)) => Ok(ImmOrReg::Reg(reg)),
260            None => Err(ParseErr::new("expected register or immediate value", parser.cursor()))
261        }
262    }
263}
264
265impl<OFF, const N: u32> Parse for PCOffset<OFF, N> 
266    where Offset<OFF, N>: TokenParse
267{
268    fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
269        match parser.match_()? {
270            Some(Either::Left(off)) => Ok(PCOffset::Offset(off)),
271            Some(Either::Right(label)) => Ok(PCOffset::Label(label)),
272            None => Err(ParseErr::new("expected offset or label", parser.cursor()))
273        }
274    }
275}
276
277/// Simple to parse components.
278/// 
279/// This module holds components that are very simple to parse
280/// (defined as only requiring a single token and no additional state from the parser).
281/// 
282/// The key type of this module is the [`TokenParse`] trait which defines
283/// how to "simply parse" a component. 
284/// See that trait for more details about its utility over [`Parse`].
285/// 
286/// This module also provides several utility parsers (e.g., [`Comma`] and [`Colon`])
287/// for use in more complex component parsing.
288pub mod simple {
289    use logos::Span;
290
291    use crate::ast::{Label, Offset, Reg};
292
293    use super::lex::{Ident, LexErr, Token};
294    use super::{Parse, ParseErr, Parser};
295
296    /// Components that can be constructed with a single token 
297    /// and require no additional parser state.
298    /// 
299    /// This has an advantage over [`Parse`] in that if parsing fails,
300    /// the parser is known to not advance its input. 
301    /// This can be taken advantage of with [`Parser::match_`], 
302    /// which only advances if parsing passes.
303    /// 
304    /// [`Parser::match_`]: super::Parser::match_
305    pub trait TokenParse: Sized {
306        /// An intermediate to hold the match before it is converted to the actual component.
307        type Intermediate;
308
309        /// Tries to match the next token to the given component, if possible.
310        /// 
311        /// If successful, this returns some value and the parser advances. 
312        /// If unsuccessful, this returns an error and the parser does not advance.
313        /// 
314        /// The value returned is an intermediate value which is later converted to the desired component.
315        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self::Intermediate, ParseErr>;
316
317        /// Parses the intermediate into the given component, raising an error if conversion fails.
318        fn convert(imed: Self::Intermediate, span: Span) -> Result<Self, ParseErr>;
319    }
320    impl<S: TokenParse> Parse for S {
321        fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
322            let span = parser.cursor();
323            let imed = parser.advance_if(S::match_)?;
324            S::convert(imed, span)
325        }
326    }
327    trait DirectTokenParse: TokenParse<Intermediate = Self> {
328        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr>;
329    }
330    impl<T: DirectTokenParse> TokenParse for T {
331        type Intermediate = Self;
332    
333        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self::Intermediate, ParseErr> {
334            DirectTokenParse::match_(m_token, span)
335        }
336    
337        fn convert(imed: Self::Intermediate, _span: Span) -> Result<Self, ParseErr> {
338            Ok(imed)
339        }
340    }
341
342    /// Comma.
343    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Default)]
344    pub struct Comma;
345    impl DirectTokenParse for Comma {
346        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self::Intermediate, ParseErr> {
347            match m_token {
348                Some(Token::Comma) => Ok(Comma),
349                _ => Err(ParseErr::new("expected comma", span))
350            }
351        }
352    }
353
354    /// Colon.
355    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Default)]
356    pub struct Colon;
357    impl DirectTokenParse for Colon {
358        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr> {
359            match m_token {
360                Some(Token::Colon) => Ok(Colon),
361                _ => Err(ParseErr::new("expected colon", span))
362            }
363        }
364    }
365
366    /// A string literal.
367    #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Default)]
368    pub struct StrLiteral(pub String);
369    impl DirectTokenParse for StrLiteral {
370        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr> {
371            match m_token {
372                Some(Token::String(s)) => Ok(StrLiteral(s.to_string())),
373                _ => Err(ParseErr::new("expected string literal", span))
374            }
375        }
376    }
377
378    /// The end of a line or input.
379    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Default)]
380    pub struct End;
381    impl DirectTokenParse for End {
382        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr> {
383            match m_token {
384                None | Some(Token::NewLine) => Ok(End),
385                _ => Err(ParseErr::new("expected end of line", span))
386            }
387        }
388    }
389
390    /// An (signed or unsigned) int literal. 
391    /// This is primarily only used for `.fill`, which is sign-agnostic.
392    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Default)]
393    pub struct IntLiteral(pub u16);
394    impl DirectTokenParse for IntLiteral {
395        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr> {
396            match m_token {
397                Some(&Token::Unsigned(n)) => Ok(Self(n)),
398                Some(&Token::Signed(n)) => Ok(Self(n as u16)),
399                _ => Err(ParseErr::new("expected immediate value", span.clone()))
400            }
401        }
402    }
403
404    /// Either one component or another.
405    /// 
406    /// This is not meant to be used as a general purpose Either type.
407    /// It is only meant to be used for parsing.
408    pub enum Either<L, R> {
409        /// The first possible component.
410        Left(L),
411        /// The second possible component.
412        Right(R)
413    }
414    impl<L: TokenParse, R: TokenParse> TokenParse for Either<L, R> {
415        type Intermediate = Either<L::Intermediate, R::Intermediate>;
416        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self::Intermediate, ParseErr> {
417            match L::match_(m_token, span.clone()) {
418                Ok(t) => Ok(Either::Left(t)),
419                Err(_) => match R::match_(m_token, span.clone()) {
420                    Ok(u) => Ok(Either::Right(u)),
421                    Err(_) => Err(ParseErr::new("could not parse", span)),
422                },
423            }
424        }
425        
426        fn convert(imed: Self::Intermediate, span: Span) -> Result<Self, ParseErr> {
427            match imed {
428                Either::Left(l)  => L::convert(l, span).map(Either::Left),
429                Either::Right(r) => R::convert(r, span).map(Either::Right),
430            }
431        }
432    }
433
434    impl DirectTokenParse for Reg {
435        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr> {
436            match m_token {
437                Some(&Token::Reg(reg_no)) => Reg::try_from(reg_no)
438                    .map_err(|_| ParseErr::new(format!("invalid register number {reg_no}"), span)),
439                _ => Err(ParseErr::new("expected register", span))
440            }
441        }
442    }
443
444    impl<const N: u32> TokenParse for Offset<i16, N> {
445        type Intermediate = Either<i16, u16>;
446
447        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self::Intermediate, ParseErr> {
448            match m_token {
449                Some(&Token::Unsigned(n)) => Ok(Either::Right(n)),
450                Some(&Token::Signed(n))   => Ok(Either::Left(n)),
451                _ => Err(ParseErr::new("expected immediate value", span.clone()))
452            }
453        }
454        
455        fn convert(imed: Self::Intermediate, span: Span) -> Result<Self, ParseErr> {
456            let off_val = match imed {
457                Either::Left(n)  => n,
458                Either::Right(n) => {
459                    <_>::try_from(n).map_err(|_| ParseErr::wrap(LexErr::DoesNotFitI16, span.clone()))?
460                },
461            };
462            
463            Self::new(off_val)
464                .map_err(|s| ParseErr::wrap(s, span))
465        }
466    }
467
468    impl<const N: u32> TokenParse for Offset<u16, N> {
469        type Intermediate = Either<u16, i16>;
470
471        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self::Intermediate, ParseErr> {
472            match m_token {
473                Some(&Token::Unsigned(n)) => Ok(Either::Left(n)),
474                Some(&Token::Signed(n))   => Ok(Either::Right(n)),
475                _ => Err(ParseErr::new("expected immediate value", span.clone()))
476            }
477        }
478        
479        fn convert(imed: Self::Intermediate, span: Span) -> Result<Self, ParseErr> {
480            let off_val = match imed {
481                Either::Left(n)  => n,
482                Either::Right(n) => {
483                    <_>::try_from(n).map_err(|_| ParseErr::wrap(LexErr::DoesNotFitU16, span.clone()))?
484                },
485            };
486            
487            Self::new(off_val)
488                .map_err(|s| ParseErr::wrap(s, span))
489        }
490    }
491    impl DirectTokenParse for Label {
492        fn match_(m_token: Option<&Token>, span: Span) -> Result<Self, ParseErr> {
493            match m_token {
494                Some(Token::Ident(Ident::Label(s))) => Ok(Label::new(s.to_string(), span)),
495                _ => Err(ParseErr::new("expected label", span))
496            }
497        }
498    }
499}
500
501impl Parse for AsmInstr {
502    fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
503        let opcode = parser.advance_if(|mt, span| match mt {
504            Some(Token::Ident(id)) if !matches!(id, Ident::Label(_)) => Ok(id.clone()),
505            _ => Err(ParseErr::new("expected instruction", span))
506        })?;
507
508        match opcode {
509            Ident::ADD => {
510                let dr = parser.parse()?;
511                parser.parse::<Comma>()?;
512                let sr1 = parser.parse()?;
513                parser.parse::<Comma>()?;
514                let sr2 = parser.parse()?;
515
516                Ok(Self::ADD(dr, sr1, sr2))
517            },
518            Ident::AND => {
519                let dr = parser.parse()?;
520                parser.parse::<Comma>()?;
521                let sr1 = parser.parse()?;
522                parser.parse::<Comma>()?;
523                let sr2 = parser.parse()?;
524
525                Ok(Self::AND(dr, sr1, sr2))
526            },
527            Ident::BR => Ok(Self::BR(0b111, parser.parse()?)),
528            Ident::BRP => Ok(Self::BR(0b001, parser.parse()?)),
529            Ident::BRZ => Ok(Self::BR(0b010, parser.parse()?)),
530            Ident::BRZP => Ok(Self::BR(0b011, parser.parse()?)),
531            Ident::BRN => Ok(Self::BR(0b100, parser.parse()?)),
532            Ident::BRNP => Ok(Self::BR(0b101, parser.parse()?)),
533            Ident::BRNZ => Ok(Self::BR(0b110, parser.parse()?)),
534            Ident::BRNZP => Ok(Self::BR(0b111, parser.parse()?)),
535            Ident::JMP => Ok(Self::JMP(parser.parse()?)),
536            Ident::JSR => Ok(Self::JSR(parser.parse()?)),
537            Ident::JSRR => Ok(Self::JSRR(parser.parse()?)),
538            Ident::LD => {
539                let dr = parser.parse()?;
540                parser.parse::<Comma>()?;
541                let off = parser.parse()?;
542
543                Ok(Self::LD(dr, off))
544            },
545            Ident::LDI => {
546                let dr = parser.parse()?;
547                parser.parse::<Comma>()?;
548                let off = parser.parse()?;
549
550                Ok(Self::LDI(dr, off))
551            },
552            Ident::LDR => {
553                let dr = parser.parse()?;
554                parser.parse::<Comma>()?;
555                let br = parser.parse()?;
556                parser.parse::<Comma>()?;
557                let off = parser.parse()?;
558
559                Ok(Self::LDR(dr, br, off))
560            },
561            Ident::LEA => {
562                let dr = parser.parse()?;
563                parser.parse::<Comma>()?;
564                let off = parser.parse()?;
565
566                Ok(Self::LEA(dr, off))
567            },
568            Ident::NOT => {
569                let dr = parser.parse()?;
570                parser.parse::<Comma>()?;
571                let sr = parser.parse()?;
572
573                Ok(Self::NOT(dr, sr))
574            },
575            Ident::RET => Ok(Self::RET),
576            Ident::RTI => Ok(Self::RTI),
577            Ident::ST => {
578                let sr = parser.parse()?;
579                parser.parse::<Comma>()?;
580                let off = parser.parse()?;
581
582                Ok(Self::ST(sr, off))
583            },
584            Ident::STI => {
585                let sr = parser.parse()?;
586                parser.parse::<Comma>()?;
587                let off = parser.parse()?;
588
589                Ok(Self::STI(sr, off))
590            },
591            Ident::STR => {
592                let dr = parser.parse()?;
593                parser.parse::<Comma>()?;
594                let br = parser.parse()?;
595                parser.parse::<Comma>()?;
596                let off = parser.parse()?;
597
598                Ok(Self::STR(dr, br, off))
599            },
600            Ident::TRAP => Ok(Self::TRAP(parser.parse()?)),
601            Ident::NOP => {
602                // NOP can optionally accept a parameter.
603                let off = match parser.peek() {
604                    Some((Token::Signed(_) | Token::Unsigned(_) | Token::Ident(Ident::Label(_)), _)) => parser.parse()?,
605                    _ => PCOffset::Offset(Offset::new_trunc(0)),
606                };
607
608                Ok(Self::NOP(off))
609            },
610            Ident::GETC => Ok(Self::GETC),
611            Ident::OUT => Ok(Self::OUT),
612            Ident::PUTC => Ok(Self::PUTC),
613            Ident::PUTS => Ok(Self::PUTS),
614            Ident::IN => Ok(Self::IN),
615            Ident::PUTSP => Ok(Self::PUTSP),
616            Ident::HALT => Ok(Self::HALT),
617            Ident::Label(_) => Err(ParseErr::new("expected instruction", parser.cursor())) // should be unreachable
618        }
619    }
620}
621
622impl Parse for Directive {
623    fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
624        use Either::*;
625
626        let cursor = parser.cursor();
627        let directive = parser.advance_if(|mt, span| match mt {
628            Some(Token::Directive(id)) => Ok(id.to_string()),
629            _ => Err(ParseErr::new("expected directive", span))
630        })?;
631
632        match &*directive.to_uppercase() {
633            "ORIG" => Ok(Self::Orig(parser.parse()?)),
634            "FILL" => {
635                // .fill is weird.
636                //
637                // Unlike other numeric operands, it can accept both unsigned and signed literals,
638                // so it cannot be parsed with PCOffset's parser and has to be handled differently.
639                let span = parser.cursor();
640                let operand = match parser.match_()? {
641                    Some(Left(label))            => Ok(PCOffset::Label(label)),
642                    Some(Right(IntLiteral(off))) => Ok(PCOffset::Offset(Offset::new_trunc(off))),
643                    _ => Err(ParseErr::new("expected numeric or label", span))
644                }?;
645
646                Ok(Self::Fill(operand))
647            }
648            "BLKW" => {
649                let span = parser.cursor();
650                let block_size: Offset<_, 16> = parser.parse()?;
651                match block_size.get() != 0 {
652                    true  => Ok(Self::Blkw(block_size)),
653                    false => Err(ParseErr::new("block size must be greater than 0", span))
654                }
655            }
656            "STRINGZ" => {
657                let StrLiteral(s) = parser.parse()?;
658                Ok(Self::Stringz(s))
659            }
660            "END" => Ok(Self::End),
661            "EXTERNAL" => Ok(Self::External(parser.parse()?)),
662            _ => Err({
663                ParseErr::new("invalid directive", cursor)
664                    .with_help("the valid directives are .orig, .fill, .blkw, .stringz, .end, .external")
665            })
666        }
667    }
668}
669
670impl Parse for StmtKind {
671    fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
672        // This parser exists for consistency, but is not actually used.
673        // See it used in the implementation of nucleus in Stmt.
674        match parser.peek() {
675            Some((Token::Directive(_), _)) => Ok(StmtKind::Directive(parser.parse()?)),
676            Some((Token::Ident(id), _)) if !matches!(id, Ident::Label(_)) => Ok(StmtKind::Instr(parser.parse()?)),
677            _ => Err(ParseErr::new("expected instruction or directive", parser.cursor()))
678        }
679    }
680}
681impl Parse for Stmt {
682    fn parse(parser: &mut Parser) -> Result<Self, ParseErr> {
683        let mut labels = vec![];
684
685        // gets the span of the last token
686        // useful for better error messages
687        let mut last_label_span = None;
688
689        // Scan through labels and new lines until we find an instruction
690        while !parser.is_empty() {
691            let span = parser.cursor();
692            match parser.match_()? {
693                Some(Either::Left(label)) => {
694                    parser.match_::<Colon>()?; // skip colon if it exists
695
696                    last_label_span.replace(span.clone());
697                    labels.push(label);
698                }
699                Some(Either::Right(End)) => {},
700                _ => break
701            }
702        }
703        
704        let (nucleus, span) = parser.spanned(|parser| {
705            match parser.peek() {
706                Some((Token::Directive(_), _)) => Ok(StmtKind::Directive(parser.parse()?)),
707                Some((Token::Ident(id), _)) if !matches!(id, Ident::Label(_)) => Ok(StmtKind::Instr(parser.parse()?)),
708                _ => {
709                    // Parser didn't find a directive or instruction following a label.
710                    // Chances are the label was just a misspelled instruction.
711                    Err(ParseErr::new("expected instruction or directive", last_label_span.unwrap_or(parser.cursor())))
712                }
713            }
714        })?;
715
716        // assert end of line at end of statement
717        parser.parse::<End>()?;
718        // consume any extra NLs
719        while !parser.is_empty() && parser.match_::<End>()?.is_some() {}
720
721        Ok(Self { labels, nucleus, span })
722    }
723}
724
725#[cfg(test)]
726mod test {
727    use crate::ast::asm::Stmt;
728    use crate::ast::{Label, Offset, Reg};
729
730    use super::{Comma, End, Parse, ParseErr, Parser};
731
732    #[test]
733    fn test_parser_basic() -> Result<(), ParseErr> {
734        let mut parser = Parser::new("XOR R0, R0, #0")?;
735        parser.parse::<Label>()?;
736        parser.parse::<Reg>()?;
737        parser.parse::<Comma>()?;
738        parser.parse::<Reg>()?;
739        parser.parse::<Comma>()?;
740        parser.parse::<Offset<i16, 6>>()?;
741        parser.parse::<End>()?;
742        Ok(())
743    }
744
745    #[test]
746    fn test_parser_end() -> Result<(), ParseErr> {
747        let mut parser = Parser::new("\
748        FOO
749        BAR
750        BAZ
751
752
753        ")?;
754
755        parser.parse::<Label>()?;
756        parser.parse::<End>()?;
757        parser.parse::<Label>()?;
758        parser.parse::<End>()?;
759        parser.parse::<Label>()?;
760        assert!(parser.is_empty(), "expected parser to be empty");
761        parser.parse::<End>()?;
762        parser.parse::<End>()?;
763        parser.parse::<End>()?;
764        parser.parse::<End>()?;
765        parser.parse::<End>()?;
766        parser.parse::<End>()?;
767        parser.parse::<End>()?;
768        Ok(())
769    }
770
771    fn parse_str<P: Parse>(s: &str) -> Result<P, ParseErr> {
772        let mut parser = Parser::new(s)?;
773        let result = parser.parse()?;
774        assert!(parser.is_empty(), "expected parser to be complete after parsing");
775        Ok(result)
776    }
777    fn assert_parse<P: Parse>(s: &str) {
778        if let Err(e) = parse_str::<P>(s) {
779            panic!("failed to parse {s:?}: {e:?}");
780        }
781    }
782    fn assert_parse_fail<P: Parse + std::fmt::Debug>(s: &str) {
783        if let Ok(ast) = parse_str::<P>(s) {
784            panic!("unexpectedly succeeded to parse {s:?}: {ast:?}");
785        }
786    }
787
788    #[test]
789    fn test_instrs_directives_basic() {
790        // ADD
791        assert_parse::<Stmt>("ADD R0, R1, R2");
792        assert_parse::<Stmt>("ADD R0, R1, #5");
793        // AND
794        assert_parse::<Stmt>("AND R0, R1, R2");
795        assert_parse::<Stmt>("AND R0, R1, #5");
796        // BR*
797        assert_parse::<Stmt>("BR #9");
798        assert_parse::<Stmt>("BRn #9");
799        assert_parse::<Stmt>("BRz #9");
800        assert_parse::<Stmt>("BRnz #9");
801        assert_parse::<Stmt>("BRp #9");
802        assert_parse::<Stmt>("BRnp #9");
803        assert_parse::<Stmt>("BRzp #9");
804        assert_parse::<Stmt>("BRnzp #9");
805        assert_parse::<Stmt>("BR LABEL");
806        assert_parse::<Stmt>("BRn LABEL");
807        assert_parse::<Stmt>("BRz LABEL");
808        assert_parse::<Stmt>("BRnz LABEL");
809        assert_parse::<Stmt>("BRp LABEL");
810        assert_parse::<Stmt>("BRnp LABEL");
811        assert_parse::<Stmt>("BRzp LABEL");
812        assert_parse::<Stmt>("BRnzp LABEL");
813        // JMP
814        assert_parse::<Stmt>("JMP R0");
815        // JSR
816        assert_parse::<Stmt>("JSR #11");
817        assert_parse::<Stmt>("JSR LABEL");
818        // JSRR
819        assert_parse::<Stmt>("JSRR R0");
820        // LD
821        assert_parse::<Stmt>("LD R0, #9");
822        assert_parse::<Stmt>("LD R1, LABEL");
823        // LDI
824        assert_parse::<Stmt>("LDI R2, #9");
825        assert_parse::<Stmt>("LDI R3, LABEL");
826        // LDR
827        assert_parse::<Stmt>("LDR R4, R5, #6");
828        // LEA
829        assert_parse::<Stmt>("LEA R6, #9");
830        assert_parse::<Stmt>("LEA R7, LABEL");
831        // NOT
832        assert_parse::<Stmt>("NOT R0, R1");
833        // RET
834        assert_parse::<Stmt>("RET");
835        // RTI
836        assert_parse::<Stmt>("RTI");
837        // ST
838        assert_parse::<Stmt>("ST R0, #9");
839        assert_parse::<Stmt>("ST R1, LABEL");
840        // STI
841        assert_parse::<Stmt>("STI R2, #9");
842        assert_parse::<Stmt>("STI R3, LABEL");
843        // STR
844        assert_parse::<Stmt>("STR R4, R5, #6");
845        // TRAP
846        assert_parse::<Stmt>("TRAP x26");
847        // NOP
848        assert_parse::<Stmt>("NOP");
849        assert_parse::<Stmt>("NOP LABEL");
850        assert_parse::<Stmt>("NOP #9");
851        // TRAP aliases
852        assert_parse::<Stmt>("GETC");
853        assert_parse::<Stmt>("OUT");
854        assert_parse::<Stmt>("PUTC");
855        assert_parse::<Stmt>("PUTS");
856        assert_parse::<Stmt>("IN");
857        assert_parse::<Stmt>("PUTSP");
858        assert_parse::<Stmt>("HALT");
859        // .orig
860        assert_parse::<Stmt>(".orig x3000");
861        // .fill
862        assert_parse::<Stmt>(".fill 64");
863        assert_parse::<Stmt>(".fill LABEL");
864        // .blkw
865        assert_parse::<Stmt>(".blkw 64");
866        // .stringz
867        assert_parse::<Stmt>(r#".stringz "Hello!""#);
868        // .end
869        assert_parse::<Stmt>(".end");
870        // .external
871        assert_parse::<Stmt>(".external LABEL");
872    }
873
874    #[test]
875    fn test_labeled_stmts() {
876        assert_parse::<Stmt>("LABEL1 LABEL2 LABEL3 NOT R0, R0");
877        assert_parse::<Stmt>("LABEL1 LABEL2 LABEL3 .fill 0");
878        assert_parse::<Stmt>("
879            LABEL1:
880            LABEL2:
881            LABEL3:
882                NOT R0, R0
883        ");
884        assert_parse::<Stmt>("
885            LABEL1:
886            LABEL2:
887            LABEL3:
888                .fill 0
889        ");
890    }
891
892    #[test]
893    fn test_wrong_punct() {
894        assert_parse::<Stmt>("LDR R4, R5, 0");
895        assert_parse_fail::<Stmt>("LDR R4 R5 0");
896        assert_parse_fail::<Stmt>("LDR R4: R5: 0");
897
898        assert_parse_fail::<Stmt>("A, LDR R4, R5, 0");
899    }
900
901    #[test]
902    fn test_instrs_directives_wrong_type() {
903        // Types:
904        // Numeric (signed/unsigned), label, register
905        // ADD
906        assert_parse_fail::<Stmt>("ADD A, B, C");
907        assert_parse_fail::<Stmt>("ADD R0, B, C");
908        assert_parse_fail::<Stmt>("ADD R0, R1, C");
909        // BR*
910        assert_parse_fail::<Stmt>("BR R0");
911        // JMP
912        assert_parse_fail::<Stmt>("JMP #1");
913        assert_parse_fail::<Stmt>("JMP FOO");
914        // JSR/JSRR
915        assert_parse_fail::<Stmt>("JSRR #11");
916        assert_parse_fail::<Stmt>("JSRR LABEL");
917        assert_parse_fail::<Stmt>("JSR R0");
918        // LD
919        assert_parse_fail::<Stmt>("LD R0, R1");
920        assert_parse_fail::<Stmt>("LDR FOO, BAR, BAZ");
921        assert_parse_fail::<Stmt>("LDR R4, BAR, BAZ");
922        assert_parse_fail::<Stmt>("LDR R4, R5, BAZ");
923        // NOT
924        assert_parse_fail::<Stmt>("NOT A0, B1");
925        assert_parse_fail::<Stmt>("NOT R0, B1");
926        // TRAP
927        assert_parse_fail::<Stmt>("TRAP -1");
928        assert_parse_fail::<Stmt>("TRAP FOO");
929        // NOP
930        assert_parse_fail::<Stmt>("NOP R0");
931        // .orig
932        assert_parse_fail::<Stmt>(".orig FOO");
933        assert_parse_fail::<Stmt>(".orig R0");
934        assert_parse_fail::<Stmt>(".orig -1");
935        // .fill
936        assert_parse_fail::<Stmt>(".fill R0");
937        // .blkw
938        assert_parse_fail::<Stmt>(".blkw FOO");
939        assert_parse_fail::<Stmt>(".blkw R0");
940        // .stringz
941        assert_parse_fail::<Stmt>(r".stringz FOO");
942        assert_parse_fail::<Stmt>(r".stringz R0");
943        assert_parse_fail::<Stmt>(r".stringz 0");
944        // .external
945        assert_parse_fail::<Stmt>(".external R0");
946        assert_parse_fail::<Stmt>(".external 0");
947    }
948
949    #[test]
950    fn test_instrs_directives_not_real() {
951        assert_parse_fail::<Stmt>("MULT");
952        assert_parse_fail::<Stmt>("XOR R0, R0, #0");
953        assert_parse_fail::<Stmt>(".not_a_directive");
954    }
955
956    #[test]
957    fn test_instrs_directives_limits() {
958        // imm5
959        assert_parse::<Stmt>("ADD R0, R1, #0");
960        assert_parse::<Stmt>("ADD R2, R3, #15");
961        assert_parse::<Stmt>("ADD R6, R7, #-16");
962        assert_parse_fail::<Stmt>("ADD R4, R5, #16");
963        assert_parse_fail::<Stmt>("ADD R0, R1, #-17");
964        
965        // offset6
966        assert_parse::<Stmt>("LDR R0, R1, #0");
967        assert_parse::<Stmt>("LDR R2, R3, #31");
968        assert_parse::<Stmt>("LDR R6, R7, #-32");
969        assert_parse_fail::<Stmt>("LDR R4, R5, #32");
970        assert_parse_fail::<Stmt>("LDR R0, R1, #-33");
971
972        // PCoffset9
973        assert_parse::<Stmt>("BR #0");
974        assert_parse::<Stmt>("BRp #255");
975        assert_parse::<Stmt>("BRzp #-256");
976        assert_parse_fail::<Stmt>("BRz #256");
977        assert_parse_fail::<Stmt>("BRn #-257");
978
979        // PCoffset11
980        assert_parse::<Stmt>("JSR #0");
981        assert_parse::<Stmt>("JSR #1023");
982        assert_parse::<Stmt>("JSR #-1024");
983        assert_parse_fail::<Stmt>("JSR #1024");
984        assert_parse_fail::<Stmt>("JSR #-1025");
985
986        // TrapVect8
987        assert_parse::<Stmt>("TRAP #0");
988        assert_parse::<Stmt>("TRAP #255");
989        assert_parse_fail::<Stmt>("TRAP #256");
990        assert_parse_fail::<Stmt>("TRAP #-1");
991
992        // unsigned 16-bit
993        assert_parse::<Stmt>(".orig #0");
994        assert_parse::<Stmt>(".orig #65535");
995        assert_parse_fail::<Stmt>(".orig #65536");
996        assert_parse_fail::<Stmt>(".orig #-1");
997
998        // .fill
999        // dual unsigned/signed 16-bit
1000        assert_parse::<Stmt>(".fill #0");
1001        assert_parse::<Stmt>(".fill #-1");
1002        assert_parse::<Stmt>(".fill #65535");
1003        assert_parse::<Stmt>(".fill #-32768");
1004        assert_parse_fail::<Stmt>(".fill #65536");
1005        assert_parse_fail::<Stmt>(".orig #-32769");
1006
1007        // .blkw
1008        // non-zero unsigned 16-bit
1009        assert_parse::<Stmt>(".blkw #1");
1010        assert_parse::<Stmt>(".blkw #65535");
1011        assert_parse_fail::<Stmt>(".blkw #0");
1012        assert_parse_fail::<Stmt>(".blkw #-1");
1013        assert_parse_fail::<Stmt>(".blkw #65536");
1014    }
1015}