rano/ass/
parse.rs

1use super::*;
2use lex::{Terminal, Token as LToken, TokenVal as LTokenVal, Tokens as LTokens};
3
4use log::info;
5
/// Assumed average number of lexer tokens per parsed instruction.
///
/// `parse` divides the lexer token count by this value to pre-size its output
/// vector, and logs what the ideal value would have been for the run.
const HEUR: usize = 4;
7
8#[derive(Debug)]
9pub enum ParseErrorType {
10    /// Expected a comma after a nonterminal token (non instruction)
11    /// Often due to mistyping an instruction or directive
12    ExpectedComma,
13
14    /// Multiple labels assigned to the same memory location
15    MultiLabel(Span, Span),
16
17    /// Tried to refer to a label with a reserved name,
18    /// often a mistype due to the perpensity for forward declaration
19    ReservedReference(&'static str, Span),
20
21    /// Found a second argument to a memory instruction other than 'I',
22    /// which is illiegal
23    ExpectedIndirection(&'static str, Span),
24
25    /// Found indirection flag without a memory instruction, see reserved reference
26    BareIndirection,
27
28    /// Found a comma somewhere without a label in front of it, also often a mistype
29    BareComma,
30
31    /// Found nonterminal after register or IO instruction when none was expected
32    NoArgumentsExpected(&'static str, Span),
33
34    DirectiveLiteralMissing,
35
36    /// Could not parse a nonterminal as a hex value using [`from_str_radix`] after HEX or ORG
37    /// directive
38    LiteralHexValueFormat(Span),
39
40    /// see [`Self::LiteralHexValueFormat`]
41    LiteralDecValueFormat(Span),
42
43    /// Found non comment or newline when a newline was expected at the end of an instruction
44    UnexpectedToken(Span),
45}
46
47#[derive(Debug)]
48pub struct ParseError<'a> {
49    pub ctx: ParseContext<'a>,
50    pub span: Span,
51    pub ty: ParseErrorType,
52}
53
54impl<'t> HeadlineError for ParseError<'t> {
55    fn headline(&self) -> String {
56        match self.ty {
57            ParseErrorType::ExpectedComma => format!("Expected a comma after non-keyword"),
58            ParseErrorType::MultiLabel(_, _) => format!("May not place multiple labels on the same memory location!"),
59            ParseErrorType::ReservedReference(r, _) => format!("Instruction {} takes a label reference, not an instruction.", r),
60            ParseErrorType::ExpectedIndirection(i, _) => format!("Instruction {} takes only a label and an optional Indirection flag 'I'", i),
61            ParseErrorType::BareIndirection => format!("Indirection flag 'I' is a reserverd keyword and may only be used on memory operations"),
62            ParseErrorType::BareComma => format!("The comma ',' is a reserved token and may only be used directly after a label"),
63            ParseErrorType::NoArgumentsExpected(i, _) => format!("Instruction {} does not take any arguments", i),
64            ParseErrorType::DirectiveLiteralMissing => format!("Expected a literal value after directive"),
65            ParseErrorType::LiteralHexValueFormat(_) => format!("Could not parse value as Hexidecimal value literal"),
66            ParseErrorType::LiteralDecValueFormat(_) => format!("Could not parse value as Decimal value literal"),
67            ParseErrorType::UnexpectedToken(_) => format!("Instructions and directives must be terminated with a newline"),
68        }
69    }
70
71    fn body(&self) -> String {
72        match self.ty {
73            ParseErrorType::ExpectedComma => self.span.into_set().red_ctx(&self.ctx, 2),
74            ParseErrorType::MultiLabel(a, b) => a.into_set().insert(b).red_ctx(&self.ctx, 2),
75            ParseErrorType::ReservedReference(_, s) => s.into_set().red_ctx(&self.ctx, 2),
76            ParseErrorType::ExpectedIndirection(_, s) => s.into_set().red_ctx(&self.ctx, 2),
77            ParseErrorType::BareIndirection => self.span.into_set().red_ctx(&self.ctx, 2),
78            ParseErrorType::BareComma => self.span.into_set().red_ctx(&self.ctx, 2),
79            ParseErrorType::NoArgumentsExpected(_, s) => s.into_set().red_ctx(&self.ctx, 2),
80            ParseErrorType::DirectiveLiteralMissing => self.span.into_set().red_ctx(&self.ctx, 2),
81            ParseErrorType::LiteralHexValueFormat(s) => s.into_set().red_ctx(&self.ctx, 2),
82            ParseErrorType::LiteralDecValueFormat(s) => s.into_set().red_ctx(&self.ctx, 2),
83            ParseErrorType::UnexpectedToken(s) => s.into_set().red_ctx(&self.ctx, 2),
84        }
85    }
86}
87
88#[derive(Debug)]
89pub enum ReferenceToken {
90    LabelDef(Span, Box<ReferenceInstruction>),
91
92    And(Span, bool),
93    Add(Span, bool),
94    Lda(Span, bool),
95    Sta(Span, bool),
96    Bun(Span, bool),
97    Bsa(Span, bool),
98    Isz(Span, bool),
99
100    Cla(),
101    Cle(),
102    Cma(),
103    Cme(),
104    Cir(),
105    Cil(),
106    Inc(),
107    Spa(),
108    Sna(),
109    Sze(),
110    Hlt(),
111
112    // IO ops
113    Inp(),
114    Out(),
115    Ski(),
116    Sko(),
117    Ion(),
118    Iof(),
119
120    Org(u16),
121    Hex(u16),
122    Dec(i16),
123}
124
125#[derive(Debug)]
126pub struct ReferenceInstruction {
127    pub span: Span,
128    pub instr: ReferenceToken,
129}
130
131impl ReferenceInstruction {
132    fn new_mem<C: Fn(Span, bool) -> ReferenceToken>(
133        ins: Span,
134        reference: Span,
135        ind: bool,
136        c: C,
137    ) -> ReferenceInstruction {
138        ReferenceInstruction {
139            span: ins,
140            instr: c(reference, ind),
141        }
142    }
143
144    fn new_reg_io<C: Fn() -> ReferenceToken>(ins: Span, c: C) -> ReferenceInstruction {
145        ReferenceInstruction {
146            span: ins,
147            instr: c(),
148        }
149    }
150
151    fn label(self, lab: Option<Span>) -> Self {
152        match lab {
153            Some(lab) => ReferenceInstruction {
154                span: lab.join(self.span),
155                instr: ReferenceToken::LabelDef(lab, Box::new(self)),
156            },
157            None => self,
158        }
159    }
160}
161
162#[derive(Debug)]
163pub struct TokenTree<'a> {
164    pub ctx: ParseContext<'a>,
165    pub tokens: Box<[ReferenceInstruction]>,
166}
167
168fn mem_op<'l, 'c, 'a, C: Fn(Span, bool) -> ReferenceToken>(
169    left: &'l [LToken],
170    ctx: &'c ParseContext<'a>,
171    ty: C,
172    ty_name: &'static str,
173) -> Result<'a, (&'l [LToken], ReferenceInstruction)> {
174    if left[1].tval != LTokenVal::NonTerminal {
175        return Err(ParseError {
176            ctx: ctx.clone(),
177            span: left[0].span.join(left[2].span),
178            ty: ParseErrorType::ReservedReference(ty_name, left[2].span),
179        })
180        .map_err(ParseError::into);
181    }
182
183    let indirect = match left[2].tval {
184        LTokenVal::Terminal(Terminal::Indirection) => true,
185        LTokenVal::Terminal(Terminal::CommentStart) => false,
186        LTokenVal::Terminal(Terminal::Newline) => false,
187        _ => {
188            return Err(ParseError {
189                ctx: ctx.clone(),
190                span: left[0].span.join(left[2].span),
191                ty: ParseErrorType::ExpectedIndirection(ty_name, left[2].span),
192            })
193            .map_err(ParseError::into);
194        }
195    };
196
197    if indirect {
198        let instr =
199            ReferenceInstruction::new_mem(left[0].span.join(left[2].span), left[1].span, true, ty);
200
201        Ok((eat_nl_com(&left[3..], ctx, instr.span)?, instr))
202    } else {
203        let instr =
204            ReferenceInstruction::new_mem(left[0].span.join(left[1].span), left[1].span, false, ty);
205
206        Ok((eat_nl_com(&left[2..], ctx, instr.span)?, instr))
207    }
208}
209
210fn reg_op<'l, 'c, 'a, C: Fn() -> ReferenceToken>(
211    left: &'l [LToken],
212    ctx: &'c ParseContext<'a>,
213    ty: C,
214    ty_name: &'static str,
215) -> Result<'a, (&'l [LToken], ReferenceInstruction)> {
216    if left.len() > 1 {
217        match left[1].tval {
218            LTokenVal::Terminal(Terminal::CommentStart)
219            | LTokenVal::Terminal(Terminal::Newline) => {
220                let instr = ReferenceInstruction::new_reg_io(left[0].span, ty);
221                Ok((eat_nl_com(&left[1..], ctx, instr.span)?, instr))
222            }
223            _ => Err(ParseError {
224                ctx: ctx.clone(),
225                span: left[0].span.join(left[1].span),
226                ty: ParseErrorType::NoArgumentsExpected(ty_name, left[1].span),
227            })
228            .map_err(ParseError::into),
229        }
230    } else {
231        let instr = ReferenceInstruction::new_reg_io(left[0].span, ty);
232        Ok((&left[0..0], instr))
233    }
234}
235
236/// Eat comment and newline chars
237fn eat_nl_com<'l, 'c, 'a>(
238    mut left: &'l [LToken],
239    ctx: &'c ParseContext<'a>,
240    last_s: Span,
241) -> Result<'a, &'l [LToken]> {
242    let mut ate = 0;
243
244    loop {
245        // If we are eating the last line
246        if left.len() == 0 {
247            return Ok(left);
248        }
249
250        match left[0].tval {
251            LTokenVal::Terminal(Terminal::Newline) => {
252                ate += 1;
253                left = &left[1..];
254            }
255            LTokenVal::Terminal(Terminal::CommentStart) | LTokenVal::Ignored => {
256                left = &left[1..];
257            }
258            _ => break,
259        }
260    }
261
262    if ate != 0 {
263        Ok(left)
264    } else {
265        if let LTokenVal::Terminal(_) = left[0].tval {
266            Err(ParseError {
267                ctx: ctx.clone(),
268                span: last_s.join(left[0].span),
269                ty: ParseErrorType::UnexpectedToken(left[0].span),
270            })
271            .map_err(AssembleError::from)
272        } else {
273            Err(ParseError {
274                ctx: ctx.clone(),
275                span: last_s.join(left[0].span),
276                ty: ParseErrorType::NoArgumentsExpected("", left[0].span),
277            })
278            .map_err(AssembleError::from)
279        }
280    }
281}
282
283pub fn parse(
284    LTokens {
285        ctx,
286        tokens: ltokens,
287    }: LTokens,
288) -> Result<TokenTree> {
289    info!("Beginning parse step with heur of {}", HEUR);
290    let len = ltokens.len();
291
292    let mut tokens = Vec::with_capacity(len / HEUR);
293
294    // Eat blank lines or comment lines at beginning of file
295    let mut left = eat_nl_com(&*ltokens, &ctx, ltokens[0].span).unwrap_or_else(|_| &*ltokens);
296
297    let mut lab: Option<Span> = None;
298
299    while left.len() > 0 {
300        match left[0].tval {
301            LTokenVal::NonTerminal => {
302                if left[1].tval == LTokenVal::Terminal(Terminal::Comma) {
303                    if let Some(sp) = lab {
304                        Err(ParseError {
305                            ctx: ctx.clone(),
306                            span: sp.join(left[0].span),
307                            ty: ParseErrorType::MultiLabel(sp, left[0].span),
308                        })?;
309                    } else {
310                        lab = Some(left[0].span);
311                        left = &left[2..];
312                    }
313                } else {
314                    Err(ParseError {
315                        ctx: ctx.clone(),
316                        span: left[0].span.join(left[1].span),
317                        ty: ParseErrorType::ExpectedComma,
318                    })?;
319                }
320            }
321
322            LTokenVal::Ignored => {
323                unreachable!(
324                    "Comment values should only ever be handled in the CommentStart terminal case"
325                );
326            }
327
328            LTokenVal::Terminal(t) => match t {
329                Terminal::And => {
330                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::And, "AND")?;
331                    left = nl;
332                    tokens.push(instr.label(lab.take()));
333                }
334
335                Terminal::Add => {
336                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Add, "ADD")?;
337                    left = nl;
338                    tokens.push(instr.label(lab.take()));
339                }
340
341                Terminal::Lda => {
342                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Lda, "LDA")?;
343                    left = nl;
344                    tokens.push(instr.label(lab.take()));
345                }
346
347                Terminal::Sta => {
348                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Sta, "STA")?;
349                    left = nl;
350                    tokens.push(instr.label(lab.take()));
351                }
352
353                Terminal::Bun => {
354                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Bun, "BUN")?;
355                    left = nl;
356                    tokens.push(instr.label(lab.take()));
357                }
358
359                Terminal::Bsa => {
360                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Bsa, "BSA")?;
361                    left = nl;
362                    tokens.push(instr.label(lab.take()));
363                }
364
365                Terminal::Isz => {
366                    let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Isz, "ISZ")?;
367                    left = nl;
368                    tokens.push(instr.label(lab.take()));
369                }
370
371                // Register Ops
372                Terminal::Cla => {
373                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cla, "CLA")?;
374                    left = nl;
375                    tokens.push(instr.label(lab.take()));
376                }
377
378                Terminal::Cle => {
379                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cle, "CLE")?;
380                    left = nl;
381                    tokens.push(instr.label(lab.take()));
382                }
383
384                Terminal::Cma => {
385                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cma, "CMA")?;
386                    left = nl;
387                    tokens.push(instr.label(lab.take()));
388                }
389
390                Terminal::Cme => {
391                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cme, "CME")?;
392                    left = nl;
393                    tokens.push(instr.label(lab.take()));
394                }
395
396                Terminal::Cir => {
397                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cir, "CIR")?;
398                    left = nl;
399                    tokens.push(instr.label(lab.take()));
400                }
401
402                Terminal::Cil => {
403                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cil, "CIL")?;
404                    left = nl;
405                    tokens.push(instr.label(lab.take()));
406                }
407
408                Terminal::Inc => {
409                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Inc, "INC")?;
410                    left = nl;
411                    tokens.push(instr.label(lab.take()));
412                }
413
414                Terminal::Spa => {
415                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Spa, "SPA")?;
416                    left = nl;
417                    tokens.push(instr.label(lab.take()));
418                }
419
420                Terminal::Sna => {
421                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Sna, "SNA")?;
422                    left = nl;
423                    tokens.push(instr.label(lab.take()));
424                }
425
426                Terminal::Sze => {
427                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Sze, "SZE")?;
428                    left = nl;
429                    tokens.push(instr.label(lab.take()));
430                }
431
432                Terminal::Hlt => {
433                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Hlt, "HLT")?;
434                    left = nl;
435                    tokens.push(instr.label(lab.take()));
436                }
437
438                // IO Ops
439                Terminal::Inp => {
440                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Inp, "INP")?;
441                    left = nl;
442                    tokens.push(instr.label(lab.take()));
443                }
444
445                Terminal::Out => {
446                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Out, "OUT")?;
447                    left = nl;
448                    tokens.push(instr.label(lab.take()));
449                }
450
451                Terminal::Ski => {
452                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Ski, "SKI")?;
453                    left = nl;
454                    tokens.push(instr.label(lab.take()));
455                }
456
457                Terminal::Sko => {
458                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Sko, "SKO")?;
459                    left = nl;
460                    tokens.push(instr.label(lab.take()));
461                }
462
463                Terminal::Ion => {
464                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Ion, "ION")?;
465                    left = nl;
466                    tokens.push(instr.label(lab.take()));
467                }
468
469                Terminal::Iof => {
470                    let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Iof, "IOF")?;
471                    left = nl;
472                    tokens.push(instr.label(lab.take()));
473                }
474
475                // Directives
476                Terminal::Org => {
477                    if left[1].tval != LTokenVal::NonTerminal {
478                        return Err(ParseError {
479                            ctx,
480                            span: left[0].span,
481                            ty: ParseErrorType::DirectiveLiteralMissing,
482                        })
483                        .map_err(AssembleError::from);
484                    }
485
486                    if let Ok(v) = u16::from_str_radix(left[1].span.slice(&ctx.instr), 16) {
487                        let instr = ReferenceInstruction {
488                            span: left[0].span.join(left[1].span),
489                            instr: ReferenceToken::Org(v),
490                        };
491
492                        left = eat_nl_com(&left[2..], &ctx, left[0].span.join(left[1].span))?;
493                        tokens.push(instr);
494                    } else {
495                        return Err(ParseError {
496                            ctx,
497                            span: left[0].span.join(left[1].span),
498                            ty: ParseErrorType::LiteralHexValueFormat(left[1].span),
499                        })
500                        .map_err(AssembleError::from);
501                    }
502                }
503
504                Terminal::Hex => {
505                    if left[1].tval != LTokenVal::NonTerminal {
506                        return Err(ParseError {
507                            ctx,
508                            span: left[0].span,
509                            ty: ParseErrorType::DirectiveLiteralMissing,
510                        })
511                        .map_err(AssembleError::from);
512                    }
513
514                    if let Ok(v) = u16::from_str_radix(left[1].span.slice(&ctx.instr), 16) {
515                        let instr = ReferenceInstruction {
516                            span: left[0].span.join(left[1].span),
517                            instr: ReferenceToken::Hex(v),
518                        };
519
520                        left = eat_nl_com(&left[2..], &ctx, left[0].span.join(left[1].span))?;
521                        tokens.push(instr.label(lab.take()));
522                    } else {
523                        return Err(ParseError {
524                            ctx,
525                            span: left[0].span.join(left[1].span),
526                            ty: ParseErrorType::LiteralHexValueFormat(left[1].span),
527                        })
528                        .map_err(AssembleError::from);
529                    }
530                }
531
532                Terminal::Dec => {
533                    if left[1].tval != LTokenVal::NonTerminal {
534                        return Err(ParseError {
535                            ctx,
536                            span: left[0].span,
537                            ty: ParseErrorType::DirectiveLiteralMissing,
538                        })
539                        .map_err(AssembleError::from);
540                    }
541
542                    if let Ok(v) = i16::from_str_radix(left[1].span.slice(&ctx.instr), 10) {
543                        let instr = ReferenceInstruction {
544                            span: left[0].span.join(left[1].span),
545                            instr: ReferenceToken::Dec(v),
546                        };
547
548                        left = eat_nl_com(&left[2..], &ctx, left[0].span.join(left[1].span))?;
549                        tokens.push(instr.label(lab.take()));
550                    } else {
551                        return Err(ParseError {
552                            ctx,
553                            span: left[0].span.join(left[1].span),
554                            ty: ParseErrorType::LiteralDecValueFormat(left[1].span),
555                        })
556                        .map_err(AssembleError::from);
557                    }
558                }
559
560                // Much newlines yum yum yum
561                Terminal::Newline => {
562                    left = eat_nl_com(&left[..], &ctx, left[0].span)?;
563                }
564                Terminal::Comma => {
565                    return Err(ParseError {
566                        ctx,
567                        span: left[0].span,
568                        ty: ParseErrorType::BareComma,
569                    })
570                    .map_err(AssembleError::from)
571                }
572                Terminal::CommentStart => {
573                    left = eat_nl_com(&left[..], &ctx, left[0].span)?;
574                }
575                Terminal::Indirection => {
576                    return Err(ParseError {
577                        ctx,
578                        span: left[0].span,
579                        ty: ParseErrorType::BareIndirection,
580                    })
581                    .map_err(AssembleError::from)
582                }
583            },
584        }
585    }
586
587    info!(
588        "Best heur for this run would have been: {:.2}",
589        len as f32 / tokens.len() as f32
590    );
591
592    info!(
593        "Original Cap: {}, Cap: {}, Grew by: {}, wasted cap: {}",
594        len / HEUR,
595        tokens.capacity(),
596        tokens.capacity() - len / HEUR,
597        tokens.capacity() - tokens.len()
598    );
599
600    Ok(TokenTree {
601        ctx,
602        tokens: tokens.into_boxed_slice(),
603    })
604}