1use super::*;
2use lex::{Terminal, Token as LToken, TokenVal as LTokenVal, Tokens as LTokens};
3
4use log::info;
5
/// Estimated number of lexer tokens that make up one parsed instruction.
///
/// `parse` divides the lexer token count by this value to pre-size its output
/// vector, and logs the ratio that would have been ideal for each run.
const HEUR: usize = 4;
7
8#[derive(Debug)]
9pub enum ParseErrorType {
10 ExpectedComma,
13
14 MultiLabel(Span, Span),
16
17 ReservedReference(&'static str, Span),
20
21 ExpectedIndirection(&'static str, Span),
24
25 BareIndirection,
27
28 BareComma,
30
31 NoArgumentsExpected(&'static str, Span),
33
34 DirectiveLiteralMissing,
35
36 LiteralHexValueFormat(Span),
39
40 LiteralDecValueFormat(Span),
42
43 UnexpectedToken(Span),
45}
46
47#[derive(Debug)]
48pub struct ParseError<'a> {
49 pub ctx: ParseContext<'a>,
50 pub span: Span,
51 pub ty: ParseErrorType,
52}
53
54impl<'t> HeadlineError for ParseError<'t> {
55 fn headline(&self) -> String {
56 match self.ty {
57 ParseErrorType::ExpectedComma => format!("Expected a comma after non-keyword"),
58 ParseErrorType::MultiLabel(_, _) => format!("May not place multiple labels on the same memory location!"),
59 ParseErrorType::ReservedReference(r, _) => format!("Instruction {} takes a label reference, not an instruction.", r),
60 ParseErrorType::ExpectedIndirection(i, _) => format!("Instruction {} takes only a label and an optional Indirection flag 'I'", i),
61 ParseErrorType::BareIndirection => format!("Indirection flag 'I' is a reserverd keyword and may only be used on memory operations"),
62 ParseErrorType::BareComma => format!("The comma ',' is a reserved token and may only be used directly after a label"),
63 ParseErrorType::NoArgumentsExpected(i, _) => format!("Instruction {} does not take any arguments", i),
64 ParseErrorType::DirectiveLiteralMissing => format!("Expected a literal value after directive"),
65 ParseErrorType::LiteralHexValueFormat(_) => format!("Could not parse value as Hexidecimal value literal"),
66 ParseErrorType::LiteralDecValueFormat(_) => format!("Could not parse value as Decimal value literal"),
67 ParseErrorType::UnexpectedToken(_) => format!("Instructions and directives must be terminated with a newline"),
68 }
69 }
70
71 fn body(&self) -> String {
72 match self.ty {
73 ParseErrorType::ExpectedComma => self.span.into_set().red_ctx(&self.ctx, 2),
74 ParseErrorType::MultiLabel(a, b) => a.into_set().insert(b).red_ctx(&self.ctx, 2),
75 ParseErrorType::ReservedReference(_, s) => s.into_set().red_ctx(&self.ctx, 2),
76 ParseErrorType::ExpectedIndirection(_, s) => s.into_set().red_ctx(&self.ctx, 2),
77 ParseErrorType::BareIndirection => self.span.into_set().red_ctx(&self.ctx, 2),
78 ParseErrorType::BareComma => self.span.into_set().red_ctx(&self.ctx, 2),
79 ParseErrorType::NoArgumentsExpected(_, s) => s.into_set().red_ctx(&self.ctx, 2),
80 ParseErrorType::DirectiveLiteralMissing => self.span.into_set().red_ctx(&self.ctx, 2),
81 ParseErrorType::LiteralHexValueFormat(s) => s.into_set().red_ctx(&self.ctx, 2),
82 ParseErrorType::LiteralDecValueFormat(s) => s.into_set().red_ctx(&self.ctx, 2),
83 ParseErrorType::UnexpectedToken(s) => s.into_set().red_ctx(&self.ctx, 2),
84 }
85 }
86}
87
88#[derive(Debug)]
89pub enum ReferenceToken {
90 LabelDef(Span, Box<ReferenceInstruction>),
91
92 And(Span, bool),
93 Add(Span, bool),
94 Lda(Span, bool),
95 Sta(Span, bool),
96 Bun(Span, bool),
97 Bsa(Span, bool),
98 Isz(Span, bool),
99
100 Cla(),
101 Cle(),
102 Cma(),
103 Cme(),
104 Cir(),
105 Cil(),
106 Inc(),
107 Spa(),
108 Sna(),
109 Sze(),
110 Hlt(),
111
112 Inp(),
114 Out(),
115 Ski(),
116 Sko(),
117 Ion(),
118 Iof(),
119
120 Org(u16),
121 Hex(u16),
122 Dec(i16),
123}
124
125#[derive(Debug)]
126pub struct ReferenceInstruction {
127 pub span: Span,
128 pub instr: ReferenceToken,
129}
130
131impl ReferenceInstruction {
132 fn new_mem<C: Fn(Span, bool) -> ReferenceToken>(
133 ins: Span,
134 reference: Span,
135 ind: bool,
136 c: C,
137 ) -> ReferenceInstruction {
138 ReferenceInstruction {
139 span: ins,
140 instr: c(reference, ind),
141 }
142 }
143
144 fn new_reg_io<C: Fn() -> ReferenceToken>(ins: Span, c: C) -> ReferenceInstruction {
145 ReferenceInstruction {
146 span: ins,
147 instr: c(),
148 }
149 }
150
151 fn label(self, lab: Option<Span>) -> Self {
152 match lab {
153 Some(lab) => ReferenceInstruction {
154 span: lab.join(self.span),
155 instr: ReferenceToken::LabelDef(lab, Box::new(self)),
156 },
157 None => self,
158 }
159 }
160}
161
162#[derive(Debug)]
163pub struct TokenTree<'a> {
164 pub ctx: ParseContext<'a>,
165 pub tokens: Box<[ReferenceInstruction]>,
166}
167
168fn mem_op<'l, 'c, 'a, C: Fn(Span, bool) -> ReferenceToken>(
169 left: &'l [LToken],
170 ctx: &'c ParseContext<'a>,
171 ty: C,
172 ty_name: &'static str,
173) -> Result<'a, (&'l [LToken], ReferenceInstruction)> {
174 if left[1].tval != LTokenVal::NonTerminal {
175 return Err(ParseError {
176 ctx: ctx.clone(),
177 span: left[0].span.join(left[2].span),
178 ty: ParseErrorType::ReservedReference(ty_name, left[2].span),
179 })
180 .map_err(ParseError::into);
181 }
182
183 let indirect = match left[2].tval {
184 LTokenVal::Terminal(Terminal::Indirection) => true,
185 LTokenVal::Terminal(Terminal::CommentStart) => false,
186 LTokenVal::Terminal(Terminal::Newline) => false,
187 _ => {
188 return Err(ParseError {
189 ctx: ctx.clone(),
190 span: left[0].span.join(left[2].span),
191 ty: ParseErrorType::ExpectedIndirection(ty_name, left[2].span),
192 })
193 .map_err(ParseError::into);
194 }
195 };
196
197 if indirect {
198 let instr =
199 ReferenceInstruction::new_mem(left[0].span.join(left[2].span), left[1].span, true, ty);
200
201 Ok((eat_nl_com(&left[3..], ctx, instr.span)?, instr))
202 } else {
203 let instr =
204 ReferenceInstruction::new_mem(left[0].span.join(left[1].span), left[1].span, false, ty);
205
206 Ok((eat_nl_com(&left[2..], ctx, instr.span)?, instr))
207 }
208}
209
210fn reg_op<'l, 'c, 'a, C: Fn() -> ReferenceToken>(
211 left: &'l [LToken],
212 ctx: &'c ParseContext<'a>,
213 ty: C,
214 ty_name: &'static str,
215) -> Result<'a, (&'l [LToken], ReferenceInstruction)> {
216 if left.len() > 1 {
217 match left[1].tval {
218 LTokenVal::Terminal(Terminal::CommentStart)
219 | LTokenVal::Terminal(Terminal::Newline) => {
220 let instr = ReferenceInstruction::new_reg_io(left[0].span, ty);
221 Ok((eat_nl_com(&left[1..], ctx, instr.span)?, instr))
222 }
223 _ => Err(ParseError {
224 ctx: ctx.clone(),
225 span: left[0].span.join(left[1].span),
226 ty: ParseErrorType::NoArgumentsExpected(ty_name, left[1].span),
227 })
228 .map_err(ParseError::into),
229 }
230 } else {
231 let instr = ReferenceInstruction::new_reg_io(left[0].span, ty);
232 Ok((&left[0..0], instr))
233 }
234}
235
236fn eat_nl_com<'l, 'c, 'a>(
238 mut left: &'l [LToken],
239 ctx: &'c ParseContext<'a>,
240 last_s: Span,
241) -> Result<'a, &'l [LToken]> {
242 let mut ate = 0;
243
244 loop {
245 if left.len() == 0 {
247 return Ok(left);
248 }
249
250 match left[0].tval {
251 LTokenVal::Terminal(Terminal::Newline) => {
252 ate += 1;
253 left = &left[1..];
254 }
255 LTokenVal::Terminal(Terminal::CommentStart) | LTokenVal::Ignored => {
256 left = &left[1..];
257 }
258 _ => break,
259 }
260 }
261
262 if ate != 0 {
263 Ok(left)
264 } else {
265 if let LTokenVal::Terminal(_) = left[0].tval {
266 Err(ParseError {
267 ctx: ctx.clone(),
268 span: last_s.join(left[0].span),
269 ty: ParseErrorType::UnexpectedToken(left[0].span),
270 })
271 .map_err(AssembleError::from)
272 } else {
273 Err(ParseError {
274 ctx: ctx.clone(),
275 span: last_s.join(left[0].span),
276 ty: ParseErrorType::NoArgumentsExpected("", left[0].span),
277 })
278 .map_err(AssembleError::from)
279 }
280 }
281}
282
283pub fn parse(
284 LTokens {
285 ctx,
286 tokens: ltokens,
287 }: LTokens,
288) -> Result<TokenTree> {
289 info!("Beginning parse step with heur of {}", HEUR);
290 let len = ltokens.len();
291
292 let mut tokens = Vec::with_capacity(len / HEUR);
293
294 let mut left = eat_nl_com(&*ltokens, &ctx, ltokens[0].span).unwrap_or_else(|_| &*ltokens);
296
297 let mut lab: Option<Span> = None;
298
299 while left.len() > 0 {
300 match left[0].tval {
301 LTokenVal::NonTerminal => {
302 if left[1].tval == LTokenVal::Terminal(Terminal::Comma) {
303 if let Some(sp) = lab {
304 Err(ParseError {
305 ctx: ctx.clone(),
306 span: sp.join(left[0].span),
307 ty: ParseErrorType::MultiLabel(sp, left[0].span),
308 })?;
309 } else {
310 lab = Some(left[0].span);
311 left = &left[2..];
312 }
313 } else {
314 Err(ParseError {
315 ctx: ctx.clone(),
316 span: left[0].span.join(left[1].span),
317 ty: ParseErrorType::ExpectedComma,
318 })?;
319 }
320 }
321
322 LTokenVal::Ignored => {
323 unreachable!(
324 "Comment values should only ever be handled in the CommentStart terminal case"
325 );
326 }
327
328 LTokenVal::Terminal(t) => match t {
329 Terminal::And => {
330 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::And, "AND")?;
331 left = nl;
332 tokens.push(instr.label(lab.take()));
333 }
334
335 Terminal::Add => {
336 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Add, "ADD")?;
337 left = nl;
338 tokens.push(instr.label(lab.take()));
339 }
340
341 Terminal::Lda => {
342 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Lda, "LDA")?;
343 left = nl;
344 tokens.push(instr.label(lab.take()));
345 }
346
347 Terminal::Sta => {
348 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Sta, "STA")?;
349 left = nl;
350 tokens.push(instr.label(lab.take()));
351 }
352
353 Terminal::Bun => {
354 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Bun, "BUN")?;
355 left = nl;
356 tokens.push(instr.label(lab.take()));
357 }
358
359 Terminal::Bsa => {
360 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Bsa, "BSA")?;
361 left = nl;
362 tokens.push(instr.label(lab.take()));
363 }
364
365 Terminal::Isz => {
366 let (nl, instr) = mem_op(left, &ctx, ReferenceToken::Isz, "ISZ")?;
367 left = nl;
368 tokens.push(instr.label(lab.take()));
369 }
370
371 Terminal::Cla => {
373 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cla, "CLA")?;
374 left = nl;
375 tokens.push(instr.label(lab.take()));
376 }
377
378 Terminal::Cle => {
379 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cle, "CLE")?;
380 left = nl;
381 tokens.push(instr.label(lab.take()));
382 }
383
384 Terminal::Cma => {
385 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cma, "CMA")?;
386 left = nl;
387 tokens.push(instr.label(lab.take()));
388 }
389
390 Terminal::Cme => {
391 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cme, "CME")?;
392 left = nl;
393 tokens.push(instr.label(lab.take()));
394 }
395
396 Terminal::Cir => {
397 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cir, "CIR")?;
398 left = nl;
399 tokens.push(instr.label(lab.take()));
400 }
401
402 Terminal::Cil => {
403 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Cil, "CIL")?;
404 left = nl;
405 tokens.push(instr.label(lab.take()));
406 }
407
408 Terminal::Inc => {
409 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Inc, "INC")?;
410 left = nl;
411 tokens.push(instr.label(lab.take()));
412 }
413
414 Terminal::Spa => {
415 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Spa, "SPA")?;
416 left = nl;
417 tokens.push(instr.label(lab.take()));
418 }
419
420 Terminal::Sna => {
421 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Sna, "SNA")?;
422 left = nl;
423 tokens.push(instr.label(lab.take()));
424 }
425
426 Terminal::Sze => {
427 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Sze, "SZE")?;
428 left = nl;
429 tokens.push(instr.label(lab.take()));
430 }
431
432 Terminal::Hlt => {
433 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Hlt, "HLT")?;
434 left = nl;
435 tokens.push(instr.label(lab.take()));
436 }
437
438 Terminal::Inp => {
440 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Inp, "INP")?;
441 left = nl;
442 tokens.push(instr.label(lab.take()));
443 }
444
445 Terminal::Out => {
446 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Out, "OUT")?;
447 left = nl;
448 tokens.push(instr.label(lab.take()));
449 }
450
451 Terminal::Ski => {
452 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Ski, "SKI")?;
453 left = nl;
454 tokens.push(instr.label(lab.take()));
455 }
456
457 Terminal::Sko => {
458 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Sko, "SKO")?;
459 left = nl;
460 tokens.push(instr.label(lab.take()));
461 }
462
463 Terminal::Ion => {
464 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Ion, "ION")?;
465 left = nl;
466 tokens.push(instr.label(lab.take()));
467 }
468
469 Terminal::Iof => {
470 let (nl, instr) = reg_op(left, &ctx, ReferenceToken::Iof, "IOF")?;
471 left = nl;
472 tokens.push(instr.label(lab.take()));
473 }
474
475 Terminal::Org => {
477 if left[1].tval != LTokenVal::NonTerminal {
478 return Err(ParseError {
479 ctx,
480 span: left[0].span,
481 ty: ParseErrorType::DirectiveLiteralMissing,
482 })
483 .map_err(AssembleError::from);
484 }
485
486 if let Ok(v) = u16::from_str_radix(left[1].span.slice(&ctx.instr), 16) {
487 let instr = ReferenceInstruction {
488 span: left[0].span.join(left[1].span),
489 instr: ReferenceToken::Org(v),
490 };
491
492 left = eat_nl_com(&left[2..], &ctx, left[0].span.join(left[1].span))?;
493 tokens.push(instr);
494 } else {
495 return Err(ParseError {
496 ctx,
497 span: left[0].span.join(left[1].span),
498 ty: ParseErrorType::LiteralHexValueFormat(left[1].span),
499 })
500 .map_err(AssembleError::from);
501 }
502 }
503
504 Terminal::Hex => {
505 if left[1].tval != LTokenVal::NonTerminal {
506 return Err(ParseError {
507 ctx,
508 span: left[0].span,
509 ty: ParseErrorType::DirectiveLiteralMissing,
510 })
511 .map_err(AssembleError::from);
512 }
513
514 if let Ok(v) = u16::from_str_radix(left[1].span.slice(&ctx.instr), 16) {
515 let instr = ReferenceInstruction {
516 span: left[0].span.join(left[1].span),
517 instr: ReferenceToken::Hex(v),
518 };
519
520 left = eat_nl_com(&left[2..], &ctx, left[0].span.join(left[1].span))?;
521 tokens.push(instr.label(lab.take()));
522 } else {
523 return Err(ParseError {
524 ctx,
525 span: left[0].span.join(left[1].span),
526 ty: ParseErrorType::LiteralHexValueFormat(left[1].span),
527 })
528 .map_err(AssembleError::from);
529 }
530 }
531
532 Terminal::Dec => {
533 if left[1].tval != LTokenVal::NonTerminal {
534 return Err(ParseError {
535 ctx,
536 span: left[0].span,
537 ty: ParseErrorType::DirectiveLiteralMissing,
538 })
539 .map_err(AssembleError::from);
540 }
541
542 if let Ok(v) = i16::from_str_radix(left[1].span.slice(&ctx.instr), 10) {
543 let instr = ReferenceInstruction {
544 span: left[0].span.join(left[1].span),
545 instr: ReferenceToken::Dec(v),
546 };
547
548 left = eat_nl_com(&left[2..], &ctx, left[0].span.join(left[1].span))?;
549 tokens.push(instr.label(lab.take()));
550 } else {
551 return Err(ParseError {
552 ctx,
553 span: left[0].span.join(left[1].span),
554 ty: ParseErrorType::LiteralDecValueFormat(left[1].span),
555 })
556 .map_err(AssembleError::from);
557 }
558 }
559
560 Terminal::Newline => {
562 left = eat_nl_com(&left[..], &ctx, left[0].span)?;
563 }
564 Terminal::Comma => {
565 return Err(ParseError {
566 ctx,
567 span: left[0].span,
568 ty: ParseErrorType::BareComma,
569 })
570 .map_err(AssembleError::from)
571 }
572 Terminal::CommentStart => {
573 left = eat_nl_com(&left[..], &ctx, left[0].span)?;
574 }
575 Terminal::Indirection => {
576 return Err(ParseError {
577 ctx,
578 span: left[0].span,
579 ty: ParseErrorType::BareIndirection,
580 })
581 .map_err(AssembleError::from)
582 }
583 },
584 }
585 }
586
587 info!(
588 "Best heur for this run would have been: {:.2}",
589 len as f32 / tokens.len() as f32
590 );
591
592 info!(
593 "Original Cap: {}, Cap: {}, Grew by: {}, wasted cap: {}",
594 len / HEUR,
595 tokens.capacity(),
596 tokens.capacity() - len / HEUR,
597 tokens.capacity() - tokens.len()
598 );
599
600 Ok(TokenTree {
601 ctx,
602 tokens: tokens.into_boxed_slice(),
603 })
604}