1use crate::{
7 exec::{self, DebugInfo},
8 inst::{self, InstSet, Op},
9 parse::lexer::{
10 ErrorKind, ErrorMap, LinearMemory, ParseError, Span, Token, TokensWithError, WithSpan,
11 },
12};
13use logos::Logos;
14use std::{
15 fmt::{Debug, Display},
16 marker::PhantomData,
17 ops::Range,
18 str::FromStr,
19};
20
21macro_rules! store_err {
22 ($store:expr, $span:expr, $err:expr) => {
23 $store.entry($span).or_insert($err)
24 };
25}
26
27type Line = Vec<WithSpan<Token>>;
28
29#[derive(Clone)]
30pub struct Parser<'a, I> {
31 #[allow(dead_code)]
32 pub src: &'a str,
33 lines: Vec<Line>,
34 err: ErrorMap,
35 debug_info: DebugInfo,
36 _inst_set: PhantomData<I>,
37}
38
39impl<'a, I> Parser<'a, I>
40where
41 I: InstSet,
42 <I as FromStr>::Err: Display,
43{
44 pub fn new(src: &'a str) -> Self {
45 let (lines, err) = TokensWithError(Token::lexer(src)).lines();
46 Self {
47 src,
48 lines,
49 err,
50 debug_info: DebugInfo::default(),
51 _inst_set: PhantomData,
52 }
53 }
54
55 fn get_inst(line: &[WithSpan<Token>]) -> Result<Option<WithSpan<Inst<I>>>, ParseError> {
56 let span = {
57 let ((s, _), (e, _)) = (line.first().unwrap(), line.last().unwrap());
58 s.start..e.end
59 };
60
61 let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
62
63 let (addr, (opcodeidx, opcode), (restidx, rest)) = match rawline.as_slice() {
64 [Token::BareNumber(addr), Token::Text(opcode), rest @ ..] => {
65 (Some(Addr::Bare(*addr)), (1, opcode), (2, rest))
66 }
67 [Token::Text(label), Token::Colon, Token::Text(opcode), rest @ ..] => {
68 (Some(Addr::Label(label.clone())), (2, opcode), (3, rest))
69 }
70 [Token::Text(opcode), rest @ ..] => (None, (0, opcode), (1, rest)),
71 [] => return Ok(None),
72 _ => {
73 return Err((span, ErrorKind::SyntaxError));
74 }
75 };
76
77 let opcode = match I::from_str(opcode) {
78 Ok(s) => s,
79 Err(e) => {
80 let span = line[opcodeidx].0.clone();
81
82 return Err((span, ErrorKind::InvalidOpcode(e.to_string())));
83 }
84 };
85
86 if let Some((idx, _)) = rest.iter().enumerate().find(|(_, t)| {
87 !matches!(
88 t,
89 Token::Gpr(_)
90 | Token::BareNumber(_)
91 | Token::Text(_)
92 | Token::Comma
93 | Token::Literal(_)
94 | Token::Indirect(_)
95 )
96 }) {
97 let span = line[restidx + idx].0.clone();
98
99 return Err((span, ErrorKind::InvalidOperand));
100 }
101
102 let mut ops = rest
103 .iter()
104 .filter(|t| !matches!(t, Token::Comma))
105 .cloned()
106 .map(Op::from)
107 .collect::<Vec<_>>();
108
109 let op = match ops.len() {
110 0 => Op::Null,
111 1 => ops.pop().unwrap(),
112 _ => Op::MultiOp(ops),
113 };
114
115 debug!(
116 "{:>4}\t{:>4}\t{:<4}",
117 addr.as_ref().map(ToString::to_string).unwrap_or_default(),
118 opcode,
119 op,
120 );
121
122 Ok(Some((span, Inst { addr, opcode, op })))
123 }
124
125 fn get_mem(line: &[WithSpan<Token>]) -> Result<Option<MemEnum>, ParseError> {
126 enum DataEnum {
127 LinearMemory(LinearMemory),
128 Normal(usize),
129 }
130
131 let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
132
133 let (&Range { start, .. }, &Range { end, .. }) = if rawline.is_empty() {
134 return Ok(None);
135 } else {
136 (&line.first().unwrap().0, &line.last().unwrap().0)
137 };
138
139 let get_data = |t: &[Token], start_idx: usize| -> Result<DataEnum, ParseError> {
140 match t {
141 &[Token::BareNumber(n)] => Ok(DataEnum::Normal(n)),
142 &[Token::LinearMemory(mem)] => Ok(DataEnum::LinearMemory(mem)),
143 [] => Ok(DataEnum::Normal(0)),
144 _ => Err((line[start_idx].0.start..end, ErrorKind::SyntaxError)),
145 }
146 };
147
148 match rawline.as_slice() {
149 &[Token::BareNumber(addr), ref rest @ ..] => {
150 let res = match get_data(rest, 1)? {
151 DataEnum::LinearMemory(mem) => Some(MemEnum::Linear(
152 (addr..addr + mem.len)
153 .map(Addr::Bare)
154 .map(move |addr| (addr, mem.init))
155 .map(Mem::from)
156 .collect(),
157 )),
158 DataEnum::Normal(data) => Some(MemEnum::One(Mem {
159 addr: Addr::Bare(addr),
160 data,
161 })),
162 };
163
164 Ok(res)
165 }
166 [Token::Text(label), Token::Colon, rest @ ..] => Ok(Some(MemEnum::One(Mem {
167 addr: Addr::Label(label.clone()),
168 data: match get_data(rest, 2)? {
169 DataEnum::LinearMemory(_) => Err((start..end, ErrorKind::SyntaxError))?,
170 DataEnum::Normal(data) => data,
171 },
172 }))),
173 [] => Ok(None),
174 _ => Err((start..end, ErrorKind::SyntaxError)),
175 }
176 }
177
178 fn get_insts_and_mems(&mut self) -> (Vec<Span>, Vec<Inst<I>>, Vec<Mem>) {
179 let mut blocks = self
180 .lines
181 .split(Vec::is_empty)
182 .filter(|v| !v.is_empty())
183 .collect::<Vec<_>>();
184
185 assert!(blocks.len() >= 2, "Unable to parse. Your source may not contain blank line(s) between the program and the memory, or the memory might be absent");
186
187 let mems = blocks
188 .pop()
189 .unwrap()
190 .iter()
191 .map(|line| Self::get_mem(line))
192 .filter_map(|res| match res {
193 Ok(mem @ Some(_)) => mem,
194 Ok(None) => None,
195 Err((span, err)) => {
196 store_err!(self.err, span, err);
197 None
198 }
199 })
200 .fold(Vec::new(), |mut acc, mem| {
201 match mem {
202 MemEnum::Linear(mems) => acc.extend(mems),
203 MemEnum::One(mem) => acc.push(mem),
204 }
205
206 acc
207 });
208
209 let (inst_spans, insts): (Vec<_>, Vec<_>) = blocks
210 .concat()
211 .iter()
212 .map(|line| Self::get_inst(line))
213 .filter_map(|res| match res {
214 Ok(inst @ Some(_)) => inst,
215 Ok(None) => None,
216 Err((span, err)) => {
217 store_err!(self.err, span, err);
218 None
219 }
220 })
221 .unzip();
222
223 (inst_spans, insts, mems)
224 }
225
226 #[allow(clippy::type_complexity)]
227 pub fn parse(mut self) -> Result<(Vec<InstIr<I>>, Vec<MemIr>, DebugInfo), ErrorMap> {
228 let (inst_spans, mut insts, mut mems) = self.get_insts_and_mems();
229
230 self.debug_info.inst_spans = inst_spans;
231
232 linker::Linker::new(&mut insts, &mut mems).link();
233
234 if self.err.is_empty() {
235 Ok((
236 insts
237 .into_iter()
238 .map(InstIr::try_from)
239 .collect::<Result<Vec<_>, _>>()
240 .unwrap(),
241 mems.into_iter()
242 .map(MemIr::try_from)
243 .collect::<Result<Vec<_>, _>>()
244 .unwrap(),
245 self.debug_info,
246 ))
247 } else {
248 Err(self.err)
249 }
250 }
251}
252
253mod linker {
254 use super::{Addr, Debug, Display, Inst, Mem, Op};
255 use crate::inst::InstSet;
256 use std::{
257 collections::{HashMap, HashSet},
258 ops::Deref,
259 };
260
261 #[derive(Debug, Clone, Copy)]
262 enum Src {
263 Prog(usize),
264 Mem(usize),
265 }
266
267 impl From<Src> for Op {
268 fn from(value: Src) -> Self {
269 match value {
270 Src::Mem(addr) | Src::Prog(addr) => Op::Addr(addr),
271 }
272 }
273 }
274
275 #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
276 enum Instance {
277 MultiOp(usize, usize),
278 Single(usize),
279 }
280
281 #[derive(Debug, Clone, Default)]
282 struct SymbolData {
283 source: Option<Src>,
284 instances: HashSet<Instance>,
285 }
286
287 #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
288 enum Symbol {
289 Label(&'static str),
290 Addr(usize),
291 }
292
293 impl From<&Addr> for Symbol {
294 fn from(value: &Addr) -> Self {
295 match value {
296 &Addr::Bare(addr) => Self::Addr(addr),
297 Addr::Label(label) => label.into(),
298 }
299 }
300 }
301
302 impl<'a> From<&'a String> for Symbol {
303 fn from(value: &'a String) -> Self {
304 Self::Label(Box::leak(value.clone().into_boxed_str()))
306 }
307 }
308
309 impl Display for Symbol {
310 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311 match self {
312 Symbol::Label(s) => Display::fmt(&s, f),
313 Symbol::Addr(addr) => Display::fmt(&addr, f),
314 }
315 }
316 }
317
318 impl TryFrom<&Op> for Symbol {
319 type Error = ();
320
321 fn try_from(value: &Op) -> Result<Self, Self::Error> {
322 match value {
323 &Op::Addr(addr) => Ok(Symbol::Addr(addr)),
324 Op::Fail(s) => Ok(s.into()),
325 Op::Indirect(op) => op.deref().try_into(),
326 _ => Err(()),
327 }
328 }
329 }
330
331 type SymbolTableInner = HashMap<Symbol, SymbolData>;
332
333 #[derive(Debug, Clone)]
334 struct SymbolTable(SymbolTableInner);
335
336 impl IntoIterator for SymbolTable {
337 type Item = (Src, HashSet<Instance>);
338 type IntoIter = std::iter::Map<
339 <SymbolTableInner as IntoIterator>::IntoIter,
340 fn(<SymbolTableInner as IntoIterator>::Item) -> Self::Item,
341 >;
342
343 fn into_iter(self) -> Self::IntoIter {
344 self.0
345 .into_iter()
346 .map(|(sym, SymbolData { source, instances })| {
347 (
348 source.unwrap_or_else(|| panic!("{sym} is undefined")),
349 instances,
350 )
351 })
352 }
353 }
354
355 impl SymbolTable {
356 pub fn new() -> Self {
357 Self(HashMap::new())
358 }
359
360 pub fn add_instance(&mut self, symbol: Symbol, instance: Instance) {
361 self.0
362 .entry(symbol)
363 .and_modify(|SymbolData { instances, .. }| {
364 (instances).insert(instance);
365 })
366 .or_insert_with(|| {
367 let mut instances = HashSet::new();
368 instances.insert(instance);
369 SymbolData {
370 source: None,
371 instances,
372 }
373 });
374 }
375
376 pub fn add_src(&mut self, symbol: Symbol, src: Src) {
377 self.0
378 .entry(symbol)
379 .and_modify(|SymbolData { source, .. }| {
380 if source.is_some() {
381 panic!("{symbol} is defined multiple times");
382 } else {
383 *source = Some(src);
384 }
385 }); }
387 }
388
389 impl Op {
390 fn link(&mut self, src: Src, mop_idx: Option<usize>) {
391 match self {
392 Op::Addr(_) | Op::Fail(_) => *self = src.into(),
393 Op::MultiOp(ops) if mop_idx.is_some() => ops[mop_idx.unwrap()] = src.into(),
394 Op::Indirect(op) => op.link(src, mop_idx),
395 _ => panic!("Symbol linking failed. Please report this error.\nop: {self:?}\nsrc: {src:?}\nmop_idx: {mop_idx:?}"),
396 }
397 }
398 }
399
400 pub struct Linker<'inst, 'mem, I> {
401 symbol_table: SymbolTable,
402 used_addrs: HashSet<usize>,
403 program: &'inst mut [Inst<I>],
404 memory: &'mem mut [Mem],
405 }
406
407 impl<'inst, 'mem, I> Linker<'inst, 'mem, I>
408 where
409 I: InstSet,
410 <I as std::str::FromStr>::Err: Display,
411 {
412 pub fn new(prog: &'inst mut [Inst<I>], mem: &'mem mut [Mem]) -> Self {
413 Self {
414 symbol_table: SymbolTable::new(),
415 used_addrs: HashSet::new(),
416 program: prog,
417 memory: mem,
418 }
419 }
420
421 fn find_symbols(&mut self) {
422 for (idx, Inst { op, .. }) in self.program.iter().enumerate() {
423 match op {
424 Op::MultiOp(ops) => {
425 for (mop_idx, sym) in ops
426 .iter()
427 .enumerate()
428 .filter_map(|(idx, op)| Symbol::try_from(op).map(|sym| (idx, sym)).ok())
429 {
430 self.symbol_table
431 .add_instance(sym, Instance::MultiOp(idx, mop_idx));
432 }
433 }
434 op => {
435 if let Ok(sym) = Symbol::try_from(op) {
437 self.symbol_table.add_instance(sym, Instance::Single(idx));
438 }
439 }
440 }
441 }
442 }
443
444 fn find_symbol_sources(&mut self) {
445 for (idx, Inst { addr, .. }) in self.program.iter().enumerate() {
447 if let Some(addr) = addr {
448 self.symbol_table.add_src(addr.into(), Src::Prog(idx));
450 }
451 }
452
453 for addr in self.memory.iter().filter_map(|Mem { addr, .. }| {
455 if let &Addr::Bare(addr) = addr {
456 Some(addr)
457 } else {
458 None
459 }
460 }) {
461 self.symbol_table
462 .add_src(Symbol::Addr(addr), Src::Mem(addr));
463 assert!(
464 self.used_addrs.insert(addr),
465 "{addr:?} is used multiple times"
466 );
467 }
468 }
469
470 fn readdress(&mut self) {
471 for (idx, Inst { addr, .. }) in self.program.iter_mut().enumerate() {
472 *addr = Some(Addr::Bare(idx));
473 }
474
475 let mut counter = 0;
476
477 for addr in self.memory.iter_mut().filter_map(|Mem { addr, .. }| {
478 if matches!(addr, Addr::Label(_)) {
479 Some(addr)
480 } else {
481 None
482 }
483 }) {
484 while self.used_addrs.contains(&counter) {
486 counter += 1;
487 }
488 self.symbol_table
489 .add_src(Symbol::from(&addr.clone()), Src::Mem(counter));
490
491 *addr = Addr::Bare(counter);
492
493 counter += 1;
494 }
495 }
496
497 pub fn link(mut self) {
498 self.find_symbols();
499 self.find_symbol_sources();
500 self.readdress();
501
502 for (src, instances) in self.symbol_table {
503 for instance in instances {
504 match instance {
505 Instance::MultiOp(idx, mop_idx) => {
506 self.program[idx].op.link(src, Some(mop_idx));
507 }
508 Instance::Single(idx) => self.program[idx].op.link(src, None),
509 }
510 }
511 }
512 }
513 }
514}
515
516#[derive(Debug, Clone)]
517pub enum Addr {
518 Bare(usize),
519 Label(String),
520}
521
522impl Display for Addr {
523 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
524 match self {
525 Self::Bare(addr) => write!(f, "{addr}"),
526 Self::Label(label) => write!(f, "{label}:"),
527 }
528 }
529}
530
531pub struct InstIr<I>
532where
533 I: InstSet,
534 <I as FromStr>::Err: Display,
535{
536 pub addr: usize,
537 pub inst: inst::Inst<I>,
538}
539
540impl<I> TryFrom<Inst<I>> for InstIr<I>
541where
542 I: InstSet,
543 <I as FromStr>::Err: Display,
544{
545 type Error = ();
546
547 fn try_from(Inst { addr, opcode, op }: Inst<I>) -> Result<Self, Self::Error> {
548 if let Some(Addr::Bare(addr)) = addr {
549 Ok(Self::new(addr, opcode, op))
550 } else {
551 Err(())
552 }
553 }
554}
555
556impl<I> InstIr<I>
557where
558 I: InstSet,
559 <I as FromStr>::Err: Display,
560{
561 pub fn new(addr: usize, opcode: I, op: Op) -> Self {
562 Self {
563 addr,
564 inst: inst::Inst::new(opcode, op),
565 }
566 }
567}
568
569impl<I> From<InstIr<I>> for (usize, exec::ExecInst)
570where
571 I: InstSet,
572 <I as FromStr>::Err: Display,
573{
574 fn from(InstIr { addr, inst }: InstIr<I>) -> Self {
575 (addr, inst.to_exec_inst())
576 }
577}
578
579pub struct Inst<I> {
580 pub addr: Option<Addr>,
581 pub opcode: I,
582 pub op: Op,
583}
584
585impl<I> Debug for Inst<I>
586where
587 I: Display,
588{
589 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
590 f.debug_struct("Inst")
591 .field("addr", &self.addr)
592 .field("opcode", &self.opcode.to_string())
593 .field("op", &self.op)
594 .finish()
595 }
596}
597
598impl<I> Display for Inst<I>
599where
600 I: Display,
601{
602 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
603 write!(
604 f,
605 "{} {} {}",
606 self.addr
607 .as_ref()
608 .map(ToString::to_string)
609 .unwrap_or_default(),
610 self.opcode,
611 self.op
612 )
613 }
614}
615
616enum MemEnum {
617 Linear(Vec<Mem>),
618 One(Mem),
619}
620
621pub struct Mem {
622 pub addr: Addr,
623 pub data: usize,
624}
625
626impl From<(Addr, usize)> for Mem {
627 fn from((addr, data): (Addr, usize)) -> Self {
628 Self { addr, data }
629 }
630}
631
632pub struct MemIr {
633 pub addr: usize,
634 pub data: usize,
635}
636
637impl TryFrom<Mem> for MemIr {
638 type Error = ();
639
640 fn try_from(Mem { addr, data }: Mem) -> Result<Self, Self::Error> {
641 if let Addr::Bare(addr) = addr {
642 Ok(Self { addr, data })
643 } else {
644 Err(())
645 }
646 }
647}