cambridge_asm/parse/
parser.rs

1// Copyright (c) 2021 Saadi Save
2// This Source Code Form is subject to the terms of the Mozilla Public
3// License, v. 2.0. If a copy of the MPL was not distributed with this
4// file, You can obtain one at http://mozilla.org/MPL/2.0/.
5
6use crate::{
7    exec::{self, DebugInfo},
8    inst::{self, InstSet, Op},
9    parse::lexer::{
10        ErrorKind, ErrorMap, LinearMemory, ParseError, Span, Token, TokensWithError, WithSpan,
11    },
12};
13use logos::Logos;
14use std::{
15    fmt::{Debug, Display},
16    marker::PhantomData,
17    ops::Range,
18    str::FromStr,
19};
20
21macro_rules! store_err {
22    ($store:expr, $span:expr, $err:expr) => {
23        $store.entry($span).or_insert($err)
24    };
25}
26
27type Line = Vec<WithSpan<Token>>;
28
29#[derive(Clone)]
30pub struct Parser<'a, I> {
31    #[allow(dead_code)]
32    pub src: &'a str,
33    lines: Vec<Line>,
34    err: ErrorMap,
35    debug_info: DebugInfo,
36    _inst_set: PhantomData<I>,
37}
38
39impl<'a, I> Parser<'a, I>
40where
41    I: InstSet,
42    <I as FromStr>::Err: Display,
43{
44    pub fn new(src: &'a str) -> Self {
45        let (lines, err) = TokensWithError(Token::lexer(src)).lines();
46        Self {
47            src,
48            lines,
49            err,
50            debug_info: DebugInfo::default(),
51            _inst_set: PhantomData,
52        }
53    }
54
55    fn get_inst(line: &[WithSpan<Token>]) -> Result<Option<WithSpan<Inst<I>>>, ParseError> {
56        let span = {
57            let ((s, _), (e, _)) = (line.first().unwrap(), line.last().unwrap());
58            s.start..e.end
59        };
60
61        let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
62
63        let (addr, (opcodeidx, opcode), (restidx, rest)) = match rawline.as_slice() {
64            [Token::BareNumber(addr), Token::Text(opcode), rest @ ..] => {
65                (Some(Addr::Bare(*addr)), (1, opcode), (2, rest))
66            }
67            [Token::Text(label), Token::Colon, Token::Text(opcode), rest @ ..] => {
68                (Some(Addr::Label(label.clone())), (2, opcode), (3, rest))
69            }
70            [Token::Text(opcode), rest @ ..] => (None, (0, opcode), (1, rest)),
71            [] => return Ok(None),
72            _ => {
73                return Err((span, ErrorKind::SyntaxError));
74            }
75        };
76
77        let opcode = match I::from_str(opcode) {
78            Ok(s) => s,
79            Err(e) => {
80                let span = line[opcodeidx].0.clone();
81
82                return Err((span, ErrorKind::InvalidOpcode(e.to_string())));
83            }
84        };
85
86        if let Some((idx, _)) = rest.iter().enumerate().find(|(_, t)| {
87            !matches!(
88                t,
89                Token::Gpr(_)
90                    | Token::BareNumber(_)
91                    | Token::Text(_)
92                    | Token::Comma
93                    | Token::Literal(_)
94                    | Token::Indirect(_)
95            )
96        }) {
97            let span = line[restidx + idx].0.clone();
98
99            return Err((span, ErrorKind::InvalidOperand));
100        }
101
102        let mut ops = rest
103            .iter()
104            .filter(|t| !matches!(t, Token::Comma))
105            .cloned()
106            .map(Op::from)
107            .collect::<Vec<_>>();
108
109        let op = match ops.len() {
110            0 => Op::Null,
111            1 => ops.pop().unwrap(),
112            _ => Op::MultiOp(ops),
113        };
114
115        debug!(
116            "{:>4}\t{:>4}\t{:<4}",
117            addr.as_ref().map(ToString::to_string).unwrap_or_default(),
118            opcode,
119            op,
120        );
121
122        Ok(Some((span, Inst { addr, opcode, op })))
123    }
124
125    fn get_mem(line: &[WithSpan<Token>]) -> Result<Option<MemEnum>, ParseError> {
126        enum DataEnum {
127            LinearMemory(LinearMemory),
128            Normal(usize),
129        }
130
131        let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
132
133        let (&Range { start, .. }, &Range { end, .. }) = if rawline.is_empty() {
134            return Ok(None);
135        } else {
136            (&line.first().unwrap().0, &line.last().unwrap().0)
137        };
138
139        let get_data = |t: &[Token], start_idx: usize| -> Result<DataEnum, ParseError> {
140            match t {
141                &[Token::BareNumber(n)] => Ok(DataEnum::Normal(n)),
142                &[Token::LinearMemory(mem)] => Ok(DataEnum::LinearMemory(mem)),
143                [] => Ok(DataEnum::Normal(0)),
144                _ => Err((line[start_idx].0.start..end, ErrorKind::SyntaxError)),
145            }
146        };
147
148        match rawline.as_slice() {
149            &[Token::BareNumber(addr), ref rest @ ..] => {
150                let res = match get_data(rest, 1)? {
151                    DataEnum::LinearMemory(mem) => Some(MemEnum::Linear(
152                        (addr..addr + mem.len)
153                            .map(Addr::Bare)
154                            .map(move |addr| (addr, mem.init))
155                            .map(Mem::from)
156                            .collect(),
157                    )),
158                    DataEnum::Normal(data) => Some(MemEnum::One(Mem {
159                        addr: Addr::Bare(addr),
160                        data,
161                    })),
162                };
163
164                Ok(res)
165            }
166            [Token::Text(label), Token::Colon, rest @ ..] => Ok(Some(MemEnum::One(Mem {
167                addr: Addr::Label(label.clone()),
168                data: match get_data(rest, 2)? {
169                    DataEnum::LinearMemory(_) => Err((start..end, ErrorKind::SyntaxError))?,
170                    DataEnum::Normal(data) => data,
171                },
172            }))),
173            [] => Ok(None),
174            _ => Err((start..end, ErrorKind::SyntaxError)),
175        }
176    }
177
178    fn get_insts_and_mems(&mut self) -> (Vec<Span>, Vec<Inst<I>>, Vec<Mem>) {
179        let mut blocks = self
180            .lines
181            .split(Vec::is_empty)
182            .filter(|v| !v.is_empty())
183            .collect::<Vec<_>>();
184
185        assert!(blocks.len() >= 2, "Unable to parse. Your source may not contain blank line(s) between the program and the memory, or the memory might be absent");
186
187        let mems = blocks
188            .pop()
189            .unwrap()
190            .iter()
191            .map(|line| Self::get_mem(line))
192            .filter_map(|res| match res {
193                Ok(mem @ Some(_)) => mem,
194                Ok(None) => None,
195                Err((span, err)) => {
196                    store_err!(self.err, span, err);
197                    None
198                }
199            })
200            .fold(Vec::new(), |mut acc, mem| {
201                match mem {
202                    MemEnum::Linear(mems) => acc.extend(mems),
203                    MemEnum::One(mem) => acc.push(mem),
204                }
205
206                acc
207            });
208
209        let (inst_spans, insts): (Vec<_>, Vec<_>) = blocks
210            .concat()
211            .iter()
212            .map(|line| Self::get_inst(line))
213            .filter_map(|res| match res {
214                Ok(inst @ Some(_)) => inst,
215                Ok(None) => None,
216                Err((span, err)) => {
217                    store_err!(self.err, span, err);
218                    None
219                }
220            })
221            .unzip();
222
223        (inst_spans, insts, mems)
224    }
225
226    #[allow(clippy::type_complexity)]
227    pub fn parse(mut self) -> Result<(Vec<InstIr<I>>, Vec<MemIr>, DebugInfo), ErrorMap> {
228        let (inst_spans, mut insts, mut mems) = self.get_insts_and_mems();
229
230        self.debug_info.inst_spans = inst_spans;
231
232        linker::Linker::new(&mut insts, &mut mems).link();
233
234        if self.err.is_empty() {
235            Ok((
236                insts
237                    .into_iter()
238                    .map(InstIr::try_from)
239                    .collect::<Result<Vec<_>, _>>()
240                    .unwrap(),
241                mems.into_iter()
242                    .map(MemIr::try_from)
243                    .collect::<Result<Vec<_>, _>>()
244                    .unwrap(),
245                self.debug_info,
246            ))
247        } else {
248            Err(self.err)
249        }
250    }
251}
252
253mod linker {
254    use super::{Addr, Debug, Display, Inst, Mem, Op};
255    use crate::inst::InstSet;
256    use std::{
257        collections::{HashMap, HashSet},
258        ops::Deref,
259    };
260
261    #[derive(Debug, Clone, Copy)]
262    enum Src {
263        Prog(usize),
264        Mem(usize),
265    }
266
267    impl From<Src> for Op {
268        fn from(value: Src) -> Self {
269            match value {
270                Src::Mem(addr) | Src::Prog(addr) => Op::Addr(addr),
271            }
272        }
273    }
274
275    #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
276    enum Instance {
277        MultiOp(usize, usize),
278        Single(usize),
279    }
280
281    #[derive(Debug, Clone, Default)]
282    struct SymbolData {
283        source: Option<Src>,
284        instances: HashSet<Instance>,
285    }
286
287    #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
288    enum Symbol {
289        Label(&'static str),
290        Addr(usize),
291    }
292
293    impl From<&Addr> for Symbol {
294        fn from(value: &Addr) -> Self {
295            match value {
296                &Addr::Bare(addr) => Self::Addr(addr),
297                Addr::Label(label) => label.into(),
298            }
299        }
300    }
301
302    impl<'a> From<&'a String> for Symbol {
303        fn from(value: &'a String) -> Self {
304            // leak it so that symbol copies are cheap
305            Self::Label(Box::leak(value.clone().into_boxed_str()))
306        }
307    }
308
309    impl Display for Symbol {
310        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311            match self {
312                Symbol::Label(s) => Display::fmt(&s, f),
313                Symbol::Addr(addr) => Display::fmt(&addr, f),
314            }
315        }
316    }
317
318    impl TryFrom<&Op> for Symbol {
319        type Error = ();
320
321        fn try_from(value: &Op) -> Result<Self, Self::Error> {
322            match value {
323                &Op::Addr(addr) => Ok(Symbol::Addr(addr)),
324                Op::Fail(s) => Ok(s.into()),
325                Op::Indirect(op) => op.deref().try_into(),
326                _ => Err(()),
327            }
328        }
329    }
330
331    type SymbolTableInner = HashMap<Symbol, SymbolData>;
332
333    #[derive(Debug, Clone)]
334    struct SymbolTable(SymbolTableInner);
335
336    impl IntoIterator for SymbolTable {
337        type Item = (Src, HashSet<Instance>);
338        type IntoIter = std::iter::Map<
339            <SymbolTableInner as IntoIterator>::IntoIter,
340            fn(<SymbolTableInner as IntoIterator>::Item) -> Self::Item,
341        >;
342
343        fn into_iter(self) -> Self::IntoIter {
344            self.0
345                .into_iter()
346                .map(|(sym, SymbolData { source, instances })| {
347                    (
348                        source.unwrap_or_else(|| panic!("{sym} is undefined")),
349                        instances,
350                    )
351                })
352        }
353    }
354
355    impl SymbolTable {
356        pub fn new() -> Self {
357            Self(HashMap::new())
358        }
359
360        pub fn add_instance(&mut self, symbol: Symbol, instance: Instance) {
361            self.0
362                .entry(symbol)
363                .and_modify(|SymbolData { instances, .. }| {
364                    (instances).insert(instance);
365                })
366                .or_insert_with(|| {
367                    let mut instances = HashSet::new();
368                    instances.insert(instance);
369                    SymbolData {
370                        source: None,
371                        instances,
372                    }
373                });
374        }
375
376        pub fn add_src(&mut self, symbol: Symbol, src: Src) {
377            self.0
378                .entry(symbol)
379                .and_modify(|SymbolData { source, .. }| {
380                    if source.is_some() {
381                        panic!("{symbol} is defined multiple times");
382                    } else {
383                        *source = Some(src);
384                    }
385                }); // do nothing if symbol doesn't exist
386        }
387    }
388
389    impl Op {
390        fn link(&mut self, src: Src, mop_idx: Option<usize>) {
391            match self {
392                Op::Addr(_) | Op::Fail(_) => *self = src.into(),
393                Op::MultiOp(ops) if mop_idx.is_some() => ops[mop_idx.unwrap()] = src.into(),
394                Op::Indirect(op) => op.link(src, mop_idx),
395                _ => panic!("Symbol linking failed. Please report this error.\nop: {self:?}\nsrc: {src:?}\nmop_idx: {mop_idx:?}"),
396            }
397        }
398    }
399
400    pub struct Linker<'inst, 'mem, I> {
401        symbol_table: SymbolTable,
402        used_addrs: HashSet<usize>,
403        program: &'inst mut [Inst<I>],
404        memory: &'mem mut [Mem],
405    }
406
407    impl<'inst, 'mem, I> Linker<'inst, 'mem, I>
408    where
409        I: InstSet,
410        <I as std::str::FromStr>::Err: Display,
411    {
412        pub fn new(prog: &'inst mut [Inst<I>], mem: &'mem mut [Mem]) -> Self {
413            Self {
414                symbol_table: SymbolTable::new(),
415                used_addrs: HashSet::new(),
416                program: prog,
417                memory: mem,
418            }
419        }
420
421        fn find_symbols(&mut self) {
422            for (idx, Inst { op, .. }) in self.program.iter().enumerate() {
423                match op {
424                    Op::MultiOp(ops) => {
425                        for (mop_idx, sym) in ops
426                            .iter()
427                            .enumerate()
428                            .filter_map(|(idx, op)| Symbol::try_from(op).map(|sym| (idx, sym)).ok())
429                        {
430                            self.symbol_table
431                                .add_instance(sym, Instance::MultiOp(idx, mop_idx));
432                        }
433                    }
434                    op => {
435                        // Failure irrelevant
436                        if let Ok(sym) = Symbol::try_from(op) {
437                            self.symbol_table.add_instance(sym, Instance::Single(idx));
438                        }
439                    }
440                }
441            }
442        }
443
444        fn find_symbol_sources(&mut self) {
445            // find which of the symbols are instruction addresses
446            for (idx, Inst { addr, .. }) in self.program.iter().enumerate() {
447                if let Some(addr) = addr {
448                    // add_src automatically does nothing if symbol is absent
449                    self.symbol_table.add_src(addr.into(), Src::Prog(idx));
450                }
451            }
452
453            // leave explicit memory addresses untouched
454            for addr in self.memory.iter().filter_map(|Mem { addr, .. }| {
455                if let &Addr::Bare(addr) = addr {
456                    Some(addr)
457                } else {
458                    None
459                }
460            }) {
461                self.symbol_table
462                    .add_src(Symbol::Addr(addr), Src::Mem(addr));
463                assert!(
464                    self.used_addrs.insert(addr),
465                    "{addr:?} is used multiple times"
466                );
467            }
468        }
469
470        fn readdress(&mut self) {
471            for (idx, Inst { addr, .. }) in self.program.iter_mut().enumerate() {
472                *addr = Some(Addr::Bare(idx));
473            }
474
475            let mut counter = 0;
476
477            for addr in self.memory.iter_mut().filter_map(|Mem { addr, .. }| {
478                if matches!(addr, Addr::Label(_)) {
479                    Some(addr)
480                } else {
481                    None
482                }
483            }) {
484                // find unused address
485                while self.used_addrs.contains(&counter) {
486                    counter += 1;
487                }
488                self.symbol_table
489                    .add_src(Symbol::from(&addr.clone()), Src::Mem(counter));
490
491                *addr = Addr::Bare(counter);
492
493                counter += 1;
494            }
495        }
496
497        pub fn link(mut self) {
498            self.find_symbols();
499            self.find_symbol_sources();
500            self.readdress();
501
502            for (src, instances) in self.symbol_table {
503                for instance in instances {
504                    match instance {
505                        Instance::MultiOp(idx, mop_idx) => {
506                            self.program[idx].op.link(src, Some(mop_idx));
507                        }
508                        Instance::Single(idx) => self.program[idx].op.link(src, None),
509                    }
510                }
511            }
512        }
513    }
514}
515
516#[derive(Debug, Clone)]
517pub enum Addr {
518    Bare(usize),
519    Label(String),
520}
521
522impl Display for Addr {
523    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
524        match self {
525            Self::Bare(addr) => write!(f, "{addr}"),
526            Self::Label(label) => write!(f, "{label}:"),
527        }
528    }
529}
530
531pub struct InstIr<I>
532where
533    I: InstSet,
534    <I as FromStr>::Err: Display,
535{
536    pub addr: usize,
537    pub inst: inst::Inst<I>,
538}
539
540impl<I> TryFrom<Inst<I>> for InstIr<I>
541where
542    I: InstSet,
543    <I as FromStr>::Err: Display,
544{
545    type Error = ();
546
547    fn try_from(Inst { addr, opcode, op }: Inst<I>) -> Result<Self, Self::Error> {
548        if let Some(Addr::Bare(addr)) = addr {
549            Ok(Self::new(addr, opcode, op))
550        } else {
551            Err(())
552        }
553    }
554}
555
556impl<I> InstIr<I>
557where
558    I: InstSet,
559    <I as FromStr>::Err: Display,
560{
561    pub fn new(addr: usize, opcode: I, op: Op) -> Self {
562        Self {
563            addr,
564            inst: inst::Inst::new(opcode, op),
565        }
566    }
567}
568
569impl<I> From<InstIr<I>> for (usize, exec::ExecInst)
570where
571    I: InstSet,
572    <I as FromStr>::Err: Display,
573{
574    fn from(InstIr { addr, inst }: InstIr<I>) -> Self {
575        (addr, inst.to_exec_inst())
576    }
577}
578
579pub struct Inst<I> {
580    pub addr: Option<Addr>,
581    pub opcode: I,
582    pub op: Op,
583}
584
585impl<I> Debug for Inst<I>
586where
587    I: Display,
588{
589    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
590        f.debug_struct("Inst")
591            .field("addr", &self.addr)
592            .field("opcode", &self.opcode.to_string())
593            .field("op", &self.op)
594            .finish()
595    }
596}
597
598impl<I> Display for Inst<I>
599where
600    I: Display,
601{
602    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
603        write!(
604            f,
605            "{} {} {}",
606            self.addr
607                .as_ref()
608                .map(ToString::to_string)
609                .unwrap_or_default(),
610            self.opcode,
611            self.op
612        )
613    }
614}
615
616enum MemEnum {
617    Linear(Vec<Mem>),
618    One(Mem),
619}
620
621pub struct Mem {
622    pub addr: Addr,
623    pub data: usize,
624}
625
626impl From<(Addr, usize)> for Mem {
627    fn from((addr, data): (Addr, usize)) -> Self {
628        Self { addr, data }
629    }
630}
631
632pub struct MemIr {
633    pub addr: usize,
634    pub data: usize,
635}
636
637impl TryFrom<Mem> for MemIr {
638    type Error = ();
639
640    fn try_from(Mem { addr, data }: Mem) -> Result<Self, Self::Error> {
641        if let Addr::Bare(addr) = addr {
642            Ok(Self { addr, data })
643        } else {
644            Err(())
645        }
646    }
647}