cambridge_asm/parse/
parser.rs

1// Copyright (c) 2021 Saadi Save
2// This Source Code Form is subject to the terms of the Mozilla Public
3// License, v. 2.0. If a copy of the MPL was not distributed with this
4// file, You can obtain one at http://mozilla.org/MPL/2.0/.
5
6use crate::{
7    exec::{self, DebugInfo},
8    inst::{self, InstSet, Op},
9    parse::lexer::{
10        ErrorKind, ErrorMap, LinearMemory, ParseError, Span, Token, TokensWithError, WithSpan,
11    },
12};
13use logos::Logos;
14use std::{
15    collections::BTreeMap,
16    fmt::{Debug, Display},
17    marker::PhantomData,
18    ops::Range,
19    str::FromStr,
20};
21
22macro_rules! store_err {
23    ($store:expr, $span:expr, $err:expr) => {
24        $store.entry($span).or_insert($err)
25    };
26}
27
28type Line = Vec<WithSpan<Token>>;
29
30#[derive(Clone)]
31pub struct Parser<'a, I> {
32    #[allow(dead_code)]
33    pub src: &'a str,
34    lines: Vec<Line>,
35    err: ErrorMap,
36    debug_info: DebugInfo,
37    _inst_set: PhantomData<I>,
38}
39
40impl<'a, I> Parser<'a, I>
41where
42    I: InstSet,
43    <I as FromStr>::Err: Display,
44{
45    pub fn new(src: &'a str) -> Self {
46        let (lines, err) = TokensWithError(Token::lexer(src)).lines();
47        Self {
48            src,
49            lines,
50            err,
51            debug_info: DebugInfo::default(),
52            _inst_set: PhantomData,
53        }
54    }
55
56    fn get_inst(line: &[WithSpan<Token>]) -> Result<Option<WithSpan<Inst<I>>>, ParseError> {
57        let span = {
58            let ((s, _), (e, _)) = (line.first().unwrap(), line.last().unwrap());
59            s.start..e.end
60        };
61
62        let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
63
64        let (addr, (opcodeidx, opcode), (restidx, rest)) = match rawline.as_slice() {
65            [Token::BareNumber(addr), Token::Text(opcode), rest @ ..] => {
66                (Some(Addr::Bare(*addr)), (1, opcode), (2, rest))
67            }
68            [Token::Text(label), Token::Colon, Token::Text(opcode), rest @ ..] => {
69                (Some(Addr::Label(label.clone())), (2, opcode), (3, rest))
70            }
71            [Token::Text(opcode), rest @ ..] => (None, (0, opcode), (1, rest)),
72            [] => return Ok(None),
73            _ => {
74                return Err((span, ErrorKind::SyntaxError));
75            }
76        };
77
78        let opcode = match I::from_str(opcode) {
79            Ok(s) => s,
80            Err(e) => {
81                let span = line[opcodeidx].0.clone();
82
83                return Err((span, ErrorKind::InvalidOpcode(e.to_string())));
84            }
85        };
86
87        if let Some((idx, _)) = rest.iter().enumerate().find(|(_, t)| {
88            !matches!(
89                t,
90                Token::Gpr(_)
91                    | Token::BareNumber(_)
92                    | Token::Text(_)
93                    | Token::Comma
94                    | Token::Literal(_)
95                    | Token::Indirect(_)
96            )
97        }) {
98            let span = line[restidx + idx].0.clone();
99
100            return Err((span, ErrorKind::InvalidOperand));
101        }
102
103        let mut ops = rest
104            .iter()
105            .filter(|t| !matches!(t, Token::Comma))
106            .cloned()
107            .map(Op::from)
108            .collect::<Vec<_>>();
109
110        let op = match ops.len() {
111            0 => Op::Null,
112            1 => ops.pop().unwrap(),
113            _ => Op::MultiOp(ops),
114        };
115
116        debug!(
117            "{:>4}\t{:>4}\t{:<4}",
118            addr.as_ref().map(ToString::to_string).unwrap_or_default(),
119            opcode,
120            op,
121        );
122
123        Ok(Some((span, Inst { addr, opcode, op })))
124    }
125
126    fn get_mem(line: &[WithSpan<Token>]) -> Result<Option<MemEnum>, ParseError> {
127        enum DataEnum {
128            LinearMemory(LinearMemory),
129            Normal(usize),
130        }
131
132        let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
133
134        let (&Range { start, .. }, &Range { end, .. }) = if rawline.is_empty() {
135            return Ok(None);
136        } else {
137            (&line.first().unwrap().0, &line.last().unwrap().0)
138        };
139
140        let get_data = |t: &[Token], start_idx: usize| -> Result<DataEnum, ParseError> {
141            match t {
142                &[Token::BareNumber(n)] => Ok(DataEnum::Normal(n)),
143                &[Token::LinearMemory(mem)] => Ok(DataEnum::LinearMemory(mem)),
144                [] => Ok(DataEnum::Normal(0)),
145                _ => Err((line[start_idx].0.start..end, ErrorKind::SyntaxError)),
146            }
147        };
148
149        match rawline.as_slice() {
150            &[Token::BareNumber(addr), ref rest @ ..] => {
151                let res = match get_data(rest, 1)? {
152                    DataEnum::LinearMemory(mem) => Some(MemEnum::Linear(
153                        (addr..addr + mem.len)
154                            .map(Addr::Bare)
155                            .map(move |addr| (addr, mem.init))
156                            .map(Mem::from)
157                            .collect(),
158                    )),
159                    DataEnum::Normal(data) => Some(MemEnum::One(Mem {
160                        addr: Addr::Bare(addr),
161                        data,
162                    })),
163                };
164
165                Ok(res)
166            }
167            [Token::Text(label), Token::Colon, rest @ ..] => Ok(Some(MemEnum::One(Mem {
168                addr: Addr::Label(label.clone()),
169                data: match get_data(rest, 2)? {
170                    DataEnum::LinearMemory(_) => Err((start..end, ErrorKind::SyntaxError))?,
171                    DataEnum::Normal(data) => data,
172                },
173            }))),
174            [] => Ok(None),
175            _ => Err((start..end, ErrorKind::SyntaxError)),
176        }
177    }
178
179    fn get_insts_and_mems(&mut self) -> (Vec<Span>, Vec<Inst<I>>, Vec<Mem>) {
180        let mut blocks = self
181            .lines
182            .split(Vec::is_empty)
183            .filter(|v| !v.is_empty())
184            .collect::<Vec<_>>();
185
186        assert!(blocks.len() >= 2, "Unable to parse. Your source may not contain blank line(s) between the program and the memory, or the memory might be absent");
187
188        let mems = blocks
189            .pop()
190            .unwrap()
191            .iter()
192            .map(|line| Self::get_mem(line))
193            .filter_map(|res| match res {
194                Ok(mem @ Some(_)) => mem,
195                Ok(None) => None,
196                Err((span, err)) => {
197                    store_err!(self.err, span, err);
198                    None
199                }
200            })
201            .fold(Vec::new(), |mut acc, mem| {
202                match mem {
203                    MemEnum::Linear(mems) => acc.extend(mems),
204                    MemEnum::One(mem) => acc.push(mem),
205                };
206
207                acc
208            });
209
210        let (inst_spans, insts): (Vec<_>, Vec<_>) = blocks
211            .concat()
212            .iter()
213            .map(|line| Self::get_inst(line))
214            .filter_map(|res| match res {
215                Ok(inst @ Some(_)) => inst,
216                Ok(None) => None,
217                Err((span, err)) => {
218                    store_err!(self.err, span, err);
219                    None
220                }
221            })
222            .unzip();
223
224        (inst_spans, insts, mems)
225    }
226
227    fn process_insts(&mut self, insts: Vec<Inst<I>>) -> Vec<InstIr<I>> {
228        fn op_addr_eq(op: &Op, addr: &Addr) -> bool {
229            match (op, addr) {
230                (Op::Addr(x), Addr::Bare(bare)) => x == bare,
231                (Op::Fail(x), Addr::Label(label)) => x == label,
232                (Op::Indirect(op), addr) => op_addr_eq(op.as_ref(), addr),
233                _ => false,
234            }
235        }
236
237        self.debug_info
238            .prog
239            .extend(
240                insts
241                    .iter()
242                    .enumerate()
243                    .filter_map(|(idx, Inst { addr, .. })| {
244                        addr.as_ref().map(|a| (idx, a.as_dbg_string()))
245                    }),
246            );
247
248        let mut links = Vec::new();
249
250        for (i, Inst { addr, .. }) in insts.iter().enumerate() {
251            for (j, Inst { op, .. }) in insts.iter().enumerate() {
252                if let Some(addr) = addr {
253                    match op {
254                        Op::MultiOp(vec) => {
255                            for (idx, op) in vec.iter().enumerate() {
256                                if op_addr_eq(op, addr) {
257                                    links.push((i, j, Some(idx)));
258                                }
259                            }
260                        }
261                        _ => {
262                            if op_addr_eq(op, addr) {
263                                links.push((i, j, None));
264                            }
265                        }
266                    }
267                }
268            }
269        }
270
271        let mut ir = insts.into_iter().enumerate().collect::<Vec<_>>();
272
273        for (to, from, multiop_idx) in links {
274            match &ir[from].1.op {
275                Op::MultiOp(ops) => {
276                    let mut ops = ops.clone();
277                    // unwrap ok because mutiop_idx will always exist if operand is multiop
278                    ops[multiop_idx.unwrap()] = Op::Addr(to);
279                    ir[from].1.op = Op::MultiOp(ops);
280                }
281                Op::Addr(_) | Op::Fail(_) => ir[from].1.op = Op::Addr(to),
282                Op::Indirect(_) => {
283                    if let Op::Indirect(op) = &mut ir[from].1.op {
284                        if matches!(op.as_ref(), Op::Addr(_) | Op::Fail(_)) {
285                            *op.as_mut() = Op::Addr(to);
286                        }
287                    }
288                }
289                _ => {}
290            };
291        }
292
293        ir.into_iter()
294            .map(|(idx, Inst { opcode, op, .. })| InstIr::new(idx, opcode, op))
295            .collect()
296    }
297
298    fn process_mems(&mut self, mems: Vec<Mem>, prog: &mut [InstIr<I>]) -> Vec<MemIr> {
299        fn op_label_eq(op: &Op, label: &str) -> bool {
300            match op {
301                Op::Fail(x) => x == label,
302                Op::Indirect(op) => op_label_eq(op.as_ref(), label),
303                _ => false,
304            }
305        }
306
307        let mut label_mems = Vec::new();
308        let mut raw_mems = Vec::new();
309
310        for Mem { addr, data } in mems {
311            match addr {
312                Addr::Bare(bare) => raw_mems.push((bare, data)),
313                Addr::Label(label) => label_mems.push((label, data)),
314            }
315        }
316
317        let mut links = vec![];
318
319        for (i, (addr, _)) in label_mems.iter().enumerate() {
320            for (
321                j,
322                InstIr {
323                    inst: inst::Inst { op, .. },
324                    ..
325                },
326            ) in prog.iter().enumerate()
327            {
328                match op {
329                    Op::MultiOp(vec) => {
330                        for (idx, op) in vec.iter().enumerate() {
331                            if op_label_eq(op, addr) {
332                                links.push((i, j, Some(idx)));
333                            }
334                        }
335                    }
336                    _ => {
337                        if op_label_eq(op, addr) {
338                            links.push((i, j, None));
339                        }
340                    }
341                }
342            }
343        }
344
345        let unused_addrs: Vec<_> = {
346            let mut used_addr = raw_mems.iter().map(|x| x.0).collect::<Vec<_>>();
347
348            used_addr.sort_unstable();
349
350            let (first, last) = if used_addr.is_empty() {
351                (0, 0)
352            } else {
353                // unwrap ok because vector is guaranteed to not be empty
354                (
355                    used_addr.first().copied().unwrap(),
356                    used_addr.last().copied().unwrap(),
357                )
358            };
359
360            (0..first).chain(last + 1..).take(links.len()).collect()
361        };
362
363        assert!(
364            unused_addrs.len() >= links.len(),
365            "One of the memory addresses is too big"
366        );
367
368        let mut newlinks = BTreeMap::new();
369
370        // linking
371        for ((memaddr, progaddr, multiop_idx), uid) in links.into_iter().zip(unused_addrs) {
372            let (addr, data) = &label_mems[memaddr];
373
374            let uid = newlinks.entry(addr).or_insert((uid, *data)).0;
375
376            self.debug_info
377                .mem
378                .entry(uid)
379                .or_insert_with(|| addr.clone());
380
381            let cir = &mut prog[progaddr];
382
383            match cir.inst.op {
384                Op::MultiOp(ref mut ops) if multiop_idx.is_some() => {
385                    ops[multiop_idx.unwrap()] = Op::Addr(uid);
386                }
387                Op::Fail(_) => cir.inst.op = Op::Addr(uid),
388                Op::Indirect(_) => {
389                    if let Op::Indirect(op) = &mut cir.inst.op {
390                        if matches!(op.as_ref(), Op::Fail(_)) {
391                            *op.as_mut() = Op::Addr(uid);
392                        }
393                    }
394                }
395                _ => {}
396            }
397        }
398
399        newlinks
400            .values()
401            .copied()
402            .chain(raw_mems)
403            .map(|(addr, data)| MemIr { addr, data })
404            .collect()
405    }
406
407    #[allow(clippy::type_complexity)]
408    pub fn parse(mut self) -> Result<(Vec<InstIr<I>>, Vec<MemIr>, DebugInfo), ErrorMap> {
409        let (inst_spans, insts, mems) = self.get_insts_and_mems();
410
411        self.debug_info.inst_spans = inst_spans;
412
413        let mut inst_ir = self.process_insts(insts);
414
415        let mem_ir = self.process_mems(mems, &mut inst_ir);
416
417        if self.err.is_empty() {
418            Ok((inst_ir, mem_ir, self.debug_info))
419        } else {
420            Err(self.err)
421        }
422    }
423}
424
425#[derive(Debug, Clone)]
426pub enum Addr {
427    Bare(usize),
428    Label(String),
429}
430
431impl Addr {
432    fn as_dbg_string(&self) -> String {
433        match self {
434            Addr::Label(label) => label.clone(),
435            Addr::Bare(bare) => bare.to_string(),
436        }
437    }
438}
439
440impl Display for Addr {
441    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
442        match self {
443            Self::Bare(addr) => write!(f, "{addr}"),
444            Self::Label(label) => write!(f, "{label}:"),
445        }
446    }
447}
448
449pub struct InstIr<I>
450where
451    I: InstSet,
452    <I as FromStr>::Err: Display,
453{
454    pub addr: usize,
455    pub inst: inst::Inst<I>,
456}
457
458impl<I> InstIr<I>
459where
460    I: InstSet,
461    <I as FromStr>::Err: Display,
462{
463    pub fn new(addr: usize, opcode: I, op: Op) -> Self {
464        Self {
465            addr,
466            inst: inst::Inst::new(opcode, op),
467        }
468    }
469}
470
471impl<I> From<InstIr<I>> for (usize, exec::ExecInst)
472where
473    I: InstSet,
474    <I as FromStr>::Err: Display,
475{
476    fn from(InstIr { addr, inst }: InstIr<I>) -> Self {
477        (addr, inst.to_exec_inst())
478    }
479}
480
481pub struct Inst<I> {
482    pub addr: Option<Addr>,
483    pub opcode: I,
484    pub op: Op,
485}
486
487impl<I> Debug for Inst<I>
488where
489    I: Display,
490{
491    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
492        f.debug_struct("Inst")
493            .field("addr", &self.addr)
494            .field("opcode", &self.opcode.to_string())
495            .field("op", &self.op)
496            .finish()
497    }
498}
499
500impl<I> Display for Inst<I>
501where
502    I: Display,
503{
504    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
505        write!(
506            f,
507            "{} {} {}",
508            self.addr
509                .as_ref()
510                .map(ToString::to_string)
511                .unwrap_or_default(),
512            self.opcode,
513            self.op
514        )
515    }
516}
517
518enum MemEnum {
519    Linear(Vec<Mem>),
520    One(Mem),
521}
522
523pub struct Mem {
524    pub addr: Addr,
525    pub data: usize,
526}
527
528impl From<(Addr, usize)> for Mem {
529    fn from((addr, data): (Addr, usize)) -> Self {
530        Self { addr, data }
531    }
532}
533
534pub struct MemIr {
535    pub addr: usize,
536    pub data: usize,
537}