1use crate::{
7 exec::{self, DebugInfo},
8 inst::{self, InstSet, Op},
9 parse::lexer::{
10 ErrorKind, ErrorMap, LinearMemory, ParseError, Span, Token, TokensWithError, WithSpan,
11 },
12};
13use logos::Logos;
14use std::{
15 collections::BTreeMap,
16 fmt::{Debug, Display},
17 marker::PhantomData,
18 ops::Range,
19 str::FromStr,
20};
21
22macro_rules! store_err {
23 ($store:expr, $span:expr, $err:expr) => {
24 $store.entry($span).or_insert($err)
25 };
26}
27
28type Line = Vec<WithSpan<Token>>;
29
30#[derive(Clone)]
31pub struct Parser<'a, I> {
32 #[allow(dead_code)]
33 pub src: &'a str,
34 lines: Vec<Line>,
35 err: ErrorMap,
36 debug_info: DebugInfo,
37 _inst_set: PhantomData<I>,
38}
39
40impl<'a, I> Parser<'a, I>
41where
42 I: InstSet,
43 <I as FromStr>::Err: Display,
44{
45 pub fn new(src: &'a str) -> Self {
46 let (lines, err) = TokensWithError(Token::lexer(src)).lines();
47 Self {
48 src,
49 lines,
50 err,
51 debug_info: DebugInfo::default(),
52 _inst_set: PhantomData,
53 }
54 }
55
56 fn get_inst(line: &[WithSpan<Token>]) -> Result<Option<WithSpan<Inst<I>>>, ParseError> {
57 let span = {
58 let ((s, _), (e, _)) = (line.first().unwrap(), line.last().unwrap());
59 s.start..e.end
60 };
61
62 let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
63
64 let (addr, (opcodeidx, opcode), (restidx, rest)) = match rawline.as_slice() {
65 [Token::BareNumber(addr), Token::Text(opcode), rest @ ..] => {
66 (Some(Addr::Bare(*addr)), (1, opcode), (2, rest))
67 }
68 [Token::Text(label), Token::Colon, Token::Text(opcode), rest @ ..] => {
69 (Some(Addr::Label(label.clone())), (2, opcode), (3, rest))
70 }
71 [Token::Text(opcode), rest @ ..] => (None, (0, opcode), (1, rest)),
72 [] => return Ok(None),
73 _ => {
74 return Err((span, ErrorKind::SyntaxError));
75 }
76 };
77
78 let opcode = match I::from_str(opcode) {
79 Ok(s) => s,
80 Err(e) => {
81 let span = line[opcodeidx].0.clone();
82
83 return Err((span, ErrorKind::InvalidOpcode(e.to_string())));
84 }
85 };
86
87 if let Some((idx, _)) = rest.iter().enumerate().find(|(_, t)| {
88 !matches!(
89 t,
90 Token::Gpr(_)
91 | Token::BareNumber(_)
92 | Token::Text(_)
93 | Token::Comma
94 | Token::Literal(_)
95 | Token::Indirect(_)
96 )
97 }) {
98 let span = line[restidx + idx].0.clone();
99
100 return Err((span, ErrorKind::InvalidOperand));
101 }
102
103 let mut ops = rest
104 .iter()
105 .filter(|t| !matches!(t, Token::Comma))
106 .cloned()
107 .map(Op::from)
108 .collect::<Vec<_>>();
109
110 let op = match ops.len() {
111 0 => Op::Null,
112 1 => ops.pop().unwrap(),
113 _ => Op::MultiOp(ops),
114 };
115
116 debug!(
117 "{:>4}\t{:>4}\t{:<4}",
118 addr.as_ref().map(ToString::to_string).unwrap_or_default(),
119 opcode,
120 op,
121 );
122
123 Ok(Some((span, Inst { addr, opcode, op })))
124 }
125
126 fn get_mem(line: &[WithSpan<Token>]) -> Result<Option<MemEnum>, ParseError> {
127 enum DataEnum {
128 LinearMemory(LinearMemory),
129 Normal(usize),
130 }
131
132 let rawline = line.iter().map(|(_, t)| t).cloned().collect::<Vec<_>>();
133
134 let (&Range { start, .. }, &Range { end, .. }) = if rawline.is_empty() {
135 return Ok(None);
136 } else {
137 (&line.first().unwrap().0, &line.last().unwrap().0)
138 };
139
140 let get_data = |t: &[Token], start_idx: usize| -> Result<DataEnum, ParseError> {
141 match t {
142 &[Token::BareNumber(n)] => Ok(DataEnum::Normal(n)),
143 &[Token::LinearMemory(mem)] => Ok(DataEnum::LinearMemory(mem)),
144 [] => Ok(DataEnum::Normal(0)),
145 _ => Err((line[start_idx].0.start..end, ErrorKind::SyntaxError)),
146 }
147 };
148
149 match rawline.as_slice() {
150 &[Token::BareNumber(addr), ref rest @ ..] => {
151 let res = match get_data(rest, 1)? {
152 DataEnum::LinearMemory(mem) => Some(MemEnum::Linear(
153 (addr..addr + mem.len)
154 .map(Addr::Bare)
155 .map(move |addr| (addr, mem.init))
156 .map(Mem::from)
157 .collect(),
158 )),
159 DataEnum::Normal(data) => Some(MemEnum::One(Mem {
160 addr: Addr::Bare(addr),
161 data,
162 })),
163 };
164
165 Ok(res)
166 }
167 [Token::Text(label), Token::Colon, rest @ ..] => Ok(Some(MemEnum::One(Mem {
168 addr: Addr::Label(label.clone()),
169 data: match get_data(rest, 2)? {
170 DataEnum::LinearMemory(_) => Err((start..end, ErrorKind::SyntaxError))?,
171 DataEnum::Normal(data) => data,
172 },
173 }))),
174 [] => Ok(None),
175 _ => Err((start..end, ErrorKind::SyntaxError)),
176 }
177 }
178
179 fn get_insts_and_mems(&mut self) -> (Vec<Span>, Vec<Inst<I>>, Vec<Mem>) {
180 let mut blocks = self
181 .lines
182 .split(Vec::is_empty)
183 .filter(|v| !v.is_empty())
184 .collect::<Vec<_>>();
185
186 assert!(blocks.len() >= 2, "Unable to parse. Your source may not contain blank line(s) between the program and the memory, or the memory might be absent");
187
188 let mems = blocks
189 .pop()
190 .unwrap()
191 .iter()
192 .map(|line| Self::get_mem(line))
193 .filter_map(|res| match res {
194 Ok(mem @ Some(_)) => mem,
195 Ok(None) => None,
196 Err((span, err)) => {
197 store_err!(self.err, span, err);
198 None
199 }
200 })
201 .fold(Vec::new(), |mut acc, mem| {
202 match mem {
203 MemEnum::Linear(mems) => acc.extend(mems),
204 MemEnum::One(mem) => acc.push(mem),
205 };
206
207 acc
208 });
209
210 let (inst_spans, insts): (Vec<_>, Vec<_>) = blocks
211 .concat()
212 .iter()
213 .map(|line| Self::get_inst(line))
214 .filter_map(|res| match res {
215 Ok(inst @ Some(_)) => inst,
216 Ok(None) => None,
217 Err((span, err)) => {
218 store_err!(self.err, span, err);
219 None
220 }
221 })
222 .unzip();
223
224 (inst_spans, insts, mems)
225 }
226
227 fn process_insts(&mut self, insts: Vec<Inst<I>>) -> Vec<InstIr<I>> {
228 fn op_addr_eq(op: &Op, addr: &Addr) -> bool {
229 match (op, addr) {
230 (Op::Addr(x), Addr::Bare(bare)) => x == bare,
231 (Op::Fail(x), Addr::Label(label)) => x == label,
232 (Op::Indirect(op), addr) => op_addr_eq(op.as_ref(), addr),
233 _ => false,
234 }
235 }
236
237 self.debug_info
238 .prog
239 .extend(
240 insts
241 .iter()
242 .enumerate()
243 .filter_map(|(idx, Inst { addr, .. })| {
244 addr.as_ref().map(|a| (idx, a.as_dbg_string()))
245 }),
246 );
247
248 let mut links = Vec::new();
249
250 for (i, Inst { addr, .. }) in insts.iter().enumerate() {
251 for (j, Inst { op, .. }) in insts.iter().enumerate() {
252 if let Some(addr) = addr {
253 match op {
254 Op::MultiOp(vec) => {
255 for (idx, op) in vec.iter().enumerate() {
256 if op_addr_eq(op, addr) {
257 links.push((i, j, Some(idx)));
258 }
259 }
260 }
261 _ => {
262 if op_addr_eq(op, addr) {
263 links.push((i, j, None));
264 }
265 }
266 }
267 }
268 }
269 }
270
271 let mut ir = insts.into_iter().enumerate().collect::<Vec<_>>();
272
273 for (to, from, multiop_idx) in links {
274 match &ir[from].1.op {
275 Op::MultiOp(ops) => {
276 let mut ops = ops.clone();
277 ops[multiop_idx.unwrap()] = Op::Addr(to);
279 ir[from].1.op = Op::MultiOp(ops);
280 }
281 Op::Addr(_) | Op::Fail(_) => ir[from].1.op = Op::Addr(to),
282 Op::Indirect(_) => {
283 if let Op::Indirect(op) = &mut ir[from].1.op {
284 if matches!(op.as_ref(), Op::Addr(_) | Op::Fail(_)) {
285 *op.as_mut() = Op::Addr(to);
286 }
287 }
288 }
289 _ => {}
290 };
291 }
292
293 ir.into_iter()
294 .map(|(idx, Inst { opcode, op, .. })| InstIr::new(idx, opcode, op))
295 .collect()
296 }
297
298 fn process_mems(&mut self, mems: Vec<Mem>, prog: &mut [InstIr<I>]) -> Vec<MemIr> {
299 fn op_label_eq(op: &Op, label: &str) -> bool {
300 match op {
301 Op::Fail(x) => x == label,
302 Op::Indirect(op) => op_label_eq(op.as_ref(), label),
303 _ => false,
304 }
305 }
306
307 let mut label_mems = Vec::new();
308 let mut raw_mems = Vec::new();
309
310 for Mem { addr, data } in mems {
311 match addr {
312 Addr::Bare(bare) => raw_mems.push((bare, data)),
313 Addr::Label(label) => label_mems.push((label, data)),
314 }
315 }
316
317 let mut links = vec![];
318
319 for (i, (addr, _)) in label_mems.iter().enumerate() {
320 for (
321 j,
322 InstIr {
323 inst: inst::Inst { op, .. },
324 ..
325 },
326 ) in prog.iter().enumerate()
327 {
328 match op {
329 Op::MultiOp(vec) => {
330 for (idx, op) in vec.iter().enumerate() {
331 if op_label_eq(op, addr) {
332 links.push((i, j, Some(idx)));
333 }
334 }
335 }
336 _ => {
337 if op_label_eq(op, addr) {
338 links.push((i, j, None));
339 }
340 }
341 }
342 }
343 }
344
345 let unused_addrs: Vec<_> = {
346 let mut used_addr = raw_mems.iter().map(|x| x.0).collect::<Vec<_>>();
347
348 used_addr.sort_unstable();
349
350 let (first, last) = if used_addr.is_empty() {
351 (0, 0)
352 } else {
353 (
355 used_addr.first().copied().unwrap(),
356 used_addr.last().copied().unwrap(),
357 )
358 };
359
360 (0..first).chain(last + 1..).take(links.len()).collect()
361 };
362
363 assert!(
364 unused_addrs.len() >= links.len(),
365 "One of the memory addresses is too big"
366 );
367
368 let mut newlinks = BTreeMap::new();
369
370 for ((memaddr, progaddr, multiop_idx), uid) in links.into_iter().zip(unused_addrs) {
372 let (addr, data) = &label_mems[memaddr];
373
374 let uid = newlinks.entry(addr).or_insert((uid, *data)).0;
375
376 self.debug_info
377 .mem
378 .entry(uid)
379 .or_insert_with(|| addr.clone());
380
381 let cir = &mut prog[progaddr];
382
383 match cir.inst.op {
384 Op::MultiOp(ref mut ops) if multiop_idx.is_some() => {
385 ops[multiop_idx.unwrap()] = Op::Addr(uid);
386 }
387 Op::Fail(_) => cir.inst.op = Op::Addr(uid),
388 Op::Indirect(_) => {
389 if let Op::Indirect(op) = &mut cir.inst.op {
390 if matches!(op.as_ref(), Op::Fail(_)) {
391 *op.as_mut() = Op::Addr(uid);
392 }
393 }
394 }
395 _ => {}
396 }
397 }
398
399 newlinks
400 .values()
401 .copied()
402 .chain(raw_mems)
403 .map(|(addr, data)| MemIr { addr, data })
404 .collect()
405 }
406
407 #[allow(clippy::type_complexity)]
408 pub fn parse(mut self) -> Result<(Vec<InstIr<I>>, Vec<MemIr>, DebugInfo), ErrorMap> {
409 let (inst_spans, insts, mems) = self.get_insts_and_mems();
410
411 self.debug_info.inst_spans = inst_spans;
412
413 let mut inst_ir = self.process_insts(insts);
414
415 let mem_ir = self.process_mems(mems, &mut inst_ir);
416
417 if self.err.is_empty() {
418 Ok((inst_ir, mem_ir, self.debug_info))
419 } else {
420 Err(self.err)
421 }
422 }
423}
424
425#[derive(Debug, Clone)]
426pub enum Addr {
427 Bare(usize),
428 Label(String),
429}
430
431impl Addr {
432 fn as_dbg_string(&self) -> String {
433 match self {
434 Addr::Label(label) => label.clone(),
435 Addr::Bare(bare) => bare.to_string(),
436 }
437 }
438}
439
440impl Display for Addr {
441 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
442 match self {
443 Self::Bare(addr) => write!(f, "{addr}"),
444 Self::Label(label) => write!(f, "{label}:"),
445 }
446 }
447}
448
449pub struct InstIr<I>
450where
451 I: InstSet,
452 <I as FromStr>::Err: Display,
453{
454 pub addr: usize,
455 pub inst: inst::Inst<I>,
456}
457
458impl<I> InstIr<I>
459where
460 I: InstSet,
461 <I as FromStr>::Err: Display,
462{
463 pub fn new(addr: usize, opcode: I, op: Op) -> Self {
464 Self {
465 addr,
466 inst: inst::Inst::new(opcode, op),
467 }
468 }
469}
470
471impl<I> From<InstIr<I>> for (usize, exec::ExecInst)
472where
473 I: InstSet,
474 <I as FromStr>::Err: Display,
475{
476 fn from(InstIr { addr, inst }: InstIr<I>) -> Self {
477 (addr, inst.to_exec_inst())
478 }
479}
480
481pub struct Inst<I> {
482 pub addr: Option<Addr>,
483 pub opcode: I,
484 pub op: Op,
485}
486
487impl<I> Debug for Inst<I>
488where
489 I: Display,
490{
491 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
492 f.debug_struct("Inst")
493 .field("addr", &self.addr)
494 .field("opcode", &self.opcode.to_string())
495 .field("op", &self.op)
496 .finish()
497 }
498}
499
500impl<I> Display for Inst<I>
501where
502 I: Display,
503{
504 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
505 write!(
506 f,
507 "{} {} {}",
508 self.addr
509 .as_ref()
510 .map(ToString::to_string)
511 .unwrap_or_default(),
512 self.opcode,
513 self.op
514 )
515 }
516}
517
518enum MemEnum {
519 Linear(Vec<Mem>),
520 One(Mem),
521}
522
523pub struct Mem {
524 pub addr: Addr,
525 pub data: usize,
526}
527
528impl From<(Addr, usize)> for Mem {
529 fn from((addr, data): (Addr, usize)) -> Self {
530 Self { addr, data }
531 }
532}
533
534pub struct MemIr {
535 pub addr: usize,
536 pub data: usize,
537}