rgbds_obj/
rpn.rs

1use std::cmp::Ordering;
2use std::convert::TryInto;
3use std::error::Error;
4use std::fmt::{self, Display, Formatter};
5use std::iter;
6
7/// A [Reverse Polish Notation] expression.
8///
9/// # Printing
10///
11/// An expression can be printed in one of two ways.
12/// By default, the expression is printed using [Reverse Polish Notation] still; however, if the
13/// [`#` "alternate printing" flag][fmt#sign0] is passed, the expression will be pretty-printed using infix
14/// notation instead, with a minimal amount of parentheses.
15///
16/// [Reverse Polish Notation]: https://en.wikipedia.org/wiki/Reverse_Polish_notation
17#[derive(Debug)]
18pub struct RpnExpr(Vec<u8>);
19
20/// A section memory type.
21#[derive(Debug, PartialEq)]
22pub enum SectType {
23    Wram0,
24    Vram,
25    Romx,
26    Rom0,
27    Hram,
28    Wramx,
29    Sram,
30    Oam,
31}
32impl SectType {
33    /// The section type's name.
34    pub fn name(&self) -> &'static str {
35        use SectType::*;
36
37        match self {
38            Wram0 => "WRAM0",
39            Vram => "VRAM",
40            Romx => "ROMX",
41            Rom0 => "ROM0",
42            Hram => "HRAM",
43            Wramx => "WRAMX",
44            Sram => "SRAM",
45            Oam => "OAM",
46        }
47    }
48}
49
50impl RpnExpr {
51    /// Constructs a RPN expression from its byte serialization.
52    /// This does not check the expression's correctness.
53    pub fn from_bytes(bytes: Vec<u8>) -> Self {
54        Self(bytes)
55    }
56
57    /// Retrieves the expression's serialized bytes.
58    pub fn bytes(&self) -> &[u8] {
59        &self.0
60    }
61
62    /// Yields an iterator over the expression's "operations".
63    /// "Operations" may not mean what you think; refer to [`RpnOp`] for more information.
64    pub fn iter(&self) -> Iter {
65        Iter::new(self)
66    }
67}
68
69/// An iterator over a [RPN expression][RpnExpr]'s operations (this includes literals).
70///
71/// Since a RPN expression does not validate the serialized data it's given when constructed,
72/// the iteration may fail at any point.
73pub struct Iter<'a>(&'a RpnExpr, usize); // Expression, pointer
74impl<'a> Iter<'a> {
75    fn new(expr: &'a RpnExpr) -> Self {
76        Self(expr, 0)
77    }
78
79    fn read_u8(&mut self) -> Option<u8> {
80        let val = self.0.bytes().get(self.1).copied();
81        self.1 += 1;
82        val
83    }
84
85    fn read_u32(&mut self) -> Option<u32> {
86        let val = self
87            .0
88            .bytes()
89            .get(self.1..self.1 + 4)
90            .map(|bytes| u32::from_le_bytes(bytes.try_into().unwrap()));
91        self.1 += 4;
92        val
93    }
94
95    fn read_string(&mut self) -> Option<&'a [u8]> {
96        let start = self.1;
97        loop {
98            let c = *self.0.bytes().get(self.1)?;
99            self.1 += 1;
100
101            if c == 0 {
102                return Some(&self.0.bytes()[start..self.1]);
103            }
104        }
105    }
106
107    fn read_sect_type(&mut self) -> Option<SectType> {
108        use SectType::*;
109
110        let val = match self.0.bytes().get(self.1) {
111            Some(0) => Some(Wram0),
112            Some(1) => Some(Vram),
113            Some(2) => Some(Romx),
114            Some(3) => Some(Rom0),
115            Some(4) => Some(Hram),
116            Some(5) => Some(Wramx),
117            Some(6) => Some(Sram),
118            Some(7) => Some(Oam),
119            _ => None,
120        };
121        self.1 += 1;
122        val
123    }
124}
125impl<'a> Iterator for Iter<'a> {
126    type Item = Result<RpnOp<'a>, RpnIterError>;
127
128    fn next(&mut self) -> Option<Self::Item> {
129        if self.1 == self.0.bytes().len() {
130            return None;
131        }
132        let err = Err(RpnIterError::new(self.1)); // What to return in case of error
133
134        let operator = self.0.bytes()[self.1];
135        self.1 += 1;
136        Some(match operator {
137            0x00 => Ok(RpnOp::Add),
138            0x01 => Ok(RpnOp::Sub),
139            0x02 => Ok(RpnOp::Mul),
140            0x03 => Ok(RpnOp::Div),
141            0x04 => Ok(RpnOp::Mod),
142            0x05 => Ok(RpnOp::Neg),
143            0x06 => Ok(RpnOp::Pow),
144            0x10 => Ok(RpnOp::BinOr),
145            0x11 => Ok(RpnOp::BinAnd),
146            0x12 => Ok(RpnOp::Xor),
147            0x13 => Ok(RpnOp::Cpl),
148            0x21 => Ok(RpnOp::And),
149            0x22 => Ok(RpnOp::Or),
150            0x23 => Ok(RpnOp::Not),
151            0x30 => Ok(RpnOp::Eq),
152            0x31 => Ok(RpnOp::Neq),
153            0x32 => Ok(RpnOp::Gt),
154            0x33 => Ok(RpnOp::Lt),
155            0x34 => Ok(RpnOp::Gte),
156            0x35 => Ok(RpnOp::Lte),
157            0x40 => Ok(RpnOp::Lsh),
158            0x41 => Ok(RpnOp::Rsh),
159            0x42 => Ok(RpnOp::Ursh),
160            0x50 => self.read_u32().map_or(err, |id| Ok(RpnOp::BankSym(id))),
161            0x51 => self
162                .read_string()
163                .map_or(err, |string| Ok(RpnOp::BankSect(string))),
164            0x52 => Ok(RpnOp::BankSelf),
165            0x53 => self
166                .read_string()
167                .map_or(err, |string| Ok(RpnOp::SizeofSect(string))),
168            0x54 => self
169                .read_string()
170                .map_or(err, |string| Ok(RpnOp::StartofSect(string))),
171            0x55 => self
172                .read_sect_type()
173                .map_or(err, |sect_type| Ok(RpnOp::SizeofSectType(sect_type))),
174            0x56 => self
175                .read_sect_type()
176                .map_or(err, |sect_type| Ok(RpnOp::StartofSectType(sect_type))),
177            0x60 => Ok(RpnOp::HramCheck),
178            0x61 => Ok(RpnOp::RstCheck),
179            0x62 => self.read_u8().map_or(err, |mask| Ok(RpnOp::BitIndex(mask))),
180            0x70 => Ok(RpnOp::High),
181            0x71 => Ok(RpnOp::Low),
182            0x72 => Ok(RpnOp::BitWidth),
183            0x73 => Ok(RpnOp::TzCount),
184            0x80 => self.read_u32().map_or(err, |id| Ok(RpnOp::Int(id))),
185            0x81 => self.read_u32().map_or(err, |id| Ok(RpnOp::Sym(id))),
186            _ => err,
187        })
188    }
189
190    fn size_hint(&self) -> (usize, Option<usize>) {
191        if self.1 == self.0.bytes().len() {
192            return (0, Some(0));
193        }
194
195        // At any point, there may be a single element left (if it takes a string as argument);
196        // however, each element consumes at least one byte, so we have an upper bound.
197        (1, Some(self.0.bytes().len() - self.1))
198    }
199}
200
201/// An error produced while iterating on a [RPN expression][RpnExpr].
202/// This can be an early EOF, an operator trying to popping an item off of an empty RPN stack, etc.
203#[derive(Debug)]
204pub struct RpnIterError(usize);
205impl RpnIterError {
206    fn new(ofs: usize) -> Self {
207        Self(ofs)
208    }
209
210    /// The offset within the expression at which the error was encountered.
211    pub fn offset(&self) -> usize {
212        self.0
213    }
214}
215impl Display for RpnIterError {
216    fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
217        write!(fmt, "Error parsing RPN expression at offset {}", self.0)
218    }
219}
220impl Error for RpnIterError {}
221
222// RPN expression printing
223
224impl RpnExpr {
225    fn fmt_rpn(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
226        for (op, prefix) in self.iter().zip(iter::successors(Some(""), |_| Some(" "))) {
227            match op {
228                Err(err) => write!(fmt, "{prefix}<RPN error@${:04x}>", err.offset())?,
229                Ok(op) => write!(fmt, "{prefix}{op}")?,
230            }
231        }
232        Ok(())
233    }
234
235    fn fmt_infix(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
236        let mut nodes = Vec::new();
237        // Node ID stack
238        let mut stack = Vec::new();
239        // Pops a node ID off the stack, erroring out of the whole function if it's empty
240        // Additionally, gives the popped node its parent's ID
241        macro_rules! pop {
242            ($parent:expr) => {
243                if let Some(tree) = stack.pop() {
244                    tree
245                } else {
246                    return write!(fmt, "<bad RPN expr, emptied stack>");
247                }
248            };
249        }
250
251        // First, build the expression tree from the RPN
252        for op in self.iter() {
253            match op {
254                Err(err) => return write!(fmt, "<RPN error@${:04x}>", err.offset()),
255                Ok(op) => {
256                    let next_id = nodes.len();
257
258                    let children = match op.arity() {
259                        Arity::Literal => RpnTreeNodeType::Literal,
260                        Arity::Unary => {
261                            let operand = pop!(next_id);
262                            RpnTreeNodeType::Unary(operand)
263                        }
264                        Arity::Binary => {
265                            let rhs = pop!(next_id);
266                            let lhs = pop!(next_id);
267                            RpnTreeNodeType::Binary { lhs, rhs }
268                        }
269                    };
270                    nodes.push(RpnTreeNode::new(op, children));
271                    stack.push(next_id);
272                }
273            }
274        }
275
276        if stack.len() != 1 {
277            return write!(fmt, "<bad RPN expr, finished with {} elems>", stack.len());
278        }
279        // Now, traverse the expression tree
280        write_node(&nodes, stack[0], fmt)
281    }
282}
283impl Display for RpnExpr {
284    fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
285        if fmt.alternate() {
286            self.fmt_infix(fmt)
287        } else {
288            self.fmt_rpn(fmt)
289        }
290    }
291}
292
293/// A RPN operation; since this means "operation on the RPN stack" here, this includes literals,
294/// not just operators.
295// We don't have `Eq` because symbol IDs are file-relative
296#[derive(Debug, PartialEq)]
297pub enum RpnOp<'a> {
298    /// `+` operator.
299    Add,
300    /// `-` operator.
301    Sub,
302    /// `*` operator.
303    Mul,
304    /// `/` operator.
305    Div,
306    /// `%` operator.
307    Mod,
308    /// Unary `-` operator.
309    Neg,
310    /// `**` operator.
311    Pow,
312    /// `|` operator.
313    BinOr,
314    /// `&` operator.
315    BinAnd,
316    /// `^` operator.
317    Xor,
318    /// `~` operator.
319    Cpl,
320    /// `&&` operator.
321    And,
322    /// `||` operator.
323    Or,
324    /// `!` operator.
325    Not,
326    /// `==` operator.
327    Eq,
328    /// `!=` operator.
329    Neq,
330    /// `>` operator.
331    Gt,
332    /// `<` operator.
333    Lt,
334    /// `>=` operator.
335    Gte,
336    /// `<=` operator.
337    Lte,
338    /// `<<` operator.
339    Lsh,
340    /// `>>` operator.
341    Rsh,
342    /// `>>>` operator.
343    Ursh,
344    /// `BANK(Symbol)`
345    BankSym(u32),
346    /// `BANK("section")`
347    BankSect(&'a [u8]),
348    /// `BANK(@)`
349    BankSelf,
350    /// `SIZEOF("section")`
351    SizeofSect(&'a [u8]),
352    /// `STARTOF("section")`
353    StartofSect(&'a [u8]),
354    /// `SIZEOF(SectionType)`
355    SizeofSectType(SectType),
356    /// `STARTOF(SectionType)`
357    StartofSectType(SectType),
358    /// HRAM check (check if the value is in HRAM range, then `& 0xFF`).
359    HramCheck,
360    /// `rst` check (check if the value is a `rst` target, then `| 0xC7`).
361    RstCheck,
362    /// `bit/res/set` bit index (check if the value is in 0-7 range, then `<< 3` and `| u8`
363    BitIndex(u8),
364    /// `HIGH(value)`
365    High,
366    /// `LOW(value)`
367    Low,
368    /// `BITWIDTH(value)`
369    BitWidth,
370    /// `TZCOUNT(value)`
371    TzCount,
372    /// 32-bit literal.
373    Int(u32),
374    /// Symbol (referenced by 32-bit ID).
375    Sym(u32),
376}
377/// A RPN operation's [arity].
378///
379/// [arity]: https://en.wikipedia.org/wiki/Arity
380pub enum Arity {
381    Literal,
382    Unary,
383    Binary,
384}
385impl RpnOp<'_> {
386    /// The operation's arity.
387    pub fn arity(&self) -> Arity {
388        use Arity::*;
389        use RpnOp::*;
390
391        match self {
392            Add => Binary,
393            Sub => Binary,
394            Mul => Binary,
395            Div => Binary,
396            Mod => Binary,
397            Neg => Unary,
398            Pow => Binary,
399            BinOr => Binary,
400            BinAnd => Binary,
401            Xor => Binary,
402            Cpl => Unary,
403            And => Binary,
404            Or => Binary,
405            Not => Unary,
406            Eq => Binary,
407            Neq => Binary,
408            Gt => Binary,
409            Lt => Binary,
410            Gte => Binary,
411            Lte => Binary,
412            Lsh => Binary,
413            Rsh => Binary,
414            Ursh => Binary,
415            BankSym(..) => Literal,
416            BankSect(..) => Literal,
417            BankSelf => Literal,
418            SizeofSect(..) => Literal,
419            StartofSect(..) => Literal,
420            SizeofSectType(..) => Literal,
421            StartofSectType(..) => Literal,
422            HramCheck => Unary,
423            RstCheck => Unary,
424            BitIndex(..) => Unary,
425            High => Unary,
426            Low => Unary,
427            BitWidth => Unary,
428            TzCount => Unary,
429            Int(..) => Literal,
430            Sym(..) => Literal,
431        }
432    }
433
434    /// The operation's precedence.
435    ///
436    /// # Panics
437    ///
438    /// This function panics if the operation is not a binary operator.
439    pub fn precedence(&self) -> u8 {
440        use RpnOp::*;
441
442        // "Operators" in rgbasm(5)
443        match self {
444            Pow => 6,
445            Mul | Div | Mod => 5,
446            Lsh | Rsh | Ursh => 4,
447            BinAnd | BinOr | Xor => 3,
448            Add | Sub => 2,
449            Eq | Neq | Gt | Lt | Gte | Lte => 1,
450            And | Or => 0,
451
452            // There is no precedence for non-binary operators...
453            Neg | Cpl | Not | BankSym(..) | BankSect(..) | BankSelf | SizeofSect(..)
454            | StartofSect(..) | SizeofSectType(..) | StartofSectType(..) | HramCheck | RstCheck
455            | BitIndex(..) | High | Low | BitWidth | TzCount | Int(..) | Sym(..) => {
456                panic!(
457                    "Non-binary operators (such as {:?}) have no precedence",
458                    self,
459                )
460            }
461        }
462    }
463
464    /// Whether this operation is associative; that is, if `A op (B op C) == (A op B) op C`.
465    ///
466    /// # Panics
467    ///
468    /// This function panics if the operation is not a binary operator.
469    pub fn is_associative(&self) -> bool {
470        use RpnOp::*;
471
472        match self {
473            Add | Mul | BinOr | BinAnd | Xor | And | Or => true,
474            Sub | Div | Mod | Pow | Eq | Neq | Gt | Lt | Gte | Lte | Lsh | Rsh | Ursh => false,
475
476            // There is no associativity for non-binary operators...
477            Neg | Cpl | Not | BankSym(..) | BankSect(..) | BankSelf | SizeofSect(..)
478            | StartofSect(..) | SizeofSectType(..) | StartofSectType(..) | HramCheck | RstCheck
479            | BitIndex(..) | High | Low | BitWidth | TzCount | Int(..) | Sym(..) => {
480                panic!(
481                    "Non-binary operators (such as {:?}) have no associativity",
482                    self,
483                )
484            }
485        }
486    }
487
488    /// Computes whether parens are needed (for pretty-printing) around a child expression, with
489    /// the given parent.
490    pub fn needs_parens(&self, parent: &RpnOp<'_>, is_left: bool) -> bool {
491        use Arity::*;
492
493        match (parent.arity(), self.arity()) {
494            // Literals have no precedence, so no unnecessary parens please.
495            (Literal, _) | (_, Literal) => false,
496            // Unary operations always have priority over binary...
497            // ...so parens are necessary for a unary enclosing a binary or unary...
498            (Unary, Binary | Unary) => true,
499            // ...and not for the reverse.
500            (Binary, Unary) => false,
501            (Binary, Binary) => {
502                // For binary, we need to compare the precedences
503                use Ordering::*;
504
505                match parent.precedence().cmp(&self.precedence()) {
506                    Less => false,   // The child has priority
507                    Greater => true, // The parent has priority, so override that by using parens
508                    Equal => {
509                        // Parens are only required for the right branch if the parent operation is
510                        // *not* associative with the child one (which implies they're the same)
511                        !is_left && (self != parent || !self.is_associative())
512                    }
513                }
514            }
515        }
516    }
517}
518
519impl Display for RpnOp<'_> {
520    fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
521        use RpnOp::*;
522
523        match self {
524            Add => write!(fmt, "+"),
525            Sub => write!(fmt, "-"),
526            Mul => write!(fmt, "*"),
527            Div => write!(fmt, "/"),
528            Mod => write!(fmt, "%"),
529            Neg => write!(fmt, "-()"),
530            Pow => write!(fmt, "**"),
531            BinOr => write!(fmt, "|"),
532            BinAnd => write!(fmt, "&"),
533            Xor => write!(fmt, "^"),
534            Cpl => write!(fmt, "~"),
535            And => write!(fmt, "&&"),
536            Or => write!(fmt, "||"),
537            Not => write!(fmt, "!"),
538            Eq => write!(fmt, "=="),
539            Neq => write!(fmt, "!="),
540            Gt => write!(fmt, ">"),
541            Lt => write!(fmt, "<"),
542            Gte => write!(fmt, ">="),
543            Lte => write!(fmt, "<="),
544            Lsh => write!(fmt, "<<"),
545            Rsh => write!(fmt, ">>"),
546            Ursh => write!(fmt, ">>>"),
547            BankSym(id) => {
548                if *id == u32::MAX {
549                    write!(fmt, "BANK(@)")
550                } else {
551                    write!(fmt, "BANK(Sym#{id})")
552                }
553            }
554            BankSect(name) => write!(fmt, "BANK(\"{}\")", String::from_utf8_lossy(name)),
555            BankSelf => write!(fmt, "BANK(@)"),
556            SizeofSect(name) => write!(fmt, "SIZEOF(\"{}\")", String::from_utf8_lossy(name)),
557            StartofSect(name) => write!(fmt, "STARTOF(\"{}\")", String::from_utf8_lossy(name)),
558            SizeofSectType(sect_type) => write!(fmt, "SIZEOF({})", sect_type.name()),
559            StartofSectType(sect_type) => write!(fmt, "STARTOF({})", sect_type.name()),
560            HramCheck => write!(fmt, "HRAM?"),
561            RstCheck => write!(fmt, "RST?"),
562            BitIndex(mask) => {
563                const OP_NAMES: [&str; 4] = ["INVALID", "BIT", "RES", "SET"];
564                const REG_NAMES: [&str; 8] = ["B", "C", "D", "E", "H", "L", "[HL]", "A"];
565                let op_id = (mask >> 6) as usize;
566                let reg_id = (mask & 0b111) as usize;
567                write!(fmt, "{}<{}>", OP_NAMES[op_id], REG_NAMES[reg_id])
568            }
569            High => write!(fmt, "HIGH"),
570            Low => write!(fmt, "LOW"),
571            BitWidth => write!(fmt, "BITWIDTH"),
572            TzCount => write!(fmt, "TZCOUNT"),
573            Int(val) => write!(fmt, "${val:04x}"),
574            Sym(id) => {
575                if *id == u32::MAX {
576                    write!(fmt, "@")
577                } else {
578                    write!(fmt, "Sym#{id}")
579                }
580            }
581        }
582    }
583}
584
585#[derive(Debug)]
586struct RpnTreeNode<'op> {
587    op: RpnOp<'op>,
588    children: RpnTreeNodeType,
589}
590impl<'op> RpnTreeNode<'op> {
591    pub fn new(op: RpnOp<'op>, children: RpnTreeNodeType) -> Self {
592        Self { op, children }
593    }
594}
595#[derive(Debug)]
596enum RpnTreeNodeType {
597    Literal,
598    Unary(usize),
599    Binary { lhs: usize, rhs: usize },
600}
601
602fn write_node(nodes: &[RpnTreeNode], id: usize, fmt: &mut Formatter) -> Result<(), fmt::Error> {
603    use RpnOp::*;
604    use RpnTreeNodeType::*;
605    let node = &nodes[id];
606
607    let write_child_node = |id, fmt: &mut Formatter, is_left: bool| {
608        let child_node: &RpnTreeNode = &nodes[id];
609        let needs_parens = child_node.op.needs_parens(&node.op, is_left);
610        if needs_parens {
611            write!(fmt, "(")?;
612        }
613        write_node(nodes, id, fmt)?;
614        if needs_parens {
615            write!(fmt, ")")?;
616        }
617        Ok(())
618    };
619
620    match node.children {
621        // Literals are printed just the same
622        Literal => write!(fmt, "{}", node.op),
623        // Gulp, these can be a bit funky
624        Unary(operand) => match node.op {
625            // These are printed like functions
626            HramCheck | RstCheck | High | Low | BitWidth | TzCount => {
627                write!(fmt, "{}(", node.op)?;
628                write_child_node(operand, fmt, true)?;
629                write!(fmt, ")")
630            }
631            // This is printed like an instruction
632            BitIndex(..) => {
633                write!(fmt, "{} ", node.op)?;
634                write_child_node(operand, fmt, true)
635            }
636            // The rest is simply affixed to the node
637            _ => {
638                if matches!(node.op, Neg) {
639                    write!(fmt, "-")?;
640                } else {
641                    write!(fmt, "{}", node.op)?;
642                }
643                write_child_node(operand, fmt, true)
644            }
645        },
646        // Fairly uniform
647        Binary { lhs, rhs } => {
648            write_child_node(lhs, fmt, true)?;
649            write!(fmt, " {} ", node.op)?;
650            write_child_node(rhs, fmt, false)
651        }
652    }
653}