Skip to main content

mago_syntax_core/
parser.rs

1//! Shared primitives for parser token streams.
2//!
3//! Every syntax crate in the workspace runs the same lookahead pattern:
4//! pull tokens from a lexer into a small ring buffer, serve up `peek(n)`
5//! / `consume` / `pop_front` against it, and never shrink back to zero.
6//! This module owns the buffer so there is one tuned implementation for
7//! all four grammars.
8
9use std::fmt::Debug;
10use std::mem::MaybeUninit;
11
12/// Fixed-capacity ring buffer for parser lookahead.
13///
14/// Slots are [`MaybeUninit`] because the token type is expected to be
15/// [`Copy`] and the buffer tracks occupancy through `head` and `len`.
16/// Only `head..head + len` indices (modulo `CAP`) are considered
17/// initialised at any moment.
18///
19/// `CAP` **must be a power of two**; the indexing arithmetic uses a
20/// bitmask (`& (CAP - 1)`) instead of a modulo for speed. A debug-build
21/// assertion rejects non-power-of-two capacities during construction.
22pub struct LookaheadBuf<T: Copy, const CAP: usize> {
23    slots: [MaybeUninit<T>; CAP],
24    head: usize,
25    len: usize,
26}
27
28impl<T: Copy, const CAP: usize> Debug for LookaheadBuf<T, CAP> {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        f.debug_struct("LookaheadBuf").field("cap", &CAP).field("head", &self.head).field("len", &self.len).finish()
31    }
32}
33
34impl<T: Copy, const CAP: usize> Default for LookaheadBuf<T, CAP> {
35    fn default() -> Self {
36        Self::new()
37    }
38}
39
40impl<T: Copy, const CAP: usize> LookaheadBuf<T, CAP> {
41    /// Create an empty buffer. `CAP` must be a power of two; violating
42    /// this in debug builds trips a panic on first use.
43    #[inline(always)]
44    pub fn new() -> Self {
45        debug_assert!(CAP.is_power_of_two(), "LookaheadBuf CAP must be a power of two");
46        Self { slots: [const { MaybeUninit::uninit() }; CAP], head: 0, len: 0 }
47    }
48
49    /// Number of tokens currently buffered.
50    #[inline(always)]
51    pub fn len(&self) -> usize {
52        self.len
53    }
54
55    /// Whether the buffer is empty.
56    #[inline(always)]
57    pub fn is_empty(&self) -> bool {
58        self.len == 0
59    }
60
61    /// Whether the buffer is full. Callers that rely on the ring never
62    /// overflowing (no expansion is possible) should check this before
63    /// pushing.
64    #[inline(always)]
65    pub fn is_full(&self) -> bool {
66        self.len == CAP
67    }
68
69    /// Capacity, identical to the `CAP` const generic.
70    #[inline(always)]
71    pub const fn capacity(&self) -> usize {
72        CAP
73    }
74
75    /// Push a token at the back.
76    ///
77    /// Hard-panics on overflow: the ring has no heap fallback and
78    /// overwriting the oldest slot would silently corrupt the buffer.
79    /// Callers must size `CAP` large enough for the deepest lookahead
80    /// the parser ever performs.
81    #[inline(always)]
82    pub fn push_back(&mut self, value: T) {
83        assert!(self.len < CAP, "LookaheadBuf overflow: pushed {CAP} tokens without consuming");
84        let idx = (self.head + self.len) & (CAP - 1);
85        self.slots[idx] = MaybeUninit::new(value);
86        self.len += 1;
87    }
88
89    /// Pop the front token, or `None` if the buffer is empty.
90    #[inline(always)]
91    pub fn pop_front(&mut self) -> Option<T> {
92        if self.len == 0 {
93            return None;
94        }
95        // SAFETY: `head` is within `len` occupied slots by construction;
96        // those slots are always written before being read.
97        let value = unsafe { self.slots[self.head].assume_init() };
98        self.head = (self.head + 1) & (CAP - 1);
99        self.len -= 1;
100        Some(value)
101    }
102
103    /// Copy the `n`th-ahead token without consuming it (0 = next).
104    #[inline(always)]
105    pub fn get(&self, n: usize) -> Option<T> {
106        if n >= self.len {
107            return None;
108        }
109        let idx = (self.head + n) & (CAP - 1);
110        // SAFETY: `n < self.len` and all slots in [head, head+len) are
111        // initialised by construction.
112        Some(unsafe { self.slots[idx].assume_init() })
113    }
114
115    /// Discard every buffered token. Leaves the ring ready for reuse.
116    #[inline(always)]
117    pub fn clear(&mut self) {
118        self.head = 0;
119        self.len = 0;
120    }
121}
122
123#[cfg(test)]
124mod tests {
125    use super::LookaheadBuf;
126
127    #[test]
128    fn roundtrip_smoke() {
129        let mut buf: LookaheadBuf<u32, 8> = LookaheadBuf::new();
130        assert!(buf.is_empty());
131        for i in 0..6 {
132            buf.push_back(i);
133        }
134        assert_eq!(buf.len(), 6);
135        assert_eq!(buf.get(0), Some(0));
136        assert_eq!(buf.get(5), Some(5));
137        assert_eq!(buf.get(6), None);
138        for i in 0..6 {
139            assert_eq!(buf.pop_front(), Some(i));
140        }
141        assert!(buf.is_empty());
142    }
143
144    #[test]
145    fn wraps_around() {
146        let mut buf: LookaheadBuf<u32, 4> = LookaheadBuf::new();
147        for i in 0..4 {
148            buf.push_back(i);
149        }
150        assert!(buf.is_full());
151        for _ in 0..3 {
152            buf.pop_front();
153        }
154        assert_eq!(buf.len(), 1);
155        buf.push_back(99);
156        buf.push_back(100);
157        assert_eq!(buf.get(0), Some(3));
158        assert_eq!(buf.get(1), Some(99));
159        assert_eq!(buf.get(2), Some(100));
160    }
161}