Skip to main content

mago_syntax_core/
parser.rs

1//! Shared primitives for parser token streams.
2//!
3//! Every syntax crate in the workspace runs the same lookahead pattern:
4//! pull tokens from a lexer into a small ring buffer, serve up `peek(n)`
5//! / `consume` / `pop_front` against it, and never shrink back to zero.
6//! This module owns the buffer so there is one tuned implementation for
7//! all four grammars.
8
9use std::fmt::Debug;
10use std::mem::MaybeUninit;
11
12/// Fixed-capacity ring buffer for parser lookahead.
13///
14/// Slots are [`MaybeUninit`] because the token type is expected to be
15/// [`Copy`] and the buffer tracks occupancy through `head` and `len`.
16/// Only `head..head + len` indices (modulo `CAP`) are considered
17/// initialised at any moment.
18///
19/// `CAP` **must be a power of two**; the indexing arithmetic uses a
20/// bitmask (`& (CAP - 1)`) instead of a modulo for speed. A debug-build
21/// assertion rejects non-power-of-two capacities during construction.
22pub struct LookaheadBuf<T: Copy, const CAP: usize> {
23    slots: [MaybeUninit<T>; CAP],
24    head: usize,
25    len: usize,
26}
27
28impl<T: Copy, const CAP: usize> Debug for LookaheadBuf<T, CAP> {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        f.debug_struct("LookaheadBuf")
31            .field("cap", &CAP)
32            .field("head", &self.head)
33            .field("len", &self.len)
34            .field("slots", &"<opaque>")
35            .finish()
36    }
37}
38
39impl<T: Copy, const CAP: usize> Default for LookaheadBuf<T, CAP> {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl<T: Copy, const CAP: usize> LookaheadBuf<T, CAP> {
46    /// Create an empty buffer. `CAP` must be a power of two; violating
47    /// this in debug builds trips a panic on first use.
48    ///
49    /// # Panics
50    ///
51    /// In debug builds, panics if `CAP` is not a power of two.
52    #[inline(always)]
53    #[must_use]
54    pub fn new() -> Self {
55        debug_assert!(CAP.is_power_of_two(), "LookaheadBuf CAP must be a power of two");
56        Self { slots: [const { MaybeUninit::uninit() }; CAP], head: 0, len: 0 }
57    }
58
59    /// Number of tokens currently buffered.
60    #[inline(always)]
61    pub fn len(&self) -> usize {
62        self.len
63    }
64
65    /// Whether the buffer is empty.
66    #[inline(always)]
67    pub fn is_empty(&self) -> bool {
68        self.len == 0
69    }
70
71    /// Whether the buffer is full. Callers that rely on the ring never
72    /// overflowing (no expansion is possible) should check this before
73    /// pushing.
74    #[inline(always)]
75    pub fn is_full(&self) -> bool {
76        self.len == CAP
77    }
78
79    /// Capacity, identical to the `CAP` const generic.
80    #[inline(always)]
81    pub const fn capacity(&self) -> usize {
82        CAP
83    }
84
85    /// Push a token at the back.
86    ///
87    /// Hard-panics on overflow: the ring has no heap fallback and
88    /// overwriting the oldest slot would silently corrupt the buffer.
89    /// Callers must size `CAP` large enough for the deepest lookahead
90    /// the parser ever performs.
91    ///
92    /// # Panics
93    ///
94    /// Panics if `CAP` tokens have been pushed without any being consumed.
95    #[inline(always)]
96    pub fn push_back(&mut self, value: T) {
97        assert!(self.len < CAP, "LookaheadBuf overflow: pushed {CAP} tokens without consuming");
98        let idx = (self.head + self.len) & (CAP - 1);
99        self.slots[idx] = MaybeUninit::new(value);
100        self.len += 1;
101    }
102
103    /// Pop the front token, or `None` if the buffer is empty.
104    #[inline(always)]
105    pub fn pop_front(&mut self) -> Option<T> {
106        if self.len == 0 {
107            return None;
108        }
109        // SAFETY: `head` is within `len` occupied slots by construction;
110        // those slots are always written before being read.
111        let value = unsafe { self.slots[self.head].assume_init() };
112        self.head = (self.head + 1) & (CAP - 1);
113        self.len -= 1;
114        Some(value)
115    }
116
117    /// Copy the `n`th-ahead token without consuming it (0 = next).
118    #[inline(always)]
119    pub fn get(&self, n: usize) -> Option<T> {
120        if n >= self.len {
121            return None;
122        }
123        let idx = (self.head + n) & (CAP - 1);
124        // SAFETY: `n < self.len` and all slots in [head, head+len) are
125        // initialised by construction.
126        Some(unsafe { self.slots[idx].assume_init() })
127    }
128
129    /// Discard every buffered token. Leaves the ring ready for reuse.
130    #[inline(always)]
131    pub fn clear(&mut self) {
132        self.head = 0;
133        self.len = 0;
134    }
135}
136
137#[cfg(test)]
138mod tests {
139    use super::LookaheadBuf;
140
141    #[test]
142    fn roundtrip_smoke() {
143        let mut buf: LookaheadBuf<u32, 8> = LookaheadBuf::new();
144        assert!(buf.is_empty());
145        for i in 0..6 {
146            buf.push_back(i);
147        }
148        assert_eq!(buf.len(), 6);
149        assert_eq!(buf.get(0), Some(0));
150        assert_eq!(buf.get(5), Some(5));
151        assert_eq!(buf.get(6), None);
152        for i in 0..6 {
153            assert_eq!(buf.pop_front(), Some(i));
154        }
155        assert!(buf.is_empty());
156    }
157
158    #[test]
159    fn wraps_around() {
160        let mut buf: LookaheadBuf<u32, 4> = LookaheadBuf::new();
161        for i in 0..4 {
162            buf.push_back(i);
163        }
164        assert!(buf.is_full());
165        for _ in 0..3 {
166            buf.pop_front();
167        }
168        assert_eq!(buf.len(), 1);
169        buf.push_back(99);
170        buf.push_back(100);
171        assert_eq!(buf.get(0), Some(3));
172        assert_eq!(buf.get(1), Some(99));
173        assert_eq!(buf.get(2), Some(100));
174    }
175}