Skip to main content

perl_token/
token.rs

1use std::{ops::Range, sync::Arc};
2
3use crate::TokenKind;
4
/// Half-open byte range (`start..end`) locating a [`Token`] in the source.
///
/// Both fields are public and construction does not enforce any ordering;
/// use [`TokenSpan::try_new`] when `start <= end` must be guaranteed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct TokenSpan {
    /// Byte offset at which the span begins (inclusive).
    pub start: usize,
    /// Byte offset at which the span stops (exclusive).
    pub end: usize,
}
13
14impl TokenSpan {
15    /// Create a span from raw byte positions.
16    pub const fn new(start: usize, end: usize) -> Self {
17        Self { start, end }
18    }
19
20    /// Create a span, returning an error when `end < start`.
21    pub fn try_new(start: usize, end: usize) -> Result<Self, TokenSpanError> {
22        if end < start {
23            return Err(TokenSpanError::EndBeforeStart { start, end });
24        }
25
26        Ok(Self { start, end })
27    }
28
29    /// Span length in bytes.
30    pub const fn len(self) -> usize {
31        self.end.saturating_sub(self.start)
32    }
33
34    /// Whether the span length is zero bytes.
35    pub const fn is_empty(self) -> bool {
36        self.len() == 0
37    }
38
39    /// Convert this span to a standard `Range`.
40    pub const fn range(self) -> Range<usize> {
41        self.start..self.end
42    }
43
44    /// Return whether `offset` is inside this half-open span.
45    ///
46    /// The start is inclusive and the end is exclusive, matching Rust
47    /// [`Range`] semantics. Empty spans contain no offsets.
48    pub const fn contains(self, offset: usize) -> bool {
49        self.start <= offset && offset < self.end
50    }
51
52    /// Return whether `offset` touches this span, including the end boundary.
53    ///
54    /// This is useful for cursor-oriented callers that need positions at token
55    /// boundaries to resolve to the adjacent token. Empty spans touch exactly
56    /// their single boundary offset.
57    pub const fn touches(self, offset: usize) -> bool {
58        self.start <= offset && offset <= self.end
59    }
60
61    /// Return whether this span overlaps `other`.
62    ///
63    /// Spans are treated as half-open byte ranges, so adjacent spans such as
64    /// `0..2` and `2..4` do not overlap. Empty spans never overlap.
65    pub const fn overlaps(self, other: Self) -> bool {
66        !self.is_empty() && !other.is_empty() && self.start < other.end && other.start < self.end
67    }
68
69    /// Return the smallest span covering both spans.
70    pub const fn cover(self, other: Self) -> Self {
71        Self { start: min_usize(self.start, other.start), end: max_usize(self.end, other.end) }
72    }
73}
74
/// Smaller of two offsets, usable in `const` contexts.
const fn min_usize(left: usize, right: usize) -> usize {
    if right < left { right } else { left }
}
78
/// Larger of two offsets, usable in `const` contexts.
const fn max_usize(left: usize, right: usize) -> usize {
    if right > left { right } else { left }
}
82
83/// Error type for checked token/span constructors.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub enum TokenSpanError {
86    /// End offset is before start offset.
87    EndBeforeStart {
88        /// Start byte offset that was supplied.
89        start: usize,
90        /// End byte offset that violated `end >= start`.
91        end: usize,
92    },
93    /// Empty span is only valid for EOF or explicit synthetic tokens.
94    EmptySpanNotAllowed {
95        /// Token kind that disallows an empty span.
96        kind: TokenKind,
97        /// Byte offset where the empty span was constructed.
98        at: usize,
99    },
100}
101
102impl std::fmt::Display for TokenSpanError {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        match self {
105            Self::EndBeforeStart { start, end } => {
106                write!(f, "token span invariant violated: end ({end}) < start ({start})")
107            }
108            Self::EmptySpanNotAllowed { kind, at } => {
109                write!(f, "empty span not allowed for token kind {kind:?} at byte {at}")
110            }
111        }
112    }
113}
114
// Marker impl: no variant wraps an underlying error, so the default
// `std::error::Error` methods are used unchanged.
impl std::error::Error for TokenSpanError {}
116
117#[inline]
118const fn allows_empty_span(kind: TokenKind) -> bool {
119    matches!(kind, TokenKind::Eof | TokenKind::Unknown)
120}
121
122#[inline]
123fn validate_non_empty_span(
124    kind: TokenKind,
125    start: usize,
126    is_empty: bool,
127) -> Result<(), TokenSpanError> {
128    if is_empty && !allows_empty_span(kind) {
129        return Err(TokenSpanError::EmptySpanNotAllowed { kind, at: start });
130    }
131
132    Ok(())
133}
134
135/// Borrowed view over token data for allocation-sensitive paths.
136///
137/// Unlike [`Token`], this type borrows source text and does not allocate.
138/// Convert to [`Token`] explicitly with [`TokenRef::to_owned_token`] or `From`.
139#[derive(Debug, Clone, Copy, PartialEq, Eq)]
140pub struct TokenRef<'src> {
141    /// Token classification for parser decision making
142    pub kind: TokenKind,
143    /// Borrowed source text slice
144    pub text: &'src str,
145    /// Starting byte position for error reporting and location tracking
146    pub start: usize,
147    /// Ending byte position for span calculation and navigation
148    pub end: usize,
149}
150
151impl<'src> TokenRef<'src> {
152    /// Create a borrowed token view with the given kind, source text, and byte span.
153    pub fn new(kind: TokenKind, text: &'src str, start: usize, end: usize) -> Self {
154        Self { kind, text, start, end }
155    }
156
157    /// Create a borrowed token view with checked span ordering.
158    ///
159    /// Unlike [`TokenRef::new`], this rejects spans where `end < start`.
160    pub fn try_new(
161        kind: TokenKind,
162        text: &'src str,
163        start: usize,
164        end: usize,
165    ) -> Result<Self, TokenSpanError> {
166        let span = TokenSpan::try_new(start, end)?;
167        Ok(Self { kind, text, start: span.start, end: span.end })
168    }
169
170    /// Create a borrowed token view while enforcing span invariants.
171    ///
172    /// Rules:
173    /// - `start <= end`
174    /// - zero-length spans are accepted for EOF and explicit synthetic unknown tokens
175    pub fn new_checked(
176        kind: TokenKind,
177        text: &'src str,
178        start: usize,
179        end: usize,
180    ) -> Result<Self, TokenSpanError> {
181        let token = Self::try_new(kind, text, start, end)?;
182        validate_non_empty_span(token.kind, token.start, token.is_empty())?;
183
184        Ok(token)
185    }
186
187    /// Return the token span length in bytes.
188    pub fn len(self) -> usize {
189        TokenSpan::new(self.start, self.end).len()
190    }
191
192    /// Return whether the token span is empty.
193    pub fn is_empty(self) -> bool {
194        self.len() == 0
195    }
196
197    /// Return the token span as `(start, end)`.
198    pub fn span(self) -> (usize, usize) {
199        (self.start, self.end)
200    }
201
202    /// Return a human-readable display name for this token.
203    pub fn display_name(self) -> &'static str {
204        self.kind.display_name()
205    }
206
207    /// Convert this borrowed token view into an owned [`Token`].
208    pub fn to_owned_token(self) -> Token {
209        Token::new(self.kind, self.text, self.start, self.end)
210    }
211}
212
213/// Token produced by the lexer and consumed by the parser.
214///
215/// Stores the token kind, original source text, and byte span. The text is kept
216/// in an `Arc<str>` so buffering and lookahead can clone tokens cheaply.
217#[derive(Debug, Clone, PartialEq)]
218pub struct Token {
219    /// Token classification for parser decision making
220    pub kind: TokenKind,
221    /// Original source text for precise reconstruction
222    pub text: Arc<str>,
223    /// Starting byte position for error reporting and location tracking
224    pub start: usize,
225    /// Ending byte position for span calculation and navigation
226    pub end: usize,
227}
228
229impl Token {
230    /// Create a new token with the given kind, source text, and byte span.
231    ///
232    /// # Examples
233    ///
234    /// ```rust
235    /// use perl_token::{Token, TokenKind};
236    ///
237    /// let tok = Token::new(TokenKind::Sub, "sub", 0, 3);
238    /// assert_eq!(tok.kind, TokenKind::Sub);
239    /// assert_eq!(&*tok.text, "sub");
240    /// ```
241    pub fn new(kind: TokenKind, text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
242        Token { kind, text: text.into(), start, end }
243    }
244
245    /// Create a token with checked span ordering.
246    ///
247    /// Unlike [`Token::new`], this rejects spans where `end < start`.
248    pub fn try_new(
249        kind: TokenKind,
250        text: impl Into<Arc<str>>,
251        start: usize,
252        end: usize,
253    ) -> Result<Self, TokenSpanError> {
254        let span = TokenSpan::try_new(start, end)?;
255        Ok(Self { kind, text: text.into(), start: span.start, end: span.end })
256    }
257
258    /// Create a token while enforcing span invariants.
259    ///
260    /// Rules:
261    /// - `start <= end`
262    /// - zero-length spans are accepted for EOF and explicit synthetic unknown tokens
263    pub fn new_checked(
264        kind: TokenKind,
265        text: impl Into<Arc<str>>,
266        start: usize,
267        end: usize,
268    ) -> Result<Self, TokenSpanError> {
269        let token = Self::try_new(kind, text, start, end)?;
270        validate_non_empty_span(token.kind, token.start, token.is_empty())?;
271
272        Ok(token)
273    }
274
275    /// Create an EOF token at `pos`.
276    pub fn eof_at(pos: usize) -> Self {
277        Self::new(TokenKind::Eof, "", pos, pos)
278    }
279
280    /// Create an unknown (synthetic) token at `start..end`.
281    pub fn unknown_at(text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
282        let bounded_end = end.max(start);
283        Self::new(TokenKind::Unknown, text, start, bounded_end)
284    }
285
286    /// Return this token's byte span.
287    pub fn span(&self) -> TokenSpan {
288        TokenSpan::new(self.start, self.end)
289    }
290
291    /// Return this token's byte span as `Range<usize>`.
292    pub fn range(&self) -> Range<usize> {
293        self.span().range()
294    }
295
296    /// Clone this token with a new checked span.
297    pub fn with_span(&self, start: usize, end: usize) -> Result<Self, TokenSpanError> {
298        Self::new_checked(self.kind, self.text.clone(), start, end)
299    }
300
301    /// Clone this token with a new token kind.
302    pub fn with_kind(&self, kind: TokenKind) -> Self {
303        Self::new(kind, self.text.clone(), self.start, self.end)
304    }
305
306    /// Return the token span length in bytes.
307    ///
308    /// This uses saturating subtraction so malformed spans (where `end < start`)
309    /// are treated as zero-length instead of underflowing.
310    ///
311    /// # Examples
312    ///
313    /// ```rust
314    /// use perl_token::{Token, TokenKind};
315    ///
316    /// let tok = Token::new(TokenKind::Identifier, "foo", 10, 13);
317    /// assert_eq!(tok.len(), 3);
318    /// ```
319    pub fn len(&self) -> usize {
320        self.span().len()
321    }
322
323    /// Return whether the token span is empty.
324    ///
325    /// # Examples
326    ///
327    /// ```rust
328    /// use perl_token::{Token, TokenKind};
329    ///
330    /// let tok = Token::new(TokenKind::Eof, "", 8, 8);
331    /// assert!(tok.is_empty());
332    /// ```
333    pub fn is_empty(&self) -> bool {
334        self.len() == 0
335    }
336
337    /// Return a human-readable display name for this token.
338    pub fn display_name(&self) -> &'static str {
339        self.kind.display_name()
340    }
341
342    /// Return a borrowed token view over this token.
343    pub fn as_ref_token(&self) -> TokenRef<'_> {
344        TokenRef { kind: self.kind, text: self.text.as_ref(), start: self.start, end: self.end }
345    }
346}
347
348impl From<TokenRef<'_>> for Token {
349    fn from(value: TokenRef<'_>) -> Self {
350        value.to_owned_token()
351    }
352}