Skip to main content

perl_token/
token.rs

1use std::{ops::Range, sync::Arc};
2
3use crate::TokenKind;
4
/// Byte span carried by a [`Token`].
///
/// The span is a plain pair of byte offsets. Nothing in the type itself
/// enforces `start <= end`; use [`TokenSpan::try_new`] when that ordering must
/// be checked.
///
/// `Hash` is derived alongside `Eq` so spans can be used directly as keys in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenSpan {
    /// Starting byte position.
    pub start: usize,
    /// Ending byte position.
    pub end: usize,
}
13
14impl TokenSpan {
15    /// Create a span from raw byte positions.
16    pub const fn new(start: usize, end: usize) -> Self {
17        Self { start, end }
18    }
19
20    /// Create a span, returning an error when `end < start`.
21    pub fn try_new(start: usize, end: usize) -> Result<Self, TokenSpanError> {
22        if end < start {
23            return Err(TokenSpanError::EndBeforeStart { start, end });
24        }
25
26        Ok(Self { start, end })
27    }
28
29    /// Span length in bytes.
30    pub const fn len(self) -> usize {
31        self.end.saturating_sub(self.start)
32    }
33
34    /// Whether the span length is zero bytes.
35    pub const fn is_empty(self) -> bool {
36        self.len() == 0
37    }
38
39    /// Convert this span to a standard `Range`.
40    pub const fn range(self) -> Range<usize> {
41        self.start..self.end
42    }
43
44    /// Return whether `offset` is inside this half-open span.
45    ///
46    /// The start is inclusive and the end is exclusive, matching Rust
47    /// [`Range`] semantics. Empty spans contain no offsets.
48    pub const fn contains(self, offset: usize) -> bool {
49        self.start <= offset && offset < self.end
50    }
51
52    /// Return whether `offset` touches this span, including the end boundary.
53    ///
54    /// This is useful for cursor-oriented callers that need positions at token
55    /// boundaries to resolve to the adjacent token. Empty spans touch exactly
56    /// their single boundary offset.
57    pub const fn touches(self, offset: usize) -> bool {
58        self.start <= offset && offset <= self.end
59    }
60
61    /// Return whether this span overlaps `other`.
62    ///
63    /// Spans are treated as half-open byte ranges, so adjacent spans such as
64    /// `0..2` and `2..4` do not overlap. Empty spans never overlap.
65    pub const fn overlaps(self, other: Self) -> bool {
66        !self.is_empty() && !other.is_empty() && self.start < other.end && other.start < self.end
67    }
68
69    /// Return the smallest span covering both spans.
70    pub const fn cover(self, other: Self) -> Self {
71        Self { start: min_usize(self.start, other.start), end: max_usize(self.end, other.end) }
72    }
73}
74
/// `const`-compatible minimum of two `usize` values.
const fn min_usize(left: usize, right: usize) -> usize {
    if right < left {
        right
    } else {
        left
    }
}
78
/// `const`-compatible maximum of two `usize` values.
const fn max_usize(left: usize, right: usize) -> usize {
    if right > left {
        right
    } else {
        left
    }
}
82
83/// Error type for checked token/span constructors.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub enum TokenSpanError {
86    /// End offset is before start offset.
87    EndBeforeStart { start: usize, end: usize },
88    /// Empty span is only valid for EOF or explicit synthetic tokens.
89    EmptySpanNotAllowed { kind: TokenKind, at: usize },
90}
91
92impl std::fmt::Display for TokenSpanError {
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        match self {
95            Self::EndBeforeStart { start, end } => {
96                write!(f, "token span invariant violated: end ({end}) < start ({start})")
97            }
98            Self::EmptySpanNotAllowed { kind, at } => {
99                write!(f, "empty span not allowed for token kind {kind:?} at byte {at}")
100            }
101        }
102    }
103}
104
105impl std::error::Error for TokenSpanError {}
106
107#[inline]
108const fn allows_empty_span(kind: TokenKind) -> bool {
109    matches!(kind, TokenKind::Eof | TokenKind::Unknown)
110}
111
112#[inline]
113fn validate_non_empty_span(
114    kind: TokenKind,
115    start: usize,
116    is_empty: bool,
117) -> Result<(), TokenSpanError> {
118    if is_empty && !allows_empty_span(kind) {
119        return Err(TokenSpanError::EmptySpanNotAllowed { kind, at: start });
120    }
121
122    Ok(())
123}
124
125/// Borrowed view over token data for allocation-sensitive paths.
126///
127/// Unlike [`Token`], this type borrows source text and does not allocate.
128/// Convert to [`Token`] explicitly with [`TokenRef::to_owned_token`] or `From`.
129#[derive(Debug, Clone, Copy, PartialEq, Eq)]
130pub struct TokenRef<'src> {
131    /// Token classification for parser decision making
132    pub kind: TokenKind,
133    /// Borrowed source text slice
134    pub text: &'src str,
135    /// Starting byte position for error reporting and location tracking
136    pub start: usize,
137    /// Ending byte position for span calculation and navigation
138    pub end: usize,
139}
140
141impl<'src> TokenRef<'src> {
142    /// Create a borrowed token view with the given kind, source text, and byte span.
143    pub fn new(kind: TokenKind, text: &'src str, start: usize, end: usize) -> Self {
144        Self { kind, text, start, end }
145    }
146
147    /// Create a borrowed token view with checked span ordering.
148    ///
149    /// Unlike [`TokenRef::new`], this rejects spans where `end < start`.
150    pub fn try_new(
151        kind: TokenKind,
152        text: &'src str,
153        start: usize,
154        end: usize,
155    ) -> Result<Self, TokenSpanError> {
156        let span = TokenSpan::try_new(start, end)?;
157        Ok(Self { kind, text, start: span.start, end: span.end })
158    }
159
160    /// Create a borrowed token view while enforcing span invariants.
161    ///
162    /// Rules:
163    /// - `start <= end`
164    /// - zero-length spans are accepted for EOF and explicit synthetic unknown tokens
165    pub fn new_checked(
166        kind: TokenKind,
167        text: &'src str,
168        start: usize,
169        end: usize,
170    ) -> Result<Self, TokenSpanError> {
171        let token = Self::try_new(kind, text, start, end)?;
172        validate_non_empty_span(token.kind, token.start, token.is_empty())?;
173
174        Ok(token)
175    }
176
177    /// Return the token span length in bytes.
178    pub fn len(self) -> usize {
179        TokenSpan::new(self.start, self.end).len()
180    }
181
182    /// Return whether the token span is empty.
183    pub fn is_empty(self) -> bool {
184        self.len() == 0
185    }
186
187    /// Return the token span as `(start, end)`.
188    pub fn span(self) -> (usize, usize) {
189        (self.start, self.end)
190    }
191
192    /// Return a human-readable display name for this token.
193    pub fn display_name(self) -> &'static str {
194        self.kind.display_name()
195    }
196
197    /// Convert this borrowed token view into an owned [`Token`].
198    pub fn to_owned_token(self) -> Token {
199        Token::new(self.kind, self.text, self.start, self.end)
200    }
201}
202
203/// Token produced by the lexer and consumed by the parser.
204///
205/// Stores the token kind, original source text, and byte span. The text is kept
206/// in an `Arc<str>` so buffering and lookahead can clone tokens cheaply.
207#[derive(Debug, Clone, PartialEq)]
208pub struct Token {
209    /// Token classification for parser decision making
210    pub kind: TokenKind,
211    /// Original source text for precise reconstruction
212    pub text: Arc<str>,
213    /// Starting byte position for error reporting and location tracking
214    pub start: usize,
215    /// Ending byte position for span calculation and navigation
216    pub end: usize,
217}
218
219impl Token {
220    /// Create a new token with the given kind, source text, and byte span.
221    ///
222    /// # Examples
223    ///
224    /// ```rust
225    /// use perl_token::{Token, TokenKind};
226    ///
227    /// let tok = Token::new(TokenKind::Sub, "sub", 0, 3);
228    /// assert_eq!(tok.kind, TokenKind::Sub);
229    /// assert_eq!(&*tok.text, "sub");
230    /// ```
231    pub fn new(kind: TokenKind, text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
232        Token { kind, text: text.into(), start, end }
233    }
234
235    /// Create a token with checked span ordering.
236    ///
237    /// Unlike [`Token::new`], this rejects spans where `end < start`.
238    pub fn try_new(
239        kind: TokenKind,
240        text: impl Into<Arc<str>>,
241        start: usize,
242        end: usize,
243    ) -> Result<Self, TokenSpanError> {
244        let span = TokenSpan::try_new(start, end)?;
245        Ok(Self { kind, text: text.into(), start: span.start, end: span.end })
246    }
247
248    /// Create a token while enforcing span invariants.
249    ///
250    /// Rules:
251    /// - `start <= end`
252    /// - zero-length spans are accepted for EOF and explicit synthetic unknown tokens
253    pub fn new_checked(
254        kind: TokenKind,
255        text: impl Into<Arc<str>>,
256        start: usize,
257        end: usize,
258    ) -> Result<Self, TokenSpanError> {
259        let token = Self::try_new(kind, text, start, end)?;
260        validate_non_empty_span(token.kind, token.start, token.is_empty())?;
261
262        Ok(token)
263    }
264
265    /// Create an EOF token at `pos`.
266    pub fn eof_at(pos: usize) -> Self {
267        Self::new(TokenKind::Eof, "", pos, pos)
268    }
269
270    /// Create an unknown (synthetic) token at `start..end`.
271    pub fn unknown_at(text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
272        let bounded_end = end.max(start);
273        Self::new(TokenKind::Unknown, text, start, bounded_end)
274    }
275
276    /// Return this token's byte span.
277    pub fn span(&self) -> TokenSpan {
278        TokenSpan::new(self.start, self.end)
279    }
280
281    /// Return this token's byte span as `Range<usize>`.
282    pub fn range(&self) -> Range<usize> {
283        self.span().range()
284    }
285
286    /// Clone this token with a new checked span.
287    pub fn with_span(&self, start: usize, end: usize) -> Result<Self, TokenSpanError> {
288        Self::new_checked(self.kind, self.text.clone(), start, end)
289    }
290
291    /// Clone this token with a new token kind.
292    pub fn with_kind(&self, kind: TokenKind) -> Self {
293        Self::new(kind, self.text.clone(), self.start, self.end)
294    }
295
296    /// Return the token span length in bytes.
297    ///
298    /// This uses saturating subtraction so malformed spans (where `end < start`)
299    /// are treated as zero-length instead of underflowing.
300    ///
301    /// # Examples
302    ///
303    /// ```rust
304    /// use perl_token::{Token, TokenKind};
305    ///
306    /// let tok = Token::new(TokenKind::Identifier, "foo", 10, 13);
307    /// assert_eq!(tok.len(), 3);
308    /// ```
309    pub fn len(&self) -> usize {
310        self.span().len()
311    }
312
313    /// Return whether the token span is empty.
314    ///
315    /// # Examples
316    ///
317    /// ```rust
318    /// use perl_token::{Token, TokenKind};
319    ///
320    /// let tok = Token::new(TokenKind::Eof, "", 8, 8);
321    /// assert!(tok.is_empty());
322    /// ```
323    pub fn is_empty(&self) -> bool {
324        self.len() == 0
325    }
326
327    /// Return a human-readable display name for this token.
328    pub fn display_name(&self) -> &'static str {
329        self.kind.display_name()
330    }
331
332    /// Return a borrowed token view over this token.
333    pub fn as_ref_token(&self) -> TokenRef<'_> {
334        TokenRef { kind: self.kind, text: self.text.as_ref(), start: self.start, end: self.end }
335    }
336}
337
338impl From<TokenRef<'_>> for Token {
339    fn from(value: TokenRef<'_>) -> Self {
340        value.to_owned_token()
341    }
342}