tokit/utils.rs
1pub use delimited::*;
2pub use escaped::*;
3pub use expected::*;
4pub use generic_arraydeque::GenericArrayDeque;
5pub use lexeme::*;
6pub use located::*;
7pub use message::Message;
8pub use positioned_char::*;
9pub use sliced::*;
10pub use span::*;
11pub use spanned::*;
12pub use to_equivalent::*;
13
14/// Re-export of generic-arraydeque for direct access.
15pub use generic_arraydeque::{self, typenum};
16
17/// Trackers for preventing infinite recursion in parsers.
18pub mod recursion_tracker;
19/// A token tracker for tracking tokens in a lexer.
20pub mod token_tracker;
21/// A tracker for tracking recursion depth and tokens.
22pub mod tracker;
23
24/// A module for custom comparing traits.
25pub mod cmp;
26/// A module for displaying in a human-friendly way.
27pub mod human_display;
28/// A module for displaying in SDL.
29pub mod sdl_display;
30/// A module for displaying in syntax trees.
31pub mod syntax_tree_display;
32
33/// Common delimiters used in lexing and parsing.
34pub mod delimiter;
35
36/// Common knowledge types for lexing and parsing.
37pub mod knowledge;
38
39/// A module for container types with small size optimizations.
40#[cfg(feature = "smallvec")]
41#[cfg_attr(docsrs, doc(cfg(feature = "smallvec")))]
42pub mod container;
43
44/// Marker types used in various utilities.
45pub mod marker;
46
47mod delimited;
48mod escaped;
49mod expected;
50mod lexeme;
51mod located;
52mod message;
53mod positioned_char;
54mod sliced;
55mod span;
56mod spanned;
57mod to_equivalent;
58
/// Enables accessing the source span of a parsed element.
///
/// This trait provides a way to retrieve the span information associated with
/// a parsed element without taking ownership of the element itself. This is
/// useful for scenarios where you need to reference the location of the element
/// in the source input, such as for error reporting or diagnostics.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **Error reporting**: Attaching span information to error messages
/// - **Diagnostics**: Highlighting source locations in IDEs or tools
/// - **Logging**: Recording where certain elements were parsed from
/// - **Analysis**: Performing source-based analysis or transformations
///
/// ## Implementation Notes
///
/// Implementing types should ensure that:
/// - The returned span is accurate and corresponds to the element's location in the source
/// - The method is efficient (ideally zero-cost) and does not involve unnecessary
///   allocations or computations
/// - The trait is implemented for all relevant types
/// - The span information is preserved during parsing and transformations
/// - The implementation is consistent with other span-related traits
/// - The returned reference is valid for the lifetime of the element
pub trait AsSpan<Span> {
    /// Returns a reference to the source span of this element.
    ///
    /// This method only borrows the element: nothing is consumed, and the
    /// element remains usable after the call. Use it whenever the location
    /// needs to be inspected (for example to attach it to a diagnostic) while
    /// the element itself is still needed.
    ///
    /// To obtain an owned span by consuming the element instead, see
    /// [`IntoSpan::into_span`].
    fn as_span(&self) -> &Span;
}
92
93/// Enables consuming a parsed element to extract its source span.
94///
95/// This trait provides a way to take ownership of the span information from
96/// a parsed element, which is useful when the element itself is no longer
97/// needed but the span data should be preserved or transferred to another
98/// data structure.
99///
100/// ## Usage Patterns
101///
102/// Common scenarios for using this trait:
103/// - **AST construction**: Building higher-level AST nodes that need owned spans
104/// - **Error collection**: Gathering span information for batch error reporting
105/// - **Transformation**: Converting between different representations while preserving location
106/// - **Optimization**: Avoiding clones when transferring ownership is acceptable
107///
108/// ## Implementation Notes
109///
110/// Implementing types should ensure that:
111/// - The returned span is equivalent to what `AsSpan::spanned()` would return
112/// - All span information is preserved during the conversion
113/// - The conversion is efficient (ideally zero-cost)
114pub trait IntoSpan<Span>: AsSpan<Span> {
115 /// Consumes this element and returns the owned source span.
116 ///
117 /// This method takes ownership of the element and extracts its span information
118 /// as an owned value. This is useful when you need to transfer ownership of
119 /// the span data to another data structure or when the element itself is no
120 /// longer needed but the location information should be preserved.
121 fn into_span(self) -> Span;
122}
123
/// Breaks a parsed element apart into the pieces it is made of.
///
/// Implementors hand over ownership of every constituent part at once, which
/// makes this trait a natural fit for transformations, analyses, or for
/// building an alternative representation of already-parsed data.
///
/// ## Design Philosophy
///
/// The decomposed shape is an associated type, not a generic parameter, so a
/// given implementing type can be decomposed in exactly one way. Consumers
/// therefore always know which shape they get back.
///
/// ## Usage Patterns
///
/// Typical reasons to decompose an element:
/// - **AST transformation**: Converting parsed elements into different AST representations
/// - **Analysis**: Extracting specific components for validation or processing
/// - **Serialization**: Breaking down elements for custom serialization formats
/// - **Testing**: Accessing individual components for detailed assertions
///
/// ## Examples
///
/// ```rust,ignore
/// // Extracting components for transformation
/// let float_value: FloatValue<&str, SimpleSpan> = parse_float("3.14e-2")?;
/// let (span, int_part, frac_part, exp_part) = float_value.into_components();
///
/// // Building a custom representation
/// let custom_float = CustomFloat {
///     location: span,
///     integer: int_part,
///     fractional: frac_part,
///     exponent: exp_part,
/// };
///
/// // Component analysis
/// let int_literal: IntValue<&str, SimpleSpan> = parse_int("-42")?;
/// let (span, sign, digits) = int_literal.into_components();
///
/// if sign.is_some() {
///     println!("Found negative integer at {:?}", span);
/// }
/// ```
///
/// ## Implementation Guidelines
///
/// An implementation should:
/// - Include all meaningful components of the parsed element
/// - Order components logically (typically: span first, then sub-components in source order)
/// - Use tuples for simple decomposition, custom structs for complex cases
/// - Ensure the decomposition is complete (no information loss)
/// - Document the component structure clearly
///
/// ## Component Ordering Convention
///
/// For consistency between implementations, order the components as follows:
/// 1. **Overall span**: The span covering the entire element
/// 2. **Required components**: Core parts that are always present
/// 3. **Optional components**: Parts that may or may not be present
/// 4. **Sub-elements**: Nested parsed elements in source order
pub trait IntoComponents {
    /// The shape (tuple or struct) that holds the decomposed parts.
    ///
    /// This is the return type of [`into_components`](Self::into_components).
    /// It should carry every meaningful part of the element, in an order that
    /// is sensible for the element type at hand.
    type Components;

    /// Consumes the element and yields its parts.
    ///
    /// Ownership of each individual component is transferred to the caller;
    /// the concrete layout of the result is given by
    /// [`Components`](Self::Components).
    fn into_components(self) -> Self::Components;
}
200
201/// A trait for checking if a token is an ASCII character.
202pub trait IsAsciiChar {
203 /// Returns `true` if self is equal to the given ASCII character.
204 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool;
205
206 /// Checks if the value is an ASCII decimal digit:
207 /// U+0030 '0' ..= U+0039 '9'.
208 fn is_ascii_digit(&self) -> bool;
209
210 /// Returns `true` if self is one of the given ASCII characters.
211 #[cfg_attr(not(tarpaulin), inline(always))]
212 fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
213 choices.iter().any(|&ch| self.is_ascii_char(ch))
214 }
215}
216
217impl<T> IsAsciiChar for &T
218where
219 T: IsAsciiChar + ?Sized,
220{
221 #[cfg_attr(not(tarpaulin), inline(always))]
222 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
223 <T as IsAsciiChar>::is_ascii_char(*self, ch)
224 }
225
226 #[cfg_attr(not(tarpaulin), inline(always))]
227 fn is_ascii_digit(&self) -> bool {
228 <T as IsAsciiChar>::is_ascii_digit(*self)
229 }
230
231 #[cfg_attr(not(tarpaulin), inline(always))]
232 fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
233 <T as IsAsciiChar>::one_of(*self, choices)
234 }
235}
236
237impl<T> IsAsciiChar for &mut T
238where
239 T: IsAsciiChar + ?Sized,
240{
241 #[cfg_attr(not(tarpaulin), inline(always))]
242 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
243 <T as IsAsciiChar>::is_ascii_char(*self, ch)
244 }
245
246 #[cfg_attr(not(tarpaulin), inline(always))]
247 fn is_ascii_digit(&self) -> bool {
248 <T as IsAsciiChar>::is_ascii_digit(*self)
249 }
250
251 #[cfg_attr(not(tarpaulin), inline(always))]
252 fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
253 <T as IsAsciiChar>::one_of(*self, choices)
254 }
255}
256
257impl IsAsciiChar for char {
258 #[cfg_attr(not(tarpaulin), inline(always))]
259 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
260 if self.is_ascii() {
261 *self as u8 == ch as u8
262 } else {
263 false
264 }
265 }
266
267 #[cfg_attr(not(tarpaulin), inline(always))]
268 fn is_ascii_digit(&self) -> bool {
269 char::is_ascii_digit(self)
270 }
271}
272
273impl IsAsciiChar for u8 {
274 #[cfg_attr(not(tarpaulin), inline(always))]
275 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
276 *self == ch as u8
277 }
278
279 #[cfg_attr(not(tarpaulin), inline(always))]
280 fn is_ascii_digit(&self) -> bool {
281 u8::is_ascii_digit(self)
282 }
283}
284
285impl IsAsciiChar for str {
286 #[cfg_attr(not(tarpaulin), inline(always))]
287 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
288 self.len() == 1 && self.as_bytes()[0] == ch as u8
289 }
290
291 #[cfg_attr(not(tarpaulin), inline(always))]
292 fn is_ascii_digit(&self) -> bool {
293 self.len() == 1 && self.as_bytes()[0].is_ascii_digit()
294 }
295}
296
297impl IsAsciiChar for [u8] {
298 #[cfg_attr(not(tarpaulin), inline(always))]
299 fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
300 self.len() == 1 && self[0] == ch as u8
301 }
302
303 #[cfg_attr(not(tarpaulin), inline(always))]
304 fn is_ascii_digit(&self) -> bool {
305 self.len() == 1 && self[0].is_ascii_digit()
306 }
307}
308
/// Delegates to the `[u8]` implementation by viewing the `BStr` as a byte
/// slice.
#[cfg(feature = "bstr")]
impl IsAsciiChar for bstr::BStr {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
        let bytes: &[u8] = self;
        <[u8] as IsAsciiChar>::is_ascii_char(bytes, ch)
    }

    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_digit(&self) -> bool {
        let bytes: &[u8] = self;
        <[u8] as IsAsciiChar>::is_ascii_digit(bytes)
    }
}
321
/// Delegates to the `[u8]` implementation by viewing the `Bytes` buffer as a
/// byte slice.
#[cfg(feature = "bytes")]
impl IsAsciiChar for bytes::Bytes {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
        let bytes: &[u8] = self;
        <[u8] as IsAsciiChar>::is_ascii_char(bytes, ch)
    }

    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_digit(&self) -> bool {
        let bytes: &[u8] = self;
        <[u8] as IsAsciiChar>::is_ascii_digit(bytes)
    }
}
334
/// Delegates to the `[u8]` implementation by viewing the `HipByt` as a byte
/// slice.
#[cfg(feature = "hipstr")]
impl IsAsciiChar for hipstr::HipByt<'_> {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
        let bytes: &[u8] = self;
        <[u8] as IsAsciiChar>::is_ascii_char(bytes, ch)
    }

    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_digit(&self) -> bool {
        let bytes: &[u8] = self;
        <[u8] as IsAsciiChar>::is_ascii_digit(bytes)
    }
}
347
/// Delegates to the `str` implementation by viewing the `HipStr` as a string
/// slice.
#[cfg(feature = "hipstr")]
impl IsAsciiChar for hipstr::HipStr<'_> {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
        let text: &str = self;
        <str as IsAsciiChar>::is_ascii_char(text, ch)
    }

    #[cfg_attr(not(tarpaulin), inline(always))]
    fn is_ascii_digit(&self) -> bool {
        let text: &str = self;
        <str as IsAsciiChar>::is_ascii_digit(text)
    }
}
360
361/// A trait for character-like types that can report their encoded length in bytes.
362///
363/// `CharLen` provides a uniform way to query the byte length of different character
364/// types, which is essential for converting positioned characters into byte spans.
365///
366/// # Implementations
367///
368/// LogoSky provides implementations for:
369/// - **`u8`**: Always returns `1` (single byte)
370/// - **`char`**: Returns `len_utf8()` (1-4 bytes depending on the character)
371/// - **`&T`**: Delegates to `T::len()` for any `T: CharLen`
372///
373/// # Design Note
374///
375/// This trait is **sealed** and cannot be implemented outside of LogoSky. If you need
376/// to work with a custom character type, use [`Lexeme::span_with`] or
377/// [`UnknownLexeme::from_range`](crate::error::UnknownLexeme::from_range) and provide your own length function.
378///
379/// # Use Cases
380///
381/// - **Span calculation**: Convert positioned characters to byte spans automatically
382/// - **UTF-8 handling**: Properly account for multi-byte characters
383/// - **Error reporting**: Determine the exact byte range of an unexpected character
384///
385/// # Examples
386///
387/// ## Automatic Length Detection
388///
389/// ```rust
390/// use tokit::utils::{Lexeme, PositionedChar};
391///
392/// // ASCII character (1 byte)
393/// let ascii = Lexeme::from(PositionedChar::with_position('a', 10));
394/// let span = ascii.span();
395/// assert_eq!(span.len(), 1);
396///
397/// // Multi-byte UTF-8 character (3 bytes)
398/// let emoji = Lexeme::from(PositionedChar::with_position('€', 20));
399/// let span = emoji.span();
400/// assert_eq!(span.len(), 3);
401/// ```
402///
403/// ## With Custom Length Function
404///
405/// ```rust
406/// use tokit::utils::{Lexeme, PositionedChar};
407///
408/// // For types that don't implement CharLen, use span_with
409/// struct CustomChar(char);
410///
411/// let lexeme = Lexeme::from(PositionedChar::with_position(CustomChar('€'), 5));
412/// let span = lexeme.span_with(|c| c.0.len_utf8());
413///
414/// assert_eq!(span.start(), 5);
415/// assert_eq!(span.end(), 8);
416/// ```
417#[allow(clippy::len_without_is_empty)]
418pub trait CharLen: sealed::Sealed {
419 /// Returns the length of this character in bytes.
420 ///
421 /// # Examples
422 ///
423 /// ```rust
424 /// use tokit::utils::{Lexeme, PositionedChar};
425 ///
426 /// // The trait is used internally by span()
427 /// let ascii = Lexeme::from(PositionedChar::with_position('A', 0));
428 /// assert_eq!(ascii.span().len(), 1);
429 ///
430 /// let euro = Lexeme::from(PositionedChar::with_position('€', 0));
431 /// assert_eq!(euro.span().len(), 3);
432 ///
433 /// let crab = Lexeme::from(PositionedChar::with_position('🦀', 0));
434 /// assert_eq!(crab.span().len(), 4);
435 /// ```
436 fn char_len(&self) -> usize;
437}
438
439mod sealed {
440 use super::{CharLen, PositionedChar};
441
442 pub trait Sealed {}
443
444 impl Sealed for u8 {}
445 impl Sealed for char {}
446 impl<T: Sealed> Sealed for PositionedChar<T> {}
447
448 impl<T: Sealed> Sealed for &T {}
449
450 impl CharLen for u8 {
451 #[cfg_attr(not(tarpaulin), inline(always))]
452 fn char_len(&self) -> usize {
453 1
454 }
455 }
456
457 impl CharLen for char {
458 #[cfg_attr(not(tarpaulin), inline(always))]
459 fn char_len(&self) -> usize {
460 self.len_utf8()
461 }
462 }
463
464 impl<T: CharLen> CharLen for PositionedChar<T> {
465 #[cfg_attr(not(tarpaulin), inline(always))]
466 fn char_len(&self) -> usize {
467 self.char_ref().char_len()
468 }
469 }
470
471 impl<T: CharLen> CharLen for &T {
472 #[cfg_attr(not(tarpaulin), inline(always))]
473 fn char_len(&self) -> usize {
474 (*self).char_len()
475 }
476 }
477}