tokit/
utils.rs

1pub use delimited::*;
2pub use escaped::*;
3pub use expected::*;
4pub use generic_arraydeque::GenericArrayDeque;
5pub use lexeme::*;
6pub use located::*;
7pub use message::Message;
8pub use positioned_char::*;
9pub use sliced::*;
10pub use span::*;
11pub use spanned::*;
12pub use to_equivalent::*;
13
14/// Re-export of generic-arraydeque for direct access.
15pub use generic_arraydeque::{self, typenum};
16
17/// Trackers for preventing infinite recursion in parsers.
18pub mod recursion_tracker;
19/// A token tracker for tracking tokens in a lexer.
20pub mod token_tracker;
21/// A tracker for tracking recursion depth and tokens.
22pub mod tracker;
23
24/// A module for custom comparing traits.
25pub mod cmp;
26/// A module for displaying in a human-friendly way.
27pub mod human_display;
28/// A module for displaying in SDL.
29pub mod sdl_display;
30/// A module for displaying in syntax trees.
31pub mod syntax_tree_display;
32
33/// Common delimiters used in lexing and parsing.
34pub mod delimiter;
35
36/// Common knowledge types for lexing and parsing.
37pub mod knowledge;
38
39/// A module for container types with small size optimizations.
40#[cfg(feature = "smallvec")]
41#[cfg_attr(docsrs, doc(cfg(feature = "smallvec")))]
42pub mod container;
43
44/// Marker types used in various utilities.
45pub mod marker;
46
47mod delimited;
48mod escaped;
49mod expected;
50mod lexeme;
51mod located;
52mod message;
53mod positioned_char;
54mod sliced;
55mod span;
56mod spanned;
57mod to_equivalent;
58
/// Enables accessing the source span of a parsed element.
///
/// This trait provides a way to retrieve the span information associated with
/// a parsed element without taking ownership of the element itself. This is
/// useful for scenarios where you need to reference the location of the element
/// in the source input, such as for error reporting or diagnostics.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **Error reporting**: Attaching span information to error messages
/// - **Diagnostics**: Highlighting source locations in IDEs or tools
/// - **Logging**: Recording where certain elements were parsed from
/// - **Analysis**: Performing source-based analysis or transformations
///
/// ## Implementation Notes
///
/// Implementing types should ensure that:
///   - The returned span is accurate and corresponds to the element's location in the source
///   - The method is efficient (ideally zero-cost) and does not involve unnecessary
///     allocations or computations
///   - The trait is implemented for all relevant types
///   - The span information is preserved during parsing and transformations
///   - The implementation is consistent with other span-related traits
///   - The returned reference is valid for the lifetime of the element
pub trait AsSpan<Span> {
  /// Returns a reference to the source span of this element.
  ///
  /// The element is only borrowed: it remains usable after the call, and the
  /// returned reference cannot outlive that borrow. When an owned span is
  /// required instead, see [`IntoSpan::into_span`].
  fn as_span(&self) -> &Span;
}
92
/// Enables consuming a parsed element to extract its source span.
///
/// This trait provides a way to take ownership of the span information from
/// a parsed element, which is useful when the element itself is no longer
/// needed but the span data should be preserved or transferred to another
/// data structure.
///
/// Every implementor must also implement [`AsSpan`], so the borrowed view of
/// the span is always available alongside the owned one.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **AST construction**: Building higher-level AST nodes that need owned spans
/// - **Error collection**: Gathering span information for batch error reporting
/// - **Transformation**: Converting between different representations while preserving location
/// - **Optimization**: Avoiding clones when transferring ownership is acceptable
///
/// ## Implementation Notes
///
/// Implementing types should ensure that:
/// - The returned span is equivalent to what [`AsSpan::as_span`] would return
/// - All span information is preserved during the conversion
/// - The conversion is efficient (ideally zero-cost)
pub trait IntoSpan<Span>: AsSpan<Span> {
  /// Consumes this element and returns the owned source span.
  ///
  /// This method takes ownership of the element and extracts its span information
  /// as an owned value. This is useful when you need to transfer ownership of
  /// the span data to another data structure or when the element itself is no
  /// longer needed but the location information should be preserved.
  fn into_span(self) -> Span;
}
123
/// Enables destructuring a parsed element into its constituent components.
///
/// This trait provides a way to break down complex parsed elements into their
/// individual parts, taking ownership of each component. This is particularly
/// useful for transformation, analysis, or when building different representations
/// of the parsed data.
///
/// ## Design Philosophy
///
/// The trait uses an associated type rather than generic parameters to ensure
/// that each implementing type has exactly one way to be decomposed. This provides
/// type safety and makes the interface predictable for consumers.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **AST transformation**: Converting parsed elements into different AST representations
/// - **Analysis**: Extracting specific components for validation or processing
/// - **Serialization**: Breaking down elements for custom serialization formats
/// - **Testing**: Accessing individual components for detailed assertions
///
/// ## Examples
///
/// The snippet below is a sketch: it is marked `ignore` and is not compiled
/// as a doctest.
///
/// ```rust,ignore
/// // Extracting components for transformation
/// let float_value: FloatValue<&str, SimpleSpan> = parse_float("3.14e-2")?;
/// let (span, int_part, frac_part, exp_part) = float_value.into_components();
///
/// // Building a custom representation
/// let custom_float = CustomFloat {
///     location: span,
///     integer: int_part,
///     fractional: frac_part,
///     exponent: exp_part,
/// };
///
/// // Component analysis
/// let int_literal: IntValue<&str, SimpleSpan> = parse_int("-42")?;
/// let (span, sign, digits) = int_literal.into_components();
///
/// if sign.is_some() {
///     println!("Found negative integer at {:?}", span);
/// }
/// ```
///
/// ## Implementation Guidelines
///
/// When implementing this trait:
/// - Include all meaningful components of the parsed element
/// - Order components logically (typically: span first, then sub-components in source order)
/// - Use tuples for simple decomposition, custom structs for complex cases
/// - Ensure the decomposition is complete (no information loss)
/// - Document the component structure clearly
///
/// ## Component Ordering Convention
///
/// To maintain consistency across implementations, follow this ordering:
/// 1. **Overall span**: The span covering the entire element
/// 2. **Required components**: Core parts that are always present
/// 3. **Optional components**: Parts that may or may not be present
/// 4. **Sub-elements**: Nested parsed elements in source order
pub trait IntoComponents {
  /// The tuple or struct type containing the decomposed components.
  ///
  /// This associated type defines the structure returned by
  /// [`into_components`](IntoComponents::into_components). It should include
  /// all meaningful parts of the parsed element in a logical order that makes
  /// sense for the specific element type.
  type Components;

  /// Consumes this element and returns its constituent components.
  ///
  /// This method breaks down the parsed element into its individual parts,
  /// providing owned access to each component. The exact structure of the
  /// returned components is defined by the `Components` associated type.
  fn into_components(self) -> Self::Components;
}
200
201/// A trait for checking if a token is an ASCII character.
202pub trait IsAsciiChar {
203  /// Returns `true` if self is equal to the given ASCII character.
204  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool;
205
206  /// Checks if the value is an ASCII decimal digit:
207  /// U+0030 '0' ..= U+0039 '9'.
208  fn is_ascii_digit(&self) -> bool;
209
210  /// Returns `true` if self is one of the given ASCII characters.
211  #[cfg_attr(not(tarpaulin), inline(always))]
212  fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
213    choices.iter().any(|&ch| self.is_ascii_char(ch))
214  }
215}
216
217impl<T> IsAsciiChar for &T
218where
219  T: IsAsciiChar + ?Sized,
220{
221  #[cfg_attr(not(tarpaulin), inline(always))]
222  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
223    <T as IsAsciiChar>::is_ascii_char(*self, ch)
224  }
225
226  #[cfg_attr(not(tarpaulin), inline(always))]
227  fn is_ascii_digit(&self) -> bool {
228    <T as IsAsciiChar>::is_ascii_digit(*self)
229  }
230
231  #[cfg_attr(not(tarpaulin), inline(always))]
232  fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
233    <T as IsAsciiChar>::one_of(*self, choices)
234  }
235}
236
237impl<T> IsAsciiChar for &mut T
238where
239  T: IsAsciiChar + ?Sized,
240{
241  #[cfg_attr(not(tarpaulin), inline(always))]
242  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
243    <T as IsAsciiChar>::is_ascii_char(*self, ch)
244  }
245
246  #[cfg_attr(not(tarpaulin), inline(always))]
247  fn is_ascii_digit(&self) -> bool {
248    <T as IsAsciiChar>::is_ascii_digit(*self)
249  }
250
251  #[cfg_attr(not(tarpaulin), inline(always))]
252  fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
253    <T as IsAsciiChar>::one_of(*self, choices)
254  }
255}
256
257impl IsAsciiChar for char {
258  #[cfg_attr(not(tarpaulin), inline(always))]
259  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
260    if self.is_ascii() {
261      *self as u8 == ch as u8
262    } else {
263      false
264    }
265  }
266
267  #[cfg_attr(not(tarpaulin), inline(always))]
268  fn is_ascii_digit(&self) -> bool {
269    char::is_ascii_digit(self)
270  }
271}
272
273impl IsAsciiChar for u8 {
274  #[cfg_attr(not(tarpaulin), inline(always))]
275  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
276    *self == ch as u8
277  }
278
279  #[cfg_attr(not(tarpaulin), inline(always))]
280  fn is_ascii_digit(&self) -> bool {
281    u8::is_ascii_digit(self)
282  }
283}
284
285impl IsAsciiChar for str {
286  #[cfg_attr(not(tarpaulin), inline(always))]
287  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
288    self.len() == 1 && self.as_bytes()[0] == ch as u8
289  }
290
291  #[cfg_attr(not(tarpaulin), inline(always))]
292  fn is_ascii_digit(&self) -> bool {
293    self.len() == 1 && self.as_bytes()[0].is_ascii_digit()
294  }
295}
296
297impl IsAsciiChar for [u8] {
298  #[cfg_attr(not(tarpaulin), inline(always))]
299  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
300    self.len() == 1 && self[0] == ch as u8
301  }
302
303  #[cfg_attr(not(tarpaulin), inline(always))]
304  fn is_ascii_digit(&self) -> bool {
305    self.len() == 1 && self[0].is_ascii_digit()
306  }
307}
308
/// `BStr` is viewed as a plain byte slice and answers through the `[u8]`
/// implementation.
#[cfg(feature = "bstr")]
impl IsAsciiChar for bstr::BStr {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_char(bytes, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_digit(bytes)
  }
}
321
/// `Bytes` is viewed as a plain byte slice and answers through the `[u8]`
/// implementation.
#[cfg(feature = "bytes")]
impl IsAsciiChar for bytes::Bytes {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_char(bytes, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_digit(bytes)
  }
}
334
/// `HipByt` is viewed as a plain byte slice and answers through the `[u8]`
/// implementation.
#[cfg(feature = "hipstr")]
impl IsAsciiChar for hipstr::HipByt<'_> {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_char(bytes, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_digit(bytes)
  }
}
347
/// `HipStr` is viewed as a plain string slice and answers through the `str`
/// implementation.
#[cfg(feature = "hipstr")]
impl IsAsciiChar for hipstr::HipStr<'_> {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let text: &str = self;
    IsAsciiChar::is_ascii_char(text, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let text: &str = self;
    IsAsciiChar::is_ascii_digit(text)
  }
}
360
/// A trait for character-like types that can report their encoded length in bytes.
///
/// `CharLen` provides a uniform way to query the byte length of different character
/// types, which is essential for converting positioned characters into byte spans.
///
/// # Implementations
///
/// This crate provides implementations for:
/// - **`u8`**: Always returns `1` (single byte)
/// - **`char`**: Returns `len_utf8()` (1-4 bytes depending on the character)
/// - **`PositionedChar<T>`**: Delegates to the wrapped character for any `T: CharLen`
/// - **`&T`**: Delegates to `T::char_len()` for any `T: CharLen`
///
/// # Design Note
///
/// This trait is **sealed** and cannot be implemented outside of this crate. If you need
/// to work with a custom character type, use [`Lexeme::span_with`] or
/// [`UnknownLexeme::from_range`](crate::error::UnknownLexeme::from_range) and provide your own length function.
///
/// # Use Cases
///
/// - **Span calculation**: Convert positioned characters to byte spans automatically
/// - **UTF-8 handling**: Properly account for multi-byte characters
/// - **Error reporting**: Determine the exact byte range of an unexpected character
///
/// # Examples
///
/// ## Automatic Length Detection
///
/// ```rust
/// use tokit::utils::{Lexeme, PositionedChar};
///
/// // ASCII character (1 byte)
/// let ascii = Lexeme::from(PositionedChar::with_position('a', 10));
/// let span = ascii.span();
/// assert_eq!(span.len(), 1);
///
/// // Multi-byte UTF-8 character (3 bytes)
/// let euro = Lexeme::from(PositionedChar::with_position('€', 20));
/// let span = euro.span();
/// assert_eq!(span.len(), 3);
/// ```
///
/// ## With Custom Length Function
///
/// ```rust
/// use tokit::utils::{Lexeme, PositionedChar};
///
/// // For types that don't implement CharLen, use span_with
/// struct CustomChar(char);
///
/// let lexeme = Lexeme::from(PositionedChar::with_position(CustomChar('€'), 5));
/// let span = lexeme.span_with(|c| c.0.len_utf8());
///
/// assert_eq!(span.start(), 5);
/// assert_eq!(span.end(), 8);
/// ```
// NOTE(review): the only method declared here is `char_len`, not `len`, so this
// lint allowance looks unnecessary — confirm before removing.
#[allow(clippy::len_without_is_empty)]
pub trait CharLen: sealed::Sealed {
  /// Returns the length of this character in bytes.
  ///
  /// # Examples
  ///
  /// ```rust
  /// use tokit::utils::{Lexeme, PositionedChar};
  ///
  /// // The trait is used internally by span()
  /// let ascii = Lexeme::from(PositionedChar::with_position('A', 0));
  /// assert_eq!(ascii.span().len(), 1);
  ///
  /// let euro = Lexeme::from(PositionedChar::with_position('€', 0));
  /// assert_eq!(euro.span().len(), 3);
  ///
  /// let crab = Lexeme::from(PositionedChar::with_position('🦀', 0));
  /// assert_eq!(crab.span().len(), 4);
  /// ```
  fn char_len(&self) -> usize;
}
438
439mod sealed {
440  use super::{CharLen, PositionedChar};
441
442  pub trait Sealed {}
443
444  impl Sealed for u8 {}
445  impl Sealed for char {}
446  impl<T: Sealed> Sealed for PositionedChar<T> {}
447
448  impl<T: Sealed> Sealed for &T {}
449
450  impl CharLen for u8 {
451    #[cfg_attr(not(tarpaulin), inline(always))]
452    fn char_len(&self) -> usize {
453      1
454    }
455  }
456
457  impl CharLen for char {
458    #[cfg_attr(not(tarpaulin), inline(always))]
459    fn char_len(&self) -> usize {
460      self.len_utf8()
461    }
462  }
463
464  impl<T: CharLen> CharLen for PositionedChar<T> {
465    #[cfg_attr(not(tarpaulin), inline(always))]
466    fn char_len(&self) -> usize {
467      self.char_ref().char_len()
468    }
469  }
470
471  impl<T: CharLen> CharLen for &T {
472    #[cfg_attr(not(tarpaulin), inline(always))]
473    fn char_len(&self) -> usize {
474      (*self).char_len()
475    }
476  }
477}