tokit 0.0.0

Blazing fast parser combinators: parse-while-lexing (zero-copy), deterministic LALR-style parsing, no backtracking. Flexible emitters for fail-fast runtime or greedy compiler diagnostics
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
pub use delimited::*;
pub use escaped::*;
pub use expected::*;
pub use generic_arraydeque::GenericArrayDeque;
pub use lexeme::*;
pub use located::*;
pub use message::Message;
pub use positioned_char::*;
pub use sliced::*;
pub use span::*;
pub use spanned::*;
pub use to_equivalent::*;

/// Re-export of generic-arraydeque for direct access.
pub use generic_arraydeque::{self, typenum};

/// Trackers for preventing infinite recursion in parsers.
pub mod recursion_tracker;
/// A token tracker for tracking tokens in a lexer.
pub mod token_tracker;
/// A tracker for tracking recursion depth and tokens.
pub mod tracker;

/// A module for custom comparing traits.
pub mod cmp;
/// A module for displaying in a human-friendly way.
pub mod human_display;
/// A module for displaying in SDL.
pub mod sdl_display;
/// A module for displaying in syntax trees.
pub mod syntax_tree_display;

/// Common delimiters used in lexing and parsing.
pub mod delimiter;

/// Common knowledge types for lexing and parsing.
pub mod knowledge;

/// A module for container types with small size optimizations.
#[cfg(feature = "smallvec")]
#[cfg_attr(docsrs, doc(cfg(feature = "smallvec")))]
pub mod container;

/// Marker types used in various utilities.
pub mod marker;

mod delimited;
mod escaped;
mod expected;
mod lexeme;
mod located;
mod message;
mod positioned_char;
mod sliced;
mod span;
mod spanned;
mod to_equivalent;

/// Enables accessing the source span of a parsed element.
///
/// This trait provides a way to retrieve the span information associated with
/// a parsed element without taking ownership of the element itself. This is
/// useful for scenarios where you need to reference the location of the element
/// in the source input, such as for error reporting or diagnostics.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **Error reporting**: Attaching span information to error messages
/// - **Diagnostics**: Highlighting source locations in IDEs or tools
/// - **Logging**: Recording where certain elements were parsed from
/// - **Analysis**: Performing source-based analysis or transformations
///
/// ## Implementation Notes
///
/// Implementing types should ensure that:
///   - The returned span is accurate and corresponds to the element's location in the source
///   - The method is efficient (ideally zero-cost) and does not involve unnecessary
///     allocations or computations
///   - The trait is implemented for all relevant types
///   - The span information is preserved during parsing and transformations
///   - The implementation is consistent with other span-related traits
///   - The returned reference is valid for the lifetime of the element
pub trait AsSpan<Span> {
  /// Returns a reference to the source span of this element.
  ///
  /// The element is only borrowed, so it remains fully usable after the call.
  /// The returned reference is tied to the borrow of `self`; clone the span
  /// if an owned value is needed while keeping the element.
  fn as_span(&self) -> &Span;
}

/// Enables consuming a parsed element to extract its source span.
///
/// This trait provides a way to take ownership of the span information from
/// a parsed element, which is useful when the element itself is no longer
/// needed but the span data should be preserved or transferred to another
/// data structure.
///
/// Every implementor must also implement [`AsSpan`], so the span is always
/// available by reference as well as by value.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **AST construction**: Building higher-level AST nodes that need owned spans
/// - **Error collection**: Gathering span information for batch error reporting
/// - **Transformation**: Converting between different representations while preserving location
/// - **Optimization**: Avoiding clones when transferring ownership is acceptable
///
/// ## Implementation Notes
///
/// Implementing types should ensure that:
/// - The returned span is equivalent to what [`AsSpan::as_span`] would return
/// - All span information is preserved during the conversion
/// - The conversion is efficient (ideally zero-cost)
pub trait IntoSpan<Span>: AsSpan<Span> {
  /// Consumes this element and returns the owned source span.
  ///
  /// This method takes ownership of the element and extracts its span information
  /// as an owned value. This is useful when you need to transfer ownership of
  /// the span data to another data structure or when the element itself is no
  /// longer needed but the location information should be preserved.
  fn into_span(self) -> Span;
}

/// Enables destructuring a parsed element into its constituent components.
///
/// This trait provides a way to break down complex parsed elements into their
/// individual parts, taking ownership of each component. This is particularly
/// useful for transformation, analysis, or when building different representations
/// of the parsed data.
///
/// ## Design Philosophy
///
/// The trait uses an associated type rather than generic parameters to ensure
/// that each implementing type has exactly one way to be decomposed. This provides
/// type safety and makes the interface predictable for consumers.
///
/// ## Usage Patterns
///
/// Common scenarios for using this trait:
/// - **AST transformation**: Converting parsed elements into different AST representations
/// - **Analysis**: Extracting specific components for validation or processing
/// - **Serialization**: Breaking down elements for custom serialization formats
/// - **Testing**: Accessing individual components for detailed assertions
///
/// ## Examples
///
/// The types and parser functions used below (`FloatValue`, `IntValue`,
/// `parse_float`, `parse_int`, `CustomFloat`) are illustrative only and are
/// not defined in this module, which is why the snippet is marked `ignore`.
///
/// ```rust,ignore
/// // Extracting components for transformation
/// let float_value: FloatValue<&str, SimpleSpan> = parse_float("3.14e-2")?;
/// let (span, int_part, frac_part, exp_part) = float_value.into_components();
///
/// // Building a custom representation
/// let custom_float = CustomFloat {
///     location: span,
///     integer: int_part,
///     fractional: frac_part,
///     exponent: exp_part,
/// };
///
/// // Component analysis
/// let int_literal: IntValue<&str, SimpleSpan> = parse_int("-42")?;
/// let (span, sign, digits) = int_literal.into_components();
///
/// if sign.is_some() {
///     println!("Found negative integer at {:?}", span);
/// }
/// ```
///
/// ## Implementation Guidelines
///
/// When implementing this trait:
/// - Include all meaningful components of the parsed element
/// - Order components logically (typically: span first, then sub-components in source order)
/// - Use tuples for simple decomposition, custom structs for complex cases
/// - Ensure the decomposition is complete (no information loss)
/// - Document the component structure clearly
///
/// ## Component Ordering Convention
///
/// To maintain consistency across implementations, follow this ordering:
/// 1. **Overall span**: The span covering the entire element
/// 2. **Required components**: Core parts that are always present
/// 3. **Optional components**: Parts that may or may not be present
/// 4. **Sub-elements**: Nested parsed elements in source order
pub trait IntoComponents {
  /// The tuple or struct type containing the decomposed components.
  ///
  /// This associated type defines the structure returned by
  /// [`into_components`](IntoComponents::into_components).
  /// It should include all meaningful parts of the parsed element in a logical
  /// order that makes sense for the specific element type.
  type Components;

  /// Consumes this element and returns its constituent components.
  ///
  /// This method breaks down the parsed element into its individual parts,
  /// providing owned access to each component. The exact structure of the
  /// returned components is defined by the `Components` associated type.
  fn into_components(self) -> Self::Components;
}

/// Abstraction over token-like values that can be compared against ASCII characters.
pub trait IsAsciiChar {
  /// Reports whether `self` is exactly the ASCII character `ch`.
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool;

  /// Reports whether `self` is an ASCII decimal digit,
  /// i.e. lies within U+0030 '0' ..= U+0039 '9'.
  fn is_ascii_digit(&self) -> bool;

  /// Reports whether `self` equals at least one of the ASCII characters in `choices`.
  ///
  /// The candidates are tried in order and the search stops at the first hit;
  /// an empty `choices` slice never matches.
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
    for &candidate in choices {
      if self.is_ascii_char(candidate) {
        return true;
      }
    }
    false
  }
}

/// Shared references answer every [`IsAsciiChar`] query by forwarding to the
/// implementation of the value they point at.
impl<T> IsAsciiChar for &T
where
  T: IsAsciiChar + ?Sized,
{
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    // `self` is `&&T`; peel both layers so the call lands on `T`'s impl.
    (**self).is_ascii_char(ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    (**self).is_ascii_digit()
  }

  // Forwarded explicitly so that an override of `one_of` on `T` is honoured.
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
    (**self).one_of(choices)
  }
}

/// Mutable references answer every [`IsAsciiChar`] query by forwarding to the
/// implementation of the value they point at.
impl<T> IsAsciiChar for &mut T
where
  T: IsAsciiChar + ?Sized,
{
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    // `self` is `&&mut T`; peel both layers so the call lands on `T`'s impl.
    (**self).is_ascii_char(ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    (**self).is_ascii_digit()
  }

  // Forwarded explicitly so that an override of `one_of` on `T` is honoured.
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn one_of(&self, choices: &[ascii::AsciiChar]) -> bool {
    (**self).one_of(choices)
  }
}

/// A `char` matches an ASCII character only when it is itself ASCII and has
/// the same byte value.
impl IsAsciiChar for char {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    // The ASCII guard must come first: truncating a non-ASCII code point to
    // `u8` could otherwise alias an unrelated ASCII byte.
    self.is_ascii() && *self as u8 == ch as u8
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    matches!(*self, '0'..='9')
  }
}

/// A raw byte matches an ASCII character when the two byte values coincide.
impl IsAsciiChar for u8 {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let wanted = ch as u8;
    wanted == *self
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    matches!(*self, b'0'..=b'9')
  }
}

/// A string slice matches only when it consists of exactly one byte and that
/// byte satisfies the query; empty and multi-byte strings never match.
impl IsAsciiChar for str {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    // The one-element slice pattern enforces the "exactly one byte" rule.
    matches!(self.as_bytes(), [only] if *only == ch as u8)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    matches!(self.as_bytes(), [b'0'..=b'9'])
  }
}

/// A byte slice matches only when it holds exactly one byte and that byte
/// satisfies the query; empty and longer slices never match.
impl IsAsciiChar for [u8] {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    // The one-element slice pattern enforces the "exactly one byte" rule.
    matches!(self, [only] if *only == ch as u8)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    matches!(self, [b'0'..=b'9'])
  }
}

/// `bstr::BStr` is checked through its underlying byte slice, so it follows
/// the `[u8]` implementation.
#[cfg(feature = "bstr")]
impl IsAsciiChar for bstr::BStr {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_char(bytes, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_digit(bytes)
  }
}

/// `bytes::Bytes` is checked through its underlying byte slice, so it follows
/// the `[u8]` implementation.
#[cfg(feature = "bytes")]
impl IsAsciiChar for bytes::Bytes {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_char(bytes, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_digit(bytes)
  }
}

/// `hipstr::HipByt` is checked through its underlying byte slice, so it
/// follows the `[u8]` implementation.
#[cfg(feature = "hipstr")]
impl IsAsciiChar for hipstr::HipByt<'_> {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_char(bytes, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let bytes: &[u8] = self;
    IsAsciiChar::is_ascii_digit(bytes)
  }
}

/// `hipstr::HipStr` is checked through its underlying string slice, so it
/// follows the `str` implementation.
#[cfg(feature = "hipstr")]
impl IsAsciiChar for hipstr::HipStr<'_> {
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_char(&self, ch: ascii::AsciiChar) -> bool {
    let text: &str = self;
    IsAsciiChar::is_ascii_char(text, ch)
  }

  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_ascii_digit(&self) -> bool {
    let text: &str = self;
    IsAsciiChar::is_ascii_digit(text)
  }
}

/// A trait for character-like types that can report their encoded length in bytes.
///
/// `CharLen` provides a uniform way to query the byte length of different character
/// types, which is essential for converting positioned characters into byte spans.
///
/// # Implementations
///
/// This crate provides implementations for:
/// - **`u8`**: Always returns `1` (single byte)
/// - **`char`**: Returns `len_utf8()` (1-4 bytes depending on the character)
/// - **`PositionedChar<T>`**: Delegates to the wrapped character's `char_len()` for any `T: CharLen`
/// - **`&T`**: Delegates to `T::char_len()` for any `T: CharLen`
///
/// # Design Note
///
/// This trait is **sealed** and cannot be implemented outside of this crate. If you need
/// to work with a custom character type, use [`Lexeme::span_with`] or
/// [`UnknownLexeme::from_range`](crate::error::UnknownLexeme::from_range) and provide your own length function.
///
/// # Use Cases
///
/// - **Span calculation**: Convert positioned characters to byte spans automatically
/// - **UTF-8 handling**: Properly account for multi-byte characters
/// - **Error reporting**: Determine the exact byte range of an unexpected character
///
/// # Examples
///
/// ## Automatic Length Detection
///
/// ```rust
/// use tokit::utils::{Lexeme, PositionedChar};
///
/// // ASCII character (1 byte)
/// let ascii = Lexeme::from(PositionedChar::with_position('a', 10));
/// let span = ascii.span();
/// assert_eq!(span.len(), 1);
///
/// // Multi-byte UTF-8 character (3 bytes)
/// let emoji = Lexeme::from(PositionedChar::with_position('€', 20));
/// let span = emoji.span();
/// assert_eq!(span.len(), 3);
/// ```
///
/// ## With Custom Length Function
///
/// ```rust
/// use tokit::utils::{Lexeme, PositionedChar};
///
/// // For types that don't implement CharLen, use span_with
/// struct CustomChar(char);
///
/// let lexeme = Lexeme::from(PositionedChar::with_position(CustomChar('€'), 5));
/// let span = lexeme.span_with(|c| c.0.len_utf8());
///
/// assert_eq!(span.start(), 5);
/// assert_eq!(span.end(), 8);
/// ```
// NOTE(review): the only method here is `char_len`, not `len`, so this lint
// suppression may no longer be needed — confirm with clippy before removing.
#[allow(clippy::len_without_is_empty)]
pub trait CharLen: sealed::Sealed {
  /// Returns the length of this character in bytes.
  ///
  /// # Examples
  ///
  /// ```rust
  /// use tokit::utils::{Lexeme, PositionedChar};
  ///
  /// // The trait is used internally by span()
  /// let ascii = Lexeme::from(PositionedChar::with_position('A', 0));
  /// assert_eq!(ascii.span().len(), 1);
  ///
  /// let euro = Lexeme::from(PositionedChar::with_position('€', 0));
  /// assert_eq!(euro.span().len(), 3);
  ///
  /// let crab = Lexeme::from(PositionedChar::with_position('🦀', 0));
  /// assert_eq!(crab.span().len(), 4);
  /// ```
  fn char_len(&self) -> usize;
}

// Private module that backs the sealing of `CharLen`: the `Sealed` supertrait
// lives here, together with every `CharLen` implementation.
mod sealed {
  use super::{CharLen, PositionedChar};

  /// Supertrait of `CharLen`; it is implemented only for the types listed in
  /// this module.
  pub trait Sealed {}

  // ---- `u8`: a raw byte is always one byte long ----

  impl Sealed for u8 {}

  impl CharLen for u8 {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn char_len(&self) -> usize {
      1
    }
  }

  // ---- `char`: length of the UTF-8 encoding ----

  impl Sealed for char {}

  impl CharLen for char {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn char_len(&self) -> usize {
      char::len_utf8(*self)
    }
  }

  // ---- `PositionedChar<T>`: length of the wrapped character ----

  impl<T: Sealed> Sealed for PositionedChar<T> {}

  impl<T: CharLen> CharLen for PositionedChar<T> {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn char_len(&self) -> usize {
      let inner = self.char_ref();
      inner.char_len()
    }
  }

  // ---- `&T`: length of the referenced character ----

  impl<T: Sealed> Sealed for &T {}

  impl<T: CharLen> CharLen for &T {
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn char_len(&self) -> usize {
      // `self` is `&&T`; one dereference yields the `&T` that `T`'s impl expects.
      T::char_len(*self)
    }
  }
}