tokit/types/
lit.rs

1//! Literal token types for language syntax trees.
2//!
3//! This module provides generic literal types that represent various kinds of
4//! literal values found in programming languages: numbers, strings, booleans, etc.
5//! Each literal type carries its source representation along with span information.
6//!
7//! # Design Philosophy
8//!
9//! All literal types follow the same pattern as [`Ident`](super::Ident):
10//!
//! - **Generic data type `D`**: Support `&str`, `String`, or interned strings
//! - **Generic span type `S`**: Defaults to `SimpleSpan`
12//! - **Language marker `Lang`**: Type-safe language distinction
13//! - **Span tracking**: All literals carry source location for diagnostics
14//! - **Error recovery**: Implement [`ErrorNode`] for placeholder creation
15//!
16//! # Available Literal Types
17//!
18//! ## Generic Literal
19//!
20//! - [`Lit`]: Generic literal (any literal type)
21//!
22//! ## Numeric Literals
23//!
24//! - [`LitDecimal`]: Base-10 integer (e.g., `42`, `1_000`)
25//! - [`LitHex`]: Hexadecimal integer (e.g., `0xFF`, `0x1A2B`)
26//! - [`LitOctal`]: Octal integer (e.g., `0o77`, `0o644`)
27//! - [`LitBinary`]: Binary integer (e.g., `0b1010`, `0b1111_0000`)
28//! - [`LitFloat`](crate::types::lit::LitFloat): Floating-point (e.g., `3.14`, `1.0e-5`)
29//! - [`LitHexFloat`]: Hexadecimal float (e.g., `0x1.8p3`)
30//!
31//! ## String Literals
32//!
33//! - [`LitString`]: Single-line string (e.g., `"hello"`)
34//! - [`LitMultilineString`]: Multi-line string (e.g., `"""..."""`)
35//! - [`LitRawString`]: Raw string without escape processing (e.g., `r"C:\path"`)
36//!
37//! ## Character/Byte Literals
38//!
39//! - [`LitChar`]: Character literal (e.g., `'a'`, `'\n'`)
40//! - [`LitByte`]: Byte literal (e.g., `b'a'`, `b'\x7F'`)
41//! - [`LitByteString`]: Byte string (e.g., `b"bytes"`)
42//!
43//! ## Boolean and Null
44//!
45//! - [`LitBool`]: Boolean literal (`true`/`false`)
46//! - [`LitNull`]: Null/nil literal
47//!
48//! # Common Usage Patterns
49//!
50//! ## Zero-Copy Parsing
51//!
52//! ```rust,ignore
53//! use tokit::types::{Lit, LitDecimal, LitString};
54//! use tokit::utils::SimpleSpan;
55//!
56//! // Parse literals without allocating
//! type YulLit<'a> = Lit<&'a str, SimpleSpan, YulLang>;
//! type YulDecimal<'a> = LitDecimal<&'a str, SimpleSpan, YulLang>;
//! type YulString<'a> = LitString<&'a str, SimpleSpan, YulLang>;
//!
//! let generic = YulLit::new(SimpleSpan::new(0, 2), "42");
//! let num = YulDecimal::new(SimpleSpan::new(0, 2), "42");
//! let str = YulString::new(SimpleSpan::new(5, 12), "\"hello\"");
64//! ```
65//!
66//! ## Owned Literals
67//!
68//! ```rust,ignore
69//! // Store literals in AST nodes
//! type OwnedDecimal = LitDecimal<String, SimpleSpan, MyLang>;
71//!
72//! let lit = OwnedDecimal::new(span, source.to_string());
73//! ```
74//!
75//! # Error Recovery
76//!
//! All literal types implement [`ErrorNode`] when `D: ErrorNode<S>` and `S: Clone`:
78//!
79//! ```rust,ignore
//! use tokit::types::LitDecimal;
//! use tokit::error::ErrorNode;
//! use tokit::utils::SimpleSpan;
//!
//! // Create placeholder for malformed literal
//! let bad_lit = LitDecimal::<String, SimpleSpan, YulLang>::error(span);
//!
//! // Create placeholder for missing literal
//! let missing_lit = LitDecimal::<String, SimpleSpan, YulLang>::missing(span);
88//! ```
89
90use core::marker::PhantomData;
91
92use crate::{
93  error::ErrorNode,
94  utils::{AsSpan, IntoComponents},
95};
96
/// Generates a literal token type together with its standard implementations.
///
/// Every literal type in this module has the same shape (a span, a data payload
/// and a zero-sized language marker) and the same API, so they are all stamped
/// out by this macro instead of being written by hand.
///
/// # Arguments
///
/// - `$(#[$meta])*`: attributes (usually `///` docs) placed on the generated struct
/// - `$name`: name of the generated struct
/// - `$doc`: one-line summary appended after the `$meta` docs
/// - `$example_str`: example source text used in the generated doc example
/// - `$example_desc`: caption used in the generated error-recovery example
macro_rules! define_literal {
  (
    $(#[$meta:meta])*
    $name:ident,
    $doc:expr,
    $example_str:expr,
    $example_desc:expr
  ) => {
    paste::paste! {
      $(#[$meta])*
      ///
      #[doc = $doc]
      ///
      /// # Type Parameters
      ///
      /// - `D`: The literal's data, typically its source text (`&str`, `String`,
      ///   interned string, etc.)
      /// - `S`: The span type; defaults to `SimpleSpan`
      /// - `Lang`: Language marker type for type safety; defaults to `()`
      ///
      /// # Examples
      ///
      /// ## Creating Literals
      ///
      /// ```rust
      #[doc = "use tokit::types::" $name ";"]
      /// use tokit::utils::SimpleSpan;
      /// # struct MyLang;
      ///
      #[doc = "let lit = " $name "::<&str, SimpleSpan, MyLang>::new("]
      #[doc = "    SimpleSpan::new(0, 4),"]
      #[doc = "    " $example_str ]
      /// );
      ///
      #[doc = "assert_eq!(lit.data_ref(), &" $example_str ");"]
      /// ```
      ///
      /// ## With Error Recovery
      ///
      /// ```rust,ignore
      #[doc = "use tokit::types::" $name ";"]
      /// use tokit::error::ErrorNode;
      /// use tokit::utils::SimpleSpan;
      ///
      #[doc = "// " $example_desc]
      #[doc = "let bad_lit = " $name "::<String, SimpleSpan, YulLang>::error(span);"]
      /// ```
      // Note: these derives bound every type parameter, including the `Lang` marker.
      #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
      pub struct $name<D, S = $crate::__private::utils::SimpleSpan, Lang = ()> {
        span: S,
        data: D,
        _lang: PhantomData<Lang>,
      }
    }

    impl<D, S, Lang> AsSpan<S> for $name<D, S, Lang> {
      #[cfg_attr(not(tarpaulin), inline(always))]
      fn as_span(&self) -> &S {
        self.span_ref()
      }
    }

    impl<D, S, Lang> IntoComponents for $name<D, S, Lang> {
      /// The span followed by the data; the language marker is dropped.
      type Components = (S, D);

      #[cfg_attr(not(tarpaulin), inline(always))]
      fn into_components(self) -> Self::Components {
        (self.span, self.data)
      }
    }

    impl<D, S, Lang> $name<D, S, Lang> {
      /// Creates a new literal with the given span and data.
      ///
      /// # Parameters
      ///
      /// - `span`: The source location of this literal
      /// - `data`: The literal's data
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn new(span: S, data: D) -> Self {
        Self {
          span,
          data,
          _lang: PhantomData,
        }
      }

      /// Returns the span (source location) of this literal by value.
      ///
      /// Only available when `S` implements [`Copy`].
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn span(&self) -> S where S: ::core::marker::Copy {
        self.span
      }

      /// Returns an immutable reference to the span.
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn span_ref(&self) -> &S {
        &self.span
      }

      /// Returns a mutable reference to the span.
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn span_mut(&mut self) -> &mut S {
        &mut self.span
      }

      /// Returns a mutable reference to the literal's data.
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn data_mut(&mut self) -> &mut D {
        &mut self.data
      }

      /// Returns an immutable reference to the literal's data.
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn data_ref(&self) -> &D {
        &self.data
      }

      /// Returns a copy of the literal's data by value.
      ///
      /// Only available when `D` implements [`Copy`].
      #[cfg_attr(not(tarpaulin), inline(always))]
      pub const fn data(&self) -> D
      where
        D: Copy,
      {
        self.data
      }
    }

    impl<D, S, Lang> ErrorNode<S> for $name<D, S, Lang>
    where
      D: ErrorNode<S>,
      S: Clone,
    {
      /// Creates a placeholder literal for **malformed content**.
      ///
      /// The span is cloned: one copy is stored on the literal and the other is
      /// handed to `D::error` to build the placeholder data.
      #[cfg_attr(not(tarpaulin), inline(always))]
      fn error(span: S) -> Self {
        Self::new(span.clone(), D::error(span))
      }

      /// Creates a placeholder literal for **missing required content**.
      ///
      /// The span is cloned: one copy is stored on the literal and the other is
      /// handed to `D::missing` to build the placeholder data.
      #[cfg_attr(not(tarpaulin), inline(always))]
      fn missing(span: S) -> Self {
        Self::new(span.clone(), D::missing(span))
      }
    }
  };
}
247
248// Generic literal
define_literal!(
  /// A generic literal.
  ///
  /// Represents any kind of literal value without distinguishing between specific
  /// kinds (numeric, string, boolean, etc.). Useful when the exact literal kind
  /// doesn't matter for your use case.
  Lit,
  "A generic literal (any literal type).", // summary line added to the type docs
  "\"value\"", // source text used in the generated doc example
  "Malformed literal" // caption used in the generated error-recovery example
);
260
261// Numeric literals
define_literal!(
  /// A decimal (base-10) integer literal.
  ///
  /// Represents numeric literals in standard decimal notation, such as `42`, `1000`,
  /// or `123_456`. The source text may include underscores for readability; the
  /// literal still denotes a single integer value.
  LitDecimal,
  "A decimal integer literal (e.g., `42`, `1_000`).", // summary line added to the type docs
  "\"42\"", // source text used in the generated doc example
  "Malformed decimal literal like \"12abc\"" // caption used in the generated error-recovery example
);
273
define_literal!(
  /// A hexadecimal (base-16) integer literal.
  ///
  /// Represents integer literals in hexadecimal notation, typically prefixed with
  /// `0x` or `0X`, such as `0xFF`, `0x1A2B`, or `0xDEAD_BEEF`.
  LitHex,
  "A hexadecimal integer literal (e.g., `0xFF`, `0x1A2B`).", // summary line added to the type docs
  "\"0xFF\"", // source text used in the generated doc example
  "Malformed hex literal like \"0xGG\"" // caption used in the generated error-recovery example
);
284
define_literal!(
  /// An octal (base-8) integer literal.
  ///
  /// Represents integer literals in octal notation, typically prefixed with `0o`,
  /// such as `0o77`, `0o644`, or `0o755`.
  LitOctal,
  "An octal integer literal (e.g., `0o77`, `0o644`).", // summary line added to the type docs
  "\"0o77\"", // source text used in the generated doc example
  "Malformed octal literal like \"0o89\"" // caption used in the generated error-recovery example
);
295
define_literal!(
  /// A binary (base-2) integer literal.
  ///
  /// Represents integer literals in binary notation, typically prefixed with `0b`,
  /// such as `0b1010`, `0b11110000`, or `0b1111_0000`.
  LitBinary,
  "A binary integer literal (e.g., `0b1010`, `0b1111_0000`).", // summary line added to the type docs
  "\"0b1010\"", // source text used in the generated doc example
  "Malformed binary literal like \"0b123\"" // caption used in the generated error-recovery example
);
306
define_literal!(
  /// A floating-point literal.
  ///
  /// Represents floating-point literals in standard decimal notation with optional
  /// fractional and exponent parts, such as `3.14`, `1.0`, `2.5e-3`, or `6.022e23`.
  LitFloat,
  "A floating-point literal (e.g., `3.14`, `1.0e-5`).", // summary line added to the type docs
  "\"3.14\"", // source text used in the generated doc example
  "Malformed float literal like \"3.14.15\"" // caption used in the generated error-recovery example
);
317
define_literal!(
  /// A hexadecimal floating-point literal.
  ///
  /// Represents floating-point literals in hexadecimal notation with a binary exponent,
  /// such as `0x1.8p3` (which equals 12.0 in decimal). Used in languages such as C
  /// (since C99) and Java to write floating-point values exactly.
  LitHexFloat,
  "A hexadecimal floating-point literal (e.g., `0x1.8p3`).", // summary line added to the type docs
  "\"0x1.8p3\"", // source text used in the generated doc example
  "Malformed hex float like \"0x1.Gp3\"" // caption used in the generated error-recovery example
);
329
330// String literals
define_literal!(
  /// A single-line string literal.
  ///
  /// Represents string literals enclosed in quotes, typically on a single line,
  /// such as `"hello"`, `"world\n"`, or `"escaped \"quotes\""`. May contain
  /// escape sequences.
  LitString,
  "A single-line string literal (e.g., `\"hello\"`, `\"world\\n\"`).", // summary line added to the type docs
  "\"\\\"hello\\\"\"", // source text used in the generated doc example
  "Malformed string like unterminated \"hello" // caption used in the generated error-recovery example
);
342
define_literal!(
  /// A multi-line string literal.
  ///
  /// Represents string literals that span multiple lines, often with special delimiters
  /// like triple quotes (`"""..."""` or `'''...'''`). Common in languages like Python,
  /// Kotlin, and Swift.
  LitMultilineString,
  "A multi-line string literal (e.g., `\"\"\"...\"\"\"`).", // summary line added to the type docs
  "\"\\\"\\\"\\\"multi\\nline\\\"\\\"\\\"\"", // source text used in the generated doc example
  "Malformed multiline string" // caption used in the generated error-recovery example
);
354
define_literal!(
  /// A raw string literal.
  ///
  /// Represents string literals where escape sequences are not processed, often
  /// prefixed with `r` (e.g., Rust's `r"C:\path"`, Python's `r"\n stays literal"`).
  /// Useful for regular expressions and file paths.
  LitRawString,
  "A raw string literal without escape processing (e.g., `r\"C:\\path\"`).", // summary line added to the type docs
  "\"r\\\"C:\\\\path\\\"\"", // source text used in the generated doc example
  "Malformed raw string" // caption used in the generated error-recovery example
);
366
367// Character and byte literals
define_literal!(
  /// A character literal.
  ///
  /// Represents a single character enclosed in single quotes, such as `'a'`, `'\n'`,
  /// or `'\u{1F600}'`. May contain escape sequences for special characters.
  LitChar,
  "A character literal (e.g., `'a'`, `'\\n'`, `'\\u{1F600}'`).", // summary line added to the type docs
  "\"'a'\"", // source text used in the generated doc example
  "Malformed char like unclosed 'a" // caption used in the generated error-recovery example
);
378
define_literal!(
  /// A byte literal.
  ///
  /// Represents a single byte value enclosed in single quotes with a `b` prefix,
  /// such as `b'a'`, `b'\x7F'`, or `b'\n'`. Used in languages like Rust for
  /// ASCII/byte manipulation.
  LitByte,
  "A byte literal (e.g., `b'a'`, `b'\\x7F'`).", // summary line added to the type docs
  "\"b'a'\"", // source text used in the generated doc example
  "Malformed byte literal" // caption used in the generated error-recovery example
);
390
define_literal!(
  /// A byte string literal.
  ///
  /// Represents a sequence of bytes enclosed in quotes with a `b` prefix, such as
  /// `b"bytes"`, `b"\x48\x65\x6C\x6C\x6F"`. Used for binary data or ASCII strings.
  LitByteString,
  "A byte string literal (e.g., `b\"bytes\"`, `b\"\\x48\\x65\\x6C\\x6C\\x6F\"`).", // summary line added to the type docs
  "\"b\\\"bytes\\\"\"", // source text used in the generated doc example
  "Malformed byte string" // caption used in the generated error-recovery example
);
401
402// Boolean and null
define_literal!(
  /// A boolean literal.
  ///
  /// Represents boolean values `true` or `false`. The source text contains the
  /// actual keyword as it appears in source code.
  LitBool,
  "A boolean literal (`true` or `false`).", // summary line added to the type docs
  "\"true\"", // source text used in the generated doc example
  "Malformed boolean like \"tru\" or \"fals\"" // caption used in the generated error-recovery example
);
413
define_literal!(
  /// A null/nil literal.
  ///
  /// Represents the null, nil, or None value in various programming languages.
  /// The source text contains the keyword as it appears (e.g., `null`, `nil`,
  /// `None`, `nullptr`).
  LitNull,
  "A null/nil literal (e.g., `null`, `nil`, `None`).", // summary line added to the type docs
  "\"null\"", // source text used in the generated doc example
  "Malformed null literal" // caption used in the generated error-recovery example
);