tokit/types/lit.rs
1//! Literal token types for language syntax trees.
2//!
3//! This module provides generic literal types that represent various kinds of
4//! literal values found in programming languages: numbers, strings, booleans, etc.
5//! Each literal type carries its source representation along with span information.
6//!
7//! # Design Philosophy
8//!
9//! All literal types follow the same pattern as [`Ident`](super::Ident):
10//!
//! - **Generic data type `D`**: Support `&str`, `String`, or interned strings
//! - **Generic span type `S`**: Defaults to `SimpleSpan`
//! - **Language marker `Lang`**: Type-safe language distinction (defaults to `()`)
13//! - **Span tracking**: All literals carry source location for diagnostics
14//! - **Error recovery**: Implement [`ErrorNode`] for placeholder creation
15//!
16//! # Available Literal Types
17//!
18//! ## Generic Literal
19//!
20//! - [`Lit`]: Generic literal (any literal type)
21//!
22//! ## Numeric Literals
23//!
24//! - [`LitDecimal`]: Base-10 integer (e.g., `42`, `1_000`)
25//! - [`LitHex`]: Hexadecimal integer (e.g., `0xFF`, `0x1A2B`)
26//! - [`LitOctal`]: Octal integer (e.g., `0o77`, `0o644`)
27//! - [`LitBinary`]: Binary integer (e.g., `0b1010`, `0b1111_0000`)
28//! - [`LitFloat`](crate::types::lit::LitFloat): Floating-point (e.g., `3.14`, `1.0e-5`)
29//! - [`LitHexFloat`]: Hexadecimal float (e.g., `0x1.8p3`)
30//!
31//! ## String Literals
32//!
33//! - [`LitString`]: Single-line string (e.g., `"hello"`)
34//! - [`LitMultilineString`]: Multi-line string (e.g., `"""..."""`)
35//! - [`LitRawString`]: Raw string without escape processing (e.g., `r"C:\path"`)
36//!
37//! ## Character/Byte Literals
38//!
39//! - [`LitChar`]: Character literal (e.g., `'a'`, `'\n'`)
40//! - [`LitByte`]: Byte literal (e.g., `b'a'`, `b'\x7F'`)
41//! - [`LitByteString`]: Byte string (e.g., `b"bytes"`)
42//!
43//! ## Boolean and Null
44//!
45//! - [`LitBool`]: Boolean literal (`true`/`false`)
46//! - [`LitNull`]: Null/nil literal
47//!
48//! # Common Usage Patterns
49//!
50//! ## Zero-Copy Parsing
51//!
52//! ```rust,ignore
53//! use tokit::types::{Lit, LitDecimal, LitString};
54//! use tokit::utils::SimpleSpan;
55//!
56//! // Parse literals without allocating
//! type YulLit<'a> = Lit<&'a str, SimpleSpan, YulLang>;
//! type YulDecimal<'a> = LitDecimal<&'a str, SimpleSpan, YulLang>;
//! type YulString<'a> = LitString<&'a str, SimpleSpan, YulLang>;
//!
//! let generic = YulLit::new(SimpleSpan::new(0, 2), "42");
//! let num = YulDecimal::new(SimpleSpan::new(0, 2), "42");
//! let str = YulString::new(SimpleSpan::new(5, 12), "\"hello\"");
64//! ```
65//!
66//! ## Owned Literals
67//!
68//! ```rust,ignore
69//! // Store literals in AST nodes
//! type OwnedDecimal = LitDecimal<String, SimpleSpan, MyLang>;
71//!
72//! let lit = OwnedDecimal::new(span, source.to_string());
73//! ```
74//!
75//! # Error Recovery
76//!
//! All literal types implement [`ErrorNode`] when `D: ErrorNode<S>` and `S: Clone`:
78//!
79//! ```rust,ignore
80//! use tokit::types::LitDecimal;
81//! use tokit::error::ErrorNode;
82//!
83//! // Create placeholder for malformed literal
//! let bad_lit = LitDecimal::<String, SimpleSpan, YulLang>::error(span);
//!
//! // Create placeholder for missing literal
//! let missing_lit = LitDecimal::<String, SimpleSpan, YulLang>::missing(span);
88//! ```
89
90use core::marker::PhantomData;
91
92use crate::{
93 error::ErrorNode,
94 utils::{AsSpan, IntoComponents},
95};
96
/// Generates a literal node type plus its accessors and trait impls.
///
/// Every literal kind shares the same shape (a span, a data payload and a
/// phantom language marker), so the struct, its inherent methods and its
/// [`AsSpan`], [`IntoComponents`] and [`ErrorNode`] implementations are
/// stamped out from this single template.
///
/// # Arguments
///
/// - leading attributes / doc comments: forwarded onto the generated struct
/// - `$name`: identifier of the generated struct
/// - `$doc`: one-line summary appended to the struct documentation
/// - `$example_str`: string literal spliced into the generated usage example
/// - `$example_desc`: comment text spliced into the error-recovery example
macro_rules! define_literal {
    (
        $(#[$meta:meta])*
        $name:ident,
        $doc:expr,
        $example_str:expr,
        $example_desc:expr
    ) => {
        paste::paste! {
            $(#[$meta])*
            // Blank doc line so `$doc` starts its own paragraph instead of being
            // glued onto the last sentence of the caller-supplied docs.
            ///
            #[doc = $doc]
            ///
            /// # Type Parameters
            ///
            /// - `D`: The literal's data (`&str`, `String`, interned string, etc.)
            /// - `S`: The span type (defaults to `SimpleSpan`)
            /// - `Lang`: Language marker type for type safety (defaults to `()`)
            ///
            /// # Examples
            ///
            /// ## Creating Literals
            ///
            /// ```rust
            #[doc = "use tokit::types::" $name ";"]
            /// use tokit::utils::SimpleSpan;
            /// # struct MyLang;
            ///
            #[doc = "let lit = " $name "::<&str, SimpleSpan, MyLang>::new("]
            #[doc = "    SimpleSpan::new(0, 4),"]
            #[doc = "    " $example_str ]
            /// );
            ///
            #[doc = "assert_eq!(lit.data_ref(), &" $example_str ");"]
            /// ```
            ///
            /// ## With Error Recovery
            ///
            /// ```rust,ignore
            #[doc = "use tokit::types::" $name ";"]
            /// use tokit::error::ErrorNode;
            ///
            #[doc = "// " $example_desc]
            #[doc = "let bad_lit = " $name "::<String, SimpleSpan, YulLang>::error(span);"]
            /// ```
            #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
            pub struct $name<D, S = $crate::__private::utils::SimpleSpan, Lang = ()> {
                span: S,
                data: D,
                _lang: PhantomData<Lang>,
            }
        }

        impl<D, S, Lang> AsSpan<S> for $name<D, S, Lang> {
            #[cfg_attr(not(tarpaulin), inline(always))]
            fn as_span(&self) -> &S {
                self.span_ref()
            }
        }

        impl<D, S, Lang> IntoComponents for $name<D, S, Lang> {
            type Components = (S, D);

            /// Splits the literal into its `(span, data)` pair.
            #[cfg_attr(not(tarpaulin), inline(always))]
            fn into_components(self) -> Self::Components {
                (self.span, self.data)
            }
        }

        impl<D, S, Lang> $name<D, S, Lang> {
            /// Creates a new literal from the given span and data.
            ///
            /// # Parameters
            ///
            /// - `span`: The source location of this literal
            /// - `data`: The literal's data
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn new(span: S, data: D) -> Self {
                Self {
                    span,
                    data,
                    _lang: PhantomData,
                }
            }

            /// Returns the span (source location) of this literal by value.
            ///
            /// Only available when `S` implements [`Copy`].
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn span(&self) -> S
            where
                S: ::core::marker::Copy,
            {
                self.span
            }

            /// Returns an immutable reference to the span.
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn span_ref(&self) -> &S {
                &self.span
            }

            /// Returns a mutable reference to the span.
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn span_mut(&mut self) -> &mut S {
                &mut self.span
            }

            /// Returns a mutable reference to the literal's data.
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn data_mut(&mut self) -> &mut D {
                &mut self.data
            }

            /// Returns an immutable reference to the literal's data.
            ///
            /// This is the most common way to access the literal's text.
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn data_ref(&self) -> &D {
                &self.data
            }

            /// Returns a copy of the literal's data by value.
            ///
            /// Only available when `D` implements [`Copy`].
            #[cfg_attr(not(tarpaulin), inline(always))]
            pub const fn data(&self) -> D
            where
                D: Copy,
            {
                self.data
            }
        }

        impl<D, S, Lang> ErrorNode<S> for $name<D, S, Lang>
        where
            D: ErrorNode<S>,
            S: Clone,
        {
            /// Creates a placeholder literal for **malformed content**.
            ///
            /// The span is cloned because both the literal and its placeholder
            /// data need to own one.
            #[cfg_attr(not(tarpaulin), inline(always))]
            fn error(span: S) -> Self {
                Self::new(span.clone(), D::error(span))
            }

            /// Creates a placeholder literal for **missing required content**.
            ///
            /// The span is cloned because both the literal and its placeholder
            /// data need to own one.
            #[cfg_attr(not(tarpaulin), inline(always))]
            fn missing(span: S) -> Self {
                Self::new(span.clone(), D::missing(span))
            }
        }
    };
}
247
248// Generic literal
249define_literal!(
250 /// A generic literal.
251 ///
252 /// Represents any kind of literal value without distinguishing between specific
253 /// types (numeric, string, boolean, etc.). Useful when the exact literal type
254 /// doesn't matter for your use case.
255 Lit,
256 "A generic literal (any literal type).",
257 "\"value\"",
258 "Malformed literal"
259);
260
261// Numeric literals
262define_literal!(
263 /// A decimal (base-10) integer literal.
264 ///
265 /// Represents numeric literals in standard decimal notation, such as `42`, `1000`,
266 /// or `123_456`. The source string may include underscores for readability but
267 /// represents a single integer value.
268 LitDecimal,
269 "A decimal integer literal (e.g., `42`, `1_000`).",
270 "\"42\"",
271 "Malformed decimal literal like \"12abc\""
272);
273
274define_literal!(
275 /// A hexadecimal (base-16) integer literal.
276 ///
277 /// Represents integer literals in hexadecimal notation, typically prefixed with
278 /// `0x` or `0X`, such as `0xFF`, `0x1A2B`, or `0xDEAD_BEEF`.
279 LitHex,
280 "A hexadecimal integer literal (e.g., `0xFF`, `0x1A2B`).",
281 "\"0xFF\"",
282 "Malformed hex literal like \"0xGG\""
283);
284
285define_literal!(
286 /// An octal (base-8) integer literal.
287 ///
288 /// Represents integer literals in octal notation, typically prefixed with `0o`,
289 /// such as `0o77`, `0o644`, or `0o755`.
290 LitOctal,
291 "An octal integer literal (e.g., `0o77`, `0o644`).",
292 "\"0o77\"",
293 "Malformed octal literal like \"0o89\""
294);
295
296define_literal!(
297 /// A binary (base-2) integer literal.
298 ///
299 /// Represents integer literals in binary notation, typically prefixed with `0b`,
300 /// such as `0b1010`, `0b11110000`, or `0b1111_0000`.
301 LitBinary,
302 "A binary integer literal (e.g., `0b1010`, `0b1111_0000`).",
303 "\"0b1010\"",
304 "Malformed binary literal like \"0b123\""
305);
306
307define_literal!(
308 /// A floating-point literal.
309 ///
310 /// Represents floating-point literals in standard decimal notation with optional
311 /// fractional and exponent parts, such as `3.14`, `1.0`, `2.5e-3`, or `6.022e23`.
312 LitFloat,
313 "A floating-point literal (e.g., `3.14`, `1.0e-5`).",
314 "\"3.14\"",
315 "Malformed float literal like \"3.14.15\""
316);
317
318define_literal!(
319 /// A hexadecimal floating-point literal.
320 ///
321 /// Represents floating-point literals in hexadecimal notation with binary exponent,
322 /// such as `0x1.8p3` (which equals 12.0 in decimal). Used in languages like C and Rust
323 /// for precise floating-point representation.
324 LitHexFloat,
325 "A hexadecimal floating-point literal (e.g., `0x1.8p3`).",
326 "\"0x1.8p3\"",
327 "Malformed hex float like \"0x1.Gp3\""
328);
329
330// String literals
331define_literal!(
332 /// A single-line string literal.
333 ///
334 /// Represents string literals enclosed in quotes, typically on a single line,
335 /// such as `"hello"`, `"world\n"`, or `"escaped \"quotes\""`. May contain
336 /// escape sequences.
337 LitString,
338 "A single-line string literal (e.g., `\"hello\"`, `\"world\\n\"`).",
339 "\"\\\"hello\\\"\"",
340 "Malformed string like unterminated \"hello"
341);
342
343define_literal!(
344 /// A multi-line string literal.
345 ///
346 /// Represents string literals that span multiple lines, often with special delimiters
347 /// like triple quotes (`"""..."""` or `'''...'''`). Common in languages like Python,
348 /// Kotlin, and Swift.
349 LitMultilineString,
350 "A multi-line string literal (e.g., `\"\"\"...\"\"\"`).",
351 "\"\\\"\\\"\\\"multi\\nline\\\"\\\"\\\"\"",
352 "Malformed multiline string"
353);
354
355define_literal!(
356 /// A raw string literal.
357 ///
358 /// Represents string literals where escape sequences are not processed, often
359 /// prefixed with `r` (e.g., Rust's `r"C:\path"`, Python's `r"\n stays literal"`).
360 /// Useful for regular expressions and file paths.
361 LitRawString,
362 "A raw string literal without escape processing (e.g., `r\"C:\\path\"`).",
363 "\"r\\\"C:\\\\path\\\"\"",
364 "Malformed raw string"
365);
366
367// Character and byte literals
368define_literal!(
369 /// A character literal.
370 ///
371 /// Represents a single character enclosed in single quotes, such as `'a'`, `'\\n'`,
372 /// or `'\\u{1F600}'`. May contain escape sequences for special characters.
373 LitChar,
374 "A character literal (e.g., `'a'`, `'\\n'`, `'\\u{1F600}'`).",
375 "\"'a'\"",
376 "Malformed char like unclosed 'a"
377);
378
379define_literal!(
380 /// A byte literal.
381 ///
382 /// Represents a single byte value enclosed in single quotes with a `b` prefix,
383 /// such as `b'a'`, `b'\\x7F'`, or `b'\\n'`. Used in languages like Rust for
384 /// ASCII/byte manipulation.
385 LitByte,
386 "A byte literal (e.g., `b'a'`, `b'\\x7F'`).",
387 "\"b'a'\"",
388 "Malformed byte literal"
389);
390
391define_literal!(
392 /// A byte string literal.
393 ///
394 /// Represents a sequence of bytes enclosed in quotes with a `b` prefix, such as
395 /// `b"bytes"`, `b"\\x48\\x65\\x6C\\x6C\\x6F"`. Used for binary data or ASCII strings.
396 LitByteString,
397 "A byte string literal (e.g., `b\"bytes\"`, `b\"\\x48\\x65\\x6C\\x6C\\x6F\"`).",
398 "\"b\\\"bytes\\\"\"",
399 "Malformed byte string"
400);
401
402// Boolean and null
403define_literal!(
404 /// A boolean literal.
405 ///
406 /// Represents boolean values `true` or `false`. The source string contains the
407 /// actual keyword as it appears in source code.
408 LitBool,
409 "A boolean literal (`true` or `false`).",
410 "\"true\"",
411 "Malformed boolean like \"tru\" or \"fals\""
412);
413
414define_literal!(
415 /// A null/nil literal.
416 ///
417 /// Represents the null, nil, or None value in various programming languages.
418 /// The source string contains the keyword as it appears (e.g., `null`, `nil`,
419 /// `None`, `nullptr`).
420 LitNull,
421 "A null/nil literal (e.g., `null`, `nil`, `None`).",
422 "\"null\"",
423 "Malformed null literal"
424);