// litext 1.0.0 - Docs.rs

//! # litext
//!
//! A procedural macro library for extracting literal contents from tokens.
//! Built for proc-macro authors who need to pull string, numeric, char, bool,
//! byte, and C string literal contents from `TokenStream` input.
//!
//! ## Overview
//!
//! When writing procedural macros, you often receive tokens that represent literals
//! and need to extract their actual values. This crate provides the [`extract`]
//! function and [`litext!`] macro for this purpose, supporting a wide variety
//! of literal types with span tracking for precise error reporting.
//!
//! ## Features
//!
//! - Extract content from string, raw string, byte string, and C string literals
//! - Parse integer literals (decimal, hex `0xFF`, octal `0o77`, binary `0b1010`)
//!   with underscore separators (`1_000`) and type suffixes (`42u8`)
//! - Parse float literals with scientific notation (`1e10`), underscore separators,
//!   and type suffixes (`3.14f32`)
//! - Extract character literals with full escape support (`\n`, `\t`, `\\`, `\'`,
//!   `\"`, `\0`, `\x41`, `\u{1F600}`)
//! - Extract byte literals (`b'a'`, `b'\xff'`) and byte strings (`b"..."`)
//! - Parse boolean literals (`true`, `false`) from identifiers
//! - Span-aware types (`LitStr`, `LitInt<T>`, `LitFloat<T>`, `LitBool`,
//!   `LitChar`, `LitByte`, `LitByteStr`, `LitCStr`) for precise error reporting
//! - Round-trip support via [`ToTokens`] trait
//! - Extensible via the [`FromLit`] trait for custom types
//! - Result-returning variants via the `try` keyword for flexible error handling
//!
//! ## Quick Start
//!
//! ```ignore
//! use litext::{litext, TokenStream};
//!
//! fn my_macro(input: TokenStream) -> TokenStream {
//!     // Using the macro with try to get a Result (recommended for error handling)
//!     let result: Result<String, TokenStream> = litext!(try input);
//!     match result {
//!         Ok(text) => quote::quote! { /* use text */ },
//!         Err(e) => e, // Forward the error
//!     };
//!
//!     // Or return early on error (convenient, concise)
//!     let text: String = litext!(input);
//!     quote::quote! { /* use text */ }
//! }
//! ```
//!
//! ## Literal Types Supported
//!
//! | Target Type | Example Input | Extracted Value |
//! |-------------|---------------|----------------|
//! | `String`, `LitStr` | `"hello"`, `r#"raw"#` | String content |
//! | Integer types, `LitInt<T>` | `42`, `0xFF_i32`, `0b1010` | Parsed integer |
//! | Float types, `LitFloat<T>` | `3.14`, `1e10_f32` | Parsed float |
//! | `char`, `LitChar` | `'x'`, `'\n'`, `'\u{1F600}'` | Unicode character |
//! | `bool`, `LitBool` | `true`, `false` | Boolean value |
//! | `u8`, `LitByte` | `b'a'`, `b'\xff'` | Byte value |
//! | `Vec<u8>`, `LitByteStr` | `b"hello"`, `br#"..."#` | Byte vector |
//! | `CString`, `LitCStr` | `c"hello"`, `cr#"..."#` | C string |
//!
//! ## See Also
//!
//! - Span-aware literal types and `ToTokens` trait for round-tripping
//! - [`FromLit`] trait for implementing custom literal parsing

#![warn(missing_docs)]

use proc_macro2::{Literal, Span, TokenStream, TokenTree};

mod literal;
pub use literal::*;

/// A convenience macro for extracting literal content from a token stream.
///
/// This macro is the primary entry point for extracting literal content in proc-macro
/// implementations. It wraps [`extract`] and returns early with a compile error if
/// extraction fails.
///
/// # Syntax
///
/// ```ignore
/// litext!(input)              // Extract as String
/// litext!(input as String)    // Extract as String (explicit)
/// litext!(input as T)         // Extract as custom type T implementing FromLit
/// litext!(try input)           // Extract as String, keep Result
/// litext!(try input as T)         // Extract as T, keep Result
/// ```
///
/// # Arguments
///
/// - `input` - An identifier bound to a `TokenStream` (the macro matches a single
///   identifier, so arbitrary expressions must be bound to a variable first)
/// - `as T` - Optional: the target type to extract into. Defaults to `String` if omitted.
/// - `try` - Optional: when present, returns `Result<T, TokenStream>` instead of
///   returning early on error. Useful when you want to handle errors differently.
///
/// # Behavior
///
/// - Without `try`: Passes the input to [`extract::<T>`]. If extraction succeeds,
///   evaluates to the value. If extraction fails, `return`s the error `TokenStream`
///   from the enclosing function.
/// - With `try`: Returns `Result<T, TokenStream>` from [`extract`], allowing you
///   to handle success and error cases explicitly.
///
/// # Examples
///
/// ```ignore
/// use litext::{litext, TokenStream};
///
/// fn my_string_macro(input: TokenStream) -> TokenStream {
///     // Extract a string literal, return early on error
///     let text: String = litext!(input);
///     quote::quote! { /* use text */ }
/// }
///
/// fn my_int_macro(input: TokenStream) -> TokenStream {
///     // Extract an integer literal, return early on error
///     let num: i32 = litext!(input as i32);
///     quote::quote! { /* use num */ }
/// }
///
/// fn my_custom_macro(input: TokenStream) -> TokenStream {
///     // Extract as a custom type, return early on error
///     let lit: LitStr = litext!(input as LitStr);
///     quote::quote! { #lit }
/// }
///
/// fn my_try_macro(input: TokenStream) -> TokenStream {
///     // Extract but handle error explicitly
///     let result: Result<String, TokenStream> = litext!(try input);
///     match result {
///         Ok(text) => quote::quote! { /* use text */ },
///         Err(e) => e, // Forward the error
///     }
/// }
/// ```
///
/// # See Also
///
/// - [`extract`] for extracting without the macro wrapper
/// - [`FromLit`] for implementing custom extraction behavior
#[macro_export]
macro_rules! litext {
    // NOTE: `macro_rules!` resolves paths at the call site, so the expansion
    // uses absolute paths for `String`, `Ok` and `Err`. A caller that shadows
    // any of those names would otherwise break (or silently change) the macro.

    // `litext!(input)` / `litext!(input as String)`:
    // evaluate to the extracted `String`, or `return` the error token stream
    // from the enclosing function.
    ($input:ident $(as String)?) => {
        match $crate::extract::<::std::string::String>($input) {
            ::std::result::Result::Ok(value) => value,
            ::std::result::Result::Err(error) => return error,
        }
    };

    // `litext!(try input)`: hand the `Result<String, _>` back to the caller.
    (try $input:ident) => {
        $crate::extract::<::std::string::String>($input)
    };

    // `litext!(input as T)`: same as the first arm, for any target type `T`.
    // `T` is written by the caller, so it is resolved at the call site.
    ($input:ident as $T:ty) => {
        match $crate::extract::<$T>($input) {
            ::std::result::Result::Ok(value) => value,
            ::std::result::Result::Err(error) => return error,
        }
    };

    // `litext!(try input as T)`: hand the `Result<T, _>` back to the caller.
    (try $input:ident as $T:ty) => {
        $crate::extract::<$T>($input)
    };
}

/// Extracts the inner value from a token stream representing a literal.
///
/// This function is the core of the crate. It takes a `TokenStream` and attempts to
/// extract a single literal token, then converts it to the target type `T` using
/// the [`FromLit`] trait.
///
/// # Type Parameter
///
/// - `T` - The target type implementing [`FromLit`]. Supported types include:
///   - `String`, `LitStr` - String and raw string literals
///   - `i8`–`i128`, `u8`–`u128`, `isize`, `usize`, `LitInt<T>` - Integer literals
///   - `f32`, `f64`, `LitFloat<T>` - Float literals
///   - `char`, `LitChar` - Character literals
///   - `bool`, `LitBool` - Boolean literals (from `true`/`false` identifiers)
///   - `u8`, `LitByte` - Byte literals (`b'a'`)
///   - `Vec<u8>`, `LitByteStr` - Byte string literals (`b"..."`)
///   - `CString`, `LitCStr` - C string literals (`c"..."`)
///
/// # Arguments
///
/// - `input` - A `TokenStream` that should contain exactly one literal token or identifier
///
/// # Limitations
///
/// - Negative numbers like `-42` are not literals in Rust's token stream -- they are
///   two tokens (a `-` punct and a positive literal). `extract` does not support
///   them. Handle negation at the expression level instead.
///
/// # Return Value
///
/// - `Ok(T)` - The extracted value of type `T`
/// - `Err(TokenStream)` - An error token stream that will trigger a compile error
///   when returned from a proc-macro. This occurs when:
///   - The input is empty
///   - The input contains more than one token
///   - The input is not a recognized literal or identifier
///   - The literal cannot be parsed as the target type (e.g., overflow)
///
/// # What Can Be Extracted
///
/// | Target Type | Accepted Inputs |
/// |-------------|-----------------|
/// | `String`, `LitStr` | `"hello"`, `r#"raw"#`, `r"C:\path"` |
/// | `i8`–`i128`, `u8`–`u128`, `isize`, `usize` | `42`, `0xFF`, `0o77`, `0b1010`, `1_000`, `42_i32` |
/// | `f32`, `f64` | `3.14`, `1e10`, `1.0_f32`, `1_000.5` |
/// | `char`, `LitChar` | `'a'`, `'\n'`, `'\u{1F600}'`, `'\x41'` |
/// | `bool`, `LitBool` | `true`, `false` (as identifiers) |
/// | `u8`, `LitByte` | `b'a'`, `b'\n'`, `b'\xff'` |
/// | `Vec<u8>`, `LitByteStr` | `b"hello"`, `br#"raw"#` |
/// | `CString`, `LitCStr` | `c"hello"`, `cr#"raw"#` |
///
/// # Error Handling
///
/// This function does not panic. All errors are returned as `TokenStream` values
/// containing error tokens. When returned from a proc-macro entry point function,
/// these error tokens cause the compiler to display the error message and abort
/// compilation.
///
/// # Examples
///
/// ```ignore
/// use litext::{extract, TokenStream};
///
/// fn process_string(input: TokenStream) -> Result<String, TokenStream> {
///     extract::<String>(input)
/// }
///
/// fn process_int(input: TokenStream) -> Result<i64, TokenStream> {
///     extract::<i64>(input)
/// }
///
/// fn process_float(input: TokenStream) -> Result<f64, TokenStream> {
///     extract::<f64>(input)
/// }
///
/// fn process_char(input: TokenStream) -> Result<char, TokenStream> {
///     extract::<char>(input)
/// }
///
/// fn process_custom(input: TokenStream) -> Result<LitStr, TokenStream> {
///     extract::<LitStr>(input)
/// }
/// ```
///
/// # See Also
///
/// - [`litext!`] macro for a convenient wrapper that returns early on error
/// - [`FromLit`] trait for the conversion logic
/// - Span-aware types and `ToTokens` trait for round-tripping
#[inline]
pub fn extract<T: literal::FromLit>(input: TokenStream) -> Result<T, TokenStream> {
    let mut iter = input.into_iter();

    let token = if let Some(t) = iter.next() { t } else {
        #[cold]
        fn got_nothing() -> TokenStream {
            comperr::error(Span::call_site(), "expected a literal, got nothing")
        }
        return Err(got_nothing());
    };

    if let Some(next_token) = iter.next() {
        #[cold]
        fn too_many_tokens(span: Span) -> TokenStream {
            comperr::error(span, "expected exactly one literal")
        }
        return Err(too_many_tokens(next_token.span()));
    }

    match token {
        TokenTree::Literal(lit) => T::from_lit(lit),
        TokenTree::Ident(ident) => T::from_ident(ident),
        TokenTree::Punct(p) => Err(comperr::error(
            p.span(),
            "expected a literal, found punctuation",
        )),
        TokenTree::Group(g) => Err(comperr::error(g.span(), "expected a literal, found group")),
    }
}

/// Parses a string literal token and returns the unescaped content.
#[inline]
pub(crate) fn parse_lit(lit: Literal) -> Result<String, TokenStream> {
    let raw = lit.to_string();
    let span = lit.span();

    if raw.starts_with('b') && raw.len() > 1 {
        let c = raw.chars().nth(1).unwrap();
        if c == '"' || c == 'r' {
            return Err(comperr::error(
                span,
                "expected a string literal, not a byte string",
            ));
        }
    }

    if raw.starts_with('r') {
        return parse_raw(&raw).ok_or_else(|| comperr::error(span, "malformed raw string literal"));
    }

    if raw.starts_with('"') && raw.ends_with('"') && raw.len() >= 2 {
        return unescape(&raw[1..raw.len() - 1], span);
    }

    Err(comperr::error(span, "expected a string literal"))
}

/// Extracts the inner content from a raw string literal like `r#"..."#`.
///
/// `raw` is the literal's full source text, including the leading `r`, the
/// hash fence and both quotes. Returns `None` when the text does not start
/// with `r`, when a quote is missing, or when the closing fence does not match
/// the opening one. The content is returned verbatim; no escapes are processed.
#[inline]
pub(crate) fn parse_raw(raw: &str) -> Option<String> {
    let after_r = raw.strip_prefix('r')?;

    // `#` is a single byte, so the number of leading hashes is also the byte
    // offset at which the opening quote is expected.
    let hash_count = after_r.chars().take_while(|&ch| ch == '#').count();
    let (fence, quoted) = after_r.split_at(hash_count);

    // Peel off the opening quote, then the closing fence, then the closing quote.
    let unfenced = quoted.strip_prefix('"')?.strip_suffix(fence)?;
    unfenced.strip_suffix('"').map(str::to_owned)
}

/// Processes escape sequences in a string and returns the unescaped result.
#[inline]
pub(crate) fn unescape(s: &str, span: Span) -> Result<String, TokenStream> {
    let mut output = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(c) = chars.next() {
        if c != '\\' {
            output.push(c);
            continue;
        }

        match chars.next() {
            Some('n') => output.push('\n'),
            Some('r') => output.push('\r'),
            Some('t') => output.push('\t'),
            Some('\\') => output.push('\\'),
            Some('"') => output.push('"'),
            Some('0') => output.push('\0'),

            Some('x') => {
                let h1 = chars
                    .next()
                    .ok_or_else(|| comperr::error(span, "invalid \\x escape"))?;
                let h2 = chars
                    .next()
                    .ok_or_else(|| comperr::error(span, "invalid \\x escape"))?;
                let hex = format!("{h1}{h2}");
                let byte = u8::from_str_radix(&hex, 16)
                    .map_err(|_| comperr::error(span, "invalid \\x escape"))?;
                if byte > 0x7F {
                    return Err(comperr::error(
                        span,
                        "\\x escape must be in range 0x00..=0x7F",
                    ));
                }
                output.push(byte as char);
            }

            Some('u') => {
                match chars.next() {
                    Some('{') => {}
                    _ => return Err(comperr::error(span, "invalid \\u escape, expected '{'")),
                }
                let mut hex = String::new();
                loop {
                    match chars.next() {
                        Some('}') => break,
                        Some(c) => hex.push(c),
                        None => return Err(comperr::error(span, "unterminated \\u escape")),
                    }
                }
                let codepoint = u32::from_str_radix(&hex, 16)
                    .map_err(|_| comperr::error(span, "invalid \\u codepoint"))?;
                let ch = char::from_u32(codepoint)
                    .ok_or_else(|| comperr::error(span, "invalid unicode codepoint"))?;
                output.push(ch);
            }

            Some('\n') => {
                while let Some(&c) = chars.as_str().chars().next().as_ref() {
                    if c.is_whitespace() {
                        chars.next();
                    } else {
                        break;
                    }
                }
            }

            _ => return Err(comperr::error(span, "invalid escape sequence")),
        }
    }

    Ok(output)
}