inpt 0.1.5

A derive crate for dumb type-level text parsing.
Documentation
//! Wrapper types used to split up input in common ways.

use std::borrow::Cow;

use crate::{CharClass, Inpt, InptError, InptStep, RecursionGuard};

/// Single-word `T` surrounded by whitespace: `hello world`
///
/// The `regex` attribute captures a run of one or more non-whitespace
/// characters (`\S+`), and the `trim` attribute names whitespace (`\s`) as the
/// character class to trim. How the capture group and trim class are applied
/// is decided by `#[derive(Inpt)]` — presumably the captured text is what gets
/// parsed into `inner`; confirm against the derive macro's documentation.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"([\S]+)", trim = r"\s")]
pub struct Spaced<T> {
    /// The wrapped value.
    pub inner: T,
}

/// Single-word `T` surrounded by whitespace or punctuation: `hello.world`
///
/// The `regex` attribute captures a run of one or more characters that are
/// neither whitespace nor Unicode punctuation (`\p{Punctuation}`), and the
/// `trim` attribute names both whitespace and punctuation as the character
/// class to trim. How these are applied is decided by `#[derive(Inpt)]` —
/// presumably the captured text is what gets parsed into `inner`; confirm
/// against the derive macro's documentation.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"([^\s\p{Punctuation}]+)", trim = r"\s\p{Punctuation}")]
pub struct Word<T> {
    /// The wrapped value.
    pub inner: T,
}

/// Single-line `T` followed by a line break: `hello\nworld`
///
/// The `regex` attribute captures a run of one or more characters other than
/// `\n`, so an empty line does not match.
///
/// NOTE(review): the pattern itself neither consumes the trailing `\n` nor
/// excludes `\r`; presumably the derive's trimming takes care of the line
/// break (and of the `\r` in CRLF input) — confirm against the derive macro.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"([^\n]+)")]
pub struct Line<T> {
    /// The wrapped value.
    pub inner: T,
}

/// Multi-line `T` followed by a blank line: `hel\nlo\n\nworld`
///
/// The `(?s)` flag lets `.` match newlines, so `.*?` lazily captures everything
/// up to the first blank line — a `\n`, optional whitespace, and another `\n` —
/// or up to the end of the input (`$`) when there is no blank line. The
/// terminator sits in a non-capturing group, so it is matched but is not part
/// of the captured text.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"((?s).*?)(?:\n\s*\n|$)")]
pub struct Group<T> {
    /// The wrapped value.
    pub inner: T,
}

/// Unescapes the given string, using fairly permissive common-sense rules.
///
/// A backslash followed by one of the characters below is replaced by the
/// corresponding control character:
///
/// | escape        | result            |
/// |---------------|-------------------|
/// | `\0`          | NUL (`U+0000`)    |
/// | `\a`          | bell (`U+0007`)   |
/// | `\b`          | backspace (`U+0008`) |
/// | `\v`          | vertical tab (`U+000B`) |
/// | `\f`          | form feed (`U+000C`) |
/// | `\n`          | line feed         |
/// | `\r`          | carriage return   |
/// | `\t`          | tab               |
/// | `\e` / `\E`   | escape (`U+001B`) |
///
/// A backslash followed by any other character yields that character itself
/// (so `\\` is `\`, `\"` is `"`, and so on). A lone backslash at the very end
/// of the input has nothing to escape and is kept as a literal backslash.
///
/// Returns `Cow::Borrowed` without allocating when `s` contains no backslash.
pub fn unescape(s: &str) -> Cow<'_, str> {
    if !s.contains('\\') {
        return Cow::Borrowed(s);
    }

    // The output is never longer than the input: every escape shrinks or
    // keeps the character count.
    let mut buf = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            buf.push(c);
            continue;
        }
        buf.push(match chars.next() {
            Some('0') => '\u{0}',
            Some('a') => '\u{07}',
            Some('b') => '\u{08}',
            Some('v') => '\u{0B}',
            Some('f') => '\u{0C}',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some('e') | Some('E') => '\u{1B}',
            // Unknown escapes stand for the escaped character itself.
            Some(other) => other,
            // Trailing backslash: arbitrary caller input can end this way, so
            // stay permissive and keep it rather than panicking.
            None => '\\',
        });
    }
    Cow::Owned(buf)
}

/// Arbitrary `T` surrounded by double quotes: `"hello"world`
///
/// Internal quotes can be escaped with backslash.
///
/// The pattern matches an opening `"`, then lazily captures a run of items
/// that are each either a single non-backslash character or a backslash
/// followed by any character (newlines included, via the `s` flag), and stops
/// at the first `"` that is not consumed as part of such an escape pair.
/// The captured text is raw: backslash escapes are still present in it. Use
/// [`Quoted::unescape`] to resolve them.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r#""((?s:[^\\]|\\.)*?)""#)]
pub struct Quoted<T> {
    /// The wrapped value.
    pub inner: T,
}

impl<S: AsRef<str>> Quoted<S> {
    /// Resolves the backslash escapes in the quoted text by delegating to the
    /// free [unescape] fn, which applies fairly permissive common-sense rules.
    ///
    /// The returned `Cow` borrows from `self`. If you are having lifetime
    /// issues, try manually calling the free [unescape] fn on the inner string.
    pub fn unescape(&self) -> Cow<str> {
        let raw: &str = self.inner.as_ref();
        unescape(raw)
    }
}

/// Arbitrary `T` surrounded by single quotes: `'hello'world`
///
/// Internal quotes can be escaped with backslash.
///
/// The pattern matches an opening `'`, then lazily captures a run of items
/// that are each either a single non-backslash character or a backslash
/// followed by any character (newlines included, via the `s` flag), and stops
/// at the first `'` that is not consumed as part of such an escape pair.
/// The captured text is raw: backslash escapes are still present in it. Use
/// [`SingleQuoted::unescape`] to resolve them.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"'((?s:[^\\]|\\.)*?)'")]
pub struct SingleQuoted<T> {
    /// The wrapped value.
    pub inner: T,
}

impl<S: AsRef<str>> SingleQuoted<S> {
    /// Resolves the backslash escapes in the quoted text by delegating to the
    /// free [unescape] fn, which applies fairly permissive common-sense rules.
    ///
    /// The returned `Cow` borrows from `self`. If you are having lifetime
    /// issues, try manually calling the free [unescape] fn on the inner string.
    pub fn unescape(&self) -> Cow<str> {
        let raw: &str = self.inner.as_ref();
        unescape(raw)
    }
}

/// Arbitrary `T` inside a pair of matching brackets, parentheses, or braces.
///
/// `OPEN` and `CLOSE` are the opening and closing delimiter characters. The
/// `Inpt` implementation requires the text to start with `OPEN`, tracks
/// nesting by counting `OPEN`/`CLOSE` occurrences, and parses the text between
/// the outer delimiters as `T`.
///
/// Braces, parentheses, or brackets inside quotes are not counted. Both `"`
/// and `'` start a quoted run, and a backslash inside a quoted run escapes the
/// character that follows it.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct AnyBracketed<const OPEN: char, const CLOSE: char, T> {
    /// The value parsed from the text between the delimiters.
    pub inner: T,
}

impl<'s, const OPEN: char, const CLOSE: char, T> Inpt<'s> for AnyBracketed<OPEN, CLOSE, T>
where
    T: Inpt<'s>,
{
    /// Parses `OPEN`, a delimiter-balanced body, and the matching `CLOSE` from
    /// the start of `text`, then parses the body as `T`.
    ///
    /// * `text` must start with `OPEN`; otherwise an "expected literal at
    ///   start" error is returned and `rest` is the untouched `text`.
    /// * `end`: when `true`, a `CLOSE` is only accepted as the terminator if it
    ///   is the very last character of `text`.
    /// * `trimmed` is forwarded unchanged to the inner `T::step` call.
    /// * `guard` wraps the whole parse via `guard.check` and is forwarded to
    ///   the inner parse.
    ///
    /// The body is always parsed with `end = true`, i.e. `T` is asked to
    /// consume everything between the delimiters; only the `data` of that inner
    /// step is kept.
    ///
    /// If no terminator is found, the error is returned with `rest` starting at
    /// the last `CLOSE` in `text`, or empty when `text` contains no `CLOSE`.
    fn step(
        text: &'s str,
        end: bool,
        trimmed: CharClass,
        guard: &mut RecursionGuard,
    ) -> crate::InptStep<'s, Self> {
        guard.check(text, |guard| {
            if text.starts_with(OPEN) {
                let mut depth: i32 = 0;
                let mut chars = text.char_indices();
                let closed = 'matched: loop {
                    let (pos, c) = match chars.next() {
                        Some(c) => c,
                        None => break 'matched Err(InptError::expected_lit_at_end(&CLOSE)),
                    };
                    // count depth
                    if pos == 0 {
                        // The first character is `OPEN` (checked above). Treat it
                        // purely as an opener so that `OPEN == CLOSE` does not
                        // open and close on the same character.
                        depth = 1;
                    } else if c == CLOSE {
                        depth -= 1;
                        // Only a `CLOSE` character may terminate the match;
                        // testing the depth on arbitrary characters would let
                        // trailing text after a balanced pair be swallowed.
                        if depth == 0 && (!end || pos + CLOSE.len_utf8() == text.len()) {
                            break 'matched Ok(pos);
                        }
                    } else if c == OPEN {
                        depth += 1;
                    }
                    // ignore opening or closing chars inside strings
                    if let Some(q) = ['"', '\''].iter().find(|q| c == **q) {
                        'quoted: loop {
                            match chars.next() {
                                // unexpected end
                                None => break 'matched Err(InptError::expected_lit_at_end(q)),
                                // escape character: skip whatever follows it
                                Some((_, '\\')) => {
                                    let _ = chars.next();
                                }
                                // end quote
                                Some((_, c)) if c == *q => break 'quoted,
                                // other character
                                _ => (),
                            }
                        }
                    }
                };
                let step = match closed {
                    Ok(closed) => crate::InptStep {
                        // `closed` is the byte offset of the terminating `CLOSE`,
                        // which is always past the opening delimiter.
                        data: T::step(&text[OPEN.len_utf8()..closed], true, trimmed, guard).data,
                        rest: &text[closed + CLOSE.len_utf8()..],
                    },
                    Err(e) => crate::InptStep {
                        data: Err(e),
                        // Resume from the last closing delimiter, if there is one.
                        rest: match text.rfind(CLOSE) {
                            Some(pos) => &text[pos..],
                            None => &text[text.len()..],
                        },
                    },
                };
                step.map(|inner| AnyBracketed { inner })
            } else {
                InptStep {
                    data: Err(InptError::expected_lit_at_start(&OPEN)),
                    rest: text,
                }
            }
        })
    }
}

/// Arbitrary `T` inside matching parentheses: `(hello)world`
///
/// Alias of [`AnyBracketed`] with `(` and `)` as delimiters.
pub type Parenthetical<T> = AnyBracketed<'(', ')', T>;
/// Arbitrary `T` inside matching square brackets: `[hello]world`
///
/// Alias of [`AnyBracketed`] with `[` and `]` as delimiters.
pub type Bracketed<T> = AnyBracketed<'[', ']', T>;
/// Arbitrary `T` inside matching curly braces: `{hello}world`
///
/// Alias of [`AnyBracketed`] with `{` and `}` as delimiters.
pub type Braced<T> = AnyBracketed<'{', '}', T>;
/// Arbitrary `T` inside matching angle brackets: `<hello>world`
///
/// Alias of [`AnyBracketed`] with `<` and `>` as delimiters.
pub type AngleBraced<T> = AnyBracketed<'<', '>', T>;