inpt/
split.rs

1//! Wrapper types used to split up input in common ways.
2
3use std::borrow::Cow;
4
5use crate::{CharClass, Inpt, InptError, InptStep, RecursionGuard};
6
/// Single-word `T` surrounded by whitespace: `hello world`
///
/// The `regex` attribute captures one or more non-whitespace characters (`\S`),
/// and the `trim` attribute names the whitespace class (`\s`).
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"([\S]+)", trim = r"\s")]
pub struct Spaced<T> {
    /// The wrapped value.
    pub inner: T,
}
13
/// Single-word `T` surrounded by whitespace or punctuation: `hello.world`
///
/// The `regex` attribute captures one or more characters that are neither whitespace
/// nor Unicode punctuation (`\p{Punctuation}`); the `trim` attribute names both of
/// those classes.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"([^\s\p{Punctuation}]+)", trim = r"\s\p{Punctuation}")]
pub struct Word<T> {
    /// The wrapped value.
    pub inner: T,
}
20
/// Single-line `T` followed by a line break: `hello\nworld`
///
/// The `regex` attribute captures one or more characters other than `\n`; the line
/// break itself is not part of the capture, and an empty line does not match.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"([^\n]+)")]
pub struct Line<T> {
    /// The wrapped value.
    pub inner: T,
}
27
/// Multi-line `T` followed by a blank line: `hel\nlo\n\nworld`
///
/// The `regex` attribute lazily captures any text (with `(?s)`, `.` also matches
/// newlines) up to the first separator, which is either a newline, optional
/// whitespace and another newline, or the end of the input. The separator is matched
/// by a non-capturing group, so it is not part of the capture.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"((?s).*?)(?:\n\s*\n|$)")]
pub struct Group<T> {
    /// The wrapped value.
    pub inner: T,
}
34
/// Unescapes the given string, using fairly permissive common-sense rules.
///
/// When `s` contains no backslash it is returned borrowed and untouched. Otherwise a
/// new string is built in which each backslash and the character after it are
/// replaced by a single character:
///
/// | escape       | result                  |
/// |--------------|-------------------------|
/// | `\0`         | NUL (`U+0000`)          |
/// | `\a`         | bell (`U+0007`)         |
/// | `\b`         | backspace (`U+0008`)    |
/// | `\v`         | vertical tab (`U+000B`) |
/// | `\f`         | form feed (`U+000C`)    |
/// | `\n`         | line feed               |
/// | `\r`         | carriage return         |
/// | `\t`         | horizontal tab          |
/// | `\e`, `\E`   | escape (`U+001B`)       |
///
/// Any other escaped character stands for itself, so `\"` becomes `"` and `\\`
/// becomes `\`. A lone backslash at the very end of the input has nothing to escape
/// and is kept verbatim.
pub fn unescape(s: &str) -> Cow<'_, str> {
    // Fast path: nothing to rewrite, so no allocation is needed.
    if !s.contains('\\') {
        return Cow::Borrowed(s);
    }

    // The output is never longer than the input, since each escape shrinks by one char.
    let mut buf = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            buf.push(c);
            continue;
        }
        // This is a public function and may receive arbitrary text, so a trailing
        // backslash is reachable. Keep it literally instead of panicking.
        let Some(escaped) = chars.next() else {
            buf.push('\\');
            break;
        };
        buf.push(match escaped {
            '0' => '\u{0}',
            'a' => '\u{07}',
            'b' => '\u{08}',
            'v' => '\u{0B}',
            'f' => '\u{0C}',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'e' | 'E' => '\u{1B}',
            // Unknown escapes (including `\\`, `\"` and `\'`) yield the character itself.
            _ => escaped,
        });
    }
    Cow::Owned(buf)
}
66
/// Arbitrary `T` surrounded by double quotes: `"hello"world`
///
/// Internal quotes can be escaped with backslash.
///
/// The `regex` attribute matches an opening `"`, then lazily captures a run of items
/// that are each either a single non-backslash character or a backslash followed by
/// any character (newlines included, because of the `s` flag), then a closing `"`.
/// The capture still contains the backslashes, so escape sequences are not
/// processed by the pattern itself.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r#""((?s:[^\\]|\\.)*?)""#)]
pub struct Quoted<T> {
    /// The wrapped value.
    pub inner: T,
}
75
impl<S: AsRef<str>> Quoted<S> {
    /// Unescapes this quoted string, using fairly permissive common-sense rules.
    ///
    /// Delegates to the free [unescape] fn on `self.inner.as_ref()`; the returned
    /// `Cow` is tied to the borrow of `self`.
    ///
    /// If you are having lifetime issues, try manually calling the free [unescape] fn.
    pub fn unescape(&self) -> Cow<str> {
        // A bare `unescape(..)` call resolves to the free function, not to this method.
        unescape(self.inner.as_ref())
    }
}
84
/// Arbitrary `T` surrounded by single quotes: `'hello'world`
///
/// Internal quotes can be escaped with backslash.
///
/// The `regex` attribute matches an opening `'`, then lazily captures a run of items
/// that are each either a single non-backslash character or a backslash followed by
/// any character (newlines included, because of the `s` flag), then a closing `'`.
/// The capture still contains the backslashes, so escape sequences are not
/// processed by the pattern itself.
#[derive(Inpt, Debug, PartialEq, Eq, Clone, Copy)]
#[inpt(regex = r"'((?s:[^\\]|\\.)*?)'")]
pub struct SingleQuoted<T> {
    /// The wrapped value.
    pub inner: T,
}
93
impl<S: AsRef<str>> SingleQuoted<S> {
    /// Unescapes this quoted string, using fairly permissive common-sense rules.
    ///
    /// Delegates to the free [unescape] fn on `self.inner.as_ref()`; the returned
    /// `Cow` is tied to the borrow of `self`.
    ///
    /// If you are having lifetime issues, try manually calling the free [unescape] fn.
    pub fn unescape(&self) -> Cow<str> {
        // A bare `unescape(..)` call resolves to the free function, not to this method.
        unescape(self.inner.as_ref())
    }
}
102
/// Arbitrary `T` inside a pair of matching brackets, parentheses, or braces.
///
/// Braces, parentheses, or brackets inside quotes are not counted.
///
/// `OPEN` and `CLOSE` are the opening and closing delimiter characters. The
/// `Inpt` implementation in this file treats both `"` and `'` as quote characters.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct AnyBracketed<const OPEN: char, const CLOSE: char, T> {
    /// The wrapped value.
    pub inner: T,
}
110
111impl<'s, const OPEN: char, const CLOSE: char, T> Inpt<'s> for AnyBracketed<OPEN, CLOSE, T>
112where
113    T: Inpt<'s>,
114{
115    fn step(
116        text: &'s str,
117        end: bool,
118        trimmed: CharClass,
119        guard: &mut RecursionGuard,
120    ) -> crate::InptStep<'s, Self> {
121        guard.check(text, |guard| {
122            if text.starts_with(OPEN) {
123                let mut depth = 0;
124                let mut chars = text.char_indices();
125                let closed = 'matched: loop {
126                    let (pos, c) = match chars.next() {
127                        Some(c) => c,
128                        None => break 'matched Err(InptError::expected_lit_at_end(&CLOSE)),
129                    };
130                    // count depth
131                    if c == OPEN {
132                        depth += 1;
133                    }
134                    if c == CLOSE {
135                        depth -= 1;
136                    }
137                    if depth == 0 && (!end || pos + CLOSE.len_utf8() == text.len()) {
138                        break Ok(pos);
139                    }
140                    // ignore opening or closing chars inside strings
141                    if let Some(q) = ['"', '\''].iter().find(|q| c == **q) {
142                        'quoted: loop {
143                            match chars.next() {
144                                // unexpected end
145                                None => break 'matched Err(InptError::expected_lit_at_end(q)),
146                                // escape character
147                                Some((_, '\\')) => {
148                                    let _ = chars.next();
149                                }
150                                // end quote
151                                Some((_, c)) if c == *q => break 'quoted,
152                                // other character
153                                _ => (),
154                            }
155                        }
156                    }
157                };
158                let step = match closed {
159                    Ok(closed) => crate::InptStep {
160                        data: T::step(&text[OPEN.len_utf8()..closed], true, trimmed, guard).data,
161                        rest: &text[closed + CLOSE.len_utf8()..],
162                    },
163                    Err(e) => crate::InptStep {
164                        data: Err(e),
165                        rest: match text.rfind(CLOSE) {
166                            Some(pos) => &text[pos..],
167                            None => &text[text.len()..],
168                        },
169                    },
170                };
171                step.map(|inner| AnyBracketed { inner })
172            } else {
173                InptStep {
174                    data: Err(InptError::expected_lit_at_start(&OPEN)),
175                    rest: text,
176                }
177            }
178        })
179    }
180}
181
/// Arbitrary `T` inside matching parentheses: `(hello)world`
///
/// Alias for [`AnyBracketed`] with `(` and `)` as delimiters.
pub type Parenthetical<T> = AnyBracketed<'(', ')', T>;
/// Arbitrary `T` inside matching square brackets: `[hello]world`
///
/// Alias for [`AnyBracketed`] with `[` and `]` as delimiters.
pub type Bracketed<T> = AnyBracketed<'[', ']', T>;
/// Arbitrary `T` inside matching curly braces: `{hello}world`
///
/// Alias for [`AnyBracketed`] with `{` and `}` as delimiters.
pub type Braced<T> = AnyBracketed<'{', '}', T>;
/// Arbitrary `T` inside matching angle brackets: `<hello>world`
///
/// Alias for [`AnyBracketed`] with `<` and `>` as delimiters.
pub type AngleBraced<T> = AnyBracketed<'<', '>', T>;