// char_classes_proc_macro/lib.rs

1use std::{convert::identity, iter::{once, Peekable}};
2
3use litrs::{ByteStringLit, StringLit};
4use proc_macro::{Delimiter, Group, Ident, Punct, Spacing::*, Span, TokenStream, TokenTree, Literal};
5
/// Build a `::core::compile_error! {"..."}` invocation carrying `msg`.
///
/// Every emitted token is re-spanned to `span` so the resulting
/// diagnostic points at the offending piece of macro input.
#[must_use]
fn err(msg: &str, span: Span) -> TokenStream {
    // Stamp `span` onto a token tree, builder-style.
    let respan = |mut tt: TokenTree| -> TokenTree {
        tt.set_span(span);
        tt
    };

    // `{ "msg" }` — the braced payload of `compile_error!`.
    let body: TokenStream = respan(Literal::string(msg).into()).into();

    let tokens: [TokenTree; 8] = [
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("core", span).into(),
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("compile_error", span).into(),
        Punct::new('!', Joint).into(),
        Group::new(Delimiter::Brace, body).into(),
    ];
    tokens.into_iter().map(respan).collect()
}
27
/// Wrap `stream` into a `::core::matches!( stream )` invocation,
/// spanned at the call site.
fn matches(stream: TokenStream) -> TokenStream {
    let span = Span::call_site();
    let tokens: [TokenTree; 8] = [
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("core", span).into(),
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("matches", span).into(),
        Punct::new('!', Joint).into(),
        Group::new(Delimiter::Parenthesis, stream).into(),
    ];
    tokens.into_iter().collect()
}
40
/// Lift a single token tree into a one-token `TokenStream`.
fn tts(tt: impl Into<TokenTree>) -> TokenStream {
    TokenStream::from(tt.into())
}
45
/// Collect loose token trees into a `TokenStream`.
fn stream(i: impl IntoIterator<Item = TokenTree>) -> TokenStream {
    TokenStream::from_iter(i)
}
/// Concatenate several token streams into one.
fn streams(i: impl IntoIterator<Item = TokenStream>) -> TokenStream {
    TokenStream::from_iter(i)
}
52
/// How the membership test requested by the macro input is interpreted,
/// selected by an optional prefix before the pattern literal.
enum Mode {
    /// No prefix: plain `matches!(expr, pat)` membership test.
    Normal,
    /// `^` prefix: expand to a `match` that is `true` only when the first
    /// element exists and is NOT in the set (empty input yields `false`).
    Exclude,
    /// `!` prefix: negate the whole `matches!` test; stores the original
    /// `!` punct token so its span is preserved in the expansion.
    Not(TokenTree),
}
58
59use Mode::*;
60
impl Mode {
    /// Look at the head of the input and consume a leading `!` or `^`
    /// prefix if present, leaving the iterator at the pattern literal.
    fn resolve(iter: &mut Peekable<impl Iterator<Item = TokenTree>>) -> Self {
        match iter.peek() {
            Some(TokenTree::Punct(p)) if p.as_char() == '!' => {
                // Keep the `!` token itself so it can be re-emitted
                // (span-preserving) in front of the `matches!` call.
                Not(iter.next().unwrap())
            },
            Some(TokenTree::Punct(p)) if p.as_char() == '^' => {
                // `^` is purely a marker; discard it.
                iter.next().unwrap();
                Exclude
            },
            _ => Normal,
        }
    }

    /// Expand the test for this mode.
    ///
    /// - `expr`: the scrutinee (already wrapped in `first_elem(...)`,
    ///   so it evaluates to an `Option` of the first element)
    /// - `pat`: the `Some(...)` pattern produced by `to_pats`
    /// - `com`: the separator token (a `,`), reused for its span
    fn run(
        self,
        expr: TokenStream,
        pat: TokenStream,
        com: TokenTree,
    ) -> TokenStream {
        match self {
            // `matches!(expr, pat)`
            Normal => matches(streams([expr, tts(com), pat])),
            // `! matches!(expr, pat)` — prepend the saved `!` token.
            Not(not) => once(not)
                .chain(Normal.run(expr, pat, com))
                .collect(),
            // `match expr { None | pat => false, Some(_) => true, }`
            // i.e. empty input OR a set member -> false; anything else -> true.
            Exclude => stream([
                Ident::new("match", com.span()).into(),
                // `Delimiter::None` groups the expression without visible
                // parentheses while keeping it a single operand.
                Group::new(Delimiter::None, expr).into(),
                Group::new(
                    Delimiter::Brace,
                    streams([
                        none(),
                        tts(Punct::new('|', Alone)),
                        pat,
                        stream([
                            Punct::new('=', Joint).into(),
                            Punct::new('>', Alone).into(),
                            Ident::new("false", Span::call_site()).into(),
                            Punct::new(',', Alone).into(),
                        ]),
                        some(tts(Ident::new("_", Span::call_site()))),
                        stream([
                            Punct::new('=', Joint).into(),
                            Punct::new('>', Alone).into(),
                            Ident::new("true", Span::call_site()).into(),
                            Punct::new(',', Alone).into(),
                        ]),
                    ]),
                ).into(),
            ]),
        }
    }
}
114
/// Emit `::char_classes::FirstElem::first_elem(&<stream>)`.
///
/// The runtime `first_elem` helper (declared in the `char_classes`
/// crate) turns the expression into an `Option` of its first element,
/// which the generated patterns then match against.
fn first_elem(stream: TokenStream) -> TokenStream {
    // Borrow the caller's expression: `&<stream>`.
    let arg: TokenStream = [
        TokenStream::from(TokenTree::from(Punct::new('&', Joint))),
        stream,
    ].into_iter().collect();

    let span = Span::call_site();
    let tokens: [TokenTree; 10] = [
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("char_classes", span).into(),
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("FirstElem", span).into(),
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("first_elem", span).into(),
        Group::new(Delimiter::Parenthesis, arg).into(),
    ];
    tokens.into_iter().collect()
}
133
/// The decoded value of the pattern literal: either a normal string
/// literal (`"..."`, handled as `char`s) or a byte-string literal
/// (`b"..."`, handled as `u8`s).
enum Str {
    Norm(String),
    Byte(Vec<u8>),
}
138
139fn lit_str(tt: &TokenTree) -> Result<Str, TokenStream> {
140    StringLit::try_from(tt)
141        .map(|s| Str::Norm(s.into_value().into_owned()))
142        .map_err(|e| e.to_string())
143        .or_else(|e| ByteStringLit::try_from(tt)
144            .map(|b| Str::Byte(b.into_value().into_owned()))
145            .map_err(|e2| format!("{e}\n{e2}")))
146        .map_err(|e| err(&e, tt.span()))
147}
148
/// Builder-style `set_span`: consume the value, stamp `span`, return it.
/// (The existing "Spaned" spelling is kept — it is part of the crate's
/// internal interface; NOTE(review): consider renaming to `Spanned`.)
trait Spaned {
    fn spaned(self, span: Span) -> Self;
}

/// The implementation is identical for every spannable token type, so
/// generate the three impls from one template.
macro_rules! impl_spaned {
    ($($ty:ty),* $(,)?) => {$(
        impl Spaned for $ty {
            fn spaned(mut self, span: Span) -> Self {
                self.set_span(span);
                self
            }
        }
    )*};
}
impl_spaned!(TokenTree, Literal, Punct);
170
171trait ToPat: Sized {
172    fn to_pat(self, span: Span) -> TokenStream;
173}
174impl ToPat for u8 {
175    fn to_pat(self, span: Span) -> TokenStream {
176        TokenTree::from(Literal::byte_character(self).spaned(span)).into()
177    }
178}
179impl ToPat for char {
180    fn to_pat(self, span: Span) -> TokenStream {
181        TokenTree::from(Literal::character(self).spaned(span)).into()
182    }
183}
184impl<T: ToPat> ToPat for (T, T) {
185    fn to_pat(self, span: Span) -> TokenStream {
186        let (from, to) = self;
187        TokenStream::from_iter([
188            from.to_pat(span),
189            <TokenStream as FromIterator<TokenTree>>::from_iter([
190                Punct::new('.', Joint).into(),
191                Punct::new('.', Joint).into(),
192                Punct::new('=', Joint).into(),
193            ]),
194            to.to_pat(span),
195        ])
196    }
197}
198
/// Whether an element is the `-` range separator.
trait IsDash {
    fn is_dash(&self) -> bool;
}

impl IsDash for u8 {
    fn is_dash(&self) -> bool {
        matches!(*self, b'-')
    }
}

impl IsDash for char {
    fn is_dash(&self) -> bool {
        matches!(*self, '-')
    }
}
212
213trait Expected: Iterator<Item = TokenTree> + Sized {
214    fn expected(&mut self, ty: &str) -> Result<TokenTree, TokenStream> {
215        self.next()
216            .ok_or_else(||
217        {
218            let msg = format!("unexpected end of input, expected a {ty}");
219            err(&msg, Span::call_site())
220        })
221    }
222}
223impl<T: Iterator<Item = TokenTree>> Expected for T { }
224
/// Emit the fully-qualified path `::core::option::Option::None`.
fn none() -> TokenStream {
    let span = Span::call_site();
    let mut out = TokenStream::new();
    for seg in ["core", "option", "Option", "None"] {
        // Each segment is preceded by `::`.
        out.extend([
            TokenTree::from(Punct::new(':', Joint)),
            Punct::new(':', Joint).into(),
            Ident::new(seg, span).into(),
        ]);
    }
    out
}
241
/// Emit `::core::option::Option::Some( input )`.
fn some(input: TokenStream) -> TokenStream {
    let span = Span::call_site();
    let mut out = TokenStream::new();
    for seg in ["core", "option", "Option", "Some"] {
        // Each segment is preceded by `::`.
        out.extend([
            TokenTree::from(Punct::new(':', Joint)),
            Punct::new(':', Joint).into(),
            Ident::new(seg, span).into(),
        ]);
    }
    out.extend([TokenTree::from(Group::new(Delimiter::Parenthesis, input))]);
    out
}
259
260fn to_pats<T, I>(iter: I, span: Span) -> Result<TokenStream, TokenStream>
261where T: ToPat + IsDash,
262      I: IntoIterator<Item = T>,
263{
264    let mut iter = iter.into_iter().peekable();
265    let Some(mut first) = iter.next() else {
266        return Err(err("not support empty pattern", span));
267    };
268    let mut result = TokenStream::new();
269    let mut sep: fn(&mut TokenStream) = |_| ();
270
271    while let Some(cur) = iter.next() {
272        sep(&mut result);
273
274        if let Some(to) = iter.next_if(|_| cur.is_dash()) {
275            result.extend([(first, to).to_pat(span)]);
276
277            if let Some(next) = iter.next() {
278                first = next;
279            } else {
280                return Ok(some(result));
281            }
282        } else {
283            result.extend([first.to_pat(span)]);
284            first = cur;
285        }
286
287        sep = |result| {
288            result.extend([TokenTree::from(Punct::new('|', Alone))]);
289        };
290    }
291
292    sep(&mut result);
293    result.extend([first.to_pat(span)]);
294    Ok(some(result))
295}
296
/// Like `char_classes::any()`, expand into [`matches`] for better performance
///
/// - `^"..."` is exclude pattern
/// - `!"..."` like `!any!(...)`
///
/// # Examples
///
/// ```ignore
/// use char_classes::any;
///
/// assert!(any!("ab",      'a'));
/// assert!(any!("ab",      'b'));
/// assert!(any!("ab",      'b'));
/// assert!(any!("a-c",     'a'));
/// assert!(any!("a-c",     'b'));
/// assert!(any!("a-c",     'c'));
/// assert!(any!(b"ab",    b'a'));
/// assert!(any!(b"ab",    b'b'));
///
/// assert!(! any!(^b"ab",   b'b'));
/// assert!(! any!(^"ab",   ""));
/// assert!(any!(!"ab",   ""));
///
/// assert!(any!(b"ab")(b'b'));
/// ```
///
/// **predicate mode** (no second argument — expands to a closure):
///
/// ```
/// use char_classes::any;
///
/// assert!(any!(b"ab")(b"b"));
/// assert!(any!(!b"ab")(b"c"));
/// assert!(any!(^b"ab")(b"c"));
///
/// assert!(any!(!b"ab")(b""));
/// assert!(! any!(^b"ab")(b""));
/// ```
#[proc_macro]
pub fn any(input: TokenStream) -> TokenStream {
    // An Err already is a `compile_error!` token stream, so both arms
    // of the Result are emitted verbatim.
    any_impl(input).unwrap_or_else(identity)
}
339
/// Parse the macro input and build the expansion; errors are returned as
/// ready-made `compile_error!` streams.
///
/// Input grammar: `[! | ^] <string-or-byte-string literal> [, <expr>]`.
/// Without the trailing expression the macro expands to a predicate
/// closure instead of an immediate test.
fn any_impl(input: TokenStream) -> Result<TokenStream, TokenStream> {
    let mut iter = input.into_iter().peekable();
    // Consume an optional `!` / `^` prefix first.
    let mode = Mode::resolve(&mut iter);
    let first = iter.expected("literal")?;
    let lit_str = lit_str(&first)?;

    // Build the `Some(...)` pattern from the literal's chars or bytes,
    // spanned at the literal for good diagnostics.
    let pat = match lit_str {
        Str::Norm(s) => to_pats(s.chars(), first.span()),
        Str::Byte(bytes) => to_pats(bytes, first.span()),
    }?;
    // No tokens left after the literal -> predicate (closure) mode.
    let predicate_mode = iter.peek().is_none();
    // The separating comma (or a synthesized one in predicate mode);
    // `Mode::run` reuses it for both `matches!` and its span.
    let com = iter.next()
        .unwrap_or_else(|| Punct::new(',', Alone).into());

    let output = if predicate_mode {
        // Expand to `|input| <test on first_elem(input)>`.
        let name = TokenTree::from(Ident::new("input", first.span()));
        let expr = first_elem(name.clone().into());

        once(Punct::new('|', Joint).into())
            .chain([name, Punct::new('|', Alone).into()])
            .chain(mode.run(expr, pat, com))
            .collect()
    } else {
        // Remaining tokens form the expression to test.
        mode.run(first_elem(iter.collect()), pat, com)
    };

    // Invisible delimiters keep the expansion a single expression.
    Ok(tts(Group::new(Delimiter::None, output)))
}