char_classes_proc_macro/
lib.rs

1use std::{convert::identity, iter::once};
2
3use litrs::{ByteStringLit, StringLit};
4use proc_macro::{Delimiter, Group, Ident, Punct, Spacing::*, Span, TokenStream, TokenTree, Literal};
5
/// Builds the token stream for `::core::compile_error! { "<msg>" }`.
///
/// Every emitted token, including the message literal inside the braces,
/// is given `span`, so the resulting diagnostic is reported at that location.
#[must_use]
fn err(msg: &str, span: Span) -> TokenStream {
    // The message literal lives inside the brace group, so it has to be
    // re-spanned separately from the outer tokens.
    let mut message = TokenTree::from(Literal::string(msg));
    message.set_span(span);

    let invocation: [TokenTree; 8] = [
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("core", span).into(),
        Punct::new(':', Joint).into(),
        Punct::new(':', Joint).into(),
        Ident::new("compile_error", span).into(),
        Punct::new('!', Joint).into(),
        Group::new(Delimiter::Brace, TokenStream::from(message)).into(),
    ];

    let mut output = TokenStream::new();
    for mut token in invocation {
        token.set_span(span);
        output.extend([token]);
    }
    output
}
27
/// Wraps `stream` as the argument list of `::core::matches!( ... )`.
///
/// `stream` is placed verbatim inside the parentheses; the path tokens use
/// the call-site span.
fn matches(stream: TokenStream) -> TokenStream {
    let call_site = Span::call_site();

    let mut output = TokenStream::new();
    output.extend([
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Ident::new("core", call_site)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Ident::new("matches", call_site)),
        TokenTree::from(Punct::new('!', Joint)),
    ]);
    output.extend([TokenTree::from(Group::new(Delimiter::Parenthesis, stream))]);
    output
}
40
/// Builds `::char_classes::FirstElem::first_elem(&<stream>)`.
///
/// The caller's tokens are prefixed with `&`, so the generated call receives
/// a reference to whatever expression `stream` spells out.
fn first_elem(stream: TokenStream) -> TokenStream {
    let mut argument = TokenStream::from(TokenTree::Punct(Punct::new('&', Joint)));
    argument.extend([stream]);

    let call_site = Span::call_site();
    let mut output = TokenStream::new();
    output.extend([
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Ident::new("char_classes", call_site)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Ident::new("FirstElem", call_site)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Punct::new(':', Joint)),
        TokenTree::from(Ident::new("first_elem", call_site)),
    ]);
    output.extend([TokenTree::from(Group::new(Delimiter::Parenthesis, argument))]);
    output
}
59
/// Decoded contents of the literal that describes a character class.
enum Str {
    /// Value of a normal string literal (`"..."`).
    Norm(String),
    /// Value of a byte string literal (`b"..."`).
    Byte(Vec<u8>),
}
64
65fn lit_str(tt: &TokenTree) -> Result<Str, TokenStream> {
66    StringLit::try_from(tt)
67        .map(|s| Str::Norm(s.into_value().into_owned()))
68        .map_err(|e| e.to_string())
69        .or_else(|e| ByteStringLit::try_from(tt)
70            .map(|b| Str::Byte(b.into_value().into_owned()))
71            .map_err(|e2| format!("{e}\n{e2}")))
72        .map_err(|e| err(&e, tt.span()))
73}
74
/// By-value counterpart of `set_span`: takes the token, re-spans it, and
/// hands it back so the call can be used inside an expression.
trait Spaned {
    /// Returns `self` with its span replaced by `span`.
    fn spaned(self, span: Span) -> Self;
}

impl Spaned for TokenTree {
    fn spaned(self, span: Span) -> Self {
        let mut tree = self;
        tree.set_span(span);
        tree
    }
}

impl Spaned for Literal {
    fn spaned(self, span: Span) -> Self {
        let mut literal = self;
        literal.set_span(span);
        literal
    }
}
90
91trait ToPat: Sized {
92    fn to_pat(self, span: Span) -> TokenStream;
93}
94impl ToPat for u8 {
95    fn to_pat(self, span: Span) -> TokenStream {
96        TokenTree::from(Literal::byte_character(self).spaned(span)).into()
97    }
98}
99impl ToPat for char {
100    fn to_pat(self, span: Span) -> TokenStream {
101        TokenTree::from(Literal::character(self).spaned(span)).into()
102    }
103}
104impl<T: ToPat> ToPat for (T, T) {
105    fn to_pat(self, span: Span) -> TokenStream {
106        let (from, to) = self;
107        TokenStream::from_iter([
108            from.to_pat(span),
109            <TokenStream as FromIterator<TokenTree>>::from_iter([
110                Punct::new('.', Joint).into(),
111                Punct::new('.', Joint).into(),
112                Punct::new('=', Joint).into(),
113            ]),
114            to.to_pat(span),
115        ])
116    }
117}
118
/// Test for the ASCII hyphen-minus, implemented for both element types
/// (`u8` and `char`) a class literal can contain.
trait IsDash {
    /// Returns `true` when the element is `-`.
    fn is_dash(&self) -> bool;
}

impl IsDash for u8 {
    fn is_dash(&self) -> bool {
        matches!(*self, b'-')
    }
}

impl IsDash for char {
    fn is_dash(&self) -> bool {
        matches!(*self, '-')
    }
}
132
/// Wraps `stream` in an `Option::Some( ... )` pattern.
///
/// The variant is spelled with the absolute path
/// `::core::option::Option::Some` rather than a bare `Some`: macro output is
/// resolved at the call site, so a bare identifier would break if the caller
/// shadows `Some` or opts out of the prelude. Every path token carries
/// `span`; the parenthesised group keeps its default span.
fn some(stream: TokenStream, span: Span) -> TokenStream {
    // One `::` separator, spanned like the identifiers around it.
    let path_sep = || {
        [Joint, Alone].map(|spacing| {
            let mut colon = Punct::new(':', spacing);
            colon.set_span(span);
            TokenTree::from(colon)
        })
    };

    let mut output = TokenStream::new();
    for segment in ["core", "option", "Option", "Some"] {
        output.extend(path_sep());
        output.extend([TokenTree::from(Ident::new(segment, span))]);
    }
    output.extend([TokenTree::from(Group::new(Delimiter::Parenthesis, stream))]);
    output
}
139
140fn to_pats<T, I>(iter: I, span: Span) -> Result<TokenStream, TokenStream>
141where T: ToPat + IsDash,
142      I: IntoIterator<Item = T>,
143{
144    let mut iter = iter.into_iter().peekable();
145    let Some(mut first) = iter.next() else {
146        return Err(err("cannot support empty pattern", span));
147    };
148    let mut result = TokenStream::new();
149    let mut sep: fn(&mut TokenStream) = |_| ();
150
151    while let Some(cur) = iter.next() {
152        sep(&mut result);
153
154        if let Some(to) = iter.next_if(|_| cur.is_dash()) {
155            result.extend([(first, to).to_pat(span)]);
156
157            if let Some(next) = iter.next() {
158                first = next;
159            } else {
160                return Ok(some(result, span));
161            }
162        } else {
163            result.extend([first.to_pat(span)]);
164            first = cur;
165        }
166
167        sep = |result| {
168            result.extend([TokenTree::from(Punct::new('|', Alone))]);
169        };
170    }
171
172    sep(&mut result);
173    result.extend([first.to_pat(span)]);
174    Ok(some(result, span))
175}
176
177/// Like `char_classes::any()`, expand into [`matches`] for better performance
178///
179/// # Examples
180///
181/// ```ignore
182/// use char_classes::any;
183///
184/// assert!(any!("ab",      'a'));
185/// assert!(any!("ab",      'b'));
186/// assert!(any!("ab",      'b'));
187/// assert!(any!("a-c",     'a'));
188/// assert!(any!("a-c",     'b'));
189/// assert!(any!("a-c",     'c'));
190/// assert!(any!(b"ab",    b'a'));
191/// assert!(any!(b"ab",    b'b'));
192///
193/// assert!(any!(b"ab")(b'b'));
194/// ```
195#[proc_macro]
196pub fn any(input: TokenStream) -> TokenStream {
197    let mut iter = input.into_iter();
198    let Some(first) = iter.next() else {
199        return err("unexpected end of input, expected a literal", Span::call_site());
200    };
201    let comma = iter.next();
202    if comma.as_ref().is_some_and(|comma| {
203        !matches!(&comma, TokenTree::Punct(p) if p.as_char() == ',')
204    }) {
205        return err("unexpected token, expected a comma", comma.unwrap().span());
206    }
207    let lit_str = match lit_str(&first) {
208        Ok(s) => s,
209        Err(e) => return e,
210    };
211    match lit_str {
212        Str::Norm(s) => to_pats(s.chars(), first.span()),
213        Str::Byte(bytes) => to_pats(bytes, first.span()),
214    }.map_or_else(identity, |pat| {
215        if let Some(comma) = comma {
216            matches(first_elem(iter.collect()).into_iter()
217                .chain([comma])
218                .chain(pat)
219                .collect())
220        } else {
221            let name = TokenTree::from(Ident::new("input", first.span()));
222            let mut comma = Punct::new(',', Alone);
223            comma.set_span(first.span());
224
225            let expr = once(Punct::new('|', Joint).into())
226                .chain([name.clone(), Punct::new('|', Alone).into()])
227                .chain(matches(first_elem(name.into())
228                        .into_iter()
229                        .chain([comma.into()])
230                        .chain(pat)
231                        .collect()))
232                .collect();
233            TokenTree::from(Group::new(Delimiter::None, expr)).into()
234        }
235    })
236}