logos_codegen/
lib.rs

1//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
2//!
3//! # Logos
4//!
5//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos2).
6
7// The `quote!` macro requires deep recursion.
8#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13mod graph;
14mod leaf;
15mod mir;
16mod parser;
17mod util;
18
19use generator::Generator;
20use graph::{DisambiguationError, Fork, Graph, Rope};
21use leaf::Leaf;
22use parser::{IgnoreFlags, Mode, Parser};
23use quote::ToTokens;
24use util::MaybeVoid;
25
26use proc_macro2::{Delimiter, TokenStream, TokenTree};
27use quote::quote;
28use syn::parse_quote;
29use syn::spanned::Spanned;
30use syn::{Fields, ItemEnum};
31
// Attribute names recognised by this crate. `LOGOS_ATTR`, `TOKEN_ATTR` and
// `REGEX_ATTR` are the ones removed by `strip_attributes`.
const LOGOS_ATTR: &str = "logos";
// `#[error]` is matched in `generate` only to report a migration error.
const ERROR_ATTR: &str = "error";
const TOKEN_ATTR: &str = "token";
const REGEX_ATTR: &str = "regex";
36
37/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens.
38pub fn generate(input: TokenStream) -> TokenStream {
39    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
40
41    let name = &item.ident;
42
43    let mut parser = Parser::default();
44
45    for param in item.generics.params {
46        parser.parse_generic(param);
47    }
48
49    for attr in &mut item.attrs {
50        parser.try_parse_logos(attr);
51    }
52
53    let mut ropes = Vec::new();
54    let mut regex_ids = Vec::new();
55    let mut graph = Graph::new();
56
57    {
58        let errors = &mut parser.errors;
59
60        for literal in &parser.skips {
61            match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) {
62                Ok(mir) => {
63                    let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority()));
64                    let id = graph.regex(mir, then);
65
66                    regex_ids.push(id);
67                }
68                Err(err) => {
69                    errors.err(err, literal.span());
70                }
71            }
72        }
73    }
74
75    for variant in &mut item.variants {
76        let field = match &mut variant.fields {
77            Fields::Unit => MaybeVoid::Void,
78            Fields::Unnamed(fields) => {
79                if fields.unnamed.len() != 1 {
80                    parser.err(
81                        format!(
82                            "Logos currently only supports variants with one field, found {}",
83                            fields.unnamed.len(),
84                        ),
85                        fields.span(),
86                    );
87                }
88
89                let ty = &mut fields
90                    .unnamed
91                    .first_mut()
92                    .expect("Already checked len; qed")
93                    .ty;
94                let ty = parser.get_type(ty);
95
96                MaybeVoid::Some(ty)
97            }
98            Fields::Named(fields) => {
99                parser.err("Logos doesn't support named fields yet.", fields.span());
100
101                MaybeVoid::Void
102            }
103        };
104
105        // Lazy leaf constructor to avoid cloning
106        let var_ident = &variant.ident;
107        let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
108
109        for attr in &mut variant.attrs {
110            let attr_name = match attr.path().get_ident() {
111                Some(ident) => ident.to_string(),
112                None => continue,
113            };
114
115            match attr_name.as_str() {
116                ERROR_ATTR => {
117                    // TODO: Remove in future versions
118                    parser.err(
119                        "\
120                        Since 0.13 Logos no longer requires the #[error] variant.\n\
121                        \n\
122                        For help with migration see release notes: \
123                        https://github.com/maciejhirsz/logos/releases\
124                        ",
125                        attr.span(),
126                    );
127                }
128                TOKEN_ATTR => {
129                    let definition = match parser.parse_definition(attr) {
130                        Some(definition) => definition,
131                        None => {
132                            parser.err("Expected #[token(...)]", attr.span());
133                            continue;
134                        }
135                    };
136
137                    if definition.ignore_flags.is_empty() {
138                        let bytes = definition.literal.to_bytes();
139                        let then = graph.push(
140                            leaf(definition.literal.span())
141                                .priority(definition.priority.unwrap_or(bytes.len() * 2))
142                                .callback(definition.callback),
143                        );
144
145                        ropes.push(Rope::new(bytes, then));
146                    } else {
147                        let mir = definition
148                            .literal
149                            .escape_regex()
150                            .to_mir(
151                                &Default::default(),
152                                definition.ignore_flags,
153                                &mut parser.errors,
154                            )
155                            .expect("The literal should be perfectly valid regex");
156
157                        let then = graph.push(
158                            leaf(definition.literal.span())
159                                .priority(definition.priority.unwrap_or_else(|| mir.priority()))
160                                .callback(definition.callback),
161                        );
162                        let id = graph.regex(mir, then);
163
164                        regex_ids.push(id);
165                    }
166                }
167                REGEX_ATTR => {
168                    let definition = match parser.parse_definition(attr) {
169                        Some(definition) => definition,
170                        None => {
171                            parser.err("Expected #[regex(...)]", attr.span());
172                            continue;
173                        }
174                    };
175                    let mir = match definition.literal.to_mir(
176                        &parser.subpatterns,
177                        definition.ignore_flags,
178                        &mut parser.errors,
179                    ) {
180                        Ok(mir) => mir,
181                        Err(err) => {
182                            parser.err(err, definition.literal.span());
183                            continue;
184                        }
185                    };
186
187                    let then = graph.push(
188                        leaf(definition.literal.span())
189                            .priority(definition.priority.unwrap_or_else(|| mir.priority()))
190                            .callback(definition.callback),
191                    );
192                    let id = graph.regex(mir, then);
193
194                    regex_ids.push(id);
195                }
196                _ => (),
197            }
198        }
199    }
200
201    let mut root = Fork::new();
202
203    let error_type = parser.error_type.take();
204    let extras = parser.extras.take();
205    let source = parser
206        .source
207        .take()
208        .map(strip_wrapping_parens)
209        .unwrap_or(match parser.mode {
210            Mode::Utf8 => quote!(str),
211            Mode::Binary => quote!([u8]),
212        });
213    let logos_path = parser
214        .logos_path
215        .take()
216        .unwrap_or_else(|| parse_quote!(::logos));
217
218    let generics = parser.generics();
219    let this = quote!(#name #generics);
220
221    let impl_logos = |body| {
222        quote! {
223            impl<'s> #logos_path::Logos<'s> for #this {
224                type Error = #error_type;
225
226                type Extras = #extras;
227
228                type Source = #source;
229
230                fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
231                    #body
232                }
233            }
234        }
235    };
236
237    for id in regex_ids {
238        let fork = graph.fork_off(id);
239
240        root.merge(fork, &mut graph);
241    }
242    for rope in ropes {
243        root.merge(rope.into_fork(&mut graph), &mut graph);
244    }
245    while let Some(id) = root.miss.take() {
246        let fork = graph.fork_off(id);
247
248        if fork.branches().next().is_some() {
249            root.merge(fork, &mut graph);
250        } else {
251            break;
252        }
253    }
254
255    for &DisambiguationError(a, b) in graph.errors() {
256        let a = graph[a].unwrap_leaf();
257        let b = graph[b].unwrap_leaf();
258        let disambiguate = a.priority + 1;
259
260        let mut err = |a: &Leaf, b: &Leaf| {
261            parser.err(
262                format!(
263                    "\
264                    A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
265                    \n\
266                    hint: Consider giving one definition a higher priority: \
267                    #[regex(..., priority = {disambiguate})]\
268                    ",
269                ),
270                a.span
271            );
272        };
273
274        err(a, b);
275        err(b, a);
276    }
277
278    if let Some(errors) = parser.errors.render() {
279        return impl_logos(errors);
280    }
281
282    let root = graph.push(root);
283
284    graph.shake(root);
285
286    let generator = Generator::new(name, &this, root, &graph);
287
288    let body = generator.generate();
289    impl_logos(quote! {
290        use #logos_path::internal::{LexerInternal, CallbackResult};
291
292        type Lexer<'s> = #logos_path::Lexer<'s, #this>;
293
294        fn _end<'s>(lex: &mut Lexer<'s>) {
295            lex.end()
296        }
297
298        fn _error<'s>(lex: &mut Lexer<'s>) {
299            lex.bump_unchecked(1);
300
301            lex.error();
302        }
303
304        #body
305    })
306}
307
308/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present.
309pub fn strip_attributes(input: TokenStream) -> TokenStream {
310    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
311
312    strip_attrs_from_vec(&mut item.attrs);
313
314    for attr in &mut item.attrs {
315        if let syn::Meta::List(meta) = &mut attr.meta {
316            if meta.path.is_ident("derive") {
317                let mut tokens =
318                    std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
319
320                while let Some(TokenTree::Ident(ident)) = tokens.next() {
321                    let punct = tokens.next();
322
323                    if ident == "Logos" {
324                        continue;
325                    }
326
327                    meta.tokens.extend([TokenTree::Ident(ident)]);
328                    meta.tokens.extend(punct);
329                }
330            }
331        }
332    }
333
334    for variant in &mut item.variants {
335        strip_attrs_from_vec(&mut variant.attrs);
336        for field in &mut variant.fields {
337            strip_attrs_from_vec(&mut field.attrs);
338        }
339    }
340
341    item.to_token_stream()
342}
343
344fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
345    attrs.retain(|attr| !is_logos_attr(attr))
346}
347
348fn is_logos_attr(attr: &syn::Attribute) -> bool {
349    attr.path().is_ident(LOGOS_ATTR)
350        || attr.path().is_ident(TOKEN_ATTR)
351        || attr.path().is_ident(REGEX_ATTR)
352}
353
354fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
355    let tts: Vec<TokenTree> = t.into_iter().collect();
356
357    if tts.len() != 1 {
358        tts.into_iter().collect()
359    } else {
360        match tts.into_iter().next().unwrap() {
361            TokenTree::Group(g) => {
362                if g.delimiter() == Delimiter::Parenthesis {
363                    g.stream()
364                } else {
365                    core::iter::once(TokenTree::Group(g)).collect()
366                }
367            }
368            tt => core::iter::once(tt).collect(),
369        }
370    }
371}