logos_codegen/
lib.rs

1//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
2//!
3//! # Logos
4//!
5//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos).
6
7// The `quote!` macro requires deep recursion.
8#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13#[cfg(not(feature = "fuzzing"))]
14mod graph;
15#[cfg(feature = "fuzzing")]
16pub mod graph;
17mod leaf;
18#[cfg(not(feature = "fuzzing"))]
19mod mir;
20#[cfg(feature = "fuzzing")]
21pub mod mir;
22mod parser;
23mod util;
24
25#[macro_use]
26#[allow(missing_docs)]
27mod macros;
28
29use generator::Generator;
30use graph::{DisambiguationError, Fork, Graph, Rope};
31use leaf::Leaf;
32use parser::{IgnoreFlags, Mode, Parser};
33use quote::ToTokens;
34use util::MaybeVoid;
35
36use proc_macro2::{Delimiter, TokenStream, TokenTree};
37use quote::quote;
38use syn::parse_quote;
39use syn::spanned::Spanned;
40use syn::{Fields, ItemEnum};
41
42const LOGOS_ATTR: &str = "logos";
43const ERROR_ATTR: &str = "error";
44const TOKEN_ATTR: &str = "token";
45const REGEX_ATTR: &str = "regex";
46
47/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens.
48pub fn generate(input: TokenStream) -> TokenStream {
49    debug!("Reading input token streams");
50
51    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
52
53    let name = &item.ident;
54
55    let mut parser = Parser::default();
56
57    for param in item.generics.params {
58        parser.parse_generic(param);
59    }
60
61    for attr in &mut item.attrs {
62        parser.try_parse_logos(attr);
63    }
64
65    let mut ropes = Vec::new();
66    let mut regex_ids = Vec::new();
67    let mut graph = Graph::new();
68
69    {
70        let errors = &mut parser.errors;
71
72        for mut skip in parser.skips.drain(..) {
73            match skip
74                .literal
75                .to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors)
76            {
77                Ok(mir) => {
78                    let then = graph.push(
79                        Leaf::new_skip(skip.literal.span())
80                            .priority(skip.priority.take().unwrap_or_else(|| mir.priority()))
81                            .callback(Some(skip.into_callback())),
82                    );
83                    let id = graph.regex(mir, then);
84
85                    regex_ids.push(id);
86                }
87                Err(err) => {
88                    errors.err(err, skip.literal.span());
89                }
90            }
91        }
92    }
93
94    debug!("Iterating through enum variants");
95
96    for variant in &mut item.variants {
97        let field = match &mut variant.fields {
98            Fields::Unit => MaybeVoid::Void,
99            Fields::Unnamed(fields) => {
100                if fields.unnamed.len() != 1 {
101                    parser.err(
102                        format!(
103                            "Logos currently only supports variants with one field, found {}",
104                            fields.unnamed.len(),
105                        ),
106                        fields.span(),
107                    );
108                }
109
110                let ty = &mut fields
111                    .unnamed
112                    .first_mut()
113                    .expect("Already checked len; qed")
114                    .ty;
115                let ty = parser.get_type(ty);
116
117                MaybeVoid::Some(ty)
118            }
119            Fields::Named(fields) => {
120                parser.err("Logos doesn't support named fields yet.", fields.span());
121
122                MaybeVoid::Void
123            }
124        };
125
126        // Lazy leaf constructor to avoid cloning
127        let var_ident = &variant.ident;
128        let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
129
130        for attr in &mut variant.attrs {
131            let attr_name = match attr.path().get_ident() {
132                Some(ident) => ident.to_string(),
133                None => continue,
134            };
135
136            match attr_name.as_str() {
137                ERROR_ATTR => {
138                    // TODO: Remove in future versions
139                    parser.err(
140                        "\
141                        Since 0.13 Logos no longer requires the #[error] variant.\n\
142                        \n\
143                        For help with migration see release notes: \
144                        https://github.com/maciejhirsz/logos/releases\
145                        ",
146                        attr.span(),
147                    );
148                }
149                TOKEN_ATTR => {
150                    let definition = match parser.parse_definition(attr) {
151                        Some(definition) => definition,
152                        None => {
153                            parser.err("Expected #[token(...)]", attr.span());
154                            continue;
155                        }
156                    };
157
158                    if definition.ignore_flags.is_empty() {
159                        let bytes = definition.literal.to_bytes();
160                        let then = graph.push(
161                            leaf(definition.literal.span())
162                                .priority(definition.priority.unwrap_or(bytes.len() * 2))
163                                .callback(definition.callback),
164                        );
165
166                        ropes.push(Rope::new(bytes, then));
167                    } else {
168                        let mir = definition
169                            .literal
170                            .escape_regex()
171                            .to_mir(
172                                &Default::default(),
173                                definition.ignore_flags,
174                                &mut parser.errors,
175                            )
176                            .expect("The literal should be perfectly valid regex");
177
178                        let then = graph.push(
179                            leaf(definition.literal.span())
180                                .priority(definition.priority.unwrap_or_else(|| mir.priority()))
181                                .callback(definition.callback),
182                        );
183                        let id = graph.regex(mir, then);
184
185                        regex_ids.push(id);
186                    }
187                }
188                REGEX_ATTR => {
189                    let definition = match parser.parse_definition(attr) {
190                        Some(definition) => definition,
191                        None => {
192                            parser.err("Expected #[regex(...)]", attr.span());
193                            continue;
194                        }
195                    };
196                    let mir = match definition.literal.to_mir(
197                        &parser.subpatterns,
198                        definition.ignore_flags,
199                        &mut parser.errors,
200                    ) {
201                        Ok(mir) => mir,
202                        Err(err) => {
203                            parser.err(err, definition.literal.span());
204                            continue;
205                        }
206                    };
207
208                    let then = graph.push(
209                        leaf(definition.literal.span())
210                            .priority(definition.priority.unwrap_or_else(|| mir.priority()))
211                            .callback(definition.callback),
212                    );
213                    let id = graph.regex(mir, then);
214
215                    regex_ids.push(id);
216                }
217                _ => (),
218            }
219        }
220    }
221
222    let mut root = Fork::new();
223
224    debug!("Parsing additional options (extras, source, ...)");
225
226    let (error_type, error_callback) = parser::ErrorType::unwrap(parser.error_type.take());
227    let extras = parser.extras.take();
228    let source = parser
229        .source
230        .take()
231        .map(strip_wrapping_parens)
232        .unwrap_or(match parser.mode {
233            Mode::Utf8 => quote!(str),
234            Mode::Binary => quote!([u8]),
235        });
236    let logos_path = parser
237        .logos_path
238        .take()
239        .unwrap_or_else(|| parse_quote!(::logos));
240
241    let make_error_impl = match error_callback {
242        Some(leaf::Callback::Label(label)) => Some(quote! {
243            #[inline]
244            fn make_error(mut lex: &mut #logos_path::Lexer<'s, Self>) {
245                use #logos_path::{Lexer, internal::LexerInternal};
246
247                let error = #label(&mut lex);
248                lex.set(Err(error));
249            }
250        }),
251        Some(leaf::Callback::Inline(inline)) => {
252            let leaf::InlineCallback { arg, body, .. } = *inline;
253
254            Some(quote! {
255                #[inline]
256                fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) {
257                    use #logos_path::internal::LexerInternal;
258
259                    let error = { #body };
260                    #arg.set(Err(error))
261                }
262            })
263        }
264        _ => None,
265    };
266
267    let generics = parser.generics();
268    let this = quote!(#name #generics);
269
270    let impl_logos = |body| {
271        quote! {
272            impl<'s> #logos_path::Logos<'s> for #this {
273                type Error = #error_type;
274
275                type Extras = #extras;
276
277                type Source = #source;
278
279                fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
280                    #body
281                }
282
283                #make_error_impl
284            }
285        }
286    };
287
288    for id in regex_ids {
289        let fork = graph.fork_off(id);
290
291        root.merge(fork, &mut graph);
292    }
293    for rope in ropes {
294        root.merge(rope.into_fork(&mut graph), &mut graph);
295    }
296    while let Some(id) = root.miss.take() {
297        let fork = graph.fork_off(id);
298
299        if fork.branches().next().is_some() {
300            root.merge(fork, &mut graph);
301        } else {
302            break;
303        }
304    }
305
306    debug!("Checking if any two tokens have the same priority");
307
308    for &DisambiguationError(a, b) in graph.errors() {
309        let a = graph[a].unwrap_leaf();
310        let b = graph[b].unwrap_leaf();
311        let disambiguate = a.priority + 1;
312
313        let mut err = |a: &Leaf, b: &Leaf| {
314            parser.err(
315                format!(
316                    "\
317                    A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
318                    \n\
319                    hint: Consider giving one definition a higher priority: \
320                    #[{attr}(..., priority = {disambiguate})]\
321                    ",
322                    attr = match a.callback {
323                        Some(_) => "regex",
324                        None => "skip"
325                    }
326                ),
327                a.span
328            );
329        };
330
331        err(a, b);
332        err(b, a);
333    }
334
335    if let Some(errors) = parser.errors.render() {
336        return impl_logos(errors);
337    }
338
339    let root = graph.push(root);
340
341    graph.shake(root);
342
343    #[cfg(feature = "debug")]
344    {
345        debug!("Generating graphs");
346
347        if let Some(path) = parser.export_dir {
348            let path = std::path::Path::new(&path);
349            let dir = if path.extension().is_none() {
350                path
351            } else {
352                path.parent().unwrap_or(std::path::Path::new(""))
353            };
354            match std::fs::create_dir_all(dir) {
355                Ok(()) => {
356                    if path.extension() == Some(std::ffi::OsStr::new("dot"))
357                        || path.extension().is_none()
358                    {
359                        match graph.get_dot() {
360                            Ok(s) => {
361                                let dot_path = if path.extension().is_none() {
362                                    path.join(format!("{}.dot", name.to_string().to_lowercase()))
363                                } else {
364                                    path.to_path_buf()
365                                };
366                                if let Err(e) = std::fs::write(dot_path, s) {
367                                    debug!("Error writing dot graph: {}", e);
368                                }
369                            }
370                            Err(e) => {
371                                debug!("Error generating dot graph: {}", e);
372                            }
373                        }
374                    }
375
376                    if path.extension() == Some(std::ffi::OsStr::new("mmd"))
377                        || path.extension().is_none()
378                    {
379                        match graph.get_mermaid() {
380                            Ok(s) => {
381                                let mermaid_path = if path.extension().is_none() {
382                                    path.join(format!("{}.mmd", name.to_string().to_lowercase()))
383                                } else {
384                                    path.to_path_buf()
385                                };
386                                if let Err(e) = std::fs::write(mermaid_path, s) {
387                                    debug!("Error writing mermaid graph: {}", e);
388                                }
389                            }
390                            Err(e) => {
391                                debug!("Error generating mermaid graph: {}", e);
392                            }
393                        }
394                    }
395                }
396                Err(e) => {
397                    debug!("Error creating graph export dir: {}", e);
398                }
399            }
400        }
401    }
402
403    debug!("Generating code from graph:\n{graph:#?}");
404
405    let generator = Generator::new(name, &this, root, &graph);
406
407    let body = generator.generate();
408    impl_logos(quote! {
409        use #logos_path::internal::{LexerInternal, CallbackResult, SkipCallbackResult};
410
411        type Lexer<'s> = #logos_path::Lexer<'s, #this>;
412
413        fn _end<'s>(lex: &mut Lexer<'s>) {
414            lex.end()
415        }
416
417        fn _error<'s>(lex: &mut Lexer<'s>) {
418            lex.bump_unchecked(1);
419
420            lex.error();
421        }
422
423        #body
424    })
425}
426
427/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present.
428pub fn strip_attributes(input: TokenStream) -> TokenStream {
429    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
430
431    strip_attrs_from_vec(&mut item.attrs);
432
433    for attr in &mut item.attrs {
434        if let syn::Meta::List(meta) = &mut attr.meta {
435            if meta.path.is_ident("derive") {
436                let mut tokens =
437                    std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
438
439                while let Some(TokenTree::Ident(ident)) = tokens.next() {
440                    let punct = tokens.next();
441
442                    if ident == "Logos" {
443                        continue;
444                    }
445
446                    meta.tokens.extend([TokenTree::Ident(ident)]);
447                    meta.tokens.extend(punct);
448                }
449            }
450        }
451    }
452
453    for variant in &mut item.variants {
454        strip_attrs_from_vec(&mut variant.attrs);
455        for field in &mut variant.fields {
456            strip_attrs_from_vec(&mut field.attrs);
457        }
458    }
459
460    item.to_token_stream()
461}
462
463fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
464    attrs.retain(|attr| !is_logos_attr(attr))
465}
466
467fn is_logos_attr(attr: &syn::Attribute) -> bool {
468    attr.path().is_ident(LOGOS_ATTR)
469        || attr.path().is_ident(TOKEN_ATTR)
470        || attr.path().is_ident(REGEX_ATTR)
471}
472
473fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
474    let tts: Vec<TokenTree> = t.into_iter().collect();
475
476    if tts.len() != 1 {
477        tts.into_iter().collect()
478    } else {
479        match tts.into_iter().next().unwrap() {
480            TokenTree::Group(g) => {
481                if g.delimiter() == Delimiter::Parenthesis {
482                    g.stream()
483                } else {
484                    core::iter::once(TokenTree::Group(g)).collect()
485                }
486            }
487            tt => core::iter::once(tt).collect(),
488        }
489    }
490}