logos_codegen/
lib.rs

1//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
2//!
3//! # Logos
4//!
5//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos).
6
7// The `quote!` macro requires deep recursion.
8#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13mod graph;
14mod leaf;
15mod parser;
16mod pattern;
17mod util;
18
19#[macro_use]
20#[allow(missing_docs)]
21mod macros;
22
23use std::error::Error;
24use std::ffi::OsStr;
25use std::path::Path;
26
27use error::Errors;
28use generator::Generator;
29use graph::{Graph, GraphError};
30use leaf::Leaf;
31use parser::Parser;
32use pattern::Pattern;
33use quote::ToTokens;
34
35use proc_macro2::{TokenStream, TokenTree};
36use quote::quote;
37use syn::spanned::Spanned;
38use syn::{parse_quote, LitBool};
39use syn::{Fields, ItemEnum};
40
41use crate::graph::Config;
42use crate::leaf::VariantKind;
43use crate::parser::{ErrorType, Subpatterns};
44
/// Name of the `#[logos(...)]` attribute; `is_logos_attr` treats it as a logos attribute.
45const LOGOS_ATTR: &str = "logos";
/// Name of the legacy `#[error]` variant attribute; `generate` reports a migration error for it.
46const ERROR_ATTR: &str = "error";
/// Name of the `#[token(...)]` variant attribute, matched in `generate` and `is_logos_attr`.
47const TOKEN_ATTR: &str = "token";
/// Name of the `#[regex(...)]` variant attribute, matched in `generate` and `is_logos_attr`.
48const REGEX_ATTR: &str = "regex";
49
50/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens.
51pub fn generate(input: TokenStream) -> TokenStream {
52    debug!("Reading input token streams");
53
54    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
55    let item_span = item.span();
56
57    let name = &item.ident;
58
59    let mut parser = Parser::default();
60
61    for param in item.generics.params {
62        parser.parse_generic(param);
63    }
64
65    for attr in &mut item.attrs {
66        parser.try_parse_logos(attr);
67    }
68
69    debug!("Iterating through subpatterns and skips");
70
71    let utf8_mode = parser
72        .utf8_mode
73        .as_ref()
74        .map(LitBool::value)
75        .unwrap_or(true);
76    let config = Config { utf8_mode };
77    let subpatterns = Subpatterns::new(&parser.subpatterns, utf8_mode, &mut parser.errors);
78
79    let mut pats = Vec::new();
80
81    for skip in parser.skips.drain(..) {
82        let Some(pattern_source) = subpatterns.subst_subpatterns(
83            &skip.literal.escape(false),
84            skip.literal.span(),
85            &mut parser.errors,
86        ) else {
87            continue;
88        };
89
90        let pattern = match Pattern::compile(
91            false,
92            &pattern_source,
93            skip.literal.token().to_string(),
94            skip.literal.unicode(),
95            false,
96        ) {
97            Ok(pattern) => pattern,
98            Err(err) => {
99                parser.errors.err(err, skip.literal.span());
100                continue;
101            }
102        };
103
104        let default_priority = pattern.priority();
105        pats.push(
106            Leaf::new(skip.literal.span(), pattern)
107                .priority(skip.priority.unwrap_or(default_priority))
108                .callback(skip.into_callback()),
109        );
110    }
111
112    debug!("Iterating through enum variants");
113
114    for variant in &mut item.variants {
115        let var_ident = variant.ident.clone();
116
117        let var_kind = match &mut variant.fields {
118            Fields::Unit => VariantKind::Unit(var_ident),
119            Fields::Unnamed(fields) => {
120                if fields.unnamed.len() != 1 {
121                    parser.err(
122                        format!(
123                            "Logos currently only supports variants with one field, found {}",
124                            fields.unnamed.len(),
125                        ),
126                        fields.span(),
127                    );
128                }
129
130                let ty = &mut fields
131                    .unnamed
132                    .first_mut()
133                    .expect("Already checked len; qed")
134                    .ty;
135                let ty = parser.get_type(ty);
136
137                VariantKind::Value(var_ident, ty)
138            }
139            Fields::Named(fields) => {
140                parser.err("Logos doesn't support named fields yet.", fields.span());
141
142                VariantKind::Skip
143            }
144        };
145
146        for attr in &mut variant.attrs {
147            let attr_name = match attr.path().get_ident() {
148                Some(ident) => ident.to_string(),
149                None => continue,
150            };
151
152            match attr_name.as_str() {
153                ERROR_ATTR => {
154                    // TODO: Remove in future versions
155                    parser.err(
156                        concat!(
157                            "Since 0.13 Logos no longer requires the #[error] variant.",
158                            "\n\n",
159                            "For help with migration see release notes: ",
160                            "https://github.com/maciejhirsz/logos/releases"
161                        ),
162                        attr.span(),
163                    );
164                }
165                TOKEN_ATTR => {
166                    let definition = match parser.parse_definition(attr) {
167                        Some(definition) => definition,
168                        None => {
169                            parser.err("Expected #[token(...)]", attr.span());
170                            continue;
171                        }
172                    };
173
174                    let pattern_res = if definition.ignore_flags.ignore_case {
175                        let pattern_src = definition.literal.escape(true);
176                        Pattern::compile(
177                            true,
178                            &pattern_src,
179                            definition.literal.token().to_string(),
180                            definition.literal.unicode(),
181                            true,
182                        )
183                    } else {
184                        Pattern::compile_lit(&definition.literal)
185                    };
186
187                    let pattern = match pattern_res {
188                        Ok(pattern) => pattern,
189                        Err(err) => {
190                            parser.err(err, definition.literal.span());
191                            continue;
192                        }
193                    };
194
195                    let literal_len = match &definition.literal {
196                        parser::Literal::Utf8(lit_str) => lit_str.value().len(),
197                        parser::Literal::Bytes(lit_byte_str) => lit_byte_str.value().len(),
198                    };
199
200                    pats.push(
201                        Leaf::new(definition.literal.span(), pattern)
202                            .variant_kind(var_kind.clone())
203                            .priority(definition.priority.unwrap_or(literal_len * 2))
204                            .callback(definition.callback),
205                    );
206                }
207                REGEX_ATTR => {
208                    let definition = match parser.parse_definition(attr) {
209                        Some(definition) => definition,
210                        None => {
211                            parser.err("Expected #[regex(...)]", attr.span());
212                            continue;
213                        }
214                    };
215
216                    let Some(pattern_source) = subpatterns.subst_subpatterns(
217                        &definition.literal.escape(false),
218                        definition.literal.span(),
219                        &mut parser.errors,
220                    ) else {
221                        continue;
222                    };
223
224                    let unicode = definition.literal.unicode();
225                    let ignore_case = definition.ignore_flags.ignore_case;
226                    let pattern = match Pattern::compile(
227                        false,
228                        &pattern_source,
229                        definition.literal.token().to_string(),
230                        unicode,
231                        ignore_case,
232                    ) {
233                        Ok(pattern) => pattern,
234                        Err(err) => {
235                            parser.err(err, definition.literal.span());
236                            continue;
237                        }
238                    };
239
240                    let allow_greedy = definition.allow_greedy.unwrap_or(false);
241                    if !allow_greedy && pattern.check_for_greedy_all() {
242                        parser.err(concat!(
243                            "This pattern contains an unbounded greedy dot repetition (.* or .+). ",
244                            "This will cause the entirety of the input to be read for every token. ",
245                            "Consider making your repetition non-greedy or changing it to a more ",
246                            "specific character class. If this is the intended behavior, add ",
247                            "#[regex(..., allow_greedy = true)]"
248                        ), definition.literal.span());
249                    }
250
251                    let default_priority = pattern.priority();
252                    pats.push(
253                        Leaf::new(definition.literal.span(), pattern)
254                            .variant_kind(var_kind.clone())
255                            .priority(definition.priority.unwrap_or(default_priority))
256                            .callback(definition.callback),
257                    );
258                }
259                _ => (),
260            }
261        }
262    }
263
264    debug!("Parsing additional options (extras, utf8, ...)");
265
266    let ErrorType {
267        ty: error_type,
268        callback: error_callback,
269    } = parser.error_type.take().unwrap_or_default();
270    let extras = parser.extras.take();
271    let non_utf8_pats = pats
272        .iter()
273        .filter(|leaf| !leaf.pattern.hir().properties().is_utf8())
274        .collect::<Vec<_>>();
275    if utf8_mode && !non_utf8_pats.is_empty() {
276        // If utf8 mode is specified, make sure no patterns match illegal utf8
277        for leaf in non_utf8_pats {
278            parser.err(format!(concat!(
279                "UTF-8 mode is requested, but the pattern {} of variant `{}` can match invalid utf8.\n",
280                "You can disable UTF-8 mode with #[logos(utf8 = false)]"
281            ), leaf.pattern.source(), leaf.kind), leaf.span);
282        }
283    };
284
285    let source = match utf8_mode {
286        true => quote!(str),
287        false => quote!([u8]),
288    };
289    let logos_path = parser
290        .logos_path
291        .take()
292        .unwrap_or_else(|| parse_quote!(::logos));
293
294    let generics = parser.generics();
295    let this = quote!(#name #generics);
296
297    let impl_logos = |body| {
298        quote! {
299            impl<'s> #logos_path::Logos<'s> for #this {
300                type Error = #error_type;
301
302                type Extras = #extras;
303
304                type Source = #source;
305
306                fn lex(lex: &mut #logos_path::Lexer<'s, Self>)
307                    -> std::option::Option<std::result::Result<Self, <Self as #logos_path::Logos<'s>>::Error>> {
308                    #body
309                }
310            }
311        }
312    };
313
314    if cfg!(feature = "debug") {
315        let leaves_rendered = pats
316            .iter()
317            .enumerate()
318            .map(|(leaf_id, leaf)| format!("  {}: {} (priority: {})", leaf_id, leaf, leaf.priority))
319            .collect::<Vec<_>>()
320            .join("\n");
321        debug!("Generated leaves:\n{leaves_rendered}");
322    }
323
324    debug!("Generating graph from leaves");
325
326    let graph = match Graph::new(pats, config) {
327        Ok(nfa) => nfa,
328        Err(msg) => {
329            let mut errors = Errors::default();
330            errors.err(msg, item_span);
331            return impl_logos(errors.render().unwrap());
332        }
333    };
334
335    debug!("Generated Automaton:\n{:?}", graph.dfa());
336    debug!("Generated Graph:\n{graph}");
337    debug!("Root node: {:?}", graph.root());
338
339    if cfg!(feature = "debug") {
340        if let Some(export_path) = parser.export_path.as_ref() {
341            debug!("Exporting graphs");
342            let lower_name = name.to_string().to_lowercase();
343
344            if let Err(err) = generate_graphs(export_path, &lower_name, &graph) {
345                debug!("Failed to export graphs: {err}");
346            }
347        }
348    }
349
350    debug!("Checking if any two tokens have the same priority");
351
352    for error in graph.errors() {
353        match error {
354            GraphError::Disambiguation(matching) => {
355                for leaf_id in matching {
356                    let leaf = &graph.leaves()[leaf_id.0];
357                    let priority = leaf.priority;
358
359                    let matching = matching
360                        .iter()
361                        .filter(|&id| id != leaf_id)
362                        .map(|matchind_id| format!("  {}", &graph.leaves()[matchind_id.0]))
363                        .collect::<Vec<_>>()
364                        .join("\n");
365
366                    parser.err(
367                        format!(
368                            concat!(
369                                "The pattern {} can match simultaneously with the following variants:\n",
370                                "{}\n",
371                                "\n",
372                                "(all at the priority {})"
373                            ),
374                            leaf, matching, priority
375                        ),
376                        leaf.span,
377                    );
378                }
379            }
380            GraphError::NoUniveralStart => {
381                parser.err(concat!(
382                    "The state machine implementing this lexer is missing a universal start state,",
383                    "which is unsupported by logos. This is most likely do to a lookbehind assertion ",
384                    "at the start of the regex."
385                ), item_span);
386            }
387            GraphError::EmptyMatch(leaf_id) => {
388                parser.err(
389                    format!(
390                        "The pattern {} can match the empty string, which is unsupported by logos.",
391                        &graph.leaves()[leaf_id.0],
392                    ),
393                    graph.leaves()[leaf_id.0].span,
394                );
395            }
396        }
397    }
398
399    if let Some(errors) = parser.errors.render() {
400        return impl_logos(errors);
401    }
402
403    debug!("Generating code from graph");
404
405    let config = crate::generator::Config {
406        use_state_machine_codegen: cfg!(feature = "state_machine_codegen"),
407    };
408    let mut generator = Generator::new(config, name, &this, &graph, &error_callback);
409
410    let body = generator.generate();
411    impl_logos(quote! {
412        use #logos_path::internal::{
413            LexerInternal,
414            CallbackRetVal,
415            CallbackResult,
416            SkipRetVal,
417            SkipResult,
418        };
419        use std::result::Result as _Result;
420        use std::option::Option as _Option;
421        use #logos_path::Logos;
422
423        type _Lexer<'s> = #logos_path::Lexer<'s, #this>;
424
425        #body
426    })
427}
428
429/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present.
430pub fn strip_attributes(input: TokenStream) -> TokenStream {
431    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
432
433    strip_attrs_from_vec(&mut item.attrs);
434
435    for attr in &mut item.attrs {
436        if let syn::Meta::List(meta) = &mut attr.meta {
437            if meta.path.is_ident("derive") {
438                let mut tokens =
439                    std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
440
441                while let Some(TokenTree::Ident(ident)) = tokens.next() {
442                    let punct = tokens.next();
443
444                    if ident == "Logos" {
445                        continue;
446                    }
447
448                    meta.tokens.extend([TokenTree::Ident(ident)]);
449                    meta.tokens.extend(punct);
450                }
451            }
452        }
453    }
454
455    for variant in &mut item.variants {
456        strip_attrs_from_vec(&mut variant.attrs);
457        for field in &mut variant.fields {
458            strip_attrs_from_vec(&mut field.attrs);
459        }
460    }
461
462    item.to_token_stream()
463}
464
465fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
466    attrs.retain(|attr| !is_logos_attr(attr))
467}
468
469fn is_logos_attr(attr: &syn::Attribute) -> bool {
470    attr.path().is_ident(LOGOS_ATTR)
471        || attr.path().is_ident(TOKEN_ATTR)
472        || attr.path().is_ident(REGEX_ATTR)
473}
474
475fn generate_graphs(path_str: &str, name: &str, graph: &Graph) -> Result<(), Box<dyn Error>> {
476    let path = Path::new(path_str).to_owned();
477
478    let (dot_path, mmd_path) = match path.extension().map(OsStr::to_str) {
479        Some(Some("dot")) => (Some(path), None),
480        Some(Some("mmd")) => (None, Some(path)),
481        Some(_) => {
482            return Err(String::from(
483                "Export path must end in '.dot' or '.mmd', or it must be a directory.",
484            )
485            .into())
486        }
487        None => {
488            let dot_path = path.join(format!("{name}.dot"));
489            let mmd_path = path.join(format!("{name}.mmd"));
490            (Some(dot_path), Some(mmd_path))
491        }
492    };
493
494    for (path, is_dot) in [(dot_path, true), (mmd_path, false)] {
495        let Some(path) = path else { continue };
496
497        if let Some(parent) = path.parent() {
498            std::fs::create_dir_all(parent)?;
499        }
500
501        let s = if is_dot {
502            graph.get_dot()
503        } else {
504            graph.get_mermaid()
505        }?;
506        std::fs::write(path, s)?;
507    }
508
509    Ok(())
510}