Skip to main content

logos_codegen/
lib.rs

1//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
2//!
3//! # Logos
4//!
5//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos).
6
7// The `quote!` macro requires deep recursion.
8#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13mod graph;
14mod leaf;
15mod parser;
16mod pattern;
17mod util;
18
19#[macro_use]
20#[allow(missing_docs)]
21mod macros;
22
23use std::error::Error;
24use std::ffi::OsStr;
25use std::mem;
26use std::path::Path;
27
28use error::Errors;
29use generator::Generator;
30use graph::{Graph, GraphError};
31use leaf::Leaf;
32use parser::Parser;
33use pattern::Pattern;
34use quote::ToTokens;
35
36use proc_macro2::{TokenStream, TokenTree};
37use quote::quote;
38use syn::spanned::Spanned;
39use syn::{parse_quote, LitBool};
40use syn::{Fields, ItemEnum};
41
42use crate::graph::Config;
43use crate::leaf::VariantKind;
44use crate::parser::{Definition, ErrorType, Subpatterns};
45
46const LOGOS_ATTR: &str = "logos";
47const ERROR_ATTR: &str = "error";
48const TOKEN_ATTR: &str = "token";
49const REGEX_ATTR: &str = "regex";
50
51/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens.
52pub fn generate(input: TokenStream) -> TokenStream {
53    debug!("Reading input token streams");
54
55    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
56    let item_span = item.span();
57
58    let name = &item.ident;
59
60    let mut parser = Parser::default();
61
62    for param in item.generics.params {
63        parser.parse_generic(param);
64    }
65
66    for attr in &mut item.attrs {
67        parser.try_parse_logos(attr);
68    }
69
70    debug!("Iterating through subpatterns and skips");
71
72    let utf8_mode = parser
73        .utf8_mode
74        .as_ref()
75        .map(LitBool::value)
76        .unwrap_or(true);
77    let config = Config { utf8_mode };
78    let subpatterns = Subpatterns::new(&parser.subpatterns, utf8_mode, &mut parser.errors);
79
80    let mut pats = Vec::new();
81
82    for skip in mem::take(&mut parser.skips) {
83        let Some(pattern_source) = subpatterns.subst_subpatterns(
84            &skip.literal.escape(false),
85            skip.literal.span(),
86            &mut parser.errors,
87        ) else {
88            continue;
89        };
90
91        let pattern = match Pattern::compile(
92            false,
93            &pattern_source,
94            skip.literal.token().to_string(),
95            skip.literal.unicode(),
96            false,
97        ) {
98            Ok(pattern) => pattern,
99            Err(err) => {
100                parser.errors.err(err, skip.literal.span());
101                continue;
102            }
103        };
104        greedy_dotall_check(&skip, &pattern, &mut parser);
105
106        let default_priority = pattern.priority();
107        pats.push(
108            Leaf::new(skip.literal.span(), pattern)
109                .priority(skip.priority.unwrap_or(default_priority))
110                .callback(skip.callback),
111        );
112    }
113
114    debug!("Iterating through enum variants");
115
116    for variant in &mut item.variants {
117        let var_ident = variant.ident.clone();
118
119        let var_kind = match &mut variant.fields {
120            Fields::Unit => VariantKind::Unit(var_ident),
121            Fields::Unnamed(fields) => {
122                if fields.unnamed.len() != 1 {
123                    parser.err(
124                        format!(
125                            "Logos currently only supports variants with one field, found {}",
126                            fields.unnamed.len(),
127                        ),
128                        fields.span(),
129                    );
130                }
131
132                let ty = &mut fields
133                    .unnamed
134                    .first_mut()
135                    .expect("Already checked len; qed")
136                    .ty;
137                let ty = parser.get_type(ty);
138
139                VariantKind::Value(var_ident, ty)
140            }
141            Fields::Named(fields) => {
142                parser.err("Logos doesn't support named fields yet.", fields.span());
143
144                VariantKind::Skip
145            }
146        };
147
148        for attr in &mut variant.attrs {
149            let attr_name = match attr.path().get_ident() {
150                Some(ident) => ident.to_string(),
151                None => continue,
152            };
153
154            match attr_name.as_str() {
155                ERROR_ATTR => {
156                    // TODO: Remove in future versions
157                    parser.err(
158                        concat!(
159                            "Since 0.13 Logos no longer requires the #[error] variant.",
160                            "\n\n",
161                            "For help with migration see release notes: ",
162                            "https://github.com/maciejhirsz/logos/releases"
163                        ),
164                        attr.span(),
165                    );
166                }
167                TOKEN_ATTR => {
168                    let definition = match parser.parse_definition_attr(attr) {
169                        Some(definition) => definition,
170                        None => {
171                            parser.err("Expected #[token(...)]", attr.span());
172                            continue;
173                        }
174                    };
175
176                    let pattern_res = if definition.ignore_flags.ignore_case {
177                        let pattern_src = definition.literal.escape(true);
178                        Pattern::compile(
179                            true,
180                            &pattern_src,
181                            definition.literal.token().to_string(),
182                            definition.literal.unicode(),
183                            true,
184                        )
185                    } else {
186                        Pattern::compile_lit(&definition.literal)
187                    };
188
189                    let pattern = match pattern_res {
190                        Ok(pattern) => pattern,
191                        Err(err) => {
192                            parser.err(err, definition.literal.span());
193                            continue;
194                        }
195                    };
196
197                    let literal_len = match &definition.literal {
198                        parser::Literal::Utf8(lit_str) => lit_str.value().len(),
199                        parser::Literal::Bytes(lit_byte_str) => lit_byte_str.value().len(),
200                    };
201
202                    pats.push(
203                        Leaf::new(definition.literal.span(), pattern)
204                            .variant_kind(var_kind.clone())
205                            .priority(definition.priority.unwrap_or(literal_len * 2))
206                            .callback(definition.callback),
207                    );
208                }
209                REGEX_ATTR => {
210                    let definition = match parser.parse_definition_attr(attr) {
211                        Some(definition) => definition,
212                        None => {
213                            parser.err("Expected #[regex(...)]", attr.span());
214                            continue;
215                        }
216                    };
217
218                    let Some(pattern_source) = subpatterns.subst_subpatterns(
219                        &definition.literal.escape(false),
220                        definition.literal.span(),
221                        &mut parser.errors,
222                    ) else {
223                        continue;
224                    };
225
226                    let unicode = definition.literal.unicode();
227                    let ignore_case = definition.ignore_flags.ignore_case;
228                    let pattern = match Pattern::compile(
229                        false,
230                        &pattern_source,
231                        definition.literal.token().to_string(),
232                        unicode,
233                        ignore_case,
234                    ) {
235                        Ok(pattern) => pattern,
236                        Err(err) => {
237                            parser.err(err, definition.literal.span());
238                            continue;
239                        }
240                    };
241
242                    greedy_dotall_check(&definition, &pattern, &mut parser);
243                    let default_priority = pattern.priority();
244                    pats.push(
245                        Leaf::new(definition.literal.span(), pattern)
246                            .variant_kind(var_kind.clone())
247                            .priority(definition.priority.unwrap_or(default_priority))
248                            .callback(definition.callback),
249                    );
250                }
251                _ => (),
252            }
253        }
254    }
255
256    debug!("Parsing additional options (extras, utf8, ...)");
257
258    let ErrorType {
259        ty: error_type,
260        callback: error_callback,
261    } = parser.error_type.take().unwrap_or_default();
262    let extras = parser.extras.take();
263    let non_utf8_pats = pats
264        .iter()
265        .filter(|leaf| !leaf.pattern.hir().properties().is_utf8())
266        .collect::<Vec<_>>();
267    if utf8_mode && !non_utf8_pats.is_empty() {
268        // If utf8 mode is specified, make sure no patterns match illegal utf8
269        for leaf in non_utf8_pats {
270            parser.err(format!(concat!(
271                "UTF-8 mode is requested, but the pattern {} of variant `{}` can match invalid utf8.\n",
272                "You can disable UTF-8 mode with #[logos(utf8 = false)]"
273            ), leaf.pattern.source(), leaf.kind), leaf.span);
274        }
275    };
276
277    let source = match utf8_mode {
278        true => quote!(str),
279        false => quote!([u8]),
280    };
281    let logos_path = parser
282        .logos_path
283        .take()
284        .unwrap_or_else(|| parse_quote!(::logos));
285
286    let generics = parser.generics();
287    let this = quote!(#name #generics);
288
289    let impl_logos = |body| {
290        quote! {
291            impl<'s> #logos_path::Logos<'s> for #this {
292                type Error = #error_type;
293
294                type Extras = #extras;
295
296                type Source = #source;
297
298                fn lex(lex: &mut #logos_path::Lexer<'s, Self>)
299                    -> core::option::Option<core::result::Result<Self, <Self as #logos_path::Logos<'s>>::Error>> {
300                    #body
301                }
302            }
303        }
304    };
305
306    if cfg!(feature = "debug") {
307        let leaves_rendered = pats
308            .iter()
309            .enumerate()
310            .map(|(leaf_id, leaf)| format!("  {}: {} (priority: {})", leaf_id, leaf, leaf.priority))
311            .collect::<Vec<_>>()
312            .join("\n");
313        debug!("Generated leaves:\n{leaves_rendered}");
314    }
315
316    debug!("Generating graph from leaves");
317
318    let graph = match Graph::new(pats, config) {
319        Ok(nfa) => nfa,
320        Err(msg) => {
321            let mut errors = Errors::default();
322            errors.err(msg, item_span);
323            return impl_logos(errors.render().unwrap());
324        }
325    };
326
327    debug!("Generated Automaton:\n{:?}", graph.dfa());
328    debug!("Generated Graph:\n{graph}");
329    debug!("Root node: {:?}", graph.root());
330
331    if cfg!(feature = "debug") {
332        if let Some(export_path) = parser.export_path.as_ref() {
333            debug!("Exporting graphs");
334            let lower_name = name.to_string().to_lowercase();
335
336            if let Err(err) = generate_graphs(export_path, &lower_name, &graph) {
337                debug!("Failed to export graphs: {err}");
338            }
339        }
340    }
341
342    debug!("Checking if any two tokens have the same priority");
343
344    for error in graph.errors() {
345        match error {
346            GraphError::Disambiguation(matching) => {
347                for leaf_id in matching {
348                    let leaf = &graph.leaves()[leaf_id.0];
349                    let priority = leaf.priority;
350
351                    let matching = matching
352                        .iter()
353                        .filter(|&id| id != leaf_id)
354                        .map(|match_id| format!("  {}", &graph.leaves()[match_id.0]))
355                        .collect::<Vec<_>>()
356                        .join("\n");
357
358                    parser.err(
359                        format!(
360                            concat!(
361                                "The pattern {} can match simultaneously with the following variants:\n",
362                                "{}\n",
363                                "\n",
364                                "(all at the priority {})"
365                            ),
366                            leaf, matching, priority
367                        ),
368                        leaf.span,
369                    );
370                }
371            }
372            GraphError::NoUniversalStart => {
373                parser.err(concat!(
374                    "The state machine implementing this lexer is missing a universal start state,",
375                    "which is unsupported by logos. This is most likely do to a lookbehind assertion ",
376                    "at the start of the regex."
377                ), item_span);
378            }
379            GraphError::EmptyMatch(leaf_id) => {
380                parser.err(
381                    format!(
382                        "The pattern {} can match the empty string, which is unsupported by logos.",
383                        &graph.leaves()[leaf_id.0],
384                    ),
385                    graph.leaves()[leaf_id.0].span,
386                );
387            }
388        }
389    }
390
391    if let Some(errors) = parser.errors.render() {
392        return impl_logos(errors);
393    }
394
395    debug!("Generating code from graph");
396
397    let config = generator::Config {
398        use_state_machine_codegen: cfg!(feature = "state_machine_codegen"),
399    };
400    let mut generator = Generator::new(config, name, &this, &graph, &error_callback);
401
402    let body = generator.generate();
403    impl_logos(quote! {
404        use #logos_path::internal::{
405            LexerInternal,
406            CallbackRetVal,
407            CallbackResult,
408            SkipRetVal,
409            SkipResult,
410        };
411        use core::result::Result as _Result;
412        use core::option::Option as _Option;
413        use #logos_path::Logos;
414
415        type _Lexer<'s> = #logos_path::Lexer<'s, #this>;
416
417        #body
418    })
419}
420
421fn greedy_dotall_check(definition: &Definition, pattern: &Pattern, parser: &mut Parser) {
422    let allow_greedy = definition.allow_greedy.unwrap_or(false);
423    if !allow_greedy && pattern.check_for_greedy_all() {
424        parser.err(
425            concat!(
426                "This pattern contains an unbounded greedy dot repetition, i.e. `.*` or `.+` ",
427                "(or a character class that is equivalent to a dot, i.e., `[^\\n]*`). ",
428                "This will cause the entirety of the input to be read for every token. ",
429                "Consider making your repetition non-greedy or changing it to a more ",
430                "specific character class. If this is the intended behavior, add ",
431                "#[regex(..., allow_greedy = true)] or",
432                "#[logos(skip(..., allow_greedy = true))]"
433            ),
434            definition.literal.span(),
435        );
436    }
437}
438
439/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present.
440pub fn strip_attributes(input: TokenStream) -> TokenStream {
441    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
442
443    strip_attrs_from_vec(&mut item.attrs);
444
445    for attr in &mut item.attrs {
446        if let syn::Meta::List(meta) = &mut attr.meta {
447            if meta.path.is_ident("derive") {
448                let mut tokens =
449                    std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
450
451                while let Some(TokenTree::Ident(ident)) = tokens.next() {
452                    let punct = tokens.next();
453
454                    if ident == "Logos" {
455                        continue;
456                    }
457
458                    meta.tokens.extend([TokenTree::Ident(ident)]);
459                    meta.tokens.extend(punct);
460                }
461            }
462        }
463    }
464
465    for variant in &mut item.variants {
466        strip_attrs_from_vec(&mut variant.attrs);
467        for field in &mut variant.fields {
468            strip_attrs_from_vec(&mut field.attrs);
469        }
470    }
471
472    item.to_token_stream()
473}
474
475fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
476    attrs.retain(|attr| !is_logos_attr(attr))
477}
478
479fn is_logos_attr(attr: &syn::Attribute) -> bool {
480    attr.path().is_ident(LOGOS_ATTR)
481        || attr.path().is_ident(TOKEN_ATTR)
482        || attr.path().is_ident(REGEX_ATTR)
483}
484
485fn generate_graphs(path_str: &str, name: &str, graph: &Graph) -> Result<(), Box<dyn Error>> {
486    let path = Path::new(path_str).to_owned();
487
488    let (dot_path, mmd_path) = match path.extension().map(OsStr::to_str) {
489        Some(Some("dot")) => (Some(path), None),
490        Some(Some("mmd")) => (None, Some(path)),
491        Some(_) => {
492            return Err(String::from(
493                "Export path must end in '.dot' or '.mmd', or it must be a directory.",
494            )
495            .into())
496        }
497        None => {
498            let dot_path = path.join(format!("{name}.dot"));
499            let mmd_path = path.join(format!("{name}.mmd"));
500            (Some(dot_path), Some(mmd_path))
501        }
502    };
503
504    for (path, is_dot) in [(dot_path, true), (mmd_path, false)] {
505        let Some(path) = path else { continue };
506
507        if let Some(parent) = path.parent() {
508            std::fs::create_dir_all(parent)?;
509        }
510
511        let s = if is_dot {
512            graph.get_dot()
513        } else {
514            graph.get_mermaid()
515        }?;
516        std::fs::write(path, s)?;
517    }
518
519    Ok(())
520}