logos_codegen/parser/
mod.rs

1use beef::lean::Cow;
2use proc_macro2::{Span, TokenStream, TokenTree};
3use quote::quote;
4use syn::spanned::Spanned;
5use syn::{Attribute, GenericParam, Lit, Meta, Type};
6
7use crate::error::Errors;
8use crate::leaf::{Callback, InlineCallback};
9use crate::util::{expect_punct, MaybeVoid};
10use crate::LOGOS_ATTR;
11
12mod definition;
13mod error_type;
14mod ignore_flags;
15mod nested;
16mod skip;
17mod subpattern;
18mod type_params;
19
20pub use self::definition::{Definition, Literal};
21pub use self::error_type::ErrorType;
22pub use self::ignore_flags::IgnoreFlags;
23use self::nested::{AttributeParser, Nested, NestedValue};
24pub use self::skip::{Skip, SkipCallback};
25pub use self::subpattern::Subpatterns;
26use self::type_params::{replace_lifetime, traverse_type, TypeParams};
27
/// State accumulated while parsing Logos attributes (`#[logos(...)]`,
/// `#[token(...)]`, `#[regex(...)]`).
#[derive(Default)]
pub struct Parser {
    /// Errors reported so far; `Parser::err` forwards to this collection.
    pub errors: Errors,
    /// Switched to `Mode::Binary` as soon as a byte-string literal is parsed.
    pub mode: Mode,
    /// Value given with `#[logos(source = ...)]`, if any.
    pub source: Option<TokenStream>,
    /// Entries registered through the `skip` nested attribute of `#[logos(...)]`.
    pub skips: Vec<Skip>,
    /// Value given with `#[logos(extras = ...)]`.
    pub extras: MaybeVoid,
    /// Error type given with `#[logos(error = ...)]` or `#[logos(error(...))]`.
    pub error_type: Option<ErrorType>,
    /// Definitions added through `#[logos(subpattern name = ...)]`.
    pub subpatterns: Subpatterns,
    /// Value given with `#[logos(crate = ...)]`, if any.
    pub logos_path: Option<TokenStream>,
    /// Value given with `#[logos(export_dir = "...")]`; only present when the
    /// `debug` feature is enabled.
    #[cfg(feature = "debug")]
    pub export_dir: Option<String>,
    /// Generic parameters registered by `parse_generic`, together with the
    /// substitutions set through `#[logos(type T = ...)]`.
    types: TypeParams,
}
42
/// Input mode selected by the literals parsed so far.
///
/// The common value-type traits are derived so callers can compare, copy and
/// debug-print the mode instead of having to pattern-match on it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum Mode {
    /// Default mode; stays in effect until a byte-string literal is parsed.
    #[default]
    Utf8,
    /// Selected by `Parser::parse_literal` when it sees a byte-string literal.
    Binary,
}
49
50impl Parser {
51    pub fn parse_generic(&mut self, param: GenericParam) {
52        match param {
53            GenericParam::Lifetime(lt) => {
54                self.types.explicit_lifetime(lt, &mut self.errors);
55            }
56            GenericParam::Type(ty) => {
57                self.types.add(ty.ident);
58            }
59            GenericParam::Const(c) => {
60                self.err("Logos doesn't support const generics.", c.span());
61            }
62        }
63    }
64
65    pub fn generics(&mut self) -> Option<TokenStream> {
66        self.types.generics(&mut self.errors)
67    }
68
69    fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> {
70        match &mut attr.meta {
71            Meta::List(list) => {
72                let tokens = std::mem::replace(&mut list.tokens, TokenStream::new());
73
74                Some(AttributeParser::new(tokens))
75            }
76            _ => None,
77        }
78    }
79
80    /// Try to parse the main `#[logos(...)]`, does nothing if
81    /// the attribute's name isn't `logos`.
82    pub fn try_parse_logos(&mut self, attr: &mut Attribute) {
83        if !attr.path().is_ident(LOGOS_ATTR) {
84            return;
85        }
86
87        let nested = match self.parse_attr(attr) {
88            Some(tokens) => tokens,
89            None => {
90                self.err("Expected #[logos(...)]", attr.span());
91                return;
92            }
93        };
94
95        for nested in nested {
96            let (name, value) = match nested {
97                Nested::Named(name, value) => (name, value),
98                Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => {
99                    self.err("Invalid nested attribute", tokens.span());
100                    continue;
101                }
102            };
103
104            // IMPORTANT: Keep these sorted alphabetically for binary search down the line
105            #[allow(clippy::type_complexity)]
106            static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[
107                ("crate", |parser, span, value| match value {
108                    NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path),
109                    _ => {
110                        parser.err("Expected: #[logos(crate = path::to::logos)]", span);
111                    }
112                }),
113                ("error", |parser, span, value| match value {
114                    NestedValue::Assign(value) => {
115                        let span = value.span();
116
117                        let error_ty = ErrorType::new(value);
118
119                        if let Some(previous) = parser.error_type.replace(error_ty) {
120                            parser
121                                .err("Error type can be defined only once", span)
122                                .err("Previous definition here", previous.span());
123                        }
124                    }
125                    NestedValue::Group(value) => {
126                        let span = value.span();
127                        let mut nested = AttributeParser::new(value);
128                        let ty = match nested.parsed::<Type>() {
129                            Some(Ok(ty)) => ty,
130                            Some(Err(e)) => {
131                                parser.err(e.to_string(), e.span());
132                                return;
133                            }
134                            None => {
135                                parser.err("Expected #[logos(error(SomeType))]", span);
136                                return;
137                            }
138                        };
139
140                        let mut error_type = {
141                            use quote::ToTokens;
142                            ErrorType::new(ty.into_token_stream())
143                        };
144
145                        for (position, next) in nested.enumerate() {
146                            match next {
147                                Nested::Unexpected(tokens) => {
148                                    parser.err("Unexpected token in attribute", tokens.span());
149                                }
150                                Nested::Unnamed(tokens) => match position {
151                                    0 => error_type.callback = parser.parse_callback(tokens),
152                                    _ => {
153                                        parser.err(
154                                            "\
155                                            Expected a named argument at this position\n\
156                                            \n\
157                                            hint: If you are trying to define a callback here use: callback = ...\
158                                            ",
159                                            tokens.span(),
160                                        );
161                                    }
162                                },
163                                Nested::Named(name, value) => {
164                                    error_type.named_attr(name, value, parser);
165                                }
166                            }
167                        }
168
169                        if let Some(previous) = parser.error_type.replace(error_type) {
170                            parser
171                                .err("Error type can be defined only once", span)
172                                .err("Previous definition here", previous.span());
173                        }
174                    }
175                    _ => {
176                        parser.err(
177                            concat!(
178                                "Expected: #[logos(error = SomeType)] or ",
179                                "#[logos(error(SomeType[, callback))]"
180                            ),
181                            span,
182                        );
183                    }
184                }),
185                ("export_dir", |parser, span, value| match value {
186                    #[cfg(feature = "debug")]
187                    NestedValue::Assign(value) => {
188                        let span = value.span();
189
190                        match syn::parse2::<Literal>(value) {
191                            Ok(Literal::Utf8(str)) => {
192                                if let Some(previous) = parser.export_dir.replace(str.value()) {
193                                    parser
194                                        .err("Export dir can be defined only once", span)
195                                        .err("Previous definition here", previous.span());
196                                }
197                            }
198                            Ok(_) => {
199                                parser.err("Expected a &str", span);
200                            }
201                            Err(e) => {
202                                parser.err(e.to_string(), span);
203                            }
204                        }
205                    }
206                    #[cfg(feature = "debug")]
207                    _ => {
208                        parser.err(
209                            "Expected #[logos(export_dir = \"path/to/export/dir\")]",
210                            span,
211                        );
212                    }
213                    #[cfg(not(feature = "debug"))]
214                    _ => {
215                        parser.err("Enable the 'debug' feature to export graphs", span);
216                    }
217                }),
218                ("extras", |parser, span, value| match value {
219                    NestedValue::Assign(value) => {
220                        let span = value.span();
221
222                        if let MaybeVoid::Some(previous) = parser.extras.replace(value) {
223                            parser
224                                .err("Extras can be defined only once", span)
225                                .err("Previous definition here", previous.span());
226                        }
227                    }
228                    _ => {
229                        parser.err("Expected: #[logos(extras = SomeType)]", span);
230                    }
231                }),
232                ("skip", |parser, span, value| match value {
233                    NestedValue::Literal(lit) => {
234                        if let Some(literal) = parser.parse_literal(Lit::new(lit)) {
235                            parser.skips.push(Skip::new(literal));
236                        }
237                    }
238                    NestedValue::Group(tokens) => {
239                        let token_span = tokens.span();
240                        if let Some(skip) = parser.parse_skip(tokens) {
241                            parser.skips.push(skip);
242                        } else {
243                            parser.err(
244                                "Expected #[logos(skip(\"regex literal\"[, [callback = ] callback, priority = priority]))]",
245                                token_span,
246                            );
247                        }
248                    }
249                    _ => {
250                        parser.err(
251                            "Expected: #[logos(skip \"regex literal\")] or #[logos(skip(...))]",
252                            span,
253                        );
254                    }
255                }),
256                ("source", |parser, span, value| match value {
257                    NestedValue::Assign(value) => {
258                        let span = value.span();
259                        if let Some(previous) = parser.source.replace(value) {
260                            parser
261                                .err("Source can be defined only once", span)
262                                .err("Previous definition here", previous.span());
263                        }
264                    }
265                    _ => {
266                        parser.err("Expected: #[logos(source = SomeType)]", span);
267                    }
268                }),
269                ("subpattern", |parser, span, value| match value {
270                    NestedValue::KeywordAssign(name, value) => {
271                        parser.subpatterns.add(name, value, &mut parser.errors);
272                    }
273                    _ => {
274                        parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span);
275                    }
276                }),
277                ("type", |parser, span, value| match value {
278                    NestedValue::KeywordAssign(generic, ty) => {
279                        parser.types.set(generic, ty, &mut parser.errors);
280                    }
281                    _ => {
282                        parser.err("Expected: #[logos(type T = SomeType)]", span);
283                    }
284                }),
285            ];
286
287            match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) {
288                Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value),
289                Err(_) => {
290                    let mut err = format!(
291                        "Unknown nested attribute #[logos({name})], expected one of: {}",
292                        NESTED_LOOKUP[0].0
293                    );
294
295                    for (allowed, _) in &NESTED_LOOKUP[1..] {
296                        err.push_str(", ");
297                        err.push_str(allowed);
298                    }
299
300                    self.err(err, name.span());
301                }
302            }
303        }
304    }
305
306    pub fn parse_skip(&mut self, stream: TokenStream) -> Option<Skip> {
307        // We don't call parse_attr here because we only want to parse what is inside the parentheses
308        let mut nested = AttributeParser::new(stream);
309
310        let literal = match nested.parsed::<Lit>()? {
311            Ok(lit) => self.parse_literal(lit)?,
312            Err(err) => {
313                self.err(err.to_string(), err.span());
314
315                return None;
316            }
317        };
318
319        let mut skip = Skip::new(literal);
320
321        for (position, next) in nested.enumerate() {
322            match next {
323                Nested::Unexpected(tokens) => {
324                    self.err("Unexpected token in attribute", tokens.span());
325                }
326                Nested::Unnamed(tokens) => match position {
327                    0 => skip.callback = self.parse_skip_callback(tokens),
328                    _ => {
329                        self.err(
330                            "\
331                            Expected a named argument at this position\n\
332                            \n\
333                            hint: If you are trying to define a callback here use: callback = ...\
334                            ",
335                            tokens.span(),
336                        );
337                    }
338                },
339                Nested::Named(name, value) => {
340                    skip.named_attr(name, value, self);
341                }
342            }
343        }
344
345        Some(skip)
346    }
347
348    pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> {
349        match lit {
350            Lit::Str(string) => Some(Literal::Utf8(string)),
351            Lit::ByteStr(bytes) => {
352                self.mode = Mode::Binary;
353
354                Some(Literal::Bytes(bytes))
355            }
356            _ => {
357                self.err("Expected a &str or &[u8] slice", lit.span());
358
359                None
360            }
361        }
362    }
363
364    /// Parse attribute definition of a token:
365    ///
366    /// + `#[token(literal[, callback])]`
367    /// + `#[regex(literal[, callback])]`
368    pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> {
369        let mut nested = self.parse_attr(attr)?;
370
371        let literal = match nested.parsed::<Lit>()? {
372            Ok(lit) => self.parse_literal(lit)?,
373            Err(err) => {
374                self.err(err.to_string(), err.span());
375
376                return None;
377            }
378        };
379
380        let mut def = Definition::new(literal);
381
382        for (position, next) in nested.enumerate() {
383            match next {
384                Nested::Unexpected(tokens) => {
385                    self.err("Unexpected token in attribute", tokens.span());
386                }
387                Nested::Unnamed(tokens) => match position {
388                    0 => def.callback = self.parse_callback(tokens),
389                    _ => {
390                        self.err(
391                            "\
392                            Expected a named argument at this position\n\
393                            \n\
394                            hint: If you are trying to define a callback here use: callback = ...\
395                            ",
396                            tokens.span(),
397                        );
398                    }
399                },
400                Nested::Named(name, value) => {
401                    def.named_attr(name, value, self);
402                }
403            }
404        }
405
406        Some(def)
407    }
408
409    fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> {
410        let span = tokens.span();
411        let mut tokens = tokens.into_iter();
412
413        if let Some(tt) = expect_punct(tokens.next(), '|') {
414            let mut label = TokenStream::from(tt);
415
416            label.extend(tokens);
417
418            return Some(Callback::Label(label));
419        }
420
421        let first = tokens.next();
422        let error = expect_punct(tokens.next(), '|');
423
424        let arg = match (error, first) {
425            (None, Some(TokenTree::Ident(arg))) => arg,
426            _ => {
427                self.err(
428                    "Inline callbacks must use closure syntax with exactly one parameter",
429                    span,
430                );
431                return None;
432            }
433        };
434
435        let body = match tokens.next() {
436            Some(TokenTree::Group(group)) => group.stream(),
437            Some(first) => {
438                let mut body = TokenStream::from(first);
439
440                body.extend(tokens);
441                body
442            }
443            None => {
444                self.err("Callback missing a body", span);
445                return None;
446            }
447        };
448
449        let inline = InlineCallback { arg, body, span };
450
451        Some(inline.into())
452    }
453
454    fn parse_skip_callback(&mut self, tokens: TokenStream) -> Option<SkipCallback> {
455        let span = tokens.span();
456        Some(match self.parse_callback(tokens) {
457            Some(Callback::Inline(inline)) => SkipCallback::Inline(inline),
458            Some(Callback::Label(label)) => SkipCallback::Label(label),
459            Some(Callback::Skip(_) | Callback::SkipCallback(_)) => {
460                unreachable!(
461                    "internal error: `parse_callback` should only return Some(Callback::{{Inline, Label}}) or None.",
462                )
463            }
464            None => {
465                self.err("Not a valid callback", span);
466                return None;
467            }
468        })
469    }
470
471    /// Checks if `ty` is a declared generic param, if so replaces it
472    /// with a concrete type defined using #[logos(type T = Type)]
473    ///
474    /// If no matching generic param is found, all lifetimes are fixed
475    /// to the source lifetime
476    pub fn get_type(&self, ty: &mut Type) -> TokenStream {
477        traverse_type(ty, &mut |ty| {
478            if let Type::Path(tp) = ty {
479                // Skip types that begin with `self::`
480                if tp.qself.is_none() {
481                    // If `ty` is a generic type parameter, try to find
482                    // its concrete type defined with #[logos(type T = Type)]
483                    if let Some(substitute) = self.types.find(&tp.path) {
484                        *ty = substitute;
485                    }
486                }
487            }
488            // If `ty` is a concrete type, fix its lifetimes to 'source
489            replace_lifetime(ty);
490        });
491
492        quote!(#ty)
493    }
494
495    pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors
496    where
497        M: Into<Cow<'static, str>>,
498    {
499        self.errors.err(message, span)
500    }
501}