1#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13mod graph;
14mod leaf;
15mod parser;
16mod pattern;
17mod util;
18
19#[macro_use]
20#[allow(missing_docs)]
21mod macros;
22
23use std::error::Error;
24use std::ffi::OsStr;
25use std::path::Path;
26
27use error::Errors;
28use generator::Generator;
29use graph::{Graph, GraphError};
30use leaf::Leaf;
31use parser::Parser;
32use pattern::Pattern;
33use quote::ToTokens;
34
35use proc_macro2::{TokenStream, TokenTree};
36use quote::quote;
37use syn::spanned::Spanned;
38use syn::{parse_quote, LitBool};
39use syn::{Fields, ItemEnum};
40
41use crate::graph::Config;
42use crate::leaf::VariantKind;
43use crate::parser::{ErrorType, Subpatterns};
44
45const LOGOS_ATTR: &str = "logos";
46const ERROR_ATTR: &str = "error";
47const TOKEN_ATTR: &str = "token";
48const REGEX_ATTR: &str = "regex";
49
50pub fn generate(input: TokenStream) -> TokenStream {
52 debug!("Reading input token streams");
53
54 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
55 let item_span = item.span();
56
57 let name = &item.ident;
58
59 let mut parser = Parser::default();
60
61 for param in item.generics.params {
62 parser.parse_generic(param);
63 }
64
65 for attr in &mut item.attrs {
66 parser.try_parse_logos(attr);
67 }
68
69 debug!("Iterating through subpatterns and skips");
70
71 let utf8_mode = parser
72 .utf8_mode
73 .as_ref()
74 .map(LitBool::value)
75 .unwrap_or(true);
76 let config = Config { utf8_mode };
77 let subpatterns = Subpatterns::new(&parser.subpatterns, utf8_mode, &mut parser.errors);
78
79 let mut pats = Vec::new();
80
81 for skip in parser.skips.drain(..) {
82 let Some(pattern_source) = subpatterns.subst_subpatterns(
83 &skip.literal.escape(false),
84 skip.literal.span(),
85 &mut parser.errors,
86 ) else {
87 continue;
88 };
89
90 let pattern = match Pattern::compile(
91 false,
92 &pattern_source,
93 skip.literal.token().to_string(),
94 skip.literal.unicode(),
95 false,
96 ) {
97 Ok(pattern) => pattern,
98 Err(err) => {
99 parser.errors.err(err, skip.literal.span());
100 continue;
101 }
102 };
103
104 let default_priority = pattern.priority();
105 pats.push(
106 Leaf::new(skip.literal.span(), pattern)
107 .priority(skip.priority.unwrap_or(default_priority))
108 .callback(skip.into_callback()),
109 );
110 }
111
112 debug!("Iterating through enum variants");
113
114 for variant in &mut item.variants {
115 let var_ident = variant.ident.clone();
116
117 let var_kind = match &mut variant.fields {
118 Fields::Unit => VariantKind::Unit(var_ident),
119 Fields::Unnamed(fields) => {
120 if fields.unnamed.len() != 1 {
121 parser.err(
122 format!(
123 "Logos currently only supports variants with one field, found {}",
124 fields.unnamed.len(),
125 ),
126 fields.span(),
127 );
128 }
129
130 let ty = &mut fields
131 .unnamed
132 .first_mut()
133 .expect("Already checked len; qed")
134 .ty;
135 let ty = parser.get_type(ty);
136
137 VariantKind::Value(var_ident, ty)
138 }
139 Fields::Named(fields) => {
140 parser.err("Logos doesn't support named fields yet.", fields.span());
141
142 VariantKind::Skip
143 }
144 };
145
146 for attr in &mut variant.attrs {
147 let attr_name = match attr.path().get_ident() {
148 Some(ident) => ident.to_string(),
149 None => continue,
150 };
151
152 match attr_name.as_str() {
153 ERROR_ATTR => {
154 parser.err(
156 concat!(
157 "Since 0.13 Logos no longer requires the #[error] variant.",
158 "\n\n",
159 "For help with migration see release notes: ",
160 "https://github.com/maciejhirsz/logos/releases"
161 ),
162 attr.span(),
163 );
164 }
165 TOKEN_ATTR => {
166 let definition = match parser.parse_definition(attr) {
167 Some(definition) => definition,
168 None => {
169 parser.err("Expected #[token(...)]", attr.span());
170 continue;
171 }
172 };
173
174 let pattern_res = if definition.ignore_flags.ignore_case {
175 let pattern_src = definition.literal.escape(true);
176 Pattern::compile(
177 true,
178 &pattern_src,
179 definition.literal.token().to_string(),
180 definition.literal.unicode(),
181 true,
182 )
183 } else {
184 Pattern::compile_lit(&definition.literal)
185 };
186
187 let pattern = match pattern_res {
188 Ok(pattern) => pattern,
189 Err(err) => {
190 parser.err(err, definition.literal.span());
191 continue;
192 }
193 };
194
195 let literal_len = match &definition.literal {
196 parser::Literal::Utf8(lit_str) => lit_str.value().len(),
197 parser::Literal::Bytes(lit_byte_str) => lit_byte_str.value().len(),
198 };
199
200 pats.push(
201 Leaf::new(definition.literal.span(), pattern)
202 .variant_kind(var_kind.clone())
203 .priority(definition.priority.unwrap_or(literal_len * 2))
204 .callback(definition.callback),
205 );
206 }
207 REGEX_ATTR => {
208 let definition = match parser.parse_definition(attr) {
209 Some(definition) => definition,
210 None => {
211 parser.err("Expected #[regex(...)]", attr.span());
212 continue;
213 }
214 };
215
216 let Some(pattern_source) = subpatterns.subst_subpatterns(
217 &definition.literal.escape(false),
218 definition.literal.span(),
219 &mut parser.errors,
220 ) else {
221 continue;
222 };
223
224 let unicode = definition.literal.unicode();
225 let ignore_case = definition.ignore_flags.ignore_case;
226 let pattern = match Pattern::compile(
227 false,
228 &pattern_source,
229 definition.literal.token().to_string(),
230 unicode,
231 ignore_case,
232 ) {
233 Ok(pattern) => pattern,
234 Err(err) => {
235 parser.err(err, definition.literal.span());
236 continue;
237 }
238 };
239
240 let allow_greedy = definition.allow_greedy.unwrap_or(false);
241 if !allow_greedy && pattern.check_for_greedy_all() {
242 parser.err(concat!(
243 "This pattern contains an unbounded greedy dot repetition (.* or .+). ",
244 "This will cause the entirety of the input to be read for every token. ",
245 "Consider making your repetition non-greedy or changing it to a more ",
246 "specific character class. If this is the intended behavior, add ",
247 "#[regex(..., allow_greedy = true)]"
248 ), definition.literal.span());
249 }
250
251 let default_priority = pattern.priority();
252 pats.push(
253 Leaf::new(definition.literal.span(), pattern)
254 .variant_kind(var_kind.clone())
255 .priority(definition.priority.unwrap_or(default_priority))
256 .callback(definition.callback),
257 );
258 }
259 _ => (),
260 }
261 }
262 }
263
264 debug!("Parsing additional options (extras, utf8, ...)");
265
266 let ErrorType {
267 ty: error_type,
268 callback: error_callback,
269 } = parser.error_type.take().unwrap_or_default();
270 let extras = parser.extras.take();
271 let non_utf8_pats = pats
272 .iter()
273 .filter(|leaf| !leaf.pattern.hir().properties().is_utf8())
274 .collect::<Vec<_>>();
275 if utf8_mode && !non_utf8_pats.is_empty() {
276 for leaf in non_utf8_pats {
278 parser.err(format!(concat!(
279 "UTF-8 mode is requested, but the pattern {} of variant `{}` can match invalid utf8.\n",
280 "You can disable UTF-8 mode with #[logos(utf8 = false)]"
281 ), leaf.pattern.source(), leaf.kind), leaf.span);
282 }
283 };
284
285 let source = match utf8_mode {
286 true => quote!(str),
287 false => quote!([u8]),
288 };
289 let logos_path = parser
290 .logos_path
291 .take()
292 .unwrap_or_else(|| parse_quote!(::logos));
293
294 let generics = parser.generics();
295 let this = quote!(#name #generics);
296
297 let impl_logos = |body| {
298 quote! {
299 impl<'s> #logos_path::Logos<'s> for #this {
300 type Error = #error_type;
301
302 type Extras = #extras;
303
304 type Source = #source;
305
306 fn lex(lex: &mut #logos_path::Lexer<'s, Self>)
307 -> std::option::Option<std::result::Result<Self, <Self as #logos_path::Logos<'s>>::Error>> {
308 #body
309 }
310 }
311 }
312 };
313
314 if cfg!(feature = "debug") {
315 let leaves_rendered = pats
316 .iter()
317 .enumerate()
318 .map(|(leaf_id, leaf)| format!(" {}: {} (priority: {})", leaf_id, leaf, leaf.priority))
319 .collect::<Vec<_>>()
320 .join("\n");
321 debug!("Generated leaves:\n{leaves_rendered}");
322 }
323
324 debug!("Generating graph from leaves");
325
326 let graph = match Graph::new(pats, config) {
327 Ok(nfa) => nfa,
328 Err(msg) => {
329 let mut errors = Errors::default();
330 errors.err(msg, item_span);
331 return impl_logos(errors.render().unwrap());
332 }
333 };
334
335 debug!("Generated Automaton:\n{:?}", graph.dfa());
336 debug!("Generated Graph:\n{graph}");
337 debug!("Root node: {:?}", graph.root());
338
339 if cfg!(feature = "debug") {
340 if let Some(export_path) = parser.export_path.as_ref() {
341 debug!("Exporting graphs");
342 let lower_name = name.to_string().to_lowercase();
343
344 if let Err(err) = generate_graphs(export_path, &lower_name, &graph) {
345 debug!("Failed to export graphs: {err}");
346 }
347 }
348 }
349
350 debug!("Checking if any two tokens have the same priority");
351
352 for error in graph.errors() {
353 match error {
354 GraphError::Disambiguation(matching) => {
355 for leaf_id in matching {
356 let leaf = &graph.leaves()[leaf_id.0];
357 let priority = leaf.priority;
358
359 let matching = matching
360 .iter()
361 .filter(|&id| id != leaf_id)
362 .map(|matchind_id| format!(" {}", &graph.leaves()[matchind_id.0]))
363 .collect::<Vec<_>>()
364 .join("\n");
365
366 parser.err(
367 format!(
368 concat!(
369 "The pattern {} can match simultaneously with the following variants:\n",
370 "{}\n",
371 "\n",
372 "(all at the priority {})"
373 ),
374 leaf, matching, priority
375 ),
376 leaf.span,
377 );
378 }
379 }
380 GraphError::NoUniveralStart => {
381 parser.err(concat!(
382 "The state machine implementing this lexer is missing a universal start state,",
383 "which is unsupported by logos. This is most likely do to a lookbehind assertion ",
384 "at the start of the regex."
385 ), item_span);
386 }
387 GraphError::EmptyMatch(leaf_id) => {
388 parser.err(
389 format!(
390 "The pattern {} can match the empty string, which is unsupported by logos.",
391 &graph.leaves()[leaf_id.0],
392 ),
393 graph.leaves()[leaf_id.0].span,
394 );
395 }
396 }
397 }
398
399 if let Some(errors) = parser.errors.render() {
400 return impl_logos(errors);
401 }
402
403 debug!("Generating code from graph");
404
405 let config = crate::generator::Config {
406 use_state_machine_codegen: cfg!(feature = "state_machine_codegen"),
407 };
408 let mut generator = Generator::new(config, name, &this, &graph, &error_callback);
409
410 let body = generator.generate();
411 impl_logos(quote! {
412 use #logos_path::internal::{
413 LexerInternal,
414 CallbackRetVal,
415 CallbackResult,
416 SkipRetVal,
417 SkipResult,
418 };
419 use std::result::Result as _Result;
420 use std::option::Option as _Option;
421 use #logos_path::Logos;
422
423 type _Lexer<'s> = #logos_path::Lexer<'s, #this>;
424
425 #body
426 })
427}
428
429pub fn strip_attributes(input: TokenStream) -> TokenStream {
431 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
432
433 strip_attrs_from_vec(&mut item.attrs);
434
435 for attr in &mut item.attrs {
436 if let syn::Meta::List(meta) = &mut attr.meta {
437 if meta.path.is_ident("derive") {
438 let mut tokens =
439 std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
440
441 while let Some(TokenTree::Ident(ident)) = tokens.next() {
442 let punct = tokens.next();
443
444 if ident == "Logos" {
445 continue;
446 }
447
448 meta.tokens.extend([TokenTree::Ident(ident)]);
449 meta.tokens.extend(punct);
450 }
451 }
452 }
453 }
454
455 for variant in &mut item.variants {
456 strip_attrs_from_vec(&mut variant.attrs);
457 for field in &mut variant.fields {
458 strip_attrs_from_vec(&mut field.attrs);
459 }
460 }
461
462 item.to_token_stream()
463}
464
465fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
466 attrs.retain(|attr| !is_logos_attr(attr))
467}
468
469fn is_logos_attr(attr: &syn::Attribute) -> bool {
470 attr.path().is_ident(LOGOS_ATTR)
471 || attr.path().is_ident(TOKEN_ATTR)
472 || attr.path().is_ident(REGEX_ATTR)
473}
474
475fn generate_graphs(path_str: &str, name: &str, graph: &Graph) -> Result<(), Box<dyn Error>> {
476 let path = Path::new(path_str).to_owned();
477
478 let (dot_path, mmd_path) = match path.extension().map(OsStr::to_str) {
479 Some(Some("dot")) => (Some(path), None),
480 Some(Some("mmd")) => (None, Some(path)),
481 Some(_) => {
482 return Err(String::from(
483 "Export path must end in '.dot' or '.mmd', or it must be a directory.",
484 )
485 .into())
486 }
487 None => {
488 let dot_path = path.join(format!("{name}.dot"));
489 let mmd_path = path.join(format!("{name}.mmd"));
490 (Some(dot_path), Some(mmd_path))
491 }
492 };
493
494 for (path, is_dot) in [(dot_path, true), (mmd_path, false)] {
495 let Some(path) = path else { continue };
496
497 if let Some(parent) = path.parent() {
498 std::fs::create_dir_all(parent)?;
499 }
500
501 let s = if is_dot {
502 graph.get_dot()
503 } else {
504 graph.get_mermaid()
505 }?;
506 std::fs::write(path, s)?;
507 }
508
509 Ok(())
510}