1#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13mod graph;
14mod leaf;
15mod parser;
16mod pattern;
17mod util;
18
19#[macro_use]
20#[allow(missing_docs)]
21mod macros;
22
23use std::error::Error;
24use std::ffi::OsStr;
25use std::mem;
26use std::path::Path;
27
28use error::Errors;
29use generator::Generator;
30use graph::{Graph, GraphError};
31use leaf::Leaf;
32use parser::Parser;
33use pattern::Pattern;
34use quote::ToTokens;
35
36use proc_macro2::{TokenStream, TokenTree};
37use quote::quote;
38use syn::spanned::Spanned;
39use syn::{parse_quote, LitBool};
40use syn::{Fields, ItemEnum};
41
42use crate::graph::Config;
43use crate::leaf::VariantKind;
44use crate::parser::{Definition, ErrorType, Subpatterns};
45
/// Name of the `#[logos(...)]` attribute.
const LOGOS_ATTR: &str = "logos";
/// Name of the legacy `#[error]` variant attribute; `generate` reports an error when it is used.
const ERROR_ATTR: &str = "error";
/// Name of the `#[token(...)]` variant attribute.
const TOKEN_ATTR: &str = "token";
/// Name of the `#[regex(...)]` variant attribute.
const REGEX_ATTR: &str = "regex";
50
51pub fn generate(input: TokenStream) -> TokenStream {
53 debug!("Reading input token streams");
54
55 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
56 let item_span = item.span();
57
58 let name = &item.ident;
59
60 let mut parser = Parser::default();
61
62 for param in item.generics.params {
63 parser.parse_generic(param);
64 }
65
66 for attr in &mut item.attrs {
67 parser.try_parse_logos(attr);
68 }
69
70 debug!("Iterating through subpatterns and skips");
71
72 let utf8_mode = parser
73 .utf8_mode
74 .as_ref()
75 .map(LitBool::value)
76 .unwrap_or(true);
77 let config = Config { utf8_mode };
78 let subpatterns = Subpatterns::new(&parser.subpatterns, utf8_mode, &mut parser.errors);
79
80 let mut pats = Vec::new();
81
82 for skip in mem::take(&mut parser.skips) {
83 let Some(pattern_source) = subpatterns.subst_subpatterns(
84 &skip.literal.escape(false),
85 skip.literal.span(),
86 &mut parser.errors,
87 ) else {
88 continue;
89 };
90
91 let pattern = match Pattern::compile(
92 false,
93 &pattern_source,
94 skip.literal.token().to_string(),
95 skip.literal.unicode(),
96 false,
97 ) {
98 Ok(pattern) => pattern,
99 Err(err) => {
100 parser.errors.err(err, skip.literal.span());
101 continue;
102 }
103 };
104 greedy_dotall_check(&skip, &pattern, &mut parser);
105
106 let default_priority = pattern.priority();
107 pats.push(
108 Leaf::new(skip.literal.span(), pattern)
109 .priority(skip.priority.unwrap_or(default_priority))
110 .callback(skip.callback),
111 );
112 }
113
114 debug!("Iterating through enum variants");
115
116 for variant in &mut item.variants {
117 let var_ident = variant.ident.clone();
118
119 let var_kind = match &mut variant.fields {
120 Fields::Unit => VariantKind::Unit(var_ident),
121 Fields::Unnamed(fields) => {
122 if fields.unnamed.len() != 1 {
123 parser.err(
124 format!(
125 "Logos currently only supports variants with one field, found {}",
126 fields.unnamed.len(),
127 ),
128 fields.span(),
129 );
130 }
131
132 let ty = &mut fields
133 .unnamed
134 .first_mut()
135 .expect("Already checked len; qed")
136 .ty;
137 let ty = parser.get_type(ty);
138
139 VariantKind::Value(var_ident, ty)
140 }
141 Fields::Named(fields) => {
142 parser.err("Logos doesn't support named fields yet.", fields.span());
143
144 VariantKind::Skip
145 }
146 };
147
148 for attr in &mut variant.attrs {
149 let attr_name = match attr.path().get_ident() {
150 Some(ident) => ident.to_string(),
151 None => continue,
152 };
153
154 match attr_name.as_str() {
155 ERROR_ATTR => {
156 parser.err(
158 concat!(
159 "Since 0.13 Logos no longer requires the #[error] variant.",
160 "\n\n",
161 "For help with migration see release notes: ",
162 "https://github.com/maciejhirsz/logos/releases"
163 ),
164 attr.span(),
165 );
166 }
167 TOKEN_ATTR => {
168 let definition = match parser.parse_definition_attr(attr) {
169 Some(definition) => definition,
170 None => {
171 parser.err("Expected #[token(...)]", attr.span());
172 continue;
173 }
174 };
175
176 let pattern_res = if definition.ignore_flags.ignore_case {
177 let pattern_src = definition.literal.escape(true);
178 Pattern::compile(
179 true,
180 &pattern_src,
181 definition.literal.token().to_string(),
182 definition.literal.unicode(),
183 true,
184 )
185 } else {
186 Pattern::compile_lit(&definition.literal)
187 };
188
189 let pattern = match pattern_res {
190 Ok(pattern) => pattern,
191 Err(err) => {
192 parser.err(err, definition.literal.span());
193 continue;
194 }
195 };
196
197 let literal_len = match &definition.literal {
198 parser::Literal::Utf8(lit_str) => lit_str.value().len(),
199 parser::Literal::Bytes(lit_byte_str) => lit_byte_str.value().len(),
200 };
201
202 pats.push(
203 Leaf::new(definition.literal.span(), pattern)
204 .variant_kind(var_kind.clone())
205 .priority(definition.priority.unwrap_or(literal_len * 2))
206 .callback(definition.callback),
207 );
208 }
209 REGEX_ATTR => {
210 let definition = match parser.parse_definition_attr(attr) {
211 Some(definition) => definition,
212 None => {
213 parser.err("Expected #[regex(...)]", attr.span());
214 continue;
215 }
216 };
217
218 let Some(pattern_source) = subpatterns.subst_subpatterns(
219 &definition.literal.escape(false),
220 definition.literal.span(),
221 &mut parser.errors,
222 ) else {
223 continue;
224 };
225
226 let unicode = definition.literal.unicode();
227 let ignore_case = definition.ignore_flags.ignore_case;
228 let pattern = match Pattern::compile(
229 false,
230 &pattern_source,
231 definition.literal.token().to_string(),
232 unicode,
233 ignore_case,
234 ) {
235 Ok(pattern) => pattern,
236 Err(err) => {
237 parser.err(err, definition.literal.span());
238 continue;
239 }
240 };
241
242 greedy_dotall_check(&definition, &pattern, &mut parser);
243 let default_priority = pattern.priority();
244 pats.push(
245 Leaf::new(definition.literal.span(), pattern)
246 .variant_kind(var_kind.clone())
247 .priority(definition.priority.unwrap_or(default_priority))
248 .callback(definition.callback),
249 );
250 }
251 _ => (),
252 }
253 }
254 }
255
256 debug!("Parsing additional options (extras, utf8, ...)");
257
258 let ErrorType {
259 ty: error_type,
260 callback: error_callback,
261 } = parser.error_type.take().unwrap_or_default();
262 let extras = parser.extras.take();
263 let non_utf8_pats = pats
264 .iter()
265 .filter(|leaf| !leaf.pattern.hir().properties().is_utf8())
266 .collect::<Vec<_>>();
267 if utf8_mode && !non_utf8_pats.is_empty() {
268 for leaf in non_utf8_pats {
270 parser.err(format!(concat!(
271 "UTF-8 mode is requested, but the pattern {} of variant `{}` can match invalid utf8.\n",
272 "You can disable UTF-8 mode with #[logos(utf8 = false)]"
273 ), leaf.pattern.source(), leaf.kind), leaf.span);
274 }
275 };
276
277 let source = match utf8_mode {
278 true => quote!(str),
279 false => quote!([u8]),
280 };
281 let logos_path = parser
282 .logos_path
283 .take()
284 .unwrap_or_else(|| parse_quote!(::logos));
285
286 let generics = parser.generics();
287 let this = quote!(#name #generics);
288
289 let impl_logos = |body| {
290 quote! {
291 impl<'s> #logos_path::Logos<'s> for #this {
292 type Error = #error_type;
293
294 type Extras = #extras;
295
296 type Source = #source;
297
298 fn lex(lex: &mut #logos_path::Lexer<'s, Self>)
299 -> core::option::Option<core::result::Result<Self, <Self as #logos_path::Logos<'s>>::Error>> {
300 #body
301 }
302 }
303 }
304 };
305
306 if cfg!(feature = "debug") {
307 let leaves_rendered = pats
308 .iter()
309 .enumerate()
310 .map(|(leaf_id, leaf)| format!(" {}: {} (priority: {})", leaf_id, leaf, leaf.priority))
311 .collect::<Vec<_>>()
312 .join("\n");
313 debug!("Generated leaves:\n{leaves_rendered}");
314 }
315
316 debug!("Generating graph from leaves");
317
318 let graph = match Graph::new(pats, config) {
319 Ok(nfa) => nfa,
320 Err(msg) => {
321 let mut errors = Errors::default();
322 errors.err(msg, item_span);
323 return impl_logos(errors.render().unwrap());
324 }
325 };
326
327 debug!("Generated Automaton:\n{:?}", graph.dfa());
328 debug!("Generated Graph:\n{graph}");
329 debug!("Root node: {:?}", graph.root());
330
331 if cfg!(feature = "debug") {
332 if let Some(export_path) = parser.export_path.as_ref() {
333 debug!("Exporting graphs");
334 let lower_name = name.to_string().to_lowercase();
335
336 if let Err(err) = generate_graphs(export_path, &lower_name, &graph) {
337 debug!("Failed to export graphs: {err}");
338 }
339 }
340 }
341
342 debug!("Checking if any two tokens have the same priority");
343
344 for error in graph.errors() {
345 match error {
346 GraphError::Disambiguation(matching) => {
347 for leaf_id in matching {
348 let leaf = &graph.leaves()[leaf_id.0];
349 let priority = leaf.priority;
350
351 let matching = matching
352 .iter()
353 .filter(|&id| id != leaf_id)
354 .map(|match_id| format!(" {}", &graph.leaves()[match_id.0]))
355 .collect::<Vec<_>>()
356 .join("\n");
357
358 parser.err(
359 format!(
360 concat!(
361 "The pattern {} can match simultaneously with the following variants:\n",
362 "{}\n",
363 "\n",
364 "(all at the priority {})"
365 ),
366 leaf, matching, priority
367 ),
368 leaf.span,
369 );
370 }
371 }
372 GraphError::NoUniversalStart => {
373 parser.err(concat!(
374 "The state machine implementing this lexer is missing a universal start state,",
375 "which is unsupported by logos. This is most likely do to a lookbehind assertion ",
376 "at the start of the regex."
377 ), item_span);
378 }
379 GraphError::EmptyMatch(leaf_id) => {
380 parser.err(
381 format!(
382 "The pattern {} can match the empty string, which is unsupported by logos.",
383 &graph.leaves()[leaf_id.0],
384 ),
385 graph.leaves()[leaf_id.0].span,
386 );
387 }
388 }
389 }
390
391 if let Some(errors) = parser.errors.render() {
392 return impl_logos(errors);
393 }
394
395 debug!("Generating code from graph");
396
397 let config = generator::Config {
398 use_state_machine_codegen: cfg!(feature = "state_machine_codegen"),
399 };
400 let mut generator = Generator::new(config, name, &this, &graph, &error_callback);
401
402 let body = generator.generate();
403 impl_logos(quote! {
404 use #logos_path::internal::{
405 LexerInternal,
406 CallbackRetVal,
407 CallbackResult,
408 SkipRetVal,
409 SkipResult,
410 };
411 use core::result::Result as _Result;
412 use core::option::Option as _Option;
413 use #logos_path::Logos;
414
415 type _Lexer<'s> = #logos_path::Lexer<'s, #this>;
416
417 #body
418 })
419}
420
421fn greedy_dotall_check(definition: &Definition, pattern: &Pattern, parser: &mut Parser) {
422 let allow_greedy = definition.allow_greedy.unwrap_or(false);
423 if !allow_greedy && pattern.check_for_greedy_all() {
424 parser.err(
425 concat!(
426 "This pattern contains an unbounded greedy dot repetition, i.e. `.*` or `.+` ",
427 "(or a character class that is equivalent to a dot, i.e., `[^\\n]*`). ",
428 "This will cause the entirety of the input to be read for every token. ",
429 "Consider making your repetition non-greedy or changing it to a more ",
430 "specific character class. If this is the intended behavior, add ",
431 "#[regex(..., allow_greedy = true)] or",
432 "#[logos(skip(..., allow_greedy = true))]"
433 ),
434 definition.literal.span(),
435 );
436 }
437}
438
439pub fn strip_attributes(input: TokenStream) -> TokenStream {
441 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
442
443 strip_attrs_from_vec(&mut item.attrs);
444
445 for attr in &mut item.attrs {
446 if let syn::Meta::List(meta) = &mut attr.meta {
447 if meta.path.is_ident("derive") {
448 let mut tokens =
449 std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
450
451 while let Some(TokenTree::Ident(ident)) = tokens.next() {
452 let punct = tokens.next();
453
454 if ident == "Logos" {
455 continue;
456 }
457
458 meta.tokens.extend([TokenTree::Ident(ident)]);
459 meta.tokens.extend(punct);
460 }
461 }
462 }
463 }
464
465 for variant in &mut item.variants {
466 strip_attrs_from_vec(&mut variant.attrs);
467 for field in &mut variant.fields {
468 strip_attrs_from_vec(&mut field.attrs);
469 }
470 }
471
472 item.to_token_stream()
473}
474
475fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
476 attrs.retain(|attr| !is_logos_attr(attr))
477}
478
479fn is_logos_attr(attr: &syn::Attribute) -> bool {
480 attr.path().is_ident(LOGOS_ATTR)
481 || attr.path().is_ident(TOKEN_ATTR)
482 || attr.path().is_ident(REGEX_ATTR)
483}
484
485fn generate_graphs(path_str: &str, name: &str, graph: &Graph) -> Result<(), Box<dyn Error>> {
486 let path = Path::new(path_str).to_owned();
487
488 let (dot_path, mmd_path) = match path.extension().map(OsStr::to_str) {
489 Some(Some("dot")) => (Some(path), None),
490 Some(Some("mmd")) => (None, Some(path)),
491 Some(_) => {
492 return Err(String::from(
493 "Export path must end in '.dot' or '.mmd', or it must be a directory.",
494 )
495 .into())
496 }
497 None => {
498 let dot_path = path.join(format!("{name}.dot"));
499 let mmd_path = path.join(format!("{name}.mmd"));
500 (Some(dot_path), Some(mmd_path))
501 }
502 };
503
504 for (path, is_dot) in [(dot_path, true), (mmd_path, false)] {
505 let Some(path) = path else { continue };
506
507 if let Some(parent) = path.parent() {
508 std::fs::create_dir_all(parent)?;
509 }
510
511 let s = if is_dot {
512 graph.get_dot()
513 } else {
514 graph.get_mermaid()
515 }?;
516 std::fs::write(path, s)?;
517 }
518
519 Ok(())
520}