1#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
mod error;
mod generator;
// `graph` is private by default; it is only exposed publicly when the
// `fuzzing` feature is enabled.
#[cfg(not(feature = "fuzzing"))]
mod graph;
#[cfg(feature = "fuzzing")]
pub mod graph;
mod leaf;
// `mir` uses the same visibility scheme as `graph`: public only under the
// `fuzzing` feature.
#[cfg(not(feature = "fuzzing"))]
mod mir;
#[cfg(feature = "fuzzing")]
pub mod mir;
mod parser;
mod util;

// `#[macro_use]` keeps the macros defined in `macros` in scope for the code
// that follows this declaration.
#[macro_use]
#[allow(missing_docs)]
mod macros;
28
29use generator::Generator;
30use graph::{DisambiguationError, Fork, Graph, Rope};
31use leaf::Leaf;
32use parser::{IgnoreFlags, Mode, Parser};
33use quote::ToTokens;
34use util::MaybeVoid;
35
36use proc_macro2::{Delimiter, TokenStream, TokenTree};
37use quote::quote;
38use syn::parse_quote;
39use syn::spanned::Spanned;
40use syn::{Fields, ItemEnum};
41
/// Name of the `#[logos(...)]` attribute; attributes with this name are
/// removed by `strip_attributes`.
const LOGOS_ATTR: &str = "logos";
/// Name of the legacy `#[error]` variant attribute; `generate` reports an
/// error when a variant still carries it.
const ERROR_ATTR: &str = "error";
/// Name of the `#[token(...)]` variant attribute handled by `generate`.
const TOKEN_ATTR: &str = "token";
/// Name of the `#[regex(...)]` variant attribute handled by `generate`.
const REGEX_ATTR: &str = "regex";
46
47pub fn generate(input: TokenStream) -> TokenStream {
49 debug!("Reading input token streams");
50
51 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
52
53 let name = &item.ident;
54
55 let mut parser = Parser::default();
56
57 for param in item.generics.params {
58 parser.parse_generic(param);
59 }
60
61 for attr in &mut item.attrs {
62 parser.try_parse_logos(attr);
63 }
64
65 let mut ropes = Vec::new();
66 let mut regex_ids = Vec::new();
67 let mut graph = Graph::new();
68
69 {
70 let errors = &mut parser.errors;
71
72 for mut skip in parser.skips.drain(..) {
73 match skip
74 .literal
75 .to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors)
76 {
77 Ok(mir) => {
78 let then = graph.push(
79 Leaf::new_skip(skip.literal.span())
80 .priority(skip.priority.take().unwrap_or_else(|| mir.priority()))
81 .callback(Some(skip.into_callback())),
82 );
83 let id = graph.regex(mir, then);
84
85 regex_ids.push(id);
86 }
87 Err(err) => {
88 errors.err(err, skip.literal.span());
89 }
90 }
91 }
92 }
93
94 debug!("Iterating through enum variants");
95
96 for variant in &mut item.variants {
97 let field = match &mut variant.fields {
98 Fields::Unit => MaybeVoid::Void,
99 Fields::Unnamed(fields) => {
100 if fields.unnamed.len() != 1 {
101 parser.err(
102 format!(
103 "Logos currently only supports variants with one field, found {}",
104 fields.unnamed.len(),
105 ),
106 fields.span(),
107 );
108 }
109
110 let ty = &mut fields
111 .unnamed
112 .first_mut()
113 .expect("Already checked len; qed")
114 .ty;
115 let ty = parser.get_type(ty);
116
117 MaybeVoid::Some(ty)
118 }
119 Fields::Named(fields) => {
120 parser.err("Logos doesn't support named fields yet.", fields.span());
121
122 MaybeVoid::Void
123 }
124 };
125
126 let var_ident = &variant.ident;
128 let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
129
130 for attr in &mut variant.attrs {
131 let attr_name = match attr.path().get_ident() {
132 Some(ident) => ident.to_string(),
133 None => continue,
134 };
135
136 match attr_name.as_str() {
137 ERROR_ATTR => {
138 parser.err(
140 "\
141 Since 0.13 Logos no longer requires the #[error] variant.\n\
142 \n\
143 For help with migration see release notes: \
144 https://github.com/maciejhirsz/logos/releases\
145 ",
146 attr.span(),
147 );
148 }
149 TOKEN_ATTR => {
150 let definition = match parser.parse_definition(attr) {
151 Some(definition) => definition,
152 None => {
153 parser.err("Expected #[token(...)]", attr.span());
154 continue;
155 }
156 };
157
158 if definition.ignore_flags.is_empty() {
159 let bytes = definition.literal.to_bytes();
160 let then = graph.push(
161 leaf(definition.literal.span())
162 .priority(definition.priority.unwrap_or(bytes.len() * 2))
163 .callback(definition.callback),
164 );
165
166 ropes.push(Rope::new(bytes, then));
167 } else {
168 let mir = definition
169 .literal
170 .escape_regex()
171 .to_mir(
172 &Default::default(),
173 definition.ignore_flags,
174 &mut parser.errors,
175 )
176 .expect("The literal should be perfectly valid regex");
177
178 let then = graph.push(
179 leaf(definition.literal.span())
180 .priority(definition.priority.unwrap_or_else(|| mir.priority()))
181 .callback(definition.callback),
182 );
183 let id = graph.regex(mir, then);
184
185 regex_ids.push(id);
186 }
187 }
188 REGEX_ATTR => {
189 let definition = match parser.parse_definition(attr) {
190 Some(definition) => definition,
191 None => {
192 parser.err("Expected #[regex(...)]", attr.span());
193 continue;
194 }
195 };
196 let mir = match definition.literal.to_mir(
197 &parser.subpatterns,
198 definition.ignore_flags,
199 &mut parser.errors,
200 ) {
201 Ok(mir) => mir,
202 Err(err) => {
203 parser.err(err, definition.literal.span());
204 continue;
205 }
206 };
207
208 let then = graph.push(
209 leaf(definition.literal.span())
210 .priority(definition.priority.unwrap_or_else(|| mir.priority()))
211 .callback(definition.callback),
212 );
213 let id = graph.regex(mir, then);
214
215 regex_ids.push(id);
216 }
217 _ => (),
218 }
219 }
220 }
221
222 let mut root = Fork::new();
223
224 debug!("Parsing additional options (extras, source, ...)");
225
226 let (error_type, error_callback) = parser::ErrorType::unwrap(parser.error_type.take());
227 let extras = parser.extras.take();
228 let source = parser
229 .source
230 .take()
231 .map(strip_wrapping_parens)
232 .unwrap_or(match parser.mode {
233 Mode::Utf8 => quote!(str),
234 Mode::Binary => quote!([u8]),
235 });
236 let logos_path = parser
237 .logos_path
238 .take()
239 .unwrap_or_else(|| parse_quote!(::logos));
240
241 let make_error_impl = match error_callback {
242 Some(leaf::Callback::Label(label)) => Some(quote! {
243 #[inline]
244 fn make_error(mut lex: &mut #logos_path::Lexer<'s, Self>) {
245 use #logos_path::{Lexer, internal::LexerInternal};
246
247 let error = #label(&mut lex);
248 lex.set(Err(error));
249 }
250 }),
251 Some(leaf::Callback::Inline(inline)) => {
252 let leaf::InlineCallback { arg, body, .. } = *inline;
253
254 Some(quote! {
255 #[inline]
256 fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) {
257 use #logos_path::internal::LexerInternal;
258
259 let error = { #body };
260 #arg.set(Err(error))
261 }
262 })
263 }
264 _ => None,
265 };
266
267 let generics = parser.generics();
268 let this = quote!(#name #generics);
269
270 let impl_logos = |body| {
271 quote! {
272 impl<'s> #logos_path::Logos<'s> for #this {
273 type Error = #error_type;
274
275 type Extras = #extras;
276
277 type Source = #source;
278
279 fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
280 #body
281 }
282
283 #make_error_impl
284 }
285 }
286 };
287
288 for id in regex_ids {
289 let fork = graph.fork_off(id);
290
291 root.merge(fork, &mut graph);
292 }
293 for rope in ropes {
294 root.merge(rope.into_fork(&mut graph), &mut graph);
295 }
296 while let Some(id) = root.miss.take() {
297 let fork = graph.fork_off(id);
298
299 if fork.branches().next().is_some() {
300 root.merge(fork, &mut graph);
301 } else {
302 break;
303 }
304 }
305
306 debug!("Checking if any two tokens have the same priority");
307
308 for &DisambiguationError(a, b) in graph.errors() {
309 let a = graph[a].unwrap_leaf();
310 let b = graph[b].unwrap_leaf();
311 let disambiguate = a.priority + 1;
312
313 let mut err = |a: &Leaf, b: &Leaf| {
314 parser.err(
315 format!(
316 "\
317 A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
318 \n\
319 hint: Consider giving one definition a higher priority: \
320 #[{attr}(..., priority = {disambiguate})]\
321 ",
322 attr = match a.callback {
323 Some(_) => "regex",
324 None => "skip"
325 }
326 ),
327 a.span
328 );
329 };
330
331 err(a, b);
332 err(b, a);
333 }
334
335 if let Some(errors) = parser.errors.render() {
336 return impl_logos(errors);
337 }
338
339 let root = graph.push(root);
340
341 graph.shake(root);
342
343 #[cfg(feature = "debug")]
344 {
345 debug!("Generating graphs");
346
347 if let Some(path) = parser.export_dir {
348 let path = std::path::Path::new(&path);
349 let dir = if path.extension().is_none() {
350 path
351 } else {
352 path.parent().unwrap_or(std::path::Path::new(""))
353 };
354 match std::fs::create_dir_all(dir) {
355 Ok(()) => {
356 if path.extension() == Some(std::ffi::OsStr::new("dot"))
357 || path.extension().is_none()
358 {
359 match graph.get_dot() {
360 Ok(s) => {
361 let dot_path = if path.extension().is_none() {
362 path.join(format!("{}.dot", name.to_string().to_lowercase()))
363 } else {
364 path.to_path_buf()
365 };
366 if let Err(e) = std::fs::write(dot_path, s) {
367 debug!("Error writing dot graph: {}", e);
368 }
369 }
370 Err(e) => {
371 debug!("Error generating dot graph: {}", e);
372 }
373 }
374 }
375
376 if path.extension() == Some(std::ffi::OsStr::new("mmd"))
377 || path.extension().is_none()
378 {
379 match graph.get_mermaid() {
380 Ok(s) => {
381 let mermaid_path = if path.extension().is_none() {
382 path.join(format!("{}.mmd", name.to_string().to_lowercase()))
383 } else {
384 path.to_path_buf()
385 };
386 if let Err(e) = std::fs::write(mermaid_path, s) {
387 debug!("Error writing mermaid graph: {}", e);
388 }
389 }
390 Err(e) => {
391 debug!("Error generating mermaid graph: {}", e);
392 }
393 }
394 }
395 }
396 Err(e) => {
397 debug!("Error creating graph export dir: {}", e);
398 }
399 }
400 }
401 }
402
403 debug!("Generating code from graph:\n{graph:#?}");
404
405 let generator = Generator::new(name, &this, root, &graph);
406
407 let body = generator.generate();
408 impl_logos(quote! {
409 use #logos_path::internal::{LexerInternal, CallbackResult, SkipCallbackResult};
410
411 type Lexer<'s> = #logos_path::Lexer<'s, #this>;
412
413 fn _end<'s>(lex: &mut Lexer<'s>) {
414 lex.end()
415 }
416
417 fn _error<'s>(lex: &mut Lexer<'s>) {
418 lex.bump_unchecked(1);
419
420 lex.error();
421 }
422
423 #body
424 })
425}
426
427pub fn strip_attributes(input: TokenStream) -> TokenStream {
429 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
430
431 strip_attrs_from_vec(&mut item.attrs);
432
433 for attr in &mut item.attrs {
434 if let syn::Meta::List(meta) = &mut attr.meta {
435 if meta.path.is_ident("derive") {
436 let mut tokens =
437 std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
438
439 while let Some(TokenTree::Ident(ident)) = tokens.next() {
440 let punct = tokens.next();
441
442 if ident == "Logos" {
443 continue;
444 }
445
446 meta.tokens.extend([TokenTree::Ident(ident)]);
447 meta.tokens.extend(punct);
448 }
449 }
450 }
451 }
452
453 for variant in &mut item.variants {
454 strip_attrs_from_vec(&mut variant.attrs);
455 for field in &mut variant.fields {
456 strip_attrs_from_vec(&mut field.attrs);
457 }
458 }
459
460 item.to_token_stream()
461}
462
463fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
464 attrs.retain(|attr| !is_logos_attr(attr))
465}
466
467fn is_logos_attr(attr: &syn::Attribute) -> bool {
468 attr.path().is_ident(LOGOS_ATTR)
469 || attr.path().is_ident(TOKEN_ATTR)
470 || attr.path().is_ident(REGEX_ATTR)
471}
472
473fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
474 let tts: Vec<TokenTree> = t.into_iter().collect();
475
476 if tts.len() != 1 {
477 tts.into_iter().collect()
478 } else {
479 match tts.into_iter().next().unwrap() {
480 TokenTree::Group(g) => {
481 if g.delimiter() == Delimiter::Parenthesis {
482 g.stream()
483 } else {
484 core::iter::once(TokenTree::Group(g)).collect()
485 }
486 }
487 tt => core::iter::once(tt).collect(),
488 }
489 }
490}