Skip to main content

build_automata/
lib.rs

//! This library provides functions to build Rust module files representing
//! deterministic finite automata built from ABNF grammars.
//!
//! Part of the [`static-automata`](https://crates.io/crates/static-automata)
//! framework.
6use std::{
7	borrow::Cow,
8	fs,
9	io::{self, Read, Write},
10	path::Path,
11	process::{Command, Stdio},
12};
13
14use cargo_metadata::{
15	MetadataCommand,
16	camino::{Utf8Path, Utf8PathBuf},
17};
18use grammar::{Grammar, GrammarType};
19use iregex::automata::{DFA, RangeSet};
20use proc_macro2::{Span, TokenStream};
21use quote::{ToTokens, quote};
22use syn::{
23	Token,
24	parse::{Parse, ParseStream},
25	punctuated::Punctuated,
26};
27
28mod grammar;
29mod token;
30
31use token::Token;
32
/// First line written to every generated module file, before the formatted
/// code.
///
/// NOTE(review): the text itself starts with `///`, so in the generated file it
/// is an outer doc comment rather than a plain `//` comment — presumably it
/// ends up documenting the first generated item; confirm this is intended.
const HEADER: &str = "/// This file is auto-generated by `static-automata`. Do not edit.\n";
34
/// Options controlling how the automata are built.
#[derive(Debug, Default)]
pub struct Options {
	/// When `true`, grammars are still scanned and compiled, but no module
	/// file is written to disk.
	pub dry_run: bool,
}
41
42/// Build the automata with the default options.
43///
44/// This will scan the current Rust project and look for `#[grammar]`
45/// annotations.
46///
47/// If an error occurs, it will be printed to the error output.
48pub fn build_automata() {
49	let options = Options::default();
50	build_automata_with(&options)
51}
52
53/// Build the automata with the given options.
54///
55/// This will scan the current Rust project and look for `#[grammar]`
56/// annotations.
57///
58/// If an error occurs, it will be printed to the error output.
59pub fn build_automata_with(options: &Options) {
60	if let Err(e) = try_build_automata_with(options) {
61		let _ = e.try_print();
62	}
63}
64
65/// Tries to build the automata with the default options.
66///
67/// This will scan the current Rust project and look for `#[grammar]`
68/// annotations.
69///
70/// Returns any eventual error that occured.
71pub fn try_build_automata() -> Result<(), Error> {
72	let options = Options::default();
73	try_build_automata_with(&options)
74}
75
76/// Tries to build the automata with the given options.
77///
78/// This will scan the current Rust project and look for `#[grammar]`
79/// annotations.
80///
81/// Returns any eventual error that occured.
82pub fn try_build_automata_with(options: &Options) -> Result<(), Error> {
83	let metadata = MetadataCommand::new().exec()?;
84
85	let package = metadata.root_package().ok_or(Error::NoRootPackage)?;
86
87	for target in &package.targets {
88		scan_target(&options, target)?;
89	}
90
91	Ok(())
92}
93
/// Automaton generation error.
#[derive(Debug, thiserror::Error)]
pub enum Error {
	/// Querying the project metadata failed.
	#[error(transparent)]
	Metadata(#[from] cargo_metadata::Error),

	/// The project metadata has no root package.
	#[error("no root package")]
	NoRootPackage,

	/// An exported rule name could not be turned into a Rust identifier.
	///
	/// Holds the offending name and the span of the name literal.
	#[error("invalid identifier `{0}`")]
	InvalidIdent(String, Span),

	/// Reading a source or grammar file, or writing a generated file, failed.
	#[error(transparent)]
	Io(#[from] io::Error),

	/// A Rust source file or a `#[grammar(...)]` attribute failed to parse.
	#[error(transparent)]
	Syntax(#[from] syn::Error),

	/// The grammar could not be processed.
	#[error(transparent)]
	Grammar(#[from] grammar::GrammarError),

	/// Writing to the terminal failed.
	#[error(transparent)]
	Term(#[from] term::Error),
}
118
119impl Error {
120	fn try_print(self) -> Result<(), term::Error> {
121		if let Some(mut t) = term::stdout() {
122			t.fg(term::color::RED)?;
123			t.attr(term::Attr::Bold)?;
124			write!(t, "    Error ")?;
125			t.reset()?;
126			writeln!(t, "{self}")?;
127		}
128
129		Ok(())
130	}
131}
132
133fn scan_target(options: &Options, target: &cargo_metadata::Target) -> Result<(), Error> {
134	scan_file(options, &[], &target.src_path, target.is_example())
135}
136
137fn scan_file(
138	options: &Options,
139	mod_path: &[syn::Ident],
140	filepath: &Utf8Path,
141	prefer_sub_dir: bool,
142) -> Result<(), Error> {
143	let content = fs::read_to_string(filepath)?;
144	let module: syn::File = syn::parse_str(&content)?;
145	let mut dir = filepath.to_owned();
146	dir.pop();
147	scan_items(options, mod_path, &module.items, &dir, prefer_sub_dir)
148}
149
150fn scan_items(
151	options: &Options,
152	mod_path: &[syn::Ident],
153	items: &[syn::Item],
154	dir: &Utf8Path,
155	prefer_sub_dir: bool,
156) -> Result<(), Error> {
157	for item in items {
158		if let syn::Item::Mod(m) = item {
159			let mut sub_mod_path = mod_path.to_vec();
160			sub_mod_path.push(m.ident.clone());
161
162			let mut sub_dir = dir.to_owned();
163			sub_dir.push(m.ident.to_string());
164
165			match &m.content {
166				Some((_, sub_items)) if !sub_items.is_empty() => {
167					scan_items(options, &sub_mod_path, sub_items, &sub_dir, false)?;
168				}
169				_ => {
170					let attributes = ModuleAttributes::parse(&m.attrs)?;
171					let filepath = submodule_path(dir, &m.ident);
172
173					match attributes.grammar {
174						Some(attrs) => {
175							let filepath = filepath.unwrap_or_else(|| {
176								default_submodule_path(dir, &m.ident, prefer_sub_dir)
177							});
178							let grammar = attrs.load(dir)?;
179
180							if let Some(mut t) = term::stdout() {
181								t.fg(term::color::GREEN)?;
182								t.attr(term::Attr::Bold)?;
183								write!(t, "    Building ")?;
184								t.reset()?;
185								writeln!(t, "grammar ({filepath})")?;
186							}
187
188							let tokens = build_grammars(&grammar, &attrs.exports)?;
189							write_grammar(options, tokens, filepath)?;
190						}
191						None => {
192							if let Some(filepath) = filepath {
193								scan_file(options, &sub_mod_path, &filepath, false)?;
194							}
195						}
196					}
197				}
198			}
199		}
200	}
201	Ok(())
202}
203
204fn submodule_path(parent_path: &Utf8Path, ident: &syn::Ident) -> Option<Utf8PathBuf> {
205	let mut candidate = parent_path.to_owned();
206	candidate.push(format!("{ident}.rs"));
207
208	if candidate.exists() {
209		Some(candidate.clone())
210	} else {
211		let mut candidate = parent_path.to_owned();
212		candidate.push(ident.to_string());
213		candidate.push("mod.rs");
214
215		if candidate.exists() {
216			Some(candidate)
217		} else {
218			None
219		}
220	}
221}
222
223fn default_submodule_path(
224	parent_path: &Utf8Path,
225	ident: &syn::Ident,
226	prefer_sub_dir: bool,
227) -> Utf8PathBuf {
228	let mut result = parent_path.to_owned();
229
230	if prefer_sub_dir {
231		result.push(ident.to_string());
232		result.push("mod.rs");
233	} else {
234		result.push(format!("{ident}.rs"));
235	}
236
237	result
238}
239
240fn build_grammars(grammar: &str, exports: &[Export]) -> Result<TokenStream, Error> {
241	let mut result = TokenStream::new();
242
243	for e in exports {
244		result.extend(build_grammar(grammar, e)?);
245	}
246
247	Ok(result)
248}
249
250fn name_to_ident(name: &str, span: Span) -> Result<syn::Ident, Error> {
251	let mut up = true;
252
253	let mut string = String::new();
254	for c in name.chars() {
255		if c.is_control() || c.is_whitespace() || c.is_ascii_punctuation() {
256			up = true;
257		} else {
258			let c = if up {
259				up = false;
260				c.to_uppercase().next().unwrap()
261			} else {
262				c.to_lowercase().next().unwrap()
263			};
264
265			string.push(c);
266		}
267	}
268
269	syn::parse_str(&string).map_err(|_| Error::InvalidIdent(name.to_owned(), span))
270}
271
272fn build_grammar(grammar: &str, entry_point: &Export) -> Result<TokenStream, Error> {
273	let entry_point_name = entry_point.name.value();
274
275	let ident = match &entry_point.ident {
276		Some(ident) => ident.clone(),
277		None => name_to_ident(&entry_point_name, entry_point.name.span())?,
278	};
279
280	let grammar = Grammar::<char>::new(GrammarType::Abnf, grammar, Some(&entry_point_name))?;
281
282	let dfa = grammar.build_automaton();
283
284	if let Some(mut t) = term::stdout() {
285		t.fg(term::color::GREEN)?;
286		t.attr(term::Attr::Bold)?;
287		write!(t, "   Automaton ")?;
288		t.reset()?;
289		writeln!(t, "`{ident}` has {} states", dfa.states().len())?;
290	}
291
292	let methods = generate_automaton_methods(&dfa);
293
294	Ok(quote! {
295		pub struct #ident {
296			state: u32
297		}
298
299		impl #ident {
300			#methods
301
302			pub const fn validate_str(s: &str) -> bool {
303				Self::validate_bytes(s.as_bytes())
304			}
305
306			pub const fn validate_bytes(bytes: &[u8]) -> bool {
307				let mut i = 0;
308
309				let mut automaton = Self::new();
310
311				while i < bytes.len() {
312					match ::static_automata::decode_utf8_char(bytes, i) {
313						Ok((c, len)) => {
314							if !automaton.push(c) {
315								return false
316							}
317
318							i += len;
319						}
320						Err(_) => {
321							return false
322						}
323					}
324				}
325
326				automaton.is_accepting()
327			}
328		}
329	})
330}
331
332fn write_grammar(options: &Options, tokens: TokenStream, path: impl AsRef<Path>) -> io::Result<()> {
333	if options.dry_run {
334		return Ok(());
335	}
336
337	let path = path.as_ref();
338
339	if let Some(parent) = path.parent() {
340		fs::create_dir_all(parent)?;
341	}
342
343	let child = Command::new("rustfmt")
344		.arg("--emit")
345		.arg("stdout")
346		.stdin(Stdio::piped())
347		.stdout(Stdio::piped())
348		.spawn()?;
349
350	let mut rustfmt_in = child.stdin.unwrap();
351	rustfmt_in.write_all(tokens.to_string().as_bytes())?;
352
353	std::mem::drop(rustfmt_in);
354
355	let mut rustfmt_in = child.stdout.unwrap();
356	let mut buffer = [0u8; 1024];
357
358	let mut file = fs::File::create(path)?;
359	file.write_all(HEADER.as_bytes())?;
360
361	loop {
362		let len = rustfmt_in.read(&mut buffer)?;
363		if len == 0 {
364			break;
365		}
366
367		file.write_all(&buffer[..len])?;
368	}
369
370	Ok(())
371}
372
/// Generates the `new`, `push` and `is_accepting` methods of an automaton
/// type.
///
/// - `new` creates the automaton in the initial state of `automaton`;
/// - `push` consumes one token (of the Rust type given by `T::rust_type()`)
///   and moves to the target state of the matching transition. It returns
///   `false`, leaving the state unchanged, when the current state is unknown
///   or has no transition matching the token;
/// - `is_accepting` tells whether the current state is one of the final
///   states of `automaton`.
///
/// The generated methods expect the enclosing type to have a `state` field.
///
/// NOTE(review): if `final_states` were empty, the `matches!` invocation
/// would be generated without any pattern — confirm that this cannot happen.
fn generate_automaton_methods<T: Token>(automaton: &DFA<u32, RangeSet<T>>) -> TokenStream {
	let token_type = T::rust_type();
	let initial_state = *automaton.initial_state();
	let final_states = automaton.final_states();

	// One outer match arm per source state, each containing one inner match
	// arm per outgoing transition (`<token pattern> => <target state>`).
	let states = automaton.transitions().iter().map(|(q, transitions)| {
		let transitions = transitions.iter().map(|(set, target)| {
			let pattern = T::rust_pattern(set);
			quote! {
				#pattern => #target
			}
		});

		quote! {
			#q => match token {
				#(#transitions,)*
				_ => return false
			}
		}
	});

	quote! {
		pub const fn new() -> Self {
			Self {
				state: #initial_state
			}
		}

		pub const fn push(&mut self, token: #token_type) -> bool {
			self.state = match self.state {
				#(#states,)*
				_ => return false
			};

			true
		}

		pub const fn is_accepting(&self) -> bool {
			matches!(self.state, #(#final_states)|*)
		}
	}
}
415
/// Attributes of a module declaration that are relevant to automata
/// generation.
#[derive(Default)]
struct ModuleAttributes {
	/// Grammar attributes, if the module is a grammar module.
	///
	/// Set when the module carries a `#[grammar(...)]` attribute or when its
	/// documentation embeds an `abnf` code block.
	grammar: Option<GrammarAttributes>,
}
422
423impl ModuleAttributes {
424	fn parse(attrs: &[syn::Attribute]) -> Result<Self, syn::Error> {
425		let mut result = Self::default();
426
427		let mut grammar: Option<String> = None;
428		let mut in_block = false;
429
430		for attr in attrs {
431			if attr.path().is_ident("doc") {
432				if let syn::Meta::NameValue(meta) = &attr.meta {
433					let syn::Expr::Lit(e) = &meta.value else {
434						continue;
435					};
436
437					let lit: syn::Lit = e.lit.clone().into();
438					let syn::Lit::Str(lit) = lit else {
439						continue;
440					};
441
442					let value = lit.value();
443					let Some(line) = value.strip_prefix(" ") else {
444						continue;
445					};
446
447					match grammar.as_mut() {
448						Some(grammar) => {
449							if let Some(lang) = line.strip_prefix("```").map(str::trim) {
450								if in_block {
451									grammar.push('\n');
452									in_block = false;
453								} else {
454									in_block = lang == "abnf";
455								}
456							} else if in_block {
457								grammar.push_str(line);
458							}
459						}
460						None => {
461							if line.starts_with("```abnf") {
462								grammar = Some(String::new());
463								in_block = true;
464								continue;
465							}
466						}
467					}
468				}
469			} else if attr.path().is_ident("grammar") {
470				match &attr.meta {
471					syn::Meta::List(meta) => {
472						let grammar_attrs = syn::parse2(meta.tokens.to_token_stream())?;
473						result.grammar.get_or_insert_default().extend(grammar_attrs);
474					}
475					_ => {
476						todo!()
477					}
478				}
479			}
480		}
481
482		if let Some(grammar) = grammar {
483			result.grammar.get_or_insert_default().content = Some(grammar);
484		}
485
486		Ok(result)
487	}
488}
489
/// A grammar rule exported as a generated automaton type.
///
/// Written `"rule-name"` or `"rule-name" as TypeName`.
struct Export {
	/// Name of the grammar rule used as entry point of the automaton.
	name: syn::LitStr,
	/// Explicit name of the generated type. When absent, the type name is
	/// derived from `name`.
	ident: Option<syn::Ident>,
}
494
495impl Parse for Export {
496	fn parse(input: ParseStream) -> syn::Result<Self> {
497		let name = input.parse()?;
498
499		let ident = if input.peek(Token![as]) {
500			let _: Token![as] = input.parse()?;
501			Some(input.parse()?)
502		} else {
503			None
504		};
505
506		Ok(Self { name, ident })
507	}
508}
509
/// Grammar attributes of a module: where the grammar comes from and which
/// rules are exported.
#[derive(Default)]
struct GrammarAttributes {
	/// Grammar text embedded in the module documentation, if any.
	content: Option<String>,
	/// Path of the grammar file, if any. It takes precedence over `content`
	/// when the grammar is loaded.
	file: Option<Utf8PathBuf>,
	/// Rules for which an automaton type is generated.
	exports: Vec<Export>,
}
517
518impl GrammarAttributes {
519	fn add(&mut self, attr: GrammarAttribute) {
520		match attr {
521			GrammarAttribute::File(path) => self.file = Some(path),
522			GrammarAttribute::Export(export) => {
523				self.exports.extend(export);
524			}
525		}
526	}
527
528	fn extend(&mut self, other: Self) {
529		if let Some(value) = other.file {
530			self.file = Some(value)
531		}
532
533		self.exports.extend(other.exports);
534	}
535
536	fn load(&self, base: &Utf8Path) -> io::Result<Cow<'_, str>> {
537		match &self.file {
538			Some(relative_path) => {
539				let path = base.join(relative_path);
540				fs::read_to_string(path).map(Cow::Owned)
541			}
542			None => Ok(Cow::Borrowed(self.content.as_deref().unwrap_or_default())),
543		}
544	}
545}
546
547impl Parse for GrammarAttributes {
548	fn parse(input: ParseStream) -> syn::Result<Self> {
549		let attributes = Punctuated::<GrammarAttribute, Token![,]>::parse_terminated(input)?;
550		let mut result = Self::default();
551
552		for attr in attributes {
553			result.add(attr);
554		}
555
556		Ok(result)
557	}
558}
559
/// A single argument of a `#[grammar(...)]` attribute.
enum GrammarAttribute {
	/// `file = "path"`: path of the grammar file.
	File(Utf8PathBuf),
	/// `export(...)`: comma-separated list of exported rules.
	Export(Punctuated<Export, Token![,]>),
}
564
565impl Parse for GrammarAttribute {
566	fn parse(input: ParseStream) -> syn::Result<Self> {
567		let ident: syn::Ident = input.parse()?;
568
569		if ident == "file" {
570			let _: Token![=] = input.parse()?;
571			let path: syn::LitStr = input.parse()?;
572			Ok(Self::File(path.value().into()))
573		} else if ident == "export" {
574			let content;
575			let _ = syn::parenthesized!(content in input);
576			content
577				.parse_terminated(Export::parse, Token![,])
578				.map(Self::Export)
579		} else {
580			Err(syn::parse::Error::new(
581				ident.span(),
582				"unexpected identifier",
583			))
584		}
585	}
586}