1use std::{
7 borrow::Cow,
8 fs,
9 io::{self, Read, Write},
10 path::Path,
11 process::{Command, Stdio},
12};
13
14use cargo_metadata::{
15 MetadataCommand,
16 camino::{Utf8Path, Utf8PathBuf},
17};
18use grammar::{Grammar, GrammarType};
19use iregex::automata::{DFA, RangeSet};
20use proc_macro2::{Span, TokenStream};
21use quote::{ToTokens, quote};
22use syn::{
23 Token,
24 parse::{Parse, ParseStream},
25 punctuated::Punctuated,
26};
27
28mod grammar;
29mod token;
30
31use token::Token;
32
// First bytes written to every generated file, ahead of the rustfmt output
// (see `write_grammar`).
// NOTE(review): the text starts with `///`, so inside the generated file it
// reads as an outer doc comment attached to the first generated item rather
// than as a plain file banner — confirm that this is intended.
const HEADER: &str = "/// This file is auto-generated by `static-automata`. Do not edit.\n";
34
/// Options controlling how the automata are built.
#[derive(Debug, Default)]
pub struct Options {
    /// When `true`, grammars are still loaded and compiled, but no generated
    /// file is written (`write_grammar` returns before touching the disk).
    pub dry_run: bool,
}
41
42pub fn build_automata() {
49 let options = Options::default();
50 build_automata_with(&options)
51}
52
53pub fn build_automata_with(options: &Options) {
60 if let Err(e) = try_build_automata_with(options) {
61 let _ = e.try_print();
62 }
63}
64
65pub fn try_build_automata() -> Result<(), Error> {
72 let options = Options::default();
73 try_build_automata_with(&options)
74}
75
76pub fn try_build_automata_with(options: &Options) -> Result<(), Error> {
83 let metadata = MetadataCommand::new().exec()?;
84
85 let package = metadata.root_package().ok_or(Error::NoRootPackage)?;
86
87 for target in &package.targets {
88 scan_target(&options, target)?;
89 }
90
91 Ok(())
92}
93
/// Error raised while building the automata.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// The `cargo metadata` invocation failed.
    #[error(transparent)]
    Metadata(#[from] cargo_metadata::Error),

    /// The cargo metadata does not designate a root package.
    #[error("no root package")]
    NoRootPackage,

    /// A grammar rule name could not be turned into a Rust identifier.
    ///
    /// Holds the offending name and the span of the literal it came from.
    #[error("invalid identifier `{0}`")]
    InvalidIdent(String, Span),

    /// Reading a source or grammar file, or writing a generated file, failed.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// A Rust source file or a `grammar` attribute could not be parsed.
    #[error(transparent)]
    Syntax(#[from] syn::Error),

    /// The grammar itself was rejected.
    #[error(transparent)]
    Grammar(#[from] grammar::GrammarError),

    /// Writing styled output to the terminal failed.
    #[error(transparent)]
    Term(#[from] term::Error),
}
118
119impl Error {
120 fn try_print(self) -> Result<(), term::Error> {
121 if let Some(mut t) = term::stdout() {
122 t.fg(term::color::RED)?;
123 t.attr(term::Attr::Bold)?;
124 write!(t, " Error ")?;
125 t.reset()?;
126 writeln!(t, "{self}")?;
127 }
128
129 Ok(())
130 }
131}
132
133fn scan_target(options: &Options, target: &cargo_metadata::Target) -> Result<(), Error> {
134 scan_file(options, &[], &target.src_path, target.is_example())
135}
136
137fn scan_file(
138 options: &Options,
139 mod_path: &[syn::Ident],
140 filepath: &Utf8Path,
141 prefer_sub_dir: bool,
142) -> Result<(), Error> {
143 let content = fs::read_to_string(filepath)?;
144 let module: syn::File = syn::parse_str(&content)?;
145 let mut dir = filepath.to_owned();
146 dir.pop();
147 scan_items(options, mod_path, &module.items, &dir, prefer_sub_dir)
148}
149
150fn scan_items(
151 options: &Options,
152 mod_path: &[syn::Ident],
153 items: &[syn::Item],
154 dir: &Utf8Path,
155 prefer_sub_dir: bool,
156) -> Result<(), Error> {
157 for item in items {
158 if let syn::Item::Mod(m) = item {
159 let mut sub_mod_path = mod_path.to_vec();
160 sub_mod_path.push(m.ident.clone());
161
162 let mut sub_dir = dir.to_owned();
163 sub_dir.push(m.ident.to_string());
164
165 match &m.content {
166 Some((_, sub_items)) if !sub_items.is_empty() => {
167 scan_items(options, &sub_mod_path, sub_items, &sub_dir, false)?;
168 }
169 _ => {
170 let attributes = ModuleAttributes::parse(&m.attrs)?;
171 let filepath = submodule_path(dir, &m.ident);
172
173 match attributes.grammar {
174 Some(attrs) => {
175 let filepath = filepath.unwrap_or_else(|| {
176 default_submodule_path(dir, &m.ident, prefer_sub_dir)
177 });
178 let grammar = attrs.load(dir)?;
179
180 if let Some(mut t) = term::stdout() {
181 t.fg(term::color::GREEN)?;
182 t.attr(term::Attr::Bold)?;
183 write!(t, " Building ")?;
184 t.reset()?;
185 writeln!(t, "grammar ({filepath})")?;
186 }
187
188 let tokens = build_grammars(&grammar, &attrs.exports)?;
189 write_grammar(options, tokens, filepath)?;
190 }
191 None => {
192 if let Some(filepath) = filepath {
193 scan_file(options, &sub_mod_path, &filepath, false)?;
194 }
195 }
196 }
197 }
198 }
199 }
200 }
201 Ok(())
202}
203
204fn submodule_path(parent_path: &Utf8Path, ident: &syn::Ident) -> Option<Utf8PathBuf> {
205 let mut candidate = parent_path.to_owned();
206 candidate.push(format!("{ident}.rs"));
207
208 if candidate.exists() {
209 Some(candidate.clone())
210 } else {
211 let mut candidate = parent_path.to_owned();
212 candidate.push(ident.to_string());
213 candidate.push("mod.rs");
214
215 if candidate.exists() {
216 Some(candidate)
217 } else {
218 None
219 }
220 }
221}
222
223fn default_submodule_path(
224 parent_path: &Utf8Path,
225 ident: &syn::Ident,
226 prefer_sub_dir: bool,
227) -> Utf8PathBuf {
228 let mut result = parent_path.to_owned();
229
230 if prefer_sub_dir {
231 result.push(ident.to_string());
232 result.push("mod.rs");
233 } else {
234 result.push(format!("{ident}.rs"));
235 }
236
237 result
238}
239
240fn build_grammars(grammar: &str, exports: &[Export]) -> Result<TokenStream, Error> {
241 let mut result = TokenStream::new();
242
243 for e in exports {
244 result.extend(build_grammar(grammar, e)?);
245 }
246
247 Ok(result)
248}
249
250fn name_to_ident(name: &str, span: Span) -> Result<syn::Ident, Error> {
251 let mut up = true;
252
253 let mut string = String::new();
254 for c in name.chars() {
255 if c.is_control() || c.is_whitespace() || c.is_ascii_punctuation() {
256 up = true;
257 } else {
258 let c = if up {
259 up = false;
260 c.to_uppercase().next().unwrap()
261 } else {
262 c.to_lowercase().next().unwrap()
263 };
264
265 string.push(c);
266 }
267 }
268
269 syn::parse_str(&string).map_err(|_| Error::InvalidIdent(name.to_owned(), span))
270}
271
/// Generates the automaton type for one exported rule of `grammar`.
///
/// The grammar text is parsed as ABNF with the export's rule name as entry
/// point, then turned into an automaton. The generated type is named after
/// the explicit `as <Ident>` of the export when present, and after the rule
/// name (see `name_to_ident`) otherwise. A summary line with the number of
/// states is printed when a terminal is available.
///
/// The generated code is a struct holding the current state, the methods
/// produced by `generate_automaton_methods`, and two validation helpers
/// (`validate_str` and `validate_bytes`) that feed decoded characters to the
/// automaton.
///
/// # Errors
///
/// Fails if the identifier cannot be derived, if the grammar is rejected, or
/// if writing to the terminal fails.
fn build_grammar(grammar: &str, entry_point: &Export) -> Result<TokenStream, Error> {
    let entry_point_name = entry_point.name.value();

    // An explicit identifier takes precedence over the derived one.
    let ident = match &entry_point.ident {
        Some(ident) => ident.clone(),
        None => name_to_ident(&entry_point_name, entry_point.name.span())?,
    };

    let grammar = Grammar::<char>::new(GrammarType::Abnf, grammar, Some(&entry_point_name))?;

    let dfa = grammar.build_automaton();

    if let Some(mut t) = term::stdout() {
        t.fg(term::color::GREEN)?;
        t.attr(term::Attr::Bold)?;
        write!(t, "   Automaton ")?;
        t.reset()?;
        writeln!(t, "`{ident}` has {} states", dfa.states().len())?;
    }

    let methods = generate_automaton_methods(&dfa);

    Ok(quote! {
        pub struct #ident {
            state: u32
        }

        impl #ident {
            #methods

            pub const fn validate_str(s: &str) -> bool {
                Self::validate_bytes(s.as_bytes())
            }

            pub const fn validate_bytes(bytes: &[u8]) -> bool {
                let mut i = 0;

                let mut automaton = Self::new();

                while i < bytes.len() {
                    match ::static_automata::decode_utf8_char(bytes, i) {
                        Ok((c, len)) => {
                            if !automaton.push(c) {
                                return false
                            }

                            i += len;
                        }
                        Err(_) => {
                            return false
                        }
                    }
                }

                automaton.is_accepting()
            }
        }
    })
}
331
332fn write_grammar(options: &Options, tokens: TokenStream, path: impl AsRef<Path>) -> io::Result<()> {
333 if options.dry_run {
334 return Ok(());
335 }
336
337 let path = path.as_ref();
338
339 if let Some(parent) = path.parent() {
340 fs::create_dir_all(parent)?;
341 }
342
343 let child = Command::new("rustfmt")
344 .arg("--emit")
345 .arg("stdout")
346 .stdin(Stdio::piped())
347 .stdout(Stdio::piped())
348 .spawn()?;
349
350 let mut rustfmt_in = child.stdin.unwrap();
351 rustfmt_in.write_all(tokens.to_string().as_bytes())?;
352
353 std::mem::drop(rustfmt_in);
354
355 let mut rustfmt_in = child.stdout.unwrap();
356 let mut buffer = [0u8; 1024];
357
358 let mut file = fs::File::create(path)?;
359 file.write_all(HEADER.as_bytes())?;
360
361 loop {
362 let len = rustfmt_in.read(&mut buffer)?;
363 if len == 0 {
364 break;
365 }
366
367 file.write_all(&buffer[..len])?;
368 }
369
370 Ok(())
371}
372
373fn generate_automaton_methods<T: Token>(automaton: &DFA<u32, RangeSet<T>>) -> TokenStream {
374 let token_type = T::rust_type();
375 let initial_state = *automaton.initial_state();
376 let final_states = automaton.final_states();
377
378 let states = automaton.transitions().iter().map(|(q, transitions)| {
379 let transitions = transitions.iter().map(|(set, target)| {
380 let pattern = T::rust_pattern(set);
381 quote! {
382 #pattern => #target
383 }
384 });
385
386 quote! {
387 #q => match token {
388 #(#transitions,)*
389 _ => return false
390 }
391 }
392 });
393
394 quote! {
395 pub const fn new() -> Self {
396 Self {
397 state: #initial_state
398 }
399 }
400
401 pub const fn push(&mut self, token: #token_type) -> bool {
402 self.state = match self.state {
403 #(#states,)*
404 _ => return false
405 };
406
407 true
408 }
409
410 pub const fn is_accepting(&self) -> bool {
411 matches!(self.state, #(#final_states)|*)
412 }
413 }
414}
415
/// Attributes of a module declaration that are relevant to this tool.
#[derive(Default)]
struct ModuleAttributes {
    /// Grammar settings gathered from the module's `grammar` attributes and
    /// `abnf` documentation blocks, or `None` if the module has neither.
    grammar: Option<GrammarAttributes>,
}
422
423impl ModuleAttributes {
424 fn parse(attrs: &[syn::Attribute]) -> Result<Self, syn::Error> {
425 let mut result = Self::default();
426
427 let mut grammar: Option<String> = None;
428 let mut in_block = false;
429
430 for attr in attrs {
431 if attr.path().is_ident("doc") {
432 if let syn::Meta::NameValue(meta) = &attr.meta {
433 let syn::Expr::Lit(e) = &meta.value else {
434 continue;
435 };
436
437 let lit: syn::Lit = e.lit.clone().into();
438 let syn::Lit::Str(lit) = lit else {
439 continue;
440 };
441
442 let value = lit.value();
443 let Some(line) = value.strip_prefix(" ") else {
444 continue;
445 };
446
447 match grammar.as_mut() {
448 Some(grammar) => {
449 if let Some(lang) = line.strip_prefix("```").map(str::trim) {
450 if in_block {
451 grammar.push('\n');
452 in_block = false;
453 } else {
454 in_block = lang == "abnf";
455 }
456 } else if in_block {
457 grammar.push_str(line);
458 }
459 }
460 None => {
461 if line.starts_with("```abnf") {
462 grammar = Some(String::new());
463 in_block = true;
464 continue;
465 }
466 }
467 }
468 }
469 } else if attr.path().is_ident("grammar") {
470 match &attr.meta {
471 syn::Meta::List(meta) => {
472 let grammar_attrs = syn::parse2(meta.tokens.to_token_stream())?;
473 result.grammar.get_or_insert_default().extend(grammar_attrs);
474 }
475 _ => {
476 todo!()
477 }
478 }
479 }
480 }
481
482 if let Some(grammar) = grammar {
483 result.grammar.get_or_insert_default().content = Some(grammar);
484 }
485
486 Ok(result)
487 }
488}
489
/// A grammar rule exported as an automaton type.
///
/// Parsed from `"rule-name"` or `"rule-name" as TypeName`.
struct Export {
    /// Name of the grammar rule used as entry point.
    name: syn::LitStr,
    /// Explicit name of the generated type, if any. When absent, the type
    /// name is derived from the rule name.
    ident: Option<syn::Ident>,
}
494
495impl Parse for Export {
496 fn parse(input: ParseStream) -> syn::Result<Self> {
497 let name = input.parse()?;
498
499 let ident = if input.peek(Token![as]) {
500 let _: Token![as] = input.parse()?;
501 Some(input.parse()?)
502 } else {
503 None
504 };
505
506 Ok(Self { name, ident })
507 }
508}
509
/// Grammar settings attached to a module.
#[derive(Default)]
struct GrammarAttributes {
    /// Inline grammar text, collected from the module documentation.
    content: Option<String>,
    /// Path of an external grammar file, given by `file = "..."`. It is
    /// resolved against a base directory when the grammar is loaded and takes
    /// precedence over `content`.
    file: Option<Utf8PathBuf>,
    /// Rules to export, given by `export(...)`.
    exports: Vec<Export>,
}
517
518impl GrammarAttributes {
519 fn add(&mut self, attr: GrammarAttribute) {
520 match attr {
521 GrammarAttribute::File(path) => self.file = Some(path),
522 GrammarAttribute::Export(export) => {
523 self.exports.extend(export);
524 }
525 }
526 }
527
528 fn extend(&mut self, other: Self) {
529 if let Some(value) = other.file {
530 self.file = Some(value)
531 }
532
533 self.exports.extend(other.exports);
534 }
535
536 fn load(&self, base: &Utf8Path) -> io::Result<Cow<'_, str>> {
537 match &self.file {
538 Some(relative_path) => {
539 let path = base.join(relative_path);
540 fs::read_to_string(path).map(Cow::Owned)
541 }
542 None => Ok(Cow::Borrowed(self.content.as_deref().unwrap_or_default())),
543 }
544 }
545}
546
547impl Parse for GrammarAttributes {
548 fn parse(input: ParseStream) -> syn::Result<Self> {
549 let attributes = Punctuated::<GrammarAttribute, Token![,]>::parse_terminated(input)?;
550 let mut result = Self::default();
551
552 for attr in attributes {
553 result.add(attr);
554 }
555
556 Ok(result)
557 }
558}
559
/// A single argument of a `grammar` attribute.
enum GrammarAttribute {
    /// `file = "path"`: path of an external grammar file.
    File(Utf8PathBuf),
    /// `export(...)`: list of rules to export.
    Export(Punctuated<Export, Token![,]>),
}
564
565impl Parse for GrammarAttribute {
566 fn parse(input: ParseStream) -> syn::Result<Self> {
567 let ident: syn::Ident = input.parse()?;
568
569 if ident == "file" {
570 let _: Token![=] = input.parse()?;
571 let path: syn::LitStr = input.parse()?;
572 Ok(Self::File(path.value().into()))
573 } else if ident == "export" {
574 let content;
575 let _ = syn::parenthesized!(content in input);
576 content
577 .parse_terminated(Export::parse, Token![,])
578 .map(Self::Export)
579 } else {
580 Err(syn::parse::Error::new(
581 ident.span(),
582 "unexpected identifier",
583 ))
584 }
585 }
586}