use std::{
borrow::Cow,
fs,
io::{self, Read, Write},
path::Path,
process::{Command, Stdio},
};
use cargo_metadata::{
MetadataCommand,
camino::{Utf8Path, Utf8PathBuf},
};
use grammar::{Grammar, GrammarType};
use iregex::automata::{DFA, RangeSet};
use proc_macro2::{Span, TokenStream};
use quote::{ToTokens, quote};
use syn::{
Token,
parse::{Parse, ParseStream},
punctuated::Punctuated,
};
mod grammar;
mod token;
use token::Token;
/// Banner written at the top of every generated file.
///
/// This is deliberately a plain `//` line comment rather than a `///` doc
/// comment: an outer doc comment would attach to the first generated item
/// (and show up in its rustdoc), and would make the file invalid when no item
/// follows it.
const HEADER: &str = "// This file is auto-generated by `static-automata`. Do not edit.\n";
/// Options controlling how automata are generated.
///
/// The default value has every flag disabled.
#[derive(Debug, Default)]
pub struct Options {
/// When `true`, grammars are still loaded and compiled, but `write_grammar`
/// returns before formatting or writing anything to disk.
pub dry_run: bool,
}
/// Generates every automaton of the root package using default [`Options`].
///
/// Failures are reported through [`build_automata_with`], i.e. printed rather
/// than returned.
pub fn build_automata() {
    build_automata_with(&Options::default())
}
/// Generates every automaton of the root package with the given `options`.
///
/// Any error is printed on a best-effort basis and then discarded: this
/// function always returns normally. Use [`try_build_automata_with`] to get
/// the error back instead.
pub fn build_automata_with(options: &Options) {
    match try_build_automata_with(options) {
        Ok(()) => {}
        Err(error) => {
            // Reporting is best effort: a failure to print is ignored too.
            let _ = error.try_print();
        }
    }
}
/// Fallible counterpart of [`build_automata`]: runs the generation with
/// default [`Options`] and returns the first error encountered.
pub fn try_build_automata() -> Result<(), Error> {
    try_build_automata_with(&Options::default())
}
pub fn try_build_automata_with(options: &Options) -> Result<(), Error> {
let metadata = MetadataCommand::new().exec()?;
let package = metadata.root_package().ok_or(Error::NoRootPackage)?;
for target in &package.targets {
scan_target(&options, target)?;
}
Ok(())
}
/// Errors that can occur while scanning sources and generating automata.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// Running `cargo metadata` failed.
#[error(transparent)]
Metadata(#[from] cargo_metadata::Error),
/// The cargo metadata does not describe a root package.
#[error("no root package")]
NoRootPackage,
/// An exported rule name could not be turned into a Rust identifier.
///
/// Holds the offending name and the span of the literal it came from.
#[error("invalid identifier `{0}`")]
InvalidIdent(String, Span),
/// A filesystem or process I/O operation failed.
#[error(transparent)]
Io(#[from] io::Error),
/// A Rust source file or a `#[grammar(...)]` attribute could not be parsed.
#[error(transparent)]
Syntax(#[from] syn::Error),
/// The grammar itself was rejected.
#[error(transparent)]
Grammar(#[from] grammar::GrammarError),
/// Writing styled output to the terminal failed.
#[error(transparent)]
Term(#[from] term::Error),
}
impl Error {
    /// Prints this error, prefixed by a bold red ` Error ` tag.
    ///
    /// When no terminal is available on stdout (`term::stdout()` returns
    /// `None`), the message is written unstyled to stderr instead, so the
    /// error is never silently lost.
    ///
    /// # Errors
    ///
    /// Returns the terminal error if styling or writing to the terminal fails.
    fn try_print(self) -> Result<(), term::Error> {
        match term::stdout() {
            Some(mut t) => {
                t.fg(term::color::RED)?;
                t.attr(term::Attr::Bold)?;
                write!(t, " Error ")?;
                t.reset()?;
                writeln!(t, "{self}")?;
            }
            // No usable terminal: fall back to plain stderr output.
            None => eprintln!(" Error {self}"),
        }
        Ok(())
    }
}
/// Scans the entry source file of a cargo target.
///
/// The entry file is scanned with an empty module path, and example targets
/// are scanned with `prefer_sub_dir` enabled.
fn scan_target(options: &Options, target: &cargo_metadata::Target) -> Result<(), Error> {
    let root_file = &target.src_path;
    let prefer_sub_dir = target.is_example();
    scan_file(options, &[], root_file, prefer_sub_dir)
}
/// Parses the Rust source file at `filepath` and scans its items.
///
/// `mod_path` is the module path of the file inside its crate; it is empty
/// for a target's entry file. `prefer_sub_dir` is forwarded to
/// [`scan_items`].
///
/// The directory handed to [`scan_items`] is the one in which the file's
/// child modules live, following Rust's module file layout:
///
/// - for an entry file (empty `mod_path`) or a `mod.rs` file, it is the
///   directory containing the file;
/// - for any other file `dir/name.rs`, it is `dir/name/`.
///
/// # Errors
///
/// Fails if the file cannot be read or parsed, or if scanning its items
/// fails.
fn scan_file(
    options: &Options,
    mod_path: &[syn::Ident],
    filepath: &Utf8Path,
    prefer_sub_dir: bool,
) -> Result<(), Error> {
    let content = fs::read_to_string(filepath)?;
    let module: syn::File = syn::parse_str(&content)?;

    let mut dir = filepath.to_owned();
    dir.pop();

    // A non-root `name.rs` file keeps its child modules in `name/`, not next
    // to itself; only entry files and `mod.rs` files use their own directory.
    if !mod_path.is_empty() {
        match filepath.file_stem() {
            Some(stem) if stem != "mod" => dir.push(stem),
            _ => {}
        }
    }

    scan_items(options, mod_path, &module.items, &dir, prefer_sub_dir)
}
/// Walks `items` looking for module declarations and processes each one.
///
/// - An inline module with a non-empty body is scanned recursively, using
///   `dir/<module name>` as the directory of its own child modules.
/// - Any other module (external, or inline but empty) has its attributes
///   parsed. With grammar attributes, the grammar is loaded relative to `dir`,
///   compiled, and written to the module's source file: the existing file if
///   one is found, otherwise the default location selected by
///   `prefer_sub_dir`. Without grammar attributes, the module's source file,
///   if found, is scanned in turn.
///
/// Non-module items are ignored.
///
/// # Errors
///
/// Stops at, and returns, the first error raised by attribute parsing,
/// grammar loading or compilation, terminal output, file writing, or a
/// recursive scan.
fn scan_items(
    options: &Options,
    mod_path: &[syn::Ident],
    items: &[syn::Item],
    dir: &Utf8Path,
    prefer_sub_dir: bool,
) -> Result<(), Error> {
    for item in items {
        let syn::Item::Mod(module) = item else {
            continue;
        };

        // Module path of the declared module: parent path + its own name.
        let sub_mod_path = [mod_path, std::slice::from_ref(&module.ident)].concat();

        // Inline module with actual content: recurse into its items.
        if let Some((_, sub_items)) = &module.content {
            if !sub_items.is_empty() {
                let mut sub_dir = dir.to_owned();
                sub_dir.push(module.ident.to_string());
                scan_items(options, &sub_mod_path, sub_items, &sub_dir, false)?;
                continue;
            }
        }

        let attributes = ModuleAttributes::parse(&module.attrs)?;
        let existing_file = submodule_path(dir, &module.ident);

        if let Some(attrs) = attributes.grammar {
            // Grammar module: (re)generate its source file.
            let target = existing_file
                .unwrap_or_else(|| default_submodule_path(dir, &module.ident, prefer_sub_dir));
            let grammar = attrs.load(dir)?;

            if let Some(mut t) = term::stdout() {
                t.fg(term::color::GREEN)?;
                t.attr(term::Attr::Bold)?;
                write!(t, " Building ")?;
                t.reset()?;
                writeln!(t, "grammar ({target})")?;
            }

            let tokens = build_grammars(&grammar, &attrs.exports)?;
            write_grammar(options, tokens, target)?;
        } else if let Some(file) = existing_file {
            // Regular external module: keep scanning inside its file.
            scan_file(options, &sub_mod_path, &file, false)?;
        }
    }

    Ok(())
}
/// Locates the existing source file of the module `ident` declared in
/// `parent_path`.
///
/// `<parent_path>/<ident>.rs` is tried first, then
/// `<parent_path>/<ident>/mod.rs`. Returns `None` when neither exists.
fn submodule_path(parent_path: &Utf8Path, ident: &syn::Ident) -> Option<Utf8PathBuf> {
    let flat_file = parent_path.join(format!("{ident}.rs"));
    if flat_file.exists() {
        return Some(flat_file);
    }

    let nested_file = parent_path.join(ident.to_string()).join("mod.rs");
    nested_file.exists().then_some(nested_file)
}
/// Computes where the source file of module `ident` should be created when
/// it does not exist yet.
///
/// Yields `<parent_path>/<ident>/mod.rs` when `prefer_sub_dir` is set, and
/// `<parent_path>/<ident>.rs` otherwise. The filesystem is not touched.
fn default_submodule_path(
    parent_path: &Utf8Path,
    ident: &syn::Ident,
    prefer_sub_dir: bool,
) -> Utf8PathBuf {
    if prefer_sub_dir {
        parent_path.join(ident.to_string()).join("mod.rs")
    } else {
        parent_path.join(format!("{ident}.rs"))
    }
}
/// Generates the code of every export of `grammar`, concatenated in
/// declaration order.
///
/// # Errors
///
/// Stops at, and returns, the first export that fails to build.
fn build_grammars(grammar: &str, exports: &[Export]) -> Result<TokenStream, Error> {
    exports
        .iter()
        .try_fold(TokenStream::new(), |mut generated, export| {
            generated.extend(build_grammar(grammar, export)?);
            Ok(generated)
        })
}
fn name_to_ident(name: &str, span: Span) -> Result<syn::Ident, Error> {
let mut up = true;
let mut string = String::new();
for c in name.chars() {
if c.is_control() || c.is_whitespace() || c.is_ascii_punctuation() {
up = true;
} else {
let c = if up {
up = false;
c.to_uppercase().next().unwrap()
} else {
c.to_lowercase().next().unwrap()
};
string.push(c);
}
}
syn::parse_str(&string).map_err(|_| Error::InvalidIdent(name.to_owned(), span))
}
/// Generates the automaton type for one exported entry point of `grammar`.
///
/// The generated struct is named after the export's explicit identifier when
/// one is given, otherwise after the rule name converted by `name_to_ident`.
/// The grammar text is parsed as ABNF with the export's rule name as entry
/// point, compiled to a DFA, and the DFA's state count is printed when a
/// terminal is available.
///
/// The generated code contains the struct (a single `state: u32` field), the
/// methods produced by `generate_automaton_methods`, and two `const fn`
/// validators: `validate_str` and `validate_bytes`. `validate_bytes` decodes
/// the input with `::static_automata::decode_utf8_char` and returns `false` on
/// a decoding error or as soon as the automaton rejects a character.
///
/// # Errors
///
/// Fails if the rule name cannot be converted to an identifier, if the
/// grammar is rejected, or if writing to the terminal fails.
fn build_grammar(grammar: &str, entry_point: &Export) -> Result<TokenStream, Error> {
let entry_point_name = entry_point.name.value();
// An explicit `as Ident` wins over the name derived from the rule name.
let ident = match &entry_point.ident {
Some(ident) => ident.clone(),
None => name_to_ident(&entry_point_name, entry_point.name.span())?,
};
let grammar = Grammar::<char>::new(GrammarType::Abnf, grammar, Some(&entry_point_name))?;
let dfa = grammar.build_automaton();
// Progress report; skipped entirely when no terminal is available.
if let Some(mut t) = term::stdout() {
t.fg(term::color::GREEN)?;
t.attr(term::Attr::Bold)?;
write!(t, " Automaton ")?;
t.reset()?;
writeln!(t, "`{ident}` has {} states", dfa.states().len())?;
}
let methods = generate_automaton_methods(&dfa);
Ok(quote! {
pub struct #ident {
state: u32
}
impl #ident {
#methods
pub const fn validate_str(s: &str) -> bool {
Self::validate_bytes(s.as_bytes())
}
pub const fn validate_bytes(bytes: &[u8]) -> bool {
let mut i = 0;
let mut automaton = Self::new();
while i < bytes.len() {
match ::static_automata::decode_utf8_char(bytes, i) {
Ok((c, len)) => {
if !automaton.push(c) {
return false
}
i += len;
}
Err(_) => {
return false
}
}
}
automaton.is_accepting()
}
}
})
}
/// Formats `tokens` with `rustfmt` and writes the result, preceded by
/// [`HEADER`], to `path`.
///
/// Does nothing when `options.dry_run` is set. Missing parent directories of
/// `path` are created.
///
/// The destination file is only created once `rustfmt` has exited
/// successfully, so a formatting failure never truncates or overwrites an
/// existing file with partial content.
///
/// # Errors
///
/// Fails if `rustfmt` cannot be spawned, if piping data to or from it fails,
/// if it exits with a non-success status, or if the destination cannot be
/// written.
fn write_grammar(options: &Options, tokens: TokenStream, path: impl AsRef<Path>) -> io::Result<()> {
    if options.dry_run {
        return Ok(());
    }

    let mut child = Command::new("rustfmt")
        .arg("--emit")
        .arg("stdout")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;

    // Feed the unformatted source. The handle is dropped at the end of the
    // match arm, which closes the pipe and lets `rustfmt` see end-of-input.
    let fed = match child.stdin.take() {
        Some(mut stdin) => stdin.write_all(tokens.to_string().as_bytes()),
        None => Ok(()),
    };

    // Collect the whole formatted output before touching the destination.
    let mut formatted = Vec::new();
    let drained = match child.stdout.take() {
        Some(mut stdout) => stdout.read_to_end(&mut formatted).map(drop),
        None => Ok(()),
    };

    // Always reap the child, even if piping failed, before reporting errors.
    let status = child.wait()?;
    fed?;
    drained?;
    if !status.success() {
        return Err(io::Error::other(format!("rustfmt failed ({status})")));
    }

    let path = path.as_ref();
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }

    let mut file = fs::File::create(path)?;
    file.write_all(HEADER.as_bytes())?;
    file.write_all(&formatted)?;
    Ok(())
}
/// Generates the `new`, `push` and `is_accepting` methods of an automaton
/// type from the given DFA.
///
/// - `new` starts in the DFA's initial state.
/// - `push` takes one token (of the Rust type given by `T::rust_type()`),
///   looks up the transition of the current state whose pattern (built by
///   `T::rust_pattern`) matches it, and moves to its target state. When no
///   transition matches, it returns `false` and leaves the state unchanged.
/// - `is_accepting` tells whether the current state is one of the DFA's final
///   states.
///
/// NOTE(review): `is_accepting` expands to `matches!(self.state, <finals>)`;
/// if the DFA had no final state at all the pattern list would be empty —
/// confirm whether that can happen and how the generated code should behave.
fn generate_automaton_methods<T: Token>(automaton: &DFA<u32, RangeSet<T>>) -> TokenStream {
let token_type = T::rust_type();
let initial_state = *automaton.initial_state();
let final_states = automaton.final_states();
// One `match` arm per source state, each dispatching on the incoming token.
let states = automaton.transitions().iter().map(|(q, transitions)| {
let transitions = transitions.iter().map(|(set, target)| {
let pattern = T::rust_pattern(set);
quote! {
#pattern => #target
}
});
quote! {
#q => match token {
#(#transitions,)*
_ => return false
}
}
});
quote! {
pub const fn new() -> Self {
Self {
state: #initial_state
}
}
pub const fn push(&mut self, token: #token_type) -> bool {
self.state = match self.state {
#(#states,)*
_ => unreachable!()
};
true
}
pub const fn is_accepting(&self) -> bool {
matches!(self.state, #(#final_states)|*)
}
}
}
/// Attributes of a module declaration that are relevant to this tool.
#[derive(Default)]
struct ModuleAttributes {
/// Grammar settings; `None` when the module carries neither a
/// `#[grammar(...)]` attribute nor an `abnf` code block in its doc comments.
grammar: Option<GrammarAttributes>,
}
impl ModuleAttributes {
fn parse(attrs: &[syn::Attribute]) -> Result<Self, syn::Error> {
let mut result = Self::default();
let mut grammar: Option<String> = None;
let mut in_block = false;
for attr in attrs {
if attr.path().is_ident("doc") {
if let syn::Meta::NameValue(meta) = &attr.meta {
let syn::Expr::Lit(e) = &meta.value else {
continue;
};
let lit: syn::Lit = e.lit.clone().into();
let syn::Lit::Str(lit) = lit else {
continue;
};
let value = lit.value();
let Some(line) = value.strip_prefix(" ") else {
continue;
};
match grammar.as_mut() {
Some(grammar) => {
if let Some(lang) = line.strip_prefix("```").map(str::trim) {
if in_block {
grammar.push('\n');
in_block = false;
} else {
in_block = lang == "abnf";
}
} else if in_block {
grammar.push_str(line);
}
}
None => {
if line.starts_with("```abnf") {
grammar = Some(String::new());
in_block = true;
continue;
}
}
}
}
} else if attr.path().is_ident("grammar") {
match &attr.meta {
syn::Meta::List(meta) => {
let grammar_attrs = syn::parse2(meta.tokens.to_token_stream())?;
result.grammar.get_or_insert_default().extend(grammar_attrs);
}
_ => {
todo!()
}
}
}
}
if let Some(grammar) = grammar {
result.grammar.get_or_insert_default().content = Some(grammar);
}
Ok(result)
}
}
/// One exported entry point of a grammar, written `"rule-name"` or
/// `"rule-name" as Ident`.
struct Export {
/// Name of the grammar rule used as entry point.
name: syn::LitStr,
/// Explicit name of the generated type; derived from `name` when absent.
ident: Option<syn::Ident>,
}
impl Parse for Export {
    /// Parses `"rule-name"` optionally followed by `as Ident`.
    fn parse(input: ParseStream) -> syn::Result<Self> {
        let name: syn::LitStr = input.parse()?;

        // Without an `as` keyword there is no explicit type name.
        if !input.peek(Token![as]) {
            return Ok(Self { name, ident: None });
        }

        input.parse::<Token![as]>()?;
        let ident = input.parse::<syn::Ident>()?;
        Ok(Self {
            name,
            ident: Some(ident),
        })
    }
}
/// Grammar settings gathered from a module's attributes.
#[derive(Default)]
struct GrammarAttributes {
/// Inline grammar text (collected from doc comments by
/// `ModuleAttributes::parse`).
content: Option<String>,
/// Path of an external grammar file, set by the `file = "..."` option.
/// When present, `load` reads this file instead of using `content`.
file: Option<Utf8PathBuf>,
/// Entry points to generate, set by the `export(...)` option.
exports: Vec<Export>,
}
impl GrammarAttributes {
    /// Applies a single parsed option: `file` replaces any previous file,
    /// `export` appends to the list of exports.
    fn add(&mut self, attr: GrammarAttribute) {
        match attr {
            GrammarAttribute::File(path) => self.file = Some(path),
            GrammarAttribute::Export(export) => self.exports.extend(export),
        }
    }

    /// Merges `other` into `self`.
    ///
    /// Every value set in `other` (`content`, `file`) overrides the one in
    /// `self`; values left unset in `other` keep their current value. The
    /// exports of `other` are appended.
    fn extend(&mut self, other: Self) {
        if other.content.is_some() {
            self.content = other.content;
        }
        if other.file.is_some() {
            self.file = other.file;
        }
        self.exports.extend(other.exports);
    }

    /// Returns the grammar text.
    ///
    /// When a `file` is set it is read from `base/<file>`; otherwise the
    /// inline `content` is borrowed, or the empty string if there is none.
    ///
    /// # Errors
    ///
    /// Fails if the grammar file cannot be read.
    fn load(&self, base: &Utf8Path) -> io::Result<Cow<'_, str>> {
        match &self.file {
            Some(relative_path) => fs::read_to_string(base.join(relative_path)).map(Cow::Owned),
            None => Ok(Cow::Borrowed(self.content.as_deref().unwrap_or_default())),
        }
    }
}
impl Parse for GrammarAttributes {
fn parse(input: ParseStream) -> syn::Result<Self> {
let attributes = Punctuated::<GrammarAttribute, Token![,]>::parse_terminated(input)?;
let mut result = Self::default();
for attr in attributes {
result.add(attr);
}
Ok(result)
}
}
/// A single option inside `#[grammar(...)]`.
enum GrammarAttribute {
/// `file = "path"`: path of the grammar file.
File(Utf8PathBuf),
/// `export(...)`: comma-separated list of entry points to generate.
Export(Punctuated<Export, Token![,]>),
}
impl Parse for GrammarAttribute {
fn parse(input: ParseStream) -> syn::Result<Self> {
let ident: syn::Ident = input.parse()?;
if ident == "file" {
let _: Token![=] = input.parse()?;
let path: syn::LitStr = input.parse()?;
Ok(Self::File(path.value().into()))
} else if ident == "export" {
let content;
let _ = syn::parenthesized!(content in input);
content
.parse_terminated(Export::parse, Token![,])
.map(Self::Export)
} else {
Err(syn::parse::Error::new(
ident.span(),
"unexpected identifier",
))
}
}
}