use std::collections::{HashMap, HashSet};
use std::fs::read_dir;
use std::hash::Hash;
use std::path::Path;
use convert_case::{Casing, Case};
use proc_macro::TokenStream;
use quote::quote;
use syn::parse_macro_input;
use toml::{Table, Value};
/// Key of one entry in a plural table: the argument values a plural text applies to.
///
/// The variant order is load-bearing. The derived `Ord` sorts `Single` before
/// `InclusiveRange` before `Others`, and `output_locs` sorts the patterns before
/// emitting `match` arms, which is what puts the `_` arm last.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
enum FormatPattern {
    /// Exactly one value (TOML key such as `1`).
    Single(i64),
    /// Every value in `start..=end` (TOML key such as `2..=4`).
    InclusiveRange(i64, i64),
    /// Catch-all (TOML key `_`).
    Others,
}
/// The text of one localization entry in a single language.
#[derive(Debug)]
enum Localization {
    /// A plain string; it may still contain `{name}` format arguments.
    Simple(String),
    /// A format string together with its plural tables, keyed by format argument
    /// name, then plural name, then value pattern.
    Plural(
        String,
        HashMap<String, HashMap<String, HashMap<FormatPattern, String>>>,
    ),
}
/// A node of the parsed localization tree.
#[derive(Debug)]
enum LocTreeNode {
    /// An inner node: child name to child node.
    Tree(HashMap<String, LocTreeNode>),
    /// A leaf: language code to the localized text in that language.
    Localizations(HashMap<String, Localization>),
}
/// A placeholder found in a localization string by `parse_args_in_string`.
#[derive(Debug, PartialEq, Eq)]
enum ArgKind<'a> {
    /// A `{name}` placeholder; holds the text between `{` and the first `:` or `}`.
    Format(&'a str),
    /// A `%{format.plural}` placeholder, split at the first `.`.
    Plural(&'a str, &'a str),
}
/// Creates an identifier named `name` carrying a call-site span.
fn ident(name: &str) -> proc_macro2::Ident {
    let span = proc_macro2::Span::call_site();
    proc_macro2::Ident::new(name, span)
}
/// Merges the localization subtree `src` into `dest`.
///
/// `source_field` is the dotted path of the node being merged, without a trailing
/// dot (for example `menu.file`). It is only used in panic messages; child paths
/// are formed by appending `.` and the child key.
///
/// # Panics
///
/// * if the same language is defined twice for one localization, or
/// * if a node is a tree in one source and an end localization in the other.
fn merge_locs(source_field: &str, dest: &mut LocTreeNode, src: LocTreeNode) {
    match (dest, src) {
        (LocTreeNode::Tree(dest), LocTreeNode::Tree(src)) => {
            for (key, value) in src {
                if let Some(existing) = dest.get_mut(&key) {
                    // Join with a dot so nested paths read `a.b.c` rather than `ab.c.`.
                    merge_locs(&format!("{}.{}", source_field, key), existing, value);
                } else {
                    dest.insert(key, value);
                }
            }
        }
        (LocTreeNode::Localizations(dest), LocTreeNode::Localizations(src)) => {
            // At a leaf the keys are language codes.
            for (key, value) in src {
                if dest.contains_key(&key) {
                    panic!("more localizations with the same name found for field `{source_field}.{key}`")
                }
                dest.insert(key, value);
            }
        }
        _ => {
            panic!("node `{source_field}` defined at one place as a tree node and at another place as an end localization")
        }
    }
}
/// Accumulates parsed localization sources and generates the accessor code for them.
struct LocParser {
    /// Top-level nodes of the merged localization tree, keyed by name.
    root: HashMap<String, LocTreeNode>,
    /// Language whose text becomes the `_` match arm of every generated accessor, if set.
    fallback_lang: Option<String>,
}
impl LocParser {
/// Creates a parser with an empty tree and no fallback language.
pub fn new() -> Self {
    let root = HashMap::new();
    Self {
        root,
        fallback_lang: None,
    }
}
/// Stores `lang` as the fallback language, replacing any previous one.
pub fn set_fallback_lang(&mut self, lang: &str) {
    let owned = String::from(lang);
    self.fallback_lang = Some(owned);
}
/// Builds the name of the local variable that holds the selected plural text for
/// the placeholder `%{format.plural}`.
fn format_plural_arg(format: &str, plural: &str) -> String {
    ["plural__", format, "___", plural].concat()
}
/// Rewrites every `%{format.plural}` placeholder in `fmt` into a plain
/// `{plural__format___plural}` placeholder and lists the `(format, plural)` pairs found.
///
/// Each `%` is removed from the output and marks the next `}` as the end of a
/// plural placeholder; all other text is copied unchanged.
///
/// # Panics
///
/// Panics if the text before the `}` that closes a plural placeholder has no `.`.
fn extract_plural_args<'a>(source_field: &str, fmt: &'a str) -> (String, Vec<(&'a str, &'a str)>) {
    let mut in_plural = false;
    let mut rewritten = String::new();
    let mut found = Vec::new();
    for piece in fmt.split_inclusive(['{', '%', '}']) {
        match piece.chars().last() {
            Some('%') => {
                in_plural = true;
                // Copy the text in front of the marker; the `%` itself is dropped.
                rewritten.push_str(&piece[..piece.len() - 1]);
            }
            Some('}') if in_plural => {
                let name = &piece[..piece.len() - 1];
                let Some((format, plural)) = name.split_once('.') else {
                    panic!("plural argument must be in notation `format`.`plural` in field `{source_field}`")
                };
                found.push((format, plural));
                rewritten.push_str(&Self::format_plural_arg(format, plural));
                rewritten.push('}');
                in_plural = false;
            }
            // `{`, a `}` outside a plural placeholder, or trailing text without a delimiter.
            _ => rewritten.push_str(piece),
        }
    }
    (rewritten, found)
}
/// Lists the `{name}` / `{name:spec}` placeholders of `fmt` as `(name, spec)` pairs,
/// in order of appearance. `spec` is empty when the placeholder has no `:`.
///
/// Placeholders that follow a `%` are treated as plural placeholders and skipped.
// NOTE(review): any `%` switches to plural mode until the next `}`, so a literal
// percent sign (e.g. `100% of {x}`) hides the following placeholder. Confirm
// whether that is intended.
fn extract_format_args(fmt: &str) -> Vec<(&str, &str)> {
    #[derive(PartialEq, Eq)]
    enum Mode {
        Text,
        Plural,
        Name,
        Spec,
    }
    let mut mode = Mode::Text;
    let mut found = Vec::new();
    let mut pending_name = "";
    for piece in fmt.split_inclusive(['{', '%', ':', '}']) {
        let Some(last) = piece.chars().last() else { continue };
        match last {
            '%' => mode = Mode::Plural,
            '{' => {
                if mode != Mode::Plural {
                    mode = Mode::Name;
                }
            }
            ':' => {
                // Only the first `:` inside a placeholder ends the name.
                if mode == Mode::Name {
                    pending_name = &piece[..piece.len() - 1];
                    mode = Mode::Spec;
                }
            }
            '}' => {
                let body = &piece[..piece.len() - 1];
                match mode {
                    Mode::Spec => found.push((pending_name, body)),
                    Mode::Name => found.push((body, "")),
                    Mode::Text | Mode::Plural => {}
                }
                mode = Mode::Text;
            }
            // Trailing text without a delimiter.
            _ => {}
        }
    }
    found
}
/// Lists every placeholder of `fmt` in order of appearance, both `{name}` format
/// arguments and `%{format.plural}` plural arguments.
///
/// A placeholder ends at the first `:` or `}` after its opening `{`.
///
/// # Panics
///
/// Panics if a plural placeholder does not contain a `.`.
fn parse_args_in_string<'a>(source_field: &str, fmt: &'a str) -> Vec<ArgKind<'a>> {
    #[derive(PartialEq, Eq)]
    enum Mode {
        Text,
        Plural,
        Name,
    }
    let mut mode = Mode::Text;
    let mut found = Vec::new();
    for piece in fmt.split_inclusive(['{', '%', ':', '}']) {
        match piece.chars().last() {
            Some('%') => mode = Mode::Plural,
            Some('{') => {
                if mode != Mode::Plural {
                    mode = Mode::Name;
                }
            }
            Some(':' | '}') => {
                let name = &piece[..piece.len() - 1];
                // Whatever was being parsed is finished here, so reset while reading the mode.
                match std::mem::replace(&mut mode, Mode::Text) {
                    Mode::Plural => {
                        let Some((format, plural)) = name.split_once('.') else {
                            panic!("plural argument must be in notation `format`.`plural` in field `{source_field}`")
                        };
                        found.push(ArgKind::Plural(format, plural))
                    }
                    Mode::Name => found.push(ArgKind::Format(name)),
                    Mode::Text => {}
                }
            }
            _ => {}
        }
    }
    found
}
/// Tries to read `table` as an end node, i.e. a map from language code to text.
///
/// Each value must be either a string (a simple localization) or a two-element
/// array `[format string, plural table]`. Returns `None` if the table is empty or
/// holds any other kind of value, in which case the caller treats it as an inner node.
///
/// Plural tables are only read for the `%{format.plural}` placeholders that occur
/// in the format string; placeholders with no matching table are skipped here.
///
/// # Panics
///
/// Panics when an array entry or one of its plural tables is malformed; the
/// message names the offending field.
fn try_parse_end_node(source_field: &str, table: &Table) -> Option<HashMap<String, Localization>> {
    if table.is_empty() {
        return None;
    }
    let mut localizations = HashMap::new();
    for (lang, entry) in table {
        let localization = match entry {
            Value::String(text) => Localization::Simple(text.to_string()),
            Value::Array(items) => {
                let mut items = items.iter();
                let Some(Value::String(fmt)) = items.next() else {
                    panic!("first array element must be the format string in field `{source_field}`")
                };
                let Some(Value::Table(plural_args)) = items.next() else {
                    panic!("second array element must be a table of plural arguments in field `{source_field}`")
                };
                if items.next().is_some() {
                    panic!("array has excess elements in field `{source_field}`")
                }
                let mut root = HashMap::new();
                for arg in Self::parse_args_in_string(source_field, fmt) {
                    let ArgKind::Plural(format, plural) = arg else { continue };
                    // The per-argument map is created even when no table is found below.
                    let format_root = root.entry(format.to_string()).or_insert_with(HashMap::new);
                    let requested = plural_args
                        .get(format)
                        .and_then(|per_format| per_format.get(plural));
                    let Some(plural_table) = requested else { continue };
                    let Value::Table(plural_table) = plural_table else {
                        panic!("plural arguments table is not a table in field `{source_field}.(plurals).{format}.{plural}`")
                    };
                    let mut plural_map = HashMap::new();
                    for (key, value) in plural_table {
                        let Value::String(value) = value else {
                            panic!("plural argument value must be a string in field `{source_field}.(plurals).{format}.{plural}.{key}`")
                        };
                        let pattern = match key.as_str() {
                            "_" => FormatPattern::Others,
                            other => {
                                if let Ok(single) = other.parse::<i64>() {
                                    FormatPattern::Single(single)
                                } else {
                                    let bounds = other.split_once("..=").and_then(|(low, high)| {
                                        low.parse::<i64>().ok().zip(high.parse::<i64>().ok())
                                    });
                                    match bounds {
                                        Some((start, end)) if start > end => {
                                            panic!("plural argument key range must be ascending in field `{source_field}.(plurals).{format}.{plural}.{key}`")
                                        }
                                        Some((start, end)) => FormatPattern::InclusiveRange(start, end),
                                        None => {
                                            panic!("plural argument key may be a single value, inclusive range or \"_\" in field `{source_field}.(plurals).{format}.{plural}.{key}`")
                                        }
                                    }
                                }
                            }
                        };
                        plural_map.insert(pattern, value.to_string());
                    }
                    format_root.insert(plural.to_string(), plural_map);
                }
                Localization::Plural(fmt.to_string(), root)
            }
            // Anything else (e.g. a nested table) means this is not an end node.
            _ => return None,
        };
        localizations.insert(lang.to_string(), localization);
    }
    Some(localizations)
}
fn iter_tree(source_field: &str, src: &Value) -> LocTreeNode {
let Value::Table(table) = src else {
panic!("expected a table on field `{source_field}`")
};
if let Some(end) = Self::try_parse_end_node(source_field, table) {
return LocTreeNode::Localizations(end)
}
let mut map = HashMap::new();
for (key, value) in table {
map.insert(key.to_string(), Self::iter_tree(&format!("{}.{}", source_field, key), value));
}
LocTreeNode::Tree(map)
}
/// Parses one TOML document and merges its contents into the accumulated tree.
///
/// # Panics
///
/// Panics if the document itself is an end node, if any part of it is malformed,
/// or if it conflicts with previously parsed sources.
pub fn parse(&mut self, src: toml::Table) {
    let LocTreeNode::Tree(tree) = Self::iter_tree("root", &toml::Value::Table(src)) else {
        panic!("root node must be a tree")
    };
    for (key, value) in tree {
        match self.root.get_mut(&key) {
            Some(existing) => merge_locs(&key, existing, value),
            None => {
                self.root.insert(key, value);
            }
        }
    }
}
/// Generates the accessor methods for one localization entry.
///
/// Two methods are emitted in an `impl` of `struct_name`: `fn_name(&self, ..)` and
/// `{fn_name}_static(..)`. Both select the text by matching
/// `static_i18n::get_language_code()` against the languages in `langs`. Entries
/// without format arguments return string slices; entries with arguments return a
/// `String` built with `format!`.
///
/// * `source_field` - dotted path of the entry, used in panic messages and in the
///   "LOCALE NOT FOUND" fallback text.
/// * `named` - when `true`, the arguments are passed through a generated
///   `Args{struct_name}{FnName}` struct instead of positional parameters.
///
/// An argument that appears several times in one string (`"{x} and {x}"`) yields a
/// single parameter.
///
/// # Panics
///
/// Panics if the languages disagree on their format arguments, if a plural
/// placeholder has no table or no `_` pattern, if `langs` is empty, or if the
/// configured fallback language is missing from `langs`.
fn output_locs(&self, source_field: &str, struct_name: &str, fn_name: &str, langs: &HashMap<String, Localization>, named: bool) -> proc_macro2::TokenStream {
    let struct_ident = ident(struct_name);
    let fn_ident = ident(fn_name);
    let static_fn_ident = ident(&format!("{}_static", fn_name));
    let mut languages = Vec::new();
    let mut locs = Vec::new();
    let mut args: Option<Vec<_>> = None;
    let mut plural_format_args = HashSet::new();
    for (lang, loc) in langs {
        languages.push(lang);
        let (format, plurals) = match loc {
            Localization::Simple(format) => (format, None),
            Localization::Plural(format, plurals) => (format, Some(plurals))
        };
        let mut format_args = Self::extract_format_args(format);
        format_args.sort();
        // A repeated placeholder must not become a repeated parameter or field,
        // which would make the generated code fail to compile.
        format_args.dedup();
        if format_args.is_empty() {
            locs.push(quote! { #format });
        } else {
            let (format, requested_plurals) = Self::extract_plural_args(source_field, format);
            let mut plural_inits = Vec::new();
            for (format, plural) in &requested_plurals {
                let plural_ranges = plurals
                    .unwrap_or_else(|| panic!("no plural arguments provided for plural `{format}.{plural}` in field `{source_field}`"))
                    .get(&format.to_string())
                    .unwrap_or_else(|| panic!("could not find plural arg `{format}.{plural}` in field `{source_field}`"))
                    .get(&plural.to_string())
                    .unwrap_or_else(|| panic!("could not find plural arg `{format}.{plural}` in field `{source_field}`"));
                let mut plural_ranges = plural_ranges.iter()
                    .collect::<Vec<_>>();
                let mut has_others = false;
                // Sorting by pattern puts the `_` arm last (see the variant order of `FormatPattern`).
                plural_ranges.sort();
                let matches = plural_ranges.iter()
                    .map(|(pat, val)| match pat {
                        FormatPattern::Single(num) => quote! { #num => #val },
                        FormatPattern::InclusiveRange(start, end) => quote! { #start..=#end => #val },
                        FormatPattern::Others => {
                            has_others = true;
                            quote! { _ => #val }
                        },
                    })
                    .collect::<Vec<_>>();
                if !has_others {
                    panic!("missing \"_\" pattern in plural arg `{format}.{plural}` in field `{source_field}`")
                }
                plural_format_args.insert(format.to_string());
                let plural_var_name = ident(&Self::format_plural_arg(format, plural));
                let format_var_name = ident(format);
                plural_inits.push(quote! {
                    let #plural_var_name = match #format_var_name.as_range_index() {
                        #(#matches),*
                    };
                })
            }
            locs.push(quote! { {
                #(#plural_inits)*
                format!(#format)
            } });
        }
        // Every language must use exactly the same set of arguments.
        if let Some(old_args) = &args {
            if old_args != &format_args {
                panic!("arguments mismatch between individual languages in field `{source_field}`")
            }
        } else {
            args = Some(format_args);
        }
    }
    let args = args.unwrap_or_else(|| panic!("no languages provided in field `{source_field}`"));
    // The last character of the format spec selects the formatting trait the argument must implement.
    let arg_traits = args.iter().map(|(name, fmt)| {
        let base_trait = match fmt.chars().last() {
            Some('?') => quote! { std::fmt::Debug },
            Some('b') => quote! { std::fmt::Binary },
            Some('e') => quote! { std::fmt::LowerExp },
            Some('E') => quote! { std::fmt::UpperExp },
            Some('x') => quote! { std::fmt::LowerHex },
            Some('X') => quote! { std::fmt::UpperHex },
            Some('o') => quote! { std::fmt::Octal },
            Some('p') => quote! { std::fmt::Pointer },
            Some(_) | None => quote! { std::fmt::Display },
        };
        let name = name.to_string();
        if plural_format_args.contains(&name) {
            quote! { #base_trait + static_i18n::LocRangeIndex }
        } else {
            quote! { #base_trait }
        }
    }).collect::<Vec<_>>();
    let (mut fallback_value, return_type, static_return_type) = if args.is_empty() {
        (
            quote! { concat!("LOCALE NOT FOUND FOR ", #source_field) },
            quote! { &str },
            quote! { &'static str }
        )
    } else {
        (
            quote! { concat!("LOCALE NOT FOUND FOR ", #source_field).to_string() },
            quote! { String },
            quote! { String }
        )
    };
    let args = args.iter().map(|(x, _)| ident(x)).collect::<Vec<_>>();
    // The fallback language is taken out of the explicit arms and becomes the `_` arm.
    if let Some(fallback_lang) = &self.fallback_lang {
        let Some(idx) = languages.iter().position(|x| x == &fallback_lang) else {
            panic!("fallback language `{fallback_lang}` missing in field `{source_field}`")
        };
        languages.remove(idx);
        fallback_value = locs.remove(idx);
    }
    if named {
        let arg_struct = ident(&format!("Args{}{}", struct_name, fn_name.to_case(Case::Pascal)));
        let arg_generics = args.iter().map(|x| ident(&x.to_string().to_uppercase())).collect::<Vec<_>>();
        quote! {
            struct #arg_struct<#(#arg_generics: #arg_traits),*> {
                #(#args: #arg_generics),*
            }
            impl #struct_ident {
                pub fn #fn_ident<#(#arg_generics: #arg_traits),*>(&self, args: #arg_struct<#(#arg_generics),*>) -> #return_type {
                    Self::#static_fn_ident(args)
                }
                pub fn #static_fn_ident<#(#arg_generics: #arg_traits),*>(args: #arg_struct<#(#arg_generics),*>) -> #static_return_type {
                    let #arg_struct { #(#args),* } = args;
                    let locale = static_i18n::get_language_code();
                    match locale.as_str() {
                        #(#languages => #locs,)*
                        _ => #fallback_value
                    }
                }
            }
        }
    } else {
        quote! {
            impl #struct_ident {
                pub fn #fn_ident(&self, #(#args: impl #arg_traits),*) -> #return_type {
                    Self::#static_fn_ident(#(#args),*)
                }
                pub fn #static_fn_ident(#(#args: impl #arg_traits),*) -> #static_return_type {
                    let locale = static_i18n::get_language_code();
                    match locale.as_str() {
                        #(#languages => #locs,)*
                        _ => #fallback_value
                    }
                }
            }
        }
    }
}
/// Generates the struct named `prefix` for one tree level, plus everything below it.
///
/// Each child tree becomes a public field whose type is named `prefix` followed by
/// the Pascal-cased child key, and is generated recursively. Each child
/// localization becomes accessor methods on the `prefix` struct via `output_locs`.
fn output_iter_tree(&self, source_field: &str, prefix: &str, tree: &HashMap<String, LocTreeNode>, named: bool) -> proc_macro2::TokenStream {
    let struct_name = ident(prefix);
    let mut fields = Vec::new();
    let mut other_structs = Vec::with_capacity(tree.len());
    for (key, value) in tree {
        let next_prefix = format!("{}{}", prefix, key.to_case(Case::Pascal));
        let field = ident(key);
        let next_name = ident(&next_prefix);
        let child_field = format!("{}.{}", source_field, key);
        let generated = match value {
            LocTreeNode::Tree(subtree) => {
                fields.push(quote! { pub #field: #next_name });
                self.output_iter_tree(&child_field, &next_prefix, subtree, named)
            }
            LocTreeNode::Localizations(locs) => {
                self.output_locs(&child_field, prefix, key, locs, named)
            }
        };
        other_structs.push(generated);
    }
    quote! {
        #(#other_structs)*
        #[derive(Clone, Copy)]
        pub struct #struct_name {
            #(#fields),*
        }
    }
}
pub fn generate_token_stream(&self) -> TokenStream {
let structs = self.output_iter_tree("root", "Loc", &self.root, false);
let named_structs = self.output_iter_tree("root", "NamedLoc", &self.root, true);
quote! {
#structs
#named_structs
pub static loc: Loc = unsafe { std::mem::zeroed() }; pub static loc_named: NamedLoc = unsafe { std::mem::zeroed() }; }.into()
}
}
/// Arguments accepted by the `init` macro.
#[derive(Debug, darling::FromMeta)]
#[darling(derive_syn_parse)]
struct LocSettings {
    /// File or directory the localization sources are loaded from.
    path: String,
    /// Optional language used when the active language has no dedicated text.
    fallback_lang: Option<String>,
}
/// Recursively loads the localization sources found at `path` into `parser`.
///
/// A directory is walked entry by entry, with each entry's file stem appended to
/// `prefixes`. A file is parsed as TOML and its contents are nested under the
/// prefixes, so `menu/file.toml` ends up under `menu.file`. Prefixes equal to
/// `mod` are skipped, which lets `menu/mod.toml` contribute directly to `menu`.
///
/// # Panics
///
/// Panics, naming the offending path, if a directory or file cannot be read or a
/// file is not valid TOML. Panics from `LocParser::parse` are propagated as well.
fn parse_source_files(parser: &mut LocParser, prefixes: &[String], path: &Path) {
    if path.is_dir() {
        let entries = read_dir(path).unwrap_or_else(|err| {
            panic!("failed to read localization directory `{}`: {err}", path.display())
        });
        for entry in entries {
            let entry = entry.unwrap_or_else(|err| {
                panic!("failed to read an entry of localization directory `{}`: {err}", path.display())
            });
            let entry_path = entry.path();
            // The file stem (name without its last extension) becomes the next path segment.
            let segment = entry_path
                .file_stem()
                .unwrap_or_else(|| panic!("localization path `{}` has no file name", entry_path.display()))
                .to_string_lossy()
                .into_owned();
            let mut nested_prefixes = prefixes.to_vec();
            nested_prefixes.push(segment);
            parse_source_files(parser, &nested_prefixes, &entry_path);
        }
        return;
    }
    let contents = std::fs::read_to_string(path).unwrap_or_else(|err| {
        panic!("failed to read localization file `{}`: {err}", path.display())
    });
    let mut table: toml::Table = toml::from_str(&contents).unwrap_or_else(|err| {
        panic!("failed to parse localization file `{}`: {err}", path.display())
    });
    // Wrap from the innermost prefix outwards so the first prefix ends up at the root.
    for prefix in prefixes.iter().rev() {
        if prefix != "mod" {
            let mut inner = toml::Table::new();
            inner.insert(prefix.to_string(), toml::Value::Table(table));
            table = inner;
        }
    }
    parser.parse(table);
}
#[proc_macro]
pub fn init(params: TokenStream) -> TokenStream {
let settings = parse_macro_input!(params as LocSettings);
let mut parser = LocParser::new();
if let Some(fallback_lang) = settings.fallback_lang {
parser.set_fallback_lang(&fallback_lang);
}
parse_source_files(&mut parser, &[], Path::new(&settings.path));
parser.generate_token_stream()
}