use anyhow::{anyhow, Result};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::BTreeMap;
use std::sync::LazyLock;
static PLACEHOLDER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\{\s*(\w+)\s*\}\}").unwrap());
fn normalize(s: &str) -> String {
PLACEHOLDER.replace_all(s, "{{${1}}}").into_owned()
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Ir {
pub canonical: String,
pub locales: Vec<String>,
pub messages: Vec<Message>,
pub plural_rules: BTreeMap<String, PluralTable>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct PluralTable {
pub categories: Vec<String>,
pub small: Vec<String>,
pub modulo: Vec<String>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Message {
pub path: Vec<String>,
pub params: Vec<Param>,
pub kind: Kind,
pub values: BTreeMap<String, MessageValue>,
}
impl Message {
pub fn dotted(&self) -> String {
self.path.join(".")
}
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Param {
pub name: String,
#[serde(rename = "type")]
pub ty: ParamType,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ParamType {
String,
Number,
}
#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Kind {
Plain,
Plural,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(untagged)]
pub enum MessageValue {
Plain(String),
Plural(BTreeMap<String, String>),
}
pub fn build_ir(canonical: &str, locales: &BTreeMap<String, Value>) -> Result<Ir> {
let canon = locales
.get(canonical)
.ok_or_else(|| anyhow!("canonical locale '{}' not found", canonical))?;
let mut messages = Vec::new();
let mut path = Vec::new();
walk(canon, &mut path, locales, canonical, &mut messages)?;
let mut plural_rules = BTreeMap::new();
for loc in locales.keys() {
plural_rules.insert(loc.clone(), crate::plural::build_plural_table(loc)?);
}
Ok(Ir {
canonical: canonical.to_string(),
locales: locales.keys().cloned().collect(),
messages,
plural_rules,
})
}
fn walk(
node: &Value,
path: &mut Vec<String>,
locales: &BTreeMap<String, Value>,
canonical: &str,
out: &mut Vec<Message>,
) -> Result<()> {
let obj = node
.as_object()
.ok_or_else(|| anyhow!("expected object at '{}'", path.join(".")))?;
for (key, val) in obj {
path.push(key.clone());
if let Some(s) = val.as_str() {
let params = params_from(s);
let values = collect_values(path, locales, canonical, false);
out.push(Message {
path: path.clone(),
params,
kind: Kind::Plain,
values,
});
} else if let Some(forms) = val.as_object().and_then(|o| o.get("$plural")) {
let other = forms
.as_object()
.and_then(|m| m.get("other"))
.and_then(|v| v.as_str())
.unwrap_or("");
let mut params = vec![Param {
name: "count".into(),
ty: ParamType::Number,
}];
for p in params_from(other) {
if p.name != "count" {
params.push(p);
}
}
let values = collect_values(path, locales, canonical, true);
out.push(Message {
path: path.clone(),
params,
kind: Kind::Plural,
values,
});
} else if val.is_object() {
walk(val, path, locales, canonical, out)?;
}
path.pop();
}
Ok(())
}
fn params_from(s: &str) -> Vec<Param> {
let mut seen: Vec<Param> = Vec::new();
for cap in PLACEHOLDER.captures_iter(s) {
let name = cap[1].to_string();
if seen.iter().any(|p| p.name == name) {
continue;
}
let ty = if name == "count" {
ParamType::Number
} else {
ParamType::String
};
seen.push(Param { name, ty });
}
seen
}
fn lookup<'a>(root: &'a Value, path: &[String]) -> Option<&'a Value> {
let mut cur = root;
for seg in path {
cur = cur.as_object()?.get(seg)?;
}
Some(cur)
}
fn to_message_value(val: &Value, plural: bool) -> Option<MessageValue> {
if plural {
let m = val.as_object()?.get("$plural")?.as_object()?;
let forms = m
.iter()
.filter_map(|(k, v)| Some((k.clone(), normalize(v.as_str()?))))
.collect();
Some(MessageValue::Plural(forms))
} else {
Some(MessageValue::Plain(normalize(val.as_str()?)))
}
}
#[cfg(test)]
mod tests {
use super::{normalize, params_from};
#[test]
fn extracts_double_brace_params_whitespace_tolerant() {
let names: Vec<_> = params_from("Hi {{name}}, {{ count }} nearby")
.into_iter()
.map(|p| p.name)
.collect();
assert_eq!(names, vec!["name", "count"]);
}
#[test]
fn normalize_canonicalizes_spacing_and_leaves_single_braces() {
assert_eq!(normalize("Hi {{ name }}"), "Hi {{name}}");
assert_eq!(normalize("{{count}} dogs"), "{{count}} dogs");
assert_eq!(normalize("set it to {x}"), "set it to {x}");
}
}
fn collect_values(
path: &[String],
locales: &BTreeMap<String, Value>,
canonical: &str,
plural: bool,
) -> BTreeMap<String, MessageValue> {
let mut out = BTreeMap::new();
for (loc, root) in locales {
let value = lookup(root, path).and_then(|v| to_message_value(v, plural));
let resolved = match value {
Some(v) => v,
None => {
if loc != canonical {
eprintln!(
"warning: locale '{}' missing key '{}' — falling back to '{}'",
loc,
path.join("."),
canonical
);
}
lookup(&locales[canonical], path)
.and_then(|v| to_message_value(v, plural))
.unwrap_or(MessageValue::Plain(String::new()))
}
};
out.insert(loc.clone(), resolved);
}
out
}