// i18n-audit 0.1.0
//
// Rust i18n audit library and CLI for scanning translation usage, missing keys, and unused keys.
// Documentation
use crate::loaders::TranslationRepositoryLoader;
use crate::report::{LocaleStats, Report, ReportHtmlRenderer, ReportMeta, ReportStats};
use crate::scanner::RustSourceScanner;
use chrono::Utc;
use serde_json::Value;
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;

/// Configuration for a single audit run.
///
/// Relative paths in this struct are resolved against `root_path` by the
/// runner before they are used.
#[derive(Debug, Clone)]
pub struct AuditOptions {
    /// Base directory that relative paths are joined onto.
    pub root_path: String,
    /// Locales to audit; when empty, no explicit locale list is handed to the loader.
    pub locales: Vec<String>,
    /// Source directories or files handed to the scanner (only those that exist).
    pub paths: Vec<String>,
    /// Exclusion entries forwarded unchanged to the scanner.
    pub exclude: Vec<String>,
    /// Output format name. Not read by the code in this file; presumably
    /// consumed by the CLI layer (TODO confirm).
    pub format: String,
    /// Optional file path where the pretty-printed JSON report is written.
    pub output: Option<String>,
    /// Forwarded to the table renderer as its "only missing" flag.
    pub only_missing: bool,
    /// Forwarded to the table renderer as its "only unused" flag.
    pub only_unused: bool,
    /// When true, an HTML report is rendered and written to `html_output`.
    pub html: bool,
    /// Destination of the HTML report (used only when `html` is true).
    pub html_output: String,
    /// Translation directories handed to the loader (only those that exist).
    pub lang_paths: Vec<String>,
    /// Forwarded to the scanner as its symlink-following flag.
    pub follow_symlinks: bool,
    /// Not read by the code in this file; presumably drives the CLI exit
    /// status when keys are missing (TODO confirm).
    pub fail_on_missing: bool,
    /// Not read by the code in this file; presumably drives the CLI exit
    /// status when keys are unused (TODO confirm).
    pub fail_on_unused: bool,
    /// File that receives one appended detailed log entry per run.
    pub log_path: String,
    /// Copied verbatim into the report metadata.
    pub dashboard_url: String,
}

impl Default for AuditOptions {
    /// Builds the stock configuration: audit the current directory, scan
    /// `src`, `examples` and `tests`, read translations from `locales`, and
    /// keep generated artifacts under `target/`.
    fn default() -> Self {
        // Small local helper so the literal lists below stay readable.
        let owned = |items: &[&str]| -> Vec<String> {
            items.iter().map(|item| item.to_string()).collect()
        };

        Self {
            root_path: String::from("."),
            locales: Vec::new(),
            paths: owned(&["src", "examples", "tests"]),
            exclude: owned(&["target", "vendor", "node_modules", ".git"]),
            format: String::from("table"),
            output: None,
            only_missing: false,
            only_unused: false,
            html: false,
            html_output: String::from("target/i18n-audit-latest.html"),
            lang_paths: owned(&["locales"]),
            follow_symlinks: false,
            fail_on_missing: false,
            fail_on_unused: false,
            log_path: String::from("target/i18n-audit.log"),
            dashboard_url: String::new(),
        }
    }
}

/// Everything produced by one audit run.
#[derive(Debug, Clone)]
pub struct AuditOutcome {
    /// The assembled report (keys, per-locale differences, stats, metadata).
    pub report: Report,
    /// The report rendered as a table by `ReportRenderer::render_table`.
    pub table_output: String,
    /// The report rendered by `Report::to_pretty_json`.
    pub json_output: String,
    /// True when at least one locale has a non-empty list of missing keys.
    pub has_missing: bool,
    /// True when at least one locale has a non-empty list of unused keys.
    pub has_unused: bool,
}

/// Runs an audit by combining a source scanner with a translation loader.
pub struct AuditRunner {
    // Finds translation key usages in the configured source paths.
    scanner: RustSourceScanner,
    // Loads the translation repositories from the configured language paths.
    loader: TranslationRepositoryLoader,
}

impl Default for AuditRunner {
    /// Creates a runner backed by the default scanner and the stock loader.
    fn default() -> Self {
        let scanner = RustSourceScanner::default();
        let loader = TranslationRepositoryLoader;
        Self { scanner, loader }
    }
}

impl AuditRunner {
    /// Executes a complete audit and returns the report plus its renderings.
    ///
    /// Steps, in order: scan the source paths for used keys, load the
    /// translation repositories, compute the per-locale differences, append a
    /// detailed entry to the log file, build the report, and finally write the
    /// optional JSON (`options.output`) and HTML (`options.html`) artifacts.
    ///
    /// # Errors
    ///
    /// Returns the underlying `std::io::Error` when a parent directory cannot
    /// be created or when the log, JSON or HTML file cannot be written.
    pub fn run(&self, options: &AuditOptions) -> std::io::Result<AuditOutcome> {
        let root = Path::new(&options.root_path);
        // Shortens a resolved path to its form relative to the audit root.
        let relativize =
            |resolved: &String| PathNormalizer::relative_to(&options.root_path, resolved);

        // Scan the sources for translation key usages.
        let scan_paths = resolve_paths(root, &options.paths);
        let excludes = options.exclude.clone();
        let scan_result = self
            .scanner
            .scan(&scan_paths, &excludes, options.follow_symlinks);
        let used_keys = scan_result.get_used_keys();
        let used_key_locations = scan_result.get_used_key_locations();
        let dynamic_keys = scan_result.get_dynamic_keys();

        // Load the translations; an empty locale list means "no explicit filter".
        let lang_paths_abs = resolve_paths(root, &options.lang_paths);
        let requested_locales = if options.locales.is_empty() {
            None
        } else {
            Some(options.locales.clone())
        };
        let loaded = self.loader.load(requested_locales, &lang_paths_abs, &[]);

        let diff = compute_diff_data(
            &used_keys,
            &used_key_locations,
            &loaded.locales,
            &loaded.repositories,
        );

        // Append the full, unfiltered data set to the detailed log.
        let timestamp = Utc::now().to_rfc3339();
        let log_payload = serde_json::json!({
            "timestamp": timestamp,
            "usedKeys": used_keys,
            "usedKeyLocations": used_key_locations,
            "dynamicKeys": dynamic_keys,
            "missingByLocale": diff.missing_by_locale,
            "missingKeyLocationsByLocale": diff.missing_key_locations_by_locale,
            "unusedByLocale": diff.unused_by_locale,
            "paths": scan_paths.iter().map(relativize).collect::<Vec<_>>(),
            "exclude": excludes,
            "locales": loaded.locales,
            "langPaths": lang_paths_abs.iter().map(relativize).collect::<Vec<_>>(),
            "warnings": loaded.warnings,
        });
        let detailed_log_path = write_detailed_log(root, &options.log_path, &log_payload)?;

        // Totals are derived before the diff maps are moved into the report.
        let stats = ReportStats {
            used_keys_total: used_keys.len(),
            dynamic_keys_total: dynamic_keys.len(),
            missing_total: diff.missing_by_locale.values().map(Vec::len).sum(),
            unused_total: diff.unused_by_locale.values().map(Vec::len).sum(),
            per_locale: diff.per_locale_stats,
        };
        let meta = ReportMeta {
            timestamp,
            paths: scan_paths.iter().map(relativize).collect(),
            exclude: excludes,
            locales: loaded.locales,
            lang_paths: lang_paths_abs.iter().map(relativize).collect(),
            warnings: loaded.warnings,
            detailed_log_path: relativize(&detailed_log_path),
            dashboard_url: options.dashboard_url.clone(),
        };
        let report = Report {
            used_keys: used_keys.clone(),
            used_key_locations: used_key_locations.clone(),
            dynamic_keys: dynamic_keys.clone(),
            missing_by_locale: diff.missing_by_locale,
            missing_key_locations_by_locale: diff.missing_key_locations_by_locale,
            unused_by_locale: diff.unused_by_locale,
            stats,
            meta,
        };

        let json_output = report.to_pretty_json();
        let table_output = crate::report::ReportRenderer::render_table(
            &report,
            options.only_missing,
            options.only_unused,
        );

        // Optional JSON artifact.
        if let Some(configured) = options.output.as_ref() {
            let json_path = resolve_path(root, configured);
            ensure_parent_exists(&json_path)?;
            fs::write(json_path, &json_output)?;
        }

        // Optional HTML artifact.
        if options.html {
            let rendered = ReportHtmlRenderer::render(&report);
            let html_path = resolve_path(root, &options.html_output);
            ensure_parent_exists(&html_path)?;
            fs::write(html_path, rendered)?;
        }

        let has_missing = report
            .missing_by_locale
            .values()
            .any(|keys| !keys.is_empty());
        let has_unused = report
            .unused_by_locale
            .values()
            .any(|keys| !keys.is_empty());

        Ok(AuditOutcome {
            report,
            table_output,
            json_output,
            has_missing,
            has_unused,
        })
    }
}

/// Per-locale comparison between the keys used in code and the keys defined
/// in the translation repositories. Every map is keyed by locale name.
#[derive(Debug, Clone, Default)]
struct DiffData {
    // Keys used in code but absent from the locale (sorted, deduplicated).
    missing_by_locale: BTreeMap<String, Vec<String>>,
    // For each missing key that has recorded usages, the locations of those usages.
    missing_key_locations_by_locale: BTreeMap<String, BTreeMap<String, Vec<crate::report::LocationEntry>>>,
    // Keys defined in the locale but not used in code (sorted, deduplicated).
    unused_by_locale: BTreeMap<String, Vec<String>>,
    // Counts derived from the three sets above plus the locale's total key count.
    per_locale_stats: BTreeMap<String, LocaleStats>,
}

/// Compares the used keys against each scanned locale's translation keys.
///
/// For every locale in `locales_scanned` this records the missing keys, the
/// unused keys, the usage locations of the missing keys, and summary counts.
/// A locale without an entry in `repositories` is treated as having no keys.
fn compute_diff_data(
    used_keys: &[String],
    used_key_locations: &BTreeMap<String, Vec<crate::report::LocationEntry>>,
    locales_scanned: &[String],
    repositories: &BTreeMap<String, crate::loaders::LocaleRepository>,
) -> DiffData {
    let mut output = DiffData::default();

    for locale in locales_scanned {
        let all_keys = match repositories.get(locale) {
            Some(repo) => repo.all_keys.clone(),
            None => Default::default(),
        };

        let missing = set_diff(used_keys, &all_keys);
        let unused = set_diff(&all_keys, used_keys);
        let used_count = set_intersect(used_keys, &all_keys).len();

        // Counts are taken up front so the vectors can be moved into the maps.
        let stats = LocaleStats {
            total_translations: all_keys.len(),
            used: used_count,
            missing: missing.len(),
            unused: unused.len(),
        };

        // Only missing keys with recorded usages get a locations entry.
        let locations: BTreeMap<_, _> = missing
            .iter()
            .filter_map(|key| {
                used_key_locations
                    .get(key)
                    .map(|found| (key.clone(), found.clone()))
            })
            .collect();

        output.per_locale_stats.insert(locale.clone(), stats);
        output
            .missing_key_locations_by_locale
            .insert(locale.clone(), locations);
        output.missing_by_locale.insert(locale.clone(), missing);
        output.unused_by_locale.insert(locale.clone(), unused);
    }

    output
}

/// Returns the entries of `left` that do not occur in `right`, sorted in
/// ascending order with duplicates removed.
fn set_diff(left: &[String], right: &[String]) -> Vec<String> {
    let excluded: std::collections::BTreeSet<&String> = right.iter().collect();

    // An ordered set yields the survivors already sorted and unique.
    let kept: std::collections::BTreeSet<&String> = left
        .iter()
        .filter(|candidate| !excluded.contains(candidate))
        .collect();

    kept.into_iter().cloned().collect()
}

/// Returns the entries of `left` that also occur in `right`, sorted in
/// ascending order with duplicates removed.
fn set_intersect(left: &[String], right: &[String]) -> Vec<String> {
    let allowed: std::collections::BTreeSet<&String> = right.iter().collect();

    // An ordered set yields the shared entries already sorted and unique.
    let shared: std::collections::BTreeSet<&String> = left
        .iter()
        .filter(|candidate| allowed.contains(candidate))
        .collect();

    shared.into_iter().cloned().collect()
}

/// Appends one detailed audit entry to the log file and returns the
/// normalized path of that file.
///
/// `configured_path` is resolved against `root`, and missing parent
/// directories are created. The entry consists of a timestamped header line
/// followed by the pretty-printed `payload` between BEGIN/END marker lines.
///
/// The file is opened in append mode, so existing entries are never read or
/// rewritten. Rewriting the file from a fallible read would silently discard
/// the whole history whenever that read failed (for example on invalid UTF-8).
///
/// # Errors
///
/// Returns the underlying `std::io::Error` when the parent directory cannot
/// be created or the log file cannot be opened or written.
fn write_detailed_log(root: &Path, configured_path: &str, payload: &Value) -> std::io::Result<String> {
    let target = resolve_path(root, configured_path);
    ensure_parent_exists(&target)?;

    // Best effort: an unencodable payload is logged as an empty object rather
    // than aborting the audit.
    let encoded = serde_json::to_string_pretty(payload).unwrap_or_else(|_| "{}".to_string());
    let entry = format!(
        "[{}] i18n:audit report\n-----BEGIN I18N AUDIT JSON-----\n{}\n-----END I18N AUDIT JSON-----\n\n",
        Utc::now().to_rfc3339(),
        encoded
    );

    let mut log_file = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&target)?;
    // Fully qualified call so no extra trait import is required in this file.
    std::io::Write::write_all(&mut log_file, entry.as_bytes())?;

    Ok(PathNormalizer::normalize(&target))
}

/// Resolves every configured path against `root` and keeps only those that
/// exist on disk, returned sorted and without duplicates.
fn resolve_paths(root: &Path, configured_paths: &[String]) -> Vec<String> {
    let mut existing: Vec<String> = configured_paths
        .iter()
        .map(|configured| resolve_path(root, configured))
        .filter(|candidate| Path::new(candidate).exists())
        .collect();

    existing.sort();
    existing.dedup();
    existing
}

/// Turns a configured path into a normalized path string.
///
/// Paths that `PathNormalizer::is_absolute` accepts are normalized as they
/// are; all others are joined onto `root` first.
fn resolve_path(root: &Path, path: &str) -> String {
    if !PathNormalizer::is_absolute(path) {
        let joined = root.join(path);
        return PathNormalizer::normalize(&joined.to_string_lossy());
    }

    PathNormalizer::normalize(path)
}

/// Creates the parent directory of `path`, including missing ancestors.
///
/// Does nothing when `path` has no parent component.
fn ensure_parent_exists(path: &str) -> std::io::Result<()> {
    match Path::new(path).parent() {
        Some(directory) => fs::create_dir_all(directory),
        None => Ok(()),
    }
}

/// String-based path helpers that work on `/`-separated paths.
pub struct PathNormalizer;

impl PathNormalizer {
    /// Normalizes a path string: backslashes become `/`, runs of separators
    /// collapse into one, and a trailing separator is removed unless the
    /// whole path is the root `/`.
    pub fn normalize(path: &str) -> String {
        let mut normalized = String::with_capacity(path.len());
        for ch in path.chars() {
            let ch = if ch == '\\' { '/' } else { ch };
            // Skip a separator that directly follows another one.
            if ch == '/' && normalized.ends_with('/') {
                continue;
            }
            normalized.push(ch);
        }

        // After collapsing there is at most one trailing separator; keep it
        // only when it is the entire path (the root).
        if normalized.len() > 1 && normalized.ends_with('/') {
            normalized.pop();
        }

        normalized
    }

    /// Returns `target_path` relative to `base_path` when the target lies
    /// strictly inside the base; otherwise returns the normalized target.
    ///
    /// Both inputs are normalized first. The base is matched without regard
    /// to ASCII case, and the returned remainder keeps the target's own case.
    ///
    /// Edge cases:
    /// - a root base (`/`) strips the leading separator from targets below it;
    /// - an empty base never strips anything, so absolute targets stay absolute;
    /// - a target equal to the base is returned unchanged.
    pub fn relative_to(base_path: &str, target_path: &str) -> String {
        let base = Self::normalize(base_path);
        let target = Self::normalize(target_path);

        // There is no directory to strip; without this guard the bare
        // separator would match and absolute targets would lose their root.
        if base.is_empty() {
            return target;
        }

        let relative: Option<String> = target
            // `get` returns `None` when the target is shorter than the base
            // or the cut falls inside a multi-byte character.
            .get(..base.len())
            .filter(|head| head.eq_ignore_ascii_case(&base))
            .map(|_| &target[base.len()..])
            .and_then(|rest| {
                if base.ends_with('/') {
                    // Only the root base ends with a separator after
                    // normalization, and it already includes it.
                    Some(rest)
                } else {
                    // Require a separator so `/proj` does not match `/project`.
                    rest.strip_prefix('/')
                }
            })
            .filter(|rest| !rest.is_empty())
            .map(String::from);

        relative.unwrap_or(target)
    }

    /// Reports whether `path` is absolute for the platform this was compiled for.
    ///
    /// On Windows builds this accepts only drive-letter paths such as `C:\x`
    /// or `C:/x`; on other builds it accepts paths starting with `/`.
    /// An empty path is never absolute.
    pub fn is_absolute(path: &str) -> bool {
        if path.is_empty() {
            return false;
        }

        if cfg!(windows) {
            let bytes = path.as_bytes();
            bytes.len() >= 3
                && bytes[1] == b':'
                && (bytes[2] == b'\\' || bytes[2] == b'/')
                && bytes[0].is_ascii_alphabetic()
        } else {
            path.starts_with('/')
        }
    }
}

/// Flattens a JSON value into dot-separated key paths.
///
/// `prefix`, when given, is prepended to every path. The result is sorted
/// and contains no duplicates.
pub fn flatten_keys(value: &Value, prefix: Option<&str>) -> Vec<String> {
    let root_prefix = prefix.unwrap_or("");

    let mut collected = Vec::new();
    flatten_keys_inner(value, root_prefix, &mut collected);

    // An ordered set yields the paths sorted and unique in one step.
    let unique: std::collections::BTreeSet<String> = collected.into_iter().collect();
    unique.into_iter().collect()
}

/// Recursive worker for `flatten_keys`.
///
/// Objects are descended into, extending `prefix` with each member name.
/// Any other value is a leaf whose path (`prefix`) is pushed onto `keys`,
/// unless the path is empty.
fn flatten_keys_inner(value: &Value, prefix: &str, keys: &mut Vec<String>) {
    if let Value::Object(children) = value {
        for (name, child) in children {
            let path = match prefix {
                "" => name.clone(),
                parent => format!("{}.{}", parent, name),
            };
            flatten_keys_inner(child, &path, keys);
        }
        return;
    }

    // A leaf at the very top has no path to report.
    if prefix.is_empty() {
        return;
    }
    keys.push(prefix.to_owned());
}