fluent-zero-build 0.1.4

Build-time code generator for fluent-zero. It compiles Fluent (.ftl) files into static, zero-allocation Rust code and Perfect Hash Maps.
Documentation
use std::{
    collections::BTreeSet,
    env, fs,
    io::Write as _,
    path::{Path, PathBuf},
};

use fluent_syntax::parser;
use unic_langid::LanguageIdentifier;

/// A builder to configure and generate the static cache code for `fluent-zero`.
///
/// Create it with [`FluentZeroBuilder::new`] (or [`Default::default`]), optionally chain
/// [`FluentZeroBuilder::export_charset`], and finish with [`FluentZeroBuilder::generate`].
#[derive(Debug, Clone)]
pub struct FluentZeroBuilder {
    /// Directory whose subdirectories are named after language identifiers and hold
    /// the `.ftl` files of that locale.
    locales_dir: PathBuf,
    /// Destination of the exported charset text file; `None` leaves the export disabled.
    charset_dest: Option<PathBuf>,
}

/// The default configuration reads locales from `assets/locales` and does not
/// export a charset file.
impl Default for FluentZeroBuilder {
    fn default() -> Self {
        let locales_dir = PathBuf::from("assets/locales");
        Self {
            locales_dir,
            charset_dest: None,
        }
    }
}

impl FluentZeroBuilder {
    /// Creates a new builder pointing to the directory containing locale subdirectories.
    ///
    /// # Arguments
    ///
    /// * `locales_dir_path` - Relative path to the folder containing locale subdirectories.
    #[must_use]
    pub fn new<P: AsRef<Path>>(locales_dir_path: P) -> Self {
        Self {
            locales_dir: locales_dir_path.as_ref().to_path_buf(),
            charset_dest: None,
        }
    }

    /// Opt-in to exporting a deterministic text file containing all unique
    /// characters used across all `.ftl` files in this crate **and its dependencies**.
    ///
    /// **Dependency Aggregation:** This natively aggregates characters from dependencies
    /// using Cargo's standard IPC. No metadata scraping or tree walking is required.
    #[must_use]
    pub fn export_charset<P: AsRef<Path>>(mut self, dest: P) -> Self {
        self.charset_dest = Some(dest.as_ref().to_path_buf());
        self
    }

    /// Consumes the builder and writes `static_cache.rs` into Cargo's `OUT_DIR`.
    ///
    /// The work happens in three steps:
    ///
    /// 1. Seed the charset from every readable file named by a
    ///    `DEP_*_FLUENT_CHARSET_PATH` environment variable.
    /// 2. For each subdirectory of the locales directory whose name parses as a
    ///    language identifier, emit a lazily initialised bundle and a static cache map.
    /// 3. Emit the root `CACHE` / `LOCALES` maps and the `CHARSET` constant, and
    ///    publish the charset for dependent build scripts.
    ///
    /// A missing locales directory is not an error: step 3 still runs and writes
    /// empty root maps.
    ///
    /// # Panics
    ///
    /// Panics if the `OUT_DIR` environment variable is not set, or if critical
    /// IO file system operations fail during compilation.
    pub fn generate(self) {
        let out_dir = env::var("OUT_DIR").expect("OUT_DIR not set");
        let generated_path = Path::new(&out_dir).join("static_cache.rs");
        let mut file =
            fs::File::create(&generated_path).expect("Failed to create static_cache.rs");
        writeln!(file, "// @generated by fluent-zero-build").unwrap();

        // Step 1: dependency aggregation.
        // Cargo forwards `cargo:KEY=VALUE` lines printed by a dependency's build script
        // to its dependents as `DEP_<LINKS>_<KEY>` when the dependency declares `links`.
        // Unreadable charset files are skipped silently.
        let mut unique_chars = BTreeSet::<char>::new();
        let upstream_charsets = env::vars()
            .filter(|(name, _)| name.starts_with("DEP_") && name.ends_with("_FLUENT_CHARSET_PATH"))
            .filter_map(|(_, charset_path)| fs::read_to_string(charset_path).ok());
        for charset in upstream_charsets {
            unique_chars.extend(charset.chars().filter(|c| !c.is_control()));
        }

        let mut cache_root_entries: Vec<(String, String)> = Vec::new();
        let mut bundle_entries: Vec<(String, String)> = Vec::new();

        // Step 2: local translations, if the crate ships any.
        if self.locales_dir.exists() {
            println!("cargo:rerun-if-changed={}", self.locales_dir.display());

            let locale_dirs =
                fs::read_dir(&self.locales_dir).expect("Failed to read locales directory");
            for dir_entry in locale_dirs {
                let locale_path = dir_entry.expect("Failed to read directory entry").path();
                if !locale_path.is_dir() {
                    continue;
                }

                // Ignore directories whose name is not UTF-8 or not a language identifier.
                let parsed_name = locale_path
                    .file_name()
                    .and_then(|name| name.to_str())
                    .and_then(|name| name.parse::<LanguageIdentifier>().ok());
                let Some(lang_id) = parsed_name else {
                    continue;
                };

                // `en-US` becomes the identifier suffix `EN_US`.
                let lang_key = lang_id.to_string();
                let ident_suffix = lang_key.replace('-', "_").to_uppercase();
                let bundle_name = format!("BUNDLE_{ident_suffix}");
                let cache_name = format!("CACHE_{ident_suffix}");

                let (ftl_source, cache_entries) =
                    Self::process_locale_dir(&locale_path, &mut unique_chars);
                Self::write_bundle_initializer(&mut file, &bundle_name, &lang_key, &ftl_source);
                Self::write_cache_map(&mut file, &cache_name, &cache_entries);

                bundle_entries.push((lang_key.clone(), format!("&{bundle_name}")));
                cache_root_entries.push((lang_key, cache_name));
            }
        }

        // Step 3: always runs, so crates without local translations still get valid
        // (empty) root maps and still forward the aggregated charset.
        Self::write_root_maps(&mut file, &cache_root_entries, &bundle_entries);
        self.export_charset_data(&mut file, &unique_chars, &out_dir);
    }

    fn process_locale_dir(
        path: &Path,
        unique_chars: &mut BTreeSet<char>,
    ) -> (String, Vec<(String, String)>) {
        let mut combined_ftl_source = String::new();
        let mut cache_entries = Vec::new();

        let entries = fs::read_dir(path).expect("Failed to read locale subdirectory");

        for file_entry in entries {
            let file_entry = file_entry.expect("Failed to read file entry");
            let file_path = file_entry.path();

            if file_path.extension().is_some_and(|ext| ext == "ftl") {
                println!("cargo:rerun-if-changed={}", file_path.display());
                let source = fs::read_to_string(&file_path).expect("Failed to read FTL file");

                combined_ftl_source.push_str(&source);
                combined_ftl_source.push('\n');

                let ast = parser::parse(source).expect("Failed to parse FTL");
                for entry in ast.body {
                    if let fluent_syntax::ast::Entry::Message(msg) = entry {
                        Self::harvest_chars(&msg, unique_chars);
                        cache_entries.push((msg.id.name.clone(), Self::generate_cache_entry(&msg)));
                    }
                }
            }
        }

        (combined_ftl_source, cache_entries)
    }

    fn harvest_chars(msg: &fluent_syntax::ast::Message<String>, unique_chars: &mut BTreeSet<char>) {
        let Some(pattern) = &msg.value else { return };
        for element in &pattern.elements {
            if let fluent_syntax::ast::PatternElement::TextElement { value } = element {
                unique_chars.extend(value.chars().filter(|c| !c.is_control()));
            }
        }
    }

    /// Produces the Rust expression stored in the per-locale cache map for `msg`.
    ///
    /// A message whose value is exactly one text element becomes
    /// `::fluent_zero::CacheEntry::Static("...")`. Everything else (no value,
    /// placeables, several elements, or text containing a backslash) becomes
    /// `::fluent_zero::CacheEntry::Dynamic`.
    ///
    /// The text is emitted with `{:?}`, which escapes quotes and other special
    /// characters, so the result is a well-formed Rust string literal even for
    /// messages such as `confirm = Press "OK"`.
    fn generate_cache_entry(msg: &fluent_syntax::ast::Message<String>) -> String {
        if let Some(pattern) = &msg.value
            && let [fluent_syntax::ast::PatternElement::TextElement { value }] =
                pattern.elements.as_slice()
            && !value.contains('\\')
        {
            format!("::fluent_zero::CacheEntry::Static({value:?})")
        } else {
            "::fluent_zero::CacheEntry::Dynamic".to_string()
        }
    }

    /// Emits `static <bundle_name>`, a `LazyLock` that builds the Fluent bundle of one
    /// locale on first access.
    ///
    /// The generated initializer parses `lang_key` as the bundle language and loads
    /// `combined_ftl_source`, which is embedded as an escaped string literal.
    ///
    /// # Panics
    ///
    /// Panics if writing to `file` fails.
    fn write_bundle_initializer(
        file: &mut fs::File,
        bundle_name: &str,
        lang_key: &str,
        combined_ftl_source: &str,
    ) {
        const BUNDLE_TYPE: &str = "std::sync::LazyLock<::fluent_zero::ConcurrentFluentBundle<::fluent_zero::FluentResource>>";

        // `{:?}` turns the FTL text into a valid Rust string literal.
        let ftl_literal = format!("{combined_ftl_source:?}");
        let closure_body = [
            format!("    let lang: ::fluent_zero::LanguageIdentifier = \"{lang_key}\".parse().unwrap();"),
            "    let mut bundle = ::fluent_zero::ConcurrentFluentBundle::new_concurrent(vec![lang]);".to_owned(),
            format!("    let res = ::fluent_zero::FluentResource::try_new({ftl_literal}.to_string()).expect(\"FTL Error\");"),
            "    bundle.add_resource(res).expect(\"Resource Error\");".to_owned(),
            "    bundle".to_owned(),
        ]
        .join("\n");

        writeln!(
            file,
            "static {bundle_name}: {BUNDLE_TYPE} = std::sync::LazyLock::new(|| {{\n{closure_body}\n}});"
        )
        .unwrap();
    }

    /// Emits `static <cache_name>`, a `phf` map from message id to cache entry.
    ///
    /// Each pair in `cache_entries` is `(message id, Rust expression for the entry)`.
    /// The generated code refers to `phf` through the `::fluent_zero::phf` path.
    ///
    /// # Panics
    ///
    /// Panics if writing to `file` fails.
    fn write_cache_map(file: &mut fs::File, cache_name: &str, cache_entries: &[(String, String)]) {
        let mut builder = phf_codegen::Map::new();
        builder.phf_path("::fluent_zero::phf");
        for (message_id, entry_expr) in cache_entries {
            builder.entry(message_id.as_str(), entry_expr.as_str());
        }

        let rendered_map = builder.build();
        writeln!(
            file,
            "static {cache_name}: ::fluent_zero::phf::Map<&'static str, ::fluent_zero::CacheEntry> = {rendered_map};"
        )
        .unwrap();
    }

    /// Emits the two public lookup tables keyed by locale: `CACHE` and `LOCALES`.
    ///
    /// * `cache_root_entries` holds `(locale, cache static name)` pairs; each value is
    ///   written as a reference (`&NAME`) to the per-locale cache map.
    /// * `bundle_entries` holds `(locale, expression)` pairs whose expression is written
    ///   as-is as the `LOCALES` value.
    ///
    /// Both tables are written even when the slices are empty.
    ///
    /// # Panics
    ///
    /// Panics if writing to `file` fails.
    fn write_root_maps(
        file: &mut fs::File,
        cache_root_entries: &[(String, String)],
        bundle_entries: &[(String, String)],
    ) {
        // Locale -> per-locale cache map.
        let mut cache_index = phf_codegen::Map::new();
        cache_index.phf_path("::fluent_zero::phf");
        for (locale, cache_ident) in cache_root_entries {
            cache_index.entry(locale.as_str(), format!("&{cache_ident}"));
        }
        let cache_index_code = cache_index.build();
        writeln!(
            file,
            "pub static CACHE: ::fluent_zero::phf::Map<&'static str, &'static ::fluent_zero::phf::Map<&'static str, ::fluent_zero::CacheEntry>> = {cache_index_code};"
        )
        .unwrap();

        // Locale -> lazily initialised bundle.
        let mut locale_index = phf_codegen::Map::new();
        locale_index.phf_path("::fluent_zero::phf");
        for (locale, bundle_expr) in bundle_entries {
            locale_index.entry(locale.as_str(), bundle_expr.as_str());
        }
        let locale_index_code = locale_index.build();
        writeln!(
            file,
            "pub static LOCALES: ::fluent_zero::phf::Map<&'static str, &'static std::sync::LazyLock<::fluent_zero::ConcurrentFluentBundle<::fluent_zero::FluentResource>>> = {locale_index_code};"
        )
        .unwrap();
    }

    /// Writes the collected charset to every place that consumes it.
    ///
    /// 1. Appends a documented `pub const CHARSET: &str` to the generated module.
    /// 2. Stores the charset in `<out_dir>/fluent_charset_internal.txt` and, if that
    ///    succeeded, prints `cargo:fluent_charset_path=<file>` for dependent build scripts.
    /// 3. If a charset destination was configured, writes the charset there as well,
    ///    creating parent directories on a best-effort basis.
    ///
    /// # Panics
    ///
    /// Panics if writing to `file` or to the configured charset destination fails.
    fn export_charset_data(
        &self,
        file: &mut fs::File,
        unique_chars: &BTreeSet<char>,
        out_dir: &str,
    ) {
        // `BTreeSet` iterates in ascending order, so the string is sorted.
        let charset_string = unique_chars.iter().collect::<String>();

        writeln!(
            file,
            concat!(
                "\n/// A deterministically sorted string containing all unique characters\n",
                "/// used across all `.ftl` files in this crate and its dependencies.\n",
                "pub const CHARSET: &str = {:?};",
            ),
            charset_string
        )
        .unwrap();

        // Hand the charset to the NEXT build script as a file path rather than as the
        // value itself (the original author cites Windows' 32KB limits). Cargo exposes
        // the printed key to dependents as `DEP_<LINKS>_FLUENT_CHARSET_PATH`.
        // A failed write is tolerated; the path is simply not announced.
        let internal_dest = Path::new(out_dir).join("fluent_charset_internal.txt");
        let published = fs::write(&internal_dest, &charset_string).is_ok();
        if published {
            println!("cargo:fluent_charset_path={}", internal_dest.display());
        }

        let Some(charset_dest) = self.charset_dest.as_deref() else {
            return;
        };
        if let Some(parent) = charset_dest.parent() {
            // Errors are ignored here; the write below reports a missing directory.
            let _ = fs::create_dir_all(parent);
        }
        fs::write(charset_dest, &charset_string).expect("Failed to write fluent charset file");
        println!("cargo:rerun-if-changed={}", charset_dest.display());
    }
}

/// Generates the static cache code for `fluent-zero`.
///
/// Shorthand for `FluentZeroBuilder::new(locales_dir_path).generate()`, without
/// opting in to the charset file export.
///
/// # Panics
///
/// Panics under the same conditions as [`FluentZeroBuilder::generate`].
pub fn generate_static_cache(locales_dir_path: &str) {
    let builder = FluentZeroBuilder::new(locales_dir_path);
    builder.generate();
}