// ferrous-opencc 0.2.3
//
// A pure Rust implementation of Open Chinese Convert (OpenCC), for fast and reliable conversion between Traditional and Simplified Chinese.
// Documentation
use anyhow::{Context, Result};
use std::env;
use std::fs;
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;
use std::path::{Path, PathBuf};

use ferrous_opencc_compiler::compile_dictionary;

fn run() -> Result<()> {
    let out_dir = env::var("OUT_DIR").context("Failed to get OUT_DIR environment variable")?;
    let dest_path = Path::new(&out_dir);
    let manifest_dir = PathBuf::from(
        env::var("CARGO_MANIFEST_DIR").context("Failed to get CARGO_MANIFEST_DIR env variable")?,
    );

    let assets_root = manifest_dir.join("assets");
    let dict_dir = assets_root.join("dictionaries");

    let mut dict_map_builder = phf_codegen::Map::<&str>::new();
    let mut dicts_to_add: Vec<(String, String)> = Vec::new();

    if dict_dir.exists() {
        for entry in fs::read_dir(&dict_dir)? {
            let entry = entry?;
            let path = entry.path();
            if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("txt") {
                let file_stem = path.file_stem().unwrap().to_str().unwrap();
                let ocd2_key_name = format!("{file_stem}.ocd2");
                let ocb_file_name = format!("{file_stem}.ocb");
                let ocb_path = dest_path.join(&ocb_file_name);
                let ocb_bytes = compile_dictionary(&path)?;
                fs::write(&ocb_path, ocb_bytes)?;
                let ocb_path_str = ocb_path.to_str().unwrap().replace('\\', "/");

                let value_code = format!("include_bytes!(r\"{ocb_path_str}\")");

                dicts_to_add.push((ocd2_key_name, value_code));
            }
        }
    }

    for (key, value) in &dicts_to_add {
        dict_map_builder.entry(key, value);
    }

    let mut config_map_builder = phf_codegen::Map::<&str>::new();
    let mut configs_to_add: Vec<(String, String)> = Vec::new();

    if assets_root.exists() {
        for entry in fs::read_dir(&assets_root)? {
            let entry = entry?;
            let path = entry.path();
            if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("json") {
                let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
                let content = fs::read_to_string(&path)?;
                configs_to_add.push((file_name, content));
            }
        }
    }

    let formatted_config_values: Vec<String> = configs_to_add
        .iter()
        .map(|(_, content)| format!("r#\"{}\"#", content.trim()))
        .collect();

    for (i, (file_name, _)) in configs_to_add.iter().enumerate() {
        config_map_builder.entry(file_name, &formatted_config_values[i]);
    }

    let generated_map_path = dest_path.join("embedded_map.rs");
    let mut file = BufWriter::new(File::create(&generated_map_path)?);

    writeln!(
        &mut file,
        "// @generated - This file is automatically generated by build.rs."
    )?;
    writeln!(
        &mut file,
        "pub static EMBEDDED_DICTS: phf::Map<&'static str, &'static [u8]> = {};",
        dict_map_builder.build()
    )?;
    writeln!(
        &mut file,
        "pub static EMBEDDED_CONFIGS: phf::Map<&'static str, &'static str> = {};",
        config_map_builder.build()
    )?;

    cbindgen::Builder::new()
        .with_crate(manifest_dir)
        .with_config(cbindgen::Config::from_file("cbindgen.toml").unwrap())
        .generate()
        .expect("Unable to generate bindings")
        .write_to_file("opencc.h");

    Ok(())
}

/// Build-script entry point: runs [`run`] and, on failure, prints the error chain
/// to stderr and exits with status 1 so that the build fails.
fn main() {
    match run() {
        Ok(()) => {}
        Err(e) => {
            eprintln!("cargo:warning=Build script ferrous-opencc/build.rs failed: {e:?}");
            std::process::exit(1);
        }
    }
}