// yosina 1.1.3
//
// Japanese text transliteration library
// Documentation
use std::borrow::Cow;
use std::collections::HashMap;
use std::sync::OnceLock;

use serde::{Deserialize, Serialize};

use crate::char::{Char, CharPool};
use crate::transliterator::{
    TransliterationError, Transliterator, TransliteratorFactory, TransliteratorFactoryError,
};

/// One row of the hyphen mapping table: the optional replacement string for each
/// `HyphensTransliterationVariant`.
///
/// A field set to `None` means this row offers no replacement for that variant.
/// The field names suggest target character sets (ASCII, JIS X 0201, JIS X 0208-1990, ...);
/// that reading is presumed from the names — confirm against the generated data file.
#[derive(Debug, Clone)]
struct HyphensRecord {
    /// Replacement returned when the `Ascii` variant is selected.
    pub ascii: Option<&'static str>,
    /// Replacement returned when the `Jisx0201` variant is selected.
    pub jisx0201: Option<&'static str>,
    /// Replacement returned when the `Jisx0208_90` variant is selected.
    pub jisx0208_90: Option<&'static str>,
    /// Replacement returned when the `Jisx0208_90Windows` variant is selected.
    pub jisx0208_90_windows: Option<&'static str>,
    /// Replacement returned when the `Jisx0208Verbatim` variant is selected.
    pub jisx0208_verbatim: Option<&'static str>,
}

/// Selects which replacement column of the hyphen mapping table is consulted.
///
/// Each variant is routed to the same-named optional field of `HyphensRecord` by
/// `HyphensMappings::variant_getter`. The variant names suggest target character sets
/// (ASCII, JIS X 0201, JIS X 0208-1990 and a Windows flavour of it) — presumably; confirm
/// against the data file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
pub enum HyphensTransliterationVariant {
    /// Use the `ascii` replacement.
    Ascii,
    /// Use the `jisx0201` replacement.
    Jisx0201,
    /// Use the `jisx0208_90` replacement.
    Jisx0208_90,
    /// Use the `jisx0208_90_windows` replacement.
    Jisx0208_90Windows,
    /// Use the `jisx0208_verbatim` replacement.
    Jisx0208Verbatim,
}

// Textually includes the sibling data file into this module. `MAPPINGS`, which
// `HyphensMappings::get` clones, is not defined in this file and is presumably provided by
// this include — confirm against `hyphens_data.rs`.
include!("./hyphens_data.rs");

/// Lookup table from an input string key to its `HyphensRecord` of per-variant replacements.
///
/// The shared instance is obtained through `HyphensMappings::get`.
pub struct HyphensMappings(HashMap<&'static str, HyphensRecord>);

impl HyphensMappings {
    pub fn get() -> &'static Self {
        static SELF: OnceLock<HyphensMappings> = OnceLock::new();
        OnceLock::get_or_init(&SELF, || HyphensMappings(MAPPINGS.clone()))
    }

    fn get_ascii(&self, key: &str) -> Option<&'static str> {
        self.0.get(key).and_then(|r| r.ascii)
    }

    fn get_jisx0201(&self, key: &str) -> Option<&'static str> {
        self.0.get(key).and_then(|r| r.jisx0201)
    }

    fn get_jisx0208_90(&self, key: &str) -> Option<&'static str> {
        self.0.get(key).and_then(|r| r.jisx0208_90)
    }

    fn get_jisx0208_90_windows(&self, key: &str) -> Option<&'static str> {
        self.0.get(key).and_then(|r| r.jisx0208_90_windows)
    }

    fn get_jisx0208_verbatim(&self, key: &str) -> Option<&'static str> {
        self.0.get(key).and_then(|r| r.jisx0208_verbatim)
    }

    pub fn variant_getter<'a>(
        &'a self,
        variant: HyphensTransliterationVariant,
    ) -> impl Fn(&str) -> Option<&'static str> + 'a {
        use HyphensTransliterationVariant::*;
        let getter = match variant {
            Ascii => Self::get_ascii,
            Jisx0201 => Self::get_jisx0201,
            Jisx0208_90 => Self::get_jisx0208_90,
            Jisx0208_90Windows => Self::get_jisx0208_90_windows,
            Jisx0208Verbatim => Self::get_jisx0208_verbatim,
        };
        move |key: &str| getter(self, key)
    }
}

/// Transliterator that replaces characters found in the hyphen mapping table.
///
/// Holds one lookup closure per configured variant, in precedence order. During
/// transliteration the closures are tried in order and the first one returning `Some`
/// supplies the replacement.
pub struct HyphensTransliterator<'c> {
    // Clippy flags the boxed-closure type below as overly complex; the lint is silenced
    // for this field.
    #[allow(clippy::type_complexity)]
    getters: Vec<Box<dyn Fn(&str) -> Option<&'static str> + Send + Sync + 'c>>,
}

impl<'c> Transliterator for HyphensTransliterator<'c> {
    /// Replaces every input character that has a mapping and re-offsets the rest.
    ///
    /// For each input character whose `c()` yields a string, the configured getters are
    /// tried in order and the first `Some` result becomes the replacement, allocated through
    /// `pool.new_char_from`. Characters with no string or no mapping are re-created with
    /// `pool.new_with_offset`. The running offset advances by the length of each produced
    /// character's `c` field. This implementation always returns `Ok`.
    fn transliterate<'a, 'b>(
        &self,
        pool: &mut CharPool<'a, 'b>,
        input: &[&'a Char<'a, 'b>],
    ) -> Result<Vec<&'a Char<'a, 'b>>, TransliterationError> {
        let mut output = Vec::new();
        let mut position = 0;
        for source in input {
            // First getter (in precedence order) that knows this character wins.
            let replacement = source
                .c()
                .and_then(|text| self.getters.iter().find_map(|lookup| lookup(text)));
            let produced = match replacement {
                Some(mapped) => pool.new_char_from(Cow::Borrowed(mapped), position, source),
                None => pool.new_with_offset(source, position),
            };
            position += produced.c.len();
            output.push(produced);
        }
        Ok(output)
    }
}

impl<'c> HyphensTransliterator<'c> {
    /// Creates a transliterator that tries the given variants in iteration order.
    ///
    /// Each variant is turned into a lookup closure over the shared `HyphensMappings`
    /// table; the closures are stored in the same order as `precedence` yields them.
    pub fn new(precedence: impl IntoIterator<Item = HyphensTransliterationVariant>) -> Self {
        let mut getters = Vec::new();
        for variant in precedence {
            // The shared table is fetched per variant, so it is only built if needed.
            let lookup = HyphensMappings::get().variant_getter(variant);
            getters.push(
                Box::new(lookup) as Box<dyn Fn(&str) -> Option<&'static str> + Send + Sync + 'c>
            );
        }
        Self { getters }
    }
}

/// Serializable configuration from which a `HyphensTransliterator` is built.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct HyphensTransliteratorOptions {
    /// Variants to try for each character, in order; handed to `HyphensTransliterator::new`
    /// by the factory implementation.
    pub precedence: Vec<HyphensTransliterationVariant>,
}

impl TransliteratorFactory for HyphensTransliteratorOptions {
    /// Builds a boxed `HyphensTransliterator` using this configuration's precedence list.
    ///
    /// This implementation always returns `Ok`.
    fn new_transliterator(&self) -> Result<Box<dyn Transliterator>, TransliteratorFactoryError> {
        // The variant enum is `Copy`, so the list is iterated by value.
        let variants = self.precedence.iter().copied();
        let transliterator = HyphensTransliterator::new(variants);
        Ok(Box::new(transliterator))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::char::CharPool;

    #[test]
    fn test_ascii_variant() {
        let transliterator = HyphensTransliterator::new([HyphensTransliterationVariant::Ascii]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some("-"));
    }

    #[test]
    fn test_jisx0201_variant() {
        let transliterator = HyphensTransliterator::new([HyphensTransliterationVariant::Jisx0201]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some("-"));
    }

    #[test]
    fn test_jisx0208_90_variant() {
        let transliterator =
            HyphensTransliterator::new([HyphensTransliterationVariant::Jisx0208_90]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some(""));
    }

    #[test]
    fn test_jisx0208_90_windows_variant() {
        let transliterator =
            HyphensTransliterator::new([HyphensTransliterationVariant::Jisx0208_90Windows]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some(""));
    }

    #[test]
    fn test_precedence_order() {
        let transliterator = HyphensTransliterator::new([
            HyphensTransliterationVariant::Ascii,
            HyphensTransliterationVariant::Jisx0208_90,
        ]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some("-"));
    }

    #[test]
    fn test_no_mapping_leaves_unchanged() {
        let transliterator = HyphensTransliterator::new([HyphensTransliterationVariant::Ascii]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("abc");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 4);
        assert_eq!(result[0].c(), Some("a"));
        assert_eq!(result[1].c(), Some("b"));
        assert_eq!(result[2].c(), Some("c"));
    }

    #[test]
    fn test_mixed_input() {
        let transliterator = HyphensTransliterator::new([HyphensTransliterationVariant::Ascii]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("hello—world");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 12);
        assert_eq!(result[5].c(), Some("-"));
    }

    #[test]
    fn test_vertical_lines() {
        let transliterator =
            HyphensTransliterator::new([HyphensTransliterationVariant::Jisx0208_90]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some(""));
    }

    #[test]
    fn test_wave_dash() {
        let transliterator = HyphensTransliterator::new([HyphensTransliterationVariant::Ascii]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some("~"));
    }

    #[test]
    fn test_multiple_variants_fallback() {
        let transliterator = HyphensTransliterator::new([
            HyphensTransliterationVariant::Jisx0208Verbatim,
            HyphensTransliterationVariant::Ascii,
        ]);
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some("-"));
    }

    #[test]
    fn test_factory() {
        let options = HyphensTransliteratorOptions {
            precedence: vec![HyphensTransliterationVariant::Ascii],
        };
        let transliterator = options.new_transliterator().unwrap();
        let mut pool = CharPool::new();

        let input_chars = pool.build_char_array("");
        let result = transliterator
            .transliterate(&mut pool, &input_chars)
            .unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].c(), Some("-"));
    }
}