cskk 3.1.4

C ABIから使う事を目的とした SKK(Simple Kana Kanji henkan)方式のかな漢字変換ライブラリ
Documentation
use crate::env::filepath_from_xdg_data_dir;
use crate::skk_modes::InputMode;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::Read;

/// Rewrites kana strings into the form required by an input mode, using the
/// replacement tables held in `maps`.
pub(crate) struct KanaFormChanger {
    /// Replacement tables, one per target kana form.
    maps: KanaFormMap,
    /// Length of the longest katakana table key, counted in chars rather than
    /// bytes (e.g. 'う゛' counts as 2). 0 when the table is empty.
    katakana_key_maxlen: usize,
    /// Length in chars of the longest jisx0201 table key. 0 when the table is empty.
    jisx0201_key_maxlen: usize,
}

/// Replacement tables deserialized from the kana form rule data.
/// A table missing from the input deserializes as an empty map (`serde(default)`).
#[derive(Deserialize)]
struct KanaFormMap {
    /// Source string -> replacement used for `InputMode::Katakana`.
    #[serde(default)]
    katakana: BTreeMap<String, String>,
    /// Source string -> replacement used for `InputMode::HankakuKatakana`.
    #[serde(default)]
    jisx0201: BTreeMap<String, String>,
}

// Builds a `BTreeMap` from a comma-separated list of `[key, value]` pairs.
// Pairs are inserted left to right, so a repeated key keeps its last value.
macro_rules! btreemap {
    ($([$k:expr, $v:expr]),*) => {{
        let mut built = BTreeMap::new();
        $(
            built.insert($k, $v);
        )*
        built
    }};
}
lazy_static! {
    static ref KANA_ROM_MAP: BTreeMap<char, char> = btreemap![
        ['', 'a'],
        ['', 'i'],
        ['', 'u'],
        ['', 'e'],
        ['', 'o'],
        ['', 'k'],
        ['', 'k'],
        ['', 'k'],
        ['', 'k'],
        ['', 'k'],
        ['', 's'],
        ['', 's'],
        ['', 's'],
        ['', 's'],
        ['', 's'],
        ['', 't'],
        ['', 't'],
        ['', 't'],
        ['', 't'],
        ['', 't'],
        ['', 'n'],
        ['', 'n'],
        ['', 'n'],
        ['', 'n'],
        ['', 'n'],
        ['', 'h'],
        ['', 'h'],
        ['', 'h'],
        ['', 'h'],
        ['', 'h'],
        ['', 'm'],
        ['', 'm'],
        ['', 'm'],
        ['', 'm'],
        ['', 'm'],
        ['', 'y'],
        ['', 'y'],
        ['', 'y'],
        ['', 'r'],
        ['', 'r'],
        ['', 'r'],
        ['', 'r'],
        ['', 'r'],
        ['', 'w'],
        ['', 'x'],
        ['', 'x'],
        ['', 'w'],
        ['', 'n'],
        ['', 'g'],
        ['', 'g'],
        ['', 'g'],
        ['', 'g'],
        ['', 'g'],
        ['', 'z'],
        ['', 'z'], // ddskkでは'じ'が送り仮名の場合'j'として処理するのがデフォルト値だが、SKK-JISYO.S等ではzの送り仮名を用いることが多いのでこちらを用いる。
        ['', 'z'],
        ['', 'z'],
        ['', 'z'],
        ['', 'd'],
        ['', 'd'],
        ['', 'd'],
        ['', 'd'],
        ['', 'd'],
        ['', 'b'],
        ['', 'b'],
        ['', 'b'],
        ['', 'b'],
        ['', 'b'],
        ['', 'p'],
        ['', 'p'],
        ['', 'p'],
        ['', 'p'],
        ['', 'p'],
        ['', 'x'],
        ['', 'x'],
        ['', 'x'],
        ['', 'x'],
        ['', 'x'],
        ['', 't'], // ddskk 16.2ではxがデフォルトだが、SKK-JISYO.Lなどでは撥音便の用語はtで収録されているため。'いt'等。
        ['', 'x'],
        ['', 'x'],
        ['', 'x'],
        ['', 'x']
    ];
}
impl KanaFormChanger {
    /// Builds the changer from `libcskk/rule/kana_form.toml` resolved through the XDG
    /// data directories, or from empty rule data when that lookup fails.
    pub fn default_kanaform_changer() -> Self {
        match filepath_from_xdg_data_dir("libcskk/rule/kana_form.toml") {
            Ok(rule_path) => KanaFormChanger::from_file(&rule_path),
            // Lookup failed: fall back to empty replacement tables.
            Err(_) => KanaFormChanger::from_string(""),
        }
    }

    /// Loads the replacement tables from the TOML file at `filename`.
    ///
    /// Public only for the e2e tests; use `default_kanaform_changer` instead.
    ///
    /// # Panics
    /// Panics when the file cannot be opened or read, or when its contents are
    /// rejected by `from_string`.
    pub fn from_file(filename: &str) -> Self {
        let mut toml_text = String::new();
        File::open(filename)
            .unwrap_or_else(|_| panic!("file {} not found", filename))
            .read_to_string(&mut toml_text)
            .expect("file read error");
        KanaFormChanger::from_string(&toml_text)
    }

    fn from_string(contents: &str) -> Self {
        let kana_form_map: KanaFormMap =
            toml::from_str(contents).expect("source data file for kana form is broken");
        let katakana_key_maxlen = kana_form_map
            .katakana
            .keys()
            .map(|x| x.chars().count())
            .max()
            .unwrap_or(0);
        let jisx0201_key_maxlen = kana_form_map
            .jisx0201
            .keys()
            .map(|x| x.chars().count())
            .max()
            .unwrap_or(0);
        KanaFormChanger {
            maps: kana_form_map,
            katakana_key_maxlen,
            jisx0201_key_maxlen,
        }
    }

    ///
    /// Converts `kana` for the given input mode, treating the whole of `kana` as one
    /// minimal replacement unit (a single table lookup, no splitting).
    /// The number of characters may change, e.g. 'う゛' -> 'ヴ', 'ぽ' -> 'ポ'.
    ///
    /// Returns `kana` unchanged when the mode has no replacement table or the table
    /// has no entry for it.
    ///
    #[allow(dead_code)]
    fn adjust_one_kana(&self, input_mode: &InputMode, kana: &str) -> String {
        let table = match input_mode {
            InputMode::Katakana => &self.maps.katakana,
            InputMode::HankakuKatakana => &self.maps.jisx0201,
            // Hiragana and every other mode keep the kana as it is.
            _ => return kana.to_string(),
        };
        // Allocate the fallback copy only when the lookup misses.
        table
            .get(kana)
            .cloned()
            .unwrap_or_else(|| kana.to_string())
    }

    ///
    /// Converts every part of `kana` into the form required by `input_mode`.
    /// The number of characters may change, e.g. 'う゛' -> 'ヴ', 'ぽ' -> 'ポ'.
    ///
    /// Only the katakana and half-width katakana modes have a replacement table;
    /// in any other mode the input is returned unchanged.
    ///
    pub fn adjust_kana_string(&self, input_mode: InputMode, kana: &str) -> String {
        let (table, key_maxlen) = if input_mode == InputMode::Katakana {
            (&self.maps.katakana, self.katakana_key_maxlen)
        } else if input_mode == InputMode::HankakuKatakana {
            (&self.maps.jisx0201, self.jisx0201_key_maxlen)
        } else {
            // No replacement table applies to the remaining modes.
            return kana.to_string();
        };

        let mut converted = String::new();
        KanaFormChanger::adjust_kana_string_inner_recur(table, key_maxlen, kana, &mut converted);
        converted
    }

    /// Greedy longest-match replacement of `to_adjust`, appended to `adjusted`.
    ///
    /// At each position the longest prefix of at most `max_len` chars that is a key of
    /// `map` is replaced by its value; when no prefix matches, the first char is copied
    /// through unchanged.
    ///
    /// The name is kept for existing callers, but the scan is iterative: stack use does
    /// not grow with the input length and the remaining input is never re-copied.
    fn adjust_kana_string_inner_recur(
        map: &BTreeMap<String, String>,
        max_len: usize,
        to_adjust: &str,
        adjusted: &mut String,
    ) {
        let mut rest = to_adjust;
        while let Some(head) = rest.chars().next() {
            // Byte offset just past the first `max_len` chars (or the end of input).
            let window_end = rest
                .char_indices()
                .nth(max_len)
                .map_or(rest.len(), |(idx, _)| idx);

            // Walk candidate prefixes from longest to shortest, cutting only on char
            // boundaries so the slices are always valid UTF-8.
            let hit = rest[..window_end].char_indices().rev().find_map(|(start, c)| {
                let end = start + c.len_utf8();
                map.get(&rest[..end]).map(|replacement| (end, replacement))
            });

            match hit {
                Some((end, replacement)) => {
                    adjusted.push_str(replacement);
                    rest = &rest[end..];
                }
                None => {
                    adjusted.push(head);
                    rest = &rest[head.len_utf8()..];
                }
            }
        }
    }

    ///
    /// Returns the first letter of the romaji reading of a single hiragana character,
    /// or `None` when the character has no entry in `KANA_ROM_MAP`.
    /// Counterpart of ddskk's skk-rom-kana-vector.
    ///
    pub(crate) fn kana_to_okuri_prefix(kana: &char) -> Option<char> {
        let prefix = KANA_ROM_MAP.get(kana)?;
        Some(*prefix)
    }
}

#[cfg(test)]
impl KanaFormChanger {
    /// Test fixture: builds a changer from inline TOML rule data through
    /// `from_string`, so tests do not read a rule file from disk.
    /// The data defines a `[katakana]` table (including the two-char key 'う゛')
    /// and a `[jisx0201]` table.
    pub fn test_kana_form_changer() -> Self {
        KanaFormChanger::from_string(
            "\
[katakana]
\"\" = \"\"
\"\" = \"\"
\"\" = \"\"
\"\" = \"\"
\"\" = \"\"
\"う゛\" = \"\"
\"\" = \"\"
\"\" = \"\"
[jisx0201]
\"\" = \"\"
",
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The fixture parses, exposes its jisx0201 entry, and reports a longest katakana
    /// key of two chars.
    #[test]
    fn sanity_check() {
        let changer = KanaFormChanger::test_kana_form_changer();
        assert_eq!(changer.maps.jisx0201.get("").unwrap(), "");
        assert_eq!(changer.katakana_key_maxlen, 2);
    }

    /// Katakana mode converts a whole string, with the two-char 'う゛' becoming the
    /// single char 'ヴ'.
    #[test]
    fn adjust_kana_string() {
        let changer = KanaFormChanger::test_kana_form_changer();
        let actual = changer.adjust_kana_string(InputMode::Katakana, "う゛ぁいきんぐ");
        assert_eq!("ヴァイキング", actual);
    }

    /// Hiragana mode returns the input unchanged.
    #[test]
    fn adjust_kana_string_small_tu() {
        let changer = KanaFormChanger::test_kana_form_changer();
        let actual = changer.adjust_kana_string(InputMode::Hiragana, "");
        assert_eq!("", actual);
    }

    /// Spot-checks okurigana prefix lookups against `KANA_ROM_MAP`.
    #[test]
    fn kana_to_okuri_prefix() {
        assert_eq!(Some('r'), KanaFormChanger::kana_to_okuri_prefix(&''));
        assert_eq!(Some('s'), KanaFormChanger::kana_to_okuri_prefix(&''));
        assert_eq!(Some('w'), KanaFormChanger::kana_to_okuri_prefix(&''));
    }

    /// Empty rule data must be accepted without panicking.
    #[test]
    fn test_empty_kanaform_changer() {
        KanaFormChanger::from_string("");
    }
}