b2c2_jis_x_201/
lib.rs

1// b2c2-jis-x-201 crate
2// author: Leonardone @ NEETSDKASU
3
/// Encodes one Unicode character as a single JIS X 0201 byte (UTF-8 -> JIS X 201).
///
/// Mapping rules:
///
/// * ASCII (U+0000..=U+007F) is encoded as its own code.
/// * Half-width katakana and punctuation (U+FF61..=U+FF9F) are encoded as
///   `0xA1..=0xDF`.
/// * MACRON (U+00AF) and OVERLINE (U+203E) are encoded as `0x7E`, the slot
///   where JIS X 0201 has an overline instead of the ASCII tilde.
/// * YEN SIGN (U+00A5) is encoded as `0x5C`, the slot where JIS X 0201 has a
///   yen sign instead of the ASCII backslash.
/// * U+0080..=U+00A0 and U+00E0..=U+00FF are encoded as the byte of the same
///   value. These are exactly the characters `convert_to_char(code, false)`
///   yields for bytes outside the JIS X 0201 graphic ranges, so that round
///   trip stays lossless.
/// * Every other character has no JIS X 0201 representation and is encoded
///   as `b'?'`. A plain `as u8` cast would keep only the low byte and turn
///   the character into an unrelated one (for example U+3042 into `b'B'`).
pub fn convert_from_char(ch: char) -> u8 {
    /// Byte emitted for characters that JIS X 0201 cannot represent.
    const SUBSTITUTE: u8 = b'?';

    match u32::from(ch) {
        // YEN SIGN lives in the backslash slot.
        0x00A5 => 0x5C,
        // OVER_LINE (replace TILDE)
        0x00AF | 0x203E => 0x7E,
        // ASCII, plus the bytes that have no JIS X 0201 meaning: the value
        // fits in a u8, so the cast is lossless.
        code @ 0x0000..=0x00A0 => code as u8,
        code @ 0x00E0..=0x00FF => code as u8,
        // Half-width katakana block; the offset is at most 0x3E.
        code @ 0xFF61..=0xFF9F => (code - 0xFF61) as u8 + 0xA1,
        _ => SUBSTITUTE,
    }
}
16
/// Decodes a single JIS X 0201 byte into a Unicode character (JIS X 201 -> UTF-8).
///
/// * `0x7E` becomes OVERLINE (U+203E) rather than the ASCII tilde.
/// * `0x20..=0x7D` become the ASCII character with the same code.
/// * `0xA1..=0xDF` become the half-width katakana block U+FF61..=U+FF9F.
/// * Every remaining byte (`0x00..=0x1F`, `0x7F`, `0x80..=0xA0`,
///   `0xE0..=0xFF`) becomes U+FFFD when `replace_specials` is `true`, and the
///   character whose code point equals the byte value otherwise.
pub fn convert_to_char(code: u8, replace_specials: bool) -> char {
    const REPLACEMENT: char = '\u{FFFD}';

    match code {
        // OVER_LINE (replace from TILDE)
        0x7E => '\u{203E}',
        0x20..=0x7D => char::from(code),
        0xA1..=0xDF => {
            let scalar = u32::from(code - 0xA1) + 0xFF61;
            char::from_u32(scalar).unwrap_or(REPLACEMENT)
        }
        _ if replace_specials => REPLACEMENT,
        _ => char::from(code),
    }
}
38
39const HIRAGANA: &str = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをんぁぃぅぇぉゃゅょっ";
40const KATAKANA: &str = "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンァィゥェォャュョッ";
41const KANA_HALF_WIDE: &str = "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンァィゥェォャュョッ";
42const ASCII: &str = " !”#$%&’()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]^_`abcdefghijklmnopqrstuvwxyz{|} ̄";
43const KIGOU: &str = "。「」、・ー゛゜";
44const KIGOU_HALF_WIDE: &str = "。「」、・ー゙゚";
45const HIRAGANA_DAKUON: &str = "がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ";
46const KATAKANA_DAKUON: &str = "ガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポ";
47const KANA_DAKUON_HALF_WIDE: &str = "ガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポ";
48
49// 一部の全角文字を半角文字に変換する (JIS X 201と関係ないコードだね…)
50pub fn convert_kana_wide_full_to_half(s: &str) -> String {
51    let mut ret = String::new();
52    for ch in s.chars() {
53        if ch.is_ascii() {
54            ret.push(ch);
55        } else if let Some(index) = HIRAGANA.chars().position(|x| x == ch) {
56            ret.push(KANA_HALF_WIDE.chars().nth(index).unwrap_or(ch));
57        } else if let Some(index) = KATAKANA.chars().position(|x| x == ch) {
58            ret.push(KANA_HALF_WIDE.chars().nth(index).unwrap_or(ch));
59        } else if let Some(index) = HIRAGANA_DAKUON.chars().position(|x| x == ch) {
60            let mut iter = KANA_DAKUON_HALF_WIDE.chars().skip(index * 2);
61            let ch1 = iter.next();
62            let ch2 = iter.next();
63            if let (Some(ch1), Some(ch2)) = (ch1, ch2) {
64                ret.push(ch1);
65                ret.push(ch2);
66            } else {
67                ret.push(ch);
68            }
69        } else if let Some(index) = KATAKANA_DAKUON.chars().position(|x| x == ch) {
70            let mut iter = KANA_DAKUON_HALF_WIDE.chars().skip(index * 2);
71            let ch1 = iter.next();
72            let ch2 = iter.next();
73            if let (Some(ch1), Some(ch2)) = (ch1, ch2) {
74                ret.push(ch1);
75                ret.push(ch2);
76            } else {
77                ret.push(ch);
78            }
79        } else if let Some(index) = ASCII.chars().position(|x| x == ch) {
80            ret.push((b' ' + index as u8) as char);
81        } else if let Some(index) = KIGOU.chars().position(|x| x == ch) {
82            ret.push(KIGOU_HALF_WIDE.chars().nth(index).unwrap_or(ch));
83        } else if ch == 'ヴ' {
84            ret.push_str("ヴ")
85        } else if ch == 'ヺ' {
86            ret.push_str("ヺ")
87        } else {
88            ret.push(ch);
89        }
90    }
91    ret
92}
93
#[cfg(test)]
mod test {
    use super::*;

    /// Smoke test over a mixed string: prints each stage and checks that no
    /// hiragana or full-width katakana letter survives the width conversion.
    #[test]
    fn it_works() {
        let s = "あんたがフルフルバーだよん!?~`#_HOho}ヴヺ<123¥@*>";
        let t = convert_kana_wide_full_to_half(s);
        eprintln!("{}", s);
        eprintln!("{}", t);
        assert!(
            !t.chars()
                .any(|c| matches!(c, '\u{3041}'..='\u{3096}' | '\u{30A1}'..='\u{30FA}')),
            "full-width kana left in {:?}",
            t
        );
        let mut u = String::new();
        for ch in t.chars() {
            let ch = convert_from_char(ch);
            let ch = convert_to_char(ch, true);
            u.push(ch);
        }
        eprintln!("{}", u);
        // Encoding and decoding work one character at a time.
        assert_eq!(u.chars().count(), t.chars().count());
    }

    /// Kana must survive width conversion -> JIS X 0201 bytes -> characters.
    #[test]
    fn kana_round_trip() {
        // Hiragana "a n ta ga".
        let half = convert_kana_wide_full_to_half("\u{3042}\u{3093}\u{305F}\u{304C}");
        // Half-width "a n ta ka + voiced mark".
        assert_eq!(half, "\u{FF71}\u{FF9D}\u{FF80}\u{FF76}\u{FF9E}");

        let bytes: Vec<u8> = half.chars().map(convert_from_char).collect();
        assert_eq!(bytes, [0xB1, 0xDD, 0xC0, 0xB6, 0xDE]);

        let back: String = bytes.iter().map(|&b| convert_to_char(b, true)).collect();
        assert_eq!(back, half);
    }
}