/// Encodes one character as a single byte.
///
/// Mapping, first match wins:
///
/// * `U+FF61..=U+FF9F` (half-width punctuation and katakana) map onto
///   `0xA1..=0xDF`, preserving order.
/// * `U+00AF` and `U+203E` both map to `0x7E`.
/// * Any other character up to `U+00FF` (ASCII included) keeps its code point
///   as the byte value.
/// * Every remaining character has no single-byte form and is encoded as
///   `b'?'`.
///
/// NOTE(review): the byte layout looks like JIS X 0201 without the yen sign
/// at `0x5C` -- confirm the intended character set.
pub fn convert_from_char(ch: char) -> u8 {
    match u32::from(ch) {
        // At most 0x3E is added to 0xA1, so the sum always fits in a `u8`.
        code @ 0xFF61..=0xFF9F => 0xA1 + (code - 0xFF61) as u8,
        0x00AF | 0x203E => 0x7E,
        // Lossless: the code point already fits in one byte.
        code @ 0x0000..=0x00FF => code as u8,
        // A truncating cast here would turn the character into an unrelated
        // byte (U+3042 would come out as `b'B'`), so substitute instead.
        _ => b'?',
    }
}
16
/// Decodes a single byte into the character it stands for.
///
/// * `0x7E` decodes to `U+203E`.
/// * `0x20..=0x7D` decode to the ASCII character with the same value.
/// * `0xA1..=0xDF` decode to `U+FF61..=U+FF9F`, preserving order.
/// * Every other byte (`0x00..=0x1F`, `0x7F..=0xA0`, `0xE0..=0xFF`) decodes to
///   `U+FFFD` when `replace_specials` is `true`, and otherwise to the
///   character whose code point equals the byte.
pub fn convert_to_char(code: u8, replace_specials: bool) -> char {
    match code {
        0x7E => '\u{203E}',
        0x20..=0x7D => char::from(code),
        0xA1..=0xDF => {
            let scalar = u32::from(code - 0xA1) + 0xFF61;
            char::try_from(scalar).unwrap_or('\u{FFFD}')
        }
        _ if replace_specials => '\u{FFFD}',
        _ => char::from(code),
    }
}
38
// Lookup tables for `convert_kana_wide_full_to_half`. The tables are matched
// up by character index, so the order of the characters is significant.

/// Full-width hiragana; a character found at index `i` is replaced by the
/// character at index `i` of `KANA_HALF_WIDE`.
const HIRAGANA: &str = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをんぁぃぅぇぉゃゅょっ";
/// Full-width katakana, looked up the same way as `HIRAGANA`.
const KATAKANA: &str = "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンァィゥェォャュョッ";
/// Half-width katakana that `HIRAGANA` and `KATAKANA` entries are replaced with.
const KANA_HALF_WIDE: &str = "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンァィゥェォャュョッ";
/// Wide characters that are replaced by a single byte: the character at
/// index `i` becomes the byte `b' ' + i`.
const ASCII: &str = " !”#$%&’()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]^_`abcdefghijklmnopqrstuvwxyz{|} ̄";
/// Full-width punctuation and sound marks; a character found at index `i` is
/// replaced by the character at index `i` of `KIGOU_HALF_WIDE`.
const KIGOU: &str = "。「」、・ー゛゜";
/// Half-width replacements for the entries of `KIGOU`.
const KIGOU_HALF_WIDE: &str = "。「」、・ー゙゚";
/// Full-width hiragana carrying a sound mark; a character found at index `i`
/// is replaced by the two characters at indices `2 * i` and `2 * i + 1` of
/// `KANA_DAKUON_HALF_WIDE`.
const HIRAGANA_DAKUON: &str = "がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ";
/// Full-width katakana carrying a sound mark, looked up the same way as
/// `HIRAGANA_DAKUON`.
const KATAKANA_DAKUON: &str = "ガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポ";
/// Half-width replacements for the two `*_DAKUON` tables, two characters per
/// entry.
const KANA_DAKUON_HALF_WIDE: &str = "ガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポ";
48
49pub fn convert_kana_wide_full_to_half(s: &str) -> String {
51 let mut ret = String::new();
52 for ch in s.chars() {
53 if ch.is_ascii() {
54 ret.push(ch);
55 } else if let Some(index) = HIRAGANA.chars().position(|x| x == ch) {
56 ret.push(KANA_HALF_WIDE.chars().nth(index).unwrap_or(ch));
57 } else if let Some(index) = KATAKANA.chars().position(|x| x == ch) {
58 ret.push(KANA_HALF_WIDE.chars().nth(index).unwrap_or(ch));
59 } else if let Some(index) = HIRAGANA_DAKUON.chars().position(|x| x == ch) {
60 let mut iter = KANA_DAKUON_HALF_WIDE.chars().skip(index * 2);
61 let ch1 = iter.next();
62 let ch2 = iter.next();
63 if let (Some(ch1), Some(ch2)) = (ch1, ch2) {
64 ret.push(ch1);
65 ret.push(ch2);
66 } else {
67 ret.push(ch);
68 }
69 } else if let Some(index) = KATAKANA_DAKUON.chars().position(|x| x == ch) {
70 let mut iter = KANA_DAKUON_HALF_WIDE.chars().skip(index * 2);
71 let ch1 = iter.next();
72 let ch2 = iter.next();
73 if let (Some(ch1), Some(ch2)) = (ch1, ch2) {
74 ret.push(ch1);
75 ret.push(ch2);
76 } else {
77 ret.push(ch);
78 }
79 } else if let Some(index) = ASCII.chars().position(|x| x == ch) {
80 ret.push((b' ' + index as u8) as char);
81 } else if let Some(index) = KIGOU.chars().position(|x| x == ch) {
82 ret.push(KIGOU_HALF_WIDE.chars().nth(index).unwrap_or(ch));
83 } else if ch == 'ヴ' {
84 ret.push_str("ヴ")
85 } else if ch == 'ヺ' {
86 ret.push_str("ヺ")
87 } else {
88 ret.push(ch);
89 }
90 }
91 ret
92}
93
#[cfg(test)]
mod test {
    use super::*;

    /// Runs a mixed sample through the width conversion and then through the
    /// byte codec, printing every stage for manual inspection.
    #[test]
    fn it_works() {
        let s = "あんたがフルフルバーだよん!?~`#_HOho}ヴヺ<123¥@*>";
        let t = convert_kana_wide_full_to_half(s);
        eprintln!("{}", s);
        eprintln!("{}", t);

        // Every kana in the sample has a narrow form, so none may survive.
        for table in &[HIRAGANA, KATAKANA, HIRAGANA_DAKUON, KATAKANA_DAKUON] {
            assert!(
                !t.chars().any(|c| table.contains(c)),
                "full-width kana left in {:?}",
                t
            );
        }

        let u: String = t
            .chars()
            .map(|c| convert_to_char(convert_from_char(c), true))
            .collect();
        eprintln!("{}", u);

        // The codec works one character per byte in both directions.
        assert_eq!(u.chars().count(), t.chars().count());
    }

    /// ASCII input must come back byte-for-byte.
    #[test]
    fn ascii_is_left_untouched() {
        assert_eq!(convert_kana_wide_full_to_half("abc 123 ~"), "abc 123 ~");
    }

    /// Hiragana and katakana share the same half-width replacement.
    #[test]
    fn plain_kana_become_half_width() {
        assert_eq!(
            convert_kana_wide_full_to_half("\u{3042}\u{30A2}"),
            "\u{FF71}\u{FF71}"
        );
    }

    /// A kana carrying a sound mark expands to a base character plus a mark.
    #[test]
    fn voiced_kana_expand_to_two_chars() {
        let out: Vec<char> = convert_kana_wide_full_to_half("\u{30AC}").chars().collect();
        assert_eq!(out.len(), 2);
        assert_eq!(out[0], '\u{FF76}');
    }

    /// Every byte with a dedicated character decodes and re-encodes to itself.
    #[test]
    fn single_byte_codes_round_trip() {
        for code in (0x20_u8..=0x7E).chain(0xA1..=0xDF) {
            assert_eq!(convert_from_char(convert_to_char(code, true)), code);
        }
    }
}