nadesiko3/
kanautils.rs

1//! 全角から半角への変換関数群
2//! ソースファイルの前処理に利用する
3
4use std::char;
5
/// Returns `true` when `c` is treated as a half-width character.
///
/// The check is purely numeric: every code point below U+00F0 counts as
/// half-width, everything from U+00F0 upward does not.
///
/// NOTE(review): U+00F0 is neither the ASCII (U+007F) nor the Latin-1
/// (U+00FF) boundary — confirm that this cut-off is intentional.
#[allow(dead_code)]
pub fn is_half(c: char) -> bool {
    // `char` values order by code point, so no integer cast is needed.
    c < '\u{F0}'
}
11
/// Expands to a `bool` expression that is `true` when the value on the left of
/// `=>` is contained in at least one of the comma-separated ranges on the right.
///
/// Each range only needs a `contains(&value)` method. The value expression is
/// re-evaluated for every range that gets tested, and testing stops at the
/// first match. With no ranges at all the expansion is simply `false`.
macro_rules! in_range {
    ( $needle:expr => $( $range:expr ),* ) => {
        false $( || ($range).contains(&$needle) )*
    };
}
17
18/// 半角アルファベットか
19#[allow(dead_code)]
20pub fn is_alpha(c: char) -> bool {
21    in_range![c => 'a'..='z', 'A'..='Z']
22}
23
/// Returns `true` when `c` is a half-width (ASCII) decimal digit, `0`–`9`.
///
/// Full-width digits (U+FF10..=U+FF19) and all other characters yield `false`.
#[allow(dead_code)]
pub fn is_numeric(c: char) -> bool {
    // Standard-library equivalent of the manual `'0'..='9'` range test.
    c.is_ascii_digit()
}
29
/// Returns `true` when `c` lies in the hiragana range U+3041..=U+309F.
///
/// This is a plain range test. Strictly speaking the hiragana characters are
/// U+3041..=U+3094, U+3095..=U+3096, U+309D..=U+309E and U+309F, so the code
/// points U+3097..=U+309C in between are accepted here as well.
pub fn is_hiragana(c: char) -> bool {
    matches!(c, '\u{3041}'..='\u{309F}')
}
37
/// Returns `true` when `c` may be used in a variable name.
///
/// * U+0000..=U+00FF (ASCII *and* Latin-1): only ASCII digits, ASCII letters
///   and `_` are accepted; everything else in this range is rejected.
/// * Above U+00FF: every character is accepted except those in three symbol
///   blocks — arrows, geometric shapes, and CJK symbols and punctuation.
///
/// NOTE(review): the full-width forms block (U+FF00..=U+FFEF) is not excluded,
/// so full-width symbols such as U+FF01 are accepted here; presumably callers
/// normalise them beforehand — confirm.
pub fn is_word_chars(c: char) -> bool {
    // Reference only — blocks commonly used in Japanese identifiers. They are
    // not checked individually; apart from the overlap noted below they are
    // accepted by the catch-all arm.
    // @see https://www.asahi-net.or.jp/~ax2s-KMTN/ref/unicode/index_u.html
    //   Kana:
    //     U+3040..=U+309F    hiragana
    //     U+30A0..=U+30FF    katakana
    //     U+1B000..=U+1B16F  kana supplement / extended
    //     U+FF00..=U+FFEF    half-width and full-width forms (half-width kana)
    //     U+3190..=U+319F    kanbun annotation marks
    //   Kanji:
    //     U+2F00..=U+31EF    radicals, strokes, etc. (overlaps the excluded
    //                        U+3000..=U+303F block, which takes precedence)
    //     U+3400..=U+9FFC    CJK unified ideographs + extension A
    //     U+F900..=U+FAFF    CJK compatibility ideographs
    //     U+20000..=U+3134A  CJK unified ideographs extensions B–G
    //     U+E0100..=U+E01EF  variation selectors
    match u32::from(c) {
        // Single-byte range: restrict to ASCII alphanumerics and underscore.
        0x00..=0xFF => c.is_ascii_alphanumeric() || c == '_',
        // Arrows.
        0x2190..=0x21FF => false,
        // Geometric shapes (e.g. ● or ▲).
        0x25A0..=0x25FF => false,
        // CJK symbols and punctuation (e.g. 「」, 【】, 『』).
        // @see https://www.asahi-net.or.jp/~ax2s-KMTN/ref/unicode/u3000.html
        0x3000..=0x303F => false,
        // Anything else outside the single-byte range is allowed.
        _ => true,
    }
}
73
/// Converts the code point `i` to a `char`, returning `def` when `i` is not a
/// valid Unicode scalar value (for example a surrogate or a value above
/// U+10FFFF).
pub fn char_from_u32(i: u32, def: char) -> char {
    match char::from_u32(i) {
        Some(decoded) => decoded,
        None => def,
    }
}
77
78/// 全角記号を半角記号に変換
79// https://en.wikipedia.org/wiki/Halfwidth_and_Fullwidth_Forms_(Unicode_block)
80pub fn to_half_ascii(c: char) -> char {
81    let ci = c as u32;
82    match ci {
83        // half ascii code
84        0x0020..=0x007E => c,
85        // '!'..='~' = '\u{FF01}'..='\u{FF5E}'
86        0xFF01..=0xFF5E => char_from_u32(ci - 0xFF01 + 0x21, c),
87        // space
88        0x2002..=0x200B => ' ',
89        0x3000 | 0xFEFF => ' ',
90        // others
91        _ => c,
92    }
93}
94
#[cfg(test)]
mod test_charutils {
    use super::*;

    // Full-width inputs are spelled as `\u{…}` escapes so that the expectations
    // cannot be silently altered by tools that normalise full-width forms to
    // their ASCII look-alikes.

    /// Half-width detection and full-width → half-width conversion.
    #[test]
    fn test_to_half() {
        // ASCII '!' is half-width; its full-width form U+FF01 is not.
        assert!(is_half('!'));
        assert!(!is_half('\u{FF01}'));
        // Full-width '!', 'A' and '#' map to their ASCII counterparts.
        assert_eq!(to_half_ascii('\u{FF01}'), '!');
        assert_eq!(to_half_ascii('\u{FF21}'), 'A');
        assert_eq!(to_half_ascii('\u{FF03}'), '#');
        // The ideographic space becomes a plain ASCII space.
        assert_eq!(to_half_ascii('\u{3000}'), ' ');
        // Characters that are already half-width are returned unchanged.
        assert_eq!(to_half_ascii('A'), 'A');
        assert_eq!(to_half_ascii(' '), ' ');
    }

    /// ASCII letter classification.
    #[test]
    fn test_range() {
        assert!(is_alpha('a'));
        assert!(is_alpha('B'));
        assert!(!is_alpha('3'));
        assert!(!is_alpha('$'));
    }
}