use phf::{phf_map, phf_set};
pub static CONVERT_MAP: phf::Map<char, char> = phf_map! {
'"' => '"',
'#' => '#',
'$' => '$',
'%' => '%',
'&' => '&',
''' => '\'',
'*' => '*',
'+' => '+',
'.' => '.',
'/' => '/',
'0' => '0',
'1' => '1',
'2' => '2',
'3' => '3',
'4' => '4',
'5' => '5',
'6' => '6',
'7' => '7',
'8' => '8',
'9' => '9',
'<' => '<',
'=' => '=',
'>' => '>',
'@' => '@',
'A' => 'A',
'B' => 'B',
'C' => 'C',
'D' => 'D',
'E' => 'E',
'F' => 'F',
'G' => 'G',
'H' => 'H',
'I' => 'I',
'J' => 'J',
'K' => 'K',
'L' => 'L',
'M' => 'M',
'N' => 'N',
'O' => 'O',
'P' => 'P',
'Q' => 'Q',
'R' => 'R',
'S' => 'S',
'T' => 'T',
'U' => 'U',
'V' => 'V',
'W' => 'W',
'X' => 'X',
'Y' => 'Y',
'Z' => 'Z',
'\' => '\\',
'^' => '^',
'`' => '`',
'a' => 'a',
'b' => 'b',
'c' => 'c',
'd' => 'd',
'e' => 'e',
'f' => 'f',
'g' => 'g',
'h' => 'h',
'i' => 'i',
'j' => 'j',
'k' => 'k',
'l' => 'l',
'm' => 'm',
'n' => 'n',
'o' => 'o',
'p' => 'p',
'q' => 'q',
'r' => 'r',
's' => 's',
't' => 't',
'u' => 'u',
'v' => 'v',
'w' => 'w',
'x' => 'x',
'y' => 'y',
'z' => 'z',
'{' => '{',
'|' => '|',
'}' => '}',
'。' => '。',
'「' => '「',
'」' => '」',
'、' => '、',
'・' => '·',
'•' => '·',
'─' => '—',
'―' => '—',
'∶' => ':',
'‧' => '·',
'・' => '·',
'﹑' => '、',
'〜' => '~',
'︰' => ':',
'?' => '?',
'!' => '!',
',' => ',',
';' => ';',
'(' => '(',
')' => ')',
};
/// Compile-time single-character substitution table.
///
/// Each key is replaced by its value; characters not listed have no
/// substitution. Judging by the name, this is a traditional-to-simplified
/// (T2S) supplement. NOTE(review): confirm how callers combine it with
/// `CONVERT_MAP`.
pub static CONVERT_T2S_MAP: phf::Map<char, char> = phf_map! {
    '妳' => '你', '姊' => '姐', '擡' => '抬',
    '牠' => '它', '緖' => '绪', '揹' => '背',
};
/// Reports whether `c` is one of the ideograph code points this module
/// treats as CJK.
///
/// Accepted code points:
///
/// * the isolated code points U+3007, U+FA11, U+FA1F and U+FA21;
/// * U+3400..=U+4DBF and U+4E00..=U+9FFF;
/// * the short runs U+FA0E..=U+FA0F, U+FA13..=U+FA14, U+FA23..=U+FA24 and
///   U+FA27..=U+FA29;
/// * the supplementary-plane runs U+20000..=U+2A6DF, U+2A700..=U+2B739,
///   U+2B740..=U+2B81D, U+2B820..=U+2CEA1, U+2CEB0..=U+2EBE0,
///   U+2EBF0..=U+2EE5F, U+30000..=U+3134A and U+31350..=U+323AF.
///
/// Everything else (Latin letters, kana, punctuation, emoji, ...) yields
/// `false`.
#[must_use]
#[inline]
pub const fn is_cjk(c: char) -> bool {
    // Stand-alone code points that are not part of any accepted run.
    if matches!(c, '\u{3007}' | '\u{FA11}' | '\u{FA1F}' | '\u{FA21}') {
        return true;
    }

    // Runs inside the Basic Multilingual Plane.
    let in_bmp_run = range(c, '\u{3400}', '\u{4DBF}')
        || range(c, '\u{4E00}', '\u{9FFF}')
        || range(c, '\u{FA0E}', '\u{FA0F}')
        || range(c, '\u{FA13}', '\u{FA14}')
        || range(c, '\u{FA23}', '\u{FA24}')
        || range(c, '\u{FA27}', '\u{FA29}');
    if in_bmp_run {
        return true;
    }

    // Runs in the supplementary planes (U+20000 and above).
    range(c, '\u{20000}', '\u{2A6DF}')
        || range(c, '\u{2A700}', '\u{2B739}')
        || range(c, '\u{2B740}', '\u{2B81D}')
        || range(c, '\u{2B820}', '\u{2CEA1}')
        || range(c, '\u{2CEB0}', '\u{2EBE0}')
        || range(c, '\u{2EBF0}', '\u{2EE5F}')
        || range(c, '\u{30000}', '\u{3134A}')
        || range(c, '\u{31350}', '\u{323AF}')
}

/// Inclusive bounds check: `true` exactly when `min <= c <= max`.
#[must_use]
#[inline]
const fn range(c: char, min: char, max: char) -> bool {
    min <= c && c <= max
}
/// Compile-time set of the characters that `is_chinese_punctuation`
/// recognises.
///
/// Each character is listed exactly once: `phf_set!` rejects duplicate keys
/// at compile time, so the hyphen must not be repeated.
///
/// NOTE(review): several members are plain ASCII as written here (`?`, `!`,
/// `,`, `;`, `:`, `(`, `)`, `[`, `]`, `-`, `~`, `_`, `.`) and therefore
/// overlap with `ENGLISH_PUNCTUATION`. If fullwidth forms were intended,
/// restore them with `\u{..}` escapes. Confirm against the upstream data.
static CHINESE_PUNCTUATION: phf::Set<char> = phf_set! {
    // Sentence and clause marks.
    '。', '?', '!', ',', '、', ';', ':',
    // Quotation marks.
    '“', '”', '『', '』', '‘', '’', '「', '」',
    // Brackets.
    '(', ')', '[', ']', '〔', '〕', '【', '】',
    // Dashes, ellipsis, tilde and middle dot.
    '—', '…', '-', '~', '·',
    // Title marks.
    '《', '》', '〈', '〉',
    // Underline marks and full stop.
    '﹏', '_', '.',
};
/// Returns `true` when `c` is a member of the `CHINESE_PUNCTUATION` set.
///
/// This is a plain set-membership test; no normalization is applied to `c`
/// beforehand.
#[must_use]
#[inline]
pub fn is_chinese_punctuation(c: char) -> bool {
CHINESE_PUNCTUATION.contains(&c)
}
/// Compile-time set of the characters that `is_english_punctuation`
/// recognises.
///
/// Membership is all that matters; the grouping below is for readers only.
static ENGLISH_PUNCTUATION: phf::Set<char> = phf_set! {
    // Sentence and clause marks.
    '.', '?', '!', ',', ':', ';', '…',
    // Hyphen, en dash and em dash.
    '-', '–', '—',
    // Brackets.
    '(', ')', '[', ']', '{', '}',
    // Quotes and slash.
    '"', '\'', '/',
};
/// Returns `true` when `c` is a member of the `ENGLISH_PUNCTUATION` set.
///
/// This is a plain set-membership test; no normalization is applied to `c`
/// beforehand.
#[must_use]
#[inline]
pub fn is_english_punctuation(c: char) -> bool {
ENGLISH_PUNCTUATION.contains(&c)
}
/// Returns `true` when `c` is recognised as punctuation by either
/// `is_chinese_punctuation` or `is_english_punctuation`.
///
/// The Chinese set is consulted first; the English set is only checked when
/// that lookup misses.
#[must_use]
#[inline]
pub fn is_punctuation(c: char) -> bool {
    if is_chinese_punctuation(c) {
        return true;
    }
    is_english_punctuation(c)
}
#[cfg(test)]
mod test {
    use super::*;

    /// Spot-checks `is_cjk` with characters that must be accepted and
    /// characters that must be rejected.
    #[test]
    fn test_is_cjk() {
        let accepted = ['你', '〇', '䀹', '鿃', '\u{9FEB}', '﨧', '𱞈'];
        for &ch in &accepted {
            assert!(is_cjk(ch), "expected {:?} to be classified as CJK", ch);
        }

        let rejected = ['a', '🍌'];
        for &ch in &rejected {
            assert!(!is_cjk(ch), "expected {:?} not to be classified as CJK", ch);
        }
    }
}