/// Punctuation bitmasks for the ASCII range, one `u16` row per block of 16 bytes.
///
/// Row `i` covers bytes `16 * i ..= 16 * i + 15`. Within a row, bit `b` (counting
/// from the least significant bit) is set when byte `16 * i + b` is reported as
/// punctuation by `is_ascii_punctuation`, which indexes this table with `c / 16`
/// and tests bit `c & 15`.
const PUNCT_MASKS_ASCII: [u16; 8] = [
0x0000, 0x0000, 0xfffe, 0xfc00, 0x0001, 0xf800, 0x0001, 0x7800, ];
/// Keys of the non-ASCII punctuation lookup.
///
/// Each entry is a code point divided by 16, i.e. it identifies one block of 16
/// consecutive code points. The mask for the block stored at index `i` lives at
/// `PUNCT_MASKS[i]`, so both tables must keep the same length and order.
///
/// The entries must remain sorted in ascending order: `is_punctuation` locates a
/// block with `binary_search`. The last key (7113 = 0x1BC9) corresponds to the
/// upper bound `0x1BC9F` checked in `is_punctuation`.
const PUNCT_TAB: [u16; 132] = [
10, 11, 55, 56, 85, 88, 91, 92, 95, 96, 97, 102, 109, 112, 127, 131, 133, 150, 151, 175, 223, 228, 229, 240, 241, 243, 248, 253, 260, 271, 310, 320, 358, 361, 366, 371, 381, 384, 404, 417, 426, 437, 438, 447, 451, 455, 460, 461, 513, 514, 515, 516, 517, 519, 520, 560, 562, 630, 631, 636, 638, 664, 665, 669, 671, 719, 727, 736, 737, 738, 739, 740, 768, 769, 771, 778, 783, 2639, 2656, 2663, 2671, 2695, 2700, 2703, 2706, 2709, 2716, 2717, 2725, 2733, 2735, 2750, 4051, 4065, 4067, 4068, 4069, 4070, 4080, 4081, 4082, 4083, 4085, 4086, 4112, 4153, 4157, 4182, 4229, 4241, 4243, 4261, 4263, 4271, 4275, 4281, 4356, 4363, 4364, 4372, 4375, 4380, 4387, 4428, 4444, 4452, 4679, 5798, 5807, 5811, 5812, 7113, ];
/// Punctuation bitmasks for the non-ASCII blocks listed in `PUNCT_TAB`.
///
/// `PUNCT_MASKS[i]` belongs to the block whose key is `PUNCT_TAB[i]`. Bit `b`
/// (counting from the least significant bit) is set when code point
/// `PUNCT_TAB[i] * 16 + b` is reported as punctuation by `is_punctuation`,
/// which tests bit `cp & 15` of the row found by the binary search.
const PUNCT_MASKS: [u16; 132] = [
0x0882, 0x88c0, 0x4000, 0x0080, 0xfc00, 0x0600, 0x4000, 0x0049, 0x0018, 0x3600, 0xc800, 0x3c00, 0x0010, 0x3fff, 0x0380, 0x7fff, 0x4000, 0x0030, 0x0001, 0x0001, 0x0010, 0x8000, 0x0c00, 0xfff0, 0x0017, 0x3c00, 0x0020, 0x061f, 0xfc00, 0x0800, 0x01ff, 0x0001, 0x6000, 0x1800, 0x3800, 0x0060, 0x0770, 0x07ff, 0x0030, 0xc000, 0x3f7f, 0xfc00, 0x0001, 0xf000, 0xf800, 0xc000, 0x00ff, 0x0008, 0xffff, 0x00ff, 0xffff, 0xffef, 0x7ffb, 0x6000, 0x6000, 0x0f00, 0x0600, 0xff00, 0x003f, 0x0060, 0xffc0, 0xfff8, 0x01ff, 0x0f00, 0x3000, 0xde00, 0x0001, 0xffff, 0xffff, 0x7fff, 0xffff, 0x0007, 0xff0e, 0xfff3, 0x2001, 0x0001, 0x0800, 0xc000, 0xe000, 0x4008, 0x00fc, 0x00f0, 0xc000, 0x0700, 0xc000, 0x8000, 0x3ffe, 0xc000, 0xf000, 0xc000, 0x0003, 0x0800, 0xc000, 0x03ff, 0xffff, 0xffff, 0xfff7, 0x0d0b, 0xf7ee, 0x8c00, 0x0001, 0xb800, 0xa800, 0x003f, 0x0007, 0x8000, 0x0001, 0x8000, 0x0080, 0x8000, 0x8000, 0x01ff, 0x8000, 0x007f, 0xfe00, 0x1e00, 0x3f80, 0xd800, 0x0003, 0x000f, 0x0030, 0x21e0, 0x3f00, 0x0040, 0x03fe, 0x000e, 0x001f, 0xc000, 0x0020, 0x0f80, 0x0010, 0x8000, ];
/// Reports whether the byte `c` is an ASCII punctuation character.
///
/// The answer is read from `PUNCT_MASKS_ASCII`: the upper bits of `c` select a
/// 16-byte row and the low four bits select the bit inside that row. Bytes at or
/// above `0x80` have no row in the table and are never punctuation.
pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
    // Reject non-ASCII bytes first so the row index below stays within 0..8.
    if c >= 0x80 {
        return false;
    }
    let row = PUNCT_MASKS_ASCII[usize::from(c >> 4)];
    let bit = 1u16 << (c & 0x0F);
    (row & bit) != 0
}
/// Reports whether the character `c` is punctuation according to the lookup tables.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. For everything else
/// the code point is split into a block key (`code / 16`) and a bit position
/// (`code % 16`): the key is binary-searched in `PUNCT_TAB`, and on a hit the
/// matching row of `PUNCT_MASKS` decides the result. Code points above `U+1BC9F`
/// lie beyond the last table row and are always reported as non-punctuation.
pub(crate) fn is_punctuation(c: char) -> bool {
    let code = u32::from(c);
    if code <= 0x7F {
        // The value fits in seven bits, so narrowing to `u8` loses nothing.
        is_ascii_punctuation(code as u8)
    } else if code <= 0x1BC9F {
        // With the upper bound checked, `code >> 4` is at most 0x1BC9 and fits in `u16`.
        let block_key = (code >> 4) as u16;
        let bit = 1u16 << (code & 0xF);
        match PUNCT_TAB.binary_search(&block_key) {
            Ok(row) => (PUNCT_MASKS[row] & bit) != 0,
            // No row for this block means none of its 16 code points is punctuation.
            Err(_) => false,
        }
    } else {
        false
    }
}
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot-checks the byte classifier on accepted and rejected inputs.
    #[test]
    fn test_ascii() {
        let accepted = [b'!', b'@', b'~'];
        // A space, a digit, a letter and a non-ASCII byte must all be rejected.
        let rejected = [b' ', b'0', b'A', 0xA1];

        for &byte in accepted.iter() {
            assert!(
                is_ascii_punctuation(byte),
                "byte {:#04x} should be punctuation",
                byte
            );
        }
        for &byte in rejected.iter() {
            assert!(
                !is_ascii_punctuation(byte),
                "byte {:#04x} should not be punctuation",
                byte
            );
        }
    }

    /// Spot-checks the character classifier, including both sides of the
    /// `U+1BC9F` upper bound.
    #[test]
    fn test_unicode() {
        let accepted = ['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'];
        let rejected = [' ', '\u{1BCA0}'];

        for &ch in accepted.iter() {
            assert!(
                is_punctuation(ch),
                "U+{:04X} should be punctuation",
                ch as u32
            );
        }
        for &ch in rejected.iter() {
            assert!(
                !is_punctuation(ch),
                "U+{:04X} should not be punctuation",
                ch as u32
            );
        }
    }
}