/// Bitmask lookup table for ASCII punctuation, consulted by `is_ascii_punctuation`.
///
/// Entry `i` covers the sixteen bytes `16 * i ..= 16 * i + 15`; bit `b`
/// (bit 0 is the least significant) is set when byte `16 * i + b` counts as
/// punctuation. Decoding the masks below gives:
///
/// * entries 0 and 1 (0x00-0x1F): nothing
/// * entry 2 (0x20-0x2F): `!` through `/` (space, bit 0, is excluded)
/// * entry 3 (0x30-0x3F): `:` through `?`
/// * entry 4 (0x40-0x4F): `@`
/// * entry 5 (0x50-0x5F): `[` through `_`
/// * entry 6 (0x60-0x6F): the backtick
/// * entry 7 (0x70-0x7F): `{` through `~` (0x7F, bit 15, is excluded)
const PUNCT_MASKS_ASCII: [u16; 8] = [
0x0000, 0x0000, 0xfffe, 0xfc00, 0x0001, 0xf800, 0x0001, 0x7800, ];
/// Keys for the non-ASCII punctuation lookup performed by `is_punctuation`.
///
/// Each entry is a code point divided by 16, i.e. it names one block of
/// sixteen consecutive code points. `is_punctuation` locates a block with
/// `binary_search`, so the entries must stay in ascending order. The entry at
/// index `i` is paired with the bitmask `PUNCT_MASKS[i]`; both tables
/// therefore have the same length. Blocks that are absent from this table
/// are reported as containing no punctuation.
const PUNCT_TAB: [u16; 727] = [
10, 11, 13, 15, 44, 45, 46, 47, 55, 56, 63, 72, 85, 88, 91, 92, 95, 96, 97, 102, 109, 110, 111, 112, 127, 131, 133, 136, 150, 151, 159, 167, 175, 183, 191, 199, 200, 212, 215, 223, 227, 228, 229, 240, 241, 243, 248, 251, 252, 253, 260, 265, 271, 310, 313, 320, 358, 361, 366, 371, 381, 384, 404, 413, 414, 415, 417, 426, 437, 438, 439, 447, 451, 455, 460, 461, 507, 508, 509, 510, 511, 513, 514, 515, 516, 517, 519, 520, 522, 523, 524, 528, 529, 530, 531, 532, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 580, 585, 586, 587, 588, 589, 590, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 718, 719, 727, 736, 737, 738, 739, 740, 741, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 767, 768, 769, 770, 771, 777, 778, 783, 793, 796, 797, 798, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 1244, 1245, 1246, 1247, 2633, 2634, 2635, 2636, 2639, 2656, 2663, 2671, 2672, 2673, 2674, 2680, 2690, 2691, 2695, 2700, 2703, 2706, 2709, 2716, 2717, 2725, 2727, 2733, 2735, 2741, 2742, 2750, 4018, 4027, 4028, 4051, 4052, 4060, 4063, 4065, 4067, 4068, 4069, 4070, 4080, 4081, 4082, 4083, 4084, 4085, 4086, 4094, 4095, 4112, 4115, 4119, 4120, 4121, 4122, 
4125, 4126, 4127, 4153, 4157, 4182, 4229, 4231, 4241, 4243, 4261, 4263, 4268, 4271, 4275, 4281, 4330, 4341, 4344, 4356, 4363, 4364, 4372, 4375, 4380, 4381, 4387, 4394, 4420, 4421, 4428, 4444, 4445, 4452, 4454, 4459, 4467, 4483, 4500, 4510, 4515, 4516, 4521, 4522, 4528, 4548, 4551, 4591, 4596, 4605, 4606, 4607, 4679, 4863, 5798, 5807, 5811, 5812, 5865, 5886, 7113, 7413, 7414, 7415, 7416, 7417, 7418, 7419, 7420, 7424, 7425, 7426, 7427, 7428, 7429, 7430, 7431, 7432, 7433, 7434, 7435, 7436, 7437, 7438, 7439, 7440, 7441, 7442, 7443, 7444, 7445, 7446, 7448, 7449, 7450, 7451, 7452, 7453, 7454, 7456, 7457, 7458, 7459, 7460, 7472, 7473, 7474, 7475, 7476, 7477, 7532, 7533, 7535, 7537, 7539, 7540, 7542, 7544, 7546, 7548, 7552, 7553, 7554, 7555, 7556, 7557, 7558, 7559, 7560, 7561, 7562, 7563, 7564, 7565, 7566, 7567, 7568, 7569, 7570, 7571, 7572, 7573, 7574, 7575, 7576, 7577, 7578, 7579, 7580, 7581, 7582, 7583, 7587, 7590, 7591, 7592, 7700, 7727, 7829, 7882, 7883, 7890, 7919, 7936, 7937, 7938, 7939, 7940, 7941, 7942, 7943, 7944, 7945, 7946, 7947, 7948, 7949, 7950, 7951, 7952, 7953, 7954, 7955, 7956, 7957, 7958, 7959, 7960, 7961, 7962, 7966, 7967, 7968, 7969, 7970, 7971, 7972, 7973, 7974, 7984, 7985, 7986, 7987, 7988, 7989, 7990, 7991, 7992, 7993, 7994, 7995, 7996, 7997, 7998, 7999, 8000, 8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008, 8009, 8010, 8011, 8012, 8013, 8014, 8015, 8016, 8017, 8018, 8019, 8020, 8021, 8022, 8023, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8032, 8033, 8034, 8035, 8036, 8037, 8038, 8039, 8040, 8041, 8042, 8043, 8044, 8045, 8046, 8047, 8048, 8049, 8050, 8051, 8052, 8053, 8054, 8055, 8056, 8057, 8058, 8059, 8060, 8061, 8062, 8063, 8064, 8065, 8066, 8067, 8068, 8069, 8070, 8071, 8072, 8073, 8074, 8075, 8080, 8081, 8082, 8083, 8084, 8085, 8086, 8087, 8088, 8089, 8090, 8091, 8092, 8093, 8094, 8095, 8096, 8097, 8098, 8099, 8100, 8101, 8102, 8103, 8104, 8105, 8106, 8107, 8108, 8109, 8110, 8111, 8112, 8113, 8114, 8115, 8116, 8117, 8118, 8119, 8120, 8121, 
8122, 8123, 8124, ];
/// Per-block bitmasks for the non-ASCII punctuation lookup in `is_punctuation`.
///
/// `PUNCT_MASKS[i]` belongs to the block of sixteen code points whose key is
/// `PUNCT_TAB[i]`. Bit `b` (bit 0 is the least significant) is set when code
/// point `PUNCT_TAB[i] * 16 + b` counts as punctuation. The table must keep
/// the same length and ordering as `PUNCT_TAB`.
const PUNCT_MASKS: [u16; 727] = [
0xdbfe, 0x89d3, 0x0080, 0x0080, 0x003c, 0xfffc, 0xafe0, 0xffff, 0x4020, 0x00b0, 0x0040, 0x0004, 0xfc00, 0xe600, 0x4000, 0x0049, 0x0018, 0xffc0, 0xe800, 0x3c00, 0x4010, 0x0200, 0x6000, 0x3fff, 0xc3c0, 0x7fff, 0x4000, 0x0100, 0x0030, 0x0001, 0x2c0c, 0x0040, 0x0003, 0x0001, 0x07f8, 0x8080, 0x0010, 0x8000, 0x0200, 0x0010, 0x8000, 0x8000, 0x0c00, 0xfffe, 0xfcff, 0x3d50, 0x0020, 0xc000, 0xdfbf, 0x07ff, 0xfc00, 0xc000, 0x0800, 0x01ff, 0x03ff, 0x0001, 0x6000, 0x1800, 0x3800, 0x0060, 0x0f70, 0x07ff, 0x0031, 0xc000, 0xffff, 0xffff, 0xc000, 0x3f7f, 0xfc00, 0x07ff, 0x7ff0, 0xf000, 0xf800, 0xc000, 0x00ff, 0x0008, 0xa000, 0xe003, 0xe000, 0xe000, 0x6000, 0xffff, 0x00ff, 0xffff, 0xffff, 0x7fff, 0x7c00, 0x7c00, 0xffff, 0xffff, 0x0001, 0x037b, 0xc1d0, 0x42af, 0x0c00, 0xbc1f, 0x0c00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x007f, 0x07ff, 0xf000, 0xffff, 0xffff, 0xffff, 0xffff, 0x03ff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x003f, 0xfff0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffcf, 0xffff, 
0xffbf, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x07e0, 0xde00, 0x0001, 0xffff, 0xffff, 0x7fff, 0xffff, 0xffff, 0x3fff, 0xffff, 0xfbff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x003f, 0xffff, 0xff1e, 0xffff, 0x0001, 0xe0c1, 0x1800, 0x0001, 0x0800, 0xffc3, 0xffff, 0xffff, 0x800f, 0xffff, 0x7fff, 0xfc00, 0xffff, 0x00ff, 0x0001, 0xffff, 0xffff, 0xfc00, 0xffff, 0xffff, 0x0001, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x007f, 0xc000, 0xe000, 0x4008, 0x00fc, 0xffff, 0x007f, 0x0003, 0x0600, 0x0f00, 0x03c0, 0x00f0, 0xc000, 0x1700, 0xc000, 0x8000, 0x3ffe, 0xc000, 0xf000, 0x0380, 0xc000, 0x0003, 0x0800, 0x0c00, 0x0800, 0x0200, 0xfffc, 0x0007, 0xc000, 0xffff, 0x8000, 0xf000, 0x03ff, 0xffff, 0xffff, 0xfff7, 0x0f7f, 0xfffe, 0xfc00, 0x0001, 0xf800, 0x0001, 0xf800, 0x003f, 0x7f7f, 0x3000, 0x0007, 0xff80, 0xfe00, 0x73ff, 0x1fff, 0x0001, 0xffff, 0xffff, 0x1fff, 0x8000, 0x0001, 0x8000, 0x0080, 0x0180, 0x8000, 0x8000, 0x01ff, 0x8000, 0x0100, 0x007f, 0xfe00, 0x1e00, 0x2000, 0x03e0, 0x03c0, 0x3f80, 0xd800, 0x0003, 0x000f, 0x0030, 0x21e0, 0xe800, 0x3f00, 0x0200, 0xf800, 0x2c00, 0x0040, 0xfffe, 0x00ff, 0x000e, 0x1fff, 0x0200, 0xf000, 0x0800, 0x0070, 0x0004, 0x8000, 0x007f, 0xdc00, 0x0007, 0x03ff, 0x003e, 0x0003, 0x0180, 0xfff8, 0xffe0, 0xffff, 0x8003, 0x001f, 0x0006, 0xc000, 0x0020, 0xff80, 0x0030, 0x0780, 0x0004, 0x9000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x003f, 0xffff, 0xffff, 0xfe7f, 0xffff, 0xffff, 0xffff, 0x1c1f, 0xf018, 0xffff, 0xc3ff, 0xffff, 0xffff, 0xffff, 0x07ff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0023, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 
0x007f, 0x0002, 0x0800, 0x0800, 0x0020, 0x0020, 0x8000, 0x8000, 0x0200, 0x0200, 0x0008, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0780, 0xe000, 0xffdf, 0x0fef, 0x8000, 0x8000, 0xc000, 0x1000, 0x0001, 0x4000, 0x0003, 0xffff, 0xffff, 0x0fff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000f, 0x7fff, 0xfffe, 0xfffe, 0xfffe, 0xffff, 0x003f, 0xe000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x3fff, 0xffc0, 0xffff, 0x0007, 0xffff, 0xffff, 0x0fff, 0x01ff, 0x0003, 0x003f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xf0ff, 0x1fff, 0x1fff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xf87f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x03ff, 0x0fff, 0x0001, 0x0fff, 0xffff, 0xffff, 0xffff, 0x00ff, 0x03ff, 0xffff, 0xffff, 0x00ff, 0xffff, 0x3fff, 0x0003, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000f, 0x3fff, 0x1fff, 0x01ff, 0xffff, 0xffff, 0xbfff, 0xc03f, 0x0fff, 0x01ff, 0x01ff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xfff7, 0xffff, 0xffff, 0x07ff, ];
/// Returns `true` when the byte `c` is an ASCII punctuation character.
///
/// Bytes of 0x80 and above are never ASCII and always yield `false`. For the
/// rest, the upper bits of `c` select a 16-bit row of `PUNCT_MASKS_ASCII` and
/// the low four bits select the bit tested within that row.
pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
    // Rejecting non-ASCII bytes first also keeps the row index within 0..8.
    if c >= 0x80 {
        return false;
    }
    let row = PUNCT_MASKS_ASCII[usize::from(c >> 4)];
    let bit = 1u16 << (c & 0x0f);
    (row & bit) != 0
}
/// Returns `true` when the character `c` is classified as punctuation.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. Other code
/// points up to 0x1FBCA are looked up in the `PUNCT_TAB` / `PUNCT_MASKS`
/// tables: the code point divided by 16 is the search key and the low four
/// bits select the bit within the matching mask. Anything above 0x1FBCA, or
/// whose block is not listed in `PUNCT_TAB`, is not punctuation.
pub(crate) fn is_punctuation(c: char) -> bool {
    let code = u32::from(c);
    match code {
        0..=0x7F => is_ascii_punctuation(code as u8),
        0x80..=0x1FBCA => {
            // The range check above guarantees the key fits in a u16.
            let key = (code >> 4) as u16;
            let bit = 1u16 << (code & 0xF);
            if let Ok(slot) = PUNCT_TAB.binary_search(&key) {
                (PUNCT_MASKS[slot] & bit) != 0
            } else {
                false
            }
        }
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Checks the byte-level classifier on a few punctuation bytes, a few
    /// non-punctuation ASCII bytes, and one byte outside the ASCII range.
    #[test]
    fn test_ascii() {
        let accepted: [u8; 3] = [b'!', b'@', b'~'];
        let rejected: [u8; 4] = [b' ', b'0', b'A', 0xA1];

        for byte in accepted.iter().copied() {
            assert!(
                is_ascii_punctuation(byte),
                "byte {:#04x} should be ASCII punctuation",
                byte
            );
        }
        for byte in rejected.iter().copied() {
            assert!(
                !is_ascii_punctuation(byte),
                "byte {:#04x} should not be ASCII punctuation",
                byte
            );
        }
    }

    /// Checks the `char` classifier on ASCII input, on several non-ASCII
    /// punctuation code points, and on the code point just after U+1BC9F.
    #[test]
    fn test_unicode() {
        let accepted: [char; 5] = ['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'];
        let rejected: [char; 2] = [' ', '\u{1BCA0}'];

        for ch in accepted.iter().copied() {
            assert!(
                is_punctuation(ch),
                "U+{:04X} should be punctuation",
                ch as u32
            );
        }
        for ch in rejected.iter().copied() {
            assert!(
                !is_punctuation(ch),
                "U+{:04X} should not be punctuation",
                ch as u32
            );
        }
    }
}