// pulldown-cmark 0.5.3
//
// A pull parser for CommonMark
// Documentation
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! CommonMark punctuation set based on spec and Unicode properties.

// Autogenerated by mk_puncttable.py

// Punctuation bitmasks for the ASCII range (U+0000...U+007F).
//
// Each `u16` covers a run of 16 consecutive code points (named in the trailing
// comment). `is_ascii_punctuation` selects the entry with `c / 16` and tests
// bit `c & 15`, so the least significant bit corresponds to the first code
// point of the run. A set bit means the code point is punctuation.
const PUNCT_MASKS_ASCII: [u16; 8] = [
    0x0000, // U+0000...U+000F
    0x0000, // U+0010...U+001F
    0xfffe, // U+0020...U+002F
    0xfc00, // U+0030...U+003F
    0x0001, // U+0040...U+004F
    0xf800, // U+0050...U+005F
    0x0001, // U+0060...U+006F
    0x7800, // U+0070...U+007F
];

// Keys for the non-ASCII punctuation lookup.
//
// Each entry is `code_point / 16` for the 16-code-point run named in its
// trailing comment. `is_punctuation` runs a binary search over this table, so
// the entries must stay in ascending order. The index at which a key is found
// is the index of the matching bitmask in `PUNCT_MASKS`; both tables therefore
// have the same length and ordering.
const PUNCT_TAB: [u16; 132] = [
    10,   // U+00A0...U+00AF
    11,   // U+00B0...U+00BF
    55,   // U+0370...U+037F
    56,   // U+0380...U+038F
    85,   // U+0550...U+055F
    88,   // U+0580...U+058F
    91,   // U+05B0...U+05BF
    92,   // U+05C0...U+05CF
    95,   // U+05F0...U+05FF
    96,   // U+0600...U+060F
    97,   // U+0610...U+061F
    102,  // U+0660...U+066F
    109,  // U+06D0...U+06DF
    112,  // U+0700...U+070F
    127,  // U+07F0...U+07FF
    131,  // U+0830...U+083F
    133,  // U+0850...U+085F
    150,  // U+0960...U+096F
    151,  // U+0970...U+097F
    175,  // U+0AF0...U+0AFF
    223,  // U+0DF0...U+0DFF
    228,  // U+0E40...U+0E4F
    229,  // U+0E50...U+0E5F
    240,  // U+0F00...U+0F0F
    241,  // U+0F10...U+0F1F
    243,  // U+0F30...U+0F3F
    248,  // U+0F80...U+0F8F
    253,  // U+0FD0...U+0FDF
    260,  // U+1040...U+104F
    271,  // U+10F0...U+10FF
    310,  // U+1360...U+136F
    320,  // U+1400...U+140F
    358,  // U+1660...U+166F
    361,  // U+1690...U+169F
    366,  // U+16E0...U+16EF
    371,  // U+1730...U+173F
    381,  // U+17D0...U+17DF
    384,  // U+1800...U+180F
    404,  // U+1940...U+194F
    417,  // U+1A10...U+1A1F
    426,  // U+1AA0...U+1AAF
    437,  // U+1B50...U+1B5F
    438,  // U+1B60...U+1B6F
    447,  // U+1BF0...U+1BFF
    451,  // U+1C30...U+1C3F
    455,  // U+1C70...U+1C7F
    460,  // U+1CC0...U+1CCF
    461,  // U+1CD0...U+1CDF
    513,  // U+2010...U+201F
    514,  // U+2020...U+202F
    515,  // U+2030...U+203F
    516,  // U+2040...U+204F
    517,  // U+2050...U+205F
    519,  // U+2070...U+207F
    520,  // U+2080...U+208F
    560,  // U+2300...U+230F
    562,  // U+2320...U+232F
    630,  // U+2760...U+276F
    631,  // U+2770...U+277F
    636,  // U+27C0...U+27CF
    638,  // U+27E0...U+27EF
    664,  // U+2980...U+298F
    665,  // U+2990...U+299F
    669,  // U+29D0...U+29DF
    671,  // U+29F0...U+29FF
    719,  // U+2CF0...U+2CFF
    727,  // U+2D70...U+2D7F
    736,  // U+2E00...U+2E0F
    737,  // U+2E10...U+2E1F
    738,  // U+2E20...U+2E2F
    739,  // U+2E30...U+2E3F
    740,  // U+2E40...U+2E4F
    768,  // U+3000...U+300F
    769,  // U+3010...U+301F
    771,  // U+3030...U+303F
    778,  // U+30A0...U+30AF
    783,  // U+30F0...U+30FF
    2639, // U+A4F0...U+A4FF
    2656, // U+A600...U+A60F
    2663, // U+A670...U+A67F
    2671, // U+A6F0...U+A6FF
    2695, // U+A870...U+A87F
    2700, // U+A8C0...U+A8CF
    2703, // U+A8F0...U+A8FF
    2706, // U+A920...U+A92F
    2709, // U+A950...U+A95F
    2716, // U+A9C0...U+A9CF
    2717, // U+A9D0...U+A9DF
    2725, // U+AA50...U+AA5F
    2733, // U+AAD0...U+AADF
    2735, // U+AAF0...U+AAFF
    2750, // U+ABE0...U+ABEF
    4051, // U+FD30...U+FD3F
    4065, // U+FE10...U+FE1F
    4067, // U+FE30...U+FE3F
    4068, // U+FE40...U+FE4F
    4069, // U+FE50...U+FE5F
    4070, // U+FE60...U+FE6F
    4080, // U+FF00...U+FF0F
    4081, // U+FF10...U+FF1F
    4082, // U+FF20...U+FF2F
    4083, // U+FF30...U+FF3F
    4085, // U+FF50...U+FF5F
    4086, // U+FF60...U+FF6F
    4112, // U+10100...U+1010F
    4153, // U+10390...U+1039F
    4157, // U+103D0...U+103DF
    4182, // U+10560...U+1056F
    4229, // U+10850...U+1085F
    4241, // U+10910...U+1091F
    4243, // U+10930...U+1093F
    4261, // U+10A50...U+10A5F
    4263, // U+10A70...U+10A7F
    4271, // U+10AF0...U+10AFF
    4275, // U+10B30...U+10B3F
    4281, // U+10B90...U+10B9F
    4356, // U+11040...U+1104F
    4363, // U+110B0...U+110BF
    4364, // U+110C0...U+110CF
    4372, // U+11140...U+1114F
    4375, // U+11170...U+1117F
    4380, // U+111C0...U+111CF
    4387, // U+11230...U+1123F
    4428, // U+114C0...U+114CF
    4444, // U+115C0...U+115CF
    4452, // U+11640...U+1164F
    4679, // U+12470...U+1247F
    5798, // U+16A60...U+16A6F
    5807, // U+16AF0...U+16AFF
    5811, // U+16B30...U+16B3F
    5812, // U+16B40...U+16B4F
    7113, // U+1BC90...U+1BC9F
];

// Punctuation bitmasks for the non-ASCII runs listed in `PUNCT_TAB`.
//
// Entry `i` belongs to the 16-code-point run whose key is `PUNCT_TAB[i]` (the
// trailing comment names that run). `is_punctuation` tests bit `cp & 15` of
// the selected mask, so the least significant bit corresponds to the first
// code point of the run. A set bit means the code point is punctuation.
const PUNCT_MASKS: [u16; 132] = [
    0x0882, // U+00A0...U+00AF
    0x88c0, // U+00B0...U+00BF
    0x4000, // U+0370...U+037F
    0x0080, // U+0380...U+038F
    0xfc00, // U+0550...U+055F
    0x0600, // U+0580...U+058F
    0x4000, // U+05B0...U+05BF
    0x0049, // U+05C0...U+05CF
    0x0018, // U+05F0...U+05FF
    0x3600, // U+0600...U+060F
    0xc800, // U+0610...U+061F
    0x3c00, // U+0660...U+066F
    0x0010, // U+06D0...U+06DF
    0x3fff, // U+0700...U+070F
    0x0380, // U+07F0...U+07FF
    0x7fff, // U+0830...U+083F
    0x4000, // U+0850...U+085F
    0x0030, // U+0960...U+096F
    0x0001, // U+0970...U+097F
    0x0001, // U+0AF0...U+0AFF
    0x0010, // U+0DF0...U+0DFF
    0x8000, // U+0E40...U+0E4F
    0x0c00, // U+0E50...U+0E5F
    0xfff0, // U+0F00...U+0F0F
    0x0017, // U+0F10...U+0F1F
    0x3c00, // U+0F30...U+0F3F
    0x0020, // U+0F80...U+0F8F
    0x061f, // U+0FD0...U+0FDF
    0xfc00, // U+1040...U+104F
    0x0800, // U+10F0...U+10FF
    0x01ff, // U+1360...U+136F
    0x0001, // U+1400...U+140F
    0x6000, // U+1660...U+166F
    0x1800, // U+1690...U+169F
    0x3800, // U+16E0...U+16EF
    0x0060, // U+1730...U+173F
    0x0770, // U+17D0...U+17DF
    0x07ff, // U+1800...U+180F
    0x0030, // U+1940...U+194F
    0xc000, // U+1A10...U+1A1F
    0x3f7f, // U+1AA0...U+1AAF
    0xfc00, // U+1B50...U+1B5F
    0x0001, // U+1B60...U+1B6F
    0xf000, // U+1BF0...U+1BFF
    0xf800, // U+1C30...U+1C3F
    0xc000, // U+1C70...U+1C7F
    0x00ff, // U+1CC0...U+1CCF
    0x0008, // U+1CD0...U+1CDF
    0xffff, // U+2010...U+201F
    0x00ff, // U+2020...U+202F
    0xffff, // U+2030...U+203F
    0xffef, // U+2040...U+204F
    0x7ffb, // U+2050...U+205F
    0x6000, // U+2070...U+207F
    0x6000, // U+2080...U+208F
    0x0f00, // U+2300...U+230F
    0x0600, // U+2320...U+232F
    0xff00, // U+2760...U+276F
    0x003f, // U+2770...U+277F
    0x0060, // U+27C0...U+27CF
    0xffc0, // U+27E0...U+27EF
    0xfff8, // U+2980...U+298F
    0x01ff, // U+2990...U+299F
    0x0f00, // U+29D0...U+29DF
    0x3000, // U+29F0...U+29FF
    0xde00, // U+2CF0...U+2CFF
    0x0001, // U+2D70...U+2D7F
    0xffff, // U+2E00...U+2E0F
    0xffff, // U+2E10...U+2E1F
    0x7fff, // U+2E20...U+2E2F
    0xffff, // U+2E30...U+2E3F
    0x0007, // U+2E40...U+2E4F
    0xff0e, // U+3000...U+300F
    0xfff3, // U+3010...U+301F
    0x2001, // U+3030...U+303F
    0x0001, // U+30A0...U+30AF
    0x0800, // U+30F0...U+30FF
    0xc000, // U+A4F0...U+A4FF
    0xe000, // U+A600...U+A60F
    0x4008, // U+A670...U+A67F
    0x00fc, // U+A6F0...U+A6FF
    0x00f0, // U+A870...U+A87F
    0xc000, // U+A8C0...U+A8CF
    0x0700, // U+A8F0...U+A8FF
    0xc000, // U+A920...U+A92F
    0x8000, // U+A950...U+A95F
    0x3ffe, // U+A9C0...U+A9CF
    0xc000, // U+A9D0...U+A9DF
    0xf000, // U+AA50...U+AA5F
    0xc000, // U+AAD0...U+AADF
    0x0003, // U+AAF0...U+AAFF
    0x0800, // U+ABE0...U+ABEF
    0xc000, // U+FD30...U+FD3F
    0x03ff, // U+FE10...U+FE1F
    0xffff, // U+FE30...U+FE3F
    0xffff, // U+FE40...U+FE4F
    0xfff7, // U+FE50...U+FE5F
    0x0d0b, // U+FE60...U+FE6F
    0xf7ee, // U+FF00...U+FF0F
    0x8c00, // U+FF10...U+FF1F
    0x0001, // U+FF20...U+FF2F
    0xb800, // U+FF30...U+FF3F
    0xa800, // U+FF50...U+FF5F
    0x003f, // U+FF60...U+FF6F
    0x0007, // U+10100...U+1010F
    0x8000, // U+10390...U+1039F
    0x0001, // U+103D0...U+103DF
    0x8000, // U+10560...U+1056F
    0x0080, // U+10850...U+1085F
    0x8000, // U+10910...U+1091F
    0x8000, // U+10930...U+1093F
    0x01ff, // U+10A50...U+10A5F
    0x8000, // U+10A70...U+10A7F
    0x007f, // U+10AF0...U+10AFF
    0xfe00, // U+10B30...U+10B3F
    0x1e00, // U+10B90...U+10B9F
    0x3f80, // U+11040...U+1104F
    0xd800, // U+110B0...U+110BF
    0x0003, // U+110C0...U+110CF
    0x000f, // U+11140...U+1114F
    0x0030, // U+11170...U+1117F
    0x21e0, // U+111C0...U+111CF
    0x3f00, // U+11230...U+1123F
    0x0040, // U+114C0...U+114CF
    0x03fe, // U+115C0...U+115CF
    0x000e, // U+11640...U+1164F
    0x001f, // U+12470...U+1247F
    0xc000, // U+16A60...U+16A6F
    0x0020, // U+16AF0...U+16AFF
    0x0f80, // U+16B30...U+16B3F
    0x0010, // U+16B40...U+16B4F
    0x8000, // U+1BC90...U+1BC9F
];

/// Returns `true` when the byte `c` is an ASCII punctuation character.
///
/// Bytes of 128 and above are never ASCII and always yield `false`. For the
/// rest, the upper bits of `c` pick a 16-code-point row in
/// `PUNCT_MASKS_ASCII` and the low four bits pick the bit to test in that row.
pub fn is_ascii_punctuation(c: u8) -> bool {
    // Guard first: the ASCII table only has rows for 0..=127.
    if c >= 128 {
        return false;
    }
    let row = PUNCT_MASKS_ASCII[usize::from(c >> 4)];
    let bit = 1u16 << (c & 0x0f);
    (row & bit) != 0
}

/// Returns `true` when `c` is a punctuation character according to the
/// tables in this module.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. Other characters
/// are looked up by binary-searching `PUNCT_TAB` for the 16-code-point run
/// that contains them and then testing the matching bit in `PUNCT_MASKS`.
/// Characters beyond the last run covered by the table are never punctuation.
pub fn is_punctuation(c: char) -> bool {
    // Last code point of the final run listed in `PUNCT_TAB`; keeping the
    // lookup below this bound also guarantees `code >> 4` fits in a `u16`.
    const LAST_TABLED: u32 = 0x1BC9F;

    let code = u32::from(c);
    if code < 128 {
        is_ascii_punctuation(code as u8)
    } else if code <= LAST_TABLED {
        let bucket = (code >> 4) as u16;
        PUNCT_TAB
            .binary_search(&bucket)
            .map(|slot| ((PUNCT_MASKS[slot] >> (code & 0x0f)) & 1) == 1)
            // A run absent from the table contains no punctuation.
            .unwrap_or(false)
    } else {
        false
    }
}

#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot-checks the byte-level lookup, including a non-ASCII byte.
    #[test]
    fn test_ascii() {
        let punctuation = [b'!', b'@', b'~'];
        for byte in punctuation.iter() {
            assert!(is_ascii_punctuation(*byte));
        }

        // 0xA1 is outside the ASCII range and must be rejected.
        let not_punctuation = [b' ', b'0', b'A', 0xA1];
        for byte in not_punctuation.iter() {
            assert!(!is_ascii_punctuation(*byte));
        }
    }

    /// Spot-checks the `char` lookup on ASCII, on several table entries and
    /// on both sides of the last code point covered by the table.
    #[test]
    fn test_unicode() {
        let punctuation = ['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'];
        for ch in punctuation.iter() {
            assert!(is_punctuation(*ch));
        }

        let not_punctuation = [' ', '\u{1BCA0}'];
        for ch in not_punctuation.iter() {
            assert!(!is_punctuation(*ch));
        }
    }
}