// pulldown-cmark 0.0.8
//
// A pull parser for CommonMark
// Documentation
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! CommonMark punctuation set based on spec and Unicode properties.

// Autogenerated by mk_puncttable.py

/// Punctuation bitmasks for the ASCII range, one `u16` per row of 16 code
/// points.
///
/// Row `c / 16` holds the mask for byte `c`; bit `c & 15` (least significant
/// bit = first code point of the row) is set when `c` is punctuation. The set
/// bits correspond to `!`..=`/`, `:`..=`?`, `@`, `[`..=`_`, `` ` `` and
/// `{`..=`~`.
const PUNCT_MASKS_ASCII: [u16; 8] = [
        0x0000,  // U+0000...U+000F
        0x0000,  // U+0010...U+001F
        0xfffe,  // U+0020...U+002F
        0xfc00,  // U+0030...U+003F
        0x0001,  // U+0040...U+004F
        0xf800,  // U+0050...U+005F
        0x0001,  // U+0060...U+006F
        0x7800,  // U+0070...U+007F
    ];

/// Row keys for the non-ASCII punctuation lookup.
///
/// Each entry is a code point divided by 16 (e.g. `10` is the row
/// U+00A0...U+00AF) for a row that has a mask in `PUNCT_MASKS`. The entry at
/// index `i` here pairs with `PUNCT_MASKS[i]`, so both tables must keep the
/// same length and order.
///
/// `is_punctuation` looks rows up with `binary_search`, so the entries must
/// stay sorted in ascending order. It also rejects code points above U+1BC9F
/// (the end of the last row) before searching; that bound has to be updated
/// if rows are ever added past the final entry.
const PUNCT_TAB: [u16; 132] = [
        10,  // U+00A0...U+00AF
        11,  // U+00B0...U+00BF
        55,  // U+0370...U+037F
        56,  // U+0380...U+038F
        85,  // U+0550...U+055F
        88,  // U+0580...U+058F
        91,  // U+05B0...U+05BF
        92,  // U+05C0...U+05CF
        95,  // U+05F0...U+05FF
        96,  // U+0600...U+060F
        97,  // U+0610...U+061F
        102,  // U+0660...U+066F
        109,  // U+06D0...U+06DF
        112,  // U+0700...U+070F
        127,  // U+07F0...U+07FF
        131,  // U+0830...U+083F
        133,  // U+0850...U+085F
        150,  // U+0960...U+096F
        151,  // U+0970...U+097F
        175,  // U+0AF0...U+0AFF
        223,  // U+0DF0...U+0DFF
        228,  // U+0E40...U+0E4F
        229,  // U+0E50...U+0E5F
        240,  // U+0F00...U+0F0F
        241,  // U+0F10...U+0F1F
        243,  // U+0F30...U+0F3F
        248,  // U+0F80...U+0F8F
        253,  // U+0FD0...U+0FDF
        260,  // U+1040...U+104F
        271,  // U+10F0...U+10FF
        310,  // U+1360...U+136F
        320,  // U+1400...U+140F
        358,  // U+1660...U+166F
        361,  // U+1690...U+169F
        366,  // U+16E0...U+16EF
        371,  // U+1730...U+173F
        381,  // U+17D0...U+17DF
        384,  // U+1800...U+180F
        404,  // U+1940...U+194F
        417,  // U+1A10...U+1A1F
        426,  // U+1AA0...U+1AAF
        437,  // U+1B50...U+1B5F
        438,  // U+1B60...U+1B6F
        447,  // U+1BF0...U+1BFF
        451,  // U+1C30...U+1C3F
        455,  // U+1C70...U+1C7F
        460,  // U+1CC0...U+1CCF
        461,  // U+1CD0...U+1CDF
        513,  // U+2010...U+201F
        514,  // U+2020...U+202F
        515,  // U+2030...U+203F
        516,  // U+2040...U+204F
        517,  // U+2050...U+205F
        519,  // U+2070...U+207F
        520,  // U+2080...U+208F
        560,  // U+2300...U+230F
        562,  // U+2320...U+232F
        630,  // U+2760...U+276F
        631,  // U+2770...U+277F
        636,  // U+27C0...U+27CF
        638,  // U+27E0...U+27EF
        664,  // U+2980...U+298F
        665,  // U+2990...U+299F
        669,  // U+29D0...U+29DF
        671,  // U+29F0...U+29FF
        719,  // U+2CF0...U+2CFF
        727,  // U+2D70...U+2D7F
        736,  // U+2E00...U+2E0F
        737,  // U+2E10...U+2E1F
        738,  // U+2E20...U+2E2F
        739,  // U+2E30...U+2E3F
        740,  // U+2E40...U+2E4F
        768,  // U+3000...U+300F
        769,  // U+3010...U+301F
        771,  // U+3030...U+303F
        778,  // U+30A0...U+30AF
        783,  // U+30F0...U+30FF
        2639,  // U+A4F0...U+A4FF
        2656,  // U+A600...U+A60F
        2663,  // U+A670...U+A67F
        2671,  // U+A6F0...U+A6FF
        2695,  // U+A870...U+A87F
        2700,  // U+A8C0...U+A8CF
        2703,  // U+A8F0...U+A8FF
        2706,  // U+A920...U+A92F
        2709,  // U+A950...U+A95F
        2716,  // U+A9C0...U+A9CF
        2717,  // U+A9D0...U+A9DF
        2725,  // U+AA50...U+AA5F
        2733,  // U+AAD0...U+AADF
        2735,  // U+AAF0...U+AAFF
        2750,  // U+ABE0...U+ABEF
        4051,  // U+FD30...U+FD3F
        4065,  // U+FE10...U+FE1F
        4067,  // U+FE30...U+FE3F
        4068,  // U+FE40...U+FE4F
        4069,  // U+FE50...U+FE5F
        4070,  // U+FE60...U+FE6F
        4080,  // U+FF00...U+FF0F
        4081,  // U+FF10...U+FF1F
        4082,  // U+FF20...U+FF2F
        4083,  // U+FF30...U+FF3F
        4085,  // U+FF50...U+FF5F
        4086,  // U+FF60...U+FF6F
        4112,  // U+10100...U+1010F
        4153,  // U+10390...U+1039F
        4157,  // U+103D0...U+103DF
        4182,  // U+10560...U+1056F
        4229,  // U+10850...U+1085F
        4241,  // U+10910...U+1091F
        4243,  // U+10930...U+1093F
        4261,  // U+10A50...U+10A5F
        4263,  // U+10A70...U+10A7F
        4271,  // U+10AF0...U+10AFF
        4275,  // U+10B30...U+10B3F
        4281,  // U+10B90...U+10B9F
        4356,  // U+11040...U+1104F
        4363,  // U+110B0...U+110BF
        4364,  // U+110C0...U+110CF
        4372,  // U+11140...U+1114F
        4375,  // U+11170...U+1117F
        4380,  // U+111C0...U+111CF
        4387,  // U+11230...U+1123F
        4428,  // U+114C0...U+114CF
        4444,  // U+115C0...U+115CF
        4452,  // U+11640...U+1164F
        4679,  // U+12470...U+1247F
        5798,  // U+16A60...U+16A6F
        5807,  // U+16AF0...U+16AFF
        5811,  // U+16B30...U+16B3F
        5812,  // U+16B40...U+16B4F
        7113,  // U+1BC90...U+1BC9F
    ];

/// Punctuation bitmasks for the non-ASCII rows listed in `PUNCT_TAB`.
///
/// `PUNCT_MASKS[i]` describes the 16 code points of the row whose key is
/// `PUNCT_TAB[i]`: bit `cp & 15` (least significant bit = first code point of
/// the row) is set when code point `cp` is punctuation. The two tables are
/// indexed in parallel and must keep the same length and order.
const PUNCT_MASKS: [u16; 132] = [
        0x0882,  // U+00A0...U+00AF
        0x88c0,  // U+00B0...U+00BF
        0x4000,  // U+0370...U+037F
        0x0080,  // U+0380...U+038F
        0xfc00,  // U+0550...U+055F
        0x0600,  // U+0580...U+058F
        0x4000,  // U+05B0...U+05BF
        0x0049,  // U+05C0...U+05CF
        0x0018,  // U+05F0...U+05FF
        0x3600,  // U+0600...U+060F
        0xc800,  // U+0610...U+061F
        0x3c00,  // U+0660...U+066F
        0x0010,  // U+06D0...U+06DF
        0x3fff,  // U+0700...U+070F
        0x0380,  // U+07F0...U+07FF
        0x7fff,  // U+0830...U+083F
        0x4000,  // U+0850...U+085F
        0x0030,  // U+0960...U+096F
        0x0001,  // U+0970...U+097F
        0x0001,  // U+0AF0...U+0AFF
        0x0010,  // U+0DF0...U+0DFF
        0x8000,  // U+0E40...U+0E4F
        0x0c00,  // U+0E50...U+0E5F
        0xfff0,  // U+0F00...U+0F0F
        0x0017,  // U+0F10...U+0F1F
        0x3c00,  // U+0F30...U+0F3F
        0x0020,  // U+0F80...U+0F8F
        0x061f,  // U+0FD0...U+0FDF
        0xfc00,  // U+1040...U+104F
        0x0800,  // U+10F0...U+10FF
        0x01ff,  // U+1360...U+136F
        0x0001,  // U+1400...U+140F
        0x6000,  // U+1660...U+166F
        0x1800,  // U+1690...U+169F
        0x3800,  // U+16E0...U+16EF
        0x0060,  // U+1730...U+173F
        0x0770,  // U+17D0...U+17DF
        0x07ff,  // U+1800...U+180F
        0x0030,  // U+1940...U+194F
        0xc000,  // U+1A10...U+1A1F
        0x3f7f,  // U+1AA0...U+1AAF
        0xfc00,  // U+1B50...U+1B5F
        0x0001,  // U+1B60...U+1B6F
        0xf000,  // U+1BF0...U+1BFF
        0xf800,  // U+1C30...U+1C3F
        0xc000,  // U+1C70...U+1C7F
        0x00ff,  // U+1CC0...U+1CCF
        0x0008,  // U+1CD0...U+1CDF
        0xffff,  // U+2010...U+201F
        0x00ff,  // U+2020...U+202F
        0xffff,  // U+2030...U+203F
        0xffef,  // U+2040...U+204F
        0x7ffb,  // U+2050...U+205F
        0x6000,  // U+2070...U+207F
        0x6000,  // U+2080...U+208F
        0x0f00,  // U+2300...U+230F
        0x0600,  // U+2320...U+232F
        0xff00,  // U+2760...U+276F
        0x003f,  // U+2770...U+277F
        0x0060,  // U+27C0...U+27CF
        0xffc0,  // U+27E0...U+27EF
        0xfff8,  // U+2980...U+298F
        0x01ff,  // U+2990...U+299F
        0x0f00,  // U+29D0...U+29DF
        0x3000,  // U+29F0...U+29FF
        0xde00,  // U+2CF0...U+2CFF
        0x0001,  // U+2D70...U+2D7F
        0xffff,  // U+2E00...U+2E0F
        0xffff,  // U+2E10...U+2E1F
        0x7fff,  // U+2E20...U+2E2F
        0xffff,  // U+2E30...U+2E3F
        0x0007,  // U+2E40...U+2E4F
        0xff0e,  // U+3000...U+300F
        0xfff3,  // U+3010...U+301F
        0x2001,  // U+3030...U+303F
        0x0001,  // U+30A0...U+30AF
        0x0800,  // U+30F0...U+30FF
        0xc000,  // U+A4F0...U+A4FF
        0xe000,  // U+A600...U+A60F
        0x4008,  // U+A670...U+A67F
        0x00fc,  // U+A6F0...U+A6FF
        0x00f0,  // U+A870...U+A87F
        0xc000,  // U+A8C0...U+A8CF
        0x0700,  // U+A8F0...U+A8FF
        0xc000,  // U+A920...U+A92F
        0x8000,  // U+A950...U+A95F
        0x3ffe,  // U+A9C0...U+A9CF
        0xc000,  // U+A9D0...U+A9DF
        0xf000,  // U+AA50...U+AA5F
        0xc000,  // U+AAD0...U+AADF
        0x0003,  // U+AAF0...U+AAFF
        0x0800,  // U+ABE0...U+ABEF
        0xc000,  // U+FD30...U+FD3F
        0x03ff,  // U+FE10...U+FE1F
        0xffff,  // U+FE30...U+FE3F
        0xffff,  // U+FE40...U+FE4F
        0xfff7,  // U+FE50...U+FE5F
        0x0d0b,  // U+FE60...U+FE6F
        0xf7ee,  // U+FF00...U+FF0F
        0x8c00,  // U+FF10...U+FF1F
        0x0001,  // U+FF20...U+FF2F
        0xb800,  // U+FF30...U+FF3F
        0xa800,  // U+FF50...U+FF5F
        0x003f,  // U+FF60...U+FF6F
        0x0007,  // U+10100...U+1010F
        0x8000,  // U+10390...U+1039F
        0x0001,  // U+103D0...U+103DF
        0x8000,  // U+10560...U+1056F
        0x0080,  // U+10850...U+1085F
        0x8000,  // U+10910...U+1091F
        0x8000,  // U+10930...U+1093F
        0x01ff,  // U+10A50...U+10A5F
        0x8000,  // U+10A70...U+10A7F
        0x007f,  // U+10AF0...U+10AFF
        0xfe00,  // U+10B30...U+10B3F
        0x1e00,  // U+10B90...U+10B9F
        0x3f80,  // U+11040...U+1104F
        0xd800,  // U+110B0...U+110BF
        0x0003,  // U+110C0...U+110CF
        0x000f,  // U+11140...U+1114F
        0x0030,  // U+11170...U+1117F
        0x21e0,  // U+111C0...U+111CF
        0x3f00,  // U+11230...U+1123F
        0x0040,  // U+114C0...U+114CF
        0x03fe,  // U+115C0...U+115CF
        0x000e,  // U+11640...U+1164F
        0x001f,  // U+12470...U+1247F
        0xc000,  // U+16A60...U+16A6F
        0x0020,  // U+16AF0...U+16AFF
        0x0f80,  // U+16B30...U+16B3F
        0x0010,  // U+16B40...U+16B4F
        0x8000,  // U+1BC90...U+1BC9F
    ];

/// Returns `true` if the byte `c` is an ASCII punctuation character.
///
/// Bytes outside the ASCII range (`c >= 128`) are never punctuation. For
/// ASCII bytes the answer is read from `PUNCT_MASKS_ASCII`: the high bits of
/// `c` select a 16-entry row and the low four bits select the bit within it.
pub fn is_ascii_punctuation(c: u8) -> bool {
    // Guard first so the table is only ever indexed with rows 0..=7.
    if c >= 128 {
        return false;
    }
    let row_mask = PUNCT_MASKS_ASCII[(c >> 4) as usize];
    let bit = c & 0x0f;
    ((row_mask >> bit) & 1) != 0
}

/// Returns `true` if `c` is a punctuation character according to this
/// module's tables.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. Other characters
/// are split into a row key (code point divided by 16) and a bit index (low
/// four bits); the key is binary-searched in `PUNCT_TAB` and, when present,
/// the matching `PUNCT_MASKS` entry decides the result. Characters whose row
/// is not in the table are not punctuation.
pub fn is_punctuation(c: char) -> bool {
    let code = c as u32;
    if code < 128 {
        return is_ascii_punctuation(code as u8);
    }
    // Nothing beyond the last table row is punctuation; this bound also
    // guarantees that the row key below fits in a `u16` without truncation.
    if code > 0x1BC9F {
        return false;
    }
    let row = (code >> 4) as u16;
    let bit = code & 0x0f;
    PUNCT_TAB
        .binary_search(&row)
        .map(|slot| ((PUNCT_MASKS[slot] >> bit) & 1) != 0)
        .unwrap_or(false)
}

#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot-checks the ASCII classifier: a few symbols must be accepted,
    /// while space, a digit, a letter and a non-ASCII byte must be rejected.
    #[test]
    fn test_ascii() {
        let accepted = [b'!', b'@', b'~'];
        for &byte in accepted.iter() {
            assert!(is_ascii_punctuation(byte));
        }

        let rejected = [b' ', b'0', b'A', 0xA1u8];
        for &byte in rejected.iter() {
            assert!(!is_ascii_punctuation(byte));
        }
    }

    /// Spot-checks the full classifier: the ASCII fallback, a few non-ASCII
    /// punctuation characters, and both sides of the upper table boundary.
    #[test]
    fn test_unicode() {
        let accepted = ['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'];
        for &ch in accepted.iter() {
            assert!(is_punctuation(ch));
        }

        let rejected = [' ', '\u{1BCA0}'];
        for &ch in rejected.iter() {
            assert!(!is_punctuation(ch));
        }
    }
}