use unic_char_property::TotalCharProperty;
char_property! {
/// The Unicode `Word_Break` character property (see UAX #29, "Word Boundaries").
///
/// Every variant declares three names: the abbreviated alias (`abbr`), the long
/// alias (`long`) and a human-readable label (`human`). They are returned by
/// `abbr_name()`, `long_name()` and `human_name()` respectively.
///
/// `Other` is the value returned by `Default::default()`.
pub enum WordBreak {
abbr => "WB";
long => "Word_Break";
human => "Word Break";
/// U+000D CARRIAGE RETURN (CR).
CR {
abbr => CR,
long => CR,
human => "Carriage Return",
}
/// U+000A LINE FEED (LF).
LF {
abbr => LF,
long => LF,
human => "Line Feed",
}
/// Line and paragraph separators other than CR and LF.
Newline {
abbr => NL,
long => Newline,
human => "Newline",
}
/// Characters that extend the preceding character (e.g. U+08FF, per the tests).
Extend {
abbr => Extend,
long => Extend,
human => "Extend",
}
/// U+200D ZERO WIDTH JOINER.
ZWJ {
abbr => ZWJ,
long => ZWJ,
human => "Zero Width Joiner (ZWJ)",
}
/// Regional indicator symbols.
RegionalIndicator {
abbr => RI,
long => Regional_Indicator,
human => "Regional Indicator",
}
/// Format characters (e.g. U+0600 and U+FEFF, per the tests).
Format {
abbr => FO,
long => Format,
human => "Format",
}
/// Katakana characters.
Katakana {
abbr => KA,
long => Katakana,
human => "Katakana",
}
/// Hebrew letters (e.g. U+05D0, per the tests).
HebrewLetter {
abbr => HL,
long => Hebrew_Letter,
human => "Hebrew Letter",
}
/// Alphabetic letters not covered by a more specific value.
/// Note that the abbreviated alias is `LE`, not `AL`.
ALetter {
abbr => LE,
long => ALetter,
human => "Alphabetic Letter",
}
/// Single quote.
SingleQuote {
abbr => SQ,
long => Single_Quote,
human => "Single Quote",
}
/// Double quote.
DoubleQuote {
abbr => DQ,
long => Double_Quote,
human => "Double Quote",
}
/// Characters that may occur in the middle of either a number or a word.
/// Note that the abbreviated alias is `MB`.
MidNumLet {
abbr => MB,
long => MidNumLet,
human => "Middle of Numeric/Letter",
}
/// Characters that may occur in the middle of a word.
MidLetter {
abbr => ML,
long => MidLetter,
human => "Middle of Letter",
}
/// Characters that may occur in the middle of a number.
MidNum {
abbr => MN,
long => MidNum,
human => "Middle of Numeric",
}
/// Numeric characters such as digits (e.g. U+07C0, per the tests).
Numeric {
abbr => NU,
long => Numeric,
human => "Numeric",
}
/// Characters that extend both numbers and letters.
ExtendNumLet {
abbr => EX,
long => ExtendNumLet,
human => "Extend Numeric/Letter",
}
/// Emoji base characters.
EBase {
abbr => EB,
long => E_Base,
human => "Emoji Base",
}
/// Emoji modifier characters.
EModifier {
abbr => EM,
long => E_Modifier,
human => "Emoji Modifier",
}
/// Characters that glue to a preceding zero width joiner.
GlueAfterZwj {
abbr => GAZ,
long => Glue_After_Zwj,
human => "Glue After ZWJ",
}
/// Characters that are both an emoji base and glue after a zero width joiner.
EBaseGAZ {
abbr => EBG,
long => E_Base_GAZ,
human => "Emoji Base and Glue After ZWJ",
}
/// Any character without a more specific value; also the `Default` value.
Other {
abbr => XX,
long => Other,
human => "Other",
}
}
/// Module generated from the `abbr` aliases declared above.
/// NOTE(review): presumably exposes each value under its abbreviated name — confirm
/// against the `char_property!` macro definition.
pub mod abbr_names for abbr;
/// Module generated from the `long` aliases declared above.
/// The private `data` module imports it as `WB`.
pub mod long_names for long;
}
impl TotalCharProperty for WordBreak {
fn of(ch: char) -> Self {
Self::of(ch)
}
}
impl Default for WordBreak {
fn default() -> Self {
WordBreak::Other
}
}
/// Private home of the lookup table that backs `WordBreak::of`.
mod data {
// Nothing visible in this module refers to `WB`; presumably the included table
// file names its values as `WB::<long name>`. Confirm before removing this import.
use super::long_names as WB;
use unic_char_property::tables::CharDataTable;
/// Table of `WordBreak` values, spliced in at compile time from
/// `../tables/word_break.rsv`.
pub const WORD_BREAK_TABLE: CharDataTable<super::WordBreak> =
include!("../tables/word_break.rsv");
}
impl WordBreak {
pub fn of(ch: char) -> WordBreak {
data::WORD_BREAK_TABLE.find_or_default(ch)
}
}
#[cfg(test)]
mod tests {
    use super::WordBreak as WB;
    use unic_char_property::EnumeratedCharProperty;

    /// Asserts that `WB::of` returns the expected value for every
    /// `(character, expected value)` pair in `cases`.
    ///
    /// The failure message names the offending code point, because the assertion
    /// lives in this shared helper and its source line would not identify the case.
    fn assert_word_breaks(cases: &[(char, WB)]) {
        for (ch, expected) in cases {
            assert_eq!(
                WB::of(*ch),
                *expected,
                "unexpected Word_Break value for U+{:04X}",
                *ch as u32
            );
        }
    }

    #[test]
    fn test_ascii() {
        assert_word_breaks(&[
            ('\u{0000}', WB::Other),
            ('\u{0040}', WB::Other),
            ('\u{0041}', WB::ALetter),
            ('\u{0062}', WB::ALetter),
            ('\u{007F}', WB::Other),
        ]);
    }

    #[test]
    fn test_bmp() {
        assert_word_breaks(&[
            // Hebrew block (U+0590..U+05FF)
            ('\u{0590}', WB::Other),
            ('\u{05D0}', WB::HebrewLetter),
            ('\u{05D1}', WB::HebrewLetter),
            ('\u{05FF}', WB::Other),
            // Arabic block start through the end of the Thaana block
            ('\u{0600}', WB::Format),
            ('\u{0627}', WB::ALetter),
            ('\u{07BF}', WB::Other),
            // NKo through the code points just below Arabic Extended-A
            ('\u{07C0}', WB::Numeric),
            ('\u{085F}', WB::Other),
            ('\u{0860}', WB::ALetter),
            ('\u{0870}', WB::Other),
            ('\u{089F}', WB::Other),
            // Arabic Extended-A block (U+08A0..U+08FF)
            ('\u{08A0}', WB::ALetter),
            ('\u{08FF}', WB::Extend),
            // Currency Symbols block (U+20A0..U+20CF)
            ('\u{20A0}', WB::Other),
            ('\u{20CF}', WB::Other),
            // Hebrew and Arabic presentation forms
            ('\u{FB1D}', WB::HebrewLetter),
            ('\u{FB4F}', WB::HebrewLetter),
            ('\u{FB50}', WB::ALetter),
            ('\u{FDCF}', WB::Other),
            ('\u{FDF0}', WB::ALetter),
            ('\u{FDFF}', WB::Other),
            ('\u{FE70}', WB::ALetter),
            ('\u{FEFE}', WB::Other),
            ('\u{FEFF}', WB::Format),
            // Noncharacters
            ('\u{FDD0}', WB::Other),
            ('\u{FDD1}', WB::Other),
            ('\u{FDEE}', WB::Other),
            ('\u{FDEF}', WB::Other),
            ('\u{FFFE}', WB::Other),
            ('\u{FFFF}', WB::Other),
        ]);
    }

    #[test]
    fn test_smp() {
        assert_word_breaks(&[
            ('\u{10800}', WB::ALetter),
            ('\u{10FFF}', WB::Other),
            ('\u{1E800}', WB::ALetter),
            ('\u{1EDFF}', WB::Other),
            ('\u{1EE00}', WB::ALetter),
            ('\u{1EEFF}', WB::Other),
            ('\u{1EF00}', WB::Other),
            ('\u{1EFFF}', WB::Other),
        ]);
    }

    #[test]
    fn test_unassigned_planes() {
        // First code point of each of planes 3 through 10.
        assert_word_breaks(&[
            ('\u{30000}', WB::Other),
            ('\u{40000}', WB::Other),
            ('\u{50000}', WB::Other),
            ('\u{60000}', WB::Other),
            ('\u{70000}', WB::Other),
            ('\u{80000}', WB::Other),
            ('\u{90000}', WB::Other),
            ('\u{A0000}', WB::Other),
        ]);
    }

    #[test]
    fn test_abbr_name() {
        assert_eq!(WB::CR.abbr_name(), "CR");
    }

    #[test]
    fn test_long_name() {
        assert_eq!(WB::CR.long_name(), "CR");
    }

    #[test]
    fn test_human_name() {
        assert_eq!(WB::CR.human_name(), "Carriage Return");
    }
}