/// Maps between numeric values and the characters of a fixed character table.
///
/// The mapping is 1-based: value `n` (for `n >= 1`) corresponds to the table
/// entry at position `n - 1`, while value `0` means "no character".
pub trait GetCharacterSet {
    /// Returns the character table backing this set.
    fn get_set(&self) -> &[char];

    /// Returns the number of characters in the table.
    fn size(&self) -> usize {
        self.get_set().len()
    }

    /// Looks up the character that represents the value `index`.
    ///
    /// Returns `None` when `index` is `0`; otherwise returns the table entry
    /// at position `index - 1`.
    ///
    /// # Panics
    ///
    /// Panics when `index` is greater than [`size`](Self::size), i.e. the
    /// value cannot be represented by this table.
    fn get_character(&self, index: u32) -> Option<&char> {
        match index as usize {
            // Zero is reserved for "nothing to emit".
            0 => None,
            slot if slot > self.size() => {
                panic!("Too large number for given unicode set - cannot encode this amount of bits")
            }
            // Values are 1-based, table positions are 0-based.
            slot => self.get_set().get(slot - 1),
        }
    }

    /// Returns the value represented by `chr`: its 1-based position in the
    /// table, or `0` when the character is not part of the table.
    fn character_to_bits(&self, chr: &char) -> u32 {
        self.get_set()
            .iter()
            .position(|candidate| candidate == chr)
            .map_or(0, |found| (found + 1) as u32)
    }
}
/// The largest character table in this file: 31 entries.
///
/// Returned by `CharacterSetType::FullUnicodeSet`. With value `0` reserved for
/// "no character" by `GetCharacterSet::get_character`, the table covers the
/// values `1..=31`, i.e. every non-zero 5-bit value.
///
/// The first 15 entries are identical to `FOUR_BIT_CHARACTER_SET`.
/// U+2008 is not part of the table.
///
/// NOTE(review): the entries look like whitespace / invisible formatting code
/// points, presumably so that encoded data does not show up in rendered text -
/// confirm against the callers.
pub const FULL_UNICODE_CHARACTER_SET: [char; 31] = [
'\u{0020}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}',
'\u{2007}', '\u{2009}', '\u{200A}', '\u{200B}', '\u{200C}', '\u{200D}', '\u{200E}', '\u{2028}',
'\u{202A}', '\u{202C}', '\u{202D}', '\u{202F}', '\u{205F}', '\u{2060}', '\u{2061}', '\u{2062}',
'\u{2063}', '\u{2064}', '\u{2066}', '\u{2068}', '\u{2069}', '\u{3000}', '\u{FEFF}',
];
/// A 15-entry character table returned by `CharacterSetType::TwitterUnicodeSet`.
///
/// With value `0` reserved for "no character", the table covers the values
/// `1..=15`, i.e. every non-zero 4-bit value. It matches
/// `FOUR_BIT_CHARACTER_SET` except for the last entry, which is U+FEFF here
/// instead of U+200E.
///
/// NOTE(review): presumably limited to characters that survive being posted on
/// Twitter - confirm, the file itself does not state the selection criteria.
pub const TWITTER_UNICODE_CHARACTER_SET: [char; 15] = [
'\u{0020}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}',
'\u{2007}', '\u{2009}', '\u{200A}', '\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}',
];
/// A 15-entry character table returned by `CharacterSetType::FourBitUnicodeSet`.
///
/// With value `0` reserved for "no character" by
/// `GetCharacterSet::get_character`, the table covers the values `1..=15`,
/// i.e. every non-zero 4-bit value. The entries are the first 15 entries of
/// `FULL_UNICODE_CHARACTER_SET`.
pub const FOUR_BIT_CHARACTER_SET: [char; 15] = [
'\u{0020}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}',
'\u{2007}', '\u{2009}', '\u{200A}', '\u{200B}', '\u{200C}', '\u{200D}', '\u{200E}',
];
/// A 7-entry character table returned by `CharacterSetType::ThreeBitUnicodeSet`.
///
/// With value `0` reserved for "no character" by
/// `GetCharacterSet::get_character`, the table covers the values `1..=7`,
/// i.e. every non-zero 3-bit value. The entries are the first 7 entries of
/// `FULL_UNICODE_CHARACTER_SET`.
pub const THREE_BIT_CHARACTER_SET: [char; 7] = [
'\u{0020}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}',
];
/// A 3-entry character table returned by `CharacterSetType::TwoBitUnicodeSet`.
///
/// With value `0` reserved for "no character" by
/// `GetCharacterSet::get_character`, the table covers the values `1..=3`,
/// i.e. every non-zero 2-bit value. The entries are the first 3 entries of
/// `FULL_UNICODE_CHARACTER_SET`.
pub const TWO_BIT_CHARACTER_SET: [char; 3] = ['\u{0020}', '\u{2000}', '\u{2001}'];
/// Selects one of the predefined character tables in this file.
///
/// The `GetCharacterSet` implementation for this enum maps each variant to
/// the matching constant table.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CharacterSetType {
/// Uses `FULL_UNICODE_CHARACTER_SET` (31 characters).
FullUnicodeSet,
/// Uses `FOUR_BIT_CHARACTER_SET` (15 characters).
FourBitUnicodeSet,
/// Uses `THREE_BIT_CHARACTER_SET` (7 characters).
ThreeBitUnicodeSet,
/// Uses `TWO_BIT_CHARACTER_SET` (3 characters).
TwoBitUnicodeSet,
/// Uses `TWITTER_UNICODE_CHARACTER_SET` (15 characters).
TwitterUnicodeSet,
}
/// Resolves each [`CharacterSetType`] variant to its constant character table.
impl GetCharacterSet for CharacterSetType {
    /// Returns the constant table that belongs to the selected variant.
    fn get_set(&self) -> &[char] {
        // The match is exhaustive on purpose: adding a variant must force a
        // decision about which table it uses.
        let table: &'static [char] = match self {
            Self::FullUnicodeSet => &FULL_UNICODE_CHARACTER_SET,
            Self::FourBitUnicodeSet => &FOUR_BIT_CHARACTER_SET,
            Self::ThreeBitUnicodeSet => &THREE_BIT_CHARACTER_SET,
            Self::TwoBitUnicodeSet => &TWO_BIT_CHARACTER_SET,
            Self::TwitterUnicodeSet => &TWITTER_UNICODE_CHARACTER_SET,
        };
        table
    }
}