use std::fmt;
use std::ops::{
BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not, Sub, SubAssign,
};
use crate::unicode::{self, has_variation_sequence};
/// Number of bits held by one `u64` storage word of a `CharSet` bitmap.
const WORD_BITS: usize = u64::BITS as usize;
/// Number of `u64` words needed for one bit per variation entry, rounded up.
const CHARSET_WORDS: usize = unicode::VARIATION_ENTRY_COUNT.div_ceil(WORD_BITS);
/// A `CharSet` whose bitmap is produced by `all_bits()`, i.e. every variation entry is a member.
const ALL_CHARS: CharSet = CharSet { bits: all_bits() };
/// Every variation entry that is an ASCII character, in the plain (non-keycap) half only.
///
/// The keycap half is empty.
pub const ASCII: VariationSet = VariationSet {
chars: CharSet {
bits: named_bits(NamedSet::Ascii),
},
keycap_chars: CharSet::none(),
};
/// Every variation entry for which `unicode::is_text_default` returns `true`, in the plain
/// (non-keycap) half only.
///
/// The keycap half is empty.
pub const TEXT_DEFAULTS: VariationSet = VariationSet {
chars: CharSet {
bits: named_bits(NamedSet::TextDefaults),
},
keycap_chars: CharSet::none(),
};
/// Every variation entry for which `unicode::is_emoji_default` returns `true`, in the plain
/// (non-keycap) half only.
///
/// The keycap half is empty.
pub const EMOJI_DEFAULTS: VariationSet = VariationSet {
chars: CharSet {
bits: named_bits(NamedSet::EmojiDefaults),
},
keycap_chars: CharSet::none(),
};
/// The variation entries among U+00A9 (copyright sign), U+00AE (registered sign) and
/// U+2122 (trade mark sign), in the plain (non-keycap) half only.
///
/// The keycap half is empty.
pub const RIGHTS_MARKS: VariationSet = VariationSet {
chars: CharSet {
bits: named_bits(NamedSet::RightsMarks),
},
keycap_chars: CharSet::none(),
};
/// The variation entries among the arrow code points U+2194..=U+2199, U+21A9, U+21AA,
/// U+27A1, U+2934, U+2935 and U+2B05..=U+2B07, in the plain (non-keycap) half only.
///
/// The keycap half is empty.
pub const ARROWS: VariationSet = VariationSet {
chars: CharSet {
bits: named_bits(NamedSet::Arrows),
},
keycap_chars: CharSet::none(),
};
/// The variation entries among the card-suit code points U+2660, U+2663, U+2665 and U+2666,
/// in the plain (non-keycap) half only.
///
/// The keycap half is empty.
pub const CARD_SUITS: VariationSet = VariationSet {
chars: CharSet {
bits: named_bits(NamedSet::CardSuits),
},
keycap_chars: CharSet::none(),
};
/// Every variation entry in the plain (non-keycap) half; the keycap half is empty.
pub const NON_KEYCAP_CHARS: VariationSet = VariationSet {
chars: ALL_CHARS,
keycap_chars: CharSet::none(),
};
/// Every variation entry in the keycap half; the plain (non-keycap) half is empty.
pub const KEYCAP_CHARS: VariationSet = VariationSet {
chars: CharSet::none(),
keycap_chars: ALL_CHARS,
};
/// The variation entries among `#`, `*` and the ASCII digits, in the keycap half only.
///
/// The plain (non-keycap) half is empty.
pub const KEYCAP_EMOJIS: VariationSet = VariationSet {
chars: CharSet::none(),
keycap_chars: CharSet {
bits: named_bits(NamedSet::KeycapEmojis),
},
};
/// Selector for the predefined bitmaps built at compile time by `named_bits`.
///
/// The membership rule for each variant lives in `named_entry_matches`.
#[derive(Clone, Copy)]
enum NamedSet {
// Entries that are ASCII characters.
Ascii,
// Entries accepted by `unicode::is_text_default`.
TextDefaults,
// Entries accepted by `unicode::is_emoji_default`.
EmojiDefaults,
// U+00A9, U+00AE, U+2122.
RightsMarks,
// The fixed list of arrow code points.
Arrows,
// U+2660, U+2663, U+2665, U+2666.
CardSuits,
// `#`, `*` and ASCII digits.
KeycapEmojis,
}
/// A set of variation-sequence characters, stored as two independent bitmaps.
///
/// `chars` backs [`VariationSet::contains`] and `keycap_chars` backs
/// [`VariationSet::contains_keycap`]; all set operators act on both halves separately.
// NOTE(review): presumably `keycap_chars` tracks characters used as the base of a keycap
// sequence — confirm against the callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VariationSet {
chars: CharSet,
keycap_chars: CharSet,
}
/// Fixed-size bitmap with one bit per variation entry.
///
/// The entry with index `i` (as returned by `unicode::variation_sequence_index`) is stored in
/// word `i / WORD_BITS` at bit position `i % WORD_BITS`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CharSet {
bits: [u64; CHARSET_WORDS],
}
/// Returns the result of `unicode::has_variation_sequence` for `ch`.
// NOTE(review): presumably true exactly for characters present in the variation entry
// table — confirm in the `unicode` module.
#[must_use]
pub fn is_variation_sequence_character(ch: char) -> bool {
has_variation_sequence(ch)
}
impl VariationSet {
    /// Returns the set in which both the plain and the keycap half hold every entry.
    #[must_use]
    pub const fn all() -> Self {
        let full = ALL_CHARS;
        Self {
            chars: full,
            keycap_chars: full,
        }
    }

    /// Returns the set in which both halves are empty.
    #[must_use]
    pub const fn none() -> Self {
        let empty = CharSet::none();
        Self {
            chars: empty,
            keycap_chars: empty,
        }
    }

    /// Returns a set whose plain half is `CharSet::singleton(ch)` and whose keycap half is
    /// empty.
    #[must_use]
    pub fn singleton(ch: char) -> Self {
        let mut set = Self::none();
        set.chars = CharSet::singleton(ch);
        set
    }

    /// Returns a set whose keycap half is `CharSet::singleton(ch)` and whose plain half is
    /// empty.
    #[must_use]
    pub fn singleton_keycap(ch: char) -> Self {
        let mut set = Self::none();
        set.keycap_chars = CharSet::singleton(ch);
        set
    }

    /// Reports whether `ch` is a member of the plain (non-keycap) half.
    #[must_use]
    pub fn contains(&self, ch: char) -> bool {
        let Self { chars, .. } = self;
        chars.contains(ch)
    }

    /// Reports whether `ch` is a member of the keycap half.
    #[must_use]
    pub fn contains_keycap(&self, ch: char) -> bool {
        let Self { keycap_chars, .. } = self;
        keycap_chars.contains(ch)
    }
}
impl CharSet {
    /// Returns the bitmap with every bit cleared.
    const fn none() -> Self {
        Self {
            bits: [0; CHARSET_WORDS],
        }
    }

    /// Splits an entry index into the word it lives in and its single-bit mask in that word.
    const fn locate(index: usize) -> (usize, u64) {
        (index / WORD_BITS, 1u64 << (index % WORD_BITS))
    }

    /// Returns the bitmap holding only `ch`, or the empty bitmap when
    /// `unicode::variation_sequence_index` has no index for `ch`.
    fn singleton(ch: char) -> Self {
        let Some(index) = unicode::variation_sequence_index(ch) else {
            return Self::none();
        };
        let mut only = Self::none();
        only.set_index(index);
        only
    }

    /// Reports whether the bit for `ch` is set; characters without an index are never members.
    fn contains(&self, ch: char) -> bool {
        match unicode::variation_sequence_index(ch) {
            Some(index) => {
                let (word, mask) = Self::locate(index);
                (self.bits[word] & mask) != 0
            }
            None => false,
        }
    }

    /// Sets the bit for entry `index`.
    fn set_index(&mut self, index: usize) {
        let (word, mask) = Self::locate(index);
        self.bits[word] |= mask;
    }
}
/// Builds the bitmap in which exactly the first `unicode::VARIATION_ENTRY_COUNT` bits are set.
///
/// Every word starts fully set; when the entry count is not a multiple of `WORD_BITS`, the
/// last word is reduced to its low `count % WORD_BITS` bits so that no bit beyond the entry
/// count is set.
const fn all_bits() -> [u64; CHARSET_WORDS] {
    let mut words = [u64::MAX; CHARSET_WORDS];
    match unicode::VARIATION_ENTRY_COUNT % WORD_BITS {
        // The entries fill the last word exactly; nothing to trim.
        0 => {}
        // `tail` is in 1..WORD_BITS here, so the shift amount is in range and the result
        // keeps only the low `tail` bits.
        tail => words[CHARSET_WORDS - 1] = u64::MAX >> (WORD_BITS - tail),
    }
    words
}
/// Builds the bitmap for the predefined set `id`.
///
/// Walks every variation entry index and sets the corresponding bit when
/// `named_entry_matches` accepts the entry's character for `id`.
const fn named_bits(id: NamedSet) -> [u64; CHARSET_WORDS] {
    let mut words = [0u64; CHARSET_WORDS];
    let mut entry = 0;
    // A `while` loop is used because `for` loops are not available in `const fn`.
    while entry < unicode::VARIATION_ENTRY_COUNT {
        if named_entry_matches(id, unicode::variation_entry(entry)) {
            words[entry / WORD_BITS] |= 1u64 << (entry % WORD_BITS);
        }
        entry += 1;
    }
    words
}
/// Decides whether `ch` belongs to the predefined set selected by `id`.
///
/// `TextDefaults` and `EmojiDefaults` defer to the `unicode` module; every other variant is a
/// fixed list or range of code points spelled out here.
const fn named_entry_matches(id: NamedSet, ch: char) -> bool {
    match id {
        NamedSet::Ascii => matches!(ch, '\0'..='\x7F'),
        NamedSet::TextDefaults => unicode::is_text_default(ch),
        NamedSet::EmojiDefaults => unicode::is_emoji_default(ch),
        NamedSet::RightsMarks => matches!(ch, '\u{00A9}' | '\u{00AE}' | '\u{2122}'),
        // Contiguous code points are written as ranges; each range covers exactly the
        // individually listed arrows and nothing else.
        NamedSet::Arrows => matches!(
            ch,
            '\u{2194}'..='\u{2199}'
                | '\u{21A9}'..='\u{21AA}'
                | '\u{27A1}'
                | '\u{2934}'..='\u{2935}'
                | '\u{2B05}'..='\u{2B07}'
        ),
        NamedSet::CardSuits => matches!(ch, '\u{2660}' | '\u{2663}' | '\u{2665}' | '\u{2666}'),
        NamedSet::KeycapEmojis => matches!(ch, '#' | '*' | '0'..='9'),
    }
}
impl Default for VariationSet {
/// The default set is the empty set, identical to [`VariationSet::none`].
fn default() -> Self {
Self::none()
}
}
impl Not for VariationSet {
type Output = Self;
fn not(self) -> Self::Output {
Self::all() - self
}
}
/// Union: each half of the result is the union of the corresponding halves.
impl BitOr for VariationSet {
    type Output = Self;

    fn bitor(self, rhs: Self) -> Self::Output {
        let chars = self.chars | rhs.chars;
        let keycap_chars = self.keycap_chars | rhs.keycap_chars;
        Self {
            chars,
            keycap_chars,
        }
    }
}
/// In-place union of both halves with `rhs`.
impl BitOrAssign for VariationSet {
    fn bitor_assign(&mut self, rhs: Self) {
        self.chars = self.chars | rhs.chars;
        self.keycap_chars = self.keycap_chars | rhs.keycap_chars;
    }
}
/// Intersection: each half of the result is the intersection of the corresponding halves.
impl BitAnd for VariationSet {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        let chars = self.chars & rhs.chars;
        let keycap_chars = self.keycap_chars & rhs.keycap_chars;
        Self {
            chars,
            keycap_chars,
        }
    }
}
/// In-place intersection of both halves with `rhs`.
impl BitAndAssign for VariationSet {
    fn bitand_assign(&mut self, rhs: Self) {
        self.chars = self.chars & rhs.chars;
        self.keycap_chars = self.keycap_chars & rhs.keycap_chars;
    }
}
/// Symmetric difference: each half holds the entries present in exactly one operand.
impl BitXor for VariationSet {
    type Output = Self;

    fn bitxor(self, rhs: Self) -> Self::Output {
        let chars = self.chars ^ rhs.chars;
        let keycap_chars = self.keycap_chars ^ rhs.keycap_chars;
        Self {
            chars,
            keycap_chars,
        }
    }
}
/// In-place symmetric difference of both halves with `rhs`.
impl BitXorAssign for VariationSet {
    fn bitxor_assign(&mut self, rhs: Self) {
        self.chars = self.chars ^ rhs.chars;
        self.keycap_chars = self.keycap_chars ^ rhs.keycap_chars;
    }
}
/// Difference: each half keeps the entries of `self` that are absent from `rhs`.
impl Sub for VariationSet {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        let chars = self.chars - rhs.chars;
        let keycap_chars = self.keycap_chars - rhs.keycap_chars;
        Self {
            chars,
            keycap_chars,
        }
    }
}
/// In-place difference: removes from both halves every entry present in `rhs`.
impl SubAssign for VariationSet {
    fn sub_assign(&mut self, rhs: Self) {
        self.chars = self.chars - rhs.chars;
        self.keycap_chars = self.keycap_chars - rhs.keycap_chars;
    }
}
/// Textual form of a set.
///
/// The empty set prints as `none` and the full set as `all`. Any other set prints a
/// comma-separated list in entry-table order: first `u(HEX)` for each member of the plain
/// half, then `k(HEX)` for each member of the keycap half, where `HEX` is the code point in
/// upper-case hexadecimal padded to at least four digits.
impl fmt::Display for VariationSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if *self == Self::none() {
            return f.write_str("none");
        }
        if *self == Self::all() {
            return f.write_str("all");
        }

        // Fresh pass over every table entry, in index order.
        let entries = || (0..unicode::VARIATION_ENTRY_COUNT).map(unicode::variation_entry);
        // Shared by both passes so the comma also separates the last `u(..)` from the
        // first `k(..)`.
        let mut wrote_any = false;

        for ch in entries().filter(|&ch| self.contains(ch)) {
            if wrote_any {
                write!(f, ",")?;
            }
            write!(f, "u({:04X})", ch as u32)?;
            wrote_any = true;
        }
        for ch in entries().filter(|&ch| self.contains_keycap(ch)) {
            if wrote_any {
                write!(f, ",")?;
            }
            write!(f, "k({:04X})", ch as u32)?;
            wrote_any = true;
        }
        Ok(())
    }
}
impl BitOr for CharSet {
type Output = Self;
fn bitor(mut self, rhs: Self) -> Self::Output {
for index in 0..CHARSET_WORDS {
self.bits[index] |= rhs.bits[index];
}
self
}
}
impl BitAnd for CharSet {
type Output = Self;
fn bitand(mut self, rhs: Self) -> Self::Output {
for index in 0..CHARSET_WORDS {
self.bits[index] &= rhs.bits[index];
}
self
}
}
impl BitXor for CharSet {
type Output = Self;
fn bitxor(mut self, rhs: Self) -> Self::Output {
for index in 0..CHARSET_WORDS {
self.bits[index] ^= rhs.bits[index];
}
self
}
}
impl Sub for CharSet {
type Output = Self;
fn sub(mut self, rhs: Self) -> Self::Output {
for index in 0..CHARSET_WORDS {
self.bits[index] &= !rhs.bits[index];
}
self
}
}
// Unit tests are kept out of line in the `tests` submodule and are only compiled under
// `cfg(test)`.
#[cfg(test)]
mod tests;