use num_enum::TryFromPrimitive;
use std::fmt::{Display, Formatter, Write};
use utf8proc_sys::utf8proc_category_t;
/// Handle to the `utf8proc` property record describing a single code point.
///
/// The handle is just a `'static` shared reference, so it is as cheap to copy as a
/// pointer; it is therefore `Copy` in addition to `Clone`.
#[derive(Clone, Copy)]
pub struct CharProperties {
    /// Borrowed property record; never owned or freed by this wrapper.
    info: &'static utf8proc_sys::utf8proc_property_t,
}
impl CharProperties {
    /// Returns the raw `utf8proc` property record this handle refers to.
    #[inline]
    pub fn as_ffi_property(&self) -> &'static utf8proc_sys::utf8proc_property_t {
        self.info
    }

    /// Looks up the property record for `c`.
    #[inline]
    pub fn for_char(c: char) -> CharProperties {
        // A `char` never exceeds 0x10FFFF, so it always fits in an `i32`.
        let codepoint = c as i32;
        // SAFETY: per the utf8proc documentation, `utf8proc_get_property` accepts any
        // code point value and always returns a non-null pointer to a constant record
        // inside the library's static tables (a default record for unassigned or
        // invalid input), so dereferencing it and borrowing it for `'static` is sound.
        let info = unsafe { &*utf8proc_sys::utf8proc_get_property(codepoint) };
        CharProperties { info }
    }

    /// Returns the general category stored in the property record.
    ///
    /// # Panics
    ///
    /// Panics if the record's `category` value does not correspond to any
    /// [`GeneralCategory`] variant.
    #[inline]
    pub fn general_category(&self) -> GeneralCategory {
        // The raw field is a signed integer; reinterpret it as unsigned and widen it to
        // the enum's `u32` representation before converting.
        let raw = self.info.category.cast_unsigned() as u32;
        GeneralCategory::try_from_primitive(raw)
            .expect("utf8proc reported a general category with no matching GeneralCategory variant")
    }

    /// Returns the major category that the record's general category belongs to.
    ///
    /// # Panics
    ///
    /// Panics under the same condition as [`CharProperties::general_category`].
    #[inline]
    pub fn major_category(&self) -> MajorCategory {
        self.general_category().major_category()
    }

    /// Returns the record's `charwidth` value, or `None` when that value is zero.
    #[inline]
    pub fn char_width(&self) -> Option<usize> {
        match self.info.charwidth() {
            0 => None,
            width => Some(width as usize),
        }
    }
}
// Declares a fieldless `pub enum` whose discriminants are read from FFI constants.
//
// Expected input:
//
//   #[category(ffi = <FfiType>, prefix = <CONST_PREFIX>)]
//   #[repr(<integer type>)]
//   <any further attributes, e.g. derives or doc comments>
//   pub enum Name { VariantA, VariantB, ... }
//
// The `#[category(..)]` and `#[repr(..)]` attributes must come first and in that order;
// every attribute after them is forwarded to the generated enum unchanged. Each variant
// must be followed by a comma, including the last one.
//
// Generated items:
//   * the enum itself, deriving `TryFromPrimitive`, where variant `V` gets the
//     discriminant `<FfiType>::<CONST_PREFIX>_V.0`;
//   * `Name::ALL`, a slice listing every variant in declaration order;
//   * a private `declared_name` method returning the variant identifier as written.
macro_rules! define_property_enum {
(
// Names the FFI type holding the constants and the shared prefix of their identifiers.
#[category(ffi = $ffi:ident, prefix = $prefix:ident)]
#[repr($repr_type:ty)]
$(#[$outer_attr:meta])*
pub enum $name:ident {
$(
$(#[$variant_attr:meta])*
$variant:ident,
)*
}
) => {
// `paste!` is needed for the `[< .. >]` identifier concatenation below.
paste::paste! {
#[derive(TryFromPrimitive)]
#[repr($repr_type)]
$(#[$outer_attr])*
pub enum $name {
$(
$(#[$variant_attr])*
// Glue `<prefix>_<variant>` into one identifier and read field `.0` of that constant.
$variant = $ffi :: [< $prefix _ $variant>].0,
)*
}
impl $name {
// Every variant, in the order it was declared in the macro invocation.
pub const ALL: &'static [Self] = &[
$(Self::$variant,)*
];
// Returns the variant's identifier exactly as spelled in the macro invocation.
#[inline]
fn declared_name(&self) -> &'static str {
match *self {
$(Self::$variant => stringify!($variant),)*
}
}
}
}
};
}
define_property_enum! {
    // NOTE: the macro requires `#[category(..)]` and `#[repr(..)]` to be the first two
    // attributes, in this order; docs and derives must follow them.
    #[category(ffi = utf8proc_category_t, prefix = UTF8PROC_CATEGORY)]
    #[repr(u32)]
    /// Unicode general category of a code point.
    ///
    /// Each variant takes its discriminant from the `utf8proc_category_t` constant named
    /// `UTF8PROC_CATEGORY_<variant>`.
    #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
    pub enum GeneralCategory {
        /// `Cn`: unassigned.
        CN,
        /// `Lu`: uppercase letter.
        LU,
        /// `Ll`: lowercase letter.
        LL,
        /// `Lt`: titlecase letter.
        LT,
        /// `Lm`: modifier letter.
        LM,
        /// `Lo`: other letter.
        LO,
        /// `Mn`: nonspacing mark.
        MN,
        /// `Mc`: spacing mark.
        MC,
        /// `Me`: enclosing mark.
        ME,
        /// `Nd`: decimal number.
        ND,
        /// `Nl`: letter number.
        NL,
        /// `No`: other number.
        NO,
        /// `Pc`: connector punctuation.
        PC,
        /// `Pd`: dash punctuation.
        PD,
        /// `Ps`: open punctuation.
        PS,
        /// `Pe`: close punctuation.
        PE,
        /// `Pi`: initial punctuation.
        PI,
        /// `Pf`: final punctuation.
        PF,
        /// `Po`: other punctuation.
        PO,
        /// `Sm`: math symbol.
        SM,
        /// `Sc`: currency symbol.
        SC,
        /// `Sk`: modifier symbol.
        SK,
        /// `So`: other symbol.
        SO,
        /// `Zs`: space separator.
        ZS,
        /// `Zl`: line separator.
        ZL,
        /// `Zp`: paragraph separator.
        ZP,
        /// `Cc`: control.
        CC,
        /// `Cf`: format.
        CF,
        /// `Cs`: surrogate.
        CS,
        /// `Co`: private use.
        CO,
    }
}
impl GeneralCategory {
#[inline]
pub fn name(&self) -> &'static str {
self.declared_name()
}
#[inline]
pub fn major_category(&self) -> MajorCategory {
match self {
GeneralCategory::LU
| GeneralCategory::LL
| GeneralCategory::LT
| GeneralCategory::LM
| GeneralCategory::LO => MajorCategory::Letter,
GeneralCategory::MN | GeneralCategory::MC | GeneralCategory::ME => MajorCategory::Mark,
GeneralCategory::ND | GeneralCategory::NL | GeneralCategory::NO => MajorCategory::Number,
GeneralCategory::PC
| GeneralCategory::PD
| GeneralCategory::PS
| GeneralCategory::PE
| GeneralCategory::PI
| GeneralCategory::PF
| GeneralCategory::PO => MajorCategory::Punctuation,
GeneralCategory::SM | GeneralCategory::SC | GeneralCategory::SK | GeneralCategory::SO => {
MajorCategory::Symbol
}
GeneralCategory::ZS | GeneralCategory::ZL | GeneralCategory::ZP => MajorCategory::Separator,
GeneralCategory::CN
| GeneralCategory::CC
| GeneralCategory::CF
| GeneralCategory::CS
| GeneralCategory::CO => MajorCategory::Other,
}
}
}
impl Display for GeneralCategory {
    /// Writes the variant's declared name (e.g. `LU`).
    ///
    /// Width, fill, alignment and precision flags are honoured, so
    /// `format!("{:>4}", category)` pads as it would for a `&str`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // `pad` applies the formatter's flags; `write_str` would silently ignore them.
        f.pad(self.declared_name())
    }
}
/// Coarse grouping of general categories, identified by a single letter.
///
/// Implements `Hash` alongside `Eq` so values can be used as `HashMap`/`HashSet` keys.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum MajorCategory {
    /// Letters (`L`).
    Letter,
    /// Marks (`M`).
    Mark,
    /// Numbers (`N`).
    Number,
    /// Punctuation (`P`).
    Punctuation,
    /// Symbols (`S`).
    Symbol,
    /// Separators (`Z`).
    Separator,
    /// Everything else (`C`).
    Other,
}
impl MajorCategory {
#[inline]
pub fn letter(&self) -> char {
match self {
MajorCategory::Letter => 'L',
MajorCategory::Mark => 'M',
MajorCategory::Number => 'N',
MajorCategory::Punctuation => 'P',
MajorCategory::Symbol => 'S',
MajorCategory::Separator => 'Z',
MajorCategory::Other => 'C',
}
}
}
impl From<GeneralCategory> for MajorCategory {
#[inline]
fn from(value: GeneralCategory) -> Self {
value.major_category()
}
}
impl Display for MajorCategory {
    /// Writes the category's one-letter abbreviation (see `MajorCategory::letter`).
    ///
    /// Width, fill, alignment and precision flags are honoured in the same way as for
    /// a plain `char`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let letter = self.letter();
        if f.width().is_none() && f.precision().is_none() {
            // Nothing to pad or truncate: emit the character directly.
            f.write_char(letter)
        } else {
            // `pad` works on `&str`, so encode the character into a stack buffer first.
            let mut utf8 = [0u8; 4];
            f.pad(letter.encode_utf8(&mut utf8))
        }
    }
}
#[cfg(test)]
mod test {
    use crate::properties::GeneralCategory;

    /// Every general category's name must begin with the letter of the major category
    /// it maps to.
    #[test]
    fn major_categories_match() {
        for category in GeneralCategory::ALL.iter().copied() {
            let expected_initial = category.major_category().letter();
            assert!(category.name().starts_with(expected_initial), "{category}");
        }
    }
}