#[cfg(feature = "datagen")]
use alloc::collections::BTreeMap;
use core::num::TryFromIntError;
use icu_collections::codepointtrie::TrieValue;
use zerovec::ule::{AsULE, RawBytesULE, UleError, ULE};
/// The case of a cased character.
///
/// The discriminants (`1..=3`) are the values stored in the two low bits of
/// the packed representation; the bit pattern `0` has no variant and is
/// reported as `None` by [`CaseType::from_masked_bits`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub enum CaseType {
    /// Encoded as `1`.
    Lower = 1,
    /// Encoded as `2`.
    Upper = 2,
    /// Encoded as `3`.
    Title = 3,
}

impl CaseType {
    /// Mask selecting the two bits that hold a case type.
    pub(crate) const CASE_MASK: u16 = 0x3;

    /// Decodes a case type from bits that were already masked with
    /// [`Self::CASE_MASK`].
    ///
    /// Returns `None` for `0`. In debug builds, passing a value with bits set
    /// outside the mask trips an assertion; in release builds every value
    /// other than `0`, `1` and `2` decodes as [`CaseType::Title`].
    #[inline]
    pub(crate) fn from_masked_bits(b: u16) -> Option<Self> {
        debug_assert!(b & Self::CASE_MASK == b);
        let case_type = match b {
            0 => return None,
            1 => Self::Lower,
            2 => Self::Upper,
            _ => Self::Title,
        };
        Some(case_type)
    }
}
/// The dot type of a character, stored as a two-bit value.
///
/// NOTE(review): presumably this describes how a letter interacts with a dot
/// or accent placed above it (e.g. soft-dotted `i`/`j`) — confirm against the
/// case-mapping specification this data is generated from.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub enum DotType {
    /// Encoded as `0`; this is also the `Default` value.
    #[default]
    NoDot = 0,
    /// Encoded as `1`.
    SoftDotted = 1,
    /// Encoded as `2`.
    Above = 2,
    /// Encoded as `3`.
    OtherAccent = 3,
}

impl DotType {
    /// Mask selecting the two bits that hold a dot type (after shifting the
    /// field down to bit 0).
    pub(crate) const DOT_MASK: u16 = 0x3;

    /// Decodes a dot type from bits that were already shifted and masked with
    /// [`Self::DOT_MASK`].
    ///
    /// In debug builds, passing a value with bits set outside the mask trips
    /// an assertion; in release builds every value other than `0`, `1` and
    /// `2` decodes as [`DotType::OtherAccent`].
    #[inline]
    pub(crate) fn from_masked_bits(b: u16) -> Self {
        debug_assert!(b & Self::DOT_MASK == b);
        match b {
            0 => Self::NoDot,
            1 => Self::SoftDotted,
            2 => Self::Above,
            _ => Self::OtherAccent,
        }
    }
}
/// The kind of case mapping being performed.
///
/// `CaseMapData::is_relevant_to` treats `Lower` and `Fold` alike, and `Upper`
/// and `Title` alike.
/// NOTE(review): `Fold` presumably means case folding — confirm with callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum MappingKind {
    /// Discriminant `0`.
    Lower = 0,
    /// Discriminant `1`.
    Fold = 1,
    /// Discriminant `2`.
    Upper = 2,
    /// Discriminant `3`.
    Title = 3,
}
/// Decoded form of one 16-bit case-mapping value.
///
/// The packed form is [`CaseMapDataULE`]; conversion in both directions is
/// provided by the `AsULE` implementation for this type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub struct CaseMapData {
    /// The "case ignorable" flag (bit 2 of the packed value).
    ///
    /// The field name keeps its historical spelling because it is public API.
    pub ignoreable: bool,
    /// Everything else stored in the value; see [`CaseMapDataKind`].
    pub kind: CaseMapDataKind,
}
/// The three shapes a [`CaseMapData`] value can take.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub enum CaseMapDataKind {
    /// The value has its exception bit set: an optional case type plus an
    /// exception index. Only the low 12 bits of the index fit in the packed
    /// form (it is stored in bits 4..=15).
    Exception(Option<CaseType>, u16),
    /// A non-exception value with no case type; it carries only the
    /// [`NonExceptionData`] flags.
    Uncased(NonExceptionData),
    /// A non-exception value with a case type and a signed delta. The delta
    /// is stored in bits 7..=15 of the packed form, so only values in
    /// `-256..=255` survive a round trip.
    Delta(NonExceptionData, CaseType, i16),
}
/// Flags that are stored inline only when a value is not an exception.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub struct NonExceptionData {
    /// The "case sensitive" flag (bit 4 of the packed value).
    pub sensitive: bool,
    /// The dot type (bits 5..=6 of the packed value).
    pub dot_type: DotType,
}
impl CaseMapData {
    /// Returns the case type, or `None` when the value carries none.
    #[inline]
    pub(crate) fn case_type(self) -> Option<CaseType> {
        match self.kind {
            CaseMapDataKind::Uncased(..) => None,
            CaseMapDataKind::Delta(_, ty, _) => Some(ty),
            CaseMapDataKind::Exception(ty, ..) => ty,
        }
    }

    /// Returns `true` when the case type is `Upper` or `Title`.
    #[inline]
    pub(crate) fn is_upper_or_title(self) -> bool {
        matches!(
            self.case_type(),
            Some(CaseType::Upper | CaseType::Title)
        )
    }

    /// Returns whether this value matters for a mapping of the given `kind`.
    ///
    /// Lowercasing and folding concern upper/title-case values; uppercasing
    /// and titlecasing concern lower-case values.
    #[inline]
    pub(crate) fn is_relevant_to(self, kind: MappingKind) -> bool {
        let case_type = self.case_type();
        match kind {
            MappingKind::Lower | MappingKind::Fold => {
                matches!(case_type, Some(CaseType::Upper | CaseType::Title))
            }
            MappingKind::Upper | MappingKind::Title => {
                matches!(case_type, Some(CaseType::Lower))
            }
        }
    }

    /// Returns the "case ignorable" flag.
    #[inline]
    pub(crate) fn is_ignorable(self) -> bool {
        self.ignoreable
    }

    /// Returns `true` when this value is an exception entry.
    #[inline]
    pub(crate) fn has_exception(self) -> bool {
        matches!(self.kind, CaseMapDataKind::Exception(..))
    }

    /// Returns the inline "case sensitive" flag.
    ///
    /// Exception values carry no inline flag, so this is `false` for them.
    #[inline]
    pub(crate) fn is_sensitive(self) -> bool {
        match self.kind {
            CaseMapDataKind::Delta(ned, ..) | CaseMapDataKind::Uncased(ned) => ned.sensitive,
            CaseMapDataKind::Exception(..) => false,
        }
    }

    /// Returns the inline dot type.
    ///
    /// Exception values carry no inline dot type, so this is
    /// [`DotType::NoDot`] for them.
    #[inline]
    pub(crate) fn dot_type(self) -> DotType {
        match self.kind {
            CaseMapDataKind::Delta(ned, ..) | CaseMapDataKind::Uncased(ned) => ned.dot_type,
            CaseMapDataKind::Exception(..) => DotType::NoDot,
        }
    }

    /// Returns the delta of a `Delta` value and `0` for every other kind.
    ///
    /// Must not be called on an exception value (checked in debug builds).
    #[inline]
    pub(crate) fn delta(self) -> i16 {
        debug_assert!(!self.has_exception());
        if let CaseMapDataKind::Delta(_, _, delta) = self.kind {
            delta
        } else {
            0
        }
    }

    /// Returns the exception index of an exception value.
    ///
    /// Must only be called on an exception value (checked in debug builds);
    /// in release builds any other kind yields `0`.
    #[inline]
    pub(crate) fn exception_index(self) -> u16 {
        debug_assert!(self.has_exception());
        match self.kind {
            CaseMapDataKind::Exception(_, index) => index,
            _ => 0,
        }
    }

    /// Returns a copy whose exception index has been remapped via `updates`.
    ///
    /// Values that are not exceptions, and exception indices that have no
    /// entry in `updates`, are returned unchanged.
    #[cfg(feature = "datagen")]
    pub(crate) fn with_updated_exception(self, updates: &BTreeMap<u16, u16>) -> Self {
        match self.kind {
            CaseMapDataKind::Exception(ty, index) => {
                let index = updates.get(&index).copied().unwrap_or(index);
                Self {
                    kind: CaseMapDataKind::Exception(ty, index),
                    ..self
                }
            }
            _ => self,
        }
    }

    /// Decodes a packed 16-bit value after validating it.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `CaseMapDataULE::validate_bytes` when
    /// the integer is not a valid packed value.
    #[cfg(any(feature = "datagen", test))]
    pub(crate) fn try_from_icu_integer(int: u16) -> Result<Self, UleError> {
        let raw = int.to_unaligned();
        CaseMapDataULE::validate_bytes(raw.as_bytes())?;
        Ok(Self::from_unaligned(CaseMapDataULE(raw)))
    }
}
impl TrieValue for CaseMapData {
    type TryFromU32Error = TryFromIntError;

    /// Decodes a trie value.
    ///
    /// Fails only when `i` does not fit in 16 bits; the 16-bit pattern itself
    /// is decoded without further validation.
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        let packed = u16::try_from(i)?;
        Ok(Self::from_unaligned(CaseMapDataULE(packed.to_unaligned())))
    }

    /// Encodes this value as its packed 16-bit form, widened to `u32`.
    fn to_u32(self) -> u32 {
        let ule = self.to_unaligned();
        u32::from(ule.0.as_unsigned_int())
    }
}
/// Packed two-byte form of [`CaseMapData`].
///
/// Bit layout of the 16-bit value, as read and written by the `AsULE`
/// implementation for `CaseMapData`:
///
/// - bits 0..=1: case type (`0` means no case type)
/// - bit 2: case-ignorable flag
/// - bit 3: exception flag
/// - exception flag set: bits 4..=15 hold the exception index
/// - exception flag clear: bit 4 is the case-sensitive flag, bits 5..=6 the
///   dot type, and bits 7..=15 the signed delta (cased values only)
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CaseMapDataULE(RawBytesULE<2>);

impl CaseMapDataULE {
    /// Mask for the case type in bits 0..=1.
    const CASE_TYPE_BITS: u16 = 0b11;
    /// Case-ignorable flag, bit 2.
    const CASE_IGNOREABLE_BIT: u16 = 1 << 2;
    /// Exception flag, bit 3.
    const EXCEPTION_BIT: u16 = 1 << 3;
    /// Case-sensitive flag, bit 4 (non-exception values only).
    const CASE_SENSITIVE_BIT: u16 = 1 << 4;

    /// Position of the exception index (exception values only).
    const EXCEPTION_SHIFT: u16 = 4;
    /// Position of the signed delta (non-exception values only).
    const DELTA_SHIFT: u16 = 7;

    /// Position of the dot type (non-exception values only).
    const DOT_SHIFT: u16 = 5;
    /// Mask for the dot type in bits 5..=6.
    const DOT_TYPE_BITS: u16 = 0b11 << Self::DOT_SHIFT;
}
// SAFETY: `CaseMapDataULE` is a `#[repr(transparent)]` wrapper around
// `RawBytesULE<2>`, so it has exactly that type's layout, and
// `validate_bytes` below rejects byte slices that `RawBytesULE<2>` cannot
// parse as well as bit patterns with reserved bits set.
// NOTE(review): confirm the remaining items of the `ULE` safety checklist
// (e.g. byte equality being semantic equality) against the trait docs.
unsafe impl ULE for CaseMapDataULE {
    /// Checks that `bytes` is a valid sequence of packed values.
    ///
    /// # Errors
    ///
    /// Propagates the error from `RawBytesULE::<2>::parse_bytes_to_slice`,
    /// and returns a parse error when a value that is neither an exception
    /// nor cased has any bit set at or above `DELTA_SHIFT`.
    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
        let units = RawBytesULE::<2>::parse_bytes_to_slice(bytes)?;

        // Uncased, non-exception values have no delta, so their delta bits
        // are reserved and must be zero.
        let has_reserved_bits = |packed: u16| {
            let uncased_non_exception =
                packed & (Self::EXCEPTION_BIT | Self::CASE_TYPE_BITS) == 0;
            uncased_non_exception && (packed >> Self::DELTA_SHIFT) != 0
        };

        if units
            .iter()
            .any(|unit| has_reserved_bits(unit.as_unsigned_int()))
        {
            return Err(UleError::parse::<Self>());
        }
        Ok(())
    }
}
impl AsULE for CaseMapData {
type ULE = CaseMapDataULE;
fn from_unaligned(ule: Self::ULE) -> Self {
let sixteen = ule.0.as_unsigned_int();
let ignoreable = (sixteen & CaseMapDataULE::CASE_IGNOREABLE_BIT) != 0;
let exception = (sixteen & CaseMapDataULE::EXCEPTION_BIT) != 0;
let case_type = sixteen & CaseMapDataULE::CASE_TYPE_BITS;
let case_type = CaseType::from_masked_bits(case_type);
let kind = if exception {
let exception = sixteen >> CaseMapDataULE::EXCEPTION_SHIFT;
CaseMapDataKind::Exception(case_type, exception)
} else {
let dot_type = (sixteen & CaseMapDataULE::DOT_TYPE_BITS) >> CaseMapDataULE::DOT_SHIFT;
let dot_type = DotType::from_masked_bits(dot_type);
let sensitive = (sixteen & CaseMapDataULE::CASE_SENSITIVE_BIT) != 0;
let ned = NonExceptionData {
dot_type,
sensitive,
};
if let Some(case_type) = case_type {
let delta = (sixteen as i16) >> CaseMapDataULE::DELTA_SHIFT;
CaseMapDataKind::Delta(ned, case_type, delta)
} else {
CaseMapDataKind::Uncased(ned)
}
};
CaseMapData { ignoreable, kind }
}
fn to_unaligned(self) -> Self::ULE {
let mut sixteen = 0;
if self.ignoreable {
sixteen |= CaseMapDataULE::CASE_IGNOREABLE_BIT;
}
match self.kind {
CaseMapDataKind::Exception(case_type, e) => {
sixteen |= CaseMapDataULE::EXCEPTION_BIT;
sixteen |= e << CaseMapDataULE::EXCEPTION_SHIFT;
sixteen |= case_type.map(|c| c as u16).unwrap_or(0);
}
CaseMapDataKind::Uncased(ned) => {
sixteen |= (ned.dot_type as u16) << CaseMapDataULE::DOT_SHIFT;
if ned.sensitive {
sixteen |= CaseMapDataULE::CASE_SENSITIVE_BIT;
}
}
CaseMapDataKind::Delta(ned, case_type, delta) => {
sixteen |= (delta << CaseMapDataULE::DELTA_SHIFT) as u16;
sixteen |= (ned.dot_type as u16) << CaseMapDataULE::DOT_SHIFT;
if ned.sensitive {
sixteen |= CaseMapDataULE::CASE_SENSITIVE_BIT;
}
sixteen |= case_type as u16;
}
}
CaseMapDataULE(sixteen.to_unaligned())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Structured values must survive both the `AsULE` round trip and the
    /// validating integer decoder.
    #[test]
    fn test_roundtrip() {
        let testcases = [
            CaseMapData {
                ignoreable: true,
                kind: CaseMapDataKind::Exception(Some(CaseType::Title), 923),
            },
            CaseMapData {
                ignoreable: false,
                kind: CaseMapDataKind::Exception(None, 923),
            },
            CaseMapData {
                ignoreable: true,
                kind: CaseMapDataKind::Delta(
                    NonExceptionData {
                        sensitive: true,
                        dot_type: DotType::SoftDotted,
                    },
                    CaseType::Upper,
                    50,
                ),
            },
            CaseMapData {
                ignoreable: false,
                kind: CaseMapDataKind::Delta(
                    NonExceptionData {
                        sensitive: true,
                        dot_type: DotType::SoftDotted,
                    },
                    CaseType::Upper,
                    -50,
                ),
            },
            CaseMapData {
                ignoreable: false,
                kind: CaseMapDataKind::Uncased(NonExceptionData {
                    sensitive: false,
                    dot_type: DotType::SoftDotted,
                }),
            },
        ];

        for expected in testcases.iter().copied() {
            let packed = expected.to_unaligned();
            assert_eq!(expected, CaseMapData::from_unaligned(packed));

            let integer = packed.0.as_unsigned_int();
            let decoded = CaseMapData::try_from_icu_integer(integer).unwrap();
            assert_eq!(expected, decoded);
        }
    }

    /// Raw integers must decode and re-encode to the same bits.
    #[test]
    fn test_integer_roundtrip() {
        for int in [84u16, 2503].iter().copied() {
            let cmd = CaseMapData::try_from_icu_integer(int).unwrap();
            assert_eq!(int, cmd.to_unaligned().0.as_unsigned_int());
        }
    }
}