use icu_provider::prelude::*;
use super::data::MappingKind;
use super::exception_helpers::{ExceptionBits, ExceptionSlot, SlotPresence};
use crate::set::ClosureSink;
use alloc::borrow::Cow;
use alloc::string::String;
use core::fmt;
#[cfg(any(feature = "serde", feature = "datagen"))]
use core::ops::Range;
use core::ptr;
use zerovec::ule::AsULE;
use zerovec::VarZeroVec;
/// First code point of the UTF-16 surrogate range (`U+D800..=U+DFFF`), which a Rust `char`
/// cannot hold.
const SURROGATES_START: u32 = 0xD800;
/// Number of code points in the surrogate range.
///
/// Delta values at or above [`SURROGATES_START`] are shifted up by this amount when stored as a
/// `char` (see `DecodedException::encode`) and shifted back down when read
/// (see `ExceptionULE::get_simple_case_delta`), so stored values skip over the surrogates.
const SURROGATES_LEN: u32 = 0xDFFF - SURROGATES_START + 1;
/// The table of case-mapping exceptions, held as a variable-length zero-copy vector of
/// [`ExceptionULE`] entries.
///
/// Individual entries are fetched by index with `CaseMapExceptions::get`.
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::exceptions))]
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
pub struct CaseMapExceptions<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
/// The exception entries; an entry's position in this vector is its exception index.
pub exceptions: VarZeroVec<'data, ExceptionULE>,
}
impl CaseMapExceptions<'_> {
    /// Looks up the exception stored at `idx`.
    ///
    /// An out-of-range index trips a debug assertion; in release builds the empty
    /// placeholder exception is returned instead.
    pub fn get(&self, idx: u16) -> &ExceptionULE {
        let exception = self.exceptions.get(usize::from(idx));
        debug_assert!(exception.is_some());
        match exception {
            Some(found) => found,
            None => ExceptionULE::EMPTY_EXCEPTION,
        }
    }

    /// Validates every stored exception and returns the range of valid exception indices.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by an entry's own validation, or
    /// `"Too many exceptions"` if the number of entries does not fit in a `u16`.
    #[cfg(any(feature = "serde", feature = "datagen"))]
    pub(crate) fn validate(&self) -> Result<Range<u16>, &'static str> {
        self.exceptions
            .iter()
            .try_for_each(|exception| exception.validate())?;
        match u16::try_from(self.exceptions.len()) {
            Ok(count) => Ok(0..count),
            Err(_) => Err("Too many exceptions"),
        }
    }
}
/// A single case-mapping exception in its owned/borrowed (non-ULE) form.
///
/// `make_varule` generates the unsized [`ExceptionULE`] counterpart that is stored inside
/// [`CaseMapExceptions`].
#[zerovec::make_varule(ExceptionULE)]
#[derive(PartialEq, Eq, Clone, Default, Debug)]
#[zerovec::skip_derive(Ord)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize),
zerovec::derive(Deserialize)
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize),
zerovec::derive(Serialize)
)]
pub struct Exception<'a> {
/// Bit flags attached to this exception (e.g. whether the delta is negative).
pub bits: ExceptionBits,
/// Bit set recording which slots are present in `data`.
pub slot_presence: SlotPresence,
/// The packed slot payload, as written by `DecodedException::encode`:
///
/// 1. one `char` per present character slot, in the order lower, fold, upper, title, delta;
/// 2. if both the closure and full-mappings slots are present, one `char` whose code point is
///    the byte length of the closure string;
/// 3. the closure string, if present;
/// 4. if full mappings are present, three `char`s holding cumulative byte offsets, followed by
///    the four mapping strings concatenated.
pub data: Cow<'a, str>,
}
impl ExceptionULE {
/// Placeholder exception built from two zero bytes; `CaseMapExceptions::get` returns it when
/// the requested index is out of range.
const EMPTY_EXCEPTION: &Self = {
// Backing storage for the placeholder: two zero bytes.
static EMPTY_BYTES: &[u8] = &[0, 0];
// SAFETY — NOTE(review): this relies on the `make_varule`-generated layout of `Self`, which is
// not visible in this file. Presumably `Self` is two fixed one-byte fields (`bits`,
// `slot_presence`) followed by an unsized `str` tail, so that a pointer whose metadata is 0
// describes exactly the two bytes of `EMPTY_BYTES` with an empty `data` tail, and all-zero
// bytes are valid for both fixed fields. TODO confirm against the generated ULE type.
unsafe {
// The slice length (0) is carried over as the metadata of the `*const Self` fat pointer.
let slice: *const [u8] = ptr::slice_from_raw_parts(EMPTY_BYTES.as_ptr(), 0);
&*(slice as *const Self)
}
};
/// Returns whether `slot` is marked as present in this exception's slot-presence bits.
///
/// This only consults the presence flags; it does not check that `data` actually contains
/// a decodable value for the slot.
pub(crate) fn has_slot(&self, slot: ExceptionSlot) -> bool {
self.slot_presence.has_slot(slot)
}
/// Returns the `char` stored in `slot`, if any.
///
/// Yields `None` when `slot` is one of the string slots, when the slot is not marked
/// present, or when `data` holds fewer characters than the presence bits imply.
pub(crate) fn get_char_slot(&self, slot: ExceptionSlot) -> Option<char> {
    if slot >= ExceptionSlot::STRING_SLOTS_START {
        return None;
    }
    let presence = self.slot_presence.0;
    let slot_bit = 1 << (slot as u8);
    if presence & slot_bit == 0 {
        return None;
    }
    // Present slots are stored densely, so the position of this slot's character equals
    // the number of present slots with a lower bit index.
    let lower_slots = presence & (slot_bit - 1);
    let position = lower_slots.count_ones() as usize;
    self.data.chars().nth(position)
}
/// Returns the unsigned magnitude stored in the delta slot, if that slot is present.
///
/// The delta is stored as a `char`; values at or above [`SURROGATES_START`] were shifted up
/// by [`SURROGATES_LEN`] when written, so that shift is undone here.
fn get_simple_case_delta(&self) -> Option<u32> {
    self.get_char_slot(ExceptionSlot::Delta).map(|stored| {
        let raw = u32::from(stored);
        if raw >= SURROGATES_START {
            raw - SURROGATES_LEN
        } else {
            raw
        }
    })
}
/// Applies this exception's simple-case delta to `ch`.
///
/// The delta is subtracted when the `negative_delta` bit is set and added otherwise.
/// Returns `None` if there is no delta slot or if the result is not a valid `char`.
pub(crate) fn get_simple_case_slot_for(&self, ch: char) -> Option<char> {
    let magnitude = i32::try_from(self.get_simple_case_delta()?).ok()?;
    let signed_delta = if self.bits.negative_delta() {
        -magnitude
    } else {
        magnitude
    };
    let code_point = i32::try_from(u32::from(ch)).ok()?;
    let shifted = u32::try_from(code_point + signed_delta).ok()?;
    char::try_from(shifted).ok()
}
/// Returns the part of `data` that follows all present character slots, i.e. the region
/// holding the string slots.
///
/// Returns `None` if `data` has fewer characters than there are present character slots.
fn get_stringy_data(&self) -> Option<&str> {
    const CHAR_MASK: u8 = (1 << ExceptionSlot::STRING_SLOTS_START as u8) - 1;
    let to_skip = (self.slot_presence.0 & CHAR_MASK).count_ones() as usize;
    let mut rest = self.data.chars();
    if to_skip > 0 {
        // `nth(k)` consumes k + 1 items and is `None` when the iterator runs dry first.
        rest.nth(to_skip - 1)?;
    }
    Some(rest.as_str())
}
/// Returns the raw text of one of the two string slots (`Closure` or `FullMappings`).
///
/// When only one string slot is present it owns the whole string region. When both are
/// present, the region starts with a `char` giving the closure's byte length; the closure
/// text comes first and the full-mappings data takes the remainder.
///
/// Returns `None` if `slot` is absent or the stored data cannot be split as described.
fn get_stringy_slot(&self, slot: ExceptionSlot) -> Option<&str> {
    debug_assert!(slot == ExceptionSlot::Closure || slot == ExceptionSlot::FullMappings);
    let wants_closure = slot == ExceptionSlot::Closure;
    let sibling = if wants_closure {
        ExceptionSlot::FullMappings
    } else {
        ExceptionSlot::Closure
    };

    if !self.slot_presence.has_slot(slot) {
        return None;
    }
    let string_region = self.get_stringy_data()?;
    if !self.slot_presence.has_slot(sibling) {
        // A lone string slot is stored without a length prefix.
        return Some(string_region);
    }

    let mut cursor = string_region.chars();
    let closure_len = usize::try_from(u32::from(cursor.next()?)).unwrap_or(0);
    // The length is a byte offset into the text that follows the length character.
    let payload = cursor.as_str();
    if wants_closure {
        payload.get(..closure_len)
    } else {
        payload.get(closure_len..)
    }
}
/// Returns the text stored in the closure slot, or `None` if the slot is absent or its data
/// cannot be located.
pub(crate) fn get_closure_slot(&self) -> Option<&str> {
self.get_stringy_slot(ExceptionSlot::Closure)
}
/// Returns the raw, still-unsplit contents of the full-mappings slot, or `None` if the slot
/// is absent or its data cannot be located.
///
/// The returned text begins with three index characters followed by the concatenated
/// mapping strings; `get_fullmappings_slot_for_kind` splits it per mapping kind.
fn get_fullmappings_slot_data(&self) -> Option<&str> {
self.get_stringy_slot(ExceptionSlot::FullMappings)
}
/// Returns the full-mapping string for `kind`.
///
/// The full-mappings data starts with three `char`s whose code points are byte offsets into
/// the text that follows them; those offsets delimit the lower, fold, upper and title
/// strings, in that order.
///
/// Returns `None` if the slot is absent, fewer than three index characters are present, or
/// the requested range is out of bounds or not on a character boundary.
pub(crate) fn get_fullmappings_slot_for_kind(&self, kind: MappingKind) -> Option<&str> {
    let mut cursor = self.get_fullmappings_slot_data()?.chars();
    let mut ends = [0usize; 3];
    for end in ends.iter_mut() {
        *end = usize::try_from(u32::from(cursor.next()?)).ok()?;
    }
    let [lower_end, fold_end, upper_end] = ends;
    let mappings = cursor.as_str();
    match kind {
        MappingKind::Lower => mappings.get(..lower_end),
        MappingKind::Fold => mappings.get(lower_end..fold_end),
        MappingKind::Upper => mappings.get(fold_end..upper_end),
        MappingKind::Title => mappings.get(upper_end..),
    }
}
/// Returns all four full-mapping strings in the order lower, fold, upper, title.
///
/// Returns `None` as soon as any one of them cannot be decoded.
fn get_all_fullmapping_slots(&self) -> Option<[Cow<'_, str>; 4]> {
    let mapping = |kind| self.get_fullmappings_slot_for_kind(kind).map(Cow::from);
    let lower = mapping(MappingKind::Lower)?;
    let fold = mapping(MappingKind::Fold)?;
    let upper = mapping(MappingKind::Upper)?;
    let title = mapping(MappingKind::Title)?;
    Some([lower, fold, upper, title])
}
/// Returns the simple-mapping character for `kind`, if one is stored.
///
/// `Lower` and `Upper` read their own slot only. `Fold` falls back to the lower slot and
/// `Title` falls back to the upper slot when their own slot is empty.
#[inline]
pub(crate) fn slot_char_for_kind(&self, kind: MappingKind) -> Option<char> {
    let (primary, fallback) = match kind {
        MappingKind::Lower | MappingKind::Upper => return self.get_char_slot(kind.into()),
        MappingKind::Fold => (ExceptionSlot::Fold, ExceptionSlot::Lower),
        MappingKind::Title => (ExceptionSlot::Title, ExceptionSlot::Upper),
    };
    match self.get_char_slot(primary) {
        found @ Some(_) => found,
        None => self.get_char_slot(fallback),
    }
}
/// Feeds this exception's full case-folding string and closure characters into `set`.
///
/// The fold mapping is added as a string only when it is non-empty; every character of the
/// closure slot is added individually.
pub(crate) fn add_full_and_closure_mappings<S: ClosureSink>(&self, set: &mut S) {
    let folded = self
        .get_fullmappings_slot_for_kind(MappingKind::Fold)
        .filter(|text| !text.is_empty());
    if let Some(text) = folded {
        set.add_string(text);
    }

    let closure_chars = self.get_closure_slot().map(str::chars);
    for c in closure_chars.into_iter().flatten() {
        set.add_char(c);
    }
}
pub fn decode(&self) -> DecodedException<'_> {
let bits = self.bits;
let lowercase = self.get_char_slot(ExceptionSlot::Lower);
let casefold = self.get_char_slot(ExceptionSlot::Fold);
let uppercase = self.get_char_slot(ExceptionSlot::Upper);
let titlecase = self.get_char_slot(ExceptionSlot::Title);
let simple_case_delta = self.get_simple_case_delta();
let closure = self.get_closure_slot().map(Into::into);
let full = self.get_all_fullmapping_slots();
DecodedException {
bits: ExceptionBits::from_unaligned(bits),
lowercase,
casefold,
uppercase,
titlecase,
simple_case_delta,
closure,
full,
}
}
/// Checks that this exception is well-formed.
///
/// # Errors
///
/// Returns an error message when:
/// - the double-width-slots bit is set;
/// - a slot is marked present but could not be decoded;
/// - the full-mappings indices are missing, not non-decreasing, or point past the end of
///   the mapping text.
#[cfg(any(feature = "serde", feature = "datagen"))]
pub(crate) fn validate(&self) -> Result<(), &'static str> {
    if self.bits.double_width_slots() {
        return Err("double-width-slots should not be used in ICU4C");
    }

    // Decode everything once, then compare against the presence flags.
    let decoded = self.decode();
    let decode_results = [
        (ExceptionSlot::Lower, decoded.lowercase.is_some()),
        (ExceptionSlot::Fold, decoded.casefold.is_some()),
        (ExceptionSlot::Upper, decoded.uppercase.is_some()),
        (ExceptionSlot::Title, decoded.titlecase.is_some()),
        (ExceptionSlot::Delta, decoded.simple_case_delta.is_some()),
        (ExceptionSlot::Closure, decoded.closure.is_some()),
        (ExceptionSlot::FullMappings, decoded.full.is_some()),
    ];
    let any_failed = decode_results
        .iter()
        .any(|&(slot, was_decoded)| self.has_slot(slot) && !was_decoded);
    if any_failed {
        return Err("Slot decoding failed");
    }

    if !self.has_slot(ExceptionSlot::FullMappings) {
        return Ok(());
    }

    let data = self
        .get_fullmappings_slot_data()
        .ok_or("fullmappings slot doesn't parse")?;
    let mut cursor = data.chars();
    let mut read_index = || {
        cursor
            .next()
            .map(u32::from)
            .ok_or("fullmappings string too small")
    };
    let i1 = read_index()?;
    let i2 = read_index()?;
    let i3 = read_index()?;
    if !(i1 <= i2 && i2 <= i3) {
        return Err("fullmappings string contains non-sequential indices");
    }

    let rest = cursor.as_str();
    let len = u32::try_from(rest.len()).map_err(|_| "len too large for u32")?;
    if [i1, i2, i3].iter().any(|&index| index > len) {
        return Err("fullmappings string contains out-of-bounds indices");
    }
    Ok(())
}
}
/// Formats the exception by decoding it and delegating to the `Debug` output of the
/// resulting [`DecodedException`].
impl fmt::Debug for ExceptionULE {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let decoded = self.decode();
        fmt::Debug::fmt(&decoded, f)
    }
}
/// A fully unpacked view of an exception, with one optional field per slot.
///
/// Produced by `ExceptionULE::decode` and turned back into the packed form by
/// `DecodedException::encode`.
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct DecodedException<'a> {
/// Bit flags attached to the exception.
pub bits: ExceptionBits,
/// Contents of the lowercase character slot, if present.
pub lowercase: Option<char>,
/// Contents of the case-fold character slot, if present.
pub casefold: Option<char>,
/// Contents of the uppercase character slot, if present.
pub uppercase: Option<char>,
/// Contents of the titlecase character slot, if present.
pub titlecase: Option<char>,
/// Unsigned magnitude of the simple-case delta, if present. The sign is not stored here;
/// `ExceptionULE::get_simple_case_slot_for` takes it from `bits`.
pub simple_case_delta: Option<u32>,
/// Contents of the closure slot, if present.
pub closure: Option<Cow<'a, str>>,
/// The four full-mapping strings in the order lower, fold, upper, title, if present.
pub full: Option<[Cow<'a, str>; 4]>,
}
impl DecodedException<'_> {
    /// Packs this decoded exception into its compact [`Exception`] form.
    ///
    /// Slots are written to `data` in this order: the lower, fold, upper, title and delta
    /// characters (each only if present), then the closure string, then the full mappings.
    /// When both closure and full mappings are present, the closure is prefixed with a
    /// `char` holding its byte length so the two can be separated again on decode.
    ///
    /// Values that cannot be represented as a `char` (an index or delta that lands in the
    /// surrogate range, exceeds `char::MAX`, or overflows `u32`) are written as `'\0'`
    /// rather than causing a panic.
    pub fn encode(&self) -> Exception<'static> {
        let bits = self.bits;
        let mut slot_presence = SlotPresence(0);
        let mut data = String::new();

        if let Some(lowercase) = self.lowercase {
            slot_presence.add_slot(ExceptionSlot::Lower);
            data.push(lowercase)
        }
        if let Some(casefold) = self.casefold {
            slot_presence.add_slot(ExceptionSlot::Fold);
            data.push(casefold)
        }
        if let Some(uppercase) = self.uppercase {
            slot_presence.add_slot(ExceptionSlot::Upper);
            data.push(uppercase)
        }
        if let Some(titlecase) = self.titlecase {
            slot_presence.add_slot(ExceptionSlot::Title);
            data.push(titlecase)
        }
        if let Some(simple_case_delta) = self.simple_case_delta {
            slot_presence.add_slot(ExceptionSlot::Delta);

            // Deltas at or above the surrogate range are shifted past it so they can be
            // stored as a `char`. `checked_add` keeps an out-of-range delta from
            // overflowing `u32` (a debug panic / silent release wrap-around); such values
            // fall back to '\0' like any other unrepresentable delta.
            let stored = if simple_case_delta >= SURROGATES_START {
                simple_case_delta.checked_add(SURROGATES_LEN)
            } else {
                Some(simple_case_delta)
            };
            let stored = stored
                .and_then(|value| char::try_from(value).ok())
                .unwrap_or('\0');
            data.push(stored)
        }

        if let Some(ref closure) = self.closure {
            slot_presence.add_slot(ExceptionSlot::Closure);
            if self.full.is_some() {
                // Both string slots are present, so a length prefix is required. The
                // length must stay below the surrogate range to be storable as a `char`.
                debug_assert!(
                    closure.len() < 0xD800,
                    "Found overlarge closure value when encoding exception"
                );
                let len_char = u32::try_from(closure.len())
                    .ok()
                    .and_then(|c| char::try_from(c).ok())
                    .unwrap_or('\0');
                data.push(len_char);
            }
            data.push_str(closure);
        }
        if let Some(ref full) = self.full {
            slot_presence.add_slot(ExceptionSlot::FullMappings);
            let mut idx = 0;
            // Only the first three end offsets are stored; the last string runs to the end.
            for mapping in full.iter().take(3) {
                idx += mapping.len();
                data.push(char::try_from(u32::try_from(idx).unwrap_or(0)).unwrap_or('\0'));
            }
            for mapping in full {
                data.push_str(mapping);
            }
        }
        Exception {
            bits,
            slot_presence,
            data: data.into(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `exception`, converts it to its boxed ULE form, decodes it again and asserts
    /// that the result equals the input.
    fn test_roundtrip_once(exception: DecodedException) {
        let encoded = exception.encode();
        let encoded = zerovec::ule::encode_varule_to_box(&encoded);
        let decoded = encoded.decode();
        assert_eq!(decoded, exception);
    }

    /// Round-trips exceptions covering single character slots, a delta above the surrogate
    /// range, and the closure/full-mappings string slots alone and combined. The string
    /// data deliberately includes multi-byte UTF-8 text.
    #[test]
    fn test_roundtrip() {
        test_roundtrip_once(DecodedException {
            lowercase: Some('ø'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            lowercase: Some('ø'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            simple_case_delta: Some(0xE999),
            closure: Some("hello world".into()),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            simple_case_delta: Some(10),
            closure: Some("hello world".into()),
            full: Some(["你好世界".into(), "".into(), "hi".into(), "å".into()]),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            closure: Some("hello world".into()),
            full: Some(["aa".into(), "ț".into(), "".into(), "å".into()]),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            full: Some(["你好世界".into(), "".into(), "hi".into(), "å".into()]),
            ..Default::default()
        });
    }
}