use crate::provider::exception_helpers::{
ExceptionBits, ExceptionBitsULE, ExceptionSlot, SlotPresence,
};
use crate::provider::exceptions::{CaseMapExceptions, DecodedException};
use alloc::borrow::Cow;
use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec::Vec;
use icu_provider::DataError;
use zerovec::ule::{AsULE, ULE};
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct ExceptionHeader {
pub slot_presence: SlotPresence,
pub bits: ExceptionBits,
}
impl ExceptionHeader {
pub(crate) fn from_integer(int: u16) -> Self {
let slot_presence =
SlotPresence(u8::try_from(int & ExceptionHeaderULE::SLOTS_MASK).unwrap_or(0));
let bits = ExceptionBits::from_integer(
u8::try_from(int >> ExceptionHeaderULE::BITS_SHIFT).unwrap_or(0),
);
Self {
slot_presence,
bits,
}
}
pub(crate) fn has_slot(self, slot: ExceptionSlot) -> bool {
self.slot_presence.has_slot(slot)
}
}
#[derive(Copy, Clone, PartialEq, Eq, ULE)]
#[repr(C, packed)]
pub struct ExceptionHeaderULE {
slot_presence: SlotPresence,
bits: ExceptionBitsULE,
}
impl ExceptionHeaderULE {
const SLOTS_MASK: u16 = 0xff;
const BITS_SHIFT: u16 = 8;
}
impl AsULE for ExceptionHeader {
type ULE = ExceptionHeaderULE;
fn from_unaligned(u: ExceptionHeaderULE) -> Self {
Self {
slot_presence: u.slot_presence,
bits: ExceptionBits::from_integer(u.bits.0),
}
}
fn to_unaligned(self) -> ExceptionHeaderULE {
ExceptionHeaderULE {
slot_presence: self.slot_presence,
bits: ExceptionBitsULE(self.bits.to_integer()),
}
}
}
pub struct CaseMapExceptionsBuilder<'a> {
raw_data: &'a [u16],
raw_data_idx: usize,
double_slots: bool,
}
impl<'a> CaseMapExceptionsBuilder<'a> {
const MAPPINGS_ALL_LENGTHS_MASK: u32 = 0xffff;
const FULL_MAPPINGS_LENGTH_MASK: u32 = 0xf;
const FULL_MAPPINGS_LENGTH_SHIFT: u32 = 4;
const CLOSURE_MAX_LENGTH: u32 = 0xf;
pub fn new(raw_data: &'a [u16]) -> Self {
Self {
raw_data,
raw_data_idx: 0,
double_slots: false,
}
}
fn done(&self) -> bool {
self.raw_data_idx >= self.raw_data.len()
}
fn read_raw(&mut self) -> Result<u16, DataError> {
let result = self
.raw_data
.get(self.raw_data_idx)
.ok_or(DataError::custom("Incomplete exception data"))?;
self.raw_data_idx += 1;
Ok(*result)
}
fn read_slot(&mut self) -> Result<u32, DataError> {
if self.double_slots {
let hi = self.read_raw()? as u32;
let lo = self.read_raw()? as u32;
Ok((hi << 16) | lo)
} else {
Ok(self.read_raw()? as u32)
}
}
fn skip_string(&mut self, s: &str) {
for c in s.chars() {
self.raw_data_idx += c.len_utf16();
}
}
pub(crate) fn build(
mut self,
) -> Result<(CaseMapExceptions<'static>, BTreeMap<u16, u16>), DataError> {
let mut exceptions = Vec::new();
let mut idx_map = BTreeMap::new();
while !self.done() {
let old_idx = self.raw_data_idx as u16;
let mut exception = DecodedException::default();
let header = ExceptionHeader::from_integer(self.read_raw()?);
self.double_slots = header.bits.double_width_slots;
for (slot, output) in [
(ExceptionSlot::Lower, &mut exception.lowercase),
(ExceptionSlot::Fold, &mut exception.casefold),
(ExceptionSlot::Upper, &mut exception.uppercase),
(ExceptionSlot::Title, &mut exception.titlecase),
] {
if header.has_slot(slot) {
let value = self.read_slot()?;
if let Ok(ch) = char::try_from(value) {
*output = Some(ch)
} else {
return Err(DataError::custom(
"Found non-char value in casemapping exceptions data",
));
}
}
}
if header.has_slot(ExceptionSlot::Delta) {
let delta = self.read_slot()?;
exception.simple_case_delta = Some(delta)
}
let closure_length = if header.has_slot(ExceptionSlot::Closure) {
Some((self.read_slot()? & Self::CLOSURE_MAX_LENGTH) as usize)
} else {
None
};
let mappings_lengths = if header.has_slot(ExceptionSlot::FullMappings) {
Some(self.read_slot()? & Self::MAPPINGS_ALL_LENGTHS_MASK)
} else {
None
};
if let Some(mut lengths) = mappings_lengths {
let mut arr: [Cow<_>; 4] = Default::default();
for mapping in &mut arr {
let len = lengths & Self::FULL_MAPPINGS_LENGTH_MASK;
lengths >>= Self::FULL_MAPPINGS_LENGTH_SHIFT;
let start = self.raw_data_idx;
let end = start + len as usize;
let slice = &self
.raw_data
.get(start..end)
.ok_or(DataError::custom("Incomplete string data"))?;
let string = char::decode_utf16(slice.iter().copied())
.collect::<Result<String, _>>()
.map_err(|_| DataError::custom("Found non-utf16 exceptions data"))?;
self.skip_string(&string);
*mapping = string.into()
}
exception.full = Some(arr)
}
if let Some(len) = closure_length {
let start = self.raw_data_idx;
let slice = &self
.raw_data
.get(start..)
.ok_or(DataError::custom("Incomplete string data"))?;
let string = char::decode_utf16(slice.iter().copied())
.take(len)
.collect::<Result<String, _>>()
.map_err(|_| DataError::custom("Found non-utf16 exceptions data"))?;
self.skip_string(&string);
exception.closure = Some(string.into())
}
exception.bits = header.bits;
exception.bits.double_width_slots = false;
let new_exception_index = if let Ok(idx) = u16::try_from(exceptions.len()) {
idx
} else {
return Err(DataError::custom("More than u16 exceptions"));
};
idx_map.insert(old_idx, new_exception_index);
exceptions.push(exception.encode());
}
Ok((
CaseMapExceptions {
exceptions: (&exceptions).into(),
},
idx_map,
))
}
}