use alloc::borrow::Cow;
use alloc::string::String;
use alloc::vec::Vec;
use icu_provider::prelude::*;
use zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor;
use crate::{
provider::names::{
Bcp47ToIanaMapV1, Bcp47ToIanaMapV1Marker, IanaToBcp47MapV3, IanaToBcp47MapV3Marker,
NON_REGION_CITY_PREFIX,
},
TimeZoneBcp47Id,
};
/// An owned mapper between IANA time zone identifiers and BCP-47 time zone identifiers.
///
/// This type owns its data payload. The lookup methods live on
/// [`TimeZoneIdMapperBorrowed`], which is obtained via [`TimeZoneIdMapper::as_borrowed`].
#[derive(Debug, Clone)]
pub struct TimeZoneIdMapper {
    /// Payload holding the IANA-to-BCP-47 map (trie plus the list of BCP-47 IDs).
    data: DataPayload<IanaToBcp47MapV3Marker>,
}
impl TimeZoneIdMapper {
    /// Creates a mapper backed by compiled data.
    ///
    /// Note that this returns the borrowed form, [`TimeZoneIdMapperBorrowed`], rather
    /// than `Self`, which is why `clippy::new_ret_no_self` is allowed here.
    #[cfg(feature = "compiled_data")]
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> TimeZoneIdMapperBorrowed<'static> {
        TimeZoneIdMapperBorrowed::<'static>::new()
    }

    icu_provider::gen_any_buffer_data_constructors!(() -> error: DataError,
        functions: [
            new: skip,
            try_new_with_any_provider,
            try_new_with_buffer_provider,
            try_new_unstable,
            Self,
        ]
    );

    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
    where
        P: DataProvider<IanaToBcp47MapV3Marker> + ?Sized,
    {
        // Load with the default request and keep only the payload of the response.
        let response = provider.load(Default::default())?;
        Ok(Self {
            data: response.payload,
        })
    }

    /// Returns a cheap, copyable view of this mapper that exposes the lookup methods.
    ///
    /// The view borrows from `self` and cannot outlive it.
    pub fn as_borrowed(&self) -> TimeZoneIdMapperBorrowed<'_> {
        let data = self.data.get();
        TimeZoneIdMapperBorrowed { data }
    }
}
/// Identity conversion, so that APIs bounded on `AsRef<TimeZoneIdMapper>` accept an
/// owned mapper directly.
impl AsRef<TimeZoneIdMapper> for TimeZoneIdMapper {
    #[inline]
    fn as_ref(&self) -> &Self {
        self
    }
}
/// A borrowed, `Copy` view of a [`TimeZoneIdMapper`].
///
/// All IANA/BCP-47 lookup methods are implemented on this type.
#[derive(Debug, Copy, Clone)]
pub struct TimeZoneIdMapperBorrowed<'a> {
    /// Borrowed IANA-to-BCP-47 map data (trie plus the list of BCP-47 IDs).
    data: &'a IanaToBcp47MapV3<'a>,
}
/// With compiled data available, the default mapper is the compiled-data mapper.
#[cfg(feature = "compiled_data")]
impl Default for TimeZoneIdMapperBorrowed<'static> {
    fn default() -> Self {
        TimeZoneIdMapperBorrowed::new()
    }
}
impl TimeZoneIdMapperBorrowed<'static> {
    /// Creates a borrowed mapper that points at the compiled (baked) data singleton.
    #[cfg(feature = "compiled_data")]
    pub fn new() -> Self {
        let data = crate::provider::Baked::SINGLETON_IANA_TO_BCP47_MAP_V3_MARKER;
        Self { data }
    }

    /// Converts this `'static` view into an owned [`TimeZoneIdMapper`].
    ///
    /// The payload is built from the static reference held by `self`, so this is only
    /// available when the borrowed data is `'static`.
    pub fn static_to_owned(&self) -> TimeZoneIdMapper {
        let data = DataPayload::from_static_ref(self.data);
        TimeZoneIdMapper { data }
    }
}
impl TimeZoneIdMapperBorrowed<'_> {
    /// Looks up the BCP-47 time zone ID for an IANA time zone ID given as a string.
    ///
    /// The lookup goes through an ASCII-case-insensitive trie, so the input does not
    /// need to be in normalized case. Returns [`TimeZoneBcp47Id::unknown()`] if the
    /// IANA ID is not found.
    pub fn iana_to_bcp47(&self, iana_id: &str) -> TimeZoneBcp47Id {
        self.iana_bytes_to_bcp47(iana_id.as_bytes())
    }

    /// Same as [`Self::iana_to_bcp47`], but accepts the IANA ID as raw bytes.
    ///
    /// Returns [`TimeZoneBcp47Id::unknown()`] if the IANA ID is not found.
    pub fn iana_bytes_to_bcp47(&self, iana_id: &[u8]) -> TimeZoneBcp47Id {
        self.iana_lookup_quick(iana_id)
            .and_then(|trie_value| self.data.bcp47_ids.get(trie_value.index()))
            .unwrap_or(TimeZoneBcp47Id::unknown())
    }

    /// Normalizes the case of an IANA time zone ID and returns it together with its
    /// BCP-47 ID.
    ///
    /// The returned string borrows from `iana_id` if it was already in the case stored
    /// in the data; otherwise it is an owned copy with the differing bytes replaced.
    /// The ID is *not* canonicalized: an alias stays an alias.
    ///
    /// Returns `None` if the IANA ID is not found.
    pub fn normalize_iana<'s>(&self, iana_id: &'s str) -> Option<(Cow<'s, str>, TimeZoneBcp47Id)> {
        let (trie_value, string) = self.iana_lookup_with_normalization(iana_id, |_| {})?;
        let Some(bcp47_id) = self.data.bcp47_ids.get(trie_value.index()) else {
            debug_assert!(false, "index should be in range");
            return None;
        };
        Some((string, bcp47_id))
    }

    /// Returns the canonical, case-normalized IANA ID for the given IANA ID, together
    /// with its BCP-47 ID.
    ///
    /// If the input already names the canonical ID, only case normalization is done
    /// and the result may borrow from `iana_id`. Otherwise the trie is searched for the
    /// canonical entry that shares the same BCP-47 index, and an owned string is
    /// returned.
    ///
    /// Returns `None` if the IANA ID is not found.
    pub fn canonicalize_iana<'s>(
        &self,
        iana_id: &'s str,
    ) -> Option<(Cow<'s, str>, TimeZoneBcp47Id)> {
        // Record a cursor for every node visited during the lookup. The search below
        // pops them in reverse order, so it starts probing next to the input ID and
        // only widens towards the root when nothing closer matches.
        let mut stack = Vec::with_capacity(iana_id.len());
        let (trie_value, mut string) = self.iana_lookup_with_normalization(iana_id, |cursor| {
            stack.push((cursor.clone(), 0, 1));
        })?;
        let Some(bcp47_id) = self.data.bcp47_ids.get(trie_value.index()) else {
            debug_assert!(false, "index should be in range");
            return None;
        };
        if trie_value.is_canonical() {
            return Some((string, bcp47_id));
        }
        // The input is an alias: search for the entry with the same index and the
        // canonical flag set.
        let needle = trie_value.to_canonical();
        if !string.contains('/') {
            // IDs without a '/' are keyed in the trie behind the internal prefix byte.
            // Prepend it so that `string` mirrors the trie path recorded in `stack`.
            string
                .to_mut()
                .insert(0, char::from(NON_REGION_CITY_PREFIX));
        }
        let Some(string) = self.iana_search(needle, string.into_owned(), stack) else {
            debug_assert!(false, "every time zone should have a canonical IANA ID");
            return None;
        };
        Some((Cow::Owned(string), bcp47_id))
    }

    /// Returns the canonical IANA ID for the given BCP-47 ID.
    ///
    /// This walks the trie starting from the root until the canonical entry for the
    /// BCP-47 ID is found, so in the worst case it visits the whole trie.
    ///
    /// Returns `None` if the BCP-47 ID is not present in the data.
    pub fn find_canonical_iana_from_bcp47(&self, bcp47_id: TimeZoneBcp47Id) -> Option<String> {
        let index = self.data.bcp47_ids.binary_search(&bcp47_id).ok()?;
        // Start at the root with an empty string; nothing to pop when it is exhausted.
        let stack = alloc::vec![(self.data.map.cursor(), 0, 0)];
        let needle = IanaTrieValue::canonical_for_index(index);
        self.iana_search(needle, String::new(), stack)
    }

    /// Looks up an IANA ID in the trie without tracking case differences.
    ///
    /// IDs that contain no `/` are keyed behind `NON_REGION_CITY_PREFIX`, so that byte
    /// is stepped first for such inputs.
    fn iana_lookup_quick(&self, iana_id: impl AsRef<[u8]>) -> Option<IanaTrieValue> {
        let mut cursor = self.data.map.cursor();
        let iana_id = iana_id.as_ref();
        if !iana_id.contains(&b'/') {
            cursor.step(NON_REGION_CITY_PREFIX);
        }
        for &b in iana_id {
            cursor.step(b);
        }
        cursor.take_value().map(IanaTrieValue)
    }

    /// Looks up an IANA ID in the trie and rewrites the input to the case stored in
    /// the trie.
    ///
    /// `cursor_fn` is invoked with the cursor before every step (including once before
    /// the internal prefix step for IDs without `/`, and once at the end of the
    /// input), which lets callers record the path taken through the trie.
    ///
    /// Returns the trie value and the normalized string, which borrows from `iana_id`
    /// unless a byte had to be changed. The returned string never contains the
    /// internal prefix byte. Returns `None` if the ID is not in the trie.
    fn iana_lookup_with_normalization<'l, 's>(
        &'l self,
        iana_id: &'s str,
        mut cursor_fn: impl FnMut(&ZeroAsciiIgnoreCaseTrieCursor<'l>),
    ) -> Option<(IanaTrieValue, Cow<'s, str>)> {
        let mut cursor = self.data.map.cursor();
        if !iana_id.contains('/') {
            cursor_fn(&cursor);
            cursor.step(NON_REGION_CITY_PREFIX);
        }
        let mut string = Cow::Borrowed(iana_id);
        let mut i = 0;
        let trie_value = loop {
            cursor_fn(&cursor);
            let Some(&input_byte) = string.as_bytes().get(i) else {
                // End of input: the ID is known only if this node carries a value.
                break cursor.take_value().map(IanaTrieValue);
            };
            let Some(matched_byte) = cursor.step(input_byte) else {
                break None;
            };
            if matched_byte != input_byte {
                // The trie matched a differently-cased byte; patch the string in place
                // (this converts a borrowed `Cow` into an owned one on first use).
                //
                // SAFETY: the only write through this slice is the assignment at the
                // end of this branch, which is reached only when both the overwritten
                // byte and the new byte are ASCII. Replacing one ASCII byte with
                // another keeps the string valid UTF-8.
                let Some(input_byte) = unsafe { string.to_mut().as_bytes_mut() }.get_mut(i) else {
                    debug_assert!(false, "the same index was just accessed earlier");
                    break None;
                };
                if !input_byte.is_ascii() {
                    debug_assert!(false, "non-ASCII input byte: {input_byte}");
                    break None;
                }
                if !matched_byte.is_ascii() {
                    debug_assert!(false, "non-ASCII matched byte: {matched_byte}");
                    break None;
                }
                *input_byte = matched_byte;
            }
            i += 1;
        }?;
        Some((trie_value, string))
    }

    /// Depth-first search of the trie for the entry whose value equals `needle`.
    ///
    /// Each `stack` entry is `(cursor, index, suffix_len)`:
    ///
    /// - `cursor` is positioned at a trie node,
    /// - `index` is the next child of that node to probe,
    /// - `suffix_len` is how many trailing bytes of `string` to remove once all
    ///   children of that node have been visited.
    ///
    /// `string` must spell the trie path of the cursor on top of the stack. On
    /// success, the IANA ID of the matching entry is returned with the internal
    /// prefix byte removed. Returns `None` if the stack is exhausted without a match.
    fn iana_search(
        &self,
        needle: IanaTrieValue,
        mut string: String,
        mut stack: Vec<(ZeroAsciiIgnoreCaseTrieCursor, usize, usize)>,
    ) -> Option<String> {
        while let Some((mut cursor, index, suffix_len)) = stack.pop() {
            // Does the current node itself hold the value we are looking for?
            if cursor.take_value().map(IanaTrieValue) == Some(needle) {
                // `string` is the raw trie key; callers expect a real IANA ID.
                return Some(Self::strip_trie_key_prefix(string));
            }
            let mut sub_cursor = cursor.clone();
            let Some(probe_result) = sub_cursor.probe(index) else {
                // No child at `index`: this node is exhausted, so drop its bytes.
                for _ in 0..suffix_len {
                    // SAFETY: every byte in `string` is ASCII (the input path was
                    // normalized against the ASCII trie, and only ASCII bytes are
                    // pushed below), so removing one byte keeps the string valid
                    // UTF-8. This is double-checked on the removed byte; if the check
                    // ever fails, the string is cleared before it can be observed.
                    match unsafe { string.as_mut_vec().pop() } {
                        Some(removed_byte) if removed_byte.is_ascii() => {}
                        Some(removed_byte) => {
                            debug_assert!(false, "non-ASCII removed byte: {removed_byte}");
                            // `string` may no longer be valid UTF-8; clear it to be safe.
                            string.clear();
                            return None;
                        }
                        None => {
                            debug_assert!(false, "could not remove another byte");
                            return None;
                        }
                    }
                }
                continue;
            };
            if !probe_result.byte.is_ascii() {
                debug_assert!(false, "non-ASCII probe byte: {}", probe_result.byte);
                return None;
            }
            // SAFETY: the byte being appended is ASCII, as checked just above, so the
            // string stays valid UTF-8.
            unsafe { string.as_mut_vec().push(probe_result.byte) };
            if index + 1 < probe_result.total_siblings as usize {
                // More siblings remain: come back to this node later, and let the
                // child remove only its own byte when it is exhausted.
                stack.push((cursor, index + 1, suffix_len));
                stack.push((sub_cursor, 0, 1));
            } else {
                // This was the last child: fold the parent's pending bytes into the
                // child's entry so they are all removed together.
                stack.push((sub_cursor, 0, suffix_len + 1));
            }
        }
        // Nothing left in the trie.
        None
    }

    /// Turns a raw trie key back into an IANA ID by removing the internal
    /// `NON_REGION_CITY_PREFIX` byte, if present.
    ///
    /// This mirrors the lookup convention: the prefix is only ever added in front of
    /// IDs that contain no `/`, so it is only removed from such keys.
    fn strip_trie_key_prefix(mut key: String) -> String {
        if key.as_bytes().first() == Some(&NON_REGION_CITY_PREFIX) && !key.contains('/') {
            key.remove(0);
        }
        key
    }
}
/// A mapper that supplements a [`TimeZoneIdMapper`] with a table of canonical IANA IDs,
/// so that canonicalization can be answered by an index lookup.
///
/// `I` is the wrapped mapper; the constructors require it to implement
/// `AsRef<TimeZoneIdMapper>`.
#[derive(Debug, Clone)]
pub struct TimeZoneIdMapperWithFastCanonicalization<I> {
    /// The wrapped IANA-to-BCP-47 mapper.
    inner: I,
    /// Payload holding the canonical IANA IDs, indexed in parallel with the inner
    /// mapper's BCP-47 IDs (the two payloads are checked against each other by checksum
    /// at construction).
    data: DataPayload<Bcp47ToIanaMapV1Marker>,
}
impl TimeZoneIdMapperWithFastCanonicalization<TimeZoneIdMapper> {
    /// Creates a fast-canonicalization mapper backed by compiled data.
    ///
    /// Note that this returns the borrowed form,
    /// [`TimeZoneIdMapperWithFastCanonicalizationBorrowed`], rather than `Self`, which
    /// is why `clippy::new_ret_no_self` is allowed here.
    #[cfg(feature = "compiled_data")]
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> TimeZoneIdMapperWithFastCanonicalizationBorrowed<'static> {
        TimeZoneIdMapperWithFastCanonicalizationBorrowed::<'static>::new()
    }

    icu_provider::gen_any_buffer_data_constructors!(() -> error: DataError,
        functions: [
            new: skip,
            try_new_with_any_provider,
            try_new_with_buffer_provider,
            try_new_unstable,
            Self,
        ]
    );

    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
    where
        P: DataProvider<IanaToBcp47MapV3Marker> + DataProvider<Bcp47ToIanaMapV1Marker> + ?Sized,
    {
        // Build the inner mapper first, then wrap it; both steps load from `provider`.
        TimeZoneIdMapper::try_new_unstable(provider)
            .and_then(|mapper| Self::try_new_with_mapper_unstable(provider, mapper))
    }
}
impl<I> TimeZoneIdMapperWithFastCanonicalization<I>
where
    I: AsRef<TimeZoneIdMapper>,
{
    /// Wraps an existing mapper, taking the canonical IANA ID table from compiled data.
    ///
    /// # Errors
    ///
    /// Returns an error if the checksum stored in the mapper's data does not match the
    /// checksum stored in the compiled canonical-ID data, i.e. the two were not
    /// generated from the same list of BCP-47 IDs.
    #[cfg(feature = "compiled_data")]
    pub fn try_new_with_mapper(mapper: I) -> Result<Self, DataError> {
        Self {
            inner: mapper,
            data: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_BCP47_TO_IANA_MAP_V1_MARKER,
            ),
        }
        .validated()
    }

    icu_provider::gen_any_buffer_data_constructors!((mapper: I) -> error: DataError,
        functions: [
            try_new_with_mapper: skip,
            try_new_with_mapper_with_any_provider,
            try_new_with_mapper_with_buffer_provider,
            try_new_with_mapper_unstable,
            Self,
        ]
    );

    // The compiled-data counterpart of this constructor is `try_new_with_mapper`
    // (the one marked `skip` in the macro above), so that is what the docs reference.
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new_with_mapper)]
    pub fn try_new_with_mapper_unstable<P>(provider: &P, mapper: I) -> Result<Self, DataError>
    where
        P: DataProvider<IanaToBcp47MapV3Marker> + DataProvider<Bcp47ToIanaMapV1Marker> + ?Sized,
    {
        // Only the canonical-ID table is loaded here; `mapper` already carries its data.
        let data = provider.load(Default::default())?.payload;
        Self {
            inner: mapper,
            data,
        }
        .validated()
    }

    /// Checks that the inner mapper's data and the canonical-ID data carry the same
    /// `bcp47_ids_checksum`, returning `self` unchanged if they do.
    ///
    /// The lookups index the canonical-ID table with indices derived from the inner
    /// mapper's data, so a mismatch is reported as inconsistent data.
    fn validated(self) -> Result<Self, DataError> {
        if self.inner.as_ref().data.get().bcp47_ids_checksum != self.data.get().bcp47_ids_checksum {
            return Err(
                DataErrorKind::InconsistentData(IanaToBcp47MapV3Marker::INFO)
                    .with_marker(Bcp47ToIanaMapV1Marker::INFO),
            );
        }
        Ok(self)
    }

    /// Returns a reference to the wrapped [`TimeZoneIdMapper`].
    pub fn inner(&self) -> &TimeZoneIdMapper {
        self.inner.as_ref()
    }

    /// Returns a cheap, copyable view of this mapper that exposes the lookup methods.
    ///
    /// The view borrows from `self` and cannot outlive it.
    pub fn as_borrowed(&self) -> TimeZoneIdMapperWithFastCanonicalizationBorrowed<'_> {
        TimeZoneIdMapperWithFastCanonicalizationBorrowed {
            inner: self.inner.as_ref().as_borrowed(),
            data: self.data.get(),
        }
    }
}
/// A borrowed, `Copy` view of a [`TimeZoneIdMapperWithFastCanonicalization`].
///
/// The fast canonicalization lookups are implemented on this type.
#[derive(Debug, Copy, Clone)]
pub struct TimeZoneIdMapperWithFastCanonicalizationBorrowed<'a> {
    /// Borrowed view of the wrapped IANA-to-BCP-47 mapper.
    inner: TimeZoneIdMapperBorrowed<'a>,
    /// Borrowed table of canonical IANA IDs, looked up by BCP-47 ID index.
    data: &'a Bcp47ToIanaMapV1<'a>,
}
/// With compiled data available, the default mapper is the compiled-data mapper.
#[cfg(feature = "compiled_data")]
impl Default for TimeZoneIdMapperWithFastCanonicalizationBorrowed<'static> {
    fn default() -> Self {
        TimeZoneIdMapperWithFastCanonicalizationBorrowed::new()
    }
}
impl TimeZoneIdMapperWithFastCanonicalizationBorrowed<'static> {
    /// Creates a borrowed mapper that points at the compiled (baked) data singletons.
    ///
    /// The consistency of the two compiled data structs is verified at compile time,
    /// so this constructor is infallible.
    #[cfg(feature = "compiled_data")]
    pub fn new() -> Self {
        // Compile-time equivalent of the checksum validation done by the fallible
        // constructors of the owned type.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_IANA_TO_BCP47_MAP_V3_MARKER.bcp47_ids_checksum
                == crate::provider::Baked::SINGLETON_BCP47_TO_IANA_MAP_V1_MARKER.bcp47_ids_checksum,
        );
        let inner = TimeZoneIdMapperBorrowed::new();
        let data = crate::provider::Baked::SINGLETON_BCP47_TO_IANA_MAP_V1_MARKER;
        Self { inner, data }
    }

    /// Converts this `'static` view into an owned
    /// [`TimeZoneIdMapperWithFastCanonicalization`].
    ///
    /// Both payloads are built from the static references held by `self`, so this is
    /// only available when the borrowed data is `'static`.
    pub fn static_to_owned(&self) -> TimeZoneIdMapperWithFastCanonicalization<TimeZoneIdMapper> {
        let inner = self.inner.static_to_owned();
        let data = DataPayload::from_static_ref(self.data);
        TimeZoneIdMapperWithFastCanonicalization { inner, data }
    }
}
impl<'a> TimeZoneIdMapperWithFastCanonicalizationBorrowed<'a> {
    /// Returns a copy of the wrapped borrowed mapper.
    pub fn inner(&self) -> TimeZoneIdMapperBorrowed<'a> {
        self.inner
    }

    /// Returns the canonical IANA ID for the given IANA ID, together with its BCP-47 ID.
    ///
    /// The input is looked up in the inner mapper's trie, and the resulting index is
    /// used to read both the BCP-47 ID and the canonical IANA ID directly.
    ///
    /// Returns `None` if the IANA ID is not found.
    pub fn canonicalize_iana(&self, iana_id: &str) -> Option<(&str, TimeZoneBcp47Id)> {
        let index = self.inner.iana_lookup_quick(iana_id)?.index();

        // Both tables are expected to cover every index stored in the trie; a miss is
        // a data bug, flagged in debug builds and reported as `None` otherwise.
        let bcp47_id = self.inner.data.bcp47_ids.get(index);
        debug_assert!(bcp47_id.is_some(), "index should be in range");
        let bcp47_id = bcp47_id?;

        let canonical = self.data.canonical_iana_ids.get(index);
        debug_assert!(canonical.is_some(), "index should be in range");
        let canonical = canonical?;

        Some((canonical, bcp47_id))
    }

    /// Returns the canonical IANA ID for the given BCP-47 ID.
    ///
    /// The BCP-47 ID is located by binary search in the inner mapper's ID list, and
    /// its position is used to index the canonical IANA ID table.
    ///
    /// Returns `None` if the BCP-47 ID is not present in the data.
    pub fn canonical_iana_from_bcp47(&self, bcp47_id: TimeZoneBcp47Id) -> Option<&str> {
        let position = self.inner.data.bcp47_ids.binary_search(&bcp47_id).ok()?;
        let canonical = self.data.canonical_iana_ids.get(position);
        debug_assert!(canonical.is_some(), "index should be in range");
        canonical
    }
}
/// A value stored in the IANA-to-BCP-47 trie.
///
/// The wrapped `usize` packs two pieces of information:
///
/// - lowest bit: set if the IANA ID is the canonical one for its time zone;
/// - remaining bits: an index (used by this file to index the list of BCP-47 IDs).
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
struct IanaTrieValue(usize);

impl IanaTrieValue {
    /// Mask selecting the "canonical" flag bit.
    const CANONICAL_FLAG: usize = 0x1;
    /// Number of low bits that precede the index.
    const INDEX_SHIFT: u32 = 1;

    /// Returns this value with the canonical flag set and the index unchanged.
    #[inline]
    pub(crate) fn to_canonical(self) -> Self {
        let Self(raw) = self;
        Self(raw | Self::CANONICAL_FLAG)
    }

    /// Builds the value that a canonical entry with the given index carries.
    #[inline]
    pub(crate) fn canonical_for_index(index: usize) -> Self {
        Self((index << Self::INDEX_SHIFT) | Self::CANONICAL_FLAG)
    }

    /// Extracts the index, discarding the canonical flag.
    #[inline]
    pub(crate) fn index(self) -> usize {
        let Self(raw) = self;
        raw >> Self::INDEX_SHIFT
    }

    /// Returns whether the canonical flag is set.
    #[inline]
    pub(crate) fn is_canonical(self) -> bool {
        let Self(raw) = self;
        raw & Self::CANONICAL_FLAG == Self::CANONICAL_FLAG
    }
}