use crate::provider::*;
use core::ops::RangeInclusive;
use icu_collections::codepointinvlist::CodePointInversionList;
use icu_provider::marker::ErasedMarker;
use icu_provider::prelude::*;
/// Owned container for a set of code points, such as the members of a binary property.
///
/// Wraps a [`DataPayload`] whose data struct is a [`PropertyCodePointSet`], stored under an
/// erased marker type. Call [`CodePointSetData::as_borrowed`] to obtain the
/// [`CodePointSetDataBorrowed`] view, which exposes the membership and range queries.
#[derive(Debug)]
pub struct CodePointSetData {
// Payload kept under `ErasedMarker`, so this struct is not generic over the marker it was
// loaded with (see `from_data`, which casts any compatible payload to this form).
data: DataPayload<ErasedMarker<PropertyCodePointSet<'static>>>,
}
impl CodePointSetData {
    /// Returns the compiled-data code point set for the binary property `P`.
    ///
    /// This simply forwards to [`CodePointSetDataBorrowed::new`], so the result is a
    /// `'static` borrowed view rather than an owned `CodePointSetData`.
    ///
    /// Only available when the `compiled_data` Cargo feature is enabled.
    // `new` deliberately returns the borrowed type instead of `Self`, which is what the
    // lint expectation below acknowledges.
    #[expect(clippy::new_ret_no_self)]
    #[cfg(feature = "compiled_data")]
    pub const fn new<P: BinaryProperty>() -> CodePointSetDataBorrowed<'static> {
        CodePointSetDataBorrowed::new::<P>()
    }

    // Rustdoc text for this constructor is generated by the macro in the `#[doc]` attribute.
    // The body adapts the buffer provider into a deserializing provider and then defers to
    // `try_new_unstable`, so errors surface exactly as they do there.
    #[cfg(feature = "serde")]
    #[doc = icu_provider::gen_buffer_unstable_docs!(BUFFER, Self::new)]
    pub fn try_new_with_buffer_provider<P: BinaryProperty>(
        provider: &(impl BufferProvider + ?Sized),
    ) -> Result<CodePointSetData, DataError> {
        use icu_provider::buf::AsDeserializingBufferProvider;
        let deserializing = provider.as_deserializing();
        Self::try_new_unstable::<P>(&deserializing)
    }

    // Rustdoc text for this constructor is generated by the macro in the `#[doc]` attribute.
    // The body issues a load with a default request for `P::DataMarker`, propagates any
    // `DataError`, and wraps the returned payload.
    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
    pub fn try_new_unstable<P: BinaryProperty>(
        provider: &(impl DataProvider<P::DataMarker> + ?Sized),
    ) -> Result<CodePointSetData, DataError> {
        let response = provider.load(Default::default())?;
        Ok(Self::from_data(response.payload))
    }

    /// Returns a [`CodePointSetDataBorrowed`] view of this data.
    ///
    /// The view borrows from `self` and is the type that carries the query methods
    /// (`contains`, `contains32`, `iter_ranges`, ...).
    #[inline]
    pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> {
        let set = self.data.get();
        CodePointSetDataBorrowed { set }
    }

    /// Wraps a payload of any marker `M` whose data struct is
    /// `PropertyCodePointSet<'static>`, casting it to the erased marker stored in `Self`.
    pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self
    where
        M: DynamicDataMarker<DataStruct = PropertyCodePointSet<'static>>,
    {
        let data = data.cast();
        Self { data }
    }

    /// Builds a `CodePointSetData` that owns the given [`CodePointInversionList`].
    ///
    /// The list is first converted to a [`PropertyCodePointSet`] and then placed in an
    /// owned payload.
    pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self {
        let owned = PropertyCodePointSet::from_code_point_inversion_list(set);
        let payload =
            DataPayload::<ErasedMarker<PropertyCodePointSet<'static>>>::from_owned(owned);
        Self::from_data(payload)
    }

    /// Returns a reference to the backing [`CodePointInversionList`], if one is available.
    ///
    /// The decision is delegated to `PropertyCodePointSet::as_code_point_inversion_list`;
    /// `None` means the underlying data could not hand out a borrowed inversion list.
    pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> {
        let backing = self.data.get();
        backing.as_code_point_inversion_list()
    }

    /// Returns a [`CodePointInversionList`] for this set.
    ///
    /// Delegates to `PropertyCodePointSet::to_code_point_inversion_list`. Unlike
    /// [`Self::as_code_point_inversion_list`], this always produces a list.
    pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> {
        let backing = self.data.get();
        backing.to_code_point_inversion_list()
    }
}
/// Borrowed view over a [`PropertyCodePointSet`]; it is `Copy`, so it can be passed by value.
///
/// Obtained from [`CodePointSetData::as_borrowed`], or directly from compiled data through
/// `CodePointSetDataBorrowed::new` when the `compiled_data` feature is enabled. The query
/// methods (`contains`, `contains32`, `iter_ranges`, `iter_ranges_complemented`) live on
/// this type.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
// Reference to the underlying set; the reference and the set's own data share `'a`.
set: &'a PropertyCodePointSet<'a>,
}
impl CodePointSetDataBorrowed<'static> {
    /// Creates a borrowed set for the binary property `P` from compiled data.
    ///
    /// The view points at `P::SINGLETON`, so no loading or allocation takes place.
    ///
    /// Only available when the `compiled_data` Cargo feature is enabled.
    #[inline]
    #[cfg(feature = "compiled_data")]
    pub const fn new<P: BinaryProperty>() -> Self {
        Self { set: P::SINGLETON }
    }

    /// Converts this `'static` view into an owned-style [`CodePointSetData`].
    ///
    /// The resulting payload is built from the static reference itself
    /// (`DataPayload::from_static_ref`), so the set is not copied.
    pub const fn static_to_owned(self) -> CodePointSetData {
        let Self { set } = self;
        CodePointSetData {
            data: DataPayload::from_static_ref(set),
        }
    }
}
impl<'a> CodePointSetDataBorrowed<'a> {
    /// Returns `true` if `ch` is a member of this set.
    #[inline]
    pub fn contains(self, ch: char) -> bool {
        let Self { set } = self;
        set.contains(ch)
    }

    /// Returns `true` if the code point value `ch` is a member of this set.
    ///
    /// Takes a raw `u32` instead of a `char`, which allows querying values that are not
    /// valid `char`s, such as surrogate code points.
    #[inline]
    pub fn contains32(self, ch: u32) -> bool {
        let Self { set } = self;
        set.contains32(ch)
    }

    /// Iterates over the set as inclusive ranges of `u32` code point values.
    ///
    /// The ranges come straight from `PropertyCodePointSet::iter_ranges`.
    #[inline]
    pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        let Self { set } = self;
        set.iter_ranges()
    }

    /// Iterates over inclusive ranges of `u32` code point values for the complement of
    /// the set, as produced by `PropertyCodePointSet::iter_ranges_complemented`.
    #[inline]
    pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        let Self { set } = self;
        set.iter_ranges_complemented()
    }
}
/// A binary property whose members can be obtained as a code point set.
///
/// Implementors name the data marker used to load the set and provide the property's
/// names. The trait has `crate::private::Sealed` as a supertrait, so it is presumably not
/// meant to be implemented outside this crate.
pub trait BinaryProperty: crate::private::Sealed + Sized {
    // Marker used by `CodePointSetData::try_new_unstable` to load this property's set.
    #[doc(hidden)]
    type DataMarker: DataMarker<DataStruct = PropertyCodePointSet<'static>>;

    // Compiled-data instance of the set; backs `CodePointSetDataBorrowed::new`.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON: &'static PropertyCodePointSet<'static>;

    /// The name of this property, as a byte string.
    const NAME: &'static [u8];

    /// The short name of this property, as a byte string.
    const SHORT_NAME: &'static [u8];

    /// Returns whether `ch` is a member of this property's compiled-data set.
    ///
    /// Equivalent to `CodePointSetData::new::<Self>().contains(ch)`.
    ///
    /// Only available when the `compiled_data` Cargo feature is enabled.
    #[cfg(feature = "compiled_data")]
    fn for_char(ch: char) -> bool {
        let members = CodePointSetData::new::<Self>();
        members.contains(ch)
    }
}
#[cfg(test)]
mod tests {
    use crate::props::{GeneralCategory, GeneralCategoryGroup, Script};
    use crate::CodePointMapData;
    use icu_collections::codepointinvlist::CodePointInversionListBuilder;

    /// The `Number` general category group contains digits from several scripts and
    /// excludes letters.
    #[test]
    fn test_general_category() {
        let number_group = CodePointMapData::<GeneralCategory>::new()
            .get_set_for_value_group(GeneralCategoryGroup::Number);
        let digits = number_group.as_borrowed();

        assert!(digits.contains('5'));
        // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{0665}'));
        // U+096B DEVANAGARI DIGIT FIVE
        assert!(digits.contains('\u{096b}'));

        assert!(!digits.contains('A'));
    }

    /// The set for `Script::Thai` contains Thai letters and digits, but neither Latin
    /// letters nor the baht sign.
    #[test]
    fn test_script() {
        let thai_script = CodePointMapData::<Script>::new().get_set_for_value(Script::Thai);
        let thai = thai_script.as_borrowed();

        // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e01}'));
        // U+0E50 THAI DIGIT ZERO
        assert!(thai.contains('\u{0e50}'));

        assert!(!thai.contains('A'));
        // U+0E3F THAI CURRENCY SYMBOL BAHT is expected to be outside the Thai script set.
        assert!(!thai.contains('\u{0e3f}'));
    }

    /// Each general category group's set must equal the union of the sets of its member
    /// categories.
    #[test]
    fn test_gc_groupings() {
        /// Builds the union of the `members` sets and compares its inversion list with
        /// the one reported for `group`.
        fn assert_group_is_union(group: GeneralCategoryGroup, members: &[GeneralCategory]) {
            let group_data =
                CodePointMapData::<GeneralCategory>::new().get_set_for_value_group(group);
            let group_list = group_data
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            let mut union_builder = CodePointInversionListBuilder::new();
            for &member in members {
                let member_data =
                    CodePointMapData::<GeneralCategory>::new().get_set_for_value(member);
                member_data
                    .as_borrowed()
                    .iter_ranges()
                    .for_each(|range| union_builder.add_range32(range));
            }
            let union_list = union_builder.build();

            // Printed so that a failing comparison identifies the offending group.
            let (category, subcategories) = (group, members);
            println!("{category:?} {subcategories:?}");
            assert_eq!(
                group_list.get_inversion_list_vec(),
                union_list.get_inversion_list_vec()
            );
        }

        // Checked in this order; each entry pairs a group with its member categories.
        let cases: [(GeneralCategoryGroup, &[GeneralCategory]); 7] = [
            (
                GeneralCategoryGroup::Letter,
                &[
                    GeneralCategory::UppercaseLetter,
                    GeneralCategory::LowercaseLetter,
                    GeneralCategory::TitlecaseLetter,
                    GeneralCategory::ModifierLetter,
                    GeneralCategory::OtherLetter,
                ],
            ),
            (
                GeneralCategoryGroup::Other,
                &[
                    GeneralCategory::Control,
                    GeneralCategory::Format,
                    GeneralCategory::Unassigned,
                    GeneralCategory::PrivateUse,
                    GeneralCategory::Surrogate,
                ],
            ),
            (
                GeneralCategoryGroup::Mark,
                &[
                    GeneralCategory::SpacingMark,
                    GeneralCategory::EnclosingMark,
                    GeneralCategory::NonspacingMark,
                ],
            ),
            (
                GeneralCategoryGroup::Number,
                &[
                    GeneralCategory::DecimalNumber,
                    GeneralCategory::LetterNumber,
                    GeneralCategory::OtherNumber,
                ],
            ),
            (
                GeneralCategoryGroup::Punctuation,
                &[
                    GeneralCategory::ConnectorPunctuation,
                    GeneralCategory::DashPunctuation,
                    GeneralCategory::ClosePunctuation,
                    GeneralCategory::FinalPunctuation,
                    GeneralCategory::InitialPunctuation,
                    GeneralCategory::OtherPunctuation,
                    GeneralCategory::OpenPunctuation,
                ],
            ),
            (
                GeneralCategoryGroup::Symbol,
                &[
                    GeneralCategory::CurrencySymbol,
                    GeneralCategory::ModifierSymbol,
                    GeneralCategory::MathSymbol,
                    GeneralCategory::OtherSymbol,
                ],
            ),
            (
                GeneralCategoryGroup::Separator,
                &[
                    GeneralCategory::LineSeparator,
                    GeneralCategory::ParagraphSeparator,
                    GeneralCategory::SpaceSeparator,
                ],
            ),
        ];

        for (group, members) in cases {
            assert_group_is_union(group, members);
        }
    }

    /// Surrogate code points are not valid `char`s, so they are queried via `contains32`.
    #[test]
    fn test_gc_surrogate() {
        let surrogate_category = CodePointMapData::<GeneralCategory>::new()
            .get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogate_category.as_borrowed();

        // First, an interior, and the last surrogate code point.
        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));

        assert!(!surrogates.contains('A'));
    }
}