use crate::error::PropertiesError;
use crate::provider::*;
use crate::*;
use core::iter::FromIterator;
use core::ops::RangeInclusive;
use icu_collections::codepointinvlist::CodePointInversionList;
use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
use icu_provider::prelude::*;
/// Owned wrapper around the data for a code point set property.
///
/// The data is held as a [`DataPayload`] whose yokeable is
/// [`PropertyCodePointSetV1`]. Call [`CodePointSetData::as_borrowed`] to get a
/// copyable [`CodePointSetDataBorrowed`] view, which is where the query
/// methods (`contains`, `iter_ranges`, ...) live.
#[derive(Debug)]
pub struct CodePointSetData {
// Payload stored under the crate-private erased marker, so one struct can
// hold the data of any property whose yokeable is `PropertyCodePointSetV1`.
data: DataPayload<ErasedSetlikeMarker>,
}
/// Crate-private marker used as the payload marker inside [`CodePointSetData`].
///
/// It only fixes the yokeable type to [`PropertyCodePointSetV1`]; payloads of
/// other markers with that same yokeable are cast to this marker in
/// [`CodePointSetData::from_data`].
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedSetlikeMarker;
impl DataMarker for ErasedSetlikeMarker {
type Yokeable = PropertyCodePointSetV1<'static>;
}
impl CodePointSetData {
    /// Creates a borrowed, copyable view of this set.
    ///
    /// The returned [`CodePointSetDataBorrowed`] references the data held by
    /// `self` and exposes the lookup and iteration methods.
    #[inline]
    pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> {
        let set = self.data.get();
        CodePointSetDataBorrowed { set }
    }

    /// Builds a [`CodePointSetData`] from a payload of any marker `M` whose
    /// yokeable is [`PropertyCodePointSetV1`].
    ///
    /// The payload is cast to the crate-private erased marker; the data itself
    /// is the same payload that was passed in.
    pub fn from_data<M>(data: DataPayload<M>) -> Self
    where
        M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
    {
        let erased: DataPayload<ErasedSetlikeMarker> = data.cast();
        Self { data: erased }
    }

    /// Builds a [`CodePointSetData`] that owns the given inversion list.
    pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self {
        let property_set = PropertyCodePointSetV1::from_code_point_inversion_list(set);
        let payload = DataPayload::<ErasedSetlikeMarker>::from_owned(property_set);
        Self::from_data(payload)
    }

    /// Returns a reference to the underlying [`CodePointInversionList`], if
    /// the data can hand one out.
    ///
    /// This delegates to `PropertyCodePointSetV1::as_code_point_inversion_list`;
    /// see that method for the conditions under which `None` is returned.
    pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> {
        let property_set = self.data.get();
        property_set.as_code_point_inversion_list()
    }

    /// Returns the data as a [`CodePointInversionList`].
    ///
    /// Unlike [`Self::as_code_point_inversion_list`] this always yields a
    /// value; it delegates to
    /// `PropertyCodePointSetV1::to_code_point_inversion_list`.
    pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> {
        let property_set = self.data.get();
        property_set.to_code_point_inversion_list()
    }
}
/// Borrowed, `Copy` view over the data of a code point set property.
///
/// Obtained from [`CodePointSetData::as_borrowed`] or, with compiled data,
/// from the per-property const functions in this module. All query methods
/// take `self` by value because the view is just a reference.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
// Reference to the property data that every query is forwarded to.
set: &'a PropertyCodePointSetV1<'a>,
}
impl CodePointSetDataBorrowed<'static> {
/// Converts a `'static` borrowed view into an owned [`CodePointSetData`].
///
/// This is a `const fn`: the `'static` reference is wrapped in a payload via
/// [`DataPayload::from_static_ref`], so it can be used in const contexts.
pub const fn static_to_owned(self) -> CodePointSetData {
CodePointSetData {
data: DataPayload::from_static_ref(self.set),
}
}
}
impl<'a> CodePointSetDataBorrowed<'a> {
    /// Reports whether the set contains the character `ch`.
    #[inline]
    pub fn contains(self, ch: char) -> bool {
        let Self { set } = self;
        set.contains(ch)
    }

    /// Reports whether the set contains the code point `ch`, given as a `u32`.
    ///
    /// Taking a `u32` allows querying values that are not valid `char`s.
    #[inline]
    pub fn contains32(self, ch: u32) -> bool {
        let Self { set } = self;
        set.contains32(ch)
    }

    /// Iterates over the set's code point ranges as inclusive `u32` ranges.
    ///
    /// Delegates to `PropertyCodePointSetV1::iter_ranges`.
    #[inline]
    pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        let Self { set } = self;
        set.iter_ranges()
    }

    /// Iterates over the complemented ranges as inclusive `u32` ranges.
    ///
    /// Delegates to `PropertyCodePointSetV1::iter_ranges_complemented`.
    // NOTE(review): presumably these are the ranges of code points *not* in the
    // set; confirm against `PropertyCodePointSetV1`.
    #[inline]
    pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        let Self { set } = self;
        set.iter_ranges_complemented()
    }
}
/// Owned wrapper around the data for a property whose values are sets that
/// can hold strings as well as code points.
///
/// The data is held as a [`DataPayload`] whose yokeable is
/// [`PropertyUnicodeSetV1`]. Call [`UnicodeSetData::as_borrowed`] to get a
/// copyable [`UnicodeSetDataBorrowed`] view for queries.
#[derive(Debug)]
pub struct UnicodeSetData {
// Payload stored under the crate-private erased marker, so one struct can
// hold the data of any property whose yokeable is `PropertyUnicodeSetV1`.
data: DataPayload<ErasedUnicodeSetlikeMarker>,
}
/// Crate-private marker used as the payload marker inside [`UnicodeSetData`].
///
/// It only fixes the yokeable type to [`PropertyUnicodeSetV1`]; payloads of
/// other markers with that same yokeable are cast to this marker in
/// [`UnicodeSetData::from_data`].
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedUnicodeSetlikeMarker;
impl DataMarker for ErasedUnicodeSetlikeMarker {
type Yokeable = PropertyUnicodeSetV1<'static>;
}
impl UnicodeSetData {
    /// Creates a borrowed, copyable view of this set.
    ///
    /// The returned [`UnicodeSetDataBorrowed`] references the data held by
    /// `self` and exposes the lookup methods.
    #[inline]
    pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> {
        let set = self.data.get();
        UnicodeSetDataBorrowed { set }
    }

    /// Builds a [`UnicodeSetData`] from a payload of any marker `M` whose
    /// yokeable is [`PropertyUnicodeSetV1`].
    ///
    /// The payload is cast to the crate-private erased marker; the data itself
    /// is the same payload that was passed in.
    pub fn from_data<M>(data: DataPayload<M>) -> Self
    where
        M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>,
    {
        let erased: DataPayload<ErasedUnicodeSetlikeMarker> = data.cast();
        Self { data: erased }
    }

    /// Builds a [`UnicodeSetData`] that owns the given
    /// [`CodePointInversionListAndStringList`].
    pub fn from_code_point_inversion_list_string_list(
        set: CodePointInversionListAndStringList<'static>,
    ) -> Self {
        let property_set = PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set);
        let payload = DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(property_set);
        Self::from_data(payload)
    }

    /// Returns a reference to the underlying
    /// [`CodePointInversionListAndStringList`], if the data can hand one out.
    ///
    /// This delegates to
    /// `PropertyUnicodeSetV1::as_code_point_inversion_list_string_list`; see
    /// that method for the conditions under which `None` is returned.
    pub fn as_code_point_inversion_list_string_list(
        &self,
    ) -> Option<&CodePointInversionListAndStringList<'_>> {
        let property_set = self.data.get();
        property_set.as_code_point_inversion_list_string_list()
    }

    /// Returns the data as a [`CodePointInversionListAndStringList`].
    ///
    /// Unlike [`Self::as_code_point_inversion_list_string_list`] this always
    /// yields a value; it delegates to
    /// `PropertyUnicodeSetV1::to_code_point_inversion_list_string_list`.
    pub fn to_code_point_inversion_list_string_list(
        &self,
    ) -> CodePointInversionListAndStringList<'_> {
        let property_set = self.data.get();
        property_set.to_code_point_inversion_list_string_list()
    }
}
/// Borrowed, `Copy` view over the data of a [`UnicodeSetData`].
///
/// Obtained from [`UnicodeSetData::as_borrowed`] or, with compiled data, from
/// the per-property const functions in this module.
#[derive(Clone, Copy, Debug)]
pub struct UnicodeSetDataBorrowed<'a> {
// Reference to the property data that every query is forwarded to.
set: &'a PropertyUnicodeSetV1<'a>,
}
impl<'a> UnicodeSetDataBorrowed<'a> {
    /// Reports whether the set contains the string `s`.
    ///
    /// Delegates to `PropertyUnicodeSetV1::contains`.
    #[inline]
    pub fn contains(self, s: &str) -> bool {
        let Self { set } = self;
        set.contains(s)
    }

    /// Reports whether the set contains the code point `cp`, given as a `u32`.
    ///
    /// Delegates to `PropertyUnicodeSetV1::contains32`.
    #[inline]
    pub fn contains32(&self, cp: u32) -> bool {
        // The view is `Copy`, so copying it out of `&self` is free.
        let Self { set } = *self;
        set.contains32(cp)
    }

    /// Reports whether the set contains the single character `ch`.
    ///
    /// Delegates to `PropertyUnicodeSetV1::contains_char`.
    #[inline]
    pub fn contains_char(&self, ch: char) -> bool {
        let Self { set } = *self;
        set.contains_char(ch)
    }
}
impl UnicodeSetDataBorrowed<'static> {
/// Converts a `'static` borrowed view into an owned [`UnicodeSetData`].
///
/// This is a `const fn`: the `'static` reference is wrapped in a payload via
/// [`DataPayload::from_static_ref`], so it can be used in const contexts.
pub const fn static_to_owned(self) -> UnicodeSetData {
UnicodeSetData {
data: DataPayload::from_static_ref(self.set),
}
}
}
/// Loads the data for the code point set property identified by marker `M`
/// from `provider` and wraps it in a [`CodePointSetData`].
///
/// The request is the default [`DataRequest`] (`Default::default()`).
///
/// # Errors
///
/// Any error from the provider, or from taking the payload out of the
/// response, is converted into a [`PropertiesError`] and returned.
pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError>
where
    M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
    P: DataProvider<M> + ?Sized,
{
    let response = provider.load(Default::default())?;
    let payload = response.take_payload()?;
    Ok(CodePointSetData::from_data(payload))
}
// Generates the two public accessors for one code point set property:
//
// * `$funcname(provider)`: loads the property data from a caller-supplied
//   `DataProvider<$keyed_data_marker>` through `load_set_data`;
// * `$constname()`: a `const fn`, only compiled with the `compiled_data`
//   feature, that returns a borrowed view over the baked singleton
//   `crate::provider::Baked::$singleton_name`.
//
// Any attributes (typically doc comments) written before the `const fn` line
// of an invocation are attached to the generated const function. They are
// optional (`*`, not `+`), so an invocation without doc comments still matches
// the rule instead of failing with "no rules expected the token `pub`".
macro_rules! make_code_point_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])*
        $cvis:vis const fn $constname:ident() => $singleton_name:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<CodePointSetData, PropertiesError> {
            load_set_data(provider)
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> CodePointSetDataBorrowed<'static> {
            CodePointSetDataBorrowed {
                set: crate::provider::Baked::$singleton_name,
            }
        }
    }
}
make_code_point_set_property! {
property: "ASCII_Hex_Digit";
marker: AsciiHexDigitProperty;
keyed_data_marker: AsciiHexDigitV1Marker;
func:
/// Returns the code point set for the `ASCII_Hex_Digit` property, backed by compiled data.
///
/// [`load_ascii_hex_digit()`] loads this property's data from a custom data provider instead.
pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1;
pub fn load_ascii_hex_digit();
}
make_code_point_set_property! {
property: "Alnum";
marker: AlnumProperty;
keyed_data_marker: AlnumV1Marker;
func:
/// Returns the code point set for the `Alnum` property, backed by compiled data.
///
/// [`load_alnum()`] loads this property's data from a custom data provider instead.
pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1;
pub fn load_alnum();
}
make_code_point_set_property! {
property: "Alphabetic";
marker: AlphabeticProperty;
keyed_data_marker: AlphabeticV1Marker;
func:
/// Returns the code point set for the `Alphabetic` property, backed by compiled data.
///
/// [`load_alphabetic()`] loads this property's data from a custom data provider instead.
pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1;
pub fn load_alphabetic();
}
make_code_point_set_property! {
property: "Bidi_Control";
marker: BidiControlProperty;
keyed_data_marker: BidiControlV1Marker;
func:
/// Returns the code point set for the `Bidi_Control` property, backed by compiled data.
///
/// [`load_bidi_control()`] loads this property's data from a custom data provider instead.
pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1;
pub fn load_bidi_control();
}
make_code_point_set_property! {
property: "Bidi_Mirrored";
marker: BidiMirroredProperty;
keyed_data_marker: BidiMirroredV1Marker;
func:
/// Returns the code point set for the `Bidi_Mirrored` property, backed by compiled data.
///
/// [`load_bidi_mirrored()`] loads this property's data from a custom data provider instead.
pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1;
pub fn load_bidi_mirrored();
}
make_code_point_set_property! {
property: "Blank";
marker: BlankProperty;
keyed_data_marker: BlankV1Marker;
func:
/// Returns the code point set for the `Blank` property, backed by compiled data.
///
/// [`load_blank()`] loads this property's data from a custom data provider instead.
pub const fn blank() => SINGLETON_PROPS_BLANK_V1;
pub fn load_blank();
}
make_code_point_set_property! {
property: "Cased";
marker: CasedProperty;
keyed_data_marker: CasedV1Marker;
func:
/// Returns the code point set for the `Cased` property, backed by compiled data.
///
/// [`load_cased()`] loads this property's data from a custom data provider instead.
pub const fn cased() => SINGLETON_PROPS_CASED_V1;
pub fn load_cased();
}
make_code_point_set_property! {
property: "Case_Ignorable";
marker: CaseIgnorableProperty;
keyed_data_marker: CaseIgnorableV1Marker;
func:
/// Returns the code point set for the `Case_Ignorable` property, backed by compiled data.
///
/// [`load_case_ignorable()`] loads this property's data from a custom data provider instead.
pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1;
pub fn load_case_ignorable();
}
make_code_point_set_property! {
property: "Full_Composition_Exclusion";
marker: FullCompositionExclusionProperty;
keyed_data_marker: FullCompositionExclusionV1Marker;
func:
/// Returns the code point set for the `Full_Composition_Exclusion` property, backed by compiled data.
///
/// [`load_full_composition_exclusion()`] loads this property's data from a custom data provider instead.
pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1;
pub fn load_full_composition_exclusion();
}
make_code_point_set_property! {
property: "Changes_When_Casefolded";
marker: ChangesWhenCasefoldedProperty;
keyed_data_marker: ChangesWhenCasefoldedV1Marker;
func:
/// Returns the code point set for the `Changes_When_Casefolded` property, backed by compiled data.
///
/// [`load_changes_when_casefolded()`] loads this property's data from a custom data provider instead.
pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1;
pub fn load_changes_when_casefolded();
}
make_code_point_set_property! {
property: "Changes_When_Casemapped";
marker: ChangesWhenCasemappedProperty;
keyed_data_marker: ChangesWhenCasemappedV1Marker;
func:
/// Returns the code point set for the `Changes_When_Casemapped` property, backed by compiled data.
///
/// [`load_changes_when_casemapped()`] loads this property's data from a custom data provider instead.
pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1;
pub fn load_changes_when_casemapped();
}
make_code_point_set_property! {
property: "Changes_When_NFKC_Casefolded";
marker: ChangesWhenNfkcCasefoldedProperty;
keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker;
func:
/// Returns the code point set for the `Changes_When_NFKC_Casefolded` property, backed by compiled data.
///
/// [`load_changes_when_nfkc_casefolded()`] loads this property's data from a custom data provider instead.
pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1;
pub fn load_changes_when_nfkc_casefolded();
}
make_code_point_set_property! {
property: "Changes_When_Lowercased";
marker: ChangesWhenLowercasedProperty;
keyed_data_marker: ChangesWhenLowercasedV1Marker;
func:
/// Returns the code point set for the `Changes_When_Lowercased` property, backed by compiled data.
///
/// [`load_changes_when_lowercased()`] loads this property's data from a custom data provider instead.
pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1;
pub fn load_changes_when_lowercased();
}
make_code_point_set_property! {
property: "Changes_When_Titlecased";
marker: ChangesWhenTitlecasedProperty;
keyed_data_marker: ChangesWhenTitlecasedV1Marker;
func:
/// Returns the code point set for the `Changes_When_Titlecased` property, backed by compiled data.
///
/// [`load_changes_when_titlecased()`] loads this property's data from a custom data provider instead.
pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1;
pub fn load_changes_when_titlecased();
}
make_code_point_set_property! {
property: "Changes_When_Uppercased";
marker: ChangesWhenUppercasedProperty;
keyed_data_marker: ChangesWhenUppercasedV1Marker;
func:
/// Returns the code point set for the `Changes_When_Uppercased` property, backed by compiled data.
///
/// [`load_changes_when_uppercased()`] loads this property's data from a custom data provider instead.
pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1;
pub fn load_changes_when_uppercased();
}
make_code_point_set_property! {
property: "Dash";
marker: DashProperty;
keyed_data_marker: DashV1Marker;
func:
/// Returns the code point set for the `Dash` property, backed by compiled data.
///
/// [`load_dash()`] loads this property's data from a custom data provider instead.
pub const fn dash() => SINGLETON_PROPS_DASH_V1;
pub fn load_dash();
}
make_code_point_set_property! {
property: "Deprecated";
marker: DeprecatedProperty;
keyed_data_marker: DeprecatedV1Marker;
func:
/// Returns the code point set for the `Deprecated` property, backed by compiled data.
///
/// [`load_deprecated()`] loads this property's data from a custom data provider instead.
pub const fn deprecated() => SINGLETON_PROPS_DEP_V1;
pub fn load_deprecated();
}
make_code_point_set_property! {
property: "Default_Ignorable_Code_Point";
marker: DefaultIgnorableCodePointProperty;
keyed_data_marker: DefaultIgnorableCodePointV1Marker;
func:
/// Returns the code point set for the `Default_Ignorable_Code_Point` property, backed by compiled data.
///
/// [`load_default_ignorable_code_point()`] loads this property's data from a custom data provider instead.
pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1;
pub fn load_default_ignorable_code_point();
}
make_code_point_set_property! {
property: "Diacritic";
marker: DiacriticProperty;
keyed_data_marker: DiacriticV1Marker;
func:
/// Returns the code point set for the `Diacritic` property, backed by compiled data.
///
/// [`load_diacritic()`] loads this property's data from a custom data provider instead.
pub const fn diacritic() => SINGLETON_PROPS_DIA_V1;
pub fn load_diacritic();
}
make_code_point_set_property! {
property: "Emoji_Modifier_Base";
marker: EmojiModifierBaseProperty;
keyed_data_marker: EmojiModifierBaseV1Marker;
func:
/// Returns the code point set for the `Emoji_Modifier_Base` property, backed by compiled data.
///
/// [`load_emoji_modifier_base()`] loads this property's data from a custom data provider instead.
pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1;
pub fn load_emoji_modifier_base();
}
make_code_point_set_property! {
property: "Emoji_Component";
marker: EmojiComponentProperty;
keyed_data_marker: EmojiComponentV1Marker;
func:
/// Returns the code point set for the `Emoji_Component` property, backed by compiled data.
///
/// [`load_emoji_component()`] loads this property's data from a custom data provider instead.
pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1;
pub fn load_emoji_component();
}
make_code_point_set_property! {
property: "Emoji_Modifier";
marker: EmojiModifierProperty;
keyed_data_marker: EmojiModifierV1Marker;
func:
/// Returns the code point set for the `Emoji_Modifier` property, backed by compiled data.
///
/// [`load_emoji_modifier()`] loads this property's data from a custom data provider instead.
pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1;
pub fn load_emoji_modifier();
}
make_code_point_set_property! {
property: "Emoji";
marker: EmojiProperty;
keyed_data_marker: EmojiV1Marker;
func:
/// Returns the code point set for the `Emoji` property, backed by compiled data.
///
/// [`load_emoji()`] loads this property's data from a custom data provider instead.
pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1;
pub fn load_emoji();
}
make_code_point_set_property! {
property: "Emoji_Presentation";
marker: EmojiPresentationProperty;
keyed_data_marker: EmojiPresentationV1Marker;
func:
/// Returns the code point set for the `Emoji_Presentation` property, backed by compiled data.
///
/// [`load_emoji_presentation()`] loads this property's data from a custom data provider instead.
pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1;
pub fn load_emoji_presentation();
}
make_code_point_set_property! {
property: "Extender";
marker: ExtenderProperty;
keyed_data_marker: ExtenderV1Marker;
func:
/// Returns the code point set for the `Extender` property, backed by compiled data.
///
/// [`load_extender()`] loads this property's data from a custom data provider instead.
pub const fn extender() => SINGLETON_PROPS_EXT_V1;
pub fn load_extender();
}
make_code_point_set_property! {
property: "Extended_Pictographic";
marker: ExtendedPictographicProperty;
keyed_data_marker: ExtendedPictographicV1Marker;
func:
/// Returns the code point set for the `Extended_Pictographic` property, backed by compiled data.
///
/// [`load_extended_pictographic()`] loads this property's data from a custom data provider instead.
pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1;
pub fn load_extended_pictographic();
}
make_code_point_set_property! {
property: "Graph";
marker: GraphProperty;
keyed_data_marker: GraphV1Marker;
func:
/// Returns the code point set for the `Graph` property, backed by compiled data.
///
/// [`load_graph()`] loads this property's data from a custom data provider instead.
pub const fn graph() => SINGLETON_PROPS_GRAPH_V1;
pub fn load_graph();
}
make_code_point_set_property! {
property: "Grapheme_Base";
marker: GraphemeBaseProperty;
keyed_data_marker: GraphemeBaseV1Marker;
func:
/// Returns the code point set for the `Grapheme_Base` property, backed by compiled data.
///
/// [`load_grapheme_base()`] loads this property's data from a custom data provider instead.
pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1;
pub fn load_grapheme_base();
}
make_code_point_set_property! {
property: "Grapheme_Extend";
marker: GraphemeExtendProperty;
keyed_data_marker: GraphemeExtendV1Marker;
func:
/// Returns the code point set for the `Grapheme_Extend` property, backed by compiled data.
///
/// [`load_grapheme_extend()`] loads this property's data from a custom data provider instead.
pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1;
pub fn load_grapheme_extend();
}
make_code_point_set_property! {
property: "Grapheme_Link";
marker: GraphemeLinkProperty;
keyed_data_marker: GraphemeLinkV1Marker;
func:
/// Returns the code point set for the `Grapheme_Link` property, backed by compiled data.
///
/// [`load_grapheme_link()`] loads this property's data from a custom data provider instead.
pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1;
pub fn load_grapheme_link();
}
make_code_point_set_property! {
property: "Hex_Digit";
marker: HexDigitProperty;
keyed_data_marker: HexDigitV1Marker;
func:
/// Returns the code point set for the `Hex_Digit` property, backed by compiled data.
///
/// [`load_hex_digit()`] loads this property's data from a custom data provider instead.
pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1;
pub fn load_hex_digit();
}
make_code_point_set_property! {
property: "Hyphen";
marker: HyphenProperty;
keyed_data_marker: HyphenV1Marker;
func:
/// Returns the code point set for the `Hyphen` property, backed by compiled data.
///
/// [`load_hyphen()`] loads this property's data from a custom data provider instead.
pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1;
pub fn load_hyphen();
}
make_code_point_set_property! {
property: "Id_Continue";
marker: IdContinueProperty;
keyed_data_marker: IdContinueV1Marker;
func:
/// Returns the code point set for the `Id_Continue` property, backed by compiled data.
///
/// [`load_id_continue()`] loads this property's data from a custom data provider instead.
pub const fn id_continue() => SINGLETON_PROPS_IDC_V1;
pub fn load_id_continue();
}
make_code_point_set_property! {
property: "Ideographic";
marker: IdeographicProperty;
keyed_data_marker: IdeographicV1Marker;
func:
/// Returns the code point set for the `Ideographic` property, backed by compiled data.
///
/// [`load_ideographic()`] loads this property's data from a custom data provider instead.
pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1;
pub fn load_ideographic();
}
make_code_point_set_property! {
property: "Id_Start";
marker: IdStartProperty;
keyed_data_marker: IdStartV1Marker;
func:
/// Returns the code point set for the `Id_Start` property, backed by compiled data.
///
/// [`load_id_start()`] loads this property's data from a custom data provider instead.
pub const fn id_start() => SINGLETON_PROPS_IDS_V1;
pub fn load_id_start();
}
make_code_point_set_property! {
property: "Ids_Binary_Operator";
marker: IdsBinaryOperatorProperty;
keyed_data_marker: IdsBinaryOperatorV1Marker;
func:
/// Returns the code point set for the `Ids_Binary_Operator` property, backed by compiled data.
///
/// [`load_ids_binary_operator()`] loads this property's data from a custom data provider instead.
pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1;
pub fn load_ids_binary_operator();
}
make_code_point_set_property! {
property: "Ids_Trinary_Operator";
marker: IdsTrinaryOperatorProperty;
keyed_data_marker: IdsTrinaryOperatorV1Marker;
func:
/// Returns the code point set for the `Ids_Trinary_Operator` property, backed by compiled data.
///
/// [`load_ids_trinary_operator()`] loads this property's data from a custom data provider instead.
pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1;
pub fn load_ids_trinary_operator();
}
make_code_point_set_property! {
property: "Join_Control";
marker: JoinControlProperty;
keyed_data_marker: JoinControlV1Marker;
func:
/// Returns the code point set for the `Join_Control` property, backed by compiled data.
///
/// [`load_join_control()`] loads this property's data from a custom data provider instead.
pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1;
pub fn load_join_control();
}
make_code_point_set_property! {
property: "Logical_Order_Exception";
marker: LogicalOrderExceptionProperty;
keyed_data_marker: LogicalOrderExceptionV1Marker;
func:
/// Returns the code point set for the `Logical_Order_Exception` property, backed by compiled data.
///
/// [`load_logical_order_exception()`] loads this property's data from a custom data provider instead.
pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1;
pub fn load_logical_order_exception();
}
make_code_point_set_property! {
property: "Lowercase";
marker: LowercaseProperty;
keyed_data_marker: LowercaseV1Marker;
func:
/// Returns the code point set for the `Lowercase` property, backed by compiled data.
///
/// [`load_lowercase()`] loads this property's data from a custom data provider instead.
pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1;
pub fn load_lowercase();
}
make_code_point_set_property! {
property: "Math";
marker: MathProperty;
keyed_data_marker: MathV1Marker;
func:
/// Returns the code point set for the `Math` property, backed by compiled data.
///
/// [`load_math()`] loads this property's data from a custom data provider instead.
pub const fn math() => SINGLETON_PROPS_MATH_V1;
pub fn load_math();
}
make_code_point_set_property! {
property: "Noncharacter_Code_Point";
marker: NoncharacterCodePointProperty;
keyed_data_marker: NoncharacterCodePointV1Marker;
func:
/// Returns the code point set for the `Noncharacter_Code_Point` property, backed by compiled data.
///
/// [`load_noncharacter_code_point()`] loads this property's data from a custom data provider instead.
pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1;
pub fn load_noncharacter_code_point();
}
make_code_point_set_property! {
property: "NFC_Inert";
marker: NfcInertProperty;
keyed_data_marker: NfcInertV1Marker;
func:
/// Returns the code point set for the `NFC_Inert` property, backed by compiled data.
///
/// [`load_nfc_inert()`] loads this property's data from a custom data provider instead.
pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1;
pub fn load_nfc_inert();
}
make_code_point_set_property! {
property: "NFD_Inert";
marker: NfdInertProperty;
keyed_data_marker: NfdInertV1Marker;
func:
/// Returns the code point set for the `NFD_Inert` property, backed by compiled data.
///
/// [`load_nfd_inert()`] loads this property's data from a custom data provider instead.
pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1;
pub fn load_nfd_inert();
}
make_code_point_set_property! {
property: "NFKC_Inert";
marker: NfkcInertProperty;
keyed_data_marker: NfkcInertV1Marker;
func:
/// Returns the code point set for the `NFKC_Inert` property, backed by compiled data.
///
/// [`load_nfkc_inert()`] loads this property's data from a custom data provider instead.
pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1;
pub fn load_nfkc_inert();
}
make_code_point_set_property! {
property: "NFKD_Inert";
marker: NfkdInertProperty;
keyed_data_marker: NfkdInertV1Marker;
func:
/// Returns the code point set for the `NFKD_Inert` property, backed by compiled data.
///
/// [`load_nfkd_inert()`] loads this property's data from a custom data provider instead.
pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1;
pub fn load_nfkd_inert();
}
make_code_point_set_property! {
property: "Pattern_Syntax";
marker: PatternSyntaxProperty;
keyed_data_marker: PatternSyntaxV1Marker;
func:
/// Returns the code point set for the `Pattern_Syntax` property, backed by compiled data.
///
/// [`load_pattern_syntax()`] loads this property's data from a custom data provider instead.
pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1;
pub fn load_pattern_syntax();
}
make_code_point_set_property! {
property: "Pattern_White_Space";
marker: PatternWhiteSpaceProperty;
keyed_data_marker: PatternWhiteSpaceV1Marker;
func:
/// Returns the code point set for the `Pattern_White_Space` property, backed by compiled data.
///
/// [`load_pattern_white_space()`] loads this property's data from a custom data provider instead.
pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1;
pub fn load_pattern_white_space();
}
make_code_point_set_property! {
property: "Prepended_Concatenation_Mark";
marker: PrependedConcatenationMarkProperty;
keyed_data_marker: PrependedConcatenationMarkV1Marker;
func:
/// Returns the code point set for the `Prepended_Concatenation_Mark` property, backed by compiled data.
///
/// [`load_prepended_concatenation_mark()`] loads this property's data from a custom data provider instead.
pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1;
pub fn load_prepended_concatenation_mark();
}
make_code_point_set_property! {
property: "Print";
marker: PrintProperty;
keyed_data_marker: PrintV1Marker;
func:
/// Returns the code point set for the `Print` property, backed by compiled data.
///
/// [`load_print()`] loads this property's data from a custom data provider instead.
pub const fn print() => SINGLETON_PROPS_PRINT_V1;
pub fn load_print();
}
make_code_point_set_property! {
property: "Quotation_Mark";
marker: QuotationMarkProperty;
keyed_data_marker: QuotationMarkV1Marker;
func:
/// Returns the code point set for the `Quotation_Mark` property, backed by compiled data.
///
/// [`load_quotation_mark()`] loads this property's data from a custom data provider instead.
pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1;
pub fn load_quotation_mark();
}
make_code_point_set_property! {
property: "Radical";
marker: RadicalProperty;
keyed_data_marker: RadicalV1Marker;
func:
/// Returns the code point set for the `Radical` property, backed by compiled data.
///
/// [`load_radical()`] loads this property's data from a custom data provider instead.
pub const fn radical() => SINGLETON_PROPS_RADICAL_V1;
pub fn load_radical();
}
make_code_point_set_property! {
property: "Regional_Indicator";
marker: RegionalIndicatorProperty;
keyed_data_marker: RegionalIndicatorV1Marker;
func:
/// Returns the code point set for the `Regional_Indicator` property, backed by compiled data.
///
/// [`load_regional_indicator()`] loads this property's data from a custom data provider instead.
pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1;
pub fn load_regional_indicator();
}
make_code_point_set_property! {
property: "Soft_Dotted";
marker: SoftDottedProperty;
keyed_data_marker: SoftDottedV1Marker;
func:
/// Returns the code point set for the `Soft_Dotted` property, backed by compiled data.
///
/// [`load_soft_dotted()`] loads this property's data from a custom data provider instead.
pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1;
pub fn load_soft_dotted();
}
make_code_point_set_property! {
property: "Segment_Starter";
marker: SegmentStarterProperty;
keyed_data_marker: SegmentStarterV1Marker;
func:
/// Returns the code point set for the `Segment_Starter` property, backed by compiled data.
///
/// [`load_segment_starter()`] loads this property's data from a custom data provider instead.
pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1;
pub fn load_segment_starter();
}
make_code_point_set_property! {
property: "Case_Sensitive";
marker: CaseSensitiveProperty;
keyed_data_marker: CaseSensitiveV1Marker;
func:
/// Returns the code point set for the `Case_Sensitive` property, backed by compiled data.
///
/// [`load_case_sensitive()`] loads this property's data from a custom data provider instead.
pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1;
pub fn load_case_sensitive();
}
make_code_point_set_property! {
property: "Sentence_Terminal";
marker: SentenceTerminalProperty;
keyed_data_marker: SentenceTerminalV1Marker;
func:
/// Returns the code point set for the `Sentence_Terminal` property, backed by compiled data.
///
/// [`load_sentence_terminal()`] loads this property's data from a custom data provider instead.
pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1;
pub fn load_sentence_terminal();
}
make_code_point_set_property! {
property: "Terminal_Punctuation";
marker: TerminalPunctuationProperty;
keyed_data_marker: TerminalPunctuationV1Marker;
func:
/// Returns the code point set for the `Terminal_Punctuation` property, backed by compiled data.
///
/// [`load_terminal_punctuation()`] loads this property's data from a custom data provider instead.
pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1;
pub fn load_terminal_punctuation();
}
make_code_point_set_property! {
property: "Unified_Ideograph";
marker: UnifiedIdeographProperty;
keyed_data_marker: UnifiedIdeographV1Marker;
func:
/// Returns the code point set for the `Unified_Ideograph` property, backed by compiled data.
///
/// [`load_unified_ideograph()`] loads this property's data from a custom data provider instead.
pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1;
pub fn load_unified_ideograph();
}
make_code_point_set_property! {
property: "Uppercase";
marker: UppercaseProperty;
keyed_data_marker: UppercaseV1Marker;
func:
/// Returns the code point set for the `Uppercase` property, backed by compiled data.
///
/// [`load_uppercase()`] loads this property's data from a custom data provider instead.
pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1;
pub fn load_uppercase();
}
make_code_point_set_property! {
property: "Variation_Selector";
marker: VariationSelectorProperty;
keyed_data_marker: VariationSelectorV1Marker;
func:
/// Returns the code point set for the `Variation_Selector` property, backed by compiled data.
///
/// [`load_variation_selector()`] loads this property's data from a custom data provider instead.
pub const fn variation_selector() => SINGLETON_PROPS_VS_V1;
pub fn load_variation_selector();
}
make_code_point_set_property! {
property: "White_Space";
marker: WhiteSpaceProperty;
keyed_data_marker: WhiteSpaceV1Marker;
func:
/// Returns the code point set for the `White_Space` property, backed by compiled data.
///
/// [`load_white_space()`] loads this property's data from a custom data provider instead.
pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1;
pub fn load_white_space();
}
make_code_point_set_property! {
property: "Xdigit";
marker: XdigitProperty;
keyed_data_marker: XdigitV1Marker;
func:
/// Returns the code point set for the `Xdigit` property, backed by compiled data.
///
/// [`load_xdigit()`] loads this property's data from a custom data provider instead.
pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1;
pub fn load_xdigit();
}
make_code_point_set_property! {
property: "XID_Continue";
marker: XidContinueProperty;
keyed_data_marker: XidContinueV1Marker;
func:
/// Returns the code point set for the `XID_Continue` property, backed by compiled data.
///
/// [`load_xid_continue()`] loads this property's data from a custom data provider instead.
pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1;
pub fn load_xid_continue();
}
make_code_point_set_property! {
property: "XID_Start";
marker: XidStartProperty;
keyed_data_marker: XidStartV1Marker;
func:
/// Returns the code point set for the `XID_Start` property, backed by compiled data.
///
/// [`load_xid_start()`] loads this property's data from a custom data provider instead.
pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1;
pub fn load_xid_start();
}
// Generates the two public accessors for one property backed by
// `UnicodeSetData`:
//
// * `$funcname(provider)`: loads the property data from a caller-supplied
//   `DataProvider<$keyed_data_marker>` and wraps it in `UnicodeSetData`;
// * `$constname()`: a `const fn`, only compiled with the `compiled_data`
//   feature, that returns a borrowed view over the baked singleton
//   `crate::provider::Baked::$singleton`.
//
// Any attributes (typically doc comments) written before the `const fn` line
// of an invocation are attached to the generated const function. They are
// optional (`*`, not `+`), so an invocation without doc comments still matches
// the rule instead of failing with "no rules expected the token `pub`".
macro_rules! make_unicode_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])*
        $cvis:vis const fn $constname:ident() => $singleton:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<UnicodeSetData, PropertiesError> {
            // `?` converts the provider's error type into `PropertiesError`.
            Ok(provider
                .load(Default::default())
                .and_then(DataResponse::take_payload)
                .map(UnicodeSetData::from_data)?)
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> UnicodeSetDataBorrowed<'static> {
            UnicodeSetDataBorrowed {
                set: crate::provider::Baked::$singleton
            }
        }
    }
}
make_unicode_set_property! {
property: "Basic_Emoji";
marker: BasicEmojiProperty;
keyed_data_marker: BasicEmojiV1Marker;
func:
/// Returns the set for the `Basic_Emoji` property, backed by compiled data.
///
/// The result is a [`UnicodeSetDataBorrowed`], which can be queried with strings as well as
/// single code points. [`load_basic_emoji()`] loads this property's data from a custom data
/// provider instead.
pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1;
pub fn load_basic_emoji();
}
/// Builds the set of code points whose General_Category belongs to the group
/// `enum_val`, using General_Category data loaded from `provider`.
///
/// The General_Category map is scanned range by range; a range is kept when
/// the bit for its category (`1 << category as u32`) is set in the group's
/// bit mask. The kept ranges are collected into an inversion list.
///
/// # Errors
///
/// Returns the error from `maps::load_general_category` if the data cannot be
/// loaded.
pub fn load_for_general_category_group(
    provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized),
    enum_val: GeneralCategoryGroup,
) -> Result<CodePointSetData, PropertiesError> {
    let general_categories = maps::load_general_category(provider)?;
    // Keep the borrowed view in a local so the range iterator can borrow it.
    let category_view = general_categories.as_borrowed();
    let group_mask = enum_val.0;

    let ranges_in_group = category_view.iter_ranges().filter_map(|entry| {
        let in_group = (1 << entry.value as u32) & group_mask != 0;
        if in_group {
            Some(entry.range)
        } else {
            None
        }
    });

    let inversion_list = CodePointInversionList::from_iter(ranges_in_group);
    Ok(CodePointSetData::from_code_point_inversion_list(
        inversion_list,
    ))
}
/// Builds the set of code points whose General_Category belongs to the group
/// `enum_val`, using the compiled General_Category data.
///
/// The General_Category map is scanned range by range; a range is kept when
/// the bit for its category (`1 << category as u32`) is set in the group's
/// bit mask. The kept ranges are collected into an inversion list.
///
/// See [`load_for_general_category_group`] for the variant that takes a data
/// provider.
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    let category_view = maps::general_category();
    let group_mask = enum_val.0;

    let ranges_in_group = category_view.iter_ranges().filter_map(|entry| {
        let in_group = (1 << entry.value as u32) & group_mask != 0;
        if in_group {
            Some(entry.range)
        } else {
            None
        }
    });

    let inversion_list = CodePointInversionList::from_iter(ranges_in_group);
    CodePointSetData::from_code_point_inversion_list(inversion_list)
}
/// Looks up a binary property by its ECMA-262 name and returns the matching
/// compiled-data set.
///
/// `name` is parsed with `UnicodeProperty::parse_ecma262_name`; the resulting
/// property is then mapped to the corresponding const accessor in this module.
///
/// # Errors
///
/// Returns [`PropertiesError::UnexpectedPropertyName`] when `name` is not
/// recognized by the parser, or when it names a property that has no arm in
/// the match below.
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
    use crate::runtime::UnicodeProperty;

    let property = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;

    let set = match property {
        UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
        UnicodeProperty::Alphabetic => alphabetic(),
        UnicodeProperty::BidiControl => bidi_control(),
        UnicodeProperty::BidiMirrored => bidi_mirrored(),
        UnicodeProperty::CaseIgnorable => case_ignorable(),
        UnicodeProperty::Cased => cased(),
        UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
        UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
        UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
        UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
        UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
        UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
        UnicodeProperty::Dash => dash(),
        UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
        UnicodeProperty::Deprecated => deprecated(),
        UnicodeProperty::Diacritic => diacritic(),
        UnicodeProperty::Emoji => emoji(),
        UnicodeProperty::EmojiComponent => emoji_component(),
        UnicodeProperty::EmojiModifier => emoji_modifier(),
        UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
        UnicodeProperty::EmojiPresentation => emoji_presentation(),
        UnicodeProperty::ExtendedPictographic => extended_pictographic(),
        UnicodeProperty::Extender => extender(),
        UnicodeProperty::GraphemeBase => grapheme_base(),
        UnicodeProperty::GraphemeExtend => grapheme_extend(),
        UnicodeProperty::HexDigit => hex_digit(),
        UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
        UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
        UnicodeProperty::IdContinue => id_continue(),
        UnicodeProperty::IdStart => id_start(),
        UnicodeProperty::Ideographic => ideographic(),
        UnicodeProperty::JoinControl => join_control(),
        UnicodeProperty::LogicalOrderException => logical_order_exception(),
        UnicodeProperty::Lowercase => lowercase(),
        UnicodeProperty::Math => math(),
        UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
        UnicodeProperty::PatternSyntax => pattern_syntax(),
        UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
        UnicodeProperty::QuotationMark => quotation_mark(),
        UnicodeProperty::Radical => radical(),
        UnicodeProperty::RegionalIndicator => regional_indicator(),
        UnicodeProperty::SentenceTerminal => sentence_terminal(),
        UnicodeProperty::SoftDotted => soft_dotted(),
        UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
        UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
        UnicodeProperty::Uppercase => uppercase(),
        UnicodeProperty::VariationSelector => variation_selector(),
        UnicodeProperty::WhiteSpace => white_space(),
        UnicodeProperty::XidContinue => xid_continue(),
        UnicodeProperty::XidStart => xid_start(),
        // The name parsed, but it is not one of the binary properties served here.
        _ => return Err(PropertiesError::UnexpectedPropertyName),
    };

    Ok(set)
}
// Declares the constructor family for the ECMA-262 lookup: the listed function
// names are the compiled-data, any-provider, buffer-provider and unstable
// variants, all taking `name: &str` and (per `result:`) returning
// `Result<CodePointSetData, PropertiesError>`.
// NOTE(review): presumably the macro generates the `_with_any_provider` and
// `_with_buffer_provider` wrappers around `load_for_ecma262_unstable`, and
// `#[cfg(skip)]` suppresses generation of the compiled-data function, which is
// hand-written above with a different return type. Confirm against the
// `icu_provider::gen_any_buffer_data_constructors!` documentation.
icu_provider::gen_any_buffer_data_constructors!(
locale: skip,
name: &str,
result: Result<CodePointSetData, PropertiesError>,
#[cfg(skip)]
functions: [
load_for_ecma262,
load_for_ecma262_with_any_provider,
load_for_ecma262_with_buffer_provider,
load_for_ecma262_unstable,
]
);
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)]
pub fn load_for_ecma262_unstable<P>(
    provider: &P,
    name: &str,
) -> Result<CodePointSetData, PropertiesError>
where
    // One bound per property that the match below can dispatch to.
    P: ?Sized
        + DataProvider<AsciiHexDigitV1Marker>
        + DataProvider<AlphabeticV1Marker>
        + DataProvider<BidiControlV1Marker>
        + DataProvider<BidiMirroredV1Marker>
        + DataProvider<CaseIgnorableV1Marker>
        + DataProvider<CasedV1Marker>
        + DataProvider<ChangesWhenCasefoldedV1Marker>
        + DataProvider<ChangesWhenCasemappedV1Marker>
        + DataProvider<ChangesWhenLowercasedV1Marker>
        + DataProvider<ChangesWhenNfkcCasefoldedV1Marker>
        + DataProvider<ChangesWhenTitlecasedV1Marker>
        + DataProvider<ChangesWhenUppercasedV1Marker>
        + DataProvider<DashV1Marker>
        + DataProvider<DefaultIgnorableCodePointV1Marker>
        + DataProvider<DeprecatedV1Marker>
        + DataProvider<DiacriticV1Marker>
        + DataProvider<EmojiV1Marker>
        + DataProvider<EmojiComponentV1Marker>
        + DataProvider<EmojiModifierV1Marker>
        + DataProvider<EmojiModifierBaseV1Marker>
        + DataProvider<EmojiPresentationV1Marker>
        + DataProvider<ExtendedPictographicV1Marker>
        + DataProvider<ExtenderV1Marker>
        + DataProvider<GraphemeBaseV1Marker>
        + DataProvider<GraphemeExtendV1Marker>
        + DataProvider<HexDigitV1Marker>
        + DataProvider<IdsBinaryOperatorV1Marker>
        + DataProvider<IdsTrinaryOperatorV1Marker>
        + DataProvider<IdContinueV1Marker>
        + DataProvider<IdStartV1Marker>
        + DataProvider<IdeographicV1Marker>
        + DataProvider<JoinControlV1Marker>
        + DataProvider<LogicalOrderExceptionV1Marker>
        + DataProvider<LowercaseV1Marker>
        + DataProvider<MathV1Marker>
        + DataProvider<NoncharacterCodePointV1Marker>
        + DataProvider<PatternSyntaxV1Marker>
        + DataProvider<PatternWhiteSpaceV1Marker>
        + DataProvider<QuotationMarkV1Marker>
        + DataProvider<RadicalV1Marker>
        + DataProvider<RegionalIndicatorV1Marker>
        + DataProvider<SentenceTerminalV1Marker>
        + DataProvider<SoftDottedV1Marker>
        + DataProvider<TerminalPunctuationV1Marker>
        + DataProvider<UnifiedIdeographV1Marker>
        + DataProvider<UppercaseV1Marker>
        + DataProvider<VariationSelectorV1Marker>
        + DataProvider<WhiteSpaceV1Marker>
        + DataProvider<XidContinueV1Marker>
        + DataProvider<XidStartV1Marker>,
{
    use crate::runtime::UnicodeProperty;

    // An unrecognized name is reported with the same error as an unsupported property.
    let property = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;

    // Each arm already yields `Result<CodePointSetData, PropertiesError>`.
    match property {
        UnicodeProperty::AsciiHexDigit => load_ascii_hex_digit(provider),
        UnicodeProperty::Alphabetic => load_alphabetic(provider),
        UnicodeProperty::BidiControl => load_bidi_control(provider),
        UnicodeProperty::BidiMirrored => load_bidi_mirrored(provider),
        UnicodeProperty::CaseIgnorable => load_case_ignorable(provider),
        UnicodeProperty::Cased => load_cased(provider),
        UnicodeProperty::ChangesWhenCasefolded => load_changes_when_casefolded(provider),
        UnicodeProperty::ChangesWhenCasemapped => load_changes_when_casemapped(provider),
        UnicodeProperty::ChangesWhenLowercased => load_changes_when_lowercased(provider),
        UnicodeProperty::ChangesWhenNfkcCasefolded => load_changes_when_nfkc_casefolded(provider),
        UnicodeProperty::ChangesWhenTitlecased => load_changes_when_titlecased(provider),
        UnicodeProperty::ChangesWhenUppercased => load_changes_when_uppercased(provider),
        UnicodeProperty::Dash => load_dash(provider),
        UnicodeProperty::DefaultIgnorableCodePoint => load_default_ignorable_code_point(provider),
        UnicodeProperty::Deprecated => load_deprecated(provider),
        UnicodeProperty::Diacritic => load_diacritic(provider),
        UnicodeProperty::Emoji => load_emoji(provider),
        UnicodeProperty::EmojiComponent => load_emoji_component(provider),
        UnicodeProperty::EmojiModifier => load_emoji_modifier(provider),
        UnicodeProperty::EmojiModifierBase => load_emoji_modifier_base(provider),
        UnicodeProperty::EmojiPresentation => load_emoji_presentation(provider),
        UnicodeProperty::ExtendedPictographic => load_extended_pictographic(provider),
        UnicodeProperty::Extender => load_extender(provider),
        UnicodeProperty::GraphemeBase => load_grapheme_base(provider),
        UnicodeProperty::GraphemeExtend => load_grapheme_extend(provider),
        UnicodeProperty::HexDigit => load_hex_digit(provider),
        UnicodeProperty::IdsBinaryOperator => load_ids_binary_operator(provider),
        UnicodeProperty::IdsTrinaryOperator => load_ids_trinary_operator(provider),
        UnicodeProperty::IdContinue => load_id_continue(provider),
        UnicodeProperty::IdStart => load_id_start(provider),
        UnicodeProperty::Ideographic => load_ideographic(provider),
        UnicodeProperty::JoinControl => load_join_control(provider),
        UnicodeProperty::LogicalOrderException => load_logical_order_exception(provider),
        UnicodeProperty::Lowercase => load_lowercase(provider),
        UnicodeProperty::Math => load_math(provider),
        UnicodeProperty::NoncharacterCodePoint => load_noncharacter_code_point(provider),
        UnicodeProperty::PatternSyntax => load_pattern_syntax(provider),
        UnicodeProperty::PatternWhiteSpace => load_pattern_white_space(provider),
        UnicodeProperty::QuotationMark => load_quotation_mark(provider),
        UnicodeProperty::Radical => load_radical(provider),
        UnicodeProperty::RegionalIndicator => load_regional_indicator(provider),
        UnicodeProperty::SentenceTerminal => load_sentence_terminal(provider),
        UnicodeProperty::SoftDotted => load_soft_dotted(provider),
        UnicodeProperty::TerminalPunctuation => load_terminal_punctuation(provider),
        UnicodeProperty::UnifiedIdeograph => load_unified_ideograph(provider),
        UnicodeProperty::Uppercase => load_uppercase(provider),
        UnicodeProperty::VariationSelector => load_variation_selector(provider),
        UnicodeProperty::WhiteSpace => load_white_space(provider),
        UnicodeProperty::XidContinue => load_xid_continue(provider),
        UnicodeProperty::XidStart => load_xid_start(provider),
        // The name parsed, but it is not one of the binary properties served here.
        _ => Err(PropertiesError::UnexpectedPropertyName),
    }
}
#[cfg(test)]
mod tests {
    // The `Number` group must contain digits from several scripts and no letters.
    #[test]
    fn test_general_category() {
        use icu::properties::sets;
        use icu::properties::GeneralCategoryGroup;

        let number_set = sets::for_general_category_group(GeneralCategoryGroup::Number);
        let numbers = number_set.as_borrowed();

        assert!(numbers.contains('5'));
        // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(numbers.contains('\u{0665}'));
        // U+096B DEVANAGARI DIGIT FIVE
        assert!(numbers.contains('\u{096b}'));

        assert!(!numbers.contains('A'));
    }

    // The set derived from the Script map for `Thai` contains Thai letters and
    // digits only.
    #[test]
    fn test_script() {
        use icu::properties::maps;
        use icu::properties::Script;

        let thai_set = maps::script().get_set_for_value(Script::Thai);
        let thai_view = thai_set.as_borrowed();

        // U+0E01 THAI CHARACTER KO KAI
        assert!(thai_view.contains('\u{0e01}'));
        // U+0E50 THAI DIGIT ZERO
        assert!(thai_view.contains('\u{0e50}'));

        assert!(!thai_view.contains('A'));
        // U+0E3F THAI CURRENCY SYMBOL BAHT
        assert!(!thai_view.contains('\u{0e3f}'));
    }

    // Every General_Category group must equal the union of the sets of its
    // individual categories.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::{maps, sets};
        use icu::properties::{GeneralCategory, GeneralCategoryGroup};
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;

        // NOTE: the parameter names `category` and `subcategories` are captured
        // by the `println!` format string below; keep them in sync.
        let check_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let group_data = sets::for_general_category_group(category);
            let group_list = group_data
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            // Union of the per-category sets, built range by range.
            let mut union_builder = CodePointInversionListBuilder::new();
            for &subcategory in subcategories {
                let subcategory_data = maps::general_category().get_set_for_value(subcategory);
                let subcategory_view = subcategory_data.as_borrowed();
                for range in subcategory_view.iter_ranges() {
                    union_builder.add_range_u32(&range);
                }
            }
            let union_list = union_builder.build();

            println!("{category:?} {subcategories:?}");
            assert_eq!(
                group_list.get_inversion_list_vec(),
                union_list.get_inversion_list_vec()
            );
        };

        check_group(
            GeneralCategoryGroup::Letter,
            &[
                GeneralCategory::UppercaseLetter,
                GeneralCategory::LowercaseLetter,
                GeneralCategory::TitlecaseLetter,
                GeneralCategory::ModifierLetter,
                GeneralCategory::OtherLetter,
            ],
        );
        check_group(
            GeneralCategoryGroup::Other,
            &[
                GeneralCategory::Control,
                GeneralCategory::Format,
                GeneralCategory::Unassigned,
                GeneralCategory::PrivateUse,
                GeneralCategory::Surrogate,
            ],
        );
        check_group(
            GeneralCategoryGroup::Mark,
            &[
                GeneralCategory::SpacingMark,
                GeneralCategory::EnclosingMark,
                GeneralCategory::NonspacingMark,
            ],
        );
        check_group(
            GeneralCategoryGroup::Number,
            &[
                GeneralCategory::DecimalNumber,
                GeneralCategory::LetterNumber,
                GeneralCategory::OtherNumber,
            ],
        );
        check_group(
            GeneralCategoryGroup::Punctuation,
            &[
                GeneralCategory::ConnectorPunctuation,
                GeneralCategory::DashPunctuation,
                GeneralCategory::ClosePunctuation,
                GeneralCategory::FinalPunctuation,
                GeneralCategory::InitialPunctuation,
                GeneralCategory::OtherPunctuation,
                GeneralCategory::OpenPunctuation,
            ],
        );
        check_group(
            GeneralCategoryGroup::Symbol,
            &[
                GeneralCategory::CurrencySymbol,
                GeneralCategory::ModifierSymbol,
                GeneralCategory::MathSymbol,
                GeneralCategory::OtherSymbol,
            ],
        );
        check_group(
            GeneralCategoryGroup::Separator,
            &[
                GeneralCategory::LineSeparator,
                GeneralCategory::ParagraphSeparator,
                GeneralCategory::SpaceSeparator,
            ],
        );
    }

    // Surrogate code points are not valid `char`s, so they are queried through
    // `contains32`.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::maps;
        use icu::properties::GeneralCategory;

        let surrogate_set = maps::general_category().get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogate_set.as_borrowed();

        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));

        assert!(!surrogates.contains('A'));
    }
}