pub struct GeneralCategoryGroup(_);
Expand description

Groupings of multiple General_Category property values.

Instances of GeneralCategoryGroup represent the defined multi-category values that are useful for users in certain contexts, such as regex. In other words, unlike GeneralCategory, this supports groups of general categories: for example, Letter is the union of UppercaseLetter, LowercaseLetter, etc.

See https://www.unicode.org/reports/tr44/ .

The discriminants correspond to the U_GC_XX_MASK constants in ICU4C. Unlike GeneralCategory, this supports groups of general categories: for example, Letter is the union of UppercaseLetter, LowercaseLetter, etc.

See UCharCategory and U_GET_GC_MASK in ICU4C.

Implementations§

source§

impl GeneralCategoryGroup

source

pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << GC::UppercaseLetter as u32)

(Lu) An uppercase letter

source

pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << GC::LowercaseLetter as u32)

(Ll) A lowercase letter

source

pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << GC::TitlecaseLetter as u32)

(Lt) A digraphic letter, with first part uppercase

source

pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << GC::ModifierLetter as u32)

(Lm) A modifier letter

source

pub const OtherLetter: GeneralCategoryGroup = GCG(1 << GC::OtherLetter as u32)

(Lo) Other letters, including syllables and ideographs

source

pub const CasedLetter: GeneralCategoryGroup = GCG(1 << GC::UppercaseLetter as u32 | 1 << GC::LowercaseLetter as u32 | 1 << GC::TitlecaseLetter as u32)

(LC) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter

source

pub const Letter: GeneralCategoryGroup = GCG(1 << GC::UppercaseLetter as u32 | 1 << GC::LowercaseLetter as u32 | 1 << GC::TitlecaseLetter as u32 | 1 << GC::ModifierLetter as u32 | 1 << GC::OtherLetter as u32)

(L) The union of all letter categories

source

pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << GC::NonspacingMark as u32)

(Mn) A nonspacing combining mark (zero advance width)

source

pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << GC::EnclosingMark as u32)

(Me) An enclosing combining mark

source

pub const SpacingMark: GeneralCategoryGroup = GCG(1 << GC::SpacingMark as u32)

(Mc) A spacing combining mark (positive advance width)

source

pub const Mark: GeneralCategoryGroup = GCG(1 << GC::NonspacingMark as u32 | 1 << GC::EnclosingMark as u32 | 1 << GC::SpacingMark as u32)

(M) The union of all mark categories

source

pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << GC::DecimalNumber as u32)

(Nd) A decimal digit

source

pub const LetterNumber: GeneralCategoryGroup = GCG(1 << GC::LetterNumber as u32)

(Nl) A letterlike numeric character

source

pub const OtherNumber: GeneralCategoryGroup = GCG(1 << GC::OtherNumber as u32)

(No) A numeric character of other type

source

pub const Number: GeneralCategoryGroup = GCG(1 << GC::DecimalNumber as u32 | 1 << GC::LetterNumber as u32 | 1 << GC::OtherNumber as u32)

(N) The union of all number categories

source

pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << GC::SpaceSeparator as u32)

(Zs) A space character (of various non-zero widths)

source

pub const LineSeparator: GeneralCategoryGroup = GCG(1 << GC::LineSeparator as u32)

(Zl) U+2028 LINE SEPARATOR only

source

pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << GC::ParagraphSeparator as u32)

(Zp) U+2029 PARAGRAPH SEPARATOR only

source

pub const Separator: GeneralCategoryGroup = GCG(1 << GC::SpaceSeparator as u32 | 1 << GC::LineSeparator as u32 | 1 << GC::ParagraphSeparator as u32)

(Z) The union of all separator categories

source

pub const Control: GeneralCategoryGroup = GCG(1 << GC::Control as u32)

(Cc) A C0 or C1 control code

source

pub const Format: GeneralCategoryGroup = GCG(1 << GC::Format as u32)

(Cf) A format control character

source

pub const PrivateUse: GeneralCategoryGroup = GCG(1 << GC::PrivateUse as u32)

(Co) A private-use character

source

pub const Surrogate: GeneralCategoryGroup = GCG(1 << GC::Surrogate as u32)

(Cs) A surrogate code point

source

pub const Unassigned: GeneralCategoryGroup = GCG(1 << GC::Unassigned as u32)

(Cn) A reserved unassigned code point or a noncharacter

source

pub const Other: GeneralCategoryGroup = GCG(1 << GC::Control as u32 | 1 << GC::Format as u32 | 1 << GC::PrivateUse as u32 | 1 << GC::Surrogate as u32 | 1 << GC::Unassigned as u32)

(C) The union of all control code, reserved, and unassigned categories

source

pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << GC::DashPunctuation as u32)

(Pd) A dash or hyphen punctuation mark

source

pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << GC::OpenPunctuation as u32)

(Ps) An opening punctuation mark (of a pair)

source

pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << GC::ClosePunctuation as u32)

(Pe) A closing punctuation mark (of a pair)

source

pub const ConnectorPunctuation: GeneralCategoryGroup = GCG(1 << GC::ConnectorPunctuation as u32)

(Pc) A connecting punctuation mark, like a tie

source

pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << GC::InitialPunctuation as u32)

(Pi) An initial quotation mark

source

pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << GC::FinalPunctuation as u32)

(Pf) A final quotation mark

source

pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << GC::OtherPunctuation as u32)

(Po) A punctuation mark of other type

source

pub const Punctuation: GeneralCategoryGroup = GCG(1 << GC::DashPunctuation as u32 | 1 << GC::OpenPunctuation as u32 | 1 << GC::ClosePunctuation as u32 | 1 << GC::ConnectorPunctuation as u32 | 1 << GC::OtherPunctuation as u32 | 1 << GC::InitialPunctuation as u32 | 1 << GC::FinalPunctuation as u32)

(P) The union of all punctuation categories

source

pub const MathSymbol: GeneralCategoryGroup = GCG(1 << GC::MathSymbol as u32)

(Sm) A symbol of mathematical use

source

pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << GC::CurrencySymbol as u32)

(Sc) A currency sign

source

pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << GC::ModifierSymbol as u32)

(Sk) A non-letterlike modifier symbol

source

pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << GC::OtherSymbol as u32)

(So) A symbol of other type

source

pub const Symbol: GeneralCategoryGroup = GCG(1 << GC::MathSymbol as u32 | 1 << GC::CurrencySymbol as u32 | 1 << GC::ModifierSymbol as u32 | 1 << GC::OtherSymbol as u32)

(S) The union of all symbol categories

source

pub fn contains(&self, val: GeneralCategory) -> bool

Return whether the given GeneralCategory value belongs to this multi-value category group.

use icu::properties::{maps, GeneralCategory, GeneralCategoryGroup};
use icu_collections::codepointtrie::CodePointTrie;

let data = maps::load_general_category(&icu_testdata::unstable())
    .expect("The data should be valid");
let gc = data.as_borrowed();

assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));

// U+0B1E ORIYA LETTER NYA
assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));

// U+0301 COMBINING ACUTE ACCENT
assert_eq!(gc.get32(0x0301), GeneralCategory::NonspacingMark);
assert!(GeneralCategoryGroup::Mark.contains(gc.get32(0x0301)));
assert!(!GeneralCategoryGroup::Letter.contains(gc.get32(0x0301)));

assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));

assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));

// U+2713 CHECK MARK
assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));

assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));

// U+E007F CANCEL TAG
assert_eq!(gc.get32(0xE007F), GeneralCategory::Format);
assert!(GeneralCategoryGroup::Other.contains(gc.get32(0xE007F)));
assert!(!GeneralCategoryGroup::Separator.contains(gc.get32(0xE007F)));
source

pub fn complement(self) -> GeneralCategoryGroup

Produce a GeneralCategoryGroup that is the inverse of this one

Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let letter = GeneralCategoryGroup::Letter;
let not_letter = letter.complement();

assert!(not_letter.contains(GeneralCategory::MathSymbol));
assert!(!letter.contains(GeneralCategory::MathSymbol));
assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
assert!(!letter.contains(GeneralCategory::OtherPunctuation));
assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
assert!(letter.contains(GeneralCategory::UppercaseLetter));
source

pub fn all() -> GeneralCategoryGroup

Return the group representing all GeneralCategory values

Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let all = GeneralCategoryGroup::all();

assert!(all.contains(GeneralCategory::MathSymbol));
assert!(all.contains(GeneralCategory::OtherPunctuation));
assert!(all.contains(GeneralCategory::UppercaseLetter));
source

pub fn empty() -> GeneralCategoryGroup

Return the empty group

Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let empty = GeneralCategoryGroup::empty();

assert!(!empty.contains(GeneralCategory::MathSymbol));
assert!(!empty.contains(GeneralCategory::OtherPunctuation));
assert!(!empty.contains(GeneralCategory::UppercaseLetter));
source

pub fn union(self, other: GeneralCategoryGroup) -> GeneralCategoryGroup

Take the union of two groups

Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let letter = GeneralCategoryGroup::Letter;
let symbol = GeneralCategoryGroup::Symbol;
let union = letter.union(symbol);

assert!(union.contains(GeneralCategory::MathSymbol));
assert!(!union.contains(GeneralCategory::OtherPunctuation));
assert!(union.contains(GeneralCategory::UppercaseLetter));
source

pub fn intersection(self, other: GeneralCategoryGroup) -> GeneralCategoryGroup

Take the intersection of two groups

Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let letter = GeneralCategoryGroup::Letter;
let lu = GeneralCategoryGroup::UppercaseLetter;
let intersection = letter.intersection(lu);

assert!(!intersection.contains(GeneralCategory::MathSymbol));
assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
assert!(intersection.contains(GeneralCategory::UppercaseLetter));
assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
source§

impl GeneralCategoryGroup

source

pub fn get_name_to_enum_mapper( provider: &impl DataProvider<GeneralCategoryMaskNameToValueV1Marker> ) -> Result<PropertyValueNameToEnumMapper<GeneralCategoryGroup>, PropertiesError>

Return a PropertyValueNameToEnumMapper, capable of looking up values from strings for the General_Category_Mask property

Example
use icu::properties::GeneralCategoryGroup;

let lookup = GeneralCategoryGroup::get_name_to_enum_mapper(&icu_testdata::unstable())
                 .expect("The data should be valid");
let lookup = lookup.as_borrowed();
// short name for value
assert_eq!(lookup.get_strict("L"), Some(GeneralCategoryGroup::Letter));
assert_eq!(lookup.get_strict("LC"), Some(GeneralCategoryGroup::CasedLetter));
assert_eq!(lookup.get_strict("Lu"), Some(GeneralCategoryGroup::UppercaseLetter));
assert_eq!(lookup.get_strict("Zp"), Some(GeneralCategoryGroup::ParagraphSeparator));
assert_eq!(lookup.get_strict("P"), Some(GeneralCategoryGroup::Punctuation));
// long name for value
assert_eq!(lookup.get_strict("Letter"), Some(GeneralCategoryGroup::Letter));
assert_eq!(lookup.get_strict("Cased_Letter"), Some(GeneralCategoryGroup::CasedLetter));
assert_eq!(lookup.get_strict("Uppercase_Letter"), Some(GeneralCategoryGroup::UppercaseLetter));
// alias name
assert_eq!(lookup.get_strict("punct"), Some(GeneralCategoryGroup::Punctuation));
// name has incorrect casing
assert_eq!(lookup.get_strict("letter"), None);
// loose matching of name
assert_eq!(lookup.get_loose("letter"), Some(GeneralCategoryGroup::Letter));
// fake property
assert_eq!(lookup.get_strict("EverythingLol"), None);

Trait Implementations§

source§

impl AsULE for GeneralCategoryGroup

§

type ULE = RawBytesULE<2>

The ULE type corresponding to Self. Read more
source§

fn to_unaligned(self) -> <GeneralCategoryGroup as AsULE>::ULE

Converts from Self to Self::ULE. Read more
source§

fn from_unaligned( ule: <GeneralCategoryGroup as AsULE>::ULE ) -> GeneralCategoryGroup

Converts from Self::ULE to Self. Read more
source§

impl Clone for GeneralCategoryGroup

source§

fn clone(&self) -> GeneralCategoryGroup

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for GeneralCategoryGroup

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
source§

impl From<GeneralCategory> for GeneralCategoryGroup

source§

fn from(subcategory: GeneralCategory) -> GeneralCategoryGroup

Converts to this type from the input type.
source§

impl From<u32> for GeneralCategoryGroup

source§

fn from(mask: u32) -> GeneralCategoryGroup

Converts to this type from the input type.
source§

impl PartialEq<GeneralCategoryGroup> for GeneralCategoryGroup

source§

fn eq(&self, other: &GeneralCategoryGroup) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl TrieValue for GeneralCategoryGroup

§

type TryFromU32Error = TryFromIntError

Error type returned when converting from a u32 to this TrieValue fails. Read more
source§

fn try_from_u32( i: u32 ) -> Result<GeneralCategoryGroup, <GeneralCategoryGroup as TrieValue>::TryFromU32Error>

A parsing function that is primarily motivated by deserialization contexts. When the serialization type width is smaller than 32 bits, then it is expected that the call site will widen the value to a u32 first.
source§

fn to_u32(self) -> u32

A method for converting back to a u32 that can roundtrip through Self::try_from_u32(). The default implementation of this trait method panics in debug mode and returns 0 in release mode. Read more
source§

impl Copy for GeneralCategoryGroup

source§

impl Eq for GeneralCategoryGroup

source§

impl StructuralEq for GeneralCategoryGroup

source§

impl StructuralPartialEq for GeneralCategoryGroup

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

const: unstable · source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

const: unstable · source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

const: unstable · source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

const: unstable · source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
const: unstable · source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
const: unstable · source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<T> ErasedDestructor for T where T: 'static,

source§

impl<T> MaybeSendSync for T