Struct icu_uniset::UnicodeSet[][src]

pub struct UnicodeSet { /* fields omitted */ }

A membership wrapper for UnicodeSet.

Provides exposure to membership functions and constructors from serialized UnicodeSets and predefined ranges.

Implementations

impl UnicodeSet[src]

pub fn from_inversion_list(inv_list: Vec<u32>) -> Result<Self, UnicodeSetError>[src]

Returns a UnicodeSet from an inversion list represented by a Vec<u32> of code points.

The inversion list must be of even length, sorted in ascending order, non-overlapping, and within the bounds of 0x0 -> 0x10FFFF inclusive, with each range's end point being exclusive.

Examples

use icu::uniset::UnicodeSet;
use icu::uniset::UnicodeSetError;
let invalid: Vec<u32> = vec![0, 128, 3];
let result = UnicodeSet::from_inversion_list(invalid.clone());
assert!(matches!(result, Err(UnicodeSetError::InvalidSet(_))));
if let Err(UnicodeSetError::InvalidSet(actual)) = result {
    assert_eq!(invalid, actual);
}

pub fn get_inversion_list(&self) -> Vec<u32>[src]

Returns an owned inversion list representing the current UnicodeSet

pub fn all() -> Self[src]

Returns UnicodeSet spanning entire Unicode range

The range spans from 0x0 -> 0x10FFFF inclusive

pub fn bmp() -> Self[src]

Returns UnicodeSet spanning BMP range

The range spans from 0x0 -> 0xFFFF inclusive

pub fn iter_chars(&self) -> impl Iterator<Item = char> + '_[src]

Yields an iterator going through the character set in the UnicodeSet

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 68, 69, 70];
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
let mut example_iter = example.iter_chars();
assert_eq!(Some('A'), example_iter.next());
assert_eq!(Some('B'), example_iter.next());
assert_eq!(Some('C'), example_iter.next());
assert_eq!(Some('E'), example_iter.next());
assert_eq!(None, example_iter.next());

pub fn size(&self) -> usize[src]

Returns the number of elements of the UnicodeSet

pub fn is_empty(&self) -> bool[src]

Returns whether or not the UnicodeSet is empty

pub fn contains(&self, query: char) -> bool[src]

Checks to see whether the query is in the UnicodeSet

Runs a binary search in O(log(n)) where n is the number of start and end points in the set using std implementation

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 67, 68, 69];
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
assert!(example.contains('A'));
assert!(!example.contains('C'));

pub fn contains_u32(&self, query: u32) -> bool[src]

Checks to see whether the unsigned int is in the UnicodeSet

Note: Even though u32 and char in Rust are non-negative 4-byte values, there is an important difference. A u32 can take values up to a very large integer value, while a char in Rust is defined to be in the range from 0 to the maximum valid Unicode Scalar Value.

Runs a binary search in O(log(n)) where n is the number of start and end points in the set using std implementation

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 67, 68, 69];
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
assert!(example.contains_u32(65));
assert!(!example.contains_u32(67));

pub fn contains_range(&self, range: &impl RangeBounds<char>) -> bool[src]

Checks to see if the range is in the UnicodeSet; returns a bool

Runs a binary search in O(log(n)) where n is the number of start and end points in the set using Vec implementation. Only runs the search once on the start parameter, while the end parameter is checked in a single O(1) step.

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 67, 68, 69];
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
assert!(example.contains_range(&('A'..'C')));
assert!(example.contains_range(&('A'..='B')));
assert!(!example.contains_range(&('A'..='C')));

Surrogate points (0xD800 -> 0xDFFF) will return false if the Range contains them but the UnicodeSet does not.

Note: when comparing to ICU4C/J, keep in mind that ranges in Rust are constructed inclusive of start boundary and exclusive of end boundary. The ICU4C/J UnicodeSet::contains(UChar32 start, UChar32 end) method differs by including the end boundary.

Examples

use icu::uniset::UnicodeSet;
use std::char;
let check = char::from_u32(0xD7FE).unwrap() .. char::from_u32(0xE001).unwrap();
let example_list = vec![0xD7FE, 0xD7FF, 0xE000, 0xE001];
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
assert!(!example.contains_range(&(check)));

pub fn contains_set(&self, set: &Self) -> bool[src]

Check if the calling UnicodeSet contains all the characters of the given UnicodeSet

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 70, 85, 91]; // A - E, U - Z
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
let a_to_d = UnicodeSet::from_inversion_list(vec![65, 69]).unwrap();
let f_to_t = UnicodeSet::from_inversion_list(vec![70, 85]).unwrap();
let r_to_x = UnicodeSet::from_inversion_list(vec![82, 88]).unwrap();
assert!(example.contains_set(&a_to_d)); // contains all
assert!(!example.contains_set(&f_to_t)); // contains none
assert!(!example.contains_set(&r_to_x)); // contains some

pub fn span(&self, span_str: &str, contained: bool) -> usize[src]

Returns the end of the initial substring where the characters are either contained/not contained in the set.

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 68]; // {A, B, C}
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
assert_eq!(example.span("CABXYZ", true), 3);
assert_eq!(example.span("XYZC", false), 3);
assert_eq!(example.span("XYZ", true), 0);
assert_eq!(example.span("ABC", false), 0);

pub fn span_back(&self, span_str: &str, contained: bool) -> usize[src]

Returns the start of the trailing substring (starting from the end of the string) where the characters are either contained/not contained in the set. Returns the length of the string if there is no such trailing substring.

Examples

use icu::uniset::UnicodeSet;
let example_list = vec![65, 68]; // {A, B, C}
let example = UnicodeSet::from_inversion_list(example_list).unwrap();
assert_eq!(example.span_back("XYZCAB", true), 3);
assert_eq!(example.span_back("ABCXYZ", true), 6);
assert_eq!(example.span_back("CABXYZ", false), 3);

Trait Implementations

impl Debug for UnicodeSet[src]

impl Eq for UnicodeSet[src]

impl Hash for UnicodeSet[src]

impl PartialEq<UnicodeSet> for UnicodeSet[src]

impl StructuralEq for UnicodeSet[src]

impl StructuralPartialEq for UnicodeSet[src]

impl TryFrom<&'_ Range<char>> for UnicodeSet[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

impl TryFrom<&'_ RangeFrom<char>> for UnicodeSet[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

impl TryFrom<&'_ RangeFull> for UnicodeSet[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

impl TryFrom<&'_ RangeInclusive<char>> for UnicodeSet[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

impl TryFrom<&'_ RangeTo<char>> for UnicodeSet[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

impl TryFrom<&'_ RangeToInclusive<char>> for UnicodeSet[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

impl<'s> TryInto<UnicodeSet> for UnicodeProperty<'s>[src]

type Error = UnicodeSetError

The type returned in the event of a conversion error.

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.