1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! `icu_uniset` is one of the [`ICU4X`] components.
//!
//! This API provides necessary functionality for highly efficient querying of sets of Unicode characters.
//!
//! It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).
//!
//! # Architecture
//! ICU4X [`UnicodeSet`] is split up into independent levels, with [`UnicodeSet`] representing the membership/query API,
//! and [`UnicodeSetBuilder`] representing the builder API. A [Properties API](http://userguide.icu-project.org/strings/properties)
//! is planned as future work.
//!
//! # Examples:
//!
//! ## Creating a `UnicodeSet`
//!
//! UnicodeSets are created from either serialized [`UnicodeSets`](UnicodeSet),
//! represented by [inversion lists](http://userguide.icu-project.org/strings/properties),
//! the [`UnicodeSetBuilder`], or from the TBA Properties API.
//!
//! ```
//! use icu::uniset::{UnicodeSet, UnicodeSetBuilder};
//!
//! let mut builder = UnicodeSetBuilder::new();
//! builder.add_range(&('A'..'Z'));
//! let set: UnicodeSet = builder.build();
//!
//! assert!(set.contains('A'));
//! ```
//!
//! ## Querying a `UnicodeSet`
//!
//! Currently, you can check if a character/range of characters exists in the [`UnicodeSet`], or iterate through the characters.
//!
//! ```
//! use icu::uniset::{UnicodeSet, UnicodeSetBuilder};
//!
//! let mut builder = UnicodeSetBuilder::new();
//! builder.add_range(&('A'..'Z'));
//! let set: UnicodeSet = builder.build();
//!
//! assert!(set.contains('A'));
//! assert!(set.contains_range(&('A'..='C')));
//! assert_eq!(set.iter_chars().next(), Some('A'));
//! ```
//!
[`ICU4X`]: ../icu/index.html #![cfg_attr(not(any(test, feature = "std")), no_std)] extern crate alloc; #[macro_use] mod builder; mod conversions; pub mod enum_props; pub mod props; pub mod provider; mod uniset; mod utils; use alloc::vec::Vec; pub use builder::UnicodeSetBuilder; pub use conversions::*; use displaydoc::Display; use icu_provider::DataError; pub use uniset::UnicodeSet; pub use utils::*; /// Custom Errors for [`UnicodeSet`]. #[derive(Display, Debug)] pub enum UnicodeSetError { #[displaydoc("Invalid set: {0:?}")] InvalidSet(Vec<u32>), #[displaydoc("Invalid range: {0}..{1}")] InvalidRange(u32, u32), #[displaydoc("{0}")] PropDataLoad(DataError), } #[cfg(feature = "std")] impl std::error::Error for UnicodeSetError {} impl From<DataError> for UnicodeSetError { fn from(e: DataError) -> Self { UnicodeSetError::PropDataLoad(e) } } #[derive(PartialEq)] pub enum UnicodeSetSpanCondition { Contained, NotContained, }