1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! `icu_uniset` is one of the [`ICU4X`] components.
//!
//! This API provides necessary functionality for highly efficient querying of sets of Unicode characters.
//!
//! It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).
//!
//! # Architecture
//! ICU4X [`UnicodeSet`] is split up into independent levels, with [`UnicodeSet`] representing the membership/query API,
//! and [`UnicodeSetBuilder`] representing the builder API. A [Properties API](http://userguide.icu-project.org/strings/properties)
//! is in future works.
//!
//! # Examples:
//!
//! ## Creating a `UnicodeSet`
//!
//! UnicodeSets are created from either serialized [`UnicodeSets`](UnicodeSet),
//! represented by [inversion lists](http://userguide.icu-project.org/strings/properties),
//! the [`UnicodeSetBuilder`], or from the TBA Properties API.
//!
//! ```
//! use icu::uniset::{UnicodeSet, UnicodeSetBuilder};
//!
//! let mut builder = UnicodeSetBuilder::new();
//! builder.add_range(&('A'..'Z'));
//! let set: UnicodeSet = builder.build();
//!
//! assert!(set.contains('A'));
//! ```
//!
//! ## Querying a `UnicodeSet`
//!
//! Currently, you can check if a character/range of characters exists in the [`UnicodeSet`], or iterate through the characters.
//!
//! ```
//! use icu::uniset::{UnicodeSet, UnicodeSetBuilder};
//!
//! let mut builder = UnicodeSetBuilder::new();
//! builder.add_range(&('A'..'Z'));
//! let set: UnicodeSet = builder.build();
//!
//! assert!(set.contains('A'));
//! assert!(set.contains_range(&('A'..='C')));
//! assert_eq!(set.iter_chars().next(), Some('A'));
//! ```
//!
//! [`ICU4X`]: ../icu/index.html

#![cfg_attr(not(any(test, feature = "std")), no_std)]

extern crate alloc;

#[macro_use]
mod builder;
mod conversions;
pub mod enum_props;
pub mod props;
pub mod provider;
mod uniset;
mod utils;

use alloc::vec::Vec;
pub use builder::UnicodeSetBuilder;
pub use conversions::*;
use displaydoc::Display;
use icu_provider::DataError;
pub use uniset::UnicodeSet;
pub use utils::*;

/// Custom Errors for [`UnicodeSet`].
#[derive(Display, Debug)]
pub enum UnicodeSetError {
    #[displaydoc("Invalid set: {0:?}")]
    InvalidSet(Vec<u32>),
    #[displaydoc("Invalid range: {0}..{1}")]
    InvalidRange(u32, u32),
    #[displaydoc("{0}")]
    PropDataLoad(DataError),
}

#[cfg(feature = "std")]
impl std::error::Error for UnicodeSetError {}

impl From<DataError> for UnicodeSetError {
    fn from(e: DataError) -> Self {
        UnicodeSetError::PropDataLoad(e)
    }
}

#[derive(PartialEq)]
pub enum UnicodeSetSpanCondition {
    Contained,
    NotContained,
}