unicode_intervals/
lib.rs

1//! [![github]](https://github.com/Stranger6667/unicode-intervals) [![crates-io]](https://crates.io/crates/unicode-intervals) [![docs-rs]](https://docs.rs/unicode-intervals)
2//!
3//! [github]: https://img.shields.io/badge/github-8da0cb?style=flat-square&labelColor=555555&logo=github
4//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=flat-square&labelColor=555555&logo=rust
5//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=flat-square&labelColor=555555&logo=docs.rs
6//!
7//! <br>
8//!
9//! This library provides a way to search for Unicode code point intervals by categories, ranges,
10//! and custom character sets.
11//!
//! The main purpose of `unicode-intervals` is to simplify generating strings that match
//! specific criteria.
14//!
15//! # Examples
16//!
17//! Raw Unicode codepoint intervals from the latest Unicode version:
18//!
19//! ```rust
20//! use unicode_intervals::UnicodeCategory;
21//!
22//! let intervals = unicode_intervals::query()
23//!     .include_categories(
24//!         UnicodeCategory::UPPERCASE_LETTER |
25//!         UnicodeCategory::LOWERCASE_LETTER
26//!     )
27//!     .max_codepoint(128)
28//!     .include_characters("☃")
29//!     .intervals()
30//!     .expect("Invalid query input");
31//! assert_eq!(intervals, &[(65, 90), (97, 122), (9731, 9731)]);
32//! ```
33//!
34//! `IntervalSet` for index-like access to the underlying codepoints:
35//!
36//! ```rust
37//! use unicode_intervals::UnicodeCategory;
38//!
39//! let interval_set = unicode_intervals::query()
40//!     .include_categories(UnicodeCategory::UPPERCASE_LETTER)
41//!     .interval_set()
42//!     .expect("Invalid query input");
43//! // Get 10th codepoint in this interval set
44//! assert_eq!(interval_set.codepoint_at(10), Some('K' as u32));
45//! assert_eq!(interval_set.index_of('K'), Some(10));
46//! ```
47//!
48//! Query specific Unicode version:
49//!
50//! ```rust
51//! use unicode_intervals::UnicodeVersion;
52//!
53//! let intervals = UnicodeVersion::V11_0_0.query()
54//!     .max_codepoint(128)
55//!     .include_characters("☃")
56//!     .intervals()
57//!     .expect("Invalid query input");
58//! assert_eq!(intervals, &[(0, 128), (9731, 9731)]);
59//! ```
60//!
61//! Restrict the output to code points within a certain range:
62//!
63//! ```rust
64//! let intervals = unicode_intervals::query()
65//!     .min_codepoint(65)
66//!     .max_codepoint(128)
67//!     .intervals()
68//!     .expect("Invalid query input");
69//! assert_eq!(intervals, &[(65, 128)])
70//! ```
71//!
72//! Include or exclude specific characters:
73//!
74//! ```rust
75//! # use unicode_intervals::UnicodeCategory;
76//! let intervals = unicode_intervals::query()
77//!     .include_categories(UnicodeCategory::PARAGRAPH_SEPARATOR)
78//!     .include_characters("-123")
79//!     .intervals()
80//!     .expect("Invalid query input");
81//! assert_eq!(intervals, &[(45, 45), (49, 51), (8233, 8233)])
82//! ```
83//!
84//! # Unicode version support
85//!
86//! `unicode-intervals` supports Unicode 9.0.0 - 15.0.0.
87#![warn(
88    clippy::cast_possible_truncation,
89    clippy::doc_markdown,
90    clippy::explicit_iter_loop,
91    clippy::map_unwrap_or,
92    clippy::match_same_arms,
93    clippy::needless_borrow,
94    clippy::needless_pass_by_value,
95    clippy::print_stdout,
96    clippy::redundant_closure,
97    clippy::trivially_copy_pass_by_ref,
98    missing_debug_implementations,
99    missing_docs,
100    trivial_casts,
101    trivial_numeric_casts,
102    unused_extern_crates,
103    unused_import_braces,
104    variant_size_differences,
105    clippy::integer_arithmetic,
106    clippy::unwrap_used,
107    clippy::semicolon_if_nothing_returned,
108    clippy::cargo
109)]
110#![allow(clippy::redundant_static_lifetimes)]
111use crate::constants::MAX_CODEPOINT;
112use core::fmt;
113use std::str::FromStr;
114
115mod categories;
116mod constants;
117mod error;
118mod intervals;
119mod intervalset;
120mod query;
121mod tables;
122pub use crate::{
123    categories::{UnicodeCategory, UnicodeCategorySet},
124    error::Error,
125    intervalset::IntervalSet,
126};
127
#[cfg(feature = "__benchmark_internals")]
/// Internals used for benchmarking.
///
/// Only compiled when the `__benchmark_internals` feature is enabled; it re-exports
/// otherwise private helpers under public paths.
pub mod internals {
    /// Unicode categories: re-export of `crate::categories::merge`.
    pub mod categories {
        pub use crate::categories::merge;
    }

    /// Intervals manipulation: re-exports of `from_str`, `merge` and `subtract`
    /// from `crate::intervals`.
    pub mod intervals {
        pub use crate::intervals::{from_str, merge, subtract};
    }

    /// Querying Unicode intervals: re-exports of `intervals_for_set` and `query`
    /// from `crate::query`.
    pub mod query {
        pub use crate::query::{intervals_for_set, query};
    }
}
146
/// Interval between two Unicode codepoints, stored as `(start, end)`.
///
/// Both bounds are inclusive: `(65, 90)` covers `'A'..='Z'`, and a single codepoint
/// is represented as `(n, n)`, e.g. `(9731, 9731)` for `'☃'`.
pub type Interval = (u32, u32);
149
/// Versions of the Unicode Standard that this crate can be queried for.
///
/// Variants are declared from the oldest to the newest supported release.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum UnicodeVersion {
    /// Unicode Standard, version 9.0.0
    V9_0_0,
    /// Unicode Standard, version 10.0.0
    V10_0_0,
    /// Unicode Standard, version 11.0.0
    V11_0_0,
    /// Unicode Standard, version 12.0.0
    V12_0_0,
    /// Unicode Standard, version 12.1.0
    V12_1_0,
    /// Unicode Standard, version 13.0.0
    V13_0_0,
    /// Unicode Standard, version 14.0.0
    V14_0_0,
    /// Unicode Standard, version 15.0.0
    V15_0_0,
}
170
171impl fmt::Display for UnicodeVersion {
172    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
173        f.write_str(self.as_str())
174    }
175}
176
177impl FromStr for UnicodeVersion {
178    type Err = Error;
179
180    fn from_str(s: &str) -> Result<Self, Self::Err> {
181        match s {
182            "9.0.0" => Ok(UnicodeVersion::V9_0_0),
183            "10.0.0" => Ok(UnicodeVersion::V10_0_0),
184            "11.0.0" => Ok(UnicodeVersion::V11_0_0),
185            "12.0.0" => Ok(UnicodeVersion::V12_0_0),
186            "12.1.0" => Ok(UnicodeVersion::V12_1_0),
187            "13.0.0" => Ok(UnicodeVersion::V13_0_0),
188            "14.0.0" => Ok(UnicodeVersion::V14_0_0),
189            "15.0.0" => Ok(UnicodeVersion::V15_0_0),
190            _ => Err(Error::InvalidVersion(s.to_string().into_boxed_str())),
191        }
192    }
193}
194
195impl UnicodeVersion {
196    /// Unicode version as a string.
197    #[must_use]
198    pub const fn as_str(self) -> &'static str {
199        match self {
200            UnicodeVersion::V9_0_0 => "9.0.0",
201            UnicodeVersion::V10_0_0 => "10.0.0",
202            UnicodeVersion::V11_0_0 => "11.0.0",
203            UnicodeVersion::V12_0_0 => "12.0.0",
204            UnicodeVersion::V12_1_0 => "12.1.0",
205            UnicodeVersion::V13_0_0 => "13.0.0",
206            UnicodeVersion::V14_0_0 => "14.0.0",
207            UnicodeVersion::V15_0_0 => "15.0.0",
208        }
209    }
210    /// Get the latest Unicode version.
211    #[must_use]
212    pub const fn latest() -> UnicodeVersion {
213        UnicodeVersion::V15_0_0
214    }
215    /// A sorted slice of slices where each item is a slice of intervals for every Unicode category.
216    /// They are sorted alphabetically by their full name.
217    #[inline]
218    #[must_use]
219    pub const fn table(self) -> &'static [&'static [Interval]] {
220        match self {
221            UnicodeVersion::V9_0_0 => tables::v9_0_0::BY_NAME,
222            UnicodeVersion::V10_0_0 => tables::v10_0_0::BY_NAME,
223            UnicodeVersion::V11_0_0 => tables::v11_0_0::BY_NAME,
224            UnicodeVersion::V12_0_0 => tables::v12_0_0::BY_NAME,
225            UnicodeVersion::V12_1_0 => tables::v12_1_0::BY_NAME,
226            UnicodeVersion::V13_0_0 => tables::v13_0_0::BY_NAME,
227            UnicodeVersion::V14_0_0 => tables::v14_0_0::BY_NAME,
228            UnicodeVersion::V15_0_0 => tables::v15_0_0::BY_NAME,
229        }
230    }
231
232    /// Get a slice of intervals for the provided Unicode category.
233    #[inline]
234    #[must_use]
235    pub const fn intervals_for(self, category: UnicodeCategory) -> &'static [Interval] {
236        self.table()[category as usize]
237    }
238
239    /// Unicode categories sorted by the number of intervals inside.
240    #[inline]
241    #[must_use]
242    pub const fn normalized_categories(self) -> [UnicodeCategory; 30] {
243        // Collect all categories & their lengths
244        let mut lengths: [(UnicodeCategory, usize); 30] = [(UnicodeCategory::Cc, 0); 30];
245        let mut idx = 0;
246        let table = self.table();
247        let categories = [
248            UnicodeCategory::Pe,
249            UnicodeCategory::Pc,
250            UnicodeCategory::Cc,
251            UnicodeCategory::Sc,
252            UnicodeCategory::Pd,
253            UnicodeCategory::Nd,
254            UnicodeCategory::Me,
255            UnicodeCategory::Pf,
256            UnicodeCategory::Cf,
257            UnicodeCategory::Pi,
258            UnicodeCategory::Nl,
259            UnicodeCategory::Zl,
260            UnicodeCategory::Ll,
261            UnicodeCategory::Sm,
262            UnicodeCategory::Lm,
263            UnicodeCategory::Sk,
264            UnicodeCategory::Mn,
265            UnicodeCategory::Ps,
266            UnicodeCategory::Lo,
267            UnicodeCategory::No,
268            UnicodeCategory::Po,
269            UnicodeCategory::So,
270            UnicodeCategory::Zp,
271            UnicodeCategory::Co,
272            UnicodeCategory::Zs,
273            UnicodeCategory::Mc,
274            UnicodeCategory::Cs,
275            UnicodeCategory::Lt,
276            UnicodeCategory::Cn,
277            UnicodeCategory::Lu,
278        ];
279        // `idx` is always less than 30 and will not overflow
280        #[allow(clippy::integer_arithmetic)]
281        while idx < table.len() {
282            lengths[idx] = (categories[idx], table[idx].len());
283            idx += 1;
284        }
285        // Bubble sort by length.
286        // The main reason to use bubble sort is that it works in the `const` context
287
288        loop {
289            let mut swapped = false;
290            let mut idx = 1;
291            // Arithmetic here will not overflow as it is always less than 30 and more than 1
292            #[allow(clippy::integer_arithmetic)]
293            while idx < lengths.len() {
294                if lengths[idx - 1].1 > lengths[idx].1 {
295                    let left = lengths[idx - 1];
296                    let right = lengths[idx];
297                    lengths[idx - 1] = right;
298                    lengths[idx] = left;
299                    swapped = true;
300                }
301                idx += 1;
302            }
303            if !swapped {
304                break;
305            }
306        }
307
308        // Fill only categories & skip Cc & Cs
309        let mut output: [UnicodeCategory; 30] = [
310            UnicodeCategory::Cc,
311            UnicodeCategory::Cc,
312            UnicodeCategory::Cc,
313            UnicodeCategory::Cc,
314            UnicodeCategory::Cc,
315            UnicodeCategory::Cc,
316            UnicodeCategory::Cc,
317            UnicodeCategory::Cc,
318            UnicodeCategory::Cc,
319            UnicodeCategory::Cc,
320            UnicodeCategory::Cc,
321            UnicodeCategory::Cc,
322            UnicodeCategory::Cc,
323            UnicodeCategory::Cc,
324            UnicodeCategory::Cc,
325            UnicodeCategory::Cc,
326            UnicodeCategory::Cc,
327            UnicodeCategory::Cc,
328            UnicodeCategory::Cc,
329            UnicodeCategory::Cc,
330            UnicodeCategory::Cc,
331            UnicodeCategory::Cc,
332            UnicodeCategory::Cc,
333            UnicodeCategory::Cc,
334            UnicodeCategory::Cc,
335            UnicodeCategory::Cc,
336            UnicodeCategory::Cc,
337            UnicodeCategory::Cc,
338            UnicodeCategory::Cc,
339            UnicodeCategory::Cs,
340        ];
341        let mut idx = 0;
342        let mut ptr = 0;
343
344        while idx < lengths.len() {
345            let (category, _) = lengths[idx];
346            // `idx` & `ptr` are always less than 30 and will not overflow
347            #[allow(clippy::integer_arithmetic)]
348            if category as u8 == UnicodeCategory::Cc as u8
349                || category as u8 == UnicodeCategory::Cs as u8
350            {
351                idx += 1;
352            } else {
353                output[ptr] = category;
354                ptr += 1;
355                idx += 1;
356            }
357        }
358        output
359    }
360
361    /// A Query builder for specifying the input parameters to `intervals()` / `interval_set` methods.
362    #[must_use]
363    #[inline]
364    pub fn query<'a>(self) -> IntervalQuery<'a> {
365        IntervalQuery::new(self)
366    }
367
368    /// Find intervals matching the query.
369    ///
370    /// # Errors
371    ///
372    ///   - `min_codepoint > max_codepoint`
373    ///   - `min_codepoint > 1114111` or `max_codepoint > 1114111`
374    pub fn intervals<'a>(
375        self,
376        include_categories: impl Into<Option<UnicodeCategorySet>>,
377        exclude_categories: impl Into<Option<UnicodeCategorySet>>,
378        include_characters: impl Into<Option<&'a str>>,
379        exclude_characters: impl Into<Option<&'a str>>,
380        min_codepoint: impl Into<Option<u32>>,
381        max_codepoint: impl Into<Option<u32>>,
382    ) -> Result<Vec<Interval>, Error> {
383        let exclude_categories: UnicodeCategorySet = exclude_categories
384            .into()
385            .unwrap_or_else(UnicodeCategorySet::new);
386        let min_codepoint = min_codepoint.into().unwrap_or(0);
387        let max_codepoint = max_codepoint.into().unwrap_or(MAX_CODEPOINT);
388        self.intervals_impl(
389            include_categories.into(),
390            exclude_categories,
391            include_characters.into(),
392            exclude_characters.into(),
393            min_codepoint,
394            max_codepoint,
395        )
396    }
397
398    fn intervals_impl(
399        self,
400        include_categories: Option<UnicodeCategorySet>,
401        exclude_categories: UnicodeCategorySet,
402        include_characters: Option<&str>,
403        exclude_characters: Option<&str>,
404        min_codepoint: u32,
405        max_codepoint: u32,
406    ) -> Result<Vec<Interval>, Error> {
407        if min_codepoint > MAX_CODEPOINT || max_codepoint > MAX_CODEPOINT {
408            return Err(Error::CodepointNotInRange(min_codepoint, max_codepoint));
409        }
410        if min_codepoint > max_codepoint {
411            return Err(Error::InvalidCodepoints(min_codepoint, max_codepoint));
412        }
413        Ok(query::query(
414            self,
415            include_categories,
416            exclude_categories,
417            include_characters.unwrap_or(""),
418            exclude_characters.unwrap_or(""),
419            min_codepoint,
420            max_codepoint,
421        ))
422    }
423
424    /// Build an `IndexSet` for the intervals matching the query.
425    ///
426    /// # Errors
427    ///
428    ///   - `min_codepoint > max_codepoint`
429    ///   - `min_codepoint > 1114111` or `max_codepoint > 1114111`
430    pub fn interval_set<'a>(
431        self,
432        include_categories: impl Into<Option<UnicodeCategorySet>>,
433        exclude_categories: impl Into<Option<UnicodeCategorySet>>,
434        include_characters: impl Into<Option<&'a str>>,
435        exclude_characters: impl Into<Option<&'a str>>,
436        min_codepoint: impl Into<Option<u32>>,
437        max_codepoint: impl Into<Option<u32>>,
438    ) -> Result<IntervalSet, Error> {
439        let intervals = self.intervals(
440            include_categories,
441            exclude_categories,
442            include_characters,
443            exclude_characters,
444            min_codepoint,
445            max_codepoint,
446        )?;
447        Ok(IntervalSet::new(intervals))
448    }
449}
450
/// A Query builder for specifying the input parameters to the `intervals()` and
/// `interval_set()` methods of `UnicodeVersion`.
///
/// The builder allows for a more convenient and readable way to specify the input parameters,
/// instead of relying on multiple function arguments.
///
/// Character strings are borrowed for the lifetime `'a`; the builder does not copy them.
///
/// # Examples
///
/// ```rust
/// use unicode_intervals::{UnicodeVersion, UnicodeCategory};
///
/// let intervals = UnicodeVersion::V15_0_0.query()
///     .include_categories(UnicodeCategory::UPPERCASE_LETTER | UnicodeCategory::LOWERCASE_LETTER)
///     .max_codepoint(128)
///     .include_characters("☃")
///     .intervals()
///     .expect("Invalid query input");
/// assert_eq!(intervals, &[(65, 90), (97, 122), (9731, 9731)]);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct IntervalQuery<'a> {
    // Unicode version the query runs against.
    version: UnicodeVersion,
    // Categories passed to `include_categories()`; `None` until that setter is called.
    include_categories: Option<UnicodeCategorySet>,
    // Categories passed to `exclude_categories()`; `None` until that setter is called.
    exclude_categories: Option<UnicodeCategorySet>,
    // Characters passed to `include_characters()`; `None` until that setter is called.
    include_characters: Option<&'a str>,
    // Characters passed to `exclude_characters()`; `None` until that setter is called.
    exclude_characters: Option<&'a str>,
    // Lower codepoint bound; starts at `0`.
    min_codepoint: u32,
    // Upper codepoint bound; starts at `MAX_CODEPOINT`.
    max_codepoint: u32,
}
479
480impl<'a> IntervalQuery<'a> {
481    fn new(version: UnicodeVersion) -> IntervalQuery<'a> {
482        IntervalQuery {
483            version,
484            include_categories: None,
485            exclude_categories: None,
486            include_characters: None,
487            exclude_characters: None,
488            min_codepoint: 0,
489            max_codepoint: MAX_CODEPOINT,
490        }
491    }
492    /// Set `include_categories`.
493    #[must_use]
494    pub fn include_categories(
495        mut self,
496        include_categories: impl Into<Option<UnicodeCategorySet>>,
497    ) -> IntervalQuery<'a> {
498        self.include_categories = include_categories.into();
499        self
500    }
501    /// Set `exclude_categories`.
502    #[must_use]
503    pub fn exclude_categories(
504        mut self,
505        exclude_categories: impl Into<Option<UnicodeCategorySet>>,
506    ) -> IntervalQuery<'a> {
507        self.exclude_categories = exclude_categories.into();
508        self
509    }
510    /// Set `include_characters`.
511    #[must_use]
512    pub fn include_characters(mut self, include_characters: &'a str) -> IntervalQuery<'a> {
513        self.include_characters = Some(include_characters);
514        self
515    }
516    /// Set `exclude_characters`.
517    #[must_use]
518    pub fn exclude_characters(mut self, exclude_characters: &'a str) -> IntervalQuery<'a> {
519        self.exclude_characters = Some(exclude_characters);
520        self
521    }
522    /// Set `min_codepoint`.
523    #[must_use]
524    pub fn min_codepoint(mut self, min_codepoint: u32) -> IntervalQuery<'a> {
525        self.min_codepoint = min_codepoint;
526        self
527    }
528    /// Set `max_codepoint`.
529    #[must_use]
530    pub fn max_codepoint(mut self, max_codepoint: u32) -> IntervalQuery<'a> {
531        self.max_codepoint = max_codepoint;
532        self
533    }
534    /// Find intervals matching the query.
535    ///
536    /// # Errors
537    ///
538    ///   - `min_codepoint > max_codepoint`
539    ///   - `min_codepoint > 1114111` or `max_codepoint > 1114111`
540    pub fn intervals(&self) -> Result<Vec<Interval>, Error> {
541        self.version.intervals(
542            self.include_categories,
543            self.exclude_categories,
544            self.include_characters,
545            self.exclude_characters,
546            self.min_codepoint,
547            self.max_codepoint,
548        )
549    }
550    /// Build an `IndexSet` for the intervals matching the query.
551    ///
552    /// # Errors
553    ///
554    ///   - `min_codepoint > max_codepoint`
555    ///   - `min_codepoint > 1114111` or `max_codepoint > 1114111`
556    pub fn interval_set(&self) -> Result<IntervalSet, Error> {
557        self.version.interval_set(
558            self.include_categories,
559            self.exclude_categories,
560            self.include_characters,
561            self.exclude_characters,
562            self.min_codepoint,
563            self.max_codepoint,
564        )
565    }
566}
567
568/// Build a query that finds Unicode intervals matching the query criteria.
569///
570/// Uses the latest available Unicode version.
571pub fn query<'a>() -> IntervalQuery<'a> {
572    UnicodeVersion::latest().query()
573}
574
#[cfg(test)]
mod tests {
    use super::*;
    use std::{
        collections::hash_map::DefaultHasher,
        hash::{Hash, Hasher},
    };
    use test_case::test_case;

    // `Pc` intervals of Unicode 15.0.0, optionally restricted by a lower and / or upper
    // codepoint bound. Intervals crossing a bound are expected to be trimmed to it.
    #[test_case(None, None, &[(95, 95), (8255, 8256), (8276, 8276), (65075, 65076), (65101, 65103), (65343, 65343)])]
    #[test_case(None, Some(128), &[(95, 95)])]
    #[test_case(Some(65077), None, &[(65101, 65103), (65343, 65343)])]
    #[test_case(Some(65076), Some(65102), &[(65076, 65076), (65101, 65102)])]
    fn test_intervals(
        min_codepoint: Option<u32>,
        max_codepoint: Option<u32>,
        expected: &[Interval],
    ) {
        let intervals = UnicodeVersion::V15_0_0
            .intervals(
                UnicodeCategory::Pc,
                None,
                None,
                None,
                min_codepoint,
                max_codepoint,
            )
            .expect("Invalid query");
        assert_eq!(intervals, expected);
    }

    // `'A'` is the first codepoint of the `Lu` interval set limited to codepoints up to 128.
    #[test]
    fn test_interval_set() {
        let interval_set = UnicodeVersion::V15_0_0
            .interval_set(UnicodeCategory::Lu, None, None, None, None, 128)
            .expect("Invalid query");
        assert_eq!(interval_set.index_of('A'), Some(0));
    }

    // A query without any criteria covers the whole codepoint range.
    #[test]
    fn test_top_level_query() {
        assert_eq!(
            query().intervals().expect("Invalid query"),
            vec![(0, MAX_CODEPOINT)]
        );
    }

    // No `Pc` codepoint exists in `0..=50`, so only the explicitly included characters
    // remain. They are kept even though they are above `max_codepoint`.
    #[test]
    fn test_query_include_only_characters() {
        let intervals = UnicodeVersion::V15_0_0
            .query()
            .include_categories(UnicodeCategory::Pc)
            .min_codepoint(0)
            .max_codepoint(50)
            .include_characters("abc")
            .intervals()
            .expect("Invalid query");
        assert_eq!(intervals, &[(97, 99)]);
    }

    // Excluded characters are removed from the matched category intervals.
    #[test]
    fn test_query_exclude_only_characters() {
        let intervals = UnicodeVersion::V15_0_0
            .query()
            .include_categories(UnicodeCategory::UPPERCASE_LETTER)
            .max_codepoint(90)
            .exclude_characters("ABC")
            .intervals()
            .expect("Invalid query");
        assert_eq!(intervals, &[(68, 90)]);
    }

    // Excluding uppercase letters from `0..=90` leaves everything below `'A'` (65).
    #[test]
    fn test_query_exclude_categories() {
        let intervals = UnicodeVersion::V15_0_0
            .query()
            .exclude_categories(UnicodeCategory::UPPERCASE_LETTER)
            .max_codepoint(90)
            .intervals()
            .expect("Invalid query");
        assert_eq!(intervals, &[(0, 64)]);
    }

    // Included characters are merged into the intervals of the included category.
    #[test]
    fn test_query_include_category_and_characters() {
        let intervals = UnicodeVersion::V15_0_0
            .intervals(UnicodeCategory::Pc, None, "abc", None, None, None)
            .expect("Invalid query");
        assert_eq!(
            intervals,
            &[
                (95, 95),
                (97, 99),
                (8255, 8256),
                (8276, 8276),
                (65075, 65076),
                (65101, 65103),
                (65343, 65343)
            ]
        );
    }

    // Invalid bounds are rejected with the same message by `intervals()` and
    // `interval_set()`. The range error wins when a bound is above the maximum codepoint.
    #[test_case(
        1073741824,
        2147483648,
        "Codepoints should be in [0; 1114111] range. Got: [1073741824; 2147483648]"
    )]
    #[test_case(
        0,
        2147483648,
        "Codepoints should be in [0; 1114111] range. Got: [0; 2147483648]"
    )]
    #[test_case(
        5,
        1,
        "Minimum codepoint should be less or equal than maximum codepoint. Got 5 < 1"
    )]
    fn test_query_invalid_codepoints(min_codepoint: u32, max_codepoint: u32, expected: &str) {
        let error = UnicodeVersion::V15_0_0
            .query()
            .min_codepoint(min_codepoint)
            .max_codepoint(max_codepoint)
            .intervals()
            .expect_err("Should error");
        assert_eq!(error.to_string(), expected);
        let error = UnicodeVersion::V15_0_0
            .query()
            .min_codepoint(min_codepoint)
            .max_codepoint(max_codepoint)
            .interval_set()
            .expect_err("Should error");
        assert_eq!(error.to_string(), expected);
    }

    // Direct table lookup for a single category.
    #[test]
    fn test_intervals_for() {
        assert_eq!(
            UnicodeVersion::V15_0_0.intervals_for(UnicodeCategory::Pc),
            &[
                (95, 95),
                (8255, 8256),
                (8276, 8276),
                (65075, 65076),
                (65101, 65103),
                (65343, 65343),
            ]
        );
    }

    // Pins the full ordering for Unicode 15.0.0; `Cc` & `Cs` always come last.
    #[test]
    fn test_normalized_categories() {
        assert_eq!(
            UnicodeVersion::V15_0_0.normalized_categories(),
            [
                UnicodeCategory::Zl,
                UnicodeCategory::Zp,
                UnicodeCategory::Co,
                UnicodeCategory::Me,
                UnicodeCategory::Pc,
                UnicodeCategory::Zs,
                UnicodeCategory::Pf,
                UnicodeCategory::Lt,
                UnicodeCategory::Pi,
                UnicodeCategory::Nl,
                UnicodeCategory::Pd,
                UnicodeCategory::Sc,
                UnicodeCategory::Cf,
                UnicodeCategory::Sk,
                UnicodeCategory::Nd,
                UnicodeCategory::Sm,
                UnicodeCategory::Lm,
                UnicodeCategory::No,
                UnicodeCategory::Pe,
                UnicodeCategory::Ps,
                UnicodeCategory::Mc,
                UnicodeCategory::So,
                UnicodeCategory::Po,
                UnicodeCategory::Mn,
                UnicodeCategory::Lo,
                UnicodeCategory::Lu,
                UnicodeCategory::Ll,
                UnicodeCategory::Cn,
                UnicodeCategory::Cc,
                UnicodeCategory::Cs,
            ]
        );
    }

    // For every supported version, the union of all category tables has to cover the
    // whole codepoint range without gaps.
    #[test_case(UnicodeVersion::V9_0_0)]
    #[test_case(UnicodeVersion::V10_0_0)]
    #[test_case(UnicodeVersion::V11_0_0)]
    #[test_case(UnicodeVersion::V12_0_0)]
    #[test_case(UnicodeVersion::V12_1_0)]
    #[test_case(UnicodeVersion::V13_0_0)]
    #[test_case(UnicodeVersion::V14_0_0)]
    #[test_case(UnicodeVersion::V15_0_0)]
    fn test_successive_union(version: UnicodeVersion) {
        let mut x = vec![];
        for v in version.table() {
            x.extend_from_slice(v);
        }
        intervals::merge(&mut x);
        assert_eq!(x, vec![(0, MAX_CODEPOINT)]);
    }

    // `Display` output has to round-trip through `FromStr`.
    #[test_case(UnicodeVersion::V9_0_0, "9.0.0")]
    #[test_case(UnicodeVersion::V10_0_0, "10.0.0")]
    #[test_case(UnicodeVersion::V11_0_0, "11.0.0")]
    #[test_case(UnicodeVersion::V12_0_0, "12.0.0")]
    #[test_case(UnicodeVersion::V12_1_0, "12.1.0")]
    #[test_case(UnicodeVersion::V13_0_0, "13.0.0")]
    #[test_case(UnicodeVersion::V14_0_0, "14.0.0")]
    #[test_case(UnicodeVersion::V15_0_0, "15.0.0")]
    fn test_display(version: UnicodeVersion, expected: &str) {
        let string = version.to_string();
        assert_eq!(string, expected);
        assert_eq!(
            UnicodeVersion::from_str(&string).expect("Invalid version"),
            version
        );
    }

    // Every supported version string parses to its variant.
    #[test_case("9.0.0", UnicodeVersion::V9_0_0)]
    #[test_case("10.0.0", UnicodeVersion::V10_0_0)]
    #[test_case("11.0.0", UnicodeVersion::V11_0_0)]
    #[test_case("12.0.0", UnicodeVersion::V12_0_0)]
    #[test_case("12.1.0", UnicodeVersion::V12_1_0)]
    #[test_case("13.0.0", UnicodeVersion::V13_0_0)]
    #[test_case("14.0.0", UnicodeVersion::V14_0_0)]
    #[test_case("15.0.0", UnicodeVersion::V15_0_0)]
    fn test_version_from_str(version: &str, expected: UnicodeVersion) {
        assert_eq!(
            UnicodeVersion::from_str(version).expect("Invalid version"),
            expected
        );
    }

    // Unknown version strings are reported together with the offending input.
    #[test]
    fn test_version_from_str_error() {
        assert_eq!(
            UnicodeVersion::from_str("invalid")
                .expect_err("Should fail")
                .to_string(),
            "'invalid' is not a valid Unicode version"
        );
    }

    // Exercises the derived `Hash`, `Clone` & `Debug` implementations of `UnicodeVersion`.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn test_unicode_version_traits() {
        let version = UnicodeVersion::V15_0_0;
        let mut hasher = DefaultHasher::new();
        version.hash(&mut hasher);
        hasher.finish();
        let _ = version.clone();
        assert_eq!(format!("{version:?}"), "V15_0_0");
    }

    // Exercises the derived `Clone`, `Debug` & `PartialEq` implementations of
    // `IntervalQuery`; the `Debug` output also pins the builder defaults.
    #[test]
    fn test_interval_query_traits() {
        let query = UnicodeVersion::V15_0_0.query();
        let _ = query.clone();
        assert_eq!(
            format!("{query:?}"), 
            "IntervalQuery { version: V15_0_0, include_categories: None, exclude_categories: None, include_characters: None, exclude_characters: None, min_codepoint: 0, max_codepoint: 1114111 }"
        );
        assert_eq!(query, query);
    }
}
843}