unic_char_range/
range.rs

1// Copyright 2017 The UNIC Project Developers.
2//
3// See the COPYRIGHT file at the top-level directory of this distribution.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11use core::{char, cmp};
12
13#[cfg(feature = "std")]
14use std::collections::Bound;
15
16use self::cmp::Ordering;
17use crate::CharIter;
18
19/// A range of unicode code points.
20///
21/// The most idiomatic way to construct this range is through the use of the `chars!` macro:
22///
23/// ```
24/// #[macro_use] extern crate unic_char_range;
25/// use unic_char_range::CharRange;
26///
27/// # fn main() {
28/// assert_eq!(chars!('a'..='z'), CharRange::closed('a', 'z'));
29/// assert_eq!(chars!('a'..'z'), CharRange::open_right('a', 'z'));
30/// assert_eq!(chars!(..), CharRange::all());
31/// # }
32/// ```
33///
34/// If constructed in reverse order, such that `self.high` is ordered before `self.low`,
35/// the range is empty. If you want to iterate in decreasing order, use `.iter().rev()`.
36/// All empty ranges are considered equal no matter the internal state.
37#[derive(Copy, Clone, Debug, Eq)]
38pub struct CharRange {
39    /// The lowest character in this range (inclusive).
40    pub low: char,
41
42    /// The highest character in this range (inclusive).
43    pub high: char,
44}
45
46/// Constructors
47impl CharRange {
48    /// Construct a closed range of characters.
49    ///
50    /// If `stop` is ordered before `start`, the resulting range will be empty.
51    ///
52    /// # Example
53    ///
54    /// ```
55    /// # use unic_char_range::*;
56    /// assert_eq!(
57    ///     CharRange::closed('a', 'd').iter().collect::<Vec<_>>(),
58    ///     vec!['a', 'b', 'c', 'd']
59    /// )
60    /// ```
61    pub fn closed(start: char, stop: char) -> CharRange {
62        CharRange {
63            low: start,
64            high: stop,
65        }
66    }
67
68    /// Construct a half open (right) range of characters.
69    ///
70    /// # Example
71    ///
72    /// ```
73    /// # use unic_char_range::*;
74    /// assert_eq!(
75    ///     CharRange::open_right('a', 'd').iter().collect::<Vec<_>>(),
76    ///     vec!['a', 'b', 'c']
77    /// )
78    /// ```
79    pub fn open_right(start: char, stop: char) -> CharRange {
80        let mut iter = CharRange::closed(start, stop).iter();
81        let _ = iter.next_back();
82        iter.into()
83    }
84
85    /// Construct a half open (left) range of characters.
86    ///
87    /// # Example
88    ///
89    /// ```
90    /// # use unic_char_range::*;
91    /// assert_eq!(
92    ///     CharRange::open_left('a', 'd').iter().collect::<Vec<_>>(),
93    ///     vec!['b', 'c', 'd']
94    /// )
95    /// ```
96    pub fn open_left(start: char, stop: char) -> CharRange {
97        let mut iter = CharRange::closed(start, stop).iter();
98        let _ = iter.next();
99        iter.into()
100    }
101
102    /// Construct a fully open range of characters.
103    ///
104    /// # Example
105    ///
106    /// ```
107    /// # use unic_char_range::*;
108    /// assert_eq!(
109    ///     CharRange::open('a', 'd').iter().collect::<Vec<_>>(),
110    ///     vec!['b', 'c']
111    /// )
112    /// ```
113    pub fn open(start: char, stop: char) -> CharRange {
114        let mut iter = CharRange::closed(start, stop).iter();
115        let _ = iter.next();
116        let _ = iter.next_back();
117        iter.into()
118    }
119
120    #[cfg(feature = "std")]
121    /// Construct a range of characters from bounds.
122    pub fn bound(start: Bound<char>, stop: Bound<char>) -> CharRange {
123        let start = if start == Bound::Unbounded {
124            Bound::Included('\u{0}')
125        } else {
126            start
127        };
128        let stop = if stop == Bound::Unbounded {
129            Bound::Included(char::MAX)
130        } else {
131            stop
132        };
133        match (start, stop) {
134            (Bound::Included(start), Bound::Included(stop)) => CharRange::closed(start, stop),
135            (Bound::Excluded(start), Bound::Excluded(stop)) => CharRange::open(start, stop),
136            (Bound::Included(start), Bound::Excluded(stop)) => CharRange::open_right(start, stop),
137            (Bound::Excluded(start), Bound::Included(stop)) => CharRange::open_left(start, stop),
138            (Bound::Unbounded, _) | (_, Bound::Unbounded) => unreachable!(),
139        }
140    }
141
142    /// Construct a range over all Unicode characters (Unicode Scalar Values).
143    pub fn all() -> CharRange {
144        CharRange::closed('\u{0}', char::MAX)
145    }
146
147    /// Construct a range over all characters of *assigned* Unicode Planes.
148    ///
149    /// Assigned *normal* (non-special) Unicode Planes are:
150    /// - Plane 0: *Basic Multilingual Plane* (BMP)
151    /// - Plane 1: *Supplementary Multilingual Plane* (SMP)
152    /// - Plane 2: *Supplementary Ideographic Plane* (SIP)
153    ///
154    /// Unicode Plane 14, *Supplementary Special-purpose Plane* (SSP), is not included in this
155    /// range, mainly because of the limit of `CharRange` only supporting a continuous range.
156    ///
157    /// Unicode Planes 3 to 13 are *Unassigned* planes and therefore excluded.
158    ///
159    /// Unicode Planes 15 and 16 are *Private Use Area* planes and won't have Unicode-assigned
160    /// characters.
161    pub fn assigned_normal_planes() -> CharRange {
162        CharRange::closed('\u{0}', '\u{2_FFFF}')
163    }
164}
165
166/// Collection-like fns
167impl CharRange {
168    /// Does this range include a character?
169    ///
170    /// # Examples
171    ///
172    /// ```
173    /// # use unic_char_range::CharRange;
174    /// assert!(   CharRange::closed('a', 'g').contains('d'));
175    /// assert!( ! CharRange::closed('a', 'g').contains('z'));
176    ///
177    /// assert!( ! CharRange:: open ('a', 'a').contains('a'));
178    /// assert!( ! CharRange::closed('z', 'a').contains('g'));
179    /// ```
180    pub fn contains(&self, ch: char) -> bool {
181        self.low <= ch && ch <= self.high
182    }
183
184    /// Determine the ordering of this range and a character.
185    ///
186    /// # Panics
187    ///
188    /// Panics if the range is empty. This fn may be adjusted in the future to not panic
189    /// in optimized builds. Even if so, an empty range will never compare as `Ordering::Equal`.
190    pub fn cmp_char(&self, ch: char) -> Ordering {
191        // possible optimization: only assert this in debug builds
192        assert!(!self.is_empty(), "Cannot compare empty range's ordering");
193        if self.high < ch {
194            Ordering::Less
195        } else if self.low > ch {
196            Ordering::Greater
197        } else {
198            Ordering::Equal
199        }
200    }
201
202    /// How many characters are in this range?
203    pub fn len(&self) -> usize {
204        self.iter().len()
205    }
206
207    /// Is this range empty?
208    pub fn is_empty(&self) -> bool {
209        self.low > self.high
210    }
211
212    /// Create an iterator over this range.
213    pub fn iter(&self) -> CharIter {
214        (*self).into()
215    }
216}
217
218impl IntoIterator for CharRange {
219    type IntoIter = CharIter;
220    type Item = char;
221
222    fn into_iter(self) -> CharIter {
223        self.iter()
224    }
225}
226
227impl PartialEq<CharRange> for CharRange {
228    fn eq(&self, other: &CharRange) -> bool {
229        (self.is_empty() && other.is_empty()) || (self.low == other.low && self.high == other.high)
230    }
231}