unic_char_range/range.rs
// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10
11use core::{char, cmp};
12
13#[cfg(feature = "std")]
14use std::collections::Bound;
15
16use self::cmp::Ordering;
17use crate::CharIter;
18
19/// A range of unicode code points.
20///
21/// The most idiomatic way to construct this range is through the use of the `chars!` macro:
22///
23/// ```
24/// #[macro_use] extern crate unic_char_range;
25/// use unic_char_range::CharRange;
26///
27/// # fn main() {
28/// assert_eq!(chars!('a'..='z'), CharRange::closed('a', 'z'));
29/// assert_eq!(chars!('a'..'z'), CharRange::open_right('a', 'z'));
30/// assert_eq!(chars!(..), CharRange::all());
31/// # }
32/// ```
33///
34/// If constructed in reverse order, such that `self.high` is ordered before `self.low`,
35/// the range is empty. If you want to iterate in decreasing order, use `.iter().rev()`.
36/// All empty ranges are considered equal no matter the internal state.
37#[derive(Copy, Clone, Debug, Eq)]
38pub struct CharRange {
39 /// The lowest character in this range (inclusive).
40 pub low: char,
41
42 /// The highest character in this range (inclusive).
43 pub high: char,
44}
45
46/// Constructors
47impl CharRange {
48 /// Construct a closed range of characters.
49 ///
50 /// If `stop` is ordered before `start`, the resulting range will be empty.
51 ///
52 /// # Example
53 ///
54 /// ```
55 /// # use unic_char_range::*;
56 /// assert_eq!(
57 /// CharRange::closed('a', 'd').iter().collect::<Vec<_>>(),
58 /// vec!['a', 'b', 'c', 'd']
59 /// )
60 /// ```
61 pub fn closed(start: char, stop: char) -> CharRange {
62 CharRange {
63 low: start,
64 high: stop,
65 }
66 }
67
68 /// Construct a half open (right) range of characters.
69 ///
70 /// # Example
71 ///
72 /// ```
73 /// # use unic_char_range::*;
74 /// assert_eq!(
75 /// CharRange::open_right('a', 'd').iter().collect::<Vec<_>>(),
76 /// vec!['a', 'b', 'c']
77 /// )
78 /// ```
79 pub fn open_right(start: char, stop: char) -> CharRange {
80 let mut iter = CharRange::closed(start, stop).iter();
81 let _ = iter.next_back();
82 iter.into()
83 }
84
85 /// Construct a half open (left) range of characters.
86 ///
87 /// # Example
88 ///
89 /// ```
90 /// # use unic_char_range::*;
91 /// assert_eq!(
92 /// CharRange::open_left('a', 'd').iter().collect::<Vec<_>>(),
93 /// vec!['b', 'c', 'd']
94 /// )
95 /// ```
96 pub fn open_left(start: char, stop: char) -> CharRange {
97 let mut iter = CharRange::closed(start, stop).iter();
98 let _ = iter.next();
99 iter.into()
100 }
101
102 /// Construct a fully open range of characters.
103 ///
104 /// # Example
105 ///
106 /// ```
107 /// # use unic_char_range::*;
108 /// assert_eq!(
109 /// CharRange::open('a', 'd').iter().collect::<Vec<_>>(),
110 /// vec!['b', 'c']
111 /// )
112 /// ```
113 pub fn open(start: char, stop: char) -> CharRange {
114 let mut iter = CharRange::closed(start, stop).iter();
115 let _ = iter.next();
116 let _ = iter.next_back();
117 iter.into()
118 }
119
120 #[cfg(feature = "std")]
121 /// Construct a range of characters from bounds.
122 pub fn bound(start: Bound<char>, stop: Bound<char>) -> CharRange {
123 let start = if start == Bound::Unbounded {
124 Bound::Included('\u{0}')
125 } else {
126 start
127 };
128 let stop = if stop == Bound::Unbounded {
129 Bound::Included(char::MAX)
130 } else {
131 stop
132 };
133 match (start, stop) {
134 (Bound::Included(start), Bound::Included(stop)) => CharRange::closed(start, stop),
135 (Bound::Excluded(start), Bound::Excluded(stop)) => CharRange::open(start, stop),
136 (Bound::Included(start), Bound::Excluded(stop)) => CharRange::open_right(start, stop),
137 (Bound::Excluded(start), Bound::Included(stop)) => CharRange::open_left(start, stop),
138 (Bound::Unbounded, _) | (_, Bound::Unbounded) => unreachable!(),
139 }
140 }
141
142 /// Construct a range over all Unicode characters (Unicode Scalar Values).
143 pub fn all() -> CharRange {
144 CharRange::closed('\u{0}', char::MAX)
145 }
146
147 /// Construct a range over all characters of *assigned* Unicode Planes.
148 ///
149 /// Assigned *normal* (non-special) Unicode Planes are:
150 /// - Plane 0: *Basic Multilingual Plane* (BMP)
151 /// - Plane 1: *Supplementary Multilingual Plane* (SMP)
152 /// - Plane 2: *Supplementary Ideographic Plane* (SIP)
153 ///
154 /// Unicode Plane 14, *Supplementary Special-purpose Plane* (SSP), is not included in this
155 /// range, mainly because of the limit of `CharRange` only supporting a continuous range.
156 ///
157 /// Unicode Planes 3 to 13 are *Unassigned* planes and therefore excluded.
158 ///
159 /// Unicode Planes 15 and 16 are *Private Use Area* planes and won't have Unicode-assigned
160 /// characters.
161 pub fn assigned_normal_planes() -> CharRange {
162 CharRange::closed('\u{0}', '\u{2_FFFF}')
163 }
164}
165
166/// Collection-like fns
167impl CharRange {
168 /// Does this range include a character?
169 ///
170 /// # Examples
171 ///
172 /// ```
173 /// # use unic_char_range::CharRange;
174 /// assert!( CharRange::closed('a', 'g').contains('d'));
175 /// assert!( ! CharRange::closed('a', 'g').contains('z'));
176 ///
177 /// assert!( ! CharRange:: open ('a', 'a').contains('a'));
178 /// assert!( ! CharRange::closed('z', 'a').contains('g'));
179 /// ```
180 pub fn contains(&self, ch: char) -> bool {
181 self.low <= ch && ch <= self.high
182 }
183
184 /// Determine the ordering of this range and a character.
185 ///
186 /// # Panics
187 ///
188 /// Panics if the range is empty. This fn may be adjusted in the future to not panic
189 /// in optimized builds. Even if so, an empty range will never compare as `Ordering::Equal`.
190 pub fn cmp_char(&self, ch: char) -> Ordering {
191 // possible optimization: only assert this in debug builds
192 assert!(!self.is_empty(), "Cannot compare empty range's ordering");
193 if self.high < ch {
194 Ordering::Less
195 } else if self.low > ch {
196 Ordering::Greater
197 } else {
198 Ordering::Equal
199 }
200 }
201
202 /// How many characters are in this range?
203 pub fn len(&self) -> usize {
204 self.iter().len()
205 }
206
207 /// Is this range empty?
208 pub fn is_empty(&self) -> bool {
209 self.low > self.high
210 }
211
212 /// Create an iterator over this range.
213 pub fn iter(&self) -> CharIter {
214 (*self).into()
215 }
216}
217
218impl IntoIterator for CharRange {
219 type IntoIter = CharIter;
220 type Item = char;
221
222 fn into_iter(self) -> CharIter {
223 self.iter()
224 }
225}
226
227impl PartialEq<CharRange> for CharRange {
228 fn eq(&self, other: &CharRange) -> bool {
229 (self.is_empty() && other.is_empty()) || (self.low == other.low && self.high == other.high)
230 }
231}