unicode_intervals/
intervalset.rs1use crate::Interval;
2
3#[derive(Debug, Clone)]
7pub struct IntervalSet {
8 intervals: Vec<Interval>,
9 offsets: Vec<u32>,
10 size: u32,
11}
12
13impl IntervalSet {
14 #[must_use]
15 pub(crate) fn new(intervals: Vec<Interval>) -> IntervalSet {
16 let mut offsets = vec![0];
17 offsets.reserve_exact(intervals.len());
18 let mut size = 0;
19 #[allow(clippy::integer_arithmetic)]
21 for (left, right) in &intervals {
22 size += *right - *left + 1;
23 offsets.push(size);
24 }
25 IntervalSet {
26 intervals,
27 offsets,
28 size,
29 }
30 }
31
32 #[inline]
45 #[must_use]
46 pub fn len(&self) -> usize {
47 self.size as usize
48 }
49
50 #[inline]
65 #[must_use]
66 pub const fn is_empty(&self) -> bool {
67 self.size == 0
68 }
69
70 #[inline]
84 #[must_use]
85 pub fn contains(&self, codepoint: impl Into<u32>) -> bool {
86 self.index_of(codepoint.into()).is_some()
87 }
88
89 #[inline]
103 #[must_use]
104 pub fn codepoint_at(&self, index: u32) -> Option<u32> {
105 if index >= self.size {
106 return None;
107 }
108 #[allow(clippy::integer_arithmetic)]
110 let mut current = self.intervals.len() - 1;
111 if self.offsets[current] > index {
112 let (mut high, mut low) = (current, 0_usize);
113 #[allow(clippy::integer_arithmetic)]
118 while low + 1 < high {
119 let mid = (low + high) / 2;
120 if self.offsets[mid] <= index {
121 low = mid;
122 } else {
123 high = mid;
124 }
125 }
126 current = low;
127 }
128 #[allow(clippy::integer_arithmetic)]
130 Some(self.intervals[current].0 + index - self.offsets[current])
131 }
132
133 #[inline]
147 #[must_use]
148 pub fn index_of(&self, codepoint: impl Into<u32>) -> Option<u32> {
149 let codepoint = codepoint.into();
150 for (offset, (left, right)) in self.offsets.iter().zip(self.intervals.iter()) {
151 if *left == codepoint {
152 return Some(*offset);
153 } else if *left > codepoint {
154 return None;
155 } else if codepoint <= *right {
156 #[allow(clippy::integer_arithmetic)]
159 return Some(*offset + (codepoint - left));
160 }
161 }
162 None
163 }
164
165 #[inline]
182 #[must_use]
183 pub fn index_above(&self, codepoint: impl Into<u32>) -> u32 {
184 let codepoint = codepoint.into();
185 for (offset, (left, right)) in self.offsets.iter().zip(self.intervals.iter()) {
186 if *left >= codepoint {
187 return *offset;
188 } else if codepoint <= *right {
189 #[allow(clippy::integer_arithmetic)]
192 return *offset + (codepoint - left);
193 }
194 }
195 self.size
196 }
197
198 pub fn iter(&self) -> impl Iterator<Item = u32> + DoubleEndedIterator + '_ {
216 self.intervals
217 .iter()
218 .flat_map(|(left, right)| *left..=*right)
219 }
220}
221
222#[cfg(test)]
223mod tests {
224 use super::*;
225 use crate::{UnicodeCategory, UnicodeVersion};
226 use test_case::test_case;
227
228 fn uppercase_letters() -> IntervalSet {
229 crate::query()
230 .include_categories(UnicodeCategory::UPPERCASE_LETTER)
231 .interval_set()
232 .expect("Invalid query input")
233 }
234
235 #[test_case(vec![(1, 1)])]
236 #[test_case(vec![])]
237 fn test_index_not_present(intervals: Vec<Interval>) {
238 assert!(IntervalSet::new(intervals).index_of(0_u32).is_none());
239 }
240
241 #[test_case(vec![], 1, None)]
242 #[test_case(vec![(1, 10)], 11, None)]
243 fn test_get(intervals: Vec<Interval>, index: u32, expected: Option<u32>) {
244 assert_eq!(IntervalSet::new(intervals).codepoint_at(index), expected);
245 }
246
247 #[test_case(vec![(1, 10)], 1, 0)]
248 #[test_case(vec![(1, 10)], 2, 1)]
249 #[test_case(vec![(1, 10)], 100, 10)]
250 fn test_index_above(intervals: Vec<Interval>, index: u32, expected: u32) {
251 assert_eq!(IntervalSet::new(intervals).index_above(index), expected);
252 }
253
254 #[test_case('Z' as u32, 25; "In the set")]
255 #[test_case('b' as u32, 26; "Not in the set")]
256 #[test_case(125218, 1831; "Greater than all")]
257 fn test_index_above_with_uppercase_letters(codepoint: u32, expected: u32) {
258 let interval_set = uppercase_letters();
259 assert_eq!(interval_set.index_above(codepoint), expected);
260 }
261
262 #[test_case('C', true)]
263 #[test_case('a', false)]
264 fn test_contains(codepoint: char, expected: bool) {
265 let interval_set = uppercase_letters();
266 assert_eq!(interval_set.contains(codepoint), expected);
267 }
268
269 #[test_case(10, Some('K' as u32); "Look from left")]
270 #[test_case(27, Some('Á' as u32); "Look from right")]
271 #[test_case(1830, Some(125217); "Max codepoint in the set")]
272 #[test_case(10000, None)]
273 #[test_case(u32::MAX, None)]
274 fn test_codepoint_at(index: u32, expected: Option<u32>) {
275 let interval_set = uppercase_letters();
276 assert_eq!(interval_set.codepoint_at(index), expected);
277 }
278
279 #[test]
280 fn test_codepoint_at_empty_set() {
281 let interval_set = IntervalSet::new(vec![]);
282 assert!(interval_set.codepoint_at(0).is_none());
283 }
284
285 #[test_case('K' as u32, Some(10); "Look from left")]
286 #[test_case('Á' as u32, Some(27); "Look from right")]
287 #[test_case(125184, Some(1797))]
288 #[test_case(5, None)]
289 fn test_index_of(codepoint: u32, expected: Option<u32>) {
290 let interval_set = uppercase_letters();
291 assert_eq!(interval_set.index_of(codepoint), expected);
292 }
293
294 #[test]
295 fn test_iter() {
296 let intervals = crate::query()
297 .include_categories(UnicodeCategory::LOWERCASE_LETTER)
298 .intervals()
299 .expect("Invalid query input");
300 let interval_set = IntervalSet::new(intervals);
301 let codepoints: Vec<_> = interval_set.iter().collect();
302 let mut expected = Vec::with_capacity(interval_set.len());
303 for (left, right) in
304 UnicodeVersion::latest().intervals_for(UnicodeCategory::LOWERCASE_LETTER)
305 {
306 for codepoint in *left..=*right {
307 expected.push(codepoint);
308 }
309 }
310 assert_eq!(codepoints, expected);
311 assert_eq!(interval_set.len(), codepoints.len());
312 assert!(!interval_set.is_empty());
313 }
314
315 #[test]
316 fn test_iter_rev() {
317 let interval_set = uppercase_letters();
318 let mut iter = interval_set.iter().rev();
319 assert_eq!(iter.next(), Some(125217));
320 }
321
322 #[test]
323 #[allow(clippy::redundant_clone)]
324 fn test_interval_set_traits() {
325 let interval_set = IntervalSet::new(vec![(0, 1)]);
326 let _ = interval_set.clone();
327 assert_eq!(
328 format!("{interval_set:?}"),
329 "IntervalSet { intervals: [(0, 1)], offsets: [0, 2], size: 2 }"
330 );
331 }
332}