bl_range_set/
char_range_set.rs

1
2use crate::discrete_range_set::{DiscreteRange, DiscreteRangeSet};
3
/// Boxed, type-erased error returned by the fallible functions in this module.
pub type Error = Box<dyn std::error::Error>;
/// `Result` alias whose error type is fixed to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
6
/// An inclusive range of characters, stored as `(first, last)`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct CharRange(char, char);
9
10impl Into<DiscreteRange<u32>> for CharRange {
11    fn into(self) -> DiscreteRange<u32> {
12        DiscreteRange::new(self.0 as u32, self.1 as u32).unwrap()
13    }
14}
15
16impl CharRange {
17    pub fn new(start: char, end: char) -> Result<Self> {
18        if start > end {
19            return Err("Invalid char range (negative size)".into());
20        }
21        Ok(CharRange(start, end))
22    }
23
24    pub fn len(&self) -> u32 {
25        self.1 as u32 - self.0 as u32 + 1
26    }
27
28    pub fn contains(&self, value: char) -> bool {
29        value >= self.0 && value <= self.1
30    }
31
32    pub fn try_merge(&self, other: &Self) -> Result<Self> {
33        if self.1 as u32 + 1 < other.0 as u32 || other.1 as u32 + 1 < self.0 as u32 {
34            return Err("Disjoint char ranges cannot be merged".into());
35        }
36
37        Ok(CharRange(self.0.min(other.0), self.1.max(other.1))) 
38    }
39
40    pub fn char(c: char) -> Self {
41        CharRange(c, c)
42    }
43
44    pub fn lowercase() -> Self {
45        CharRange('a', 'z')
46    }
47
48    pub fn uppercase() -> Self {
49        CharRange('A', 'Z')
50    }
51
52    pub fn digits() -> Self {
53        CharRange('0', '9')
54    }
55
56    pub fn all() -> Self {
57        CharRange('\u{0000}', '\u{10FFFF}')
58    }
59}
60
61impl From<(char, char)> for CharRange {
62    fn from(tuple: (char, char)) -> Self {
63        CharRange::new(tuple.0, tuple.1).unwrap()
64    }
65}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
68pub struct CharRangeSet(DiscreteRangeSet<u32>);
69
70impl CharRangeSet {
71    pub fn new() -> Self {
72        CharRangeSet(DiscreteRangeSet::new())
73    }
74
75    pub fn add_range<R: Into<CharRange>>(&mut self, range: R) {
76        self.0.add_range(range.into());
77    }
78
79    pub fn contains(&self, value: char) -> bool {
80        self.0.contains(value as u32)
81    }
82
83    pub fn contains_any<I: IntoIterator<Item = char>>(&self, values: I) -> bool {
84        for v in values {
85            if self.contains(v) {
86                return true;
87            }
88        }
89        false
90    }
91
92    pub fn contains_all<I: IntoIterator<Item = char>>(&self, values: I) -> bool {
93        for v in values {
94            if !self.contains(v) {
95                return false;
96            }
97        }
98        true
99    }
100
101    pub fn with_range<R: Into<CharRange>>(mut self, range: R) -> Self {
102        self.add_range(range);
103        self
104    }
105
106    pub fn with_lowercase(mut self) -> Self {
107        self.add_range(CharRange::lowercase());
108        self
109    }
110
111    pub fn with_uppercase(mut self) -> Self {
112        self.add_range(CharRange::uppercase());
113        self
114    }
115
116    pub fn with_digits(mut self) -> Self {
117        self.add_range(CharRange::digits());
118        self
119    }
120
121    pub fn with_all(mut self) -> Self {
122        self.add_range(CharRange::all());
123        self
124    }
125
126    pub fn with_alpha(mut self) -> Self {
127        self.add_range(CharRange::lowercase());
128        self.add_range(CharRange::uppercase());
129        self
130    }
131
132    pub fn with_alphanumeric(mut self) -> Self {
133        self.add_range(CharRange::lowercase());
134        self.add_range(CharRange::uppercase());
135        self.add_range(CharRange::digits());
136        self
137    }
138
139    pub fn with_chars<I: IntoIterator<Item = char>>(mut self, chars: I) -> Self {
140        for c in chars {
141            self.add_range(CharRange::char(c));
142        }
143        self
144    }
145}
146
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Length, membership and merging of a single range.
    #[test]
    fn test_char_range() {
        let a_to_d = CharRange::new('a', 'd').unwrap();
        assert_eq!(a_to_d.len(), 4);
        assert!(a_to_d.contains('b'));
        assert!(!a_to_d.contains('e'));

        // Overlapping ranges merge into one range covering both.
        let c_to_f = CharRange::new('c', 'f').unwrap();
        let expected = CharRange::new('a', 'f').unwrap();
        assert_eq!(a_to_d.try_merge(&c_to_f).unwrap(), expected);
    }

    /// Ranges added out of order and overlapping still answer membership correctly.
    #[test]
    fn test_char_range_set() {
        let mut set = CharRangeSet::new();
        for (lo, hi) in [('a', 'd'), ('f', 'h'), ('c', 'g')] {
            set.add_range(CharRange::new(lo, hi).unwrap());
        }

        assert!(set.contains('b'));
        assert!(set.contains('e'));
        assert!(!set.contains('i'));
    }

    /// The chained `with_*` builders accumulate ranges.
    #[test]
    fn test_char_range_set_builder() {
        let extra_chars = vec!['_', '-'];
        let set = CharRangeSet::new()
            .with_lowercase()
            .with_uppercase()
            .with_digits()
            .with_chars(extra_chars);

        for member in ['a', 'Z', '5', '_'] {
            assert!(set.contains(member));
        }
        for outsider in ['@', ' '] {
            assert!(!set.contains(outsider));
        }
    }
}
191}