range_regex/
lib.rs

1use bumpalo::Bump;
2use std::fmt::Write;
3use std::iter::Peekable;
4use crate::digits::Base10DigitIterator;
5use crate::fill_by_nines::FillByNinesIterator;
6use crate::fill_by_zeros::FillByZerosIterator;
7
8mod digits;
9mod fill_by_nines;
10mod fill_by_zeros;
11
12pub fn regex_for_range(min: i32, max: i32) -> String {
13    let arena = Bump::new();
14    let mut pattern_buffer = String::with_capacity(256);
15    let mut min = min as i64;
16    let max = max as i64;
17    let mut positive_subpatterns = vec![];
18    let mut negative_subpatterns = vec![];
19
20    if min < 0 {
21        let min_ = if max < 0 {
22            max.abs()
23        } else {
24            1
25        };
26        let max_ = min.abs();
27
28        negative_subpatterns = split_to_patterns(min_, max_, &mut pattern_buffer, &arena);
29        min = 0;
30    }
31
32    if max >= 0 {
33        positive_subpatterns = split_to_patterns(min, max, &mut pattern_buffer, &arena);
34    }
35
36    let mut regex = String::new();
37    // negative only subpatterns
38    for subpattern in negative_subpatterns.iter() {
39        if !positive_subpatterns.contains(&*subpattern) {
40            write!(regex, "-{}|", subpattern).unwrap();
41        }
42    }
43    // positive only subpatterns
44    for subpattern in positive_subpatterns.iter() {
45        if !negative_subpatterns.contains(&*subpattern) {
46            write!(regex, "{}|", subpattern).unwrap();
47        }
48    }
49    // intersected subpatterns
50    for subpattern in negative_subpatterns.iter() {
51        if positive_subpatterns.contains(&*subpattern) {
52            write!(regex, "-?{}|", subpattern).unwrap();
53        }
54    }
55
56    regex.pop();
57    regex
58}
59
60#[inline(always)]
61fn split_to_patterns<'a>(min: i64, max: i64, pattern_buffer: &mut String, arena: &'a Bump) -> Vec<&'a str> {
62    let mut subpatterns = Vec::with_capacity(64);
63
64    let mut start = min;
65    for stop in split_to_ranges(min, max) {
66        range_to_pattern(start, stop, pattern_buffer);
67        subpatterns.push(&*arena.alloc_str(&pattern_buffer));
68        start = stop + 1;
69    }
70
71    subpatterns
72}
73
74/// - `a` a vector of integers in descending order.
75/// - `b` an iterator of integers in descending order.
76#[inline(always)]
77fn merge_desc_to_asc<I: Iterator<Item = i64>>(a: &mut Vec<i64>, b: I) {
78    fn merge_desc_to_asc_inner<I: Iterator<Item = i64>>(a: &mut Vec<i64>, idx: usize, mut b: Peekable<I>) {
79        if idx >= a.len() {
80            let value = b.next();
81            if let Some(value) = value {
82                let next = b.peek().copied();
83                merge_desc_to_asc_inner(a, idx, b);
84                if let Some(next) = next {
85                    if next != value {
86                        a.push(value);
87                    }
88                } else {
89                    a.push(value);
90                }
91                return;
92            }
93            a.clear();
94            return;
95        }
96        let va = a[idx];
97        let vb = b.peek().copied();
98        if let Some(vb) = vb {
99            if va > vb {
100                merge_desc_to_asc_inner(a, idx + 1, b);
101                a.push(va);
102                return;
103            } else {
104                b.next();
105                let next = b.peek().copied();
106                merge_desc_to_asc_inner(a, idx + ((va == vb) as usize), b);
107                if let Some(next) = next {
108                    if next != vb {
109                        a.push(vb);
110                    }
111                } else {
112                    a.push(vb);
113                }
114                return;
115            }
116        } else {
117            merge_desc_to_asc_inner(a, idx + 1, b);
118            a.push(va);
119        }
120    }
121
122    merge_desc_to_asc_inner(a, 0, b.peekable());
123}
124
125#[inline(always)]
126fn split_to_ranges(min: i64, max: i64) -> Vec<i64> {
127    let mut stops = Vec::with_capacity(64);
128
129    // The iterator emits items in ascending order.
130    let mut fill_by_nines_iterator = FillByNinesIterator::new(min, max);
131    while let Some(stop) = fill_by_nines_iterator.next() {
132        if let Some(&last) = stops.last() {
133            if last != stop {
134                stops.push(stop);
135            }
136        } else {
137            stops.push(stop);
138        }
139    }
140    // We push the max value to the stops list.
141    if let Some(&last) = stops.last() {
142        if last != max {
143            stops.push(max);
144        }
145    } else {
146        stops.push(max);
147    }
148    // We reverse the stops list to emit items in descending order.
149    stops.reverse();
150
151    // The iterator emits items in descending order, we now merge the lists.
152    merge_desc_to_asc(&mut stops, FillByZerosIterator::new(min, max));
153
154    stops
155}
156
157#[inline(always)]
158fn range_to_pattern(start: i64, stop: i64, pattern_buffer: &mut String) {
159    pattern_buffer.clear();
160    let mut any_digit_count: i64 = 0;
161
162    let start_digits = Base10DigitIterator::new(start);
163    let stop_digits = Base10DigitIterator::new(stop);
164    for (start_digit, stop_digit) in start_digits.zip(stop_digits) {
165        if start_digit == stop_digit {
166            pattern_buffer.push(start_digit);
167        } else if start_digit != '0' || stop_digit != '9' {
168            write!(
169                pattern_buffer,
170                "[{}-{}]",
171                start_digit,
172                stop_digit
173            ).unwrap();
174        } else {
175            any_digit_count += 1;
176        }
177    }
178
179    if any_digit_count > 0 {
180        pattern_buffer.push_str(r"\d");
181    }
182
183    if any_digit_count > 1 {
184        pattern_buffer.push_str(&format!("{{{}}}", any_digit_count));
185    }
186}
187
188#[cfg(test)]
189mod tests {
190    use super::*;
191
192    #[test]
193    fn test_range_to_pattern() {
194        assert_eq!(regex_for_range(1, 1), "1");
195        assert_eq!(regex_for_range(0, 1), "[0-1]");
196        assert_eq!(regex_for_range(-1, -1), "-1");
197        assert_eq!(regex_for_range(-1, 0), "-1|0");
198        assert_eq!(regex_for_range(-1, 1), "-1|[0-1]");
199        assert_eq!(regex_for_range(-4, -2), "-[2-4]");
200        assert_eq!(regex_for_range(-3, 1), "-[1-3]|[0-1]");
201        assert_eq!(regex_for_range(-2, 0), "-[1-2]|0");
202        assert_eq!(regex_for_range(0, 2), "[0-2]");
203        assert_eq!(regex_for_range(-1, 3), "-1|[0-3]");
204        assert_eq!(regex_for_range(65666, 65667), "6566[6-7]");
205        assert_eq!(regex_for_range(12, 3456), r"1[2-9]|[2-9]\d|[1-9]\d{2}|[1-2]\d{3}|3[0-3]\d{2}|34[0-4]\d|345[0-6]");
206        assert_eq!(regex_for_range(1, 19), r"[1-9]|1\d");
207        assert_eq!(regex_for_range(1, 99), r"[1-9]|[1-9]\d");
208    }
209
210    #[test]
211    fn test_merge_desc_to_asc() {
212        let mut a = vec![3, 2, 1];
213        let b = vec![4, 3, 2, 1];
214        merge_desc_to_asc(&mut a, b.into_iter());
215        assert_eq!(a, vec![1, 2, 3, 4]);
216    }
217}