skim/
field.rs

1use regex::Regex;
2use std::cmp::{max, min};
3use std::sync::LazyLock;
4
5static FIELD_RANGE: LazyLock<Regex> =
6    LazyLock::new(|| Regex::new(r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$").unwrap());
7
/// A selection of one or more delimiter-separated fields.
///
/// Field numbers are 1-based; a negative number counts from the last field
/// (`-1` is the last field, `-2` the one before it, ...).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldRange {
    /// Exactly one field, written `N`.
    Single(i32),
    /// Every field from the first one up to and including `N`, written `..N`.
    LeftInf(i32),
    /// Every field from `N` up to and including the last one, written `N..`.
    RightInf(i32),
    /// Every field from the first number up to and including the second, written `N..M`.
    Both(i32, i32),
}
15
16impl FieldRange {
17    #[allow(clippy::should_implement_trait)]
18    pub fn from_str(range: &str) -> Option<FieldRange> {
19        use self::FieldRange::*;
20
21        // "1", "1..", "..10", "1..10", etc.
22        let opt_caps = FIELD_RANGE.captures(range);
23        if let Some(caps) = opt_caps {
24            let opt_left = caps.name("left").map(|s| s.as_str().parse().unwrap_or(1));
25            let opt_right = caps.name("right").map(|s| s.as_str().parse().unwrap_or(-1));
26            let opt_sep = caps.name("sep").map(|s| s.as_str().to_string());
27
28            match (opt_left, opt_right) {
29                (None, None) => Some(RightInf(0)),
30                (Some(left), None) => {
31                    match opt_sep {
32                        None => Some(Single(left)),      // 1
33                        Some(_) => Some(RightInf(left)), // 1..
34                    }
35                }
36                (None, Some(right)) => {
37                    match opt_sep {
38                        None => Some(Single(right)),     // 1 (should not happen)
39                        Some(_) => Some(LeftInf(right)), // ..1 (should not happen)
40                    }
41                }
42                (Some(left), Some(right)) => Some(Both(left, right)), // 1..3
43            }
44        } else {
45            None
46        }
47    }
48
49    // Parse FieldRange to index pair (left, right)
50    // e.g. 1..3 => (0, 4)
51    // note that field range is inclusive while the output index will exclude right end
52    pub fn to_index_pair(&self, length: usize) -> Option<(usize, usize)> {
53        use self::FieldRange::*;
54        match *self {
55            Single(num) => {
56                let num = FieldRange::translate_neg(num, length);
57                if num == 0 || num > length {
58                    None
59                } else {
60                    Some((num - 1, num))
61                }
62            }
63            LeftInf(right) => {
64                let right = FieldRange::translate_neg(right, length);
65                if length == 0 || right == 0 {
66                    None
67                } else {
68                    let right = min(right, length);
69                    Some((0, right))
70                }
71            }
72            RightInf(left) => {
73                let left = FieldRange::translate_neg(left, length);
74                if length == 0 || left > length {
75                    None
76                } else {
77                    let left = max(left, 1);
78                    Some((left - 1, length))
79                }
80            }
81            Both(left, right) => {
82                let left = FieldRange::translate_neg(left, length);
83                let right = FieldRange::translate_neg(right, length);
84                if length == 0 || right == 0 || left > right || left > length {
85                    None
86                } else {
87                    Some((max(left, 1) - 1, min(right, length)))
88                }
89            }
90        }
91    }
92
93    fn translate_neg(idx: i32, length: usize) -> usize {
94        let len = length as i32;
95        let idx = if idx < 0 { idx + len + 1 } else { idx };
96        max(0, idx) as usize
97    }
98}
99
100// ("|", "a|b||c") -> [(0, 2), (2, 4), (4, 5), (5, 6)]
101// explain: split to ["a|", "b|", "|", "c"]
102fn get_ranges_by_delimiter(delimiter: &Regex, text: &str) -> Vec<(usize, usize)> {
103    let mut ranges = Vec::new();
104    let mut last = 0;
105    for mat in delimiter.find_iter(text) {
106        ranges.push((last, mat.start()));
107        last = mat.end();
108    }
109    ranges.push((last, text.len()));
110    ranges
111}
112
113// e.g. delimiter = Regex::new(",").unwrap()
114// Note that this is differnt with `to_index_pair`, it uses delimiters like ".*?,"
115pub fn get_string_by_field<'a>(delimiter: &Regex, text: &'a str, field: &FieldRange) -> Option<&'a str> {
116    let ranges = get_ranges_by_delimiter(delimiter, text);
117
118    if let Some((start, stop)) = field.to_index_pair(ranges.len()) {
119        let &(begin, _) = &ranges[start];
120        let &(_, end) = ranges.get(stop - 1).unwrap_or(&(text.len(), 0));
121        Some(&text[begin..end])
122    } else {
123        None
124    }
125}
126
127pub fn get_string_by_range<'a>(delimiter: &Regex, text: &'a str, range: &str) -> Option<&'a str> {
128    FieldRange::from_str(range).and_then(|field| get_string_by_field(delimiter, text, &field))
129}
130
131// -> a vector of the matching fields (byte wise).
132// Given delimiter `,`, text: "a,b,c"
133// &[Single(2), LeftInf(2)] => [(2, 4), (0, 4)]
134pub fn parse_matching_fields(delimiter: &Regex, text: &str, fields: &[FieldRange]) -> Vec<(usize, usize)> {
135    let ranges = get_ranges_by_delimiter(delimiter, text);
136
137    let mut ret = Vec::new();
138    for field in fields {
139        if let Some((start, stop)) = field.to_index_pair(ranges.len()) {
140            let &(begin, _) = &ranges[start];
141            let &(end, _) = ranges.get(stop).unwrap_or(&(text.len(), 0));
142            ret.push((begin, end));
143        }
144    }
145    ret
146}
147
148pub fn parse_transform_fields(delimiter: &Regex, text: &str, fields: &[FieldRange]) -> String {
149    let ranges = get_ranges_by_delimiter(delimiter, text);
150
151    let mut ret = String::new();
152    for field in fields {
153        if let Some((start, stop)) = field.to_index_pair(ranges.len()) {
154            let &(begin, _) = &ranges[start];
155            let &(end, _) = ranges.get(stop).unwrap_or(&(text.len(), 0));
156            ret.push_str(&text[begin..end]);
157        }
158    }
159    ret
160}
161
#[cfg(test)]
mod test {
    use super::FieldRange::*;
    use super::*;
    use regex::Regex;

    // Textual ranges and the `FieldRange` they must parse to.
    #[test]
    fn test_parse_range() {
        let cases: &[(&str, Option<FieldRange>)] = &[
            ("1", Some(Single(1))),
            ("-1", Some(Single(-1))),
            ("1..", Some(RightInf(1))),
            ("-1..", Some(RightInf(-1))),
            ("..1", Some(LeftInf(1))),
            ("..-1", Some(LeftInf(-1))),
            ("1..3", Some(Both(1, 3))),
            ("-1..-3", Some(Both(-1, -3))),
            ("..", Some(RightInf(0))),
            ("a..", None),
            ("..b", None),
            ("a..b", None),
        ];

        for (input, expected) in cases {
            assert_eq!(FieldRange::from_str(input), *expected, "input: {input:?}");
        }
    }

    // Every range is resolved against a record of 10 fields.
    #[test]
    fn test_parse_field_range() {
        let cases: &[(FieldRange, Option<(usize, usize)>)] = &[
            (Single(0), None),
            (Single(1), Some((0, 1))),
            (Single(10), Some((9, 10))),
            (Single(11), None),
            (Single(-1), Some((9, 10))),
            (Single(-10), Some((0, 1))),
            (Single(-11), None),
            //
            (LeftInf(0), None),
            (LeftInf(1), Some((0, 1))),
            (LeftInf(8), Some((0, 8))),
            (LeftInf(10), Some((0, 10))),
            (LeftInf(11), Some((0, 10))),
            (LeftInf(-1), Some((0, 10))),
            (LeftInf(-8), Some((0, 3))),
            (LeftInf(-9), Some((0, 2))),
            (LeftInf(-10), Some((0, 1))),
            (LeftInf(-11), None),
            //
            (RightInf(0), Some((0, 10))),
            (RightInf(1), Some((0, 10))),
            (RightInf(8), Some((7, 10))),
            (RightInf(10), Some((9, 10))),
            (RightInf(11), None),
            (RightInf(-1), Some((9, 10))),
            (RightInf(-8), Some((2, 10))),
            (RightInf(-9), Some((1, 10))),
            (RightInf(-10), Some((0, 10))),
            (RightInf(-11), Some((0, 10))),
            //
            (Both(0, 0), None),
            (Both(0, 1), Some((0, 1))),
            (Both(0, 10), Some((0, 10))),
            (Both(0, 11), Some((0, 10))),
            (Both(1, -1), Some((0, 10))),
            (Both(1, -9), Some((0, 2))),
            (Both(1, -10), Some((0, 1))),
            (Both(1, -11), None),
            (Both(-9, -9), Some((1, 2))),
            (Both(-9, -8), Some((1, 3))),
            (Both(-9, 0), None),
            (Both(-9, 1), None),
            (Both(-9, 2), Some((1, 2))),
            (Both(-1, 0), None),
            (Both(11, 20), None),
            (Both(-11, -11), None),
        ];

        for (range, expected) in cases {
            assert_eq!(range.to_index_pair(10), *expected, "range: {range:?}");
        }
    }

    #[test]
    fn test_parse_transform_fields() {
        // delimiter is ","
        let re = Regex::new(",").unwrap();
        let transform = |fields: &[FieldRange]| super::parse_transform_fields(&re, "A,B,C,D,E,F", fields);

        assert_eq!(transform(&[Single(2), Single(4), Single(-1), Single(-7)]), "B,D,F");
        assert_eq!(transform(&[LeftInf(3), LeftInf(-6), LeftInf(-7)]), "A,B,C,A,");
        assert_eq!(
            transform(&[RightInf(5), RightInf(-2), RightInf(-1), RightInf(8)]),
            "E,FE,FF"
        );
        assert_eq!(
            transform(&[Both(3, 3), Both(-9, 2), Both(6, 10), Both(-9, -5)]),
            "C,A,B,FA,B,"
        );
    }

    #[test]
    fn test_parse_matching_fields() {
        // delimiter is ","
        let re = Regex::new(",").unwrap();

        // bytes:3  3  3 3
        //       中,华,人,民,E,F",
        let matching = |fields: &[FieldRange]| super::parse_matching_fields(&re, "中,华,人,民,E,F", fields);

        assert_eq!(
            matching(&[Single(2), Single(4), Single(-1), Single(-7)]),
            vec![(4, 8), (12, 16), (18, 19)]
        );
        assert_eq!(
            matching(&[LeftInf(3), LeftInf(-6), LeftInf(-7)]),
            vec![(0, 12), (0, 4)]
        );
        assert_eq!(
            matching(&[RightInf(5), RightInf(-2), RightInf(-1), RightInf(7)]),
            vec![(16, 19), (16, 19), (18, 19)]
        );
        assert_eq!(
            matching(&[Both(3, 3), Both(-8, 2), Both(6, 10), Both(-8, -5)]),
            vec![(8, 12), (0, 8), (18, 19), (0, 8)]
        );
    }

    #[test]
    fn test_get_string_by_field() {
        // delimiter is ","
        let re = Regex::new(",").unwrap();
        let text = "a,b,c,";

        let cases: &[(FieldRange, Option<&str>)] = &[
            (Single(0), None),
            (Single(1), Some("a")),
            (Single(2), Some("b")),
            (Single(3), Some("c")),
            (Single(4), Some("")),
            (Single(5), None),
            (Single(6), None),
            (Single(-1), Some("")),
            (Single(-2), Some("c")),
            (Single(-3), Some("b")),
            (Single(-4), Some("a")),
            (Single(-5), None),
            (Single(-6), None),
            //
            (LeftInf(0), None),
            (LeftInf(1), Some("a")),
            (LeftInf(2), Some("a,b")),
            (LeftInf(3), Some("a,b,c")),
            (LeftInf(4), Some("a,b,c,")),
            (LeftInf(5), Some("a,b,c,")),
            (LeftInf(-5), None),
            (LeftInf(-4), Some("a")),
            (LeftInf(-3), Some("a,b")),
            (LeftInf(-2), Some("a,b,c")),
            (LeftInf(-1), Some("a,b,c,")),
            //
            (RightInf(0), Some("a,b,c,")),
            (RightInf(1), Some("a,b,c,")),
            (RightInf(2), Some("b,c,")),
            (RightInf(3), Some("c,")),
            (RightInf(4), Some("")),
            (RightInf(5), None),
            (RightInf(-5), Some("a,b,c,")),
            (RightInf(-4), Some("a,b,c,")),
            (RightInf(-3), Some("b,c,")),
            (RightInf(-2), Some("c,")),
            (RightInf(-1), Some("")),
            //
            (Both(0, 0), None),
            (Both(0, 1), Some("a")),
            (Both(0, 2), Some("a,b")),
            (Both(0, 3), Some("a,b,c")),
            (Both(0, 4), Some("a,b,c,")),
            (Both(0, 5), Some("a,b,c,")),
            (Both(1, 1), Some("a")),
            (Both(1, 2), Some("a,b")),
            (Both(1, 3), Some("a,b,c")),
            (Both(1, 4), Some("a,b,c,")),
            (Both(1, 5), Some("a,b,c,")),
            (Both(2, 5), Some("b,c,")),
            (Both(3, 5), Some("c,")),
            (Both(4, 5), Some("")),
            (Both(5, 5), None),
            (Both(6, 5), None),
            (Both(2, 3), Some("b,c")),
            (Both(3, 3), Some("c")),
            (Both(4, 3), None),
        ];

        for (field, expected) in cases {
            assert_eq!(get_string_by_field(&re, text, field), *expected, "field: {field:?}");
        }
    }
}