1use regex::Regex;
7use std::{
8 cmp::{max, min},
9 sync::LazyLock,
10};
11
12static FIELD_RANGE: LazyLock<Regex> =
13 LazyLock::new(|| Regex::new(r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$").unwrap());
14
/// A selection of one or more delimiter-separated fields.
///
/// Field numbers are 1-based; a negative number counts from the last field
/// (`-1` is the last field).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldRange {
    /// Exactly one field.
    Single(i32),
    /// Every field from the first one up to and including the given field.
    LeftInf(i32),
    /// Every field from the given field up to and including the last one.
    RightInf(i32),
    /// Every field between the two given fields, both ends included.
    Both(i32, i32),
}
27
28impl FieldRange {
29 #[allow(clippy::should_implement_trait)]
31 pub fn from_str(range: &str) -> Option<FieldRange> {
32 use self::FieldRange::*;
33
34 let opt_caps = FIELD_RANGE.captures(range);
36 if let Some(caps) = opt_caps {
37 let opt_left = caps.name("left").map(|s| s.as_str().parse().unwrap_or(1));
38 let opt_right = caps.name("right").map(|s| s.as_str().parse().unwrap_or(-1));
39 let opt_sep = caps.name("sep").map(|s| s.as_str().to_string());
40
41 match (opt_left, opt_right) {
42 (None, None) => Some(RightInf(0)),
43 (Some(left), None) => {
44 match opt_sep {
45 None => Some(Single(left)), Some(_) => Some(RightInf(left)), }
48 }
49 (None, Some(right)) => {
50 match opt_sep {
51 None => Some(Single(right)), Some(_) => Some(LeftInf(right)), }
54 }
55 (Some(left), Some(right)) => Some(Both(left, right)), }
57 } else {
58 None
59 }
60 }
61
62 pub fn to_index_pair(&self, length: usize) -> Option<(usize, usize)> {
67 use self::FieldRange::*;
68 match *self {
69 Single(num) => {
70 let num = FieldRange::translate_neg(num, length);
71 if num == 0 || num > length {
72 None
73 } else {
74 Some((num - 1, num))
75 }
76 }
77 LeftInf(right) => {
78 let right = FieldRange::translate_neg(right, length);
79 if length == 0 || right == 0 {
80 None
81 } else {
82 let right = min(right, length);
83 Some((0, right))
84 }
85 }
86 RightInf(left) => {
87 let left = FieldRange::translate_neg(left, length);
88 if length == 0 || left > length {
89 None
90 } else {
91 let left = max(left, 1);
92 Some((left - 1, length))
93 }
94 }
95 Both(left, right) => {
96 let left = FieldRange::translate_neg(left, length);
97 let right = FieldRange::translate_neg(right, length);
98 if length == 0 || right == 0 || left > right || left > length {
99 None
100 } else {
101 Some((max(left, 1) - 1, min(right, length)))
102 }
103 }
104 }
105 }
106
107 fn translate_neg(idx: i32, length: usize) -> usize {
108 let len = length as i32;
109 let idx = if idx < 0 { idx + len + 1 } else { idx };
110 max(0, idx) as usize
111 }
112}
113
114fn get_ranges_by_delimiter(delimiter: &Regex, text: &str) -> Vec<(usize, usize)> {
117 let mut ranges = Vec::new();
118 let mut last = 0;
119 for mat in delimiter.find_iter(text) {
120 ranges.push((last, mat.start()));
121 last = mat.end();
122 }
123 ranges.push((last, text.len()));
124 ranges
125}
126
127pub fn get_string_by_field<'a>(delimiter: &Regex, text: &'a str, field: &FieldRange) -> Option<&'a str> {
132 let ranges = get_ranges_by_delimiter(delimiter, text);
133
134 if let Some((start, stop)) = field.to_index_pair(ranges.len()) {
135 let &(begin, _) = &ranges[start];
136 let &(_, end) = ranges.get(stop - 1).unwrap_or(&(text.len(), 0));
137 Some(&text[begin..end])
138 } else {
139 None
140 }
141}
142
143pub fn get_string_by_range<'a>(delimiter: &Regex, text: &'a str, range: &str) -> Option<&'a str> {
145 FieldRange::from_str(range).and_then(|field| get_string_by_field(delimiter, text, &field))
146}
147
148pub fn parse_matching_fields(delimiter: &Regex, text: &str, fields: &[FieldRange]) -> Vec<(usize, usize)> {
153 let ranges = get_ranges_by_delimiter(delimiter, text);
154
155 let mut ret = Vec::new();
156 for field in fields {
157 if let Some((start, stop)) = field.to_index_pair(ranges.len()) {
158 let &(begin, _) = &ranges[start];
159 let &(end, _) = ranges.get(stop).unwrap_or(&(text.len(), 0));
160 ret.push((begin, end));
161 }
162 }
163 ret
164}
165
166pub fn parse_transform_fields(delimiter: &Regex, text: &str, fields: &[FieldRange]) -> String {
168 let ranges = get_ranges_by_delimiter(delimiter, text);
169
170 let mut ret = String::new();
171 for field in fields {
172 if let Some((start, stop)) = field.to_index_pair(ranges.len()) {
173 let &(begin, _) = &ranges[start];
174 let &(end, _) = ranges.get(stop).unwrap_or(&(text.len(), 0));
175 ret.push_str(&text[begin..end]);
176 }
177 }
178 ret
179}
180
#[cfg(test)]
#[cfg_attr(coverage, coverage(off))]
mod test {
    use super::FieldRange::*;
    use super::*;
    use regex::Regex;

    /// Range expressions parse into the expected variant, and malformed ones are rejected.
    #[test]
    fn test_parse_range() {
        let cases: &[(&str, Option<FieldRange>)] = &[
            ("1", Some(Single(1))),
            ("-1", Some(Single(-1))),
            ("1..", Some(RightInf(1))),
            ("-1..", Some(RightInf(-1))),
            ("..1", Some(LeftInf(1))),
            ("..-1", Some(LeftInf(-1))),
            ("1..3", Some(Both(1, 3))),
            ("-1..-3", Some(Both(-1, -3))),
            ("..", Some(RightInf(0))),
            ("a..", None),
            ("..b", None),
            ("a..b", None),
        ];

        for (input, expected) in cases {
            assert_eq!(FieldRange::from_str(input), *expected, "parsing {input:?}");
        }
    }

    /// Every variant resolves to the expected zero-based index pair over ten fields.
    #[test]
    fn test_parse_field_range() {
        let cases: &[(FieldRange, Option<(usize, usize)>)] = &[
            (Single(0), None),
            (Single(1), Some((0, 1))),
            (Single(10), Some((9, 10))),
            (Single(11), None),
            (Single(-1), Some((9, 10))),
            (Single(-10), Some((0, 1))),
            (Single(-11), None),
            //
            (LeftInf(0), None),
            (LeftInf(1), Some((0, 1))),
            (LeftInf(8), Some((0, 8))),
            (LeftInf(10), Some((0, 10))),
            (LeftInf(11), Some((0, 10))),
            (LeftInf(-1), Some((0, 10))),
            (LeftInf(-8), Some((0, 3))),
            (LeftInf(-9), Some((0, 2))),
            (LeftInf(-10), Some((0, 1))),
            (LeftInf(-11), None),
            //
            (RightInf(0), Some((0, 10))),
            (RightInf(1), Some((0, 10))),
            (RightInf(8), Some((7, 10))),
            (RightInf(10), Some((9, 10))),
            (RightInf(11), None),
            (RightInf(-1), Some((9, 10))),
            (RightInf(-8), Some((2, 10))),
            (RightInf(-9), Some((1, 10))),
            (RightInf(-10), Some((0, 10))),
            (RightInf(-11), Some((0, 10))),
            //
            (Both(0, 0), None),
            (Both(0, 1), Some((0, 1))),
            (Both(0, 10), Some((0, 10))),
            (Both(0, 11), Some((0, 10))),
            (Both(1, -1), Some((0, 10))),
            (Both(1, -9), Some((0, 2))),
            (Both(1, -10), Some((0, 1))),
            (Both(1, -11), None),
            (Both(-9, -9), Some((1, 2))),
            (Both(-9, -8), Some((1, 3))),
            (Both(-9, 0), None),
            (Both(-9, 1), None),
            (Both(-9, 2), Some((1, 2))),
            (Both(-1, 0), None),
            (Both(11, 20), None),
            (Both(-11, -11), None),
        ];

        for (field, expected) in cases {
            assert_eq!(field.to_index_pair(10), *expected, "{field:?} over 10 fields");
        }
    }

    /// Selected fields are concatenated in order, each keeping its trailing delimiter.
    #[test]
    fn test_parse_transform_fields() {
        let comma = Regex::new(",").unwrap();
        let transform = |fields: &[FieldRange]| parse_transform_fields(&comma, "A,B,C,D,E,F", fields);

        assert_eq!(transform(&[Single(2), Single(4), Single(-1), Single(-7)]), "B,D,F");
        assert_eq!(transform(&[LeftInf(3), LeftInf(-6), LeftInf(-7)]), "A,B,C,A,");
        assert_eq!(
            transform(&[RightInf(5), RightInf(-2), RightInf(-1), RightInf(8)]),
            "E,FE,FF"
        );
        assert_eq!(
            transform(&[Both(3, 3), Both(-9, 2), Both(6, 10), Both(-9, -5)]),
            "C,A,B,FA,B,"
        );
    }

    /// Matched ranges are byte offsets, so multi-byte characters widen them.
    #[test]
    fn test_parse_matching_fields() {
        let comma = Regex::new(",").unwrap();
        let matching = |fields: &[FieldRange]| parse_matching_fields(&comma, "中,华,人,民,E,F", fields);

        assert_eq!(
            matching(&[Single(2), Single(4), Single(-1), Single(-7)]),
            vec![(4, 8), (12, 16), (18, 19)]
        );
        assert_eq!(
            matching(&[LeftInf(3), LeftInf(-6), LeftInf(-7)]),
            vec![(0, 12), (0, 4)]
        );
        assert_eq!(
            matching(&[RightInf(5), RightInf(-2), RightInf(-1), RightInf(7)]),
            vec![(16, 19), (16, 19), (18, 19)]
        );
        assert_eq!(
            matching(&[Both(3, 3), Both(-8, 2), Both(6, 10), Both(-8, -5)]),
            vec![(8, 12), (0, 8), (18, 19), (0, 8)]
        );
    }

    /// A NUL byte works as a delimiter like any other character.
    #[test]
    fn test_null_delimiter() {
        let nul = Regex::new("\x00").unwrap();
        let text = "a\x00b\x00c";

        for (number, expected) in [(1, "a"), (2, "b"), (3, "c")] {
            assert_eq!(get_string_by_field(&nul, text, &Single(number)), Some(expected));
        }

        assert_eq!(parse_matching_fields(&nul, text, &[Single(2)]), vec![(2, 4)]);
        assert_eq!(
            parse_matching_fields(&nul, text, &[Single(1), Single(3)]),
            vec![(0, 2), (4, 5)]
        );
    }

    /// Field extraction over a text that ends with a delimiter (empty last field).
    #[test]
    fn test_get_string_by_field() {
        let comma = Regex::new(",").unwrap();
        let text = "a,b,c,";

        let cases: &[(FieldRange, Option<&str>)] = &[
            (Single(0), None),
            (Single(1), Some("a")),
            (Single(2), Some("b")),
            (Single(3), Some("c")),
            (Single(4), Some("")),
            (Single(5), None),
            (Single(6), None),
            (Single(-1), Some("")),
            (Single(-2), Some("c")),
            (Single(-3), Some("b")),
            (Single(-4), Some("a")),
            (Single(-5), None),
            (Single(-6), None),
            //
            (LeftInf(0), None),
            (LeftInf(1), Some("a")),
            (LeftInf(2), Some("a,b")),
            (LeftInf(3), Some("a,b,c")),
            (LeftInf(4), Some("a,b,c,")),
            (LeftInf(5), Some("a,b,c,")),
            (LeftInf(-5), None),
            (LeftInf(-4), Some("a")),
            (LeftInf(-3), Some("a,b")),
            (LeftInf(-2), Some("a,b,c")),
            (LeftInf(-1), Some("a,b,c,")),
            //
            (RightInf(0), Some("a,b,c,")),
            (RightInf(1), Some("a,b,c,")),
            (RightInf(2), Some("b,c,")),
            (RightInf(3), Some("c,")),
            (RightInf(4), Some("")),
            (RightInf(5), None),
            (RightInf(-5), Some("a,b,c,")),
            (RightInf(-4), Some("a,b,c,")),
            (RightInf(-3), Some("b,c,")),
            (RightInf(-2), Some("c,")),
            (RightInf(-1), Some("")),
            //
            (Both(0, 0), None),
            (Both(0, 1), Some("a")),
            (Both(0, 2), Some("a,b")),
            (Both(0, 3), Some("a,b,c")),
            (Both(0, 4), Some("a,b,c,")),
            (Both(0, 5), Some("a,b,c,")),
            (Both(1, 1), Some("a")),
            (Both(1, 2), Some("a,b")),
            (Both(1, 3), Some("a,b,c")),
            (Both(1, 4), Some("a,b,c,")),
            (Both(1, 5), Some("a,b,c,")),
            (Both(2, 5), Some("b,c,")),
            (Both(3, 5), Some("c,")),
            (Both(4, 5), Some("")),
            (Both(5, 5), None),
            (Both(6, 5), None),
            (Both(2, 3), Some("b,c")),
            (Both(3, 3), Some("c")),
            (Both(4, 3), None),
        ];

        for (field, expected) in cases {
            assert_eq!(
                get_string_by_field(&comma, text, field),
                *expected,
                "{field:?} on {text:?}"
            );
        }
    }
}