1use bstr::ByteSlice;
8use regex::bytes::Regex;
9use std::{cmp::max, collections::VecDeque, str::FromStr};
10use thiserror::Error;
11
12const MAX: usize = usize::MAX;
14
15#[derive(Error, Debug, PartialEq)]
17pub enum FieldError {
18 #[error("Header not found: {0}")]
19 HeaderNotFound(String),
20 #[error("Fields and positions are numbered from 1: {0}")]
21 InvalidField(usize),
22 #[error("High end of range less than low end of range: {0}-{1}")]
23 InvalidOrder(usize, usize),
24 #[error("Failed to parse field: {0}")]
25 FailedParse(String),
26 #[error("No headers matched")]
27 NoHeadersMatched,
28}
29
30#[derive(Debug, Clone)]
31pub enum RegexOrString {
32 Regex(Regex),
33 String(String),
34}
35
36impl RegexOrString {
37 fn split<'a>(&'a self, line: &'a [u8]) -> Box<dyn Iterator<Item = &'a [u8]> + 'a> {
38 match self {
39 RegexOrString::Regex(r) => Box::new(r.split(line)),
40 RegexOrString::String(s) => Box::new(line.split_str(s)),
41 }
42 }
43}
44
45#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Copy, Clone)]
47pub struct FieldRange {
48 pub low: usize,
49 pub high: usize,
50 pub pos: usize,
52}
53
54impl FromStr for FieldRange {
55 type Err = FieldError;
56
57 fn from_str(s: &str) -> Result<FieldRange, FieldError> {
59 let mut parts = s.splitn(2, '-');
60
61 match (parts.next(), parts.next()) {
62 (Some(nm), None) => {
63 if let Ok(nm) = nm.parse::<usize>() {
64 if nm > 0 {
65 Ok(FieldRange {
66 low: nm - 1,
67 high: nm - 1,
68 pos: 0,
69 })
70 } else {
71 Err(FieldError::InvalidField(nm))
72 }
73 } else {
74 Err(FieldError::FailedParse(nm.to_owned()))
75 }
76 }
77 (Some(n), Some("")) => {
78 if let Ok(low) = n.parse::<usize>() {
79 if low > 0 {
80 Ok(FieldRange {
81 low: low - 1,
82 high: MAX - 1,
83 pos: 0,
84 })
85 } else {
86 Err(FieldError::InvalidField(low))
87 }
88 } else {
89 Err(FieldError::FailedParse(n.to_owned()))
90 }
91 }
92 (Some(""), Some(m)) => {
93 if let Ok(high) = m.parse::<usize>() {
94 if high > 0 {
95 Ok(FieldRange {
96 low: 0,
97 high: high - 1,
98 pos: 0,
99 })
100 } else {
101 Err(FieldError::InvalidField(high))
102 }
103 } else {
104 Err(FieldError::FailedParse(m.to_owned()))
105 }
106 }
107 (Some(n), Some(m)) => match (n.parse::<usize>(), m.parse::<usize>()) {
108 (Ok(low), Ok(high)) => {
109 if low > 0 && low <= high {
110 Ok(FieldRange {
111 low: low - 1,
112 high: high - 1,
113 pos: 0,
114 })
115 } else if low == 0 {
116 Err(FieldError::InvalidField(low))
117 } else {
118 Err(FieldError::InvalidOrder(low, high))
119 }
120 }
121 _ => Err(FieldError::FailedParse(format!("{}-{}", n, m))),
122 },
123 _ => unreachable!(),
124 }
125 }
126}
127
128impl FieldRange {
129 pub const fn default() -> Self {
130 Self {
131 low: 0,
132 high: MAX - 1,
133 pos: 0,
134 }
135 }
136
137 pub fn from_list(list: &str) -> Result<Vec<FieldRange>, FieldError> {
139 let mut ranges: Vec<FieldRange> = vec![];
140 for (i, item) in list.split(',').enumerate() {
141 let mut rnge: FieldRange = FromStr::from_str(item)?;
142 rnge.pos = i;
143 ranges.push(rnge);
144 }
145 FieldRange::post_process_ranges(&mut ranges);
146
147 Ok(ranges)
148 }
149
150 pub fn from_header_list(
152 list: &[Regex],
153 header: &[u8],
154 delim: &RegexOrString,
155 header_is_regex: bool,
156 allow_missing: bool,
157 ) -> Result<Vec<FieldRange>, FieldError> {
158 let mut ranges = vec![];
159 let mut found = vec![false; list.len()];
160 for (i, header) in delim.split(header).enumerate() {
161 for (j, regex) in list.iter().enumerate() {
162 if !header_is_regex {
163 if regex.as_str().as_bytes() == header {
164 found[j] = true;
165 ranges.push(FieldRange {
166 low: i,
167 high: i,
168 pos: j,
169 });
170 }
171 } else if regex.is_match(header) {
172 found[j] = true;
173 ranges.push(FieldRange {
174 low: i,
175 high: i,
176 pos: j,
177 });
178 }
179 }
180 }
181
182 if !allow_missing {
183 if ranges.is_empty() {
184 return Err(FieldError::NoHeadersMatched);
185 }
186 for (i, was_found) in found.into_iter().enumerate() {
187 if !was_found {
188 return Err(FieldError::HeaderNotFound(list[i].as_str().to_owned()));
189 }
190 }
191 }
192
193 FieldRange::post_process_ranges(&mut ranges);
194
195 Ok(ranges)
196 }
197
198 pub fn post_process_ranges(ranges: &mut Vec<FieldRange>) {
205 ranges.sort();
206
207 let mut shifted = 0;
209 for i in 0..ranges.len() {
210 let j = i + 1;
211 if let Some(rng) = ranges.get_mut(i) {
212 rng.pos = rng.pos.saturating_sub(shifted);
213 }
214
215 while j < ranges.len()
216 && ranges[j].low <= ranges[i].high + 1
217 && ranges[j].pos.saturating_sub(1) == ranges[i].pos
218 {
219 let j_high = ranges.remove(j).high;
220 ranges[i].high = max(ranges[i].high, j_high);
221 shifted += 1;
222 }
223 }
224 }
225
226 pub fn contains(&self, value: usize) -> bool {
228 value >= self.low && value <= self.high
229 }
230
231 pub fn overlap(&self, other: &Self) -> bool {
233 self.low <= other.high && self.high >= other.low
234 }
235
236 pub fn exclude(fields: Vec<FieldRange>, exclude: Vec<FieldRange>) -> Vec<FieldRange> {
240 let mut fields: VecDeque<_> = fields.into_iter().collect();
241 let mut result = vec![];
242 let mut exclude_iter = exclude.into_iter();
243 let mut exclusion = if let Some(ex) = exclude_iter.next() {
244 ex
245 } else {
246 return fields.into_iter().collect();
248 };
249 let mut field = fields.pop_front().unwrap(); loop {
251 if exclusion.overlap(&field) {
253 match (
255 exclusion.contains(field.low),
256 exclusion.contains(field.high),
257 ) {
258 (false, true) => {
261 if exclusion.low != 0 {
262 field.high = exclusion.low - 1;
263 }
264 }
265
266 (true, false) => {
269 if exclusion.high != MAX - 1 {
270 field.low = exclusion.high + 1;
271 }
272 }
273 (true, true) => {
276 if let Some(f) = fields.pop_front() {
278 field = f;
279 } else {
280 break;
281 }
282 }
283
284 (false, false) => {
287 if exclusion.high != MAX - 1 {
290 let mut high_field = field;
291 high_field.low = exclusion.high + 1;
292 fields.push_front(high_field)
293 }
294
295 if exclusion.low != 0 {
297 field.high = exclusion.low - 1;
298 }
299 }
300 }
301 } else if field.low > exclusion.high {
302 if let Some(ex) = exclude_iter.next() {
304 exclusion = ex;
305 } else {
306 result.push(field);
307 result.extend(fields);
308 break;
309 }
310 } else if field.high < exclusion.low {
311 result.push(field);
313 if let Some(f) = fields.pop_front() {
314 field = f;
315 } else {
316 break;
317 }
318 } else {
319 unreachable!()
320 }
321 }
322 result
323 }
324}
325
326#[cfg(test)]
327mod test {
328 use super::*;
329
330 #[test]
331 #[rustfmt::skip::macros(assert_eq)]
332 fn test_parse_fields_good() {
333 assert_eq!(vec![FieldRange { low: 0, high: 0, pos: 0}], FieldRange::from_list("1").unwrap());
334 assert_eq!(vec![FieldRange { low: 0, high: 0, pos: 0}, FieldRange { low: 3, high: 3, pos: 1}], FieldRange::from_list("1,4").unwrap());
335 assert_eq!(vec![FieldRange { low: 0, high: 1, pos: 0}, FieldRange { low: 3, high: usize::MAX - 1, pos: 1}], FieldRange::from_list("1,2,4-").unwrap());
336 assert_eq!(vec![FieldRange { low: 1, high: 2, pos: 0}, FieldRange { low: 3, high: usize::MAX - 1, pos: 1} ], FieldRange::from_list("2,3,4-").unwrap());
337 assert_eq!(vec![FieldRange { low: 0, high: 0, pos: 0}, FieldRange { low: 3, high: usize::MAX - 1, pos: 1}], FieldRange::from_list("1,4-,5-8").unwrap());
338 assert_eq!(vec![FieldRange { low: 0, high: 0, pos: 1}, FieldRange { low: 3, high: usize::MAX - 1, pos: 0}, FieldRange { low: 4, high: 7, pos: 2}], FieldRange::from_list("4-,1,5-8").unwrap());
339 assert_eq!(vec![FieldRange { low: 0, high: 3, pos: 0}], FieldRange::from_list("-4").unwrap());
340 assert_eq!(vec![FieldRange { low: 0, high: 7, pos: 0}], FieldRange::from_list("-4,5-8").unwrap());
341 assert_eq!(vec![FieldRange { low: 0, high: 0, pos: 1 }, FieldRange { low: 2, high: 2, pos: 0}, FieldRange { low: 2, high: 2, pos: 2}], FieldRange::from_list("3,1,3").unwrap());
342 assert_eq!(vec![FieldRange { low: 0, high: 1, pos: 0 }, FieldRange { low: 2, high: 2, pos: 1}, FieldRange { low: 3, high: 3, pos: 2}], FieldRange::from_list("1,2,3,4").unwrap());
344 assert_eq!(vec![FieldRange { low: 0, high: 1, pos: 0 }, FieldRange { low: 2, high: 2, pos: 2}, FieldRange { low: 3, high: 3, pos: 1}], FieldRange::from_list("1,2,4,3").unwrap());
345 }
346
347 #[test]
348 fn test_parse_fields_bad() {
349 assert!(FieldRange::from_list("0").is_err());
350 assert!(FieldRange::from_list("4-1").is_err());
351 assert!(FieldRange::from_list("cat").is_err());
352 assert!(FieldRange::from_list("1-dog").is_err());
353 assert!(FieldRange::from_list("mouse-4").is_err());
354 }
355
356 #[test]
357 fn test_parse_header_fields() {
358 let header = b"is_cat-isdog-wascow-was_is_apple-12345-!$%*(_)";
359 let delim = Regex::new("-").unwrap();
360 let delim = RegexOrString::Regex(delim);
361 let header_fields = vec![
362 Regex::new(r"^is_.*$").unwrap(),
363 Regex::new("dog").unwrap(),
364 Regex::new(r"\$%").unwrap(),
365 ];
366 let fields =
367 FieldRange::from_header_list(&header_fields, header, &delim, true, false).unwrap();
368 assert_eq!(
369 vec![
370 FieldRange {
371 low: 0,
372 high: 1,
373 pos: 0
374 },
375 FieldRange {
376 low: 5,
377 high: 5,
378 pos: 1
379 }
380 ],
381 fields
382 );
383 }
384
385 #[test]
386 fn test_parse_header_fields_literal() {
387 let header = b"is_cat-is-isdog-wascow-was_is_apple-12345-!$%*(_)";
388 let delim = Regex::new("-").unwrap();
389 let delim = RegexOrString::Regex(delim);
390 let header_fields = vec![Regex::new(r"is").unwrap()];
391 let fields =
392 FieldRange::from_header_list(&header_fields, header, &delim, false, false).unwrap();
393 assert_eq!(
394 vec![FieldRange {
395 low: 1,
396 high: 1,
397 pos: 0
398 },],
399 fields
400 );
401 }
402
403 #[test]
404 fn test_parse_header_fields_literal_header_not_found() {
405 let header = b"is_cat-is-isdog-wascow-was_is_apple-12345-!$%*(_)";
406 let delim = Regex::new("-").unwrap();
407 let delim = RegexOrString::Regex(delim);
408 let header_fields = vec![
409 Regex::new(r"^is_.*$").unwrap(),
410 Regex::new("dog").unwrap(),
411 Regex::new(r"\$%").unwrap(),
412 Regex::new(r"is").unwrap(),
413 ];
414 let result = FieldRange::from_header_list(&header_fields, header, &delim, false, false);
415 assert_eq!(
416 result.unwrap_err(),
417 FieldError::HeaderNotFound(String::from(r"^is_.*$"))
418 );
419 }
420
421 #[test]
422 #[rustfmt::skip::macros(assert_eq)]
423 fn test_exclude_simple() {
424 assert_eq!(
425 vec![
426 FieldRange { low: 1, high: MAX - 1, pos: 0}
427 ],
428 FieldRange::exclude(
429 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
430 vec![FieldRange { low: 0, high: 0, pos: 0}]
431 ),
432 "1"
433 );
434 assert_eq!(
435 vec![
436 FieldRange { low: 1, high: 2, pos: 0},
437 FieldRange { low: 4, high: MAX - 1, pos: 0},
438 ],
439 FieldRange::exclude(
440 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
441 vec![
442 FieldRange { low: 0, high: 0, pos: 0},
443 FieldRange { low: 3, high: 3, pos: 0}
444 ]
445 ),
446 "1,4"
447 );
448 assert_eq!(
449 vec![
450 FieldRange { low: 2, high: 2, pos: 0},
451 ],
452 FieldRange::exclude(
453 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
454 vec![
455 FieldRange { low: 0, high: 1, pos: 0},
456 FieldRange { low: 3, high: usize::MAX - 1, pos: 1}
457 ]
458 ),
459 "1,2,4-"
460 );
461 assert_eq!(
462 vec![
463 FieldRange { low: 0, high: 0, pos: 0},
464 ],
465 FieldRange::exclude(
466 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
467 vec![
468 FieldRange { low: 1, high: 2, pos: 0},
469 FieldRange { low: 3, high: MAX - 1, pos: 1}
470 ]
471 ),
472 "2,3,4-"
473 );
474 assert_eq!(
475 vec![
476 FieldRange { low: 1, high: 2, pos: 0},
477 ],
478 FieldRange::exclude(
479 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
480 vec![
481 FieldRange { low: 0, high: 0, pos: 0},
482 FieldRange { low: 3, high: MAX - 1, pos: 1}
483 ]
484 ),
485 "1,4-,5-8"
486 );
487 assert_eq!(
488 vec![
489 FieldRange { low: 1, high: 2, pos: 0},
490 ],
491 FieldRange::exclude(
492 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
493 vec![
494 FieldRange { low: 0, high: 0, pos: 1},
495 FieldRange { low: 3, high: MAX - 1, pos: 0},
496 FieldRange { low: 4, high: 7, pos: 2}
497 ]
498 ),
499 "4-,1,5-8"
500 );
501 assert_eq!(
502 vec![
503 FieldRange { low: 4, high: MAX - 1, pos: 0},
504 ],
505 FieldRange::exclude(
506 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
507 vec![
508 FieldRange { low: 0, high: 3, pos: 0}
509 ]
510 ),
511 "-4"
512 );
513 assert_eq!(
514 vec![
515 FieldRange { low: 8, high: MAX - 1, pos: 0},
516 ],
517 FieldRange::exclude(
518 vec![FieldRange { low: 0, high: MAX - 1, pos: 0}],
519 vec![
520 FieldRange { low: 0, high: 7, pos: 0}
521 ]
522 ),
523 "-4,5-8"
524 );
525 }
526 #[test]
527 #[rustfmt::skip::macros(assert_eq)]
528 fn test_exclude_complex() {
529 assert_eq!(
530 vec![
531 FieldRange { low: 1, high: 3, pos: 0},
532 FieldRange { low: 7, high: 14, pos: 1},
533 ],
534 FieldRange::exclude(
535 vec![FieldRange { low: 0, high: 3, pos: 0}, FieldRange { low: 7, high: MAX - 1, pos: 1}],
536 vec![FieldRange { low: 0, high: 0, pos: 0}, FieldRange { low: 15, high: MAX - 1, pos: 0}]
537 ),
538 "-f1-4,8- : -e1,16-"
539 );
540 let empty: Vec<FieldRange> = vec![];
541 assert_eq!(
542 empty,
543 FieldRange::exclude(
544 vec![FieldRange { low: 0, high: MAX-1, pos: 0}],
545 vec![FieldRange { low: 0, high: MAX-1, pos: 0}]
546 ),
547 "-f1- : -e1-"
548 );
549 assert_eq!(
550 vec![
551 FieldRange { low: 0, high: 0, pos: 0},
552 FieldRange { low: 9, high: 9, pos: 3},
553 ],
554 FieldRange::exclude(
555 vec![FieldRange { low: 0, high: 0, pos: 0}, FieldRange { low: 3, high: 3, pos: 1 }, FieldRange { low: 7, high: 7, pos: 2}, FieldRange { low: 9, high: 9, pos: 3}],
556 vec![FieldRange { low: 3, high: 7, pos: 0}]
557 ),
558 "-f1,4,8,10 : -e4-8"
559 );
560 assert_eq!(
563 vec![
564 FieldRange { low: 0, high: 3, pos: 0},
565 ],
566 FieldRange::exclude(
567 vec![FieldRange { low: 0, high: 9, pos: 0}],
568 vec![FieldRange { low: 4, high: MAX - 1, pos: 0}]
569 ),
570 "-f1-10 : -e5-"
571 );
572 assert_eq!(
575 vec![
576 FieldRange { low: 15, high: 19, pos: 0},
577 ],
578 FieldRange::exclude(
579 vec![FieldRange { low: 9, high: 19, pos: 0}],
580 vec![FieldRange { low: 4, high: 14, pos: 0}]
581 ),
582 "-f10-20 : -e5-15"
583 );
584 assert_eq!(
587 vec![
588 FieldRange { low: 9, high: 11, pos: 0},
589 FieldRange { low: 16, high: 19, pos: 0},
590 ],
591 FieldRange::exclude(
592 vec![FieldRange { low: 9, high: 19, pos: 0}],
593 vec![FieldRange { low: 12, high: 15, pos: 0}]
594 ),
595 "-f10-20 : -e13-16"
596 );
597 assert_eq!(
600 empty,
601 FieldRange::exclude(
602 vec![FieldRange { low: 12, high: 15, pos: 0}],
603 vec![FieldRange { low: 9, high: 19, pos: 0}]
604 ),
605 "-f13-16 : -e10-20"
606 );
607 assert_eq!(
610 vec![FieldRange { low: 4, high: 8, pos: 0 }, FieldRange { low: 25, high: 29, pos: 1}],
611 FieldRange::exclude(
612 vec![FieldRange { low: 4, high: 15, pos: 0}, FieldRange { low: 19, high: 29, pos: 1}],
613 vec![FieldRange { low: 9, high: 24, pos: 0}]
614 ),
615 "-f5-16,20-30 : -e10-25"
616 );
617 }
618}