1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
use crate::field_range::FieldRange;
use bstr::ByteSlice;
use regex::bytes::Regex;
use std::cmp::max;

/// Methods for parsing a line into a reordered `shuffler`.
///
/// `'a` is the lifetime of the data borrowed by the parser itself. The
/// `'a: 'b` bound on `parse_line` requires that data to outlive the line
/// being parsed.
pub trait LineParser<'a> {
    /// Fills the shuffler with values parsed from the line.
    ///
    /// * `line` - raw bytes of a single line; the slices stored in `shuffler`
    ///   share its lifetime `'b`.
    /// * `shuffler` - output buckets. The implementations in this file append
    ///   each selected field to the bucket indexed by the `pos` of the
    ///   `FieldRange` that selected it; they only push and never clear
    ///   existing entries.
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b;
}

/// A line parser that splits each line on a fixed (literal) byte-string delimiter.
pub struct SubStrLineParser<'a> {
    /// Field ranges to extract; `parse_line` walks them in slice order.
    field_ranges: &'a [FieldRange],
    /// Literal delimiter bytes that separate the fields of a line.
    delimiter: &'a [u8],
}

impl<'a> SubStrLineParser<'a> {
    /// Creates a parser that extracts `field_ranges` from lines split on the
    /// literal byte sequence `delimiter`.
    ///
    /// Both arguments are only borrowed for `'a`; nothing is copied.
    pub fn new(field_ranges: &'a [FieldRange], delimiter: &'a [u8]) -> Self {
        Self {
            delimiter,
            field_ranges,
        }
    }
}
impl<'a> LineParser<'a> for SubStrLineParser<'a> {
    /// Splits `line` on the literal delimiter and appends every field that
    /// falls inside one of the configured ranges to `shuffler[pos]`, where
    /// `pos` belongs to the range that selected the field.
    ///
    /// The fields are walked exactly once, front to back. Parsing stops as
    /// soon as a range starts beyond the last field of the line. A range
    /// whose `pos` has no bucket in `shuffler` is skipped silently.
    ///
    /// NOTE(review): the forward-only walk presumably relies on the ranges
    /// being ordered by `low` — confirm against the code that builds them.
    #[inline]
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b,
    {
        let mut fields = line.split_str(self.delimiter);
        // Zero-based index of the next field that `fields` will yield.
        let mut cursor = 0;

        for range in self.field_ranges {
            // Discard the fields sitting between the cursor and the start of this range.
            if cursor < range.low {
                // `nth(k)` consumes `k + 1` items, hence the `- 1`.
                if fields.nth(range.low - cursor - 1).is_none() {
                    // The line has fewer fields than this range needs.
                    break;
                }
                cursor = range.low;
            }

            // Pair every wanted index with the next field. `zip` polls the
            // index range first, so no field is consumed once the range is
            // used up, and it stops early when the line runs out of fields.
            let wanted = max(range.low, cursor)..=range.high;
            for (_, field) in wanted.zip(fields.by_ref()) {
                // The bucket is expected to exist because the shuffler is
                // sized from the field positions; a missing one is ignored.
                if let Some(bucket) = shuffler.get_mut(range.pos) {
                    bucket.push(field);
                }
                cursor += 1;
            }
        }
    }
}

/// A line parser that splits each line on matches of a regex delimiter.
pub struct RegexLineParser<'a> {
    /// Field ranges to extract; `parse_line` walks them in slice order.
    field_ranges: &'a [FieldRange],
    /// Compiled byte-oriented regex whose matches separate the fields of a line.
    delimiter: &'a Regex,
}

impl<'a> RegexLineParser<'a> {
    /// Creates a parser that extracts `field_ranges` from lines split on the
    /// matches of the regex `delimiter`.
    ///
    /// Both arguments are only borrowed for `'a`; the regex is not recompiled.
    pub fn new(field_ranges: &'a [FieldRange], delimiter: &'a Regex) -> Self {
        Self {
            delimiter,
            field_ranges,
        }
    }
}
impl<'a> LineParser<'a> for RegexLineParser<'a> {
    /// Splits `line` on the matches of the regex delimiter and appends every
    /// field that falls inside one of the configured ranges to
    /// `shuffler[pos]`, where `pos` belongs to the range that selected the
    /// field.
    ///
    /// The fields are walked exactly once, front to back. Parsing stops as
    /// soon as a range starts beyond the last field of the line.
    ///
    /// NOTE(review): the forward-only walk presumably relies on the ranges
    /// being ordered by `low` — confirm against the code that builds them.
    ///
    /// # Panics
    ///
    /// Panics via `unreachable!()` if a field is selected by a range whose
    /// `pos` is not a valid index into `shuffler`.
    #[inline]
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b,
    {
        let mut fields = self.delimiter.split(line);
        // Zero-based index of the next field that `fields` will yield.
        let mut cursor = 0;

        for range in self.field_ranges {
            // Discard the fields sitting between the cursor and the start of this range.
            if cursor < range.low {
                // `nth(k)` consumes `k + 1` items, hence the `- 1`.
                if fields.nth(range.low - cursor - 1).is_none() {
                    // The line has fewer fields than this range needs.
                    break;
                }
                cursor = range.low;
            }

            // Pair every wanted index with the next field. `zip` polls the
            // index range first, so no field is consumed once the range is
            // used up, and it stops early when the line runs out of fields.
            let wanted = max(range.low, cursor)..=range.high;
            for (_, field) in wanted.zip(fields.by_ref()) {
                // The shuffler is sized from the field positions, so a
                // missing bucket is treated as an invariant violation.
                match shuffler.get_mut(range.pos) {
                    Some(bucket) => bucket.push(field),
                    None => unreachable!(),
                }
                cursor += 1;
            }
        }
    }
}