1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
use crate::field_range::FieldRange;
use bstr::ByteSlice;
use regex::bytes::Regex;
use std::cmp::max;
/// Strategy for breaking one input line into fields and distributing the
/// selected fields into per-position output buckets.
///
/// `'a` is the lifetime of whatever configuration the implementor borrows.
pub trait LineParser<'a> {
    /// Parses `line` and appends borrowed sub-slices of it to the inner
    /// vectors of `shuffler`.
    ///
    /// The slices pushed into `shuffler` live as long as `line` (`'b`); the
    /// bound `'a: 'b` requires the parser's own borrows to outlive the line
    /// being parsed.
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b;
}
/// Line parser that separates fields on a fixed byte-string delimiter.
///
/// Both members are borrowed for `'a`; the parser owns no data itself.
pub struct SubStrLineParser<'a> {
    /// Field selections; each one is read as `low` / `high` / `pos` when a
    /// line is parsed.
    field_ranges: &'a [FieldRange],
    /// Literal byte sequence that separates two fields.
    delimiter: &'a [u8],
}
impl<'a> SubStrLineParser<'a> {
pub fn new(field_ranges: &'a [FieldRange], delimiter: &'a [u8]) -> Self {
Self {
field_ranges,
delimiter,
}
}
}
impl<'a> LineParser<'a> for SubStrLineParser<'a> {
    /// Splits `line` on the literal delimiter and appends every field selected
    /// by `self.field_ranges` to `shuffler[pos]`.
    ///
    /// `low` and `high` are zero-based, inclusive field indices. The ranges are
    /// visited in slice order while the split iterator only ever moves forward,
    /// so a range that begins before the current position contributes only the
    /// fields that have not been consumed yet.
    ///
    /// If `pos` is not a valid index into `shuffler`, the field is still
    /// consumed but is not stored anywhere.
    #[inline]
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b,
    {
        // The iterator is consumed strictly front to back and never peeked,
        // so it does not need a `Peekable` wrapper.
        let mut parts = line.split_str(self.delimiter);
        // Index of the field the next `parts.next()` call would yield.
        let mut iterator_index = 0;

        for &FieldRange { low, high, pos } in self.field_ranges {
            // First field this range can still contribute.
            let start = max(low, iterator_index);

            if start > iterator_index {
                // `nth(k)` consumes `k + 1` items, which discards exactly the
                // fields in `iterator_index..start`.
                if parts.nth(start - iterator_index - 1).is_none() {
                    // The line ended before this range begins.
                    break;
                }
                iterator_index = start;
            }

            // Collect fields `start..=high`, or fewer if the line runs out.
            while iterator_index <= high {
                match parts.next() {
                    Some(part) => {
                        if let Some(bucket) = shuffler.get_mut(pos) {
                            bucket.push(part);
                        }
                    }
                    None => break,
                }
                iterator_index += 1;
            }
        }
    }
}
/// Line parser that separates fields wherever a regular expression matches.
///
/// Both members are borrowed for `'a`; the parser owns no data itself.
pub struct RegexLineParser<'a> {
    /// Field selections; each one is read as `low` / `high` / `pos` when a
    /// line is parsed.
    field_ranges: &'a [FieldRange],
    /// Compiled byte-oriented regex whose matches separate two fields.
    delimiter: &'a Regex,
}
impl<'a> RegexLineParser<'a> {
pub fn new(field_ranges: &'a [FieldRange], delimiter: &'a Regex) -> Self {
Self {
field_ranges,
delimiter,
}
}
}
impl<'a> LineParser<'a> for RegexLineParser<'a> {
    /// Splits `line` at every match of the delimiter regex and appends every
    /// field selected by `self.field_ranges` to `shuffler[pos]`.
    ///
    /// `low` and `high` are zero-based, inclusive field indices. The ranges are
    /// visited in slice order while the split iterator only ever moves forward,
    /// so a range that begins before the current position contributes only the
    /// fields that have not been consumed yet.
    ///
    /// # Panics
    ///
    /// Hits `unreachable!()` if a selected field's `pos` is not a valid index
    /// into `shuffler`.
    #[inline]
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b,
    {
        // The iterator is consumed strictly front to back and never peeked,
        // so it does not need a `Peekable` wrapper.
        let mut parts = self.delimiter.split(line);
        // Index of the field the next `parts.next()` call would yield.
        let mut iterator_index = 0;

        for &FieldRange { low, high, pos } in self.field_ranges {
            // First field this range can still contribute.
            let start = max(low, iterator_index);

            if start > iterator_index {
                // `nth(k)` consumes `k + 1` items, which discards exactly the
                // fields in `iterator_index..start`.
                if parts.nth(start - iterator_index - 1).is_none() {
                    // The line ended before this range begins.
                    break;
                }
                iterator_index = start;
            }

            // Collect fields `start..=high`, or fewer if the line runs out.
            while iterator_index <= high {
                match parts.next() {
                    Some(part) => match shuffler.get_mut(pos) {
                        Some(bucket) => bucket.push(part),
                        // Treated as an invariant violation by this parser.
                        None => unreachable!(),
                    },
                    None => break,
                }
                iterator_index += 1;
            }
        }
    }
}