//! Line parsers that split a line into fields and push the selected fields into a `shuffler`.
use crate::field_range::FieldRange;
use bstr::ByteSlice;
use regex::bytes::Regex;
use std::cmp::max;
/// Methods for parsing a line into a reordered `shuffler`
///
/// `'a` is the lifetime parameter of the implementing parser; the `'a: 'b` bound on
/// [`LineParser::parse_line`] requires it to outlive the line being parsed.
pub trait LineParser<'a> {
/// Fills the shuffler with values parsed from the line.
///
/// * `line` - the raw bytes to parse, borrowed for `'b`.
/// * `shuffler` - a list of buckets that receives byte slices valid for `'b`.
///   Which bucket a value lands in is decided by the implementation.
fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
where
'a: 'b;
}
/// A line parser that splits each line on a fixed byte-string delimiter
///
/// Both fields are borrowed for `'a`; the parser holds no owned data.
pub struct SubStrLineParser<'a> {
/// Ranges of field indices to extract; a range's `pos` picks the `shuffler` bucket its fields go to.
field_ranges: &'a [FieldRange],
/// Literal byte sequence the line is split on.
delimiter: &'a [u8],
}
impl<'a> SubStrLineParser<'a> {
pub fn new(field_ranges: &'a [FieldRange], delimiter: &'a [u8]) -> Self {
Self {
field_ranges,
delimiter,
}
}
}
impl<'a> LineParser<'a> for SubStrLineParser<'a> {
    /// Splits `line` on the literal delimiter and appends the selected fields to `shuffler`.
    ///
    /// The configured ranges are visited in slice order while the split iterator is walked
    /// exactly once, so every field of the line is handed to at most one range. A range
    /// that starts before the current position only receives the fields that have not been
    /// consumed yet. Fields covered by a range `{ low, high, pos }` are pushed, in line
    /// order, onto `shuffler[pos]`.
    ///
    /// Parsing stops as soon as the line has no more fields. If `pos` is not a valid index
    /// into `shuffler`, the field is still consumed but silently dropped.
    #[inline]
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b,
    {
        let mut parts = line.split_str(self.delimiter);
        // Index of the field that `parts` will yield next.
        let mut iterator_index = 0;

        // Iterate over our ranges and write any fields that are contained by them.
        for &FieldRange { low, high, pos } in self.field_ranges {
            // Advance up to the low end of the range, discarding the fields in between.
            if low > iterator_index {
                // `nth(k)` consumes `k + 1` items, so the next field yielded is `low`.
                if parts.nth(low - iterator_index - 1).is_none() {
                    // The line ended before this range; nothing further can match.
                    return;
                }
                iterator_index = low;
            }

            // Advance through the range, resuming from the current position when the
            // range overlaps fields that were already consumed.
            for _ in max(low, iterator_index)..=high {
                match parts.next() {
                    Some(part) => {
                        // The original author notes `pos` is expected to always be in
                        // range because the shuffler is sized from the field positions.
                        if let Some(reshuffled_range) = shuffler.get_mut(pos) {
                            reshuffled_range.push(part);
                        }
                    }
                    // Line exhausted: no remaining range can receive a field.
                    None => return,
                }
                iterator_index += 1;
            }
        }
    }
}
/// A line parser that splits each line on a regular-expression delimiter
///
/// Both fields are borrowed for `'a`; the parser holds no owned data.
pub struct RegexLineParser<'a> {
/// Ranges of field indices to extract; a range's `pos` picks the `shuffler` bucket its fields go to.
field_ranges: &'a [FieldRange],
/// Byte-oriented regex the line is split on.
delimiter: &'a Regex,
}
impl<'a> RegexLineParser<'a> {
pub fn new(field_ranges: &'a [FieldRange], delimiter: &'a Regex) -> Self {
Self {
field_ranges,
delimiter,
}
}
}
impl<'a> LineParser<'a> for RegexLineParser<'a> {
    /// Splits `line` on the delimiter regex and appends the selected fields to `shuffler`.
    ///
    /// The configured ranges are visited in slice order while the split iterator is walked
    /// exactly once, so every field of the line is handed to at most one range. A range
    /// that starts before the current position only receives the fields that have not been
    /// consumed yet. Fields covered by a range `{ low, high, pos }` are pushed, in line
    /// order, onto `shuffler[pos]`.
    ///
    /// Parsing stops as soon as the line has no more fields.
    ///
    /// # Panics
    ///
    /// Panics if a field has to be stored for a range whose `pos` is not a valid index
    /// into `shuffler`.
    #[inline]
    fn parse_line<'b>(&self, line: &'b [u8], shuffler: &mut Vec<Vec<&'b [u8]>>)
    where
        'a: 'b,
    {
        let mut parts = self.delimiter.split(line);
        // Index of the field that `parts` will yield next.
        let mut iterator_index = 0;

        // Iterate over our ranges and write any fields that are contained by them.
        for &FieldRange { low, high, pos } in self.field_ranges {
            // Advance up to the low end of the range, discarding the fields in between.
            if low > iterator_index {
                // `nth(k)` consumes `k + 1` items, so the next field yielded is `low`.
                if parts.nth(low - iterator_index - 1).is_none() {
                    // The line ended before this range; nothing further can match.
                    return;
                }
                iterator_index = low;
            }

            // Advance through the range, resuming from the current position when the
            // range overlaps fields that were already consumed.
            for _ in max(low, iterator_index)..=high {
                match parts.next() {
                    Some(part) => {
                        // The original author notes `pos` is expected to always be in
                        // range because the shuffler is sized from the field positions;
                        // a violation is treated as a bug.
                        if let Some(reshuffled_range) = shuffler.get_mut(pos) {
                            reshuffled_range.push(part);
                        } else {
                            unreachable!()
                        }
                    }
                    // Line exhausted: no remaining range can receive a field.
                    None => return,
                }
                iterator_index += 1;
            }
        }
    }
}