use crate::deflate::tokens::LZ77Token;
/// Token-driven strategy for deciding where a block may be split.
///
/// An implementation is fed tokens one at a time via `process_token` and can
/// then be queried about the position reached after the most recent token.
pub trait BlockSplitter {
    /// Observes one token and updates whatever state the implementation keeps.
    fn process_token(&mut self, token: &LZ77Token);

    /// Reports whether the position after the last observed token is a good
    /// place to split.
    fn is_good_split_point(&self) -> bool;

    /// Byte count the implementation has accumulated since its last good
    /// split point.
    fn bytes_since_last_good_split(&self) -> usize;

    /// Resets the implementation's bookkeeping; which parts of the state are
    /// cleared is left to each implementation.
    fn reset(&mut self);
}
/// Stateless splitter that carries no data of its own.
///
/// Its `BlockSplitter` implementation in this file accepts every position as
/// a split point. Being a unit struct, it is trivially `Copy` and printable
/// for diagnostics.
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultSplitter;
// No-op strategy: nothing is tracked, so every position is acceptable.
impl BlockSplitter for DefaultSplitter {
    /// Ignores the token; this splitter keeps no state.
    fn process_token(&mut self, _token: &LZ77Token) {}

    /// Always `true`.
    fn is_good_split_point(&self) -> bool {
        true
    }

    /// Always `0`.
    fn bytes_since_last_good_split(&self) -> usize {
        0
    }

    /// Nothing to reset.
    fn reset(&mut self) {}
}
/// Splitter that locates record boundaries by counting newlines.
///
/// Every fourth newline is treated as the end of a record, which matches the
/// four-line layout of a FASTQ record (header, sequence, `+`, qualities).
#[derive(Debug, Clone)]
pub struct FastqSplitter {
    /// Newlines counted so far, modulo 4; `0` means a whole number of records.
    newline_count: u8,
    /// Bytes counted since the last record end (or since the last reset).
    bytes_since_record_end: usize,
    /// `true` while the most recent token ended exactly on a record boundary.
    at_record_boundary: bool,
}

impl FastqSplitter {
    /// Creates a splitter positioned at the start of a record: no newlines
    /// counted, no bytes pending, and the boundary flag set.
    pub fn new() -> Self {
        Self {
            newline_count: 0,
            bytes_since_record_end: 0,
            at_record_boundary: true,
        }
    }
}

impl Default for FastqSplitter {
    /// Equivalent to `FastqSplitter::new()`.
    ///
    /// Written by hand because the boundary flag must start as `true`, which a
    /// derived `Default` would not produce.
    fn default() -> Self {
        Self::new()
    }
}
impl BlockSplitter for FastqSplitter {
    /// Advances the record tracker by one token.
    ///
    /// * `Literal`: counts one byte. A newline bumps the newline counter
    ///   (modulo 4); when it wraps to zero the record is complete, the
    ///   boundary flag is set and the byte counter restarts from zero.
    ///   Any other outcome clears the boundary flag.
    /// * `Copy`: counts `length` bytes and clears the boundary flag.
    /// * `EndOfBlock`: leaves the state untouched.
    fn process_token(&mut self, token: &LZ77Token) {
        match token {
            LZ77Token::EndOfBlock => {}
            LZ77Token::Copy { length, .. } => {
                // Only the length is available here, so the copied bytes are
                // not inspected and the newline counter is not updated.
                // NOTE(review): if a copy can span a newline, the modulo-4
                // count would drift — confirm how callers avoid that.
                self.bytes_since_record_end += *length as usize;
                self.at_record_boundary = false;
            }
            LZ77Token::Literal(byte) => {
                self.bytes_since_record_end += 1;

                let is_newline = *byte == b'\n';
                if is_newline {
                    self.newline_count = (self.newline_count + 1) % 4;
                }

                // A record ends only on the newline that wraps the counter.
                let record_finished = is_newline && self.newline_count == 0;
                self.at_record_boundary = record_finished;
                if record_finished {
                    self.bytes_since_record_end = 0;
                }
            }
        }
    }

    /// `true` when the last processed token completed a record (or nothing has
    /// been processed since construction).
    fn is_good_split_point(&self) -> bool {
        self.at_record_boundary
    }

    /// Bytes counted since the last completed record or the last `reset`.
    fn bytes_since_last_good_split(&self) -> usize {
        self.bytes_since_record_end
    }

    /// Zeroes the byte counter only; the newline counter and boundary flag are
    /// left as they are.
    // NOTE(review): presumably this keeps record tracking intact when a split
    // lands mid-record — confirm against the caller.
    fn reset(&mut self) {
        self.bytes_since_record_end = 0;
    }
}
/// Record-boundary tracker that works directly on raw bytes.
///
/// Every fourth newline is treated as the end of a record, which matches the
/// four-line layout of a FASTQ record (header, sequence, `+`, qualities).
#[derive(Debug, Clone)]
pub struct FastqByteSplitter {
    /// Newlines counted so far, modulo 4; `0` means a whole number of records.
    newline_count: u8,
    /// Bytes seen since the most recent record-ending newline.
    bytes_since_record_end: usize,
    /// `true` while the most recent byte completed a record (or no byte has
    /// been seen yet).
    at_record_boundary: bool,
}

impl FastqByteSplitter {
    /// Creates a tracker positioned at the start of a record.
    pub fn new() -> Self {
        Self {
            newline_count: 0,
            bytes_since_record_end: 0,
            at_record_boundary: true,
        }
    }

    /// Feeds `bytes` through the tracker in order.
    ///
    /// May be called repeatedly with consecutive chunks; state carries over
    /// between calls. An empty slice leaves the state unchanged.
    pub fn process_bytes(&mut self, bytes: &[u8]) {
        for &byte in bytes {
            self.observe(byte);
        }
    }

    /// Updates the tracker for a single byte.
    fn observe(&mut self, byte: u8) {
        self.bytes_since_record_end += 1;

        let is_newline = byte == b'\n';
        if is_newline {
            self.newline_count = (self.newline_count + 1) % 4;
        }

        // A record ends only on the newline that wraps the counter to zero.
        self.at_record_boundary = is_newline && self.newline_count == 0;
        if self.at_record_boundary {
            self.bytes_since_record_end = 0;
        }
    }

    /// `true` when the last byte processed completed a record, or when no
    /// bytes have been processed yet.
    pub fn is_good_split_point(&self) -> bool {
        self.at_record_boundary
    }

    /// Number of bytes processed since the last completed record.
    pub fn bytes_since_last_good_split(&self) -> usize {
        self.bytes_since_record_end
    }
}

impl Default for FastqByteSplitter {
    /// Equivalent to `FastqByteSplitter::new()`.
    ///
    /// Written by hand because the boundary flag must start as `true`, which a
    /// derived `Default` would not produce.
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Feeds every byte of `text` to `splitter` as an individual literal token.
    fn push_literals(splitter: &mut FastqSplitter, text: &[u8]) {
        for &byte in text {
            splitter.process_token(&LZ77Token::Literal(byte));
        }
    }

    // The default splitter accepts any position and reports no pending bytes.
    #[test]
    fn test_default_splitter() {
        let splitter = DefaultSplitter;
        assert!(splitter.is_good_split_point());
        assert_eq!(splitter.bytes_since_last_good_split(), 0);
    }

    // Only the newline that closes the fourth line marks a record boundary.
    #[test]
    fn test_fastq_splitter_record_boundary() {
        let mut splitter = FastqSplitter::new();

        // Header, sequence and separator lines: still inside the record.
        let leading_lines: [&[u8]; 3] = [b"@header\n", b"ACGT\n", b"+\n"];
        for &line in &leading_lines {
            push_literals(&mut splitter, line);
            assert!(!splitter.is_good_split_point());
        }

        // The quality line completes the record.
        push_literals(&mut splitter, b"IIII\n");
        assert!(splitter.is_good_split_point());
        assert_eq!(splitter.bytes_since_last_good_split(), 0);
    }

    // The byte-level splitter tracks the same boundaries across chunked input.
    #[test]
    fn test_fastq_byte_splitter() {
        let mut splitter = FastqByteSplitter::new();

        splitter.process_bytes(b"@header\nACGT\n+\nIIII\n");
        assert!(splitter.is_good_split_point());
        assert_eq!(splitter.bytes_since_last_good_split(), 0);

        // A partial second record leaves the splitter mid-record.
        splitter.process_bytes(b"@next\nAA");
        assert!(!splitter.is_good_split_point());
        assert!(splitter.bytes_since_last_good_split() > 0);
    }
}