use alloc::vec;
use alloc::vec::Vec;
use memchr::memchr;
use crate::parsers::FromSlice;
use crate::record::StateMetadata;
use crate::EtError;
use crate::{impl_reader, impl_record};
/// A single record read from a FASTQ file.
///
/// When populated by `get`, every field is a slice borrowed from the buffer
/// the record was parsed out of; nothing is copied.
#[derive(Clone, Debug, Default)]
pub struct FastqRecord<'r> {
/// The text of the header line, without the leading `@` and without the
/// trailing line ending (`\n` or `\r\n`).
pub id: &'r str,
/// The raw bytes of the sequence line, without its line ending.
pub sequence: &'r [u8],
/// The raw bytes of the quality line. The parser takes exactly as many
/// bytes here as there are in `sequence`.
pub quality: &'r [u8],
}
// NOTE(review): `impl_record!` is defined elsewhere in the crate; presumably it
// hooks the three listed fields into the crate's generic record interface —
// confirm against the macro definition.
impl_record!(FastqRecord<'r>: id, sequence, quality);
/// Byte offsets describing where the parts of a parsed FASTQ record live.
///
/// `FastqRecord::parse` fills these in, relative to the start of the buffer it
/// was handed, and `FastqRecord::get` later uses them to slice the fields out
/// of that same buffer.
#[derive(Clone, Copy, Debug, Default)]
pub struct FastqState {
/// Index one past the last byte of the id, i.e. where the header's line
/// ending (`\n` or `\r\n`) begins. The id itself starts at index 1, right
/// after the leading `@`.
header_end: usize,
/// Half-open `(start, end)` byte range of the sequence line, excluding its
/// line ending.
seq: (usize, usize),
/// Half-open `(start, end)` byte range of the quality line; always the same
/// length as `seq`.
qual: (usize, usize),
}
/// Metadata describing the shape of the records produced for FASTQ data.
impl StateMetadata for FastqState {
/// Returns the names of the record's fields: `id`, `sequence` and `quality`,
/// matching the public fields of `FastqRecord`.
fn header(&self) -> Vec<&str> {
vec!["id", "sequence", "quality"]
}
}
// `FastqState` needs no auxiliary state of its own, hence the unit `State`.
// No trait methods are overridden here, so whatever defaults `FromSlice`
// provides are used as-is.
// NOTE(review): presumably this impl exists so the reader can initialise a
// `FastqState` without consuming any input — confirm against `FromSlice`'s
// default methods.
impl<'b: 's, 's> FromSlice<'b, 's> for FastqState {
type State = ();
}
impl<'b: 's, 's> FromSlice<'b, 's> for FastqRecord<'s> {
    type State = FastqState;

    /// Locates the next four-line FASTQ record at the front of `buf`.
    ///
    /// On success the byte offsets of the id, sequence and quality are written
    /// into `state`, `consumed` is advanced past the whole record and
    /// `Ok(true)` is returned. `Ok(false)` is returned when `buf` is empty and
    /// `eof` is set, i.e. there is nothing left to read.
    ///
    /// The quality line is not scanned for a terminator: it is taken to be
    /// exactly as long as the sequence, followed by a line ending of the same
    /// width as the one that followed the sequence. At `eof` that final line
    /// ending is allowed to be missing.
    ///
    /// # Errors
    ///
    /// * An "incomplete" error when `buf` is empty without `eof`, or when `buf`
    ///   ends before the header, sequence, second header or quality is
    ///   complete.
    /// * A plain error when the first byte is not `@`, or when the first `+`
    ///   after the header is not the first byte of a non-initial line.
    ///
    /// Note that `state.header_end` and `state.seq` may already have been
    /// overwritten when an error from a later stage is returned.
    fn parse(
        buf: &[u8],
        eof: bool,
        consumed: &mut usize,
        state: &mut Self::State,
    ) -> Result<bool, EtError> {
        // An empty buffer is either a clean end of input or a request for more
        // data; anything else has to open with the '@' marker.
        match buf.first() {
            Some(&b'@') => {}
            Some(_) => return Err("Valid FASTQ records start with '@'".into()),
            None if eof => return Ok(false),
            None => return Err(EtError::new("No FASTQ could be parsed").incomplete()),
        }

        // First line: the id/header, terminated by "\n" or "\r\n".
        let header_nl = match memchr(b'\n', buf) {
            Some(pos) => pos,
            None => {
                return Err(EtError::new("Record ended prematurely in header").incomplete());
            }
        };
        let header_has_cr = header_nl > 0 && buf[header_nl - 1] == b'\r';
        state.header_end = if header_has_cr {
            header_nl - 1
        } else {
            header_nl
        };
        let seq_start = header_nl + 1;

        // Second line: the sequence. Its end is found via the '+' that opens
        // the third line, which must sit directly after a newline and must not
        // be the very first byte after the header.
        let plus_at = match memchr(b'+', &buf[seq_start..]) {
            Some(offset) => seq_start + offset,
            None => {
                return Err(EtError::new("Record ended prematurely in sequence").incomplete());
            }
        };
        if plus_at == seq_start || buf[plus_at - 1] != b'\n' {
            return Err("Unexpected + found in sequence".into());
        }
        // Step back over the "\n" (and a preceding "\r", if any) to find where
        // the sequence bytes actually stop.
        let seq_end = if plus_at > 2 && buf[plus_at - 2] == b'\r' {
            plus_at - 2
        } else {
            plus_at - 1
        };
        state.seq = (seq_start, seq_end);

        // Third line: the second header, skipped in its entirety.
        let qual_start = match memchr(b'\n', &buf[plus_at..]) {
            Some(offset) => plus_at + offset + 1,
            None => {
                return Err(
                    EtError::new("Record ended prematurely in second header").incomplete(),
                );
            }
        };

        // Fourth line: the quality, sized from the sequence rather than
        // searched for.
        let seq_len = seq_end - seq_start;
        let line_ending_len = plus_at - seq_end;
        let qual_end = qual_start + seq_len;
        let with_line_ending = qual_end + line_ending_len;
        // At the very end of the input the final line ending may be absent, so
        // fall back to ending the record right after the quality bytes.
        let rec_end = if eof && with_line_ending > buf.len() {
            qual_end
        } else {
            with_line_ending
        };
        if rec_end > buf.len() {
            return Err(EtError::new("Record ended prematurely in quality").incomplete());
        }

        state.qual = (qual_start, qual_end);
        *consumed += rec_end;
        Ok(true)
    }

    /// Fills `self` with slices of `buf` using the offsets stored in `state`
    /// by a preceding successful `parse` over the same buffer.
    ///
    /// # Errors
    ///
    /// Returns an error if the id bytes are not valid UTF-8; in that case none
    /// of the fields are modified.
    fn get(&mut self, buf: &'b [u8], state: &'s Self::State) -> Result<(), EtError> {
        let (seq_from, seq_to) = state.seq;
        let (qual_from, qual_to) = state.qual;

        // Index 0 is the '@' marker, so the id proper starts at 1.
        let id = alloc::str::from_utf8(&buf[1..state.header_end])?;
        self.id = id;
        self.sequence = &buf[seq_from..seq_to];
        self.quality = &buf[qual_from..qual_to];
        Ok(())
    }
}
// Generates the `FastqReader` type. As used in this file's tests, it is built
// with `FastqReader::new(data, None)` and yields `Option<FastqRecord>` values
// from `next()`.
// NOTE(review): `impl_reader!` is defined elsewhere; the trailing `()` is
// presumably the type of the reader's optional parameters — confirm against
// the macro definition.
impl_reader!(FastqReader, FastqRecord, FastqRecord<'r>, FastqState, ());
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `record` is present and carries exactly the given fields.
    fn assert_record(
        record: Option<FastqRecord<'_>>,
        id: &str,
        sequence: &[u8],
        quality: &[u8],
    ) {
        match record {
            Some(found) => {
                assert_eq!(found.id, id);
                assert_eq!(found.sequence, sequence);
                assert_eq!(found.quality, quality);
            }
            None => panic!("FASTQ reader returned non-FASTQ reader"),
        }
    }

    /// Two LF-terminated records, the last one lacking a final newline.
    #[test]
    fn test_fastq_reading() -> Result<(), EtError> {
        const TEST_FASTQ: &[u8] = b"@id\nACGT\n+\n!!!!\n@id2\nTGCA\n+\n!!!!";
        let mut pt = FastqReader::new(TEST_FASTQ, None)?;

        assert_record(pt.next()?, "id", b"ACGT", b"!!!!");
        assert_record(pt.next()?, "id2", b"TGCA", b"!!!!");
        assert!(pt.next()?.is_none());
        Ok(())
    }

    /// The same two records, but with CRLF line endings throughout.
    #[test]
    fn test_fastq_extra_newlines() -> Result<(), EtError> {
        const TEST_FASTQ: &[u8] = b"@id\r\nACGT\r\n+\r\n!!!!\r\n@id2\r\nTGCA\r\n+\r\n!!!!\r\n";
        let mut pt = FastqReader::new(TEST_FASTQ, None)?;

        assert_record(pt.next()?, "id", b"ACGT", b"!!!!");
        assert_record(pt.next()?, "id2", b"TGCA", b"!!!!");
        assert!(pt.next()?.is_none());
        Ok(())
    }

    /// Malformed inputs must surface as errors rather than records.
    #[test]
    fn test_fastq_pathological_sequences() -> Result<(), EtError> {
        // A '+' directly after the header line, i.e. no sequence line at all.
        const TEST_FASTQ_1: &[u8] = b"@DF\n+\n+\n!";
        let mut missing_sequence = FastqReader::new(TEST_FASTQ_1, None)?;
        assert!(missing_sequence.next().is_err());

        // Input that stops right after an empty header line.
        const TEST_FASTQ_2: &[u8] = b"@\n";
        let mut header_only = FastqReader::new(TEST_FASTQ_2, None)?;
        assert!(header_only.next().is_err());

        Ok(())
    }

    /// Every record of the bundled sample file must parse without error.
    #[test]
    fn test_fastq_from_file() -> Result<(), EtError> {
        let data: &[u8] = include_bytes!("../../tests/data/test.fastq");
        let mut reader = FastqReader::new(data, None)?;
        loop {
            if reader.next()?.is_none() {
                break;
            }
        }
        Ok(())
    }
}