use crate::{
block::header::{BlockHeader, ByteRange},
error::DryIceError,
};
use super::index::RecordIndexEntry;
/// Size in bytes of one serialized record-index entry.
///
/// An entry is parsed as six consecutive little-endian `u32` fields
/// (offset and length for name, sequence and quality), i.e. 6 * 4 = 24 bytes.
const INDEX_ENTRY_SIZE: usize = 24;
/// Signature of the per-field decode routines handed to `BlockDecoder::advance`.
///
/// Called as `decode_fn(encoded, length, &mut out)`, where `encoded` is the
/// current record's slice of a section and `out` has been cleared by the
/// caller immediately before the call.
///
/// NOTE(review): the meaning of the `usize` argument is defined by the codec
/// implementations, which are not visible here. Call sites pass the encoded
/// length for names and a value named `original_len` for sequences and
/// qualities — confirm against the codec contract.
type DecodeFn = fn(&[u8], usize, &mut Vec<u8>) -> Result<(), DryIceError>;
/// Per-field flags recording whether a field is treated as "identity".
///
/// When a flag is `true`, `BlockDecoder::advance` skips the decode call for
/// that field and the matching `current_*` accessor returns the stored section
/// bytes directly instead of the decoded scratch buffer. The `Default` value
/// has every flag `false`.
#[derive(Clone, Copy, Default)]
struct CodecIdentity {
/// `true` when names are served straight from the name section.
name: bool,
/// `true` when sequences are served straight from the sequence section.
sequence: bool,
/// `true` when qualities are served straight from the quality section.
quality: bool,
}
/// Cursor-style reader over the records of a single block.
///
/// The whole block body (record index plus sections) is held in memory;
/// `advance` moves to the next record and the `current_*` accessors expose
/// its fields.
pub(crate) struct BlockDecoder {
/// Header the block body was read against.
header: BlockHeader,
/// One parsed index entry per record, in record order.
index: Vec<RecordIndexEntry>,
/// Name section bytes; `None` when the header has no name range.
name_bytes: Option<Vec<u8>>,
/// Sequence section bytes (always present).
sequence_bytes: Vec<u8>,
/// Quality section bytes; `None` when the header has no quality range.
quality_bytes: Option<Vec<u8>>,
/// Record-key section bytes; `None` when the header has no record-key range.
record_key_bytes: Option<Vec<u8>>,
/// Position of the current record within `index`.
cursor: usize,
/// `false` until the first `advance` call, which does not move `cursor`.
started: bool,
/// Scratch output for the current record's decoded name, reused across records.
decoded_name_buf: Vec<u8>,
/// Scratch output for the current record's decoded sequence, reused across records.
decoded_sequence_buf: Vec<u8>,
/// Scratch output for the current record's decoded quality, reused across records.
decoded_quality_buf: Vec<u8>,
/// Identity flags captured from the arguments of the first `advance` call.
identity: CodecIdentity,
}
/// Returns the byte length of an optional section as a `usize`.
///
/// A missing section (`None`) counts as zero bytes.
///
/// # Errors
///
/// Returns [`DryIceError::CorruptBlockLayout`] when the recorded length cannot
/// be represented as a `usize` on this platform.
fn section_len(range: Option<ByteRange>) -> Result<usize, DryIceError> {
    let recorded = match range {
        Some(section) => section.len,
        None => 0,
    };
    match usize::try_from(recorded) {
        Ok(byte_len) => Ok(byte_len),
        Err(_) => Err(DryIceError::CorruptBlockLayout {
            message: "section length exceeds usize range",
        }),
    }
}
impl BlockDecoder {
pub fn from_header_and_reader<R: std::io::Read>(
header: BlockHeader,
reader: &mut R,
) -> Result<Self, DryIceError> {
let record_count = header.record_count as usize;
let index_byte_len = record_count * INDEX_ENTRY_SIZE;
let mut index_buf = vec![0u8; index_byte_len];
reader.read_exact(&mut index_buf)?;
let mut index = Vec::with_capacity(record_count);
for i in 0..record_count {
let base = i * INDEX_ENTRY_SIZE;
let b = &index_buf[base..base + INDEX_ENTRY_SIZE];
index.push(RecordIndexEntry {
name_offset: u32::from_le_bytes([b[0], b[1], b[2], b[3]]),
name_len: u32::from_le_bytes([b[4], b[5], b[6], b[7]]),
sequence_offset: u32::from_le_bytes([b[8], b[9], b[10], b[11]]),
sequence_len: u32::from_le_bytes([b[12], b[13], b[14], b[15]]),
quality_offset: u32::from_le_bytes([b[16], b[17], b[18], b[19]]),
quality_len: u32::from_le_bytes([b[20], b[21], b[22], b[23]]),
});
}
let name_bytes = if header.names.is_some() {
let len = section_len(header.names)?;
let mut buf = vec![0u8; len];
reader.read_exact(&mut buf)?;
Some(buf)
} else {
None
};
let seq_len =
usize::try_from(header.sequences.len).map_err(|_| DryIceError::CorruptBlockLayout {
message: "sequence section length exceeds usize range",
})?;
let mut sequence_bytes = vec![0u8; seq_len];
reader.read_exact(&mut sequence_bytes)?;
let quality_bytes = if header.qualities.is_some() {
let len = section_len(header.qualities)?;
let mut buf = vec![0u8; len];
reader.read_exact(&mut buf)?;
Some(buf)
} else {
None
};
let record_key_bytes = if header.record_keys.is_some() {
let len = section_len(header.record_keys)?;
let mut buf = vec![0u8; len];
reader.read_exact(&mut buf)?;
Some(buf)
} else {
None
};
Ok(Self {
header,
index,
name_bytes,
sequence_bytes,
quality_bytes,
record_key_bytes,
cursor: 0,
started: false,
decoded_name_buf: Vec::new(),
decoded_sequence_buf: Vec::new(),
decoded_quality_buf: Vec::new(),
identity: CodecIdentity::default(),
})
}
pub fn advance(
&mut self,
seq_decode_fn: DecodeFn,
qual_decode_fn: DecodeFn,
name_decode_fn: DecodeFn,
seq_identity: bool,
qual_identity: bool,
name_identity: bool,
) -> Result<bool, DryIceError> {
if self.started {
self.cursor += 1;
} else {
self.started = true;
self.identity = CodecIdentity {
name: name_identity,
sequence: seq_identity,
quality: qual_identity,
};
}
if self.cursor >= self.index.len() {
return Ok(false);
}
if !self.identity.name {
self.decode_current_name(name_decode_fn)?;
}
if !self.identity.sequence {
self.decode_current_sequence(seq_decode_fn)?;
}
if !self.identity.quality {
self.decode_current_quality(qual_decode_fn)?;
}
Ok(true)
}
fn decode_current_name(&mut self, decode_fn: DecodeFn) -> Result<(), DryIceError> {
if let Some(names) = &self.name_bytes {
let entry = &self.index[self.cursor];
let start = usize::try_from(entry.name_offset).expect("u32 fits in usize");
let len = usize::try_from(entry.name_len).expect("u32 fits in usize");
let encoded = &names[start..start + len];
self.decoded_name_buf.clear();
decode_fn(encoded, len, &mut self.decoded_name_buf)?;
} else {
self.decoded_name_buf.clear();
}
Ok(())
}
fn decode_current_sequence(&mut self, decode_fn: DecodeFn) -> Result<(), DryIceError> {
let entry = &self.index[self.cursor];
let start = usize::try_from(entry.sequence_offset).expect("u32 fits in usize");
let len = usize::try_from(entry.sequence_len).expect("u32 fits in usize");
let encoded = &self.sequence_bytes[start..start + len];
let original_len = usize::try_from(entry.quality_len).expect("u32 fits in usize");
self.decoded_sequence_buf.clear();
decode_fn(encoded, original_len, &mut self.decoded_sequence_buf)?;
Ok(())
}
fn decode_current_quality(&mut self, decode_fn: DecodeFn) -> Result<(), DryIceError> {
if let Some(quals) = &self.quality_bytes {
let entry = &self.index[self.cursor];
let start = usize::try_from(entry.quality_offset).expect("u32 fits in usize");
let len = usize::try_from(entry.quality_len).expect("u32 fits in usize");
let encoded = &quals[start..start + len];
let original_len = usize::try_from(entry.quality_len).expect("u32 fits in usize");
self.decoded_quality_buf.clear();
decode_fn(encoded, original_len, &mut self.decoded_quality_buf)?;
} else {
self.decoded_quality_buf.clear();
}
Ok(())
}
pub fn current_name(&self) -> &[u8] {
if self.identity.name {
if let Some(names) = &self.name_bytes {
let entry = &self.index[self.cursor];
let start = usize::try_from(entry.name_offset).expect("u32 fits in usize");
let len = usize::try_from(entry.name_len).expect("u32 fits in usize");
return &names[start..start + len];
}
&[]
} else {
&self.decoded_name_buf
}
}
pub fn current_sequence(&self) -> &[u8] {
if self.identity.sequence {
let entry = &self.index[self.cursor];
let start = usize::try_from(entry.sequence_offset).expect("u32 fits in usize");
let len = usize::try_from(entry.sequence_len).expect("u32 fits in usize");
&self.sequence_bytes[start..start + len]
} else {
&self.decoded_sequence_buf
}
}
pub fn current_quality(&self) -> &[u8] {
if self.identity.quality {
if let Some(quals) = &self.quality_bytes {
let entry = &self.index[self.cursor];
let start = usize::try_from(entry.quality_offset).expect("u32 fits in usize");
let len = usize::try_from(entry.quality_len).expect("u32 fits in usize");
return &quals[start..start + len];
}
return &[];
}
&self.decoded_quality_buf
}
pub fn verify_record_key<K: crate::key::RecordKey>(&self) -> Result<(), DryIceError> {
if self.header.record_keys.is_none() {
return Err(DryIceError::MissingRecordKeySection);
}
if self.header.record_key_width != K::WIDTH || self.header.record_key_tag != K::TYPE_TAG {
return Err(DryIceError::RecordKeyTypeMismatch);
}
Ok(())
}
pub fn current_record_key_bytes(&self) -> Result<&[u8], DryIceError> {
let key_bytes = self
.record_key_bytes
.as_ref()
.ok_or(DryIceError::MissingRecordKeySection)?;
let width = usize::from(self.header.record_key_width);
let start = self.cursor * width;
let end = start + width;
key_bytes
.get(start..end)
.ok_or(DryIceError::CorruptRecordIndex {
entry: self.cursor,
message: "record-key bytes out of range",
})
}
}