use std::io::{self, Write};
use byteorder::{LittleEndian, WriteBytesExt};
use indexmap::IndexMap;
use noodles_bgzf as bgzf;
use super::{
index::{
header::ReferenceSequenceNames,
reference_sequence::{bin::Chunk, Bin, Metadata},
Header, ReferenceSequence,
},
Index, MAGIC_NUMBER,
};
/// An index writer.
///
/// The wrapped output is driven through a [`bgzf::Writer`], so everything emitted by
/// [`Writer::write_index`] passes through that BGZF layer before reaching `W`.
pub struct Writer<W: Write> {
    /// BGZF writer that receives the serialized index bytes.
    inner: bgzf::Writer<W>,
}
impl<W: Write> Writer<W> {
    /// Creates an index writer that wraps `writer` in a [`bgzf::Writer`].
    pub fn new(writer: W) -> Self {
        let inner = bgzf::Writer::new(writer);
        Self { inner }
    }

    /// Serializes `index` to the underlying writer.
    ///
    /// Fields are emitted in this order: the magic number, `min_shift` and `depth` (each as a
    /// little-endian `i32`), the auxiliary block built from the optional header, the reference
    /// sequences, and finally the unplaced unmapped record count (little-endian `u64`), which is
    /// written only when the index provides one.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from the underlying writer and any error returned while
    /// serializing the header or the reference sequences.
    pub fn write_index(&mut self, index: &Index) -> io::Result<()> {
        let out = &mut self.inner;

        write_magic(out)?;
        out.write_i32::<LittleEndian>(i32::from(index.min_shift()))?;
        out.write_i32::<LittleEndian>(i32::from(index.depth()))?;

        write_aux(out, index.header())?;
        write_reference_sequences(out, index.depth(), index.reference_sequences())?;

        // The trailing count is optional; nothing is written when it is absent.
        match index.unplaced_unmapped_record_count() {
            Some(n_no_coor) => out.write_u64::<LittleEndian>(n_no_coor),
            None => Ok(()),
        }
    }
}
/// Writes the `MAGIC_NUMBER` bytes to `writer`.
///
/// # Errors
///
/// Returns any I/O error raised by `writer`.
fn write_magic<W: Write>(writer: &mut W) -> io::Result<()> {
    writer.write_all(MAGIC_NUMBER)
}
/// Writes the length-prefixed auxiliary block.
///
/// The optional header is first serialized into a scratch buffer so that its byte length is
/// known; that length is then written as a little-endian `i32`, followed by the buffered bytes.
/// With no header, a length of 0 and no payload are written.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if the serialized length does not fit in an `i32`,
/// and propagates any error from serializing the header or from `writer`.
fn write_aux<W: Write>(writer: &mut W, header: Option<&Header>) -> io::Result<()> {
    let mut payload = Vec::new();

    if let Some(h) = header {
        write_header(&mut payload, h)?;
    }

    let l_aux = match i32::try_from(payload.len()) {
        Ok(n) => n,
        Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
    };

    writer.write_i32::<LittleEndian>(l_aux)?;
    writer.write_all(&payload)
}
/// Serializes `header` to `writer`.
///
/// Six little-endian `i32` fields are written in order — format, reference sequence name
/// index, start position index, end position index, line comment prefix, and line skip count —
/// followed by the reference sequence names block. A missing end position index is written
/// as 0.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if any of the converted fields does not fit in an
/// `i32`, and propagates any I/O error from `writer`.
pub(crate) fn write_header<W: Write>(writer: &mut W, header: &Header) -> io::Result<()> {
    /// Converts `value` to an `i32`, mapping a failed conversion to an `InvalidInput` error.
    fn to_i32<T>(value: T) -> io::Result<i32>
    where
        i32: TryFrom<T>,
        <i32 as TryFrom<T>>::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
    {
        i32::try_from(value).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
    }

    // Each field is converted immediately before it is written, so a failed conversion leaves
    // the preceding fields already emitted.
    writer.write_i32::<LittleEndian>(i32::from(header.format()))?;

    let col_seq = to_i32(header.reference_sequence_name_index())?;
    writer.write_i32::<LittleEndian>(col_seq)?;

    let col_beg = to_i32(header.start_position_index())?;
    writer.write_i32::<LittleEndian>(col_beg)?;

    let col_end = match header.end_position_index() {
        Some(i) => to_i32(i)?,
        None => 0,
    };
    writer.write_i32::<LittleEndian>(col_end)?;

    writer.write_i32::<LittleEndian>(i32::from(header.line_comment_prefix()))?;

    let skip = to_i32(header.line_skip_count())?;
    writer.write_i32::<LittleEndian>(skip)?;

    write_reference_sequence_names(writer, header.reference_sequence_names())
}
/// Writes the reference sequence names block.
///
/// The block starts with the total byte length as a little-endian `i32`, followed by each name
/// terminated by a NUL byte. The length counts every name's bytes plus one terminator per name.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if the total length does not fit in an `i32`, and
/// propagates any I/O error from `writer`.
fn write_reference_sequence_names<W: Write>(
    writer: &mut W,
    reference_sequence_names: &ReferenceSequenceNames,
) -> io::Result<()> {
    const NUL: u8 = 0x00;

    // One extra byte per name accounts for its trailing NUL.
    let total_len = reference_sequence_names
        .iter()
        .fold(0usize, |acc, name| acc + (name.len() + 1));

    let l_nm = match i32::try_from(total_len) {
        Ok(n) => n,
        Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
    };
    writer.write_i32::<LittleEndian>(l_nm)?;

    reference_sequence_names.iter().try_for_each(|name| {
        writer.write_all(name.as_bytes())?;
        writer.write_u8(NUL)
    })
}
/// Writes the reference sequence list.
///
/// Emits the number of reference sequences as a little-endian `i32`, then the bins (and
/// optional metadata) of each reference sequence in slice order.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if the number of reference sequences does not fit in
/// an `i32`, and propagates any error from writing the bins.
fn write_reference_sequences<W: Write>(
    writer: &mut W,
    depth: u8,
    reference_sequences: &[ReferenceSequence],
) -> io::Result<()> {
    let n_ref = match i32::try_from(reference_sequences.len()) {
        Ok(n) => n,
        Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
    };
    writer.write_i32::<LittleEndian>(n_ref)?;

    for rs in reference_sequences.iter() {
        let (bins, metadata) = (rs.bins(), rs.metadata());
        write_bins(writer, depth, bins, metadata)?;
    }

    Ok(())
}
/// Writes the bins of a single reference sequence.
///
/// The bin count is written first as a little-endian `i32`; it includes one extra entry when
/// `metadata` is present. Each bin follows as its ID (`u32`), its `loffset` (`u64`), and its
/// chunk list. When present, the metadata entry is written after all regular bins.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if the bin count does not fit in an `i32` or a bin ID
/// does not fit in a `u32`, and propagates any error from writing chunks, metadata, or from
/// `writer`.
fn write_bins<W: Write>(
    writer: &mut W,
    depth: u8,
    bins: &IndexMap<usize, Bin>,
    metadata: Option<&Metadata>,
) -> io::Result<()> {
    let bin_count = i32::try_from(bins.len())
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;

    // The metadata entry is stored alongside the bins, so it counts toward the total.
    let n_bin = match metadata {
        Some(_) => bin_count + 1,
        None => bin_count,
    };
    writer.write_i32::<LittleEndian>(n_bin)?;

    for (id, bin) in bins.iter() {
        let bin_id = match u32::try_from(*id) {
            Ok(n) => n,
            Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
        };
        writer.write_u32::<LittleEndian>(bin_id)?;
        writer.write_u64::<LittleEndian>(u64::from(bin.loffset()))?;
        write_chunks(writer, bin.chunks())?;
    }

    match metadata {
        Some(m) => write_metadata(writer, depth, m),
        None => Ok(()),
    }
}
/// Writes the chunk list of a bin.
///
/// Emits the number of chunks as a little-endian `i32`, followed by each chunk's start and end
/// (each converted to a `u64` and written little-endian), in slice order.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if the number of chunks does not fit in an `i32`, and
/// propagates any I/O error from `writer`.
fn write_chunks<W>(writer: &mut W, chunks: &[Chunk]) -> io::Result<()>
where
    W: Write,
{
    let n_chunk = i32::try_from(chunks.len())
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
    writer.write_i32::<LittleEndian>(n_chunk)?;

    for chunk in chunks {
        let chunk_beg = u64::from(chunk.start());
        writer.write_u64::<LittleEndian>(chunk_beg)?;

        // The second field must be the chunk's end; writing the start twice would
        // describe every chunk as empty.
        let chunk_end = u64::from(chunk.end());
        writer.write_u64::<LittleEndian>(chunk_end)?;
    }

    Ok(())
}
/// Writes the metadata entry of a reference sequence.
///
/// The entry is laid out like a bin: the ID returned by `Bin::metadata_id(depth)` as a `u32`,
/// a default virtual position as the `u64` offset, and a chunk count of 2 as an `i32`. These
/// are followed by four `u64` values: the start position, the end position, the mapped
/// record count, and the unmapped record count. All values are little-endian.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if the metadata bin ID does not fit in a `u32`, and
/// propagates any I/O error from `writer`.
fn write_metadata<W: Write>(writer: &mut W, depth: u8, metadata: &Metadata) -> io::Result<()> {
    const N_CHUNK: i32 = 2;

    let bin_id = match u32::try_from(Bin::metadata_id(depth)) {
        Ok(id) => id,
        Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
    };
    writer.write_u32::<LittleEndian>(bin_id)?;

    // The offset slot of the metadata entry always holds the default virtual position.
    writer.write_u64::<LittleEndian>(u64::from(bgzf::VirtualPosition::default()))?;
    writer.write_i32::<LittleEndian>(N_CHUNK)?;

    writer.write_u64::<LittleEndian>(u64::from(metadata.start_position()))?;
    writer.write_u64::<LittleEndian>(u64::from(metadata.end_position()))?;
    writer.write_u64::<LittleEndian>(metadata.mapped_record_count())?;
    writer.write_u64::<LittleEndian>(metadata.unmapped_record_count())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the exact byte layout produced by `write_metadata` for a depth of 5.
    #[test]
    fn test_write_metadata() -> io::Result<()> {
        let metadata = Metadata::new(
            bgzf::VirtualPosition::from(610),
            bgzf::VirtualPosition::from(1597),
            55,
            0,
        );
        let depth = 5;

        let mut actual = Vec::new();
        write_metadata(&mut actual, depth, &metadata)?;

        let expected = [
            // bin ID (u32)
            0x4a, 0x92, 0x00, 0x00,
            // offset (u64)
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            // chunk count (i32) = 2
            0x02, 0x00, 0x00, 0x00,
            // start position (u64) = 610
            0x62, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            // end position (u64) = 1597
            0x3d, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            // mapped record count (u64) = 55
            0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            // unmapped record count (u64) = 0
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];

        assert_eq!(actual, expected);

        Ok(())
    }
}