use std::{
fs::File,
io::{self, BufWriter, Write},
path::Path,
};
use bytes::BytesMut;
use crate::{
CompressionLevel, Compressor, BGZF_BLOCK_SIZE, BGZF_EOF, BUFSIZE, MAX_BGZF_BLOCK_SIZE,
};
/// Block-compressing writer that wraps an inner [`Write`] sink.
///
/// Incoming bytes are staged in `uncompressed_buffer`, compressed one block at
/// a time into `compressed_buffer`, and then forwarded to the inner writer.
pub struct Writer<W: Write> {
    /// Bytes accepted from callers that have not been compressed yet.
    uncompressed_buffer: BytesMut,
    /// Scratch buffer handed to the compressor; written out and cleared after
    /// every block.
    compressed_buffer: Vec<u8>,
    /// Number of staged bytes that triggers compression of one block in `write`.
    blocksize: usize,
    /// Compressor configured with the level chosen at construction time.
    compressor: Compressor,
    /// Inner sink; becomes `None` once `finish` has taken it.
    writer: Option<W>,
}
impl<W> Writer<W>
where
    W: Write,
{
    /// Creates a writer around `writer` that uses `BGZF_BLOCK_SIZE` as its
    /// block size.
    pub fn new(writer: W, compression_level: CompressionLevel) -> Self {
        Self::with_capacity(writer, compression_level, BGZF_BLOCK_SIZE)
    }

    /// Creates a writer around `writer` that compresses a block every time
    /// `blocksize` uncompressed bytes have been staged.
    ///
    /// # Panics
    ///
    /// Panics if `blocksize` is zero or greater than `BGZF_BLOCK_SIZE`.
    pub fn with_capacity(writer: W, compression_level: CompressionLevel, blocksize: usize) -> Self {
        // A zero block size would make the `while len >= blocksize` loop in
        // `write` spin forever, emitting empty blocks.
        assert!(blocksize > 0, "blocksize must be greater than zero");
        assert!(blocksize <= BGZF_BLOCK_SIZE);
        let compressor = Compressor::new(compression_level);
        Self {
            uncompressed_buffer: BytesMut::with_capacity(BUFSIZE),
            compressed_buffer: Vec::with_capacity(BUFSIZE),
            blocksize,
            compressor,
            writer: Some(writer),
        }
    }

    /// Compresses any staged bytes, appends the `BGZF_EOF` marker, flushes the
    /// inner writer and returns it.
    ///
    /// # Errors
    ///
    /// Returns the first error raised while compressing or writing. When the
    /// staged data cannot be flushed, the end-of-file marker is *not* written,
    /// so the incomplete output is not marked as complete.
    pub fn finish(mut self) -> io::Result<W> {
        let flushed = self.flush_buffer();
        // Take the inner writer before propagating a flush error: `Drop` then
        // sees `None` and cannot append the EOF marker to a stream that is
        // missing data.
        let writer = self.writer.take();
        flushed?;
        let mut writer = writer.expect("writer already taken");
        writer.write_all(BGZF_EOF)?;
        writer.flush()?;
        Ok(writer)
    }

    /// Compresses everything left in `uncompressed_buffer` and writes the
    /// resulting blocks to the inner writer, without flushing that writer.
    ///
    /// Fails with "writer already finished" if the inner writer is gone.
    fn flush_buffer(&mut self) -> io::Result<()> {
        let writer = self
            .writer
            .as_mut()
            .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "writer already finished"))?;
        while !self.uncompressed_buffer.is_empty() {
            // Never feed more than MAX_BGZF_BLOCK_SIZE bytes into one block.
            let chunk_len = std::cmp::min(self.uncompressed_buffer.len(), MAX_BGZF_BLOCK_SIZE);
            let b = self.uncompressed_buffer.split_to(chunk_len).freeze();
            self.compressor
                .compress(&b[..], &mut self.compressed_buffer)
                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            writer.write_all(&self.compressed_buffer)?;
            // Reset the scratch buffer so the next block starts from empty.
            self.compressed_buffer.clear();
        }
        Ok(())
    }
}
impl Writer<File> {
    /// Creates (or truncates) the file at `path` and wraps it in a writer
    /// built with [`Writer::new`].
    ///
    /// # Errors
    ///
    /// Returns the error from [`File::create`] if the file cannot be created.
    pub fn from_path<P>(path: P, compression_level: CompressionLevel) -> io::Result<Self>
    where
        P: AsRef<Path>,
    {
        let file = File::create(path)?;
        Ok(Self::new(file, compression_level))
    }
}
impl Writer<BufWriter<File>> {
    /// Creates (or truncates) the file at `path`, wraps it in a [`BufWriter`]
    /// with a 256 KiB buffer, and builds a writer on top with [`Writer::new`].
    ///
    /// # Errors
    ///
    /// Returns the error from [`File::create`] if the file cannot be created.
    pub fn from_path_buffered<P>(path: P, compression_level: CompressionLevel) -> io::Result<Self>
    where
        P: AsRef<Path>,
    {
        // Capacity of the BufWriter placed between the compressor and the file.
        const FILE_BUFFER_CAPACITY: usize = 256 * 1024;

        let file = File::create(path)?;
        let buffered = BufWriter::with_capacity(FILE_BUFFER_CAPACITY, file);
        Ok(Self::new(buffered, compression_level))
    }
}
impl<W: Write> Write for Writer<W> {
    /// Stages all of `buf`, then compresses and writes out every complete
    /// block of `blocksize` bytes that is available.
    ///
    /// Returns `buf.len()` on success. Fails with "writer already finished"
    /// when the inner writer is gone, or with the error raised while
    /// compressing or writing a block.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let sink = match self.writer.as_mut() {
            Some(inner) => inner,
            None => {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    "writer already finished",
                ))
            }
        };

        self.uncompressed_buffer.extend_from_slice(buf);

        // Drain full blocks; any tail shorter than `blocksize` stays staged.
        while self.uncompressed_buffer.len() >= self.blocksize {
            let block = self.uncompressed_buffer.split_to(self.blocksize).freeze();
            self.compressor
                .compress(&block[..], &mut self.compressed_buffer)
                .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?;
            sink.write_all(&self.compressed_buffer)?;
            self.compressed_buffer.clear();
        }

        Ok(buf.len())
    }

    /// Compresses and writes whatever is still staged, then flushes the inner
    /// writer.
    fn flush(&mut self) -> io::Result<()> {
        self.flush_buffer()?;
        match self.writer.as_mut() {
            Some(inner) => inner.flush(),
            None => Ok(()),
        }
    }
}
impl<W> Drop for Writer<W>
where
W: Write,
{
fn drop(&mut self) {
if self.writer.is_some() {
let _ = self.flush_buffer();
if let Some(ref mut writer) = self.writer {
let _ = writer.write_all(BGZF_EOF);
let _ = writer.flush();
}
}
}
}