use std::io::Read;
use bitvec::{order::Msb0, prelude as bv};
use serde::{Serialize, Deserialize};
use crate::io::{BUS_HEADER_SIZE, BusHeader, BusParams};
// Submodules of the busz implementation.
// `blocks` holds the compressed block header type (`CompressedBlockHeader`).
mod blocks;
// Deprecated module, still compiled; `BuszReader` is re-exported from `decode_bytes` below.
#[deprecated]
mod decode;
pub mod decode_bytes;
mod encode;
mod utils;
mod runlength_codec;
// Re-export the writer and reader so callers can reach them directly from this module.
pub use encode::BuszWriter;
pub use decode_bytes::BuszReader;
// NOTE(review): presumably the block size used for the PFD encoding
// (cf. `BuszHeader::pfd_block_size`) — confirm against the encoder.
const PFD_BLOCKSIZE: usize = 512;
/// Bit slice over `u8` storage with most-significant-bit-first ordering.
pub (crate) type BuszBitSlice = bv::BitSlice<u8,Msb0>;
/// Owned bit vector over `u8` storage with most-significant-bit-first ordering.
pub (crate) type BuszBitVector = bv::BitVec<u8, Msb0>;
/// Size in bytes of the busz header read by `read_busz_header`:
/// three 4-byte fields, matching the three `u32` fields of `BuszHeader`.
const BUSZ_HEADER_SIZE: usize = 4+4+4;
/// Busz-specific header that `read_busz_header` reads after the plain BUS
/// header and its variable-length part.
///
/// It is (de)serialized with bincode, so the order and types of the fields
/// determine the byte layout; do not reorder them.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
struct BuszHeader {
// NOTE(review): presumably the number of records per compressed block — confirm.
block_size: u32,
// NOTE(review): presumably the block size of the PFD encoding (cf. `PFD_BLOCKSIZE`) — confirm.
pfd_block_size: u32,
// Must be 0: `BuszHeader::from_bytes` panics on any other value.
lossy_umi: u32,
}
impl BuszHeader {
    /// Decodes a `BuszHeader` from its bincode-serialized bytes.
    ///
    /// # Panics
    /// Panics if `bytes` cannot be deserialized into a `BuszHeader`, or if
    /// the decoded `lossy_umi` field is non-zero (not supported).
    pub fn from_bytes(bytes: &[u8]) -> BuszHeader {
        let parsed: Self = bincode::deserialize(bytes)
            .expect("FAILED to deserialze busz header");
        // Only the lossless UMI mode is handled; reject everything else.
        assert_eq!(
            parsed.lossy_umi, 0,
            "lossy_umi != 0 not supported"
        );
        parsed
    }

    /// Encodes this header into bytes using bincode.
    ///
    /// # Panics
    /// Panics if serialization fails.
    pub fn to_bytes(&self) -> Vec<u8> {
        let encoded = bincode::serialize(self);
        encoded.expect("FAILED to serialze header")
    }
}
/// Reads all headers at the start of a busz stream.
///
/// Consumes, in order: the fixed-size BUS header (`BUS_HEADER_SIZE` bytes),
/// the variable-length header (`tlen` bytes, content discarded) and the
/// busz header (`BUSZ_HEADER_SIZE` bytes). Afterwards `reader` is positioned
/// right behind the busz header.
///
/// Returns the `BusParams` taken from the BUS header together with the
/// decoded `BuszHeader`.
///
/// # Panics
/// Panics if any of the three reads fails (e.g. the input is truncated), if
/// the magic of the BUS header is not `b"BUS\x01"`, or if
/// `BuszHeader::from_bytes` rejects the busz header.
fn read_busz_header(reader: &mut impl Read) -> (BusParams, BuszHeader) {
    let mut header_bytes = [0_u8; BUS_HEADER_SIZE];
    reader
        .read_exact(&mut header_bytes)
        .expect("failed to read header");
    let header = BusHeader::from_bytes(&header_bytes);

    // Validate the magic first, before any other header field is used.
    assert_eq!(
        &header.magic, b"BUS\x01",
        "Header struct not matching; MAGIC is wrong"
    );
    let params = header.get_params();

    // The variable-length header only has to be skipped; its content is unused.
    let mut var_buffer = vec![0_u8; header.tlen as usize];
    reader
        .read_exact(&mut var_buffer)
        .expect("failed to read variable header");

    let mut buszheader_bytes = [0_u8; BUSZ_HEADER_SIZE];
    reader
        .read_exact(&mut buszheader_bytes)
        .expect("failed to read busz header");
    let busz_header = BuszHeader::from_bytes(&buszheader_bytes);

    (params, busz_header)
}
#[cfg(test)]
mod test {
use crate::busz::blocks::CompressedBlockHeader;
/// A `CompressedBlockHeader` built from a byte count and a record count
/// must hand back exactly those two values.
#[test]
fn test_header_encode_decode() {
    let (expected_bytes, expected_records) = (20, 10);
    let block_header = CompressedBlockHeader::new(expected_bytes, expected_records);
    let decoded = block_header.get_blocksize_and_nrecords();
    assert_eq!(decoded.0, expected_bytes);
    assert_eq!(decoded.1, expected_records);
}
mod external {
use std::fs::File;
use std::io::Read;
use std::path::Path;
use itertools::Itertools;
use tempfile::tempdir;
use crate::busz::BuszWriter;
use crate::io::{BusRecord, BusWriterPlain, BusReaderPlain, BusParams};
use crate::busz::decode_bytes::BuszReader;
/// Test helper: reads the plain BUS file at `input` and writes all of its
/// records into a busz file at `output`.
///
/// The writer is created with the parameters of the input file; `blocksize`
/// is forwarded unchanged to `BuszWriter::new`.
fn compress_busfile(input: &Path, output: &Path, blocksize: usize) {
    let plain_reader = BusReaderPlain::new(input);
    let params = plain_reader.params.clone();
    let mut busz_writer = BuszWriter::new(output, params, blocksize);
    // The reader is itself an iterator over records, so it can be handed over directly.
    busz_writer.write_iterator(plain_reader);
}
/// Test helper: reads the busz file at `input` and writes every decoded
/// record into a plain BUS file at `output`, using the parameters reported
/// by the busz reader.
fn decompress_busfile(input: &Path, output: &Path) {
    let busz_reader = BuszReader::new(input);
    let params = busz_reader.get_params().clone();
    let mut plain_writer = BusWriterPlain::new(output, params);
    busz_reader.for_each(|record| {
        plain_writer.write_record(&record);
    });
}
/// Round trip on a tiny in-memory data set: write records to a plain BUS
/// file, compress it, read the busz file back and compare with the input.
#[test]
fn test_encode_decode_busz() {
    let records = vec![
        BusRecord { CB: 10, UMI: 11, EC: 10, COUNT: 13, FLAG: 20 },
        BusRecord { CB: 11, UMI: 11, EC: 10, COUNT: 13, FLAG: 20 },
        BusRecord { CB: 22, UMI: 10, EC: 10, COUNT: 1, FLAG: 0 },
        BusRecord { CB: 22, UMI: 11, EC: 10, COUNT: 1, FLAG: 0 },
    ];

    let workdir = tempdir().unwrap();
    let plain_path = workdir.path().join("buscompress.bus");
    {
        // The writer is dropped at the end of this scope, i.e. before the
        // file is read again for compression.
        let mut plain_writer = BusWriterPlain::new(
            &plain_path,
            BusParams { cb_len: 16, umi_len: 12 },
        );
        plain_writer.write_iterator(records.iter().cloned());
    }

    let compressed_path = workdir.path().join("lalalala.busz");
    compress_busfile(&plain_path, &compressed_path, 100);

    let decoded: Vec<_> = BuszReader::new(&compressed_path).collect();
    assert_eq!(records, decoded);
}
/// Round trip on a larger, real BUS file: compress it, decode the busz file
/// and compare the records with those of the original file.
///
/// The input lives at a machine-specific absolute path, so the test is
/// ignored by default instead of failing on every other machine. Run it
/// explicitly with `cargo test -- --ignored` where the data is available.
#[test]
#[ignore = "requires BUS test data at a machine-specific absolute path"]
fn test_encode_decode_busz_biggerfile() {
    let input_plain =
        Path::new("/home/michi/bus_testing/bus_output_shorter/output.corrected.sort.bus");
    let dir = tempdir().unwrap();
    let compressed_output = dir.path().join("output.corrected.sort.busz");

    println!("copmressing busfile");
    compress_busfile(input_plain, &compressed_output, 10000);

    println!("decoding busfile");
    let recs: Vec<_> = BuszReader::new(&compressed_output).collect();
    let expected: Vec<_> = BusReaderPlain::new(input_plain).collect();
    assert_eq!(expected, recs);
}
/// Decompresses an existing busz file into a temporary plain BUS file and
/// checks record by record that it matches the original plain file. Also
/// prints how long the decoding took.
///
/// Both inputs live at machine-specific absolute paths, so the test is
/// ignored by default instead of failing on every other machine. Run it
/// explicitly with `cargo test -- --ignored` where the data is available.
#[test]
#[ignore = "requires BUS test data at a machine-specific absolute path"]
fn test_decompress() {
    let input_compressed =
        Path::new("/home/michi/bus_testing/bus_output/output.corrected.sort.busz");
    let input_plain =
        Path::new("/home/michi/bus_testing/bus_output/output.corrected.sort.bus");
    let dir = tempdir().unwrap();
    let output = dir.path().join("buscompress_lala.bus");

    let start = std::time::Instant::now();
    decompress_busfile(input_compressed, &output);
    let elapsed = start.elapsed().as_millis();
    println!("decoding: {elapsed} ms");

    let decompressed = BusReaderPlain::new(&output);
    let original = BusReaderPlain::new(input_plain);
    // `zip_eq` panics if the two readers yield a different number of records.
    for (r1, r2) in decompressed.zip_eq(original) {
        assert_eq!(r1, r2);
    }
}
/// Reads a busz file and the corresponding plain BUS file from in-memory
/// buffers (via `from_read`) and checks that both yield the same records.
///
/// Both inputs live at machine-specific absolute paths, so the test is
/// ignored by default instead of failing on every other machine. Run it
/// explicitly with `cargo test -- --ignored` where the data is available.
#[test]
#[ignore = "requires BUS test data at a machine-specific absolute path"]
fn test_iterator() {
    let buszfile = "/home/michi/bus_testing/bus_output_shortest/output.corrected.sort.busz";
    let buffer_busz = bus_to_mem(buszfile);
    let reader_busz = BuszReader::from_read(buffer_busz.as_slice());

    let busfile = "/home/michi/bus_testing/bus_output_shortest/output.corrected.sort.bus";
    let buffer_bus = bus_to_mem(busfile);
    let reader_plain = BusReaderPlain::from_read(buffer_bus.as_slice());

    // `zip_eq` panics if the two readers yield a different number of records.
    for (r1, r2) in reader_busz.zip_eq(reader_plain) {
        assert_eq!(r1, r2);
    }
}
/// Test helper: loads the complete file at `busfile` into memory.
///
/// # Panics
/// Panics if the file cannot be opened or read.
fn bus_to_mem(busfile: &str) -> Vec<u8> {
    let mut contents = Vec::new();
    File::open(busfile)
        .unwrap()
        .read_to_end(&mut contents)
        .unwrap();
    contents
}
}
}