use bitvec::{order::Msb0, prelude as bv};
use serde::{Serialize, Deserialize};
use self::utils::{setbits_u32, setbits_u64};
mod decode;
mod encode;
mod utils;
mod runlength_codec;
pub mod decode2;
pub use encode::BuszWriter;
pub use decode::BuszReader;
// Block size used by the PFD codec.
// NOTE(review): presumably the number of elements encoded per PFD block; it is not
// referenced in this file — confirm against the `encode`/`decode` submodules.
const PFD_BLOCKSIZE: usize = 512;
/// Borrowed bit sequence used throughout the busz codec: `u8` storage elements,
/// most-significant-bit-first (`Msb0`) ordering.
pub (crate) type BuszBitSlice = bv::BitSlice<u8,Msb0>;
/// Owned, growable counterpart of [`BuszBitSlice`]: `u8` storage elements,
/// most-significant-bit-first (`Msb0`) ordering.
pub (crate) type BuszBitVector = bv::BitVec<u8, Msb0>;
/// Header of a single compressed block.
///
/// One `u64` packs two numbers: the block's size in bytes in the upper bits
/// (everything above bit 30) and the number of records in the block in the
/// lower 30 bits.
struct CompressedBlockHeader {
    /// Packed value: `(block_size_bytes << 30) | block_size_records`.
    header_bytes: u64,
}

impl CompressedBlockHeader {
    /// Packs `block_size_bytes` and `block_size_records` into a header.
    ///
    /// # Panics
    /// * if `block_size_records` is not strictly below the 30-bit limit
    ///   (`setbits_u32(30)`),
    /// * if `block_size_bytes` is not strictly below the 34-bit limit
    ///   (`setbits_u64(34)`).
    pub fn new(block_size_bytes: u64, block_size_records: u64) -> Self {
        // Validate both inputs before packing so an out-of-range value never
        // silently spills into the other field.
        let record_limit = setbits_u32(30) as u64;
        if record_limit <= block_size_records {
            panic!("Cant store more than {} records, trying {}", record_limit, block_size_records)
        }

        // The byte limit is 34 bits wide and does not fit into a u32, so it has
        // to come from the 64-bit helper (also when formatting the message).
        let byte_limit = setbits_u64(34);
        if byte_limit <= block_size_bytes {
            panic!("Cant store more than {} bytes, trying {}", byte_limit, block_size_bytes)
        }

        let header_bytes = (block_size_bytes << 30) | block_size_records;
        CompressedBlockHeader { header_bytes }
    }

    /// Unpacks the header into `(block_size_bytes, block_size_records)`.
    pub fn get_blocksize_and_nrecords(&self) -> (u64, u64) {
        // Number of low bits reserved for the record count.
        let bit_length = 30;
        let block_size_bytes = self.header_bytes >> bit_length;
        // Mask selecting the low `bit_length` bits.
        let bitmask_64 = setbits_u32(bit_length) as u64;
        let block_size_records = self.header_bytes & bitmask_64;
        (block_size_bytes, block_size_records)
    }
}
// Size of the busz file header in bytes: three 4-byte values, one per `u32`
// field of `BuszHeader`.
// NOTE(review): assumes bincode writes each `u32` as exactly 4 bytes — confirm.
const BUSZ_HEADER_SIZE: usize = 4+4+4;
/// File-level header of a busz file, (de)serialized with bincode via serde.
///
/// NOTE(review): field order and types presumably define the on-disk layout —
/// do not reorder or resize fields without checking the file format.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
struct BuszHeader {
// NOTE(review): presumably the number of records per compressed block — confirm against the encoder.
block_size: u32,
// NOTE(review): presumably the block size used by the PFD codec — confirm against the encoder.
pfd_block_size: u32,
// Must be 0: `BuszHeader::from_bytes` rejects any other value as unsupported.
lossy_umi: u32,
}
impl BuszHeader {
    /// Decodes a header from its bincode representation.
    ///
    /// # Panics
    /// * if `bytes` cannot be deserialized into a `BuszHeader`,
    /// * if the decoded header has `lossy_umi != 0` (not supported).
    pub fn from_bytes(bytes: &[u8]) -> BuszHeader {
        let decoded: Result<BuszHeader, _> = bincode::deserialize(bytes);
        let header = decoded.expect("FAILED to deserialze busz header");
        // Only files written without lossy UMI handling can be read.
        assert_eq!(
            header.lossy_umi, 0,
            "lossy_umi != 0 not supported"
        );
        header
    }

    /// Encodes the header into its bincode representation.
    ///
    /// # Panics
    /// If serialization fails.
    pub fn to_bytes(&self) -> Vec<u8> {
        let encoded = bincode::serialize(self);
        encoded.expect("FAILED to serialze header")
    }
}
#[cfg(test)]
mod test {
use crate::busz::CompressedBlockHeader;
/// Packing a byte count and a record count into a block header and unpacking
/// it again must return the original pair.
#[test]
fn test_header_encode_decode() {
    let (nbytes, nrecords) = (20, 10);
    let header = CompressedBlockHeader::new(nbytes, nrecords);
    let (decoded_bytes, decoded_records) = header.get_blocksize_and_nrecords();
    assert_eq!(decoded_bytes, nbytes);
    assert_eq!(decoded_records, nrecords);
}
mod external {
use std::fs::File;
use std::io::Read;
use tempfile::tempdir;
use crate::io::{BusRecord, BusWriterPlain, BusReaderPlain, BusParams};
use crate::busz::decode::{BuszReader, decompress_busfile};
use crate::busz::encode::compress_busfile;
/// Writes a small set of records to a plain busfile inside a temporary
/// directory. Passes as long as writing does not panic; nothing is read back.
#[test]
fn test_external() {
    let records = vec![
        BusRecord { CB: 10, UMI: 11, EC: 10, COUNT: 13, FLAG: 14 },
        BusRecord { CB: 11, UMI: 11, EC: 10, COUNT: 13, FLAG: 14 },
        BusRecord { CB: 22, UMI: 10, EC: 10, COUNT: 1, FLAG: 0 },
        BusRecord { CB: 22, UMI: 11, EC: 10, COUNT: 1, FLAG: 0 },
    ];

    let workdir = tempdir().unwrap();
    let bus_path = workdir.path().join("buscompress.bus");
    let bus_name = bus_path.to_str().unwrap();

    let params = BusParams { cb_len: 16, umi_len: 12 };
    let mut plain_writer = BusWriterPlain::new(bus_name, params);
    plain_writer.write_records(&records);
}
/// Round trip: write records to a plain busfile, compress it to busz, then
/// iterate the busz file and expect the original records back.
#[test]
fn test_encode_decode_busz() {
    let expected = vec![
        BusRecord { CB: 10, UMI: 11, EC: 10, COUNT: 13, FLAG: 20 },
        BusRecord { CB: 11, UMI: 11, EC: 10, COUNT: 13, FLAG: 20 },
        BusRecord { CB: 22, UMI: 10, EC: 10, COUNT: 1, FLAG: 0 },
        BusRecord { CB: 22, UMI: 11, EC: 10, COUNT: 1, FLAG: 0 },
    ];
    let workdir = tempdir().unwrap();

    // Step 1: plain busfile on disk.
    let plain_path = workdir.path().join("buscompress.bus");
    let plain_name = plain_path.to_str().unwrap();
    {
        let mut plain_writer = BusWriterPlain::new(
            plain_name,
            BusParams { cb_len: 16, umi_len: 12 },
        );
        plain_writer.write_records(&expected);
        // The writer is dropped at the end of this scope, before the file is read.
    }

    // Step 2: compress it.
    let busz_path = workdir.path().join("lalalala.busz");
    let busz_name = busz_path.to_str().unwrap();
    compress_busfile(plain_name, busz_name, 100);

    // Step 3: decode and compare.
    let decoded: Vec<_> = BuszReader::new(busz_name).collect();
    assert_eq!(expected, decoded);
}
/// Round-trips a larger busfile: compress it to busz, decode the result and
/// compare against the records of the plain input.
///
/// The input is a machine-specific file outside the repository, so the test is
/// ignored by default; run it with `cargo test -- --ignored` where the dataset
/// is available.
#[test]
#[ignore = "requires the local dataset under /home/michi/bus_testing"]
fn test_encode_decode_busz_biggerfile() {
    let input_plain = "/home/michi/bus_testing/bus_output_shorter/output.corrected.sort.bus";
    let dir = tempdir().unwrap();
    let file_path = dir.path().join("output.corrected.sort.busz");
    let copmressed_output = file_path.to_str().unwrap();

    println!("copmressing busfile");
    compress_busfile(input_plain, copmressed_output, 10000);

    println!("decoding busfile");
    let reader = BuszReader::new(copmressed_output);
    let recs: Vec<_> = reader.collect();

    let x = BusReaderPlain::new(input_plain);
    assert_eq!(x.collect::<Vec<_>>(), recs);
}
// Compresses a machine-specific busfile into a temporary directory.
// It is not registered as a `#[test]` and nothing calls it, so it is dead code;
// the `allow` silences that warning, matching `test_compress_full`.
#[allow(dead_code)]
fn test_compress1() {
    let input_plain = "/home/michi/bus_testing/bus_output_shorter/output.corrected.sort.bus";
    let dir = tempdir().unwrap();
    let file_path = dir.path().join("buscompress_testing.busz");
    let copmressed_output = file_path.to_str().unwrap();
    compress_busfile(input_plain, copmressed_output, 10000);
}
// Compresses a machine-specific busfile to a fixed location in /tmp.
// Not registered as a `#[test]`; nothing calls it, hence the `allow`.
#[allow(dead_code)]
fn test_compress_full() {
    let plain_bus = "/home/michi/bus_testing/bus_output/output.corrected.sort.bus";
    let busz_target = "/tmp/buscompress_testing_full.busz";
    compress_busfile(plain_bus, busz_target, 10000);
}
/// Decompresses an existing busz file into a temporary plain busfile and checks
/// that its records equal those of the original plain busfile. Also prints the
/// time spent in `decompress_busfile`.
///
/// Both inputs are machine-specific files outside the repository, so the test
/// is ignored by default; run it with `cargo test -- --ignored` where the
/// dataset is available.
#[test]
#[ignore = "requires the local dataset under /home/michi/bus_testing"]
fn test_decompress() {
    let input_compressed = "/home/michi/bus_testing/bus_output/output.corrected.sort.busz";
    let input_plain = "/home/michi/bus_testing/bus_output/output.corrected.sort.bus";
    let dir = tempdir().unwrap();
    let file_path = dir.path().join("buscompress_lala.bus");
    let output = file_path.to_str().unwrap();

    let start = std::time::Instant::now();
    decompress_busfile(input_compressed, output);
    let elapsed = start.elapsed().as_millis();
    println!("decoding: {elapsed} ms");

    let r = BusReaderPlain::new(output);
    let records: Vec<_> = r.collect();
    let r_original = BusReaderPlain::new(input_plain);
    let records_original: Vec<_> = r_original.collect();

    // Compare lengths first so a size mismatch fails with a short message
    // before the full record-by-record comparison.
    assert_eq!(records.len(), records_original.len());
    assert_eq!(records, records_original);
}
/// Loads a busz file and its plain counterpart fully into memory, decodes both
/// from the in-memory buffers via `from_read`, and checks that they yield the
/// same records.
///
/// Both inputs are machine-specific files outside the repository, so the test
/// is ignored by default; run it with `cargo test -- --ignored` where the
/// dataset is available.
#[test]
#[ignore = "requires the local dataset under /home/michi/bus_testing"]
fn test_iterator() {
    let buszfile = "/home/michi/bus_testing/bus_output_shortest/output.corrected.sort.busz";
    let buffer_busz = bus_to_mem(buszfile);
    let reader_busz = BuszReader::from_read(buffer_busz.as_slice());
    let records: Vec<BusRecord> = reader_busz.collect();

    let busfile = "/home/michi/bus_testing/bus_output_shortest/output.corrected.sort.bus";
    let buffer_bus = bus_to_mem(busfile);
    let r_original = BusReaderPlain::from_read(buffer_bus.as_slice());
    let records_original: Vec<_> = r_original.collect();

    // Compare lengths first so a size mismatch fails with a short message
    // before the full record-by-record comparison.
    assert_eq!(records.len(), records_original.len());
    assert_eq!(records, records_original);
}
/// Reads the entire file at `busfile` into memory and returns its bytes.
///
/// Panics if the file cannot be opened or read.
fn bus_to_mem(busfile: &str) -> Vec<u8> {
    let mut handle = File::open(busfile).unwrap();
    let mut contents = Vec::new();
    handle.read_to_end(&mut contents).unwrap();
    contents
}
}
}