use noodles::bgzf;
use serde::Serialize;
use std::fs::File;
use std::path::Path;
use crate::data_types::compare_benchmark::CompareBenchmark;
use crate::data_types::compare_region::CompareRegion;
/// Writes per-region sequence records as delimited text through a multithreaded BGZF writer.
///
/// Instances are built with [`RegionSequenceWriter::new`].
pub struct RegionSequenceWriter {
    /// Record serializer layered on top of the BGZF stream that wraps the output file.
    csv_writer: csv::Writer<bgzf::io::MultithreadedWriter<File>>,
}
/// One serialized output record holding the sequences associated with a single region.
///
/// The field order below is kept deliberately: the derived serializer emits the fields in
/// declaration order, so reordering them would reorder the output columns.
#[derive(Serialize)]
struct RegionSequenceRow {
    /// Identifier reported by the region.
    region_id: u64,
    /// Display-formatted coordinates of the region.
    coordinates: String,
    /// `ref_seq` copied from the benchmark's sequence bundle.
    ref_seq: String,
    /// `truth_seq1` copied from the benchmark's sequence bundle.
    truth_seq1: String,
    /// `truth_seq2` copied from the benchmark's sequence bundle.
    truth_seq2: String,
    /// `query_seq1` copied from the benchmark's sequence bundle.
    query_seq1: String,
    /// `query_seq2` copied from the benchmark's sequence bundle.
    query_seq2: String,
}
impl RegionSequenceRow {
    /// Builds the output row for `region` from the sequences recorded in `benchmark`.
    ///
    /// The region supplies the identifier and the display-formatted coordinates; the five
    /// sequences are cloned out of the benchmark's sequence bundle.
    ///
    /// # Panics
    ///
    /// Panics if `benchmark.sequence_bundle()` does not yield a bundle. Callers are expected
    /// to only request sequence output for benchmarks that carry one.
    pub fn new(region: &CompareRegion, benchmark: &CompareBenchmark) -> Self {
        let region_id = region.region_id();
        let coordinates = region.coordinates().to_string();
        // A missing bundle is a caller bug; fail with a message that names the broken
        // precondition instead of an anonymous `unwrap` panic.
        let sequence_bundle = benchmark
            .sequence_bundle()
            .expect("benchmark must carry a sequence bundle to write region sequences");
        Self {
            region_id,
            coordinates,
            ref_seq: sequence_bundle.ref_seq.clone(),
            truth_seq1: sequence_bundle.truth_seq1.clone(),
            truth_seq2: sequence_bundle.truth_seq2.clone(),
            query_seq1: sequence_bundle.query_seq1.clone(),
            query_seq2: sequence_bundle.query_seq2.clone(),
        }
    }
}
impl RegionSequenceWriter {
    /// Creates `filename` and prepares a tab-delimited record writer that streams its output
    /// through a multithreaded BGZF writer.
    ///
    /// `threads` is the requested number of compression workers; the value actually used is
    /// clamped to the range `1..=4`.
    ///
    /// # Errors
    ///
    /// Returns an error, annotated with the target path, if the output file cannot be created.
    pub fn new(filename: &Path, threads: usize) -> anyhow::Result<Self> {
        const DELIMITER: u8 = b'\t';
        const MAX_WORKERS: usize = 4;

        let worker_count = std::num::NonZeroUsize::new(threads.clamp(1, MAX_WORKERS))
            .expect("worker count is clamped to at least 1");
        // Attach the path so a failure reads as more than a bare OS error code.
        let file = anyhow::Context::with_context(File::create(filename), || {
            format!(
                "failed to create region sequence file {}",
                filename.display()
            )
        })?;
        let bgzf_writer = bgzf::io::MultithreadedWriter::with_worker_count(worker_count, file);
        let csv_writer = csv::WriterBuilder::new()
            .delimiter(DELIMITER)
            .from_writer(bgzf_writer);
        Ok(Self { csv_writer })
    }

    /// Serializes one row describing `region`, using the sequences recorded in `comparison`.
    ///
    /// # Errors
    ///
    /// Returns the underlying `csv` error if the row cannot be serialized or written.
    ///
    /// # Panics
    ///
    /// Panics if `comparison` has no sequence bundle (see `RegionSequenceRow::new`).
    pub fn write_region_sequences(
        &mut self,
        region: &CompareRegion,
        comparison: &CompareBenchmark,
    ) -> csv::Result<()> {
        let row = RegionSequenceRow::new(region, comparison);
        self.csv_writer.serialize(&row)
    }
}