use crate::{
common::{csv::TwitcherCSVWriter, list_of_regions::Regions, reference::ReferenceReader},
vcf::pipeline::{clusterizer::ClusterizerSettings, message::Message},
};
use std::fmt::Debug;
use clusterizer::Clusterizer;
use rust_htslib::bam;
use tokio::sync::mpsc;
use writer::Writer;
use crate::vcf::pipeline::{
clusterizer::local_phasing::PhasingSettings,
reader::VCFReader,
writer::{OutputWriter, region_writer::RegionWriter},
};
pub mod clusterizer;
mod message;
pub mod reader;
pub mod writer;
/// Default capacity of the channel between the clusterizer and the writer.
///
/// Single source of truth for both the `--buffer` CLI default and
/// [`PipelineSettings::default`], so the two cannot drift apart.
const DEFAULT_BUFFER_SIZE: usize = 8192;

// Tunable settings for a `VCFPipeline` run.
//
// NOTE: plain `//` comments are used on this struct and its fields on purpose:
// clap's derive turns `///` doc comments into CLI help text, and the generated
// help output should stay exactly as it was.
#[derive(clap::Args, Debug)]
pub struct PipelineSettings {
    // Clusterizer options; flattened so they appear directly among the
    // parent command's arguments.
    #[command(flatten)]
    pub cluster: clusterizer::ClusterizerSettings,
    // Capacity of the bounded channel created in `VCFPipeline::run`
    // (set with `--buffer`).
    #[arg(long = "buffer", default_value_t = DEFAULT_BUFFER_SIZE)]
    pub buffer_size: usize,
}

impl Default for PipelineSettings {
    /// Returns default clusterizer settings together with the same buffer size
    /// that the `--buffer` flag defaults to.
    fn default() -> Self {
        Self {
            cluster: ClusterizerSettings::default(),
            buffer_size: DEFAULT_BUFFER_SIZE,
        }
    }
}
/// Everything needed for one pipeline run; consumed by [`VCFPipeline::run`].
///
/// `run` hands `input`, `reference`, `targets`, `bam`, `phasing` and
/// `settings.cluster` to the `Clusterizer`, and `output`, `region_output` and
/// `csv_output` to the `Writer`.
pub struct VCFPipeline {
/// Input reader handed to the clusterizer.
pub input: VCFReader,
/// Main output writer handed to the writer stage.
pub output: OutputWriter,
/// Reference reader handed to the clusterizer.
pub reference: ReferenceReader,
/// Optional regions handed to the clusterizer
/// (presumably restricts processing to these targets; confirm in `clusterizer`).
pub targets: Option<Regions>,
/// Optional region writer handed to the writer stage.
pub region_output: Option<RegionWriter>,
/// Optional CSV writer handed to the writer stage.
pub csv_output: Option<TwitcherCSVWriter>,
/// Clusterizer settings plus the capacity of the channel between the two stages.
pub settings: PipelineSettings,
/// Optional indexed BAM reader handed to the clusterizer
/// (presumably used for read-based phasing; confirm in `clusterizer`).
pub bam: Option<bam::IndexedReader>,
/// Phasing settings handed to the clusterizer.
pub phasing: PhasingSettings,
}
impl VCFPipeline {
pub async fn run(self) -> anyhow::Result<()> {
let aligner_settings = self.settings.cluster;
let bam = self.bam;
let phasing = self.phasing;
let (tx, rx) = mpsc::channel(self.settings.buffer_size);
let clusterizer_handle = tokio::spawn(async move {
let c = Clusterizer::new(
self.input,
self.reference,
self.targets,
tx,
aligner_settings,
bam,
phasing,
);
c.run().await
});
let w = Writer::new(rx, self.output, self.region_output, self.csv_output);
w.run().await?;
clusterizer_handle.await??;
Ok(())
}
}