use anyhow::Result;
use derive_builder::Builder;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::process::Stdio;
use tempfile::tempdir;
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct Caller {
genome: PathBuf,
haplotype_variants: PathBuf,
reads: Vec<PathBuf>,
output: PathBuf,
threads: String,
output_bam: bool,
}
impl Caller {
pub fn call(&self) -> Result<()> {
let outdir = &self.output;
let mut parent = outdir.clone();
parent.pop();
fs::create_dir_all(parent)?;
let temp_dir = tempdir()?;
let ref_genome = &self.genome;
let haplotype_variants = &self.haplotype_variants;
let file_name = Path::new(&haplotype_variants)
.file_name()
.unwrap()
.to_str()
.unwrap();
dbg!(&file_name);
let bgzip_dir = temp_dir.path().join(format!("{}.gz", file_name));
println!("{}", bgzip_dir.display());
let bgzip = {
Command::new("bgzip")
.arg("-c")
.arg(&haplotype_variants)
.stdout(Stdio::piped())
.spawn()
.expect("failed to execute the zipping process")
};
println!("Bgzip was exited with: {:?}", bgzip);
let output = bgzip
.wait_with_output()
.expect("Bgzip: Failed to read stdout");
let mut bgzipped_file = std::fs::File::create(&bgzip_dir)?;
bgzipped_file.write_all(&output.stdout)?; bgzipped_file.flush()?;
let tabix = {
Command::new("tabix")
.arg("-p")
.arg("vcf")
.arg(&bgzip_dir)
.stdout(Stdio::piped())
.spawn()
.expect("failed to execute the tabix process")
};
println!("Tabix was exited with: {:?}", tabix);
let _ = tabix
.wait_with_output()
.expect("Tabix: Failed to read stdout");
let idx_name = temp_dir.path().join(&"idx");
println!("{}", bgzip_dir.display());
let vg_index = {
Command::new("vg")
.arg("autoindex")
.arg("--workflow")
.arg("giraffe")
.arg("-r")
.arg(&ref_genome)
.arg("-v")
.arg(&bgzip_dir)
.arg("-p")
.arg(&idx_name)
.arg("-t")
.arg(&self.threads)
.status()
.expect("failed to execute the vg giraffe process")
};
println!("vg indexing was exited with: {:?}", vg_index);
let sample_name_ext = Path::new(&self.reads[0])
.file_name()
.unwrap()
.to_str()
.unwrap();
let mut sample_name = "sample".to_string();
if sample_name_ext.contains('_') {
sample_name = sample_name_ext.split('_').collect::<Vec<&str>>()[0].to_string();
} else if sample_name_ext.contains('.') {
sample_name = sample_name_ext.split('.').collect::<Vec<&str>>()[0].to_string();
}
let file_aligned_pangenome = temp_dir.path().join(format!("{}_vg.bam", sample_name));
let align_pangenome = {
Command::new("vg")
.arg("giraffe")
.arg("-Z")
.arg(format!("{}.giraffe.gbz", &idx_name.display()))
.arg("-f")
.arg(&self.reads[0])
.arg("-f")
.arg(&self.reads[1])
.arg("--output-format")
.arg("BAM")
.arg("-t")
.arg(&self.threads)
.stdout(Stdio::piped())
.spawn()
.expect("failed to execute the vg giraffe process")
};
println!(
"Alignment to pangenome was exited with: {:?}",
align_pangenome
);
let output = align_pangenome
.wait_with_output()
.expect("Failed to read stdout");
let mut vg_bam = std::fs::File::create(file_aligned_pangenome.clone())?;
vg_bam.write_all(&output.stdout)?; vg_bam.flush()?;
let mut file_vg_aligned_sorted =
temp_dir.path().join(format!("{}_sorted.bam", sample_name));
if self.output_bam {
let mut parent = outdir.clone();
parent.pop();
file_vg_aligned_sorted = parent.join(format!("{}_sorted.bam", sample_name));
}
println!(
"file_vg_aligned_sorted: {}",
file_vg_aligned_sorted.display()
);
let vg_sort = {
Command::new("samtools")
.arg("sort")
.arg(&file_aligned_pangenome)
.arg("-o")
.arg(&file_vg_aligned_sorted)
.arg("-@")
.arg(&self.threads)
.arg("--write-index")
.status()
.expect("failed to execute the sorting process")
};
println!("The sorting was exited with: {}", vg_sort);
println!("{}", file_vg_aligned_sorted.display());
let varlociraptor_prep_dir = temp_dir.path().join(format!("{}_obs.bcf", sample_name));
println!(
"varlociraptor_prep_dir: {}",
varlociraptor_prep_dir.display()
);
let varlociraptor_prep = {
Command::new("varlociraptor")
.arg("preprocess")
.arg("variants")
.arg("--report-fragment-ids")
.arg("--omit-mapq-adjustment")
.arg("--atomic-candidate-variants")
.arg("--candidates")
.arg(&haplotype_variants)
.arg(&ref_genome)
.arg("--bam")
.arg(&file_vg_aligned_sorted)
.arg("--output")
.arg(&varlociraptor_prep_dir)
.arg("--max-depth")
.arg("3000")
.status()
.expect("failed to execute the varlociraptor preprocessing")
};
println!(
"The varlociraptor preprocessing was exited with: {}",
varlociraptor_prep
);
let scenario_str = include_str!("../../resources/scenarios/scenario.yaml");
let scenario_path = temp_dir.path().join("scenario.yaml");
let mut scenario_file = File::create(&scenario_path)?;
scenario_file.write_all(scenario_str.as_bytes())?;
println!("YAML written to scenario.yaml in temp dir");
let varlociraptor_call = {
Command::new("varlociraptor")
.arg("call")
.arg("variants")
.arg("--output")
.arg(&outdir)
.arg("--omit-strand-bias")
.arg("--omit-read-position-bias")
.arg("--omit-read-orientation-bias")
.arg("--omit-softclip-bias")
.arg("--omit-homopolymer-artifact-detection")
.arg("--omit-alt-locus-bias")
.arg("generic")
.arg("--obs")
.arg(format!("sample={}", varlociraptor_prep_dir.display()))
.arg("--scenario")
.arg(&scenario_path)
.status()
.expect("failed to execute the varlociraptor calling process")
};
println!(
"varlociraptor calling finished with exit status: {:?}",
varlociraptor_call
);
Ok(())
}
}