use crate::{
catalog::TandemRepeatCatalog,
constants::{OUT_SUFFIX_BAG, OUT_SUFFIX_BAG_UNSORTED},
extract::{extract_bag_of_reads, RepeatPurityScoreParams},
};
use anyhow::Ok;
use clap::{arg, Parser};
use rust_htslib::bam::{self, Read};
use std::{
path::{Path, PathBuf},
process::Command,
};
#[derive(Parser)]
pub struct ExtractCommandArgs {
pub alignment: PathBuf,
pub catalog: PathBuf,
#[arg(short = '@', default_value_t = 1)]
pub htslib_read_threads: usize,
#[arg(short = 'e', long, default_value_t = 300)]
pub extension_length: u32,
#[arg(long = "mapq-f", default_value_t = 50)]
pub min_flank_mapq: u8,
#[arg(long = "mapq-i", default_value_t = 40)]
pub max_irr_mapq: u8,
#[arg(short = 's', long, default_value = "0.9")]
pub irr_score_min: f32,
#[arg(short = 'q', long, default_value = "20")]
pub base_qual_min: u8,
#[arg(short = 'm', long, default_value = "1.0")]
pub match_weight: f32,
#[arg(short = 'x', long, default_value = "-1.0")]
pub mismatch_weight: f32,
#[arg(short = 'z', long, default_value = "0.5")]
pub low_qual_mismatch_weight: f32,
#[arg(short = 'i', long)]
pub sort_and_index: bool,
}
pub fn run_extract_command(args: &ExtractCommandArgs, output_prefix: &Path) -> anyhow::Result<()> {
info!("Loading catalog");
let catalog = TandemRepeatCatalog::from_path(&args.catalog)?;
info!("Loaded {} loci from catalog", catalog.len());
info!("Opening input alignments file");
let mut reader: bam::IndexedReader = bam::IndexedReader::from_path(&args.alignment)?;
reader.set_threads(args.htslib_read_threads)?;
info!("Opening output file");
let extension = if args.sort_and_index {
OUT_SUFFIX_BAG_UNSORTED
} else {
OUT_SUFFIX_BAG
};
let output_path = output_prefix.with_extension(extension);
let header = bam::Header::from_template(reader.header());
let mut writer = bam::Writer::from_path(output_path, &header, bam::Format::Bam)?;
let purity_score_params = RepeatPurityScoreParams {
irr_score_min: args.irr_score_min,
base_qual_min: args.base_qual_min,
match_weight: args.match_weight,
mismatch_weight: args.mismatch_weight,
low_qual_mismatch_weight: args.low_qual_mismatch_weight,
};
info!("Extracting bag of reads");
extract_bag_of_reads(
&mut reader,
&mut writer,
catalog,
args.extension_length,
args.min_flank_mapq,
args.max_irr_mapq,
&purity_score_params,
)?;
drop(writer);
if args.sort_and_index {
info!("Sorting and indexing output file");
Command::new("samtools")
.args(vec![
"sort",
"-o",
output_prefix
.with_extension(OUT_SUFFIX_BAG)
.to_str()
.unwrap(),
"-O",
"bam",
output_prefix
.with_extension(OUT_SUFFIX_BAG_UNSORTED)
.to_str()
.unwrap(),
"-T",
output_prefix
.with_extension("samtools_tmp")
.to_str()
.unwrap(),
"-@",
&args.htslib_read_threads.to_string(),
])
.output()?;
Command::new("samtools")
.args(vec![
"index",
output_prefix
.with_extension(OUT_SUFFIX_BAG)
.to_str()
.unwrap(),
"-@",
&args.htslib_read_threads.to_string(),
])
.output()?;
std::fs::remove_file(output_prefix.with_extension(OUT_SUFFIX_BAG_UNSORTED))?;
}
Ok(())
}