// fibertools_rs/utils/input_bam.rs
use crate::cli;
use crate::fiber::FiberseqRecords;
use crate::utils::bio_io;
use clap::{Args, ValueHint};
use rust_htslib::bam;
use rust_htslib::bam::Read;
use std::fmt::Debug;
/// Default minimum ML score, kept as a string so it can be handed directly to
/// clap's `default_value` on `FiberFilters::min_ml_score`; `FiberFilters::default`
/// parses the same string into a `u8`.
pub static MIN_ML_SCORE: &str = "125";
// Record-level filtering options for BAM input, exposed as clap arguments.
// Every option is grouped under the "BAM-Options" help heading.
#[derive(Debug, Args, Clone)]
pub struct FiberFilters {
// Bit mask compared against each record's flags (`-F` / `--filter`, default 0).
// `filter_on_bit_flags` drops every record that has at least one of these bits set.
#[clap(
global = true,
short = 'F',
long = "filter",
default_value = "0",
help_heading = "BAM-Options"
)]
pub bit_flag: u16,
// Optional filter expression (`-x` / `--ftx`, alias `--ft-expression`).
// It is only stored here; how it is parsed and applied is not visible in this section.
#[clap(
global = true,
short = 'x',
long = "ftx",
alias = "ft-expression",
help_heading = "BAM-Options"
)]
pub filter_expression: Option<String>,
// Minimum ML score (`--ml`, alias `--min-ml-score`), defaulting to `MIN_ML_SCORE`
// and overridable through the `FT_MIN_ML_SCORE` environment variable.
// Presumably this thresholds the BAM `ML` base-modification tag — TODO confirm.
// NOTE(review): unlike the other options this one is not marked `global = true`;
// confirm that is intentional.
#[clap(long="ml", alias="min-ml-score", default_value = MIN_ML_SCORE, help_heading = "BAM-Options", env="FT_MIN_ML_SCORE")]
pub min_ml_score: u8,
// Hidden option (`hide = true`, default 0) whose long flag is derived from the field name.
// Presumably the number of bases at the read ends whose base modifications are
// stripped — TODO confirm against the code that consumes it.
#[clap(
global = true,
long,
default_value = "0",
help_heading = "BAM-Options",
hide = true
)]
pub strip_starting_basemods: i64,
}
impl std::default::Default for FiberFilters {
fn default() -> Self {
Self {
bit_flag: 0,
min_ml_score: MIN_ML_SCORE.parse().unwrap(),
filter_expression: None,
strip_starting_basemods: 0,
}
}
}
impl FiberFilters {
    /// Lazily unwraps the records read by htslib and yields only those whose flags
    /// share no bit with `self.bit_flag`.
    ///
    /// # Panics
    /// Panics during iteration when an item of `records` is an `Err`.
    pub fn filter_on_bit_flags<'a, I>(
        &'a self,
        records: I,
    ) -> impl Iterator<Item = bam::Record> + 'a
    where
        I: IntoIterator<Item = Result<bam::Record, rust_htslib::errors::Error>> + 'a,
    {
        // Copy the mask once so the closure owns everything it needs.
        let excluded_bits = self.bit_flag;
        records.into_iter().filter_map(move |read| {
            let record = read.expect("htslib is unable to read a record in the input.");
            if record.flags() & excluded_bits == 0 {
                Some(record)
            } else {
                None
            }
        })
    }
}
// Input BAM argument group: the input path, the flattened filter and global options,
// and a cached copy of the header of the most recently opened reader.
#[derive(Debug, Args)]
pub struct InputBam {
// Path of the input BAM. Defaults to "-", which `indexed_bam_reader` treats as stdin
// and rejects.
#[clap(default_value = "-", value_hint = ValueHint::AnyPath)]
pub bam: String,
// Record filters; their arguments are flattened into this argument group.
#[clap(flatten)]
pub filters: FiberFilters,
// Global options, also flattened; `threads` is read from here when opening
// readers and writers.
#[clap(flatten)]
pub global: cli::GlobalOpts,
// Not a CLI argument (`skip`). Stays `None` until `bam_reader` or
// `indexed_bam_reader` stores the header of the file it opened.
#[clap(skip)]
pub header: Option<bam::Header>,
}
impl InputBam {
    /// Opens `self.bam` for sequential reading via `bio_io::bam_reader`, applies the
    /// configured thread count, and caches a copy of the file's header in `self.header`.
    ///
    /// # Panics
    /// Panics if the thread count cannot be set on the reader.
    pub fn bam_reader(&mut self) -> bam::Reader {
        let mut bam = bio_io::bam_reader(&self.bam);
        bam.set_threads(self.global.threads)
            .expect("unable to set threads for bam reader");
        self.header = Some(bam::Header::from_template(bam.header()));
        bam
    }

    /// Opens `self.bam` as an indexed BAM, caches a copy of its header in
    /// `self.header`, and applies the configured thread count.
    ///
    /// # Panics
    /// Panics if the input is stdin (`"-"`), if the indexed file cannot be opened,
    /// or if the thread count cannot be set on the reader.
    pub fn indexed_bam_reader(&mut self) -> bam::IndexedReader {
        if self.bam == "-" {
            panic!("Cannot use stdin (\"-\") for indexed bam reading. Please provide a file path for the bam file.");
        }
        let mut bam =
            bam::IndexedReader::from_path(&self.bam).expect("unable to open indexed bam file");
        self.header = Some(bam::Header::from_template(bam.header()));
        // Report a descriptive message on failure, matching `bam_reader` and `bam_writer`.
        bam.set_threads(self.global.threads)
            .expect("unable to set threads for indexed bam reader");
        bam
    }

    /// Wraps an already opened reader in a `FiberseqRecords` iterator, handing it a
    /// clone of the configured filters.
    pub fn fibers<'a>(&self, bam: &'a mut bam::Reader) -> FiberseqRecords<'a> {
        FiberseqRecords::new(bam, self.filters.clone())
    }

    /// Builds a `HeaderView` from the cached header.
    ///
    /// # Panics
    /// Panics if no reader has been opened yet, i.e. `self.header` is still `None`.
    pub fn header_view(&self) -> bam::HeaderView {
        bam::HeaderView::from_header(self.header.as_ref().expect(
            "Input bam must be opened before opening the header or creating a writer with the input bam as a template.",
        ))
    }

    /// Returns an owned `Header` rebuilt from `header_view()`.
    ///
    /// # Panics
    /// Panics under the same condition as `header_view`.
    pub fn header(&self) -> bam::Header {
        bam::Header::from_template(&self.header_view())
    }

    /// Creates a BAM writer for `out` that uses the input header as its template and
    /// the configured thread count.
    ///
    /// The program name (`"fibertools-rs"`), id (`"ft"`) and crate version are passed to
    /// `bio_io::program_bam_writer_from_header`; presumably they end up in a `@PG`
    /// header line — TODO confirm in that helper.
    ///
    /// # Panics
    /// Panics if no reader has been opened yet, or if the thread count cannot be set
    /// on the writer.
    pub fn bam_writer(&self, out: &str) -> bam::Writer {
        let header = self.header();
        let program_name = "fibertools-rs";
        let program_id = "ft";
        let program_version = crate::VERSION;
        let mut writer = bio_io::program_bam_writer_from_header(
            out,
            header,
            program_name,
            program_id,
            program_version,
        );
        writer
            .set_threads(self.global.threads)
            .expect("unable to set threads for bam writer");
        writer
    }
}
impl std::default::Default for InputBam {
fn default() -> Self {
Self {
bam: "-".to_string(),
filters: FiberFilters::default(),
global: cli::GlobalOpts::default(),
header: None,
}
}
}