use crate::{
Error, F32Bw0and1, GenomicRegion, ModChar, PathOrURLOrStdin, ReadStates,
RestrictModCalledStrand, ThresholdState,
};
use bedrs::prelude::Bed3;
use clap::{Args, FromArgMatches};
use derive_builder::Builder;
use rust_htslib::bam;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::num::{NonZeroU32, NonZeroUsize};
use std::str::FromStr;
// Options selecting the BAM input and the read-level filters applied to it.
//
// The derives generate a clap argument group (`Args`), serde support in which
// missing fields fall back to `InputBam::default()`, and an `InputBamBuilder`
// whose `build()` returns the crate `Error` and first runs
// `InputBamBuilder::validate`.
//
// NOTE: plain `//` comments are used throughout this struct on purpose; clap's
// derive turns `///` doc comments into `--help` text, which would change the
// program's output.
#[derive(Builder, Debug, Args, Clone, Serialize, Deserialize)]
#[serde(default)]
#[builder(default, build_fn(error = "Error", validate = "Self::validate"))]
#[non_exhaustive]
pub struct InputBam {
// Where the BAM data comes from. It carries no `long`/`short` attribute, so
// clap treats it as a positional argument.
pub bam_path: PathOrURLOrStdin,
// `--min-seq-len`; defaults to 0.
// NOTE(review): presumably a lower bound on read sequence length; the
// filtering itself is not in this file.
#[clap(long, default_value_t)]
#[builder(setter(into))]
pub min_seq_len: u64,
// `--min-align-len`; unset by default.
// NOTE(review): presumably a lower bound on alignment length; confirm at the use site.
#[clap(long)]
#[builder(setter(into, strip_option))]
pub min_align_len: Option<i64>,
// `--read-id`; cannot be combined with `--read-id-list` on the command line.
#[clap(long, conflicts_with = "read_id_list")]
#[builder(setter(into, strip_option))]
pub read_id: Option<String>,
// `--read-id-list`; cannot be combined with `--read-id` on the command line.
// NOTE(review): the tests pass a file-like path here, so this is presumably
// the path of a file listing read ids; confirm.
#[clap(long, conflicts_with = "read_id")]
#[builder(setter(into, strip_option))]
pub read_id_list: Option<String>,
// In-memory set of read ids. `clap(skip)` keeps it off the command line, so
// it can only be supplied through the builder, serde or a struct literal.
#[clap(skip)]
#[builder(setter(into, strip_option))]
pub read_id_set: Option<HashSet<String>>,
// `--threads`; a non-zero count whose command-line default is 2.
#[clap(long, default_value_t = NonZeroU32::new(2).expect("no error"))]
#[builder(setter(into))]
pub threads: NonZeroU32,
// `--include-zero-len`; defaults to `false`.
#[clap(long, default_value_t)]
pub include_zero_len: bool,
// `--read-filter`. On the builder this field is a `String`: an empty string
// builds to `None`, anything else is parsed with `ReadStates::from_str` and a
// parse failure makes `build()` return the error.
#[clap(long)]
#[builder(field(
ty = "String",
build = "(!self.read_filter.is_empty()).then(|| ReadStates::from_str(&self.read_filter)).transpose()?"
))]
pub read_filter: Option<ReadStates>,
// `-s` / `--sample-fraction`; defaults to `F32Bw0and1::one()`.
#[clap(short, long, default_value_t = F32Bw0and1::one())]
pub sample_fraction: F32Bw0and1,
// `--sample-seed`; unset by default.
// NOTE(review): presumably seeds the subsampling driven by `sample_fraction`; confirm.
#[clap(long)]
#[builder(setter(into, strip_option))]
pub sample_seed: Option<u64>,
// `--mapq-filter`; defaults to 0.
#[clap(long, default_value_t)]
#[builder(setter(into))]
pub mapq_filter: u8,
// `--exclude-mapq-unavail`; defaults to `false`.
#[clap(long, default_value_t)]
pub exclude_mapq_unavail: bool,
// `--region`. On the builder this field is a `String`: an empty string builds
// to `None`, anything else is parsed with `GenomicRegion::from_str`.
#[clap(long)]
#[builder(field(
ty = "String",
build = "(!self.region.is_empty()).then(|| GenomicRegion::from_str(&self.region)).transpose()?"
))]
pub region: Option<GenomicRegion>,
// Region expressed as a `Bed3`; not a command-line option (`clap(skip)`).
// `InputRegionOptions::convert_region_to_bed3` fills it from `region`.
#[clap(skip)]
#[builder(setter(into, strip_option))]
pub region_bed3: Option<Bed3<i32, u64>>,
// `--full-region`; defaults to `false` and, on the command line, requires
// `--region`. Reported through `InputRegionOptions::is_full_overlap`.
#[clap(long, default_value_t, requires = "region")]
pub full_region: bool,
}
impl InputBamBuilder {
#[expect(
clippy::arithmetic_side_effects,
reason = "1 + 1 + 1 is not gonna overflow"
)]
fn validate(&self) -> Result<(), Error> {
match (
!self.region.is_empty(),
self.region_bed3.is_some(),
self.full_region,
) {
(false, false, Some(true)) => {
return Err(Error::BuilderValidation("InputBamBuilder: cannot set `full_region` without setting `region` or `region_bed3`".to_owned()));
}
(true, true, _) => {
return Err(Error::BuilderValidation(
"InputBamBuilder: cannot set both `region` and `region_bed3`".to_owned(),
));
}
_ => {}
}
if u8::from(self.read_id.is_some())
+ u8::from(self.read_id_list.is_some())
+ u8::from(self.read_id_set.is_some())
> 1
{
Err(Error::BuilderValidation(
"InputBamBuilder: cannot set >1 of `read_id`, `read_id_list` and `read_id_set`"
.to_owned(),
))
} else {
Ok(())
}
}
}
impl Default for InputBam {
    /// Baseline configuration: input from stdin, two threads, a sample
    /// fraction of one, and every optional restriction left unset or zero.
    fn default() -> Self {
        let two_threads = NonZeroU32::new(2).expect("no error");
        Self {
            // Input source and parallelism.
            bam_path: PathOrURLOrStdin::Stdin,
            threads: two_threads,
            // Length-based options.
            min_seq_len: 0,
            min_align_len: None,
            include_zero_len: false,
            // Read-id selection: none of the three mutually exclusive forms.
            read_id: None,
            read_id_list: None,
            read_id_set: None,
            // Read-state and mapping-quality options.
            read_filter: None,
            mapq_filter: 0,
            exclude_mapq_unavail: false,
            // Subsampling.
            sample_fraction: F32Bw0and1::one(),
            sample_seed: None,
            // Region restriction.
            region: None,
            region_bed3: None,
            full_region: false,
        }
    }
}
// Argument group holding a modification tag that may be omitted.
// `Default` yields `tag: None`.
// (Plain `//` comments are used because clap's derive would turn `///` into
// help text.)
#[derive(Debug, Default, Args, Clone, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub struct OptionalTag {
// `--tag`; optional on the command line.
#[clap(long)]
pub tag: Option<ModChar>,
}
impl FromStr for OptionalTag {
    type Err = Error;

    /// Parses `mod_type` as a `ModChar` and stores it as a present tag.
    ///
    /// # Errors
    /// Propagates the error from `ModChar::from_str` when `mod_type` does not parse.
    fn from_str(mod_type: &str) -> Result<Self, Self::Err> {
        let parsed = ModChar::from_str(mod_type)?;
        Ok(Self { tag: Some(parsed) })
    }
}
// Argument group holding a modification tag that must be present.
// `Default` yields `ModChar::default()`.
// (Plain `//` comments are used because clap's derive would turn `///` into
// help text.)
#[derive(Debug, Default, Args, Clone, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub struct RequiredTag {
// `--tag`; the field is not an `Option` and has no default value attribute,
// so clap requires it on the command line.
#[clap(long)]
pub tag: ModChar,
}
impl FromStr for RequiredTag {
    type Err = Error;

    /// Parses `mod_type` as a `ModChar` and stores it as the tag.
    ///
    /// # Errors
    /// Propagates the error from `ModChar::from_str` when `mod_type` does not parse.
    fn from_str(mod_type: &str) -> Result<Self, Self::Err> {
        let parsed = ModChar::from_str(mod_type)?;
        Ok(Self { tag: parsed })
    }
}
/// Uniform read access to a modification tag, whether the implementing type
/// stores it as optional or as required.
pub trait TagState {
/// Returns the tag, or `None` when the implementor has none.
///
/// # Panics
/// The default body is `unimplemented!()`, so it panics unless the
/// implementor overrides this method.
fn tag(&self) -> Option<ModChar> {
unimplemented!();
}
}
impl TagState for OptionalTag {
/// Returns the stored tag as-is, i.e. `None` when no tag was supplied.
fn tag(&self) -> Option<ModChar> {
self.tag
}
}
impl TagState for RequiredTag {
/// Always returns `Some`, wrapping the mandatory tag.
fn tag(&self) -> Option<ModChar> {
Some(self.tag)
}
}
// Options controlling which modification calls are considered.
//
// `S` is the tag argument group (`OptionalTag` or `RequiredTag` in this
// file), so the same struct serves commands where the tag is optional and
// commands where it is mandatory. The derives generate a clap argument group,
// serde support in which missing fields fall back to `InputMods::default()`,
// and an `InputModsBuilder<S>` whose `build()` returns the crate `Error` after
// running `InputModsBuilder::validate`.
//
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into `--help` text.
#[derive(Builder, Debug, Args, Clone, Serialize, Deserialize)]
#[builder(default, build_fn(error = "Error", validate = "Self::validate"))]
#[serde(default)]
#[non_exhaustive]
pub struct InputMods<S: TagState + Args + FromArgMatches + Default> {
// The tag argument group; `flatten` splices its arguments into this group.
#[clap(flatten)]
pub tag: S,
// `--mod-strand`. On the builder this field is a `String`: an empty string
// builds to `None`, anything else is parsed with
// `RestrictModCalledStrand::from_str`.
#[clap(long)]
#[builder(field(
ty = "String",
build = "(!self.mod_strand.is_empty()).then(|| RestrictModCalledStrand::from_str(&self.mod_strand)).transpose()?"
))]
pub mod_strand: Option<RestrictModCalledStrand>,
// `--mod-prob-filter`; parsed by `ThresholdState::from_str_ordpair_fraction`,
// and when the option is omitted that parser is fed the empty string.
#[clap(long, value_parser=ThresholdState::from_str_ordpair_fraction, default_value = "")]
pub mod_prob_filter: ThresholdState,
// `--trim-read-ends-mod`; defaults to 0.
// NOTE(review): presumably a number of bases at each read end whose
// modification calls are ignored; confirm at the use site.
#[clap(long, default_value_t)]
pub trim_read_ends_mod: usize,
// `--base-qual-filter-mod`; defaults to 0.
#[clap(long, default_value_t)]
pub base_qual_filter_mod: u8,
// `--mod-region`. On the builder this field is a `String`: an empty string
// builds to `None`, anything else is parsed with `GenomicRegion::from_str`.
#[clap(long)]
#[builder(field(
ty = "String",
build = "(!self.mod_region.is_empty()).then(|| GenomicRegion::from_str(&self.mod_region)).transpose()?"
))]
pub mod_region: Option<GenomicRegion>,
// Region expressed as a `Bed3`; not a command-line option (`clap(skip)`).
// `InputRegionOptions::convert_region_to_bed3` fills it from `mod_region`.
#[clap(skip)]
#[builder(setter(strip_option))]
pub region_bed3: Option<Bed3<i32, u64>>,
}
impl<S: TagState + Args + FromArgMatches + Default> InputModsBuilder<S> {
    /// Cross-field check that `derive_builder` runs at the start of `build()`.
    ///
    /// # Errors
    /// Returns `Error::BuilderValidation` when the region is given both as a
    /// non-empty `mod_region` string and as a `region_bed3`.
    fn validate(&self) -> Result<(), Error> {
        // On the builder, `mod_region` is a plain `String`; empty means "not set".
        let region_as_string = !self.mod_region.is_empty();
        let region_as_bed3 = self.region_bed3.is_some();
        if !(region_as_string && region_as_bed3) {
            return Ok(());
        }
        Err(Error::BuilderValidation(
            "cannot set `mod_region` and `region_bed3` simultaneously!".to_owned(),
        ))
    }
}
impl<S: TagState + Args + FromArgMatches + Default> Default for InputMods<S> {
    /// Baseline configuration: the tag group's own default, a probability
    /// filter of `ThresholdState::GtEq(0)`, zero for the numeric options and
    /// no strand or region restriction.
    fn default() -> Self {
        Self {
            tag: S::default(),
            // Thresholds and trimming.
            mod_prob_filter: ThresholdState::GtEq(0),
            base_qual_filter_mod: 0,
            trim_read_ends_mod: 0,
            // Optional restrictions, all unset.
            mod_strand: None,
            mod_region: None,
            region_bed3: None,
        }
    }
}
/// Read-only view of the modification-related options.
///
/// Every method has a default body of `unimplemented!()`, so each one panics
/// unless the implementor overrides it.
pub trait InputModOptions {
/// The modification tag, if any.
fn tag(&self) -> Option<ModChar> {
unimplemented!()
}
/// The strand restriction on modification calls, if any.
fn mod_strand(&self) -> Option<RestrictModCalledStrand> {
unimplemented!()
}
/// The threshold applied to modification probabilities.
fn mod_prob_filter(&self) -> ThresholdState {
unimplemented!()
}
/// The `trim_read_ends_mod` setting.
fn trim_read_ends_mod(&self) -> usize {
unimplemented!()
}
/// The `base_qual_filter_mod` setting.
fn base_qual_filter_mod(&self) -> u8 {
unimplemented!()
}
}
/// Access to a region filter that can be held in two forms: a textual
/// `GenomicRegion` and a `Bed3` with a numeric contig id.
///
/// The three accessor methods default to `unimplemented!()` and therefore
/// panic unless the implementor overrides them; `convert_region_to_bed3` is
/// built on top of them.
pub trait InputRegionOptions {
    /// Returns the region filter in `Bed3` form, if set.
    fn region_filter(&self) -> &Option<Bed3<i32, u64>> {
        unimplemented!()
    }

    /// Returns the region filter in `GenomicRegion` form, if set.
    fn region_filter_genomic_string(&self) -> Option<GenomicRegion> {
        unimplemented!()
    }

    /// Replaces the `Bed3` form of the region filter.
    fn set_region_filter(&mut self, _value: Option<Bed3<i32, u64>>) {
        unimplemented!()
    }

    /// Whether the "full overlap" flag is set; `false` unless overridden.
    fn is_full_overlap(&self) -> bool {
        false
    }

    /// Makes the `Bed3` form of the region filter available.
    ///
    /// * Neither form set: the `Bed3` form is explicitly set to `None`.
    /// * Only the `GenomicRegion` form set: it is converted with
    ///   `GenomicRegion::try_to_bed3` against `header` and stored.
    /// * Only the `Bed3` form set: nothing changes.
    ///
    /// `header` is taken by value for backward compatibility, although it is
    /// only borrowed here.
    ///
    /// # Errors
    /// * `Error::InvalidState` when both forms are already set.
    /// * Whatever `GenomicRegion::try_to_bed3` returns when conversion fails.
    fn convert_region_to_bed3(&mut self, header: bam::HeaderView) -> Result<(), Error> {
        // Fetch the owned region once and match on it directly, instead of
        // testing `is_some()` and then fetching (and cloning) it a second time.
        match (
            self.region_filter_genomic_string(),
            self.region_filter().is_some(),
        ) {
            (None, false) => self.set_region_filter(None),
            (Some(genomic_region), false) => {
                self.set_region_filter(Some(genomic_region.try_to_bed3(&header)?));
            }
            (None, true) => {}
            (Some(_), true) => {
                return Err(Error::InvalidState(
                    "cannot set a region as both a `GenomicRegion` and a `Bed3`".to_owned(),
                ));
            }
        }
        Ok(())
    }
}
/// Exposes the fields of `InputMods` through `InputModOptions`; each method
/// returns the corresponding field by value.
impl<S: TagState + Args + FromArgMatches + Default> InputModOptions for InputMods<S> {
/// Delegates to the tag group's `TagState::tag`, so the result is `None`
/// only when `S` can hold no tag.
fn tag(&self) -> Option<ModChar> {
self.tag.tag()
}
/// Returns the `mod_strand` field.
fn mod_strand(&self) -> Option<RestrictModCalledStrand> {
self.mod_strand
}
/// Returns the `mod_prob_filter` field.
fn mod_prob_filter(&self) -> ThresholdState {
self.mod_prob_filter
}
/// Returns the `trim_read_ends_mod` field.
fn trim_read_ends_mod(&self) -> usize {
self.trim_read_ends_mod
}
/// Returns the `base_qual_filter_mod` field.
fn base_qual_filter_mod(&self) -> u8 {
self.base_qual_filter_mod
}
}
/// Maps the region accessors onto `mod_region` (textual form) and
/// `region_bed3` (`Bed3` form). `is_full_overlap` is not overridden, so the
/// trait default of `false` applies.
impl<S: TagState + Args + FromArgMatches + Default> InputRegionOptions for InputMods<S> {
/// Returns a clone of `mod_region`.
fn region_filter_genomic_string(&self) -> Option<GenomicRegion> {
self.mod_region.clone()
}
/// Borrows `region_bed3`.
fn region_filter(&self) -> &Option<Bed3<i32, u64>> {
&self.region_bed3
}
/// Overwrites `region_bed3` with `value`.
fn set_region_filter(&mut self, value: Option<Bed3<i32, u64>>) {
self.region_bed3 = value;
}
}
impl InputMods<RequiredTag> {
/// Returns the tag directly rather than wrapped in an `Option`; this is
/// available only when the tag group is `RequiredTag`, where a tag is
/// always present.
#[must_use]
pub fn required_tag(&self) -> ModChar {
self.tag.tag
}
}
/// Maps the region accessors onto `region` (textual form), `region_bed3`
/// (`Bed3` form) and `full_region`.
impl InputRegionOptions for InputBam {
/// Returns a clone of `region`.
fn region_filter_genomic_string(&self) -> Option<GenomicRegion> {
self.region.clone()
}
/// Borrows `region_bed3`.
fn region_filter(&self) -> &Option<Bed3<i32, u64>> {
&self.region_bed3
}
/// Overwrites `region_bed3` with `value`.
fn set_region_filter(&mut self, value: Option<Bed3<i32, u64>>) {
self.region_bed3 = value;
}
/// Returns the `full_region` flag.
fn is_full_overlap(&self) -> bool {
self.full_region
}
}
// Window size and step, both guaranteed non-zero by their type.
//
// On the generated `InputWindowingBuilder` the two fields are plain `usize`
// values that `build()` converts with `NonZeroUsize::try_from`, so a zero
// makes `build()` return an error instead of producing an invalid value.
//
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into `--help` text.
// NOTE(review): the unit of `win`/`step` (bases, modified positions, ...) is
// not visible in this file.
#[derive(Builder, Debug, Args, Clone, Copy, Serialize, Deserialize)]
#[builder(default, build_fn(error = "Error"))]
#[serde(default)]
#[non_exhaustive]
pub struct InputWindowing {
// `--win`; no default value is declared, so clap requires it.
#[clap(long)]
#[builder(field(ty = "usize", build = "NonZeroUsize::try_from(self.win)?"))]
pub win: NonZeroUsize,
// `--step`; no default value is declared, so clap requires it.
#[clap(long)]
#[builder(field(ty = "usize", build = "NonZeroUsize::try_from(self.step)?"))]
pub step: NonZeroUsize,
}
impl Default for InputWindowing {
fn default() -> Self {
InputWindowing {
win: NonZeroUsize::new(1).expect("no error"),
step: NonZeroUsize::new(1).expect("no error"),
}
}
}
/// Selects whether and how a read's sequence is shown.
///
/// NOTE(review): the exact rendering each flag triggers is implemented
/// elsewhere; the per-field notes below are inferred from the names only.
#[derive(Debug, Clone, Default, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub enum SeqDisplayOptions {
/// Do not show the sequence; this is the `Default` variant.
#[default]
No,
/// Show the whole sequence.
Full {
/// Presumably: also show base qualities.
show_base_qual: bool,
},
/// Show only the part of the sequence associated with `region`.
Region {
/// Presumably: also show base qualities.
show_base_qual: bool,
/// Presumably: render inserted bases in lowercase.
show_ins_lowercase: bool,
/// Presumably: mark modified bases with `Z`/`z` — TODO confirm.
show_mod_z: bool,
/// The region to display, as a `Bed3` with a numeric contig id.
region: Bed3<i32, u64>,
},
}
#[cfg(test)]
mod tag_struct_tests {
    use super::*;

    /// A default `OptionalTag` has no tag, both as a field and via `TagState`.
    #[test]
    fn optional_tag_default() {
        let untagged = OptionalTag::default();
        assert!(untagged.tag.is_none());
        assert_eq!(untagged.tag(), None);
    }

    /// An explicitly set optional tag is returned unchanged.
    #[test]
    fn optional_tag_with_value() {
        let expected = ModChar::new('m');
        let tagged = OptionalTag {
            tag: Some(expected),
        };
        assert_eq!(tagged.tag(), Some(expected));
    }

    /// A default `RequiredTag` reports the default `ModChar`.
    #[test]
    fn required_tag_default() {
        let fallback = RequiredTag::default();
        assert_eq!(fallback.tag(), Some(ModChar::default()));
    }

    /// A required tag is reported wrapped in `Some`.
    #[test]
    fn required_tag_with_value() {
        let expected = ModChar::new('C');
        let tagged = RequiredTag { tag: expected };
        assert_eq!(tagged.tag(), Some(expected));
    }

    /// Parsing "m" into an `OptionalTag` stores the character 'm'.
    #[test]
    fn optional_tag_from_str() {
        let parsed: OptionalTag = "m".parse().unwrap();
        assert_eq!(parsed.tag.unwrap().val(), 'm');
    }

    /// Parsing "m" into a `RequiredTag` stores the character 'm'.
    #[test]
    fn required_tag_from_str() {
        let parsed: RequiredTag = "m".parse().unwrap();
        assert_eq!(parsed.tag.val(), 'm');
    }
}
#[cfg(test)]
mod input_windowing_tests {
    use super::*;

    /// The `Default` impl uses a window and a step of one.
    #[test]
    fn input_windowing_default() {
        let windowing = InputWindowing::default();
        assert_eq!(windowing.win, NonZeroUsize::new(1).unwrap());
        assert_eq!(windowing.step, NonZeroUsize::new(1).unwrap());
    }

    /// Values given in a struct literal are stored unchanged.
    #[test]
    fn input_windowing_custom_values() {
        let windowing = InputWindowing {
            win: NonZeroUsize::new(300).unwrap(),
            step: NonZeroUsize::new(150).unwrap(),
        };
        assert_eq!(windowing.win, NonZeroUsize::new(300).unwrap());
        assert_eq!(windowing.step, NonZeroUsize::new(150).unwrap());
    }

    /// The builder accepts plain `usize` values and stores exactly what was
    /// given, not merely "something that builds".
    #[test]
    fn input_windowing_builder() {
        let windowing: InputWindowing = InputWindowingBuilder::default()
            .win(20)
            .step(10)
            .build()
            .unwrap();
        assert_eq!(windowing.win.get(), 20);
        assert_eq!(windowing.step.get(), 10);
    }

    /// The builder converts with `NonZeroUsize::try_from`, so a zero window
    /// or a zero step must make `build()` fail.
    #[test]
    fn input_windowing_builder_rejects_zero() {
        assert!(InputWindowingBuilder::default()
            .win(0)
            .step(10)
            .build()
            .is_err());
        assert!(InputWindowingBuilder::default()
            .win(20)
            .step(0)
            .build()
            .is_err());
    }
}
// Tests for `InputMods`: the `required_tag` accessor and builder chains that
// are expected to build successfully.
#[cfg(test)]
mod input_mods_required_tag_tests {
use super::*;
// `required_tag()` returns the tag stored in the `RequiredTag` group.
#[test]
fn input_mods_required_tag_fn_tag() {
let mod_char = ModChar::new('C');
let input_mods = InputMods::<RequiredTag> {
tag: RequiredTag { tag: mod_char },
mod_strand: None,
mod_prob_filter: ThresholdState::GtEq(0),
trim_read_ends_mod: 0,
base_qual_filter_mod: 0,
mod_region: None,
region_bed3: None,
};
assert_eq!(input_mods.required_tag(), mod_char);
}
// Builds with a required tag and the region given as a string; only
// success of `build()` is checked.
#[test]
fn input_mods_builder_required_tag_with_mod_region() {
let _: InputMods<RequiredTag> = InputModsBuilder::default()
.tag(RequiredTag::from_str("m").unwrap())
.mod_strand("bc".into())
.mod_prob_filter(ThresholdState::GtEq(0))
.trim_read_ends_mod(10)
.base_qual_filter_mod(10)
.mod_region("chr3:4000-5000".into())
.build()
.unwrap();
}
// Same as above but with the region given as a `Bed3` instead of a string.
#[test]
fn input_mods_builder_required_tag_with_region_bed3() {
let _: InputMods<RequiredTag> = InputModsBuilder::<RequiredTag>::default()
.tag(RequiredTag::from_str("m").unwrap())
.mod_strand("bc".into())
.mod_prob_filter(ThresholdState::GtEq(0))
.trim_read_ends_mod(10)
.base_qual_filter_mod(10)
.region_bed3(Bed3::<i32, u64>::new(4, 4000, 5000))
.build()
.unwrap();
}
// With an optional tag, setting only the probability filter is enough to build.
#[test]
fn input_mods_builder_optional_tag_minimal() {
let _: InputMods<OptionalTag> = InputModsBuilder::<OptionalTag>::default()
.mod_prob_filter(ThresholdState::GtEq(0))
.build()
.unwrap();
}
}
// Tests for `InputBam`: the `is_full_overlap` accessor, a successful builder
// chain, and `convert_region_to_bed3` against small hand-written BAM headers.
#[cfg(test)]
mod input_bam_tests {
use super::*;
use bedrs::Coordinates as _;
use indoc::indoc;
// `is_full_overlap` mirrors `full_region`, which is `false` by default.
#[test]
fn input_bam_is_full_overlap() {
let input_bam_default = InputBam::default();
assert!(!input_bam_default.is_full_overlap());
let input_bam_false = InputBam {
full_region: false,
..Default::default()
};
assert!(!input_bam_false.is_full_overlap());
let input_bam_true = InputBam {
full_region: true,
..Default::default()
};
assert!(input_bam_true.is_full_overlap());
}
// A chain that sets a single read id and a region together with
// `full_region` passes validation; only success of `build()` is checked.
#[test]
fn input_bam_builder() {
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path/to/bam.bam".into()))
.min_seq_len(30000u64)
.min_align_len(20000i64)
.read_id("some-id")
.read_filter("primary_forward,secondary_forward".into())
.sample_fraction(F32Bw0and1::one())
.mapq_filter(20)
.exclude_mapq_unavail(true)
.region("chr4:1000-2000".into())
.full_region(true)
.build()
.unwrap();
}
// With neither `region` nor `region_bed3` set, conversion succeeds and
// `region_bed3` stays `None`.
#[test]
fn input_bam_convert_region_to_bed3_none() {
let mut input_bam = InputBam::default();
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:248956422\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
assert!(input_bam.region_bed3.is_none());
}
// A region inside chr2 is converted; chr2 is the second `@SQ` line and the
// test expects contig id 1 with the coordinates carried over unchanged.
#[test]
fn input_bam_convert_region_to_bed3_with_region() {
let mut input_bam = InputBam {
region: Some(GenomicRegion::from_str("chr2:3400-3600").unwrap()),
..Default::default()
};
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:3000
@SQ\tSN:chr2\tLN:4000\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
assert!(input_bam.region_bed3.is_some());
let bed3 = input_bam.region_bed3.unwrap();
assert_eq!(bed3.chr(), &1);
assert_eq!(bed3.start(), 3400);
assert_eq!(bed3.end(), 3600);
}
// The region 4400-4600 lies beyond chr2's declared length of 4000; the
// unwrap is expected to panic with a message containing "InvalidRegion".
#[test]
#[should_panic(expected = "InvalidRegion")]
fn input_bam_convert_region_to_bed3_invalid_region() {
let mut input_bam = InputBam {
region: Some(GenomicRegion::from_str("chr2:4400-4600").expect("no error")),
..Default::default()
};
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:3000
@SQ\tSN:chr2\tLN:4000\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
}
// An open-ended region whose start (4600) is beyond chr2's declared length
// of 4000 is expected to fail the same way.
#[test]
#[should_panic(expected = "InvalidRegion")]
fn input_bam_convert_region_to_bed3_invalid_open_ended_region() {
let mut input_bam = InputBam {
region: Some(GenomicRegion::from_str("chr2:4600-").expect("no error")),
..Default::default()
};
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:3000
@SQ\tSN:chr2\tLN:4000\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
}
// chr3 does not appear in the header; the unwrap is expected to panic with a
// message containing "InvalidAlignCoords".
#[test]
#[should_panic(expected = "InvalidAlignCoords")]
fn input_bam_convert_region_to_bed3_invalid_contig() {
let mut input_bam = InputBam {
region: Some(GenomicRegion::from_str("chr3:1000-2000").expect("no error")),
..Default::default()
};
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:3000
@SQ\tSN:chr2\tLN:4000\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
}
// When only `region_bed3` is set, conversion leaves it exactly as it was.
#[test]
fn input_bam_convert_region_to_bed3_already_set_bed3() {
let mut input_bam = InputBam {
region: None,
region_bed3: Some(Bed3::<i32, u64>::new(1, 1000, 2000)),
..Default::default()
};
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:3000
@SQ\tSN:chr2\tLN:4000\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
assert!(input_bam.region_bed3.is_some());
let bed3 = input_bam.region_bed3.unwrap();
assert_eq!(bed3.chr(), &1);
assert_eq!(bed3.start(), 1000);
assert_eq!(bed3.end(), 2000);
}
// Setting both forms at once (possible via a struct literal, which bypasses
// the builder's validation) is rejected with `InvalidState`.
#[test]
#[should_panic(expected = "InvalidState")]
fn input_bam_convert_region_to_bed3_both_set() {
let mut input_bam = InputBam {
region: Some(GenomicRegion::from_str("chr2:1500-2500").unwrap()),
region_bed3: Some(Bed3::<i32, u64>::new(1, 1000, 2000)),
..Default::default()
};
let header_view = bam::HeaderView::from_bytes(indoc! {b"@HD\tVN:1.6\tSO:coordinate
@SQ\tSN:chr1\tLN:3000
@SQ\tSN:chr2\tLN:4000\n"});
input_bam.convert_region_to_bed3(header_view).unwrap();
}
}
// Negative tests for the builders' `validate` hooks: every chain below must
// make `build()` fail, and the unwrap must panic with a message containing
// "BuilderValidation".
#[cfg(test)]
mod validate_builder_functions {
use super::*;
// `full_region` without `region` or `region_bed3` is rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_bam_builder_full_region_without_region_fails() {
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path.bam".into()))
.full_region(true)
.build()
.unwrap();
}
// Giving the region both as a string and as a `Bed3` is rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_bam_builder_both_region_and_region_bed3_fails() {
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path.bam".into()))
.region("chr1:1000-2000".into())
.region_bed3(Bed3::<i32, u64>::new(0, 1000, 2000))
.build()
.unwrap();
}
// `read_id` together with `read_id_list` is rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_bam_builder_read_id_and_read_id_list_fails() {
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path.bam".into()))
.read_id("some-id")
.read_id_list("/some/file.txt")
.build()
.unwrap();
}
// `read_id` together with `read_id_set` is rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_bam_builder_read_id_and_read_id_set_fails() {
let mut read_ids = HashSet::new();
let _: bool = read_ids.insert("read1".to_owned());
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path.bam".into()))
.read_id("some-id")
.read_id_set(read_ids)
.build()
.unwrap();
}
// `read_id_list` together with `read_id_set` is rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_bam_builder_read_id_list_and_read_id_set_fails() {
let mut read_ids = HashSet::new();
let _: bool = read_ids.insert("read1".to_owned());
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path.bam".into()))
.read_id_list("/some/file.txt")
.read_id_set(read_ids)
.build()
.unwrap();
}
// All three read-id options at once are rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_bam_builder_all_three_read_id_options_fails() {
let mut read_ids = HashSet::new();
let _: bool = read_ids.insert("read1".to_owned());
let _: InputBam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("/some/path.bam".into()))
.read_id("some-id")
.read_id_list("/some/file.txt")
.read_id_set(read_ids)
.build()
.unwrap();
}
// `InputMods` with an optional tag: `mod_region` plus `region_bed3` is rejected.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_mods_builder_both_mod_region_and_region_bed3_fails() {
let _: InputMods<OptionalTag> = InputModsBuilder::<OptionalTag>::default()
.mod_prob_filter(ThresholdState::GtEq(0))
.mod_region("chr1:1000-2000".into())
.region_bed3(Bed3::<i32, u64>::new(0, 1000, 2000))
.build()
.unwrap();
}
// The same rejection applies when the tag group is `RequiredTag`.
#[test]
#[should_panic(expected = "BuilderValidation")]
fn input_mods_builder_required_tag_both_regions_fails() {
let _: InputMods<RequiredTag> = InputModsBuilder::<RequiredTag>::default()
.tag(RequiredTag::from_str("m").unwrap())
.mod_prob_filter(ThresholdState::GtEq(0))
.mod_region("chr1:1000-2000".into())
.region_bed3(Bed3::<i32, u64>::new(0, 1000, 2000))
.build()
.unwrap();
}
}