fibertools_rs/utils/
input_bam.rs1use crate::cli;
2use crate::fiber::FiberseqRecords;
3use crate::utils::bio_io;
4use clap::{Args, ValueHint};
5use rust_htslib::bam;
6use rust_htslib::bam::Read;
7use std::fmt::Debug;
8
9pub static MIN_ML_SCORE: &str = "125";
10
11#[derive(Debug, Args, Clone)]
14pub struct FiberFilters {
15 #[clap(
17 global = true,
18 short = 'F',
19 long = "filter",
20 default_value = "0",
21 help_heading = "BAM-Options"
22 )]
23 pub bit_flag: u16,
24 #[clap(
33 global = true,
34 short = 'x',
35 long = "ftx",
36 alias = "ft-expression",
37 help_heading = "BAM-Options"
38 )]
39 pub filter_expression: Option<String>,
40 #[clap(long="ml", alias="min-ml-score", default_value = MIN_ML_SCORE, help_heading = "BAM-Options", env="FT_MIN_ML_SCORE")]
42 pub min_ml_score: u8,
43 #[clap(
45 global = true,
46 long,
47 default_value = "0",
48 help_heading = "BAM-Options",
49 hide = true
50 )]
51 pub strip_starting_basemods: i64,
52}
53
54impl std::default::Default for FiberFilters {
55 fn default() -> Self {
56 Self {
57 bit_flag: 0,
58 min_ml_score: MIN_ML_SCORE.parse().unwrap(),
59 filter_expression: None,
60 strip_starting_basemods: 0,
61 }
62 }
63}
64
65impl FiberFilters {
66 pub fn filter_on_bit_flags<'a, I>(
68 &'a self,
69 records: I,
70 ) -> impl Iterator<Item = bam::Record> + 'a
71 where
72 I: IntoIterator<Item = Result<bam::Record, rust_htslib::errors::Error>> + 'a,
73 {
74 records
75 .into_iter()
76 .map(|r| r.expect("htslib is unable to read a record in the input."))
77 .filter(|r| {
78 (r.flags() & self.bit_flag) == 0
80 })
81 }
82}
83
84#[derive(Debug, Args)]
87pub struct InputBam {
88 #[clap(default_value = "-", value_hint = ValueHint::AnyPath)]
90 pub bam: String,
91 #[clap(flatten)]
92 pub filters: FiberFilters,
93 #[clap(flatten)]
94 pub global: cli::GlobalOpts,
95 #[clap(skip)]
97 pub header: Option<bam::Header>,
98}
99
100impl InputBam {
101 pub fn bam_reader(&mut self) -> bam::Reader {
102 let mut bam = bio_io::bam_reader(&self.bam);
103 bam.set_threads(self.global.threads)
104 .expect("unable to set threads for bam reader");
105 self.header = Some(bam::Header::from_template(bam.header()));
106 bam
107 }
108
109 pub fn indexed_bam_reader(&mut self) -> bam::IndexedReader {
110 if &self.bam == "-" {
111 panic!("Cannot use stdin (\"-\") for indexed bam reading. Please provide a file path for the bam file.");
112 }
113
114 let mut bam =
115 bam::IndexedReader::from_path(&self.bam).expect("unable to open indexed bam file");
116 self.header = Some(bam::Header::from_template(bam.header()));
117 bam.set_threads(self.global.threads).unwrap();
118 bam
119 }
120
121 pub fn fibers<'a>(&self, bam: &'a mut bam::Reader) -> FiberseqRecords<'a> {
122 FiberseqRecords::new(bam, self.filters.clone())
123 }
124
125 pub fn header_view(&self) -> bam::HeaderView {
126 bam::HeaderView::from_header(self.header.as_ref().expect(
127 "Input bam must be opened before opening the header or creating a writer with the input bam as a template.",
128 ))
129 }
130
131 pub fn header(&self) -> bam::Header {
132 bam::Header::from_template(&self.header_view())
133 }
134
135 pub fn bam_writer(&self, out: &str) -> bam::Writer {
136 let header = self.header();
137 let program_name = "fibertools-rs";
138 let program_id = "ft";
139 let program_version = crate::VERSION;
140 let mut out = crate::utils::bio_io::program_bam_writer_from_header(
141 out,
142 header,
143 program_name,
144 program_id,
145 program_version,
146 );
147 out.set_threads(self.global.threads)
148 .expect("unable to set threads for bam writer");
149 out
150 }
151}
152
153impl std::default::Default for InputBam {
154 fn default() -> Self {
155 Self {
156 bam: "-".to_string(),
157 filters: FiberFilters::default(),
158 global: cli::GlobalOpts::default(),
159 header: None,
160 }
161 }
162}