1pub mod cli;
2pub mod clipping;
3
4use cli::Parser;
5use clipping::ClipStat;
6
7use log::{debug, info};
8use rust_htslib::{
9 bam,
10 bam::{record::CigarStringView, Header, Read, Reader, Record},
11};
12
13pub fn run(
54 in_bam: String,
55 out_bam: String,
56 inverse: bool,
57 both_end: f64,
58 left_side: f64,
59 right_side: f64,
60 unalign: bool,
61) -> Result<u8, String> {
62 let mut out_count: u32 = 0;
63 let mut in_count: u32 = 0;
64 let mut unaligned_count: u32 = 0;
65 info!("Reading from alignment file: {}", in_bam);
66 info!("Writing to alignment file: {}", out_bam);
67 info!(
68 "Thresholds: trailing clipped: {}, leading clipped: {}, total clipped: {}",
69 right_side, left_side, both_end
70 );
71 let mut in_bam: Reader = match in_bam.eq("-") {
72 true => bam::Reader::from_stdin().map_err(|e| e.to_string())?,
73 _ => bam::Reader::from_path(&in_bam).map_err(|e| e.to_string())?,
74 };
75 let header: Header = bam::Header::from_template(in_bam.header());
76
77 let mut out_bam = match out_bam.eq("-") {
78 true => bam::Writer::from_stdout(&header, bam::Format::Bam).map_err(|e| e.to_string())?,
79 _ => bam::Writer::from_path(&out_bam, &header, bam::Format::Bam)
80 .map_err(|e| e.to_string())?,
81 };
82
83 for r in in_bam.records() {
84 in_count += 1;
85 let mut record: Record = r.map_err(|e| e.to_string())?;
86 let seq_len: f64 = record.seq().len() as f64;
87 let cigar: CigarStringView = record.cigar();
88
89 let leading_clipped: Vec<i64> = vec![cigar.leading_softclips(), cigar.leading_hardclips()];
90 let trailing_cliped: Vec<i64> =
91 vec![cigar.trailing_softclips(), cigar.trailing_hardclips()];
92
93 let clip_stat: ClipStat = ClipStat::new(leading_clipped, trailing_cliped);
94
95 let keep: bool = clip_stat.total_fraction(seq_len)? < both_end
96 && clip_stat.left_fraction(seq_len)? <= left_side
97 && clip_stat.right_fraction(seq_len)? <= right_side;
98
99 debug!("{:?} {}", clip_stat, seq_len);
100 if !(unalign) {
101 if (keep && !inverse) || (inverse && !keep) {
102 out_bam.write(&record).map_err(|e| e.to_string())?;
103 out_count += 1;
104 }
105 } else {
106 if keep {
107 out_bam.write(&record).map_err(|e| e.to_string())?;
108 } else {
109 record.set_unmapped();
110 record.unset_reverse();
111 record.unset_proper_pair();
112 record.set_tid(-1);
113 record.set_pos(-1);
114 out_bam.write(&record).map_err(|e| e.to_string())?;
115 unaligned_count += 1
116 }
117 out_count += 1;
118 }
119 }
120 info!(
121 "Read {} alignments; Written {} alignments; Making {} to unaligned",
122 in_count, out_count, unaligned_count,
123 );
124 Ok(0) }
126
127pub fn wrapper() {
130 let args = cli::Command::parse();
131 let result = run(
132 args.in_bam,
133 args.out_bam,
134 args.inverse,
135 args.both_end,
136 args.left_side,
137 args.right_side,
138 args.unalign,
139 );
140 match result {
141 Ok(_) => (),
142 Err(err) => println!("{}", err),
143 };
144}
145
146#[cfg(test)]
147mod tests {
148 use super::*;
149 use rstest::rstest;
150 use std::string::String;
151
152 fn count_bam(bam_file: String, expected_count: i32, expected_unaligned: i32) {
153 let mut bam_reader = bam::Reader::from_path(bam_file).unwrap();
154 let mut aln_count = 0;
155 let mut unaligned_count = 0;
156 for r in bam_reader.records() {
157 let _record = r.unwrap();
158 aln_count += 1;
159 if _record.is_unmapped() {
160 unaligned_count += 1;
161 }
162 }
163 assert_eq!(expected_count, aln_count);
164
165 if expected_unaligned > 0 {
166 assert_eq!(expected_unaligned, unaligned_count);
167 }
168 }
169
170 #[rstest]
171 #[case(1, 0.2, 0.3, true, 0, 0, false)]
172 #[case(2, 0.2, 0.3, false, 9, 0, false)]
173 #[case(3, 0.1, 0.1, false, 6, 0, false)]
174 #[case(4, 0.1, 0.1, false, 9, 3, true)]
175 #[case(4, 0.1, 0.1, false, 9, 3, true)]
176 fn test_run(
177 #[case] test_case: usize,
178 #[case] max_both_end: f64,
179 #[case] max_single_end: f64,
180 #[case] inverse: bool,
181 #[case] expected_count: i32,
182 #[case] expected_unaligned: i32,
183 #[case] unalign: bool,
184 ) {
185 let out_bam: &str = &format!("test/data/out_{}.bam", test_case);
186 let result = run(
187 "test/data/test.sam".to_string(),
188 out_bam.to_string(),
189 inverse,
190 max_both_end,
191 max_single_end,
192 max_single_end,
193 unalign,
194 )
195 .unwrap();
196 assert_eq!(result, 0);
197 count_bam(out_bam.to_string(), expected_count, expected_unaligned);
198 }
199}