filter_clipped/clipping.rs
1#[derive(Debug)]
2/// An object to store statistics for base clipping on
3/// an alignment
4pub struct ClipStat {
5 /// number of bases from the 5' end being clipped
6 left: i64,
7 /// number of bases from the 3' end being clipped
8 right: i64,
9 /// total number of clipped bases on the alignment,
10 /// this should be the sum of left and right
11 total_clipped: i64,
12}
13
14/// Helper function to find the maximum value in a list
15///
16/// # Arguments
17/// * `clip_vec`: a list of integers
18///
19/// # Return
20/// * the maximum value from the list, 0 if it's None
21///
22/// # Examples
23/// ```
24/// use filter_clipped::clipping::vec_to_max;
25/// let list_of_numbers = vec![0,1,2,3];
26/// assert_eq!(3, vec_to_max(list_of_numbers));
27/// ```
28pub fn vec_to_max(clip_vec: Vec<i64>) -> i64 {
29 let max_clip = clip_vec.iter().max();
30 match max_clip {
31 Some(n) => *n,
32 _ => 0,
33 }
34}
35
36/// Helper function to calculate a fraction given two numbers
37///
38/// # Arguments
39/// * `n_base`: the numerator in the fraction
40/// * `seq_len`: the denominator in the fraction
41///
42/// # Return
43/// * fraction: n_base / seq_len
44///
45/// # Examples
46/// ```
47/// use filter_clipped::clipping::nbase_to_frac;
48/// assert_eq!(nbase_to_frac(10, 10.0).unwrap(), 1.0)
49/// ```
50pub fn nbase_to_frac(n_base: i64, seq_len: f64) -> Result<f64, String> {
51 if seq_len < 1.0 {
52 Err(String::from("seq_len must be greater than 0"))
53 } else {
54 Ok(n_base as f64 / seq_len)
55 }
56}
57
58impl ClipStat {
59 /// Creat a new ClipStat object for an alignment
60 ///
61 /// # Arguments
62 /// * `leading_clipped`: a list of [number of 5' soft clipped bases, number of 5' hard clipped bases]
63 /// * `trailing_clipped`: a list of [number of 3' soft clipped bases, number of 3' hard clipped bases]
64 ///
65 /// # Return:
66 /// A ClipStat object
67 ///
68 /// # Example
69 /// ```
70 /// use filter_clipped::clipping::ClipStat;
71 /// let clip_stat = ClipStat::new(
72 /// vec![0,1],
73 /// vec![0,2],
74 /// );
75 /// assert_eq!(clip_stat.left(), 1);
76 /// assert_eq!(clip_stat.right(), 2);
77 /// assert_eq!(clip_stat.total_clipped(), 3);
78 /// ```
79 pub fn new(leading_clipped: Vec<i64>, trailing_clipped: Vec<i64>) -> Self {
80 let all_clipped =
81 leading_clipped.iter().sum::<i64>() + trailing_clipped.iter().sum::<i64>();
82
83 Self {
84 left: vec_to_max(leading_clipped),
85 right: vec_to_max(trailing_clipped),
86 total_clipped: all_clipped,
87 }
88 }
89
90 /// Return the fraction of 3' clipped base relative to the sequence length
91 ///
92 /// # Argument
93 /// * `seq_len`: sequence length of the alignment
94 ///
95 /// # Return:
96 /// * `f64` fraction of 3' clipped base
97 ///
98 /// # Example
99 /// ```
100 /// use filter_clipped::clipping::ClipStat;
101 /// let clip_stat = ClipStat::new(
102 /// vec![0,1],
103 /// vec![0,2],
104 /// );
105 /// assert_eq!(clip_stat.right_fraction(10.0).unwrap(), 0.2);
106 /// ```
107 pub fn right_fraction(&self, seq_len: f64) -> Result<f64, String> {
108 nbase_to_frac(self.right, seq_len)
109 }
110
111 /// Return the fraction of 5' clipped base relative to the sequence length
112 ///
113 /// # Argument
114 /// * `seq_len`: sequence length of the alignment
115 ///
116 /// # Return:
117 /// * `f64` fraction of 5' clipped base
118 ///
119 /// # Example
120 /// ```
121 /// use filter_clipped::clipping::ClipStat;
122 /// let clip_stat = ClipStat::new(
123 /// vec![0,1],
124 /// vec![0,2],
125 /// );
126 /// assert_eq!(clip_stat.left_fraction(10.0).unwrap(), 0.1);
127 /// ```
128 pub fn left_fraction(&self, seq_len: f64) -> Result<f64, String> {
129 nbase_to_frac(self.left, seq_len)
130 }
131 /// Return the fraction of total clipped base relative to the sequence length
132 ///
133 /// # Argument
134 /// * `seq_len`: sequence length of the alignment
135 ///
136 /// # Return:
137 /// * `f64` fraction of 5' clipped base
138 ///
139 /// # Example
140 /// ```
141 /// use filter_clipped::clipping::ClipStat;
142 /// let clip_stat = ClipStat::new(
143 /// vec![0,1],
144 /// vec![0,2],
145 /// );
146 /// assert_eq!(clip_stat.total_fraction(10.0).unwrap(), 0.3);
147 /// ```
148 pub fn total_fraction(&self, seq_len: f64) -> Result<f64, String> {
149 nbase_to_frac(self.total_clipped, seq_len)
150 }
151
152 /// Expose left
153 /// # Example
154 /// ```
155 /// use filter_clipped::clipping::ClipStat;
156 /// let clip_stat = ClipStat::new(
157 /// vec![0,1],
158 /// vec![0,2],
159 /// );
160 /// assert_eq!(clip_stat.left(), 1);
161 /// ```
162 pub fn left(&self) -> i64 {
163 self.left
164 }
165
166 /// Expose right
167 /// # Example
168 /// ```
169 /// use filter_clipped::clipping::ClipStat;
170 /// let clip_stat = ClipStat::new(
171 /// vec![0,1],
172 /// vec![0,2],
173 /// );
174 /// assert_eq!(clip_stat.right(), 2);
175 /// ```
176 pub fn right(&self) -> i64 {
177 self.right
178 }
179
180 /// Expose total_clipped
181 /// # Example
182 /// ```
183 /// use filter_clipped::clipping::ClipStat;
184 /// let clip_stat = ClipStat::new(
185 /// vec![0,1],
186 /// vec![0,2],
187 /// );
188 /// assert_eq!(clip_stat.total_clipped(), 3);
189 /// ```
190 pub fn total_clipped(&self) -> i64 {
191 self.total_clipped
192 }
193}
194
195#[cfg(test)]
196mod tests {
197 use super::*;
198 use rstest::rstest;
199
200 #[rstest]
201 #[case(vec![2,0], vec![0,2], 0.2, 0.2, 0.4)]
202 #[case(vec![1,0], vec![0,2], 0.2, 0.1, 0.3)]
203 fn test_clip_stat(
204 #[case] leading_clipped: Vec<i64>,
205 #[case] trailing_cliped: Vec<i64>,
206 #[case] expected_r_frac: f64,
207 #[case] expected_l_frac: f64,
208 #[case] expected_total_frac: f64,
209 ) {
210 let seq_len = 10.0;
211 let clip_stat = ClipStat::new(leading_clipped, trailing_cliped);
212 assert_eq!(expected_r_frac, clip_stat.right_fraction(seq_len).unwrap());
213 assert_eq!(expected_l_frac, clip_stat.left_fraction(seq_len).unwrap());
214 assert_eq!(
215 expected_total_frac,
216 clip_stat.total_fraction(seq_len).unwrap()
217 );
218 }
219
220 #[rstest]
221 #[case(vec![2,3,0], 3)]
222 #[case(vec![1,2,3], 3)]
223 #[case(vec![1,0], 1)]
224 fn test_vec_to_max(#[case] input_vec: Vec<i64>, #[case] expected_out: i64) {
225 assert_eq!(expected_out, vec_to_max(input_vec));
226 }
227
228 #[rstest]
229 #[case(10, 20.0, 0.5)]
230 #[case(10, 40.0, 0.25)]
231 #[case(2, 40.0, 0.05)]
232 fn test_nbase_to_frac(#[case] n_base: i64, #[case] seq_len: f64, #[case] expected_out: f64) {
233 assert_eq!(nbase_to_frac(n_base, seq_len).unwrap(), expected_out);
234 }
235}