nanalogue_core/utils/threshold_state.rs
//! `ThresholdState` enum for modification probability thresholds.
//! Handles the different threshold types used when filtering modification data.
3
4use super::contains::Contains;
5use super::f32_bw0and1::F32Bw0and1;
6use super::ord_pair::OrdPair;
7use crate::Error;
8use serde::{Deserialize, Serialize};
9use std::{fmt, str::FromStr as _};
10
11/// Types of thresholds on modification level that can be applied to modification data.
12/// Two possible use cases: (1) to specify that reading mod data should be restricted
13/// to bases at least this level of modified, or (2) to specify that only bases
14/// in this range should be regarded as modified.
15/// Values are 0 to 255 below as that's how they are stored in a modBAM file and
16/// this struct is expected to be used in contexts dealing directly with this data.
17#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
18#[non_exhaustive]
19pub enum ThresholdState {
20 /// modification probability >= this value, values are 0 to 255
21 GtEq(u8),
22 /// modification probability not within this range.
23 /// We expect this to be used to filter out modification calls
24 /// around 0.5 i.e. ones with the most uncertainty, although
25 /// users of this crate are free to set this to an interval
26 /// not including 0.5
27 InvertGtEqLtEq(OrdPair<u8>),
28 /// modification probability >= first value, and mod prob
29 /// not within the second range i.e. the 'and' combination
30 /// of the two possibilities above
31 Both((u8, OrdPair<u8>)),
32}
33
34/// default threshold is >= 0 i.e. all mods are allowed
35impl Default for ThresholdState {
36 fn default() -> Self {
37 ThresholdState::GtEq(0)
38 }
39}
40
41/// Displays thresholds but using floating point numbers between 0 and 1
42///
43/// Example 1:
44/// ```
45/// use nanalogue_core::{ThresholdState, OrdPair};
46/// let b = ThresholdState::GtEq(100);
47/// assert_eq!("probabilities >= 0.3922", format!("{}", b));
48/// ```
49/// Example 2:
50/// ```
51/// # use nanalogue_core::{ThresholdState, OrdPair};
52/// let b = ThresholdState::InvertGtEqLtEq(OrdPair::new(200, 220).expect("no error"));
53/// assert_eq!("probabilities < 0.7843 or > 0.8627", format!("{}", b));
54/// ```
55///
56/// Example 3:
57/// ```
58/// # use nanalogue_core::{ThresholdState, OrdPair};
59/// let b = ThresholdState::Both((100, OrdPair::new(200, 220).expect("no error")));
60/// assert_eq!("probabilities >= 0.3922 and (probabilities < 0.7843 or > 0.8627)", format!("{}", b));
61/// ```
62impl fmt::Display for ThresholdState {
63 /// display the u8 thresholds as a floating point number between 0 and 1
64 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
65 let printable = match *self {
66 ThresholdState::GtEq(v) => format!("probabilities >= {:.4}", F32Bw0and1::from(v)),
67 ThresholdState::InvertGtEqLtEq(v) => {
68 format!(
69 "probabilities < {:.4} or > {:.4}",
70 F32Bw0and1::from(v.low()),
71 F32Bw0and1::from(v.high())
72 )
73 }
74 ThresholdState::Both((a, b)) => {
75 format!(
76 "{:.4} and ({:.4})",
77 ThresholdState::GtEq(a),
78 ThresholdState::InvertGtEqLtEq(b)
79 )
80 }
81 };
82 write!(f, "{printable}")
83 }
84}
85
86/// Check if a given u8 is within the interval covered
87///
88/// Example 1:
89/// ```
90/// use nanalogue_core::{Error, OrdPair, ThresholdState, Contains};
91/// let b = ThresholdState::GtEq(100);
92/// assert!(b.contains(&101));
93/// assert!(b.contains(&100));
94/// assert!(!b.contains(&99));
95/// assert!(!b.contains(&0));
96/// ```
97/// Example 2:
98/// ```
99/// # use nanalogue_core::{Error, OrdPair, ThresholdState, Contains};
100/// let b = ThresholdState::InvertGtEqLtEq(OrdPair::new(200, 220).expect("no error"));
101/// assert!(b.contains(&0));
102/// assert!(b.contains(&100));
103/// assert!(!b.contains(&200));
104/// assert!(!b.contains(&210));
105/// assert!(!b.contains(&220));
106/// assert!(b.contains(&250));
107/// ```
108/// Example 3:
109/// ```
110/// # use nanalogue_core::{Error, OrdPair, ThresholdState, Contains};
111/// let b = ThresholdState::Both((100, OrdPair::new(200, 220).expect("no error")));
112/// assert!(!b.contains(&0));
113/// assert!(!b.contains(&99));
114/// assert!(b.contains(&100));
115/// assert!(b.contains(&101));
116/// assert!(!b.contains(&200));
117/// assert!(!b.contains(&210));
118/// assert!(!b.contains(&220));
119/// assert!(b.contains(&250));
120/// ```
121impl Contains<u8> for ThresholdState {
122 /// see if value is contained within the interval
123 /// specified by the threshold state
124 fn contains(&self, val: &u8) -> bool {
125 match *self {
126 ThresholdState::GtEq(v) => *val >= v,
127 ThresholdState::InvertGtEqLtEq(w) => !w.contains(val),
128 ThresholdState::Both((a, b)) => {
129 ThresholdState::GtEq(a).contains(val)
130 && ThresholdState::InvertGtEqLtEq(b).contains(val)
131 }
132 }
133 }
134}
135
136/// Converts from `OrdPair<F32Bw0and1>` to `ThresholdState::InvertGtEqLtEq`
137///
138/// Example
139/// ```
140/// use nanalogue_core::{F32Bw0and1, OrdPair, ThresholdState};
141/// use std::str::FromStr;
142/// let b: ThresholdState = OrdPair::<F32Bw0and1>::from_str("0.4,0.6")?.into();
143/// assert_eq!(b, ThresholdState::InvertGtEqLtEq(OrdPair::<u8>::new(102u8, 153u8)?));
144/// # Ok::<(), nanalogue_core::Error>(())
145/// ```
146impl From<OrdPair<F32Bw0and1>> for ThresholdState {
147 fn from(value: OrdPair<F32Bw0and1>) -> Self {
148 let low: u8 = value.low().into();
149 let high: u8 = value.high().into();
150 ThresholdState::InvertGtEqLtEq(OrdPair::<u8>::new(low, high).expect("no error"))
151 }
152}
153
154impl ThresholdState {
155 /// Converts a pair of fractions e.g. "0.4,0.6" into a `ThresholdState::InvertGtEqLtEq`, and
156 /// an empty string to the all-permitted `ThresholdState::GtEq(0)`.
157 ///
158 /// Used to set up a filter to reject mod calls whose probabilities lie in a band.
159 /// This can be used to reject low-quality calls for example which lie around 0.5.
160 ///
161 /// We've elected to not write a `std::str::FromStr` implementation for `ThresholdState`
162 /// as the enum is quite complex, generating it from a string is not very user friendly.
163 ///
164 /// # Errors
165 /// String not empty and not in the format of low,high where low and high are
166 /// numbers from 0 to 1, both included
167 ///
168 /// # Examples
169 ///
170 /// Simple example
171 ///
172 /// ```
173 /// use nanalogue_core::ThresholdState;
174 /// let a = ThresholdState::from_str_ordpair_fraction("0.4,0.6")?;
175 /// assert_eq!(a, ThresholdState::InvertGtEqLtEq((102u8, 153u8).try_into()?));
176 /// # Ok::<(), nanalogue_core::Error>(())
177 /// ```
178 ///
179 /// Empty string should generate no filter
180 ///
181 /// ```
182 /// use nanalogue_core::ThresholdState;
183 /// let a = ThresholdState::from_str_ordpair_fraction("")?;
184 /// assert_eq!(a, ThresholdState::GtEq(0));
185 /// # Ok::<(), nanalogue_core::Error>(())
186 /// ```
187 pub fn from_str_ordpair_fraction(value: &str) -> Result<ThresholdState, Error> {
188 if value.is_empty() {
189 // allow all mods irrespective of their probabilities
190 Ok(ThresholdState::GtEq(0))
191 } else {
192 let result: ThresholdState = OrdPair::<F32Bw0and1>::from_str(value)?.into();
193 Ok(result)
194 }
195 }
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `state` accepts every value in `accepted` and rejects
    /// every value in `rejected`.
    fn assert_membership(state: ThresholdState, accepted: &[u8], rejected: &[u8]) {
        for value in accepted {
            assert!(state.contains(value), "{value} should pass {state}");
        }
        for value in rejected {
            assert!(!state.contains(value), "{value} should fail {state}");
        }
    }

    #[test]
    fn threshold_state_gt_eq() {
        let state = ThresholdState::GtEq(100);

        // Membership: at or above the level passes, below fails
        assert_membership(state, &[101, 100], &[99]);

        // Display: 100/255 is approx 0.3922
        let shown = state.to_string();
        assert!(shown.contains("probabilities >= 0.3922"));
    }

    #[test]
    fn threshold_state_invert_gt_eq_lt_eq() {
        let band = OrdPair::new(200, 220).expect("should create");
        let state = ThresholdState::InvertGtEqLtEq(band);

        // Membership: zero, below the band and above the band pass;
        // both boundaries and the middle of the band fail
        assert_membership(state, &[0, 100, 250], &[200, 210, 220]);

        // Display
        let shown = state.to_string();
        for fragment in ["probabilities <", "or >"] {
            assert!(shown.contains(fragment));
        }
    }

    #[test]
    fn threshold_state_both() {
        let band = OrdPair::new(200, 220).expect("should create");
        let state = ThresholdState::Both((100, band));

        // Membership: 100, 101 and 250 meet both conditions;
        // 0 and 99 fail the first condition; 200, 210 and 220 fail the second
        assert_membership(state, &[100, 101, 250], &[0, 99, 200, 210, 220]);

        // Display
        let shown = state.to_string();
        for fragment in ["and", "probabilities >="] {
            assert!(shown.contains(fragment));
        }
    }

    #[test]
    fn threshold_state_default() {
        let state = ThresholdState::default();
        assert_eq!(state, ThresholdState::GtEq(0));

        // Default should accept all values
        assert!((0..=u8::MAX).all(|level| state.contains(&level)));
    }

    #[test]
    fn threshold_state_display_consistency() {
        // Every variant should render to something non-empty that mentions probabilities
        let states = [
            ThresholdState::GtEq(128),
            ThresholdState::InvertGtEqLtEq(OrdPair::new(100, 150).expect("should create")),
            ThresholdState::Both((50, OrdPair::new(120, 140).expect("should create"))),
        ];

        for shown in states.iter().map(ToString::to_string) {
            assert!(shown.contains("probabilities"));
            assert!(!shown.is_empty());
        }
    }

    #[test]
    fn threshold_state_edge_cases() {
        // Extremes of the u8 range
        assert_membership(ThresholdState::GtEq(255), &[255], &[254]);
        assert_membership(ThresholdState::GtEq(0), &[0, 255], &[]);

        // Narrow band made of two adjacent values
        let narrow = OrdPair::new(128, 129).expect("should create");
        assert_membership(
            ThresholdState::InvertGtEqLtEq(narrow),
            &[127, 130],
            &[128, 129],
        );
    }

    /// `OrdPair<F32Bw0and1>` converts into `ThresholdState::InvertGtEqLtEq`
    #[test]
    fn threshold_state_from_ordpair_f32bw0and1() {
        use std::str::FromStr as _;
        let fractions = OrdPair::<F32Bw0and1>::from_str("0.4,0.6").expect("should parse");
        let expected =
            ThresholdState::InvertGtEqLtEq(OrdPair::<u8>::new(102u8, 153u8).expect("should create"));
        assert_eq!(ThresholdState::from(fractions), expected);
    }

    /// A pair of fractions e.g. "0.4,0.6" becomes a `ThresholdState::InvertGtEqLtEq`
    #[test]
    fn threshold_state_from_str_ordpair_fraction_simple() {
        let expected =
            ThresholdState::InvertGtEqLtEq((102u8, 153u8).try_into().expect("should create"));
        let parsed = ThresholdState::from_str_ordpair_fraction("0.4,0.6").expect("should parse");
        assert_eq!(parsed, expected);
    }

    /// Empty string should generate no filter (all-permitted `ThresholdState::GtEq(0)`)
    #[test]
    fn threshold_state_from_str_ordpair_fraction_empty_string() {
        let parsed = ThresholdState::from_str_ordpair_fraction("").expect("should parse");
        assert_eq!(parsed, ThresholdState::GtEq(0));
    }

    #[test]
    fn threshold_state_from_str_ordpair_fraction_error_cases() {
        // Each input should error: invalid format, a single number,
        // wrong order, and out of range
        for bad_input in ["invalid", "0.5", "0.6,0.4", "1.5,2.0"] {
            let _: Error = ThresholdState::from_str_ordpair_fraction(bad_input).unwrap_err();
        }
    }

    /// Tests conversion from `OrdPair<F32Bw0and1>` to `ThresholdState::InvertGtEqLtEq`
    #[test]
    fn threshold_state_from_ordpair_f32bw0and1_conversion() {
        use std::str::FromStr as _;

        // Inputs whose converted bounds are known exactly:
        // the basic 0.4,0.6 case and the edge values 0.0,1.0
        for (text, low, high) in [("0.4,0.6", 102u8, 153u8), ("0.0,1.0", 0u8, 255u8)] {
            let fractions = OrdPair::<F32Bw0and1>::from_str(text).expect("should parse");
            let state: ThresholdState = fractions.into();
            assert_eq!(
                state,
                ThresholdState::InvertGtEqLtEq(
                    OrdPair::<u8>::new(low, high).expect("should create")
                )
            );
        }

        // Mid-range values 0.5,0.7
        let fractions = OrdPair::<F32Bw0and1>::from_str("0.5,0.7").expect("should parse");
        let state: ThresholdState = fractions.into();
        assert!(
            matches!(state, ThresholdState::InvertGtEqLtEq(_)),
            "Expected InvertGtEqLtEq variant"
        );
        if let ThresholdState::InvertGtEqLtEq(band) = state {
            // Verify the conversion is approximately correct
            // 0.5 * 255 ≈ 127.5, 0.7 * 255 ≈ 178.5
            assert!(matches!(band.low(), 127..=128));
            assert!(matches!(band.high(), 178..=179));
        }
    }
}