nanalogue_core/utils/
threshold_state.rs

1//! `ThresholdState` enum for modification probability thresholds
2//! Handles different threshold types for modification data filtering
3
4use super::contains::Contains;
5use super::f32_bw0and1::F32Bw0and1;
6use super::ord_pair::OrdPair;
7use crate::Error;
8use serde::{Deserialize, Serialize};
9use std::{fmt, str::FromStr as _};
10
11/// Types of thresholds on modification level that can be applied to modification data.
12/// Two possible use cases: (1) to specify that reading mod data should be restricted
13/// to bases at least this level of modified, or (2) to specify that only bases
14/// in this range should be regarded as modified.
15/// Values are 0 to 255 below as that's how they are stored in a modBAM file and
16/// this struct is expected to be used in contexts dealing directly with this data.
17#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
18#[non_exhaustive]
19pub enum ThresholdState {
20    /// modification probability >= this value, values are 0 to 255
21    GtEq(u8),
22    /// modification probability not within this range.
23    /// We expect this to be used to filter out modification calls
24    /// around 0.5 i.e. ones with the most uncertainty, although
25    /// users of this crate are free to set this to an interval
26    /// not including 0.5
27    InvertGtEqLtEq(OrdPair<u8>),
28    /// modification probability >= first value, and mod prob
29    /// not within the second range i.e. the 'and' combination
30    /// of the two possibilities above
31    Both((u8, OrdPair<u8>)),
32}
33
34/// default threshold is >= 0 i.e. all mods are allowed
35impl Default for ThresholdState {
36    fn default() -> Self {
37        ThresholdState::GtEq(0)
38    }
39}
40
41/// Displays thresholds but using floating point numbers between 0 and 1
42///
43/// Example 1:
44/// ```
45/// use nanalogue_core::{ThresholdState, OrdPair};
46/// let b = ThresholdState::GtEq(100);
47/// assert_eq!("probabilities >= 0.3922", format!("{}", b));
48/// ```
49/// Example 2:
50/// ```
51/// # use nanalogue_core::{ThresholdState, OrdPair};
52/// let b = ThresholdState::InvertGtEqLtEq(OrdPair::new(200, 220).expect("no error"));
53/// assert_eq!("probabilities < 0.7843 or > 0.8627", format!("{}", b));
54/// ```
55///
56/// Example 3:
57/// ```
58/// # use nanalogue_core::{ThresholdState, OrdPair};
59/// let b = ThresholdState::Both((100, OrdPair::new(200, 220).expect("no error")));
60/// assert_eq!("probabilities >= 0.3922 and (probabilities < 0.7843 or > 0.8627)", format!("{}", b));
61/// ```
62impl fmt::Display for ThresholdState {
63    /// display the u8 thresholds as a floating point number between 0 and 1
64    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
65        let printable = match *self {
66            ThresholdState::GtEq(v) => format!("probabilities >= {:.4}", F32Bw0and1::from(v)),
67            ThresholdState::InvertGtEqLtEq(v) => {
68                format!(
69                    "probabilities < {:.4} or > {:.4}",
70                    F32Bw0and1::from(v.low()),
71                    F32Bw0and1::from(v.high())
72                )
73            }
74            ThresholdState::Both((a, b)) => {
75                format!(
76                    "{:.4} and ({:.4})",
77                    ThresholdState::GtEq(a),
78                    ThresholdState::InvertGtEqLtEq(b)
79                )
80            }
81        };
82        write!(f, "{printable}")
83    }
84}
85
86/// Check if a given u8 is within the interval covered
87///
88/// Example 1:
89/// ```
90/// use nanalogue_core::{Error, OrdPair, ThresholdState, Contains};
91/// let b = ThresholdState::GtEq(100);
92/// assert!(b.contains(&101));
93/// assert!(b.contains(&100));
94/// assert!(!b.contains(&99));
95/// assert!(!b.contains(&0));
96/// ```
97/// Example 2:
98/// ```
99/// # use nanalogue_core::{Error, OrdPair, ThresholdState, Contains};
100/// let b = ThresholdState::InvertGtEqLtEq(OrdPair::new(200, 220).expect("no error"));
101/// assert!(b.contains(&0));
102/// assert!(b.contains(&100));
103/// assert!(!b.contains(&200));
104/// assert!(!b.contains(&210));
105/// assert!(!b.contains(&220));
106/// assert!(b.contains(&250));
107/// ```
108/// Example 3:
109/// ```
110/// # use nanalogue_core::{Error, OrdPair, ThresholdState, Contains};
111/// let b = ThresholdState::Both((100, OrdPair::new(200, 220).expect("no error")));
112/// assert!(!b.contains(&0));
113/// assert!(!b.contains(&99));
114/// assert!(b.contains(&100));
115/// assert!(b.contains(&101));
116/// assert!(!b.contains(&200));
117/// assert!(!b.contains(&210));
118/// assert!(!b.contains(&220));
119/// assert!(b.contains(&250));
120/// ```
121impl Contains<u8> for ThresholdState {
122    /// see if value is contained within the interval
123    /// specified by the threshold state
124    fn contains(&self, val: &u8) -> bool {
125        match *self {
126            ThresholdState::GtEq(v) => *val >= v,
127            ThresholdState::InvertGtEqLtEq(w) => !w.contains(val),
128            ThresholdState::Both((a, b)) => {
129                ThresholdState::GtEq(a).contains(val)
130                    && ThresholdState::InvertGtEqLtEq(b).contains(val)
131            }
132        }
133    }
134}
135
136/// Converts from `OrdPair<F32Bw0and1>` to `ThresholdState::InvertGtEqLtEq`
137///
138/// Example
139/// ```
140/// use nanalogue_core::{F32Bw0and1, OrdPair, ThresholdState};
141/// use std::str::FromStr;
142/// let b: ThresholdState = OrdPair::<F32Bw0and1>::from_str("0.4,0.6")?.into();
143/// assert_eq!(b, ThresholdState::InvertGtEqLtEq(OrdPair::<u8>::new(102u8, 153u8)?));
144/// # Ok::<(), nanalogue_core::Error>(())
145/// ```
146impl From<OrdPair<F32Bw0and1>> for ThresholdState {
147    fn from(value: OrdPair<F32Bw0and1>) -> Self {
148        let low: u8 = value.low().into();
149        let high: u8 = value.high().into();
150        ThresholdState::InvertGtEqLtEq(OrdPair::<u8>::new(low, high).expect("no error"))
151    }
152}
153
154impl ThresholdState {
155    /// Converts a pair of fractions e.g. "0.4,0.6" into a `ThresholdState::InvertGtEqLtEq`, and
156    /// an empty string to the all-permitted `ThresholdState::GtEq(0)`.
157    ///
158    /// Used to set up a filter to reject mod calls whose probabilities lie in a band.
159    /// This can be used to reject low-quality calls for example which lie around 0.5.
160    ///
161    /// We've elected to not write a `std::str::FromStr` implementation for `ThresholdState`
162    /// as the enum is quite complex, generating it from a string is not very user friendly.
163    ///
164    /// # Errors
165    /// String not empty and not in the format of low,high where low and high are
166    /// numbers from 0 to 1, both included
167    ///
168    /// # Examples
169    ///
170    /// Simple example
171    ///
172    /// ```
173    /// use nanalogue_core::ThresholdState;
174    /// let a = ThresholdState::from_str_ordpair_fraction("0.4,0.6")?;
175    /// assert_eq!(a, ThresholdState::InvertGtEqLtEq((102u8, 153u8).try_into()?));
176    /// # Ok::<(), nanalogue_core::Error>(())
177    /// ```
178    ///
179    /// Empty string should generate no filter
180    ///
181    /// ```
182    /// use nanalogue_core::ThresholdState;
183    /// let a = ThresholdState::from_str_ordpair_fraction("")?;
184    /// assert_eq!(a, ThresholdState::GtEq(0));
185    /// # Ok::<(), nanalogue_core::Error>(())
186    /// ```
187    pub fn from_str_ordpair_fraction(value: &str) -> Result<ThresholdState, Error> {
188        if value.is_empty() {
189            // allow all mods irrespective of their probabilities
190            Ok(ThresholdState::GtEq(0))
191        } else {
192            let result: ThresholdState = OrdPair::<F32Bw0and1>::from_str(value)?.into();
193            Ok(result)
194        }
195    }
196}
197
#[cfg(test)]
mod tests {
    //! Unit tests for `ThresholdState`: membership checks, display text,
    //! the default value, and the conversions from fraction pairs.

    use super::*;

    /// `GtEq` accepts values at or above the cutoff and prints the cutoff as a fraction
    #[test]
    fn threshold_state_gt_eq() {
        let threshold = ThresholdState::GtEq(100);

        // Test contains functionality
        assert!(threshold.contains(&101));
        assert!(threshold.contains(&100));
        assert!(!threshold.contains(&99));

        // Test display; 100/255 is approx 0.3922
        assert_eq!(format!("{threshold}"), "probabilities >= 0.3922");
    }

    /// `InvertGtEqLtEq` rejects the band (ends included) and accepts everything else
    #[test]
    fn threshold_state_invert_gt_eq_lt_eq() {
        let pair = OrdPair::new(200, 220).expect("should create");
        let threshold = ThresholdState::InvertGtEqLtEq(pair);

        // Test contains functionality
        assert!(threshold.contains(&0)); // zero is outside range
        assert!(threshold.contains(&100)); // outside range (below)
        assert!(!threshold.contains(&200)); // within range (boundary)
        assert!(!threshold.contains(&210)); // within range (middle)
        assert!(!threshold.contains(&220)); // within range (boundary)
        assert!(threshold.contains(&250)); // outside range (above)

        // Test display; 200/255 is approx 0.7843 and 220/255 is approx 0.8627
        assert_eq!(
            format!("{threshold}"),
            "probabilities < 0.7843 or > 0.8627"
        );
    }

    /// `Both` needs the value to pass the cutoff and to lie outside the band
    #[test]
    fn threshold_state_both() {
        let pair = OrdPair::new(200, 220).expect("should create");
        let threshold = ThresholdState::Both((100, pair));

        // Test contains functionality
        assert!(!threshold.contains(&0)); // fails zero
        assert!(!threshold.contains(&99)); // fails first condition
        assert!(threshold.contains(&100)); // meets both conditions
        assert!(threshold.contains(&101)); // meets both conditions
        assert!(!threshold.contains(&200)); // fails second condition
        assert!(!threshold.contains(&210)); // fails second condition
        assert!(!threshold.contains(&220)); // fails second condition
        assert!(threshold.contains(&250)); // meets both conditions

        // Test display; the text is the two simpler renderings joined by "and"
        assert_eq!(
            format!("{threshold}"),
            "probabilities >= 0.3922 and (probabilities < 0.7843 or > 0.8627)"
        );
    }

    /// The default is `GtEq(0)` and lets every possible value through
    #[test]
    fn threshold_state_default() {
        let default_threshold = ThresholdState::default();
        assert!(matches!(default_threshold, ThresholdState::GtEq(0)));

        // Default should accept all values
        for val in 0..=255u8 {
            assert!(default_threshold.contains(&val));
        }
    }

    /// Every variant mentions "probabilities", and `Both` is composed of the other two
    #[test]
    fn threshold_state_display_consistency() {
        let band = OrdPair::new(120, 140).expect("should create");
        let thresholds = [
            ThresholdState::GtEq(128),
            ThresholdState::InvertGtEqLtEq(OrdPair::new(100, 150).expect("should create")),
            ThresholdState::Both((50, band)),
        ];

        for threshold in thresholds {
            let display_str = format!("{threshold}");
            assert!(display_str.contains("probabilities"));
        }

        // The combined variant must read exactly as its two parts joined together
        assert_eq!(
            format!("{}", ThresholdState::Both((50, band))),
            format!(
                "{} and ({})",
                ThresholdState::GtEq(50),
                ThresholdState::InvertGtEqLtEq(band)
            )
        );
    }

    /// Boundary values of the `u8` range and a very narrow band
    #[test]
    fn threshold_state_edge_cases() {
        // Test boundary conditions
        let threshold_255 = ThresholdState::GtEq(255);
        assert!(threshold_255.contains(&255));
        assert!(!threshold_255.contains(&254));

        let threshold_0 = ThresholdState::GtEq(0);
        assert!(threshold_0.contains(&0));
        assert!(threshold_0.contains(&255));

        // Test the narrowest band made of two adjacent values
        let adjacent_pair = OrdPair::new(128, 129).expect("should create");
        let threshold_adjacent = ThresholdState::InvertGtEqLtEq(adjacent_pair);
        assert!(threshold_adjacent.contains(&127));
        assert!(!threshold_adjacent.contains(&128));
        assert!(!threshold_adjacent.contains(&129));
        assert!(threshold_adjacent.contains(&130));
    }

    /// Converts from `OrdPair<F32Bw0and1>` to `ThresholdState::InvertGtEqLtEq`
    #[test]
    fn threshold_state_from_ordpair_f32bw0and1() {
        use std::str::FromStr as _;
        let b: ThresholdState = OrdPair::<F32Bw0and1>::from_str("0.4,0.6")
            .expect("should parse")
            .into();
        assert_eq!(
            b,
            ThresholdState::InvertGtEqLtEq(
                OrdPair::<u8>::new(102u8, 153u8).expect("should create")
            )
        );
    }

    /// Converts a pair of fractions e.g. "0.4,0.6" into a `ThresholdState::InvertGtEqLtEq`
    #[test]
    fn threshold_state_from_str_ordpair_fraction_simple() {
        let a = ThresholdState::from_str_ordpair_fraction("0.4,0.6").expect("should parse");
        assert_eq!(
            a,
            ThresholdState::InvertGtEqLtEq((102u8, 153u8).try_into().expect("should create"))
        );
    }

    /// Empty string should generate no filter (all-permitted `ThresholdState::GtEq(0)`)
    #[test]
    fn threshold_state_from_str_ordpair_fraction_empty_string() {
        let a = ThresholdState::from_str_ordpair_fraction("").expect("should parse");
        assert_eq!(a, ThresholdState::GtEq(0));
    }

    /// Malformed, mis-ordered and out-of-range inputs must all be rejected
    #[test]
    fn threshold_state_from_str_ordpair_fraction_error_cases() {
        // Test invalid format - should error
        let _: Error = ThresholdState::from_str_ordpair_fraction("invalid").unwrap_err();
        let _: Error = ThresholdState::from_str_ordpair_fraction("0.5").unwrap_err();
        let _: Error = ThresholdState::from_str_ordpair_fraction("0.6,0.4").unwrap_err(); // wrong order
        let _: Error = ThresholdState::from_str_ordpair_fraction("1.5,2.0").unwrap_err(); // out of range
    }

    /// Tests conversion from `OrdPair<F32Bw0and1>` to `ThresholdState::InvertGtEqLtEq`
    #[test]
    fn threshold_state_from_ordpair_f32bw0and1_conversion() {
        use std::str::FromStr as _;

        // Test basic conversion with 0.4,0.6
        let pair1 = OrdPair::<F32Bw0and1>::from_str("0.4,0.6").expect("should parse");
        let threshold1: ThresholdState = pair1.into();
        assert_eq!(
            threshold1,
            ThresholdState::InvertGtEqLtEq(
                OrdPair::<u8>::new(102u8, 153u8).expect("should create")
            )
        );

        // Test with edge values 0.0,1.0
        let pair2 = OrdPair::<F32Bw0and1>::from_str("0.0,1.0").expect("should parse");
        let threshold2: ThresholdState = pair2.into();
        assert_eq!(
            threshold2,
            ThresholdState::InvertGtEqLtEq(OrdPair::<u8>::new(0u8, 255u8).expect("should create"))
        );

        // Test with mid-range values 0.5,0.7
        let pair3 = OrdPair::<F32Bw0and1>::from_str("0.5,0.7").expect("should parse");
        let threshold3: ThresholdState = pair3.into();
        assert!(
            matches!(threshold3, ThresholdState::InvertGtEqLtEq(_)),
            "Expected InvertGtEqLtEq variant"
        );
        if let ThresholdState::InvertGtEqLtEq(ord_pair) = threshold3 {
            // Verify the conversion is approximately correct; the exact value
            // depends on how the fraction is rounded to a u8
            // 0.5 * 255 ≈ 127.5, 0.7 * 255 ≈ 178.5
            assert!(ord_pair.low() >= 127 && ord_pair.low() <= 128);
            assert!(ord_pair.high() >= 178 && ord_pair.high() <= 179);
        }
    }
}