nanalogue_core/utils/
read_state.rs

1//! `ReadState` enum for representing BAM alignment states
2//! Handles conversion between internal representation and BAM flags
3
4use crate::Error;
5use rand::Rng;
6use rand::distr::{Distribution, StandardUniform};
7use serde::{Deserialize, Serialize};
8use std::fmt;
9use std::str::FromStr;
10
11/// Alignment state of a read; seven possibilities + one unknown state
12#[derive(Debug, Clone, Default, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
13#[non_exhaustive]
14pub enum ReadState {
15    #[default]
16    /// Primary alignment to the reference strand
17    #[serde(rename = "primary_forward")]
18    PrimaryFwd,
19    /// Primary alignment opposite the reference strand
20    #[serde(rename = "primary_reverse")]
21    PrimaryRev,
22    /// Secondary alignment to the reference strand
23    #[serde(rename = "secondary_forward")]
24    SecondaryFwd,
25    /// Secondary alignment opposite the reference strand
26    #[serde(rename = "secondary_reverse")]
27    SecondaryRev,
28    /// Supplementary alignment to the reference strand
29    #[serde(rename = "supplementary_forward")]
30    SupplementaryFwd,
31    /// Supplementary alignment opposite the reference strand
32    #[serde(rename = "supplementary_reverse")]
33    SupplementaryRev,
34    /// Marked as unmapped in the BAM file. We are assuming
35    /// that unmapped sequences will not be stored as reversed
36    /// complements, as what would be the point of that?
37    #[serde(rename = "unmapped")]
38    Unmapped,
39}
40
41// Implements random pick of a variant
42impl Distribution<ReadState> for StandardUniform {
43    /// Allows us to randomly pick a variant
44    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ReadState {
45        match rng.random_range(0..7) {
46            0 => ReadState::PrimaryFwd,
47            1 => ReadState::PrimaryRev,
48            2 => ReadState::SecondaryFwd,
49            3 => ReadState::SecondaryRev,
50            4 => ReadState::SupplementaryFwd,
51            5 => ReadState::SupplementaryRev,
52            6 => ReadState::Unmapped,
53            _ => unreachable!(),
54        }
55    }
56}
57
58// Implements conversion of ReadState into the standard BAM flag format
59impl From<ReadState> for u16 {
60    /// converts our internal representation to the BAM flag format
61    fn from(value: ReadState) -> u16 {
62        match value {
63            ReadState::PrimaryFwd => 0,
64            ReadState::Unmapped => 4,
65            ReadState::PrimaryRev => 16,
66            ReadState::SecondaryFwd => 256,
67            ReadState::SecondaryRev => 272,
68            ReadState::SupplementaryFwd => 2048,
69            ReadState::SupplementaryRev => 2064,
70        }
71    }
72}
73
74// Implements conversion of the standard BAM flag format into ReadState.
75// NOTE: this function is geared towards our API where we do not deal with
76// paired reads etc. which have other flags set in the BAM record.
77impl TryFrom<u16> for ReadState {
78    type Error = Error;
79    /// converts BAM flag format to our internal representation
80    fn try_from(value: u16) -> Result<ReadState, Error> {
81        match value {
82            0 => Ok(ReadState::PrimaryFwd),
83            4 => Ok(ReadState::Unmapped),
84            16 => Ok(ReadState::PrimaryRev),
85            256 => Ok(ReadState::SecondaryFwd),
86            272 => Ok(ReadState::SecondaryRev),
87            2048 => Ok(ReadState::SupplementaryFwd),
88            2064 => Ok(ReadState::SupplementaryRev),
89            v => Err(Error::UnknownAlignState(format!(
90                "BAM flag {v} cannot be converted to our `ReadState` variants"
91            ))),
92        }
93    }
94}
95
96/// Implements from string for `ReadState`
97///
98/// ```
99/// use nanalogue_core::ReadState;
100/// use std::str::FromStr;
101///
102/// // Primary alignments
103/// let state = ReadState::from_str("primary_forward")?;
104/// assert_eq!(state, ReadState::PrimaryFwd);
105/// # Ok::<(), nanalogue_core::Error>(())
106/// ```
107///
108/// ```
109/// # use nanalogue_core::ReadState;
110/// # use std::str::FromStr;
111/// #
112/// // Secondary alignments
113/// let state = ReadState::from_str("secondary_reverse")?;
114/// assert_eq!(state, ReadState::SecondaryRev);
115/// # Ok::<(), nanalogue_core::Error>(())
116/// ```
117///
118/// ```
119/// # use nanalogue_core::ReadState;
120/// # use std::str::FromStr;
121/// #
122/// // Supplementary alignments
123/// let state = ReadState::from_str("supplementary_forward")?;
124/// assert_eq!(state, ReadState::SupplementaryFwd);
125/// # Ok::<(), nanalogue_core::Error>(())
126/// ```
127///
128/// ```
129/// # use nanalogue_core::ReadState;
130/// # use std::str::FromStr;
131/// #
132/// // Unmapped reads
133/// let state = ReadState::from_str("unmapped")?;
134/// assert_eq!(state, ReadState::Unmapped);
135/// # Ok::<(), nanalogue_core::Error>(())
136/// ```
137///
138/// ```should_panic
139/// # use nanalogue_core::ReadState;
140/// # use std::str::FromStr;
141/// #
142/// // Invalid string should error
143/// let state = ReadState::from_str("invalid_state")?;
144/// # Ok::<(), nanalogue_core::Error>(())
145/// ```
146impl FromStr for ReadState {
147    type Err = Error;
148
149    fn from_str(s: &str) -> Result<Self, Self::Err> {
150        match s {
151            "primary_forward" => Ok(ReadState::PrimaryFwd),
152            "primary_reverse" => Ok(ReadState::PrimaryRev),
153            "secondary_forward" => Ok(ReadState::SecondaryFwd),
154            "secondary_reverse" => Ok(ReadState::SecondaryRev),
155            "supplementary_forward" => Ok(ReadState::SupplementaryFwd),
156            "supplementary_reverse" => Ok(ReadState::SupplementaryRev),
157            "unmapped" => Ok(ReadState::Unmapped),
158            v => Err(Error::UnknownAlignState(format!(
159                "{v} cannot be converted to `ReadState` variant"
160            ))),
161        }
162    }
163}
164
165/// Implements printing of read state
166impl fmt::Display for ReadState {
167    #[expect(
168        clippy::pattern_type_mismatch,
169        reason = "simple function, notation cleaner without *"
170    )]
171    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
172        match self {
173            ReadState::PrimaryFwd => "primary_forward",
174            ReadState::SecondaryFwd => "secondary_forward",
175            ReadState::SupplementaryFwd => "supplementary_forward",
176            ReadState::PrimaryRev => "primary_reverse",
177            ReadState::SecondaryRev => "secondary_reverse",
178            ReadState::SupplementaryRev => "supplementary_reverse",
179            ReadState::Unmapped => "unmapped",
180        }
181        .fmt(f)
182    }
183}
184
185impl ReadState {
186    /// Checks if the state is unmapped
187    #[expect(
188        clippy::pattern_type_mismatch,
189        reason = "simple function, notation cleaner without *"
190    )]
191    #[must_use]
192    pub fn is_unmapped(&self) -> bool {
193        match self {
194            ReadState::Unmapped => true,
195            ReadState::PrimaryFwd
196            | ReadState::PrimaryRev
197            | ReadState::SecondaryFwd
198            | ReadState::SecondaryRev
199            | ReadState::SupplementaryFwd
200            | ReadState::SupplementaryRev => false,
201        }
202    }
203    /// Gets the strand corresponding to the alignment type
204    ///
205    /// * '.' if unmapped
206    /// * '+' if forward
207    /// * '-' if reverse
208    #[expect(
209        clippy::pattern_type_mismatch,
210        reason = "simple function, notation cleaner without *"
211    )]
212    #[must_use]
213    pub fn strand(&self) -> char {
214        match self {
215            ReadState::Unmapped => '.',
216            ReadState::PrimaryFwd | ReadState::SecondaryFwd | ReadState::SupplementaryFwd => '+',
217            ReadState::PrimaryRev | ReadState::SecondaryRev | ReadState::SupplementaryRev => '-',
218        }
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ReadState` variant, listed once for the tests that loop over all of them
    const ALL_STATES: [ReadState; 7] = [
        ReadState::PrimaryFwd,
        ReadState::PrimaryRev,
        ReadState::SecondaryFwd,
        ReadState::SecondaryRev,
        ReadState::SupplementaryFwd,
        ReadState::SupplementaryRev,
        ReadState::Unmapped,
    ];

    /// Tests that each `ReadState` survives conversion to a u16 flag and back
    #[test]
    fn readstate_u16_conversion_roundtrip() {
        for state in ALL_STATES {
            let flag = u16::from(state);
            let recovered_state =
                ReadState::try_from(flag).expect("conversion from u16 should work");
            assert_eq!(state, recovered_state);
        }
    }

    /// Tests the exact u16 flag value produced for each `ReadState`
    #[test]
    fn readstate_specific_flag_values() {
        let expected_flags: [(ReadState, u16); 7] = [
            (ReadState::PrimaryFwd, 0),
            (ReadState::Unmapped, 4),
            (ReadState::PrimaryRev, 16),
            (ReadState::SecondaryFwd, 256),
            (ReadState::SecondaryRev, 272),
            (ReadState::SupplementaryFwd, 2048),
            (ReadState::SupplementaryRev, 2064),
        ];
        for (state, flag) in expected_flags {
            assert_eq!(u16::from(state), flag);
        }
    }

    /// Tests that u16 values outside the supported set are rejected
    #[test]
    fn readstate_invalid_u16_values() {
        // single flag bits that do not correspond to any `ReadState`
        let invalid_flags: [u16; 10] = [1, 2, 8, 32, 64, 128, 512, 1024, 4096, 8192];
        for flag in invalid_flags {
            let outcome = ReadState::try_from(flag);
            assert!(matches!(outcome, Err(Error::UnknownAlignState(_))));
        }
    }

    /// Tests that displaying a `ReadState` and parsing the result gives it back
    #[test]
    fn readstate_string_consistency() {
        for state in ALL_STATES {
            let string_repr = state.to_string();
            let parsed_state: ReadState = string_repr.parse().expect("should parse");
            assert_eq!(state, parsed_state);
        }
    }

    /// Tests `ReadState` `from_str` with a string that names no state
    #[test]
    #[should_panic(expected = "UnknownAlignState")]
    fn readstate_from_str_invalid_state() {
        let _result: ReadState = "invalid_state".parse().unwrap();
    }

    /// Tests `ReadState` `from_str` with an empty string
    #[test]
    #[should_panic(expected = "UnknownAlignState")]
    fn readstate_from_str_empty_string() {
        let _result: ReadState = "".parse().unwrap();
    }

    /// Tests `ReadState` `from_str` with only the first half of a valid name
    #[test]
    #[should_panic(expected = "UnknownAlignState")]
    fn readstate_from_str_incomplete_string() {
        let _result: ReadState = "primary".parse().unwrap();
    }

    /// Tests that sampling `ReadState` from `StandardUniform` reaches all variants
    #[test]
    fn readstate_random_generation_all_variants() {
        let mut rng = rand::rng();

        // Draw many random states; duplicates collapse inside the set
        let generated_states: std::collections::HashSet<ReadState> =
            (0..1000).map(|_| rng.random()).collect();

        // All 7 variants, and nothing else, must have shown up
        assert_eq!(generated_states.len(), 7);
        for state in ALL_STATES {
            assert!(generated_states.contains(&state));
        }
    }
}