// nanalogue 0.1.9
//
// BAM/Mod BAM parsing and analysis tool with a single-molecule focus
// Documentation
//! `ReadState` enum for representing BAM alignment states
//! Handles conversion between internal representation and BAM flags

use crate::Error;
use rand::Rng;
use rand::distr::{Distribution, StandardUniform};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;

/// Alignment state of a read: six mapped states (primary, secondary or
/// supplementary, each on the forward or reverse strand) plus unmapped,
/// i.e. seven variants in total.
///
/// The `serde` name of each variant is the same string that the `FromStr`
/// and `Display` implementations in this file use. The default variant is
/// `PrimaryFwd`.
#[derive(Debug, Clone, Default, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ReadState {
    #[default]
    /// Primary alignment to the reference strand (BAM flag 0)
    #[serde(rename = "primary_forward")]
    PrimaryFwd,
    /// Primary alignment opposite the reference strand (BAM flag 16)
    #[serde(rename = "primary_reverse")]
    PrimaryRev,
    /// Secondary alignment to the reference strand (BAM flag 256)
    #[serde(rename = "secondary_forward")]
    SecondaryFwd,
    /// Secondary alignment opposite the reference strand (BAM flag 272)
    #[serde(rename = "secondary_reverse")]
    SecondaryRev,
    /// Supplementary alignment to the reference strand (BAM flag 2048)
    #[serde(rename = "supplementary_forward")]
    SupplementaryFwd,
    /// Supplementary alignment opposite the reference strand (BAM flag 2064)
    #[serde(rename = "supplementary_reverse")]
    SupplementaryRev,
    /// Marked as unmapped in the BAM file (BAM flag 4). We are assuming
    /// that unmapped sequences will not be stored as reversed
    /// complements, as what would be the point of that?
    /// Consequently there is only one unmapped variant, with no strand.
    #[serde(rename = "unmapped")]
    Unmapped,
}

// Lets `StandardUniform` hand out a randomly chosen `ReadState`
impl Distribution<ReadState> for StandardUniform {
    /// Draws a number with `random_range(0..7)` and maps it onto
    /// one of the seven `ReadState` variants
    fn sample<R>(&self, rng: &mut R) -> ReadState
    where
        R: Rng + ?Sized,
    {
        // One slot per variant; the range above never yields anything else.
        let pick = rng.random_range(0..7);
        match pick {
            0 => ReadState::PrimaryFwd,
            1 => ReadState::PrimaryRev,
            2 => ReadState::SecondaryFwd,
            3 => ReadState::SecondaryRev,
            4 => ReadState::SupplementaryFwd,
            5 => ReadState::SupplementaryRev,
            6 => ReadState::Unmapped,
            _ => unreachable!(),
        }
    }
}

// Maps each `ReadState` variant onto its BAM flag value
impl From<ReadState> for u16 {
    /// Turns our internal alignment state into the BAM flag number,
    /// built from the individual flag bits listed below
    fn from(value: ReadState) -> Self {
        /// Flag bit carried by unmapped reads
        const UNMAPPED: u16 = 4;
        /// Flag bit carried by reverse-strand alignments
        const REVERSE: u16 = 16;
        /// Flag bit carried by secondary alignments
        const SECONDARY: u16 = 256;
        /// Flag bit carried by supplementary alignments
        const SUPPLEMENTARY: u16 = 2048;

        match value {
            ReadState::PrimaryFwd => 0,
            ReadState::PrimaryRev => REVERSE,
            ReadState::SecondaryFwd => SECONDARY,
            ReadState::SecondaryRev => SECONDARY | REVERSE,
            ReadState::SupplementaryFwd => SUPPLEMENTARY,
            ReadState::SupplementaryRev => SUPPLEMENTARY | REVERSE,
            ReadState::Unmapped => UNMAPPED,
        }
    }
}

// Implements conversion of the standard BAM flag format into ReadState.
// NOTE: this function is geared towards our API where we do not deal with
// paired reads etc. which have other flags set in the BAM record.
impl TryFrom<u16> for ReadState {
    type Error = Error;
    /// converts BAM flag format to our internal representation
    fn try_from(value: u16) -> Result<ReadState, Error> {
        match value {
            0 => Ok(ReadState::PrimaryFwd),
            4 => Ok(ReadState::Unmapped),
            16 => Ok(ReadState::PrimaryRev),
            256 => Ok(ReadState::SecondaryFwd),
            272 => Ok(ReadState::SecondaryRev),
            2048 => Ok(ReadState::SupplementaryFwd),
            2064 => Ok(ReadState::SupplementaryRev),
            v => Err(Error::UnknownAlignState(format!(
                "BAM flag {v} cannot be converted to our `ReadState` variants"
            ))),
        }
    }
}

/// Implements from string for `ReadState`
///
/// ```
/// use nanalogue_core::ReadState;
/// use std::str::FromStr;
///
/// // Primary alignments
/// let state = ReadState::from_str("primary_forward")?;
/// assert_eq!(state, ReadState::PrimaryFwd);
/// # Ok::<(), nanalogue_core::Error>(())
/// ```
///
/// ```
/// # use nanalogue_core::ReadState;
/// # use std::str::FromStr;
/// #
/// // Secondary alignments
/// let state = ReadState::from_str("secondary_reverse")?;
/// assert_eq!(state, ReadState::SecondaryRev);
/// # Ok::<(), nanalogue_core::Error>(())
/// ```
///
/// ```
/// # use nanalogue_core::ReadState;
/// # use std::str::FromStr;
/// #
/// // Supplementary alignments
/// let state = ReadState::from_str("supplementary_forward")?;
/// assert_eq!(state, ReadState::SupplementaryFwd);
/// # Ok::<(), nanalogue_core::Error>(())
/// ```
///
/// ```
/// # use nanalogue_core::ReadState;
/// # use std::str::FromStr;
/// #
/// // Unmapped reads
/// let state = ReadState::from_str("unmapped")?;
/// assert_eq!(state, ReadState::Unmapped);
/// # Ok::<(), nanalogue_core::Error>(())
/// ```
///
/// ```should_panic
/// # use nanalogue_core::ReadState;
/// # use std::str::FromStr;
/// #
/// // Invalid string should error
/// let state = ReadState::from_str("invalid_state")?;
/// # Ok::<(), nanalogue_core::Error>(())
/// ```
impl FromStr for ReadState {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "primary_forward" => Ok(ReadState::PrimaryFwd),
            "primary_reverse" => Ok(ReadState::PrimaryRev),
            "secondary_forward" => Ok(ReadState::SecondaryFwd),
            "secondary_reverse" => Ok(ReadState::SecondaryRev),
            "supplementary_forward" => Ok(ReadState::SupplementaryFwd),
            "supplementary_reverse" => Ok(ReadState::SupplementaryRev),
            "unmapped" => Ok(ReadState::Unmapped),
            v => Err(Error::UnknownAlignState(format!(
                "{v} cannot be converted to `ReadState` variant"
            ))),
        }
    }
}

/// Implements printing of read state
impl fmt::Display for ReadState {
    #[expect(
        clippy::pattern_type_mismatch,
        reason = "simple function, notation cleaner without *"
    )]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ReadState::PrimaryFwd => "primary_forward",
            ReadState::SecondaryFwd => "secondary_forward",
            ReadState::SupplementaryFwd => "supplementary_forward",
            ReadState::PrimaryRev => "primary_reverse",
            ReadState::SecondaryRev => "secondary_reverse",
            ReadState::SupplementaryRev => "supplementary_reverse",
            ReadState::Unmapped => "unmapped",
        }
        .fmt(f)
    }
}

impl ReadState {
    /// Checks if the state is unmapped
    #[expect(
        clippy::pattern_type_mismatch,
        reason = "simple function, notation cleaner without *"
    )]
    #[must_use]
    pub fn is_unmapped(&self) -> bool {
        match self {
            ReadState::Unmapped => true,
            ReadState::PrimaryFwd
            | ReadState::PrimaryRev
            | ReadState::SecondaryFwd
            | ReadState::SecondaryRev
            | ReadState::SupplementaryFwd
            | ReadState::SupplementaryRev => false,
        }
    }
    /// Gets the strand corresponding to the alignment type
    ///
    /// * '.' if unmapped
    /// * '+' if forward
    /// * '-' if reverse
    #[expect(
        clippy::pattern_type_mismatch,
        reason = "simple function, notation cleaner without *"
    )]
    #[must_use]
    pub fn strand(&self) -> char {
        match self {
            ReadState::Unmapped => '.',
            ReadState::PrimaryFwd | ReadState::SecondaryFwd | ReadState::SupplementaryFwd => '+',
            ReadState::PrimaryRev | ReadState::SecondaryRev | ReadState::SupplementaryRev => '-',
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ReadState` variant, shared by the tests that must cover them all
    const ALL_STATES: [ReadState; 7] = [
        ReadState::PrimaryFwd,
        ReadState::PrimaryRev,
        ReadState::SecondaryFwd,
        ReadState::SecondaryRev,
        ReadState::SupplementaryFwd,
        ReadState::SupplementaryRev,
        ReadState::Unmapped,
    ];

    /// Checks that `ReadState` -> u16 -> `ReadState` returns the starting state
    #[test]
    fn readstate_u16_conversion_roundtrip() {
        for state in ALL_STATES {
            let flag = u16::from(state);
            let recovered_state =
                ReadState::try_from(flag).expect("conversion from u16 should work");
            assert_eq!(state, recovered_state);
        }
    }

    /// Checks the exact BAM flag value produced for each `ReadState`
    #[test]
    fn readstate_specific_flag_values() {
        let expected: [(ReadState, u16); 7] = [
            (ReadState::PrimaryFwd, 0),
            (ReadState::Unmapped, 4),
            (ReadState::PrimaryRev, 16),
            (ReadState::SecondaryFwd, 256),
            (ReadState::SecondaryRev, 272),
            (ReadState::SupplementaryFwd, 2048),
            (ReadState::SupplementaryRev, 2064),
        ];
        for (state, flag) in expected {
            assert_eq!(u16::from(state), flag);
        }
    }

    /// Checks that u16 values outside our seven flags are rejected
    #[test]
    fn readstate_invalid_u16_values() {
        // A selection of single flag bits that we do not support
        let invalid_flags: [u16; 10] = [1, 2, 8, 32, 64, 128, 512, 1024, 4096, 8192];
        for flag in invalid_flags {
            let outcome = ReadState::try_from(flag);
            assert!(matches!(outcome, Err(Error::UnknownAlignState(_))));
        }
    }

    /// Checks that printing a `ReadState` and parsing it back agree
    #[test]
    fn readstate_string_consistency() {
        for state in ALL_STATES {
            let string_repr = state.to_string();
            let parsed_state: ReadState = string_repr.parse().expect("should parse");
            assert_eq!(state, parsed_state);
        }
    }

    /// Checks that `from_str` fails on a string that names no state
    #[test]
    #[should_panic(expected = "UnknownAlignState")]
    fn readstate_from_str_invalid_state() {
        let _result: ReadState = "invalid_state".parse::<ReadState>().unwrap();
    }

    /// Checks that `from_str` fails on the empty string
    #[test]
    #[should_panic(expected = "UnknownAlignState")]
    fn readstate_from_str_empty_string() {
        let _result: ReadState = "".parse::<ReadState>().unwrap();
    }

    /// Checks that `from_str` fails on a name with the strand part missing
    #[test]
    #[should_panic(expected = "UnknownAlignState")]
    fn readstate_from_str_incomplete_string() {
        let _result: ReadState = "primary".parse::<ReadState>().unwrap();
    }

    /// Checks that random generation through `StandardUniform` reaches every variant
    #[test]
    fn readstate_random_generation_all_variants() {
        let mut rng = rand::rng();

        // Draw many states; duplicates collapse inside the set
        let generated_states: std::collections::HashSet<ReadState> =
            (0..1000).map(|_| rng.random::<ReadState>()).collect();

        // All 7 variants should have turned up
        assert_eq!(generated_states.len(), 7);
        for state in ALL_STATES {
            assert!(generated_states.contains(&state));
        }
    }
}