Skip to main content

yara_mapper/
record.rs

/// One CIGAR element: an operation code paired with its run length.
///
/// The numeric codes follow the BAM encoding, where the low four bits of a
/// packed `u32` carry the operation and the remaining bits carry the length.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct CigarOp {
    /// BAM operation code (`M`=0, `I`=1, `D`=2, `N`=3, `S`=4, `H`=5, `P`=6, `=`=7, `X`=8).
    pub op: u8,
    /// Run length of the operation.
    pub len: u32,
}

impl CigarOp {
    /// Unpacks a BAM-encoded CIGAR word, laid out as `len << 4 | op`.
    ///
    /// The low four bits become `op` and the upper 28 bits become `len`.
    /// The operation code is not validated here.
    #[must_use]
    pub fn from_bam(encoded: u32) -> Self {
        // The mask keeps the value within 0..=15, so narrowing to `u8` loses nothing.
        let op = (encoded & 0xF) as u8;
        let len = encoded >> 4;
        Self { op, len }
    }

    /// Returns the SAM character for this operation, or `'?'` when the code
    /// lies outside the known range `0..=8`.
    #[must_use]
    pub fn op_char(&self) -> char {
        // Indexed by operation code; anything past the end has no SAM symbol.
        const SAM_SYMBOLS: &[u8; 9] = b"MIDNSHP=X";
        SAM_SYMBOLS
            .get(usize::from(self.op))
            .map_or('?', |&symbol| char::from(symbol))
    }
}

impl std::fmt::Display for CigarOp {
    /// Writes the length immediately followed by the operation character, e.g. `50M`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { len, .. } = *self;
        let symbol = self.op_char();
        write!(f, "{len}{symbol}")
    }
}
40
41/// A single alignment record returned by the YARA mapper.
42///
43/// This is a fully-owned Rust type — all heap data (CIGAR, sequence, quality,
44/// XA tag) has been copied from the C++ side and the C++ memory freed.
45#[derive(Debug, Clone)]
46#[expect(clippy::struct_excessive_bools, reason = "mirrors the C FFI record layout")]
47pub struct YaraRecord {
48    /// Index of the read pair in the input batch (0-based).
49    pub read_pair_index: u32,
50    /// Whether this record is for the first read in the pair.
51    pub is_read1: bool,
52
53    // Alignment position
54    /// Reference contig index.
55    pub contig_id: u32,
56    /// 0-based leftmost position on the reference.
57    pub pos: u32,
58    /// Whether the read is mapped to the reverse strand.
59    pub is_reverse: bool,
60    /// Whether this is a secondary alignment.
61    pub is_secondary: bool,
62    /// Whether the read is unmapped.
63    pub is_unmapped: bool,
64
65    // Alignment quality
66    /// Mapping quality.
67    pub mapq: u8,
68    /// Edit distance (NM tag).
69    pub nm: u8,
70    /// Number of co-optimal alignments (X0 tag).
71    pub x0: u16,
72    /// Number of sub-optimal alignments (X1 tag).
73    pub x1: u16,
74
75    // Mate info
76    /// Mate's reference contig index.
77    pub mate_contig_id: u32,
78    /// Mate's 0-based position.
79    pub mate_pos: u32,
80    /// Template length (TLEN).
81    pub tlen: i32,
82    /// Full SAM flag field.
83    pub flag: u16,
84
85    // CIGAR
86    /// CIGAR operations (empty for secondaries without `align_secondary`).
87    pub cigar: Vec<CigarOp>,
88
89    // Sequence and quality
90    /// Read sequence (None for secondary records).
91    pub seq: Option<Vec<u8>>,
92    /// Base qualities (None for secondary records).
93    pub qual: Option<Vec<u8>>,
94
95    /// XA tag string (only when `secondary_mode=Tag`, otherwise None).
96    pub xa: Option<String>,
97}
98
99impl YaraRecord {
100    /// CIGAR string in SAM format (e.g., "50M2I48M").
101    #[must_use]
102    pub fn cigar_string(&self) -> String {
103        use std::fmt::Write;
104        let mut s = String::with_capacity(self.cigar.len() * 4);
105        for op in &self.cigar {
106            write!(s, "{}{}", op.len, op.op_char()).unwrap();
107        }
108        s
109    }
110}