yara_mapper/record.rs
/// One CIGAR operation, stored as its BAM operation code together with its length.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CigarOp {
    /// BAM operation code: M=0, I=1, D=2, N=3, S=4, H=5, P=6, `=`=7, X=8.
    pub op: u8,
    /// Length of the operation.
    pub len: u32,
}

impl CigarOp {
    /// Unpacks a BAM-encoded `u32` laid out as `len << 4 | op`.
    ///
    /// The low four bits become [`CigarOp::op`]; the remaining upper bits become
    /// [`CigarOp::len`]. The operation code is not validated, so codes 9..=15
    /// are accepted as-is.
    #[must_use]
    pub fn from_bam(encoded: u32) -> Self {
        const OP_MASK: u32 = 0xF;
        const LEN_SHIFT: u32 = 4;

        let len = encoded >> LEN_SHIFT;
        // The mask keeps only four bits, so narrowing to `u8` cannot lose data.
        let op = (encoded & OP_MASK) as u8;
        Self { op, len }
    }

    /// Returns the SAM character for this operation.
    ///
    /// Codes outside the known range `0..=8` map to `'?'`.
    #[must_use]
    pub fn op_char(&self) -> char {
        // Indexed by BAM operation code.
        const SAM_OPS: [char; 9] = ['M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X'];

        SAM_OPS.get(usize::from(self.op)).copied().unwrap_or('?')
    }
}

impl std::fmt::Display for CigarOp {
    /// Writes the operation in SAM form: the length followed by the operation character.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (len, code) = (self.len, self.op_char());
        write!(f, "{len}{code}")
    }
}
40
41/// A single alignment record returned by the YARA mapper.
42///
43/// This is a fully-owned Rust type — all heap data (CIGAR, sequence, quality,
44/// XA tag) has been copied from the C++ side and the C++ memory freed.
45#[derive(Debug, Clone)]
46#[expect(clippy::struct_excessive_bools, reason = "mirrors the C FFI record layout")]
47pub struct YaraRecord {
48 /// Index of the read pair in the input batch (0-based).
49 pub read_pair_index: u32,
50 /// Whether this record is for the first read in the pair.
51 pub is_read1: bool,
52
53 // Alignment position
54 /// Reference contig index.
55 pub contig_id: u32,
56 /// 0-based leftmost position on the reference.
57 pub pos: u32,
58 /// Whether the read is mapped to the reverse strand.
59 pub is_reverse: bool,
60 /// Whether this is a secondary alignment.
61 pub is_secondary: bool,
62 /// Whether the read is unmapped.
63 pub is_unmapped: bool,
64
65 // Alignment quality
66 /// Mapping quality.
67 pub mapq: u8,
68 /// Edit distance (NM tag).
69 pub nm: u8,
70 /// Number of co-optimal alignments (X0 tag).
71 pub x0: u16,
72 /// Number of sub-optimal alignments (X1 tag).
73 pub x1: u16,
74
75 // Mate info
76 /// Mate's reference contig index.
77 pub mate_contig_id: u32,
78 /// Mate's 0-based position.
79 pub mate_pos: u32,
80 /// Template length (TLEN).
81 pub tlen: i32,
82 /// Full SAM flag field.
83 pub flag: u16,
84
85 // CIGAR
86 /// CIGAR operations (empty for secondaries without `align_secondary`).
87 pub cigar: Vec<CigarOp>,
88
89 // Sequence and quality
90 /// Read sequence (None for secondary records).
91 pub seq: Option<Vec<u8>>,
92 /// Base qualities (None for secondary records).
93 pub qual: Option<Vec<u8>>,
94
95 /// XA tag string (only when `secondary_mode=Tag`, otherwise None).
96 pub xa: Option<String>,
97}
98
99impl YaraRecord {
100 /// CIGAR string in SAM format (e.g., "50M2I48M").
101 #[must_use]
102 pub fn cigar_string(&self) -> String {
103 use std::fmt::Write;
104 let mut s = String::with_capacity(self.cigar.len() * 4);
105 for op in &self.cigar {
106 write!(s, "{}{}", op.len, op.op_char()).unwrap();
107 }
108 s
109 }
110}