Skip to main content

rustalign_aligner/
paired_end.rs

1//! Paired-end alignment policy and utilities
2//!
3//! This module implements paired-end alignment support, including:
4//! - Orientation policies (FR, RF, FF, RR)
5//! - Insert size constraints
6//! - Mate finding logic
7//! - Concordant/discordant pair classification
8
9use rustalign_common::Score;
10
11/// Paired-end orientation policy
12///
13/// Defines the expected orientation of mate pairs.
14/// This matches standard paired-end sequencing orientation types.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
16pub enum PePolicy {
17    /// Forward-Reverse: R1 forward, R2 reverse (most common for Illumina)
18    #[default]
19    FR,
20    /// Reverse-Forward: R1 reverse, R2 forward
21    RF,
22    /// Forward-Forward: both mates in forward orientation
23    FF,
24    /// Reverse-Reverse: both mates in reverse orientation
25    RR,
26}
27
28/// Paired-end constraints
29///
30/// Defines the constraints for valid paired-end alignments.
31#[derive(Debug, Clone, Copy)]
32pub struct PeConstraints {
33    /// Minimum fragment/insert size (outer distance between mates)
34    pub min_insert: u32,
35    /// Maximum fragment/insert size
36    pub max_insert: u32,
37    /// Orientation policy
38    pub policy: PePolicy,
39    /// Whether to allow overlapping mates
40    pub allow_overlap: bool,
41    /// Whether to allow dovetailing (one mate extends past the other)
42    pub allow_dovetail: bool,
43    /// Whether to allow one mate containing the other
44    pub allow_contain: bool,
45}
46
47impl Default for PeConstraints {
48    fn default() -> Self {
49        Self {
50            min_insert: 0,
51            max_insert: 500,
52            policy: PePolicy::FR,
53            allow_overlap: true,
54            allow_dovetail: false,
55            allow_contain: false,
56        }
57    }
58}
59
60impl PeConstraints {
61    /// Create new paired-end constraints
62    pub fn new(min_insert: u32, max_insert: u32, policy: PePolicy) -> Self {
63        Self {
64            min_insert,
65            max_insert,
66            policy,
67            ..Default::default()
68        }
69    }
70
71    /// Check if a pair of alignments is concordant (properly paired)
72    ///
73    /// # Arguments
74    /// * `pos1` - Position of mate 1 (0-based)
75    /// * `len1` - Length of mate 1
76    /// * `strand1` - Strand of mate 1 (true = forward, false = reverse)
77    /// * `chr1` - Chromosome of mate 1
78    /// * `pos2` - Position of mate 2 (0-based)
79    /// * `len2` - Length of mate 2
80    /// * `strand2` - Strand of mate 2
81    /// * `chr2` - Chromosome of mate 2
82    ///
83    /// # Returns
84    /// `Some(insert_size)` if concordant, `None` if discordant
85    #[allow(clippy::too_many_arguments)]
86    pub fn is_concordant(
87        &self,
88        pos1: u64,
89        len1: u32,
90        strand1: bool,
91        chr1: u32,
92        pos2: u64,
93        len2: u32,
94        strand2: bool,
95        chr2: u32,
96    ) -> Option<i32> {
97        // Must be on same chromosome
98        if chr1 != chr2 {
99            return None;
100        }
101
102        // Check orientation
103        let orientation_ok = match self.policy {
104            PePolicy::FR => strand1 && !strand2,  // R1 forward, R2 reverse
105            PePolicy::RF => !strand1 && strand2,  // R1 reverse, R2 forward
106            PePolicy::FF => strand1 && strand2,   // Both forward
107            PePolicy::RR => !strand1 && !strand2, // Both reverse
108        };
109
110        if !orientation_ok {
111            return None;
112        }
113
114        // Calculate insert size (outer distance between mates)
115        // Insert size = distance from leftmost base to rightmost base
116        let (left_pos, _left_len, right_pos) = if pos1 <= pos2 {
117            (pos1, len1, pos2 + len2 as u64)
118        } else {
119            (pos2, len2, pos1 + len1 as u64)
120        };
121
122        let insert_size = (right_pos - left_pos) as i32;
123
124        // Check insert size constraints
125        if insert_size < self.min_insert as i32 || insert_size > self.max_insert as i32 {
126            return None;
127        }
128
129        // Check for containment (one read entirely within the other)
130        if !self.allow_contain {
131            let r1_end = pos1 + len1 as u64;
132            let r2_end = pos2 + len2 as u64;
133
134            // R1 contains R2
135            if pos1 <= pos2 && r1_end >= r2_end {
136                return None;
137            }
138            // R2 contains R1
139            if pos2 <= pos1 && r2_end >= r1_end {
140                return None;
141            }
142        }
143
144        // Check for dovetailing (one read extends past the end of the other)
145        if !self.allow_dovetail {
146            // For FR policy with normal orientation, R1 should be left of R2
147            // Dovetailing means R2 starts before R1 ends significantly
148            if self.policy == PePolicy::FR && strand1 && !strand2 {
149                let r1_end = pos1 + len1 as u64;
150                if pos2 < r1_end {
151                    // Mates overlap - check if it's just overlap or dovetail
152                    let overlap = r1_end - pos2;
153                    if overlap > len2 as u64 / 2 {
154                        // Significant dovetail
155                        return None;
156                    }
157                }
158            }
159        }
160
161        Some(insert_size)
162    }
163
164    /// Calculate the expected position range for the opposite mate
165    ///
166    /// Given an anchor alignment, calculate where the opposite mate
167    /// should align based on insert size constraints.
168    ///
169    /// # Arguments
170    /// * `anchor_pos` - Position of the anchor mate (0-based)
171    /// * `anchor_len` - Length of the anchor mate
172    /// * `anchor_is_r1` - True if anchor is read 1, false if read 2
173    /// * `anchor_strand` - True if anchor is forward strand
174    ///
175    /// # Returns
176    /// (start_pos, end_pos, expected_strand) for the opposite mate search
177    pub fn mate_search_range(
178        &self,
179        anchor_pos: u64,
180        anchor_len: u32,
181        anchor_is_r1: bool,
182        anchor_strand: bool,
183    ) -> (u64, u64, bool) {
184        // Determine expected strand for opposite mate
185        let opp_strand = match self.policy {
186            PePolicy::FR => !anchor_strand, // Opposite strand
187            PePolicy::RF => !anchor_strand, // Opposite strand
188            PePolicy::FF => anchor_strand,  // Same strand
189            PePolicy::RR => anchor_strand,  // Same strand
190        };
191
192        // Calculate search range based on policy and anchor position
193        let anchor_end = anchor_pos + anchor_len as u64;
194
195        let (start, end) = match self.policy {
196            PePolicy::FR => {
197                // FR: R1 forward left of R2 reverse
198                if anchor_is_r1 {
199                    // Anchor is R1 (forward), mate should be to the right
200                    let start = anchor_pos.saturating_sub(self.max_insert as u64);
201                    let end = anchor_end + self.max_insert as u64;
202                    (start, end)
203                } else {
204                    // Anchor is R2 (reverse), mate should be to the left
205                    let start = anchor_end.saturating_sub(self.max_insert as u64);
206                    let end = anchor_pos + self.max_insert as u64;
207                    (start, end)
208                }
209            }
210            PePolicy::RF => {
211                // RF: R1 reverse left of R2 forward
212                if anchor_is_r1 {
213                    // Anchor is R1 (reverse), mate should be to the right
214                    let start = anchor_pos.saturating_sub(self.max_insert as u64);
215                    let end = anchor_end + self.max_insert as u64;
216                    (start, end)
217                } else {
218                    // Anchor is R2 (forward), mate should be to the left
219                    let start = anchor_end.saturating_sub(self.max_insert as u64);
220                    let end = anchor_pos + self.max_insert as u64;
221                    (start, end)
222                }
223            }
224            PePolicy::FF | PePolicy::RR => {
225                // FF/RR: Mates can be on either side
226                let start = anchor_end.saturating_sub(self.max_insert as u64);
227                let end = anchor_pos + self.max_insert as u64;
228                (start, end)
229            }
230        };
231
232        (start, end, opp_strand)
233    }
234}
235
236/// Classification of paired-end alignment types
237#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
238pub enum PeAlignmentType {
239    /// Normal concordant alignment
240    #[default]
241    Concordant,
242    /// Mates overlap but neither contains the other
243    Overlap,
244    /// One mate contains the other
245    Contain,
246    /// Mates dovetail (one extends past the other)
247    Dovetail,
248    /// Discordant alignment (doesn't meet constraints)
249    Discordant,
250}
251
252/// Result of a paired-end alignment
253#[derive(Debug, Clone, Default)]
254pub struct PairedAlignment {
255    /// Chromosome index for mate 1
256    pub chr1: u32,
257    /// Position for mate 1 (0-based)
258    pub pos1: u64,
259    /// Strand for mate 1 (true = forward)
260    pub strand1: bool,
261    /// CIGAR for mate 1
262    pub cigar1: String,
263    /// Score for mate 1
264    pub score1: Score,
265    /// Number of edits for mate 1
266    pub edits1: u32,
267
268    /// Chromosome index for mate 2
269    pub chr2: u32,
270    /// Position for mate 2 (0-based)
271    pub pos2: u64,
272    /// Strand for mate 2 (true = forward)
273    pub strand2: bool,
274    /// CIGAR for mate 2
275    pub cigar2: String,
276    /// Score for mate 2
277    pub score2: Score,
278    /// Number of edits for mate 2
279    pub edits2: u32,
280
281    /// Insert size (TLEN in SAM)
282    pub insert_size: i32,
283    /// Alignment type
284    pub alignment_type: PeAlignmentType,
285    /// Mapping quality
286    pub mapq: u8,
287}
288
289impl PairedAlignment {
290    /// Create a new paired alignment
291    pub fn new() -> Self {
292        Self::default()
293    }
294
295    /// Check if this is a concordant alignment
296    pub fn is_concordant(&self) -> bool {
297        self.alignment_type == PeAlignmentType::Concordant
298    }
299
300    /// Calculate SAM flags for mate 1
301    pub fn sam_flag1(&self) -> u16 {
302        let mut flag: u16 = 0;
303        flag |= 1; // PAIRED
304        flag |= 2; // PROPER_PAIR (if concordant)
305        if !self.strand1 {
306            flag |= 16; // REVERSE
307        }
308        if !self.strand2 {
309            flag |= 32; // MATE_REVERSE
310        }
311        flag |= 64; // FIRST_IN_PAIR
312        flag
313    }
314
315    /// Calculate SAM flags for mate 2
316    pub fn sam_flag2(&self) -> u16 {
317        let mut flag: u16 = 0;
318        flag |= 1; // PAIRED
319        flag |= 2; // PROPER_PAIR (if concordant)
320        if !self.strand2 {
321            flag |= 16; // REVERSE
322        }
323        if !self.strand1 {
324            flag |= 32; // MATE_REVERSE
325        }
326        flag |= 128; // SECOND_IN_PAIR
327        flag
328    }
329}
330
331#[cfg(test)]
332mod tests {
333    use super::*;
334
335    #[test]
336    fn test_pe_policy_default() {
337        let policy = PePolicy::default();
338        assert_eq!(policy, PePolicy::FR);
339    }
340
341    #[test]
342    fn test_pe_constraints_default() {
343        let constraints = PeConstraints::default();
344        assert_eq!(constraints.min_insert, 0);
345        assert_eq!(constraints.max_insert, 500);
346        assert_eq!(constraints.policy, PePolicy::FR);
347    }
348
349    #[test]
350    fn test_concordant_fr_pair() {
351        let constraints = PeConstraints::new(100, 500, PePolicy::FR);
352
353        // FR pair: R1 forward at pos 1000, R2 reverse at pos 1200
354        // Insert size = 1200 + 100 - 1000 = 300
355        let result = constraints.is_concordant(
356            1000, 100, true, 0, // R1: pos=1000, len=100, forward, chr0
357            1200, 100, false, 0, // R2: pos=1200, len=100, reverse, chr0
358        );
359
360        assert_eq!(result, Some(300));
361    }
362
363    #[test]
364    fn test_discordant_wrong_orientation() {
365        let constraints = PeConstraints::new(100, 500, PePolicy::FR);
366
367        // Wrong orientation: both forward
368        let result = constraints.is_concordant(
369            1000, 100, true, 0, 1200, 100, true, 0, // Wrong: R2 should be reverse
370        );
371
372        assert_eq!(result, None);
373    }
374
375    #[test]
376    fn test_discordant_wrong_insert_size() {
377        let constraints = PeConstraints::new(100, 500, PePolicy::FR);
378
379        // Insert size too large
380        let result = constraints.is_concordant(
381            1000, 100, true, 0, 2000, 100, false, 0, // Insert = 2000 + 100 - 1000 = 1100 > 500
382        );
383
384        assert_eq!(result, None);
385    }
386
387    #[test]
388    fn test_mate_search_range_fr_r1() {
389        let constraints = PeConstraints::new(100, 500, PePolicy::FR);
390
391        // R1 forward at position 1000, length 100
392        let (start, end, strand) = constraints.mate_search_range(1000, 100, true, true);
393
394        assert_eq!(start, 500); // 1000 - 500
395        assert_eq!(end, 1600); // 1100 + 500
396        assert!(!strand); // R2 should be reverse
397    }
398
399    #[test]
400    fn test_paired_alignment_flags() {
401        let mut aln = PairedAlignment::new();
402        aln.strand1 = true;
403        aln.strand2 = false;
404        aln.alignment_type = PeAlignmentType::Concordant;
405
406        let flag1 = aln.sam_flag1();
407        let flag2 = aln.sam_flag2();
408
409        assert_eq!(flag1 & 1, 1); // PAIRED
410        assert_eq!(flag1 & 2, 2); // PROPER_PAIR
411        assert_eq!(flag1 & 64, 64); // FIRST_IN_PAIR
412        assert_eq!(flag1 & 16, 0); // R1 forward
413        assert_eq!(flag1 & 32, 32); // Mate reverse
414
415        assert_eq!(flag2 & 1, 1); // PAIRED
416        assert_eq!(flag2 & 2, 2); // PROPER_PAIR
417        assert_eq!(flag2 & 128, 128); // SECOND_IN_PAIR
418        assert_eq!(flag2 & 16, 16); // R2 reverse
419        assert_eq!(flag2 & 32, 0); // Mate forward
420    }
421}