rustalign_aligner/paired_end.rs
1//! Paired-end alignment policy and utilities
2//!
3//! This module implements paired-end alignment support, including:
4//! - Orientation policies (FR, RF, FF, RR)
5//! - Insert size constraints
6//! - Mate finding logic
7//! - Concordant/discordant pair classification
8
9use rustalign_common::Score;
10
11/// Paired-end orientation policy
12///
13/// Defines the expected orientation of mate pairs.
14/// This matches standard paired-end sequencing orientation types.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
16pub enum PePolicy {
17 /// Forward-Reverse: R1 forward, R2 reverse (most common for Illumina)
18 #[default]
19 FR,
20 /// Reverse-Forward: R1 reverse, R2 forward
21 RF,
22 /// Forward-Forward: both mates in forward orientation
23 FF,
24 /// Reverse-Reverse: both mates in reverse orientation
25 RR,
26}
27
28/// Paired-end constraints
29///
30/// Defines the constraints for valid paired-end alignments.
31#[derive(Debug, Clone, Copy)]
32pub struct PeConstraints {
33 /// Minimum fragment/insert size (outer distance between mates)
34 pub min_insert: u32,
35 /// Maximum fragment/insert size
36 pub max_insert: u32,
37 /// Orientation policy
38 pub policy: PePolicy,
39 /// Whether to allow overlapping mates
40 pub allow_overlap: bool,
41 /// Whether to allow dovetailing (one mate extends past the other)
42 pub allow_dovetail: bool,
43 /// Whether to allow one mate containing the other
44 pub allow_contain: bool,
45}
46
47impl Default for PeConstraints {
48 fn default() -> Self {
49 Self {
50 min_insert: 0,
51 max_insert: 500,
52 policy: PePolicy::FR,
53 allow_overlap: true,
54 allow_dovetail: false,
55 allow_contain: false,
56 }
57 }
58}
59
60impl PeConstraints {
61 /// Create new paired-end constraints
62 pub fn new(min_insert: u32, max_insert: u32, policy: PePolicy) -> Self {
63 Self {
64 min_insert,
65 max_insert,
66 policy,
67 ..Default::default()
68 }
69 }
70
71 /// Check if a pair of alignments is concordant (properly paired)
72 ///
73 /// # Arguments
74 /// * `pos1` - Position of mate 1 (0-based)
75 /// * `len1` - Length of mate 1
76 /// * `strand1` - Strand of mate 1 (true = forward, false = reverse)
77 /// * `chr1` - Chromosome of mate 1
78 /// * `pos2` - Position of mate 2 (0-based)
79 /// * `len2` - Length of mate 2
80 /// * `strand2` - Strand of mate 2
81 /// * `chr2` - Chromosome of mate 2
82 ///
83 /// # Returns
84 /// `Some(insert_size)` if concordant, `None` if discordant
85 #[allow(clippy::too_many_arguments)]
86 pub fn is_concordant(
87 &self,
88 pos1: u64,
89 len1: u32,
90 strand1: bool,
91 chr1: u32,
92 pos2: u64,
93 len2: u32,
94 strand2: bool,
95 chr2: u32,
96 ) -> Option<i32> {
97 // Must be on same chromosome
98 if chr1 != chr2 {
99 return None;
100 }
101
102 // Check orientation
103 let orientation_ok = match self.policy {
104 PePolicy::FR => strand1 && !strand2, // R1 forward, R2 reverse
105 PePolicy::RF => !strand1 && strand2, // R1 reverse, R2 forward
106 PePolicy::FF => strand1 && strand2, // Both forward
107 PePolicy::RR => !strand1 && !strand2, // Both reverse
108 };
109
110 if !orientation_ok {
111 return None;
112 }
113
114 // Calculate insert size (outer distance between mates)
115 // Insert size = distance from leftmost base to rightmost base
116 let (left_pos, _left_len, right_pos) = if pos1 <= pos2 {
117 (pos1, len1, pos2 + len2 as u64)
118 } else {
119 (pos2, len2, pos1 + len1 as u64)
120 };
121
122 let insert_size = (right_pos - left_pos) as i32;
123
124 // Check insert size constraints
125 if insert_size < self.min_insert as i32 || insert_size > self.max_insert as i32 {
126 return None;
127 }
128
129 // Check for containment (one read entirely within the other)
130 if !self.allow_contain {
131 let r1_end = pos1 + len1 as u64;
132 let r2_end = pos2 + len2 as u64;
133
134 // R1 contains R2
135 if pos1 <= pos2 && r1_end >= r2_end {
136 return None;
137 }
138 // R2 contains R1
139 if pos2 <= pos1 && r2_end >= r1_end {
140 return None;
141 }
142 }
143
144 // Check for dovetailing (one read extends past the end of the other)
145 if !self.allow_dovetail {
146 // For FR policy with normal orientation, R1 should be left of R2
147 // Dovetailing means R2 starts before R1 ends significantly
148 if self.policy == PePolicy::FR && strand1 && !strand2 {
149 let r1_end = pos1 + len1 as u64;
150 if pos2 < r1_end {
151 // Mates overlap - check if it's just overlap or dovetail
152 let overlap = r1_end - pos2;
153 if overlap > len2 as u64 / 2 {
154 // Significant dovetail
155 return None;
156 }
157 }
158 }
159 }
160
161 Some(insert_size)
162 }
163
164 /// Calculate the expected position range for the opposite mate
165 ///
166 /// Given an anchor alignment, calculate where the opposite mate
167 /// should align based on insert size constraints.
168 ///
169 /// # Arguments
170 /// * `anchor_pos` - Position of the anchor mate (0-based)
171 /// * `anchor_len` - Length of the anchor mate
172 /// * `anchor_is_r1` - True if anchor is read 1, false if read 2
173 /// * `anchor_strand` - True if anchor is forward strand
174 ///
175 /// # Returns
176 /// (start_pos, end_pos, expected_strand) for the opposite mate search
177 pub fn mate_search_range(
178 &self,
179 anchor_pos: u64,
180 anchor_len: u32,
181 anchor_is_r1: bool,
182 anchor_strand: bool,
183 ) -> (u64, u64, bool) {
184 // Determine expected strand for opposite mate
185 let opp_strand = match self.policy {
186 PePolicy::FR => !anchor_strand, // Opposite strand
187 PePolicy::RF => !anchor_strand, // Opposite strand
188 PePolicy::FF => anchor_strand, // Same strand
189 PePolicy::RR => anchor_strand, // Same strand
190 };
191
192 // Calculate search range based on policy and anchor position
193 let anchor_end = anchor_pos + anchor_len as u64;
194
195 let (start, end) = match self.policy {
196 PePolicy::FR => {
197 // FR: R1 forward left of R2 reverse
198 if anchor_is_r1 {
199 // Anchor is R1 (forward), mate should be to the right
200 let start = anchor_pos.saturating_sub(self.max_insert as u64);
201 let end = anchor_end + self.max_insert as u64;
202 (start, end)
203 } else {
204 // Anchor is R2 (reverse), mate should be to the left
205 let start = anchor_end.saturating_sub(self.max_insert as u64);
206 let end = anchor_pos + self.max_insert as u64;
207 (start, end)
208 }
209 }
210 PePolicy::RF => {
211 // RF: R1 reverse left of R2 forward
212 if anchor_is_r1 {
213 // Anchor is R1 (reverse), mate should be to the right
214 let start = anchor_pos.saturating_sub(self.max_insert as u64);
215 let end = anchor_end + self.max_insert as u64;
216 (start, end)
217 } else {
218 // Anchor is R2 (forward), mate should be to the left
219 let start = anchor_end.saturating_sub(self.max_insert as u64);
220 let end = anchor_pos + self.max_insert as u64;
221 (start, end)
222 }
223 }
224 PePolicy::FF | PePolicy::RR => {
225 // FF/RR: Mates can be on either side
226 let start = anchor_end.saturating_sub(self.max_insert as u64);
227 let end = anchor_pos + self.max_insert as u64;
228 (start, end)
229 }
230 };
231
232 (start, end, opp_strand)
233 }
234}
235
236/// Classification of paired-end alignment types
237#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
238pub enum PeAlignmentType {
239 /// Normal concordant alignment
240 #[default]
241 Concordant,
242 /// Mates overlap but neither contains the other
243 Overlap,
244 /// One mate contains the other
245 Contain,
246 /// Mates dovetail (one extends past the other)
247 Dovetail,
248 /// Discordant alignment (doesn't meet constraints)
249 Discordant,
250}
251
252/// Result of a paired-end alignment
253#[derive(Debug, Clone, Default)]
254pub struct PairedAlignment {
255 /// Chromosome index for mate 1
256 pub chr1: u32,
257 /// Position for mate 1 (0-based)
258 pub pos1: u64,
259 /// Strand for mate 1 (true = forward)
260 pub strand1: bool,
261 /// CIGAR for mate 1
262 pub cigar1: String,
263 /// Score for mate 1
264 pub score1: Score,
265 /// Number of edits for mate 1
266 pub edits1: u32,
267
268 /// Chromosome index for mate 2
269 pub chr2: u32,
270 /// Position for mate 2 (0-based)
271 pub pos2: u64,
272 /// Strand for mate 2 (true = forward)
273 pub strand2: bool,
274 /// CIGAR for mate 2
275 pub cigar2: String,
276 /// Score for mate 2
277 pub score2: Score,
278 /// Number of edits for mate 2
279 pub edits2: u32,
280
281 /// Insert size (TLEN in SAM)
282 pub insert_size: i32,
283 /// Alignment type
284 pub alignment_type: PeAlignmentType,
285 /// Mapping quality
286 pub mapq: u8,
287}
288
289impl PairedAlignment {
290 /// Create a new paired alignment
291 pub fn new() -> Self {
292 Self::default()
293 }
294
295 /// Check if this is a concordant alignment
296 pub fn is_concordant(&self) -> bool {
297 self.alignment_type == PeAlignmentType::Concordant
298 }
299
300 /// Calculate SAM flags for mate 1
301 pub fn sam_flag1(&self) -> u16 {
302 let mut flag: u16 = 0;
303 flag |= 1; // PAIRED
304 flag |= 2; // PROPER_PAIR (if concordant)
305 if !self.strand1 {
306 flag |= 16; // REVERSE
307 }
308 if !self.strand2 {
309 flag |= 32; // MATE_REVERSE
310 }
311 flag |= 64; // FIRST_IN_PAIR
312 flag
313 }
314
315 /// Calculate SAM flags for mate 2
316 pub fn sam_flag2(&self) -> u16 {
317 let mut flag: u16 = 0;
318 flag |= 1; // PAIRED
319 flag |= 2; // PROPER_PAIR (if concordant)
320 if !self.strand2 {
321 flag |= 16; // REVERSE
322 }
323 if !self.strand1 {
324 flag |= 32; // MATE_REVERSE
325 }
326 flag |= 128; // SECOND_IN_PAIR
327 flag
328 }
329}
330
331#[cfg(test)]
332mod tests {
333 use super::*;
334
335 #[test]
336 fn test_pe_policy_default() {
337 let policy = PePolicy::default();
338 assert_eq!(policy, PePolicy::FR);
339 }
340
341 #[test]
342 fn test_pe_constraints_default() {
343 let constraints = PeConstraints::default();
344 assert_eq!(constraints.min_insert, 0);
345 assert_eq!(constraints.max_insert, 500);
346 assert_eq!(constraints.policy, PePolicy::FR);
347 }
348
349 #[test]
350 fn test_concordant_fr_pair() {
351 let constraints = PeConstraints::new(100, 500, PePolicy::FR);
352
353 // FR pair: R1 forward at pos 1000, R2 reverse at pos 1200
354 // Insert size = 1200 + 100 - 1000 = 300
355 let result = constraints.is_concordant(
356 1000, 100, true, 0, // R1: pos=1000, len=100, forward, chr0
357 1200, 100, false, 0, // R2: pos=1200, len=100, reverse, chr0
358 );
359
360 assert_eq!(result, Some(300));
361 }
362
363 #[test]
364 fn test_discordant_wrong_orientation() {
365 let constraints = PeConstraints::new(100, 500, PePolicy::FR);
366
367 // Wrong orientation: both forward
368 let result = constraints.is_concordant(
369 1000, 100, true, 0, 1200, 100, true, 0, // Wrong: R2 should be reverse
370 );
371
372 assert_eq!(result, None);
373 }
374
375 #[test]
376 fn test_discordant_wrong_insert_size() {
377 let constraints = PeConstraints::new(100, 500, PePolicy::FR);
378
379 // Insert size too large
380 let result = constraints.is_concordant(
381 1000, 100, true, 0, 2000, 100, false, 0, // Insert = 2000 + 100 - 1000 = 1100 > 500
382 );
383
384 assert_eq!(result, None);
385 }
386
387 #[test]
388 fn test_mate_search_range_fr_r1() {
389 let constraints = PeConstraints::new(100, 500, PePolicy::FR);
390
391 // R1 forward at position 1000, length 100
392 let (start, end, strand) = constraints.mate_search_range(1000, 100, true, true);
393
394 assert_eq!(start, 500); // 1000 - 500
395 assert_eq!(end, 1600); // 1100 + 500
396 assert!(!strand); // R2 should be reverse
397 }
398
399 #[test]
400 fn test_paired_alignment_flags() {
401 let mut aln = PairedAlignment::new();
402 aln.strand1 = true;
403 aln.strand2 = false;
404 aln.alignment_type = PeAlignmentType::Concordant;
405
406 let flag1 = aln.sam_flag1();
407 let flag2 = aln.sam_flag2();
408
409 assert_eq!(flag1 & 1, 1); // PAIRED
410 assert_eq!(flag1 & 2, 2); // PROPER_PAIR
411 assert_eq!(flag1 & 64, 64); // FIRST_IN_PAIR
412 assert_eq!(flag1 & 16, 0); // R1 forward
413 assert_eq!(flag1 & 32, 32); // Mate reverse
414
415 assert_eq!(flag2 & 1, 1); // PAIRED
416 assert_eq!(flag2 & 2, 2); // PROPER_PAIR
417 assert_eq!(flag2 & 128, 128); // SECOND_IN_PAIR
418 assert_eq!(flag2 & 16, 16); // R2 reverse
419 assert_eq!(flag2 & 32, 0); // Mate forward
420 }
421}