Skip to main content

fgumi_lib/
template.rs

1//! Template data structure for grouping reads by query name.
2//!
3//! This module provides the `Template` structure for organizing all SAM/BAM records
4//! that share the same query name. In paired-end sequencing, a template typically
5//! contains:
6//! - Primary R1 and R2 alignments
7//! - Supplementary alignments (chimeric/split alignments)
8//! - Secondary alignments (alternative mappings)
9//!
10//! # Structure
11//!
12//! The `Template` organizes reads into categories:
13//! - **Primary reads**: The main alignment for R1 and R2
14//! - **Supplementary**: Additional alignments for chimeric reads (same read, different locations)
15//! - **Secondary**: Alternative alignments at different confidence levels
16//!
17//! # Usage
18//!
19//! Templates can be built incrementally using `Builder`, created from an iterator
20//! of records, or iterated over from a BAM file using `TemplateIterator`.
21//!
22//! # Examples
23//!
24//! ```rust,ignore
25//! // Build a template from records
26//! let mut builder = Builder::new();
27//! for record in records {
28//!     builder.push(record)?;
29//! }
30//! let template = builder.build()?;
31//!
32//! // Access primary reads
33//! if let Some(r1) = template.r1() {
34//!     // Process R1
35//! }
36//! ```
37
38use crate::sam::{record_utils, to_smallest_signed_int};
39use crate::unified_pipeline::MemoryEstimate;
40use anyhow::{Result, anyhow, bail};
41use noodles::sam::alignment::record::Cigar;
42use noodles::sam::alignment::record::data::field::Tag;
43use noodles::sam::alignment::record_buf::RecordBuf;
44use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
45use std::fmt::Write as _;
46
47// Re-export PairOrientation from sam module for backwards compatibility
48pub use crate::sam::PairOrientation;
49
50// MoleculeId is now defined in fgumi-umi crate
51pub use fgumi_umi::MoleculeId;
52
53/// A template representing all reads with the same query name.
54///
55/// In paired-end sequencing, this typically includes R1 and R2, plus any secondary
56/// or supplementary alignments. All records with the same name are grouped together.
57///
58/// This structure mirrors the Scala Template class from fgbio, organizing reads into:
59/// - Primary R1 and R2 reads
60/// - Supplementary alignments for R1 and R2
61/// - Secondary alignments for R1 and R2
62#[derive(Debug, Clone)]
63pub struct Template {
64    /// The query name (QNAME) shared by all records in this template
65    pub name: Vec<u8>,
66    /// The records (empty in raw-byte mode)
67    pub records: Vec<RecordBuf>,
68    /// Raw BAM record bytes (without `block_size` prefix). Present only in raw-byte mode.
69    pub raw_records: Option<Vec<Vec<u8>>>,
70    /// Primary R1 read (first segment, non-secondary, non-supplementary)
71    pub r1: Option<(usize, usize)>,
72    /// Primary R2 read (second segment, non-secondary, non-supplementary)
73    pub r2: Option<(usize, usize)>,
74    /// Supplementary alignments for R1
75    pub r1_supplementals: Option<(usize, usize)>,
76    /// Supplementary alignments for R2
77    pub r2_supplementals: Option<(usize, usize)>,
78    /// Secondary alignments for R1
79    pub r1_secondaries: Option<(usize, usize)>,
80    /// Secondary alignments for R2
81    pub r2_secondaries: Option<(usize, usize)>,
82    /// Assigned molecule ID from UMI grouping (set during group command)
83    pub mi: MoleculeId,
84}
85
86/// A batch of templates for parallel processing.
87pub type TemplateBatch = Vec<Template>;
88
89/// A builder for constructing `Template` instances.
90///
91/// The builder allows incremental construction of a template by adding records
92/// one at a time. It automatically categorizes each record into the appropriate
93/// slot (primary R1/R2, supplementary, or secondary) based on SAM flags.
94///
95/// # Examples
96///
97/// ```rust,ignore
98/// let mut builder = Builder::new();
99/// builder.push(r1_record)?;
100/// builder.push(r2_record)?;
101/// builder.push(r1_supplementary)?;
102/// let template = builder.build()?;
103/// ```
104#[derive(Debug, Default)]
105pub struct Builder {
106    /// The query name (QNAME) shared by all records in this template
107    name: Option<Vec<u8>>,
108    /// Primary R1 read (first segment, non-secondary, non-supplementary)
109    r1: Option<RecordBuf>,
110    /// Primary R2 read (second segment, non-secondary, non-supplementary)
111    r2: Option<RecordBuf>,
112    /// Supplementary alignments for R1
113    r1_supplementals: Vec<RecordBuf>,
114    /// Supplementary alignments for R2
115    r2_supplementals: Vec<RecordBuf>,
116    /// Secondary alignments for R1
117    r1_secondaries: Vec<RecordBuf>,
118    /// Secondary alignments for R2
119    r2_secondaries: Vec<RecordBuf>,
120}
121
122impl Builder {
123    /// Creates a new empty template builder.
124    ///
125    /// # Returns
126    ///
127    /// A new `Builder` instance ready to accept records
128    fn new() -> Self {
129        Builder {
130            name: None,
131            r1: None,
132            r2: None,
133            r1_supplementals: Vec::new(),
134            r2_supplementals: Vec::new(),
135            r1_secondaries: Vec::new(),
136            r2_secondaries: Vec::new(),
137        }
138    }
139
140    /// Adds a record to this builder, organizing it into the appropriate category.
141    ///
142    /// # Arguments
143    ///
144    /// * `record` - The record to add
145    ///
146    /// # Errors
147    ///
148    /// Returns an error if multiple primary R1s or R2s are encountered
149    ///
150    /// # Panics
151    ///
152    /// Panics if template name is unexpectedly missing when reporting duplicate records.
153    pub fn push(&mut self, record: RecordBuf) -> Result<&mut Builder> {
154        // Get record name as bytes for comparison (no allocation yet)
155        let record_name_bytes: &[u8] = record.name().map_or(&[], <_ as AsRef<[u8]>>::as_ref);
156
157        if let Some(cur_name) = &self.name {
158            // Compare bytes directly without allocating
159            if cur_name.as_slice() != record_name_bytes {
160                bail!(
161                    "Template name mismatch: found '{record_name_bytes:?}', expected '{cur_name:?}'"
162                );
163            }
164        } else {
165            // Only allocate Vec when setting the name for the first time
166            self.name = Some(record_name_bytes.to_vec());
167        }
168
169        let flags = record.flags();
170        let is_r1 = !flags.is_segmented() || flags.is_first_segment();
171
172        if is_r1 {
173            if flags.is_secondary() {
174                self.r1_secondaries.push(record);
175            } else if flags.is_supplementary() {
176                self.r1_supplementals.push(record);
177            } else if self.r1.is_some() {
178                return Err(anyhow!(
179                    "Multiple non-secondary, non-supplemental R1 records for read '{:?}'",
180                    self.name.as_ref().unwrap()
181                ));
182            } else {
183                self.r1 = Some(record);
184            }
185        } else if flags.is_secondary() {
186            self.r2_secondaries.push(record);
187        } else if flags.is_supplementary() {
188            self.r2_supplementals.push(record);
189        } else if self.r2.is_some() {
190            return Err(anyhow!(
191                "Multiple non-secondary, non-supplemental R2 records for read '{:?}'",
192                self.name.as_ref().unwrap()
193            ));
194        } else {
195            self.r2 = Some(record);
196        }
197
198        Ok(self)
199    }
200
201    /// Returns the number of records currently in the builder.
202    ///
203    /// # Returns
204    ///
205    /// Total count of records across all categories
206    #[must_use]
207    pub fn len(&self) -> usize {
208        let mut count = 0;
209        count += usize::from(self.r1.is_some());
210        count += usize::from(self.r2.is_some());
211        count += self.r1_supplementals.len();
212        count += self.r2_supplementals.len();
213        count += self.r1_secondaries.len();
214        count += self.r2_secondaries.len();
215        count
216    }
217
218    /// Checks if the builder contains no records.
219    ///
220    /// # Returns
221    ///
222    /// `true` if the builder is empty, `false` otherwise
223    #[must_use]
224    pub fn is_empty(&self) -> bool {
225        self.len() == 0
226    }
227
228    /// Builds a new template from this builder.
229    ///
230    /// The builder may be re-used.
231    ///
232    /// # Errors
233    ///
234    /// If the template could not be built.
235    pub fn build(&mut self) -> Result<Template> {
236        let r1_end: usize = usize::from(self.r1.is_some());
237        let r2_end: usize = r1_end + usize::from(self.r2.is_some());
238        let r1_supplementals_end: usize = r2_end + self.r1_supplementals.len();
239        let r2_supplementals_end: usize = r1_supplementals_end + self.r2_supplementals.len();
240        let r1_secondaries_end: usize = r2_supplementals_end + self.r1_secondaries.len();
241        let r2_secondaries_end: usize = r1_secondaries_end + self.r2_secondaries.len();
242
243        let r1: Option<(usize, usize)> = if self.r1.is_some() { Some((0, r1_end)) } else { None };
244        let r2: Option<(usize, usize)> =
245            if self.r2.is_some() { Some((r1_end, r2_end)) } else { None };
246        let r1_supplementals: Option<(usize, usize)> = if self.r1_supplementals.is_empty() {
247            None
248        } else {
249            Some((r2_end, r1_supplementals_end))
250        };
251        let r2_supplementals: Option<(usize, usize)> = if self.r2_supplementals.is_empty() {
252            None
253        } else {
254            Some((r1_supplementals_end, r2_supplementals_end))
255        };
256        let r1_secondaries: Option<(usize, usize)> = if self.r1_secondaries.is_empty() {
257            None
258        } else {
259            Some((r2_supplementals_end, r1_secondaries_end))
260        };
261        let r2_secondaries: Option<(usize, usize)> = if self.r2_secondaries.is_empty() {
262            None
263        } else {
264            Some((r1_secondaries_end, r2_secondaries_end))
265        };
266
267        let Some(name) = self.name.take() else {
268            return Err(anyhow!("No records given to template builder"));
269        };
270
271        // Pre-allocate records Vec with exact capacity to avoid reallocations
272        let mut records: Vec<RecordBuf> = Vec::with_capacity(self.len());
273        if let Some(rec) = self.r1.take() {
274            records.push(rec);
275        }
276        if let Some(rec) = self.r2.take() {
277            records.push(rec);
278        }
279        // Reverse the supplementary and secondary vectors to match fgbio's behavior.
280        // fgbio uses Scala List prepend (r :: list) which results in reverse input order.
281        self.r1_supplementals.reverse();
282        self.r2_supplementals.reverse();
283        self.r1_secondaries.reverse();
284        self.r2_secondaries.reverse();
285        records.append(&mut self.r1_supplementals);
286        records.append(&mut self.r2_supplementals);
287        records.append(&mut self.r1_secondaries);
288        records.append(&mut self.r2_secondaries);
289
290        Ok(Template {
291            name,
292            records,
293            raw_records: None,
294            r1,
295            r2,
296            r1_supplementals,
297            r2_supplementals,
298            r1_secondaries,
299            r2_secondaries,
300            mi: MoleculeId::None,
301        })
302    }
303}
304
305impl Template {
306    /// Returns the primary R1 read if present.
307    ///
308    /// The primary R1 is the main (non-secondary, non-supplementary) alignment
309    /// for the first read in a pair (or the only read for single-end data).
310    ///
311    /// # Returns
312    ///
313    /// Reference to the primary R1 record, or `None` if not present.
314    /// Returns `None` in raw-byte mode (use `raw_r1()` instead).
315    #[must_use]
316    pub fn r1(&self) -> Option<&RecordBuf> {
317        if self.records.is_empty() {
318            return None;
319        }
320        self.r1.map(|_| &self.records[0])
321    }
322
323    /// Returns the primary R2 read if present.
324    /// Returns `None` in raw-byte mode (use `raw_r2()` instead).
325    ///
326    /// The primary R2 is the main (non-secondary, non-supplementary) alignment
327    /// for the second read in a paired-end template.
328    ///
329    /// # Returns
330    ///
331    /// Reference to the primary R2 record, or `None` if not present
332    #[must_use]
333    pub fn r2(&self) -> Option<&RecordBuf> {
334        if self.records.is_empty() {
335            return None;
336        }
337        self.r2.map(|(i, _)| &self.records[i])
338    }
339
340    /// Returns the records in the given range, or an empty slice if the range is invalid.
341    fn records_in_range(&self, range: Option<(usize, usize)>) -> &[RecordBuf] {
342        if let Some((start, end)) = range {
343            if start <= end && end <= self.records.len() {
344                return &self.records[start..end];
345            }
346        }
347        &[]
348    }
349
350    /// Returns supplementary alignments for R1.
351    /// Returns empty in raw-byte mode.
352    #[must_use]
353    pub fn r1_supplementals(&self) -> &[RecordBuf] {
354        self.records_in_range(self.r1_supplementals)
355    }
356
357    /// Returns supplementary alignments for R2.
358    /// Returns empty in raw-byte mode.
359    #[must_use]
360    pub fn r2_supplementals(&self) -> &[RecordBuf] {
361        self.records_in_range(self.r2_supplementals)
362    }
363
364    /// Returns secondary alignments for R1.
365    /// Returns empty in raw-byte mode.
366    #[must_use]
367    pub fn r1_secondaries(&self) -> &[RecordBuf] {
368        self.records_in_range(self.r1_secondaries)
369    }
370
371    /// Returns secondary alignments for R2.
372    /// Returns empty in raw-byte mode.
373    #[must_use]
374    pub fn r2_secondaries(&self) -> &[RecordBuf] {
375        self.records_in_range(self.r2_secondaries)
376    }
377
378    /// Creates a new empty template with the given name
379    ///
380    /// # Arguments
381    ///
382    /// * `name` - The query name for this template
383    ///
384    /// # Returns
385    ///
386    /// An empty template with no records
387    #[must_use]
388    pub fn new(name: Vec<u8>) -> Self {
389        Template {
390            name,
391            records: Vec::new(),
392            raw_records: None,
393            r1: None,
394            r2: None,
395            r1_supplementals: None,
396            r2_supplementals: None,
397            r1_secondaries: None,
398            r2_secondaries: None,
399            mi: MoleculeId::None,
400        }
401    }
402
403    /// Creates a Template from an iterator of records with the same query name
404    ///
405    /// Automatically organizes records into r1/r2, primary/supplementary/secondary categories.
406    /// Assumes all records have the same query name.
407    ///
408    /// # Arguments
409    ///
410    /// * `records` - Iterator of records to organize into a template
411    ///
412    /// # Returns
413    ///
414    /// A Template with records organized by type, or an error if multiple primary reads found
415    ///
416    /// # Errors
417    ///
418    /// Returns an error if:
419    /// - Multiple non-secondary, non-supplemental R1s are found
420    /// - Multiple non-secondary, non-supplemental R2s are found
421    /// - Iterator is empty (no name available)
422    ///
423    /// # Panics
424    ///
425    /// - If not all the records have the same query name
426    pub fn from_records<I>(records: I) -> Result<Self>
427    where
428        I: IntoIterator<Item = RecordBuf>,
429    {
430        let mut builder = Builder::new();
431
432        for record in records {
433            builder.push(record)?;
434        }
435
436        builder.build()
437    }
438
439    /// Returns an iterator over primary (non-secondary, non-supplementary) reads in this template
440    ///
441    /// Returns both R1 and R2 primary reads if present.
442    ///
443    /// # Returns
444    ///
445    /// Iterator over primary read records
446    pub fn primary_reads(&self) -> impl Iterator<Item = &RecordBuf> {
447        let records = &self.records;
448        self.r1.iter().chain(self.r2.iter()).filter_map(move |(start, _)| records.get(*start))
449    }
450
451    /// Consumes the template and returns owned primary reads.
452    ///
453    /// This method takes ownership of the template and returns the primary R1 and R2
454    /// reads without cloning. Use this when you need owned records and won't access
455    /// the template again.
456    ///
457    /// # Returns
458    ///
459    /// Tuple of `(Option<RecordBuf>, Option<RecordBuf>)` containing owned R1 and R2 records
460    ///
461    /// # Example
462    ///
463    /// ```rust,ignore
464    /// let template = Template::from_records(records)?;
465    /// let (r1, r2) = template.into_primary_reads();
466    /// if let Some(r1) = r1 {
467    ///     writer.write_record(r1)?; // No clone needed
468    /// }
469    /// ```
470    #[must_use]
471    pub fn into_primary_reads(mut self) -> (Option<RecordBuf>, Option<RecordBuf>) {
472        if self.records.is_empty() {
473            return (None, None);
474        }
475        // Extract r2 first (higher index) to avoid shifting r1's position
476        let r2 = self.r2.and_then(|(i, _)| self.records.get_mut(i).map(std::mem::take));
477        let r1 = self.r1.and_then(|_| self.records.get_mut(0).map(std::mem::take));
478        (r1, r2)
479    }
480
481    /// Returns an iterator over all supplementary and secondary reads
482    ///
483    /// # Returns
484    ///
485    /// Iterator over non-primary reads
486    pub fn all_supplementary_and_secondary(&self) -> impl Iterator<Item = &RecordBuf> {
487        let records = &self.records;
488        let iter = self
489            .r1_supplementals
490            .iter()
491            .chain(self.r2_supplementals.iter())
492            .chain(self.r1_secondaries.iter())
493            .chain(self.r2_secondaries.iter());
494        iter.flat_map(move |(start, end)| {
495            let s = (*start).min(records.len());
496            let e = (*end).min(records.len());
497            records[s..e].iter()
498        })
499    }
500
501    /// Returns an iterator over all reads in this template
502    ///
503    /// Includes primary, supplementary, and secondary alignments.
504    ///
505    /// # Returns
506    ///
507    /// Iterator over all records
508    pub fn all_reads(&self) -> impl Iterator<Item = &RecordBuf> {
509        self.primary_reads().chain(self.all_supplementary_and_secondary())
510    }
511
512    /// Consumes the template and returns all records as a vector.
513    ///
514    /// This is useful when you need to take ownership of all records
515    /// for further processing.
516    #[must_use]
517    pub fn into_records(self) -> Vec<RecordBuf> {
518        self.records
519    }
520
521    /// Returns an iterator over all R1 reads (primary, supplementary, and secondary)
522    ///
523    /// # Returns
524    ///
525    /// Iterator over all R1 records
526    pub fn all_r1s(&self) -> impl Iterator<Item = &RecordBuf> {
527        let records = &self.records;
528        let iter =
529            self.r1.iter().chain(self.r1_secondaries.iter()).chain(self.r1_supplementals.iter());
530        iter.flat_map(move |(start, end)| {
531            let s = (*start).min(records.len());
532            let e = (*end).min(records.len());
533            records[s..e].iter()
534        })
535    }
536
537    /// Returns an iterator over all R2 reads (primary, supplementary, and secondary)
538    ///
539    /// # Returns
540    ///
541    /// Iterator over all R2 records
542    pub fn all_r2s(&self) -> impl Iterator<Item = &RecordBuf> {
543        let records = &self.records;
544        let iter =
545            self.r2.iter().chain(self.r2_secondaries.iter()).chain(self.r2_supplementals.iter());
546        iter.flat_map(move |(start, end)| {
547            let s = (*start).min(records.len());
548            let e = (*end).min(records.len());
549            records[s..e].iter()
550        })
551    }
552
553    /// Returns the total count of records in this template
554    ///
555    /// # Returns
556    ///
557    /// Total number of records (primary, supplementary, and secondary)
558    #[must_use]
559    pub fn read_count(&self) -> usize {
560        if let Some(ref rr) = self.raw_records { rr.len() } else { self.records.len() }
561    }
562
563    /// Returns true if this template is in raw-byte mode.
564    #[inline]
565    #[must_use]
566    pub fn is_raw_byte_mode(&self) -> bool {
567        self.raw_records.is_some()
568    }
569
570    /// Returns the raw R1 bytes if in raw-byte mode.
571    #[must_use]
572    pub fn raw_r1(&self) -> Option<&[u8]> {
573        let rr = self.raw_records.as_ref()?;
574        self.r1.map(|_| rr[0].as_slice())
575    }
576
577    /// Returns the raw R2 bytes if in raw-byte mode.
578    #[must_use]
579    pub fn raw_r2(&self) -> Option<&[u8]> {
580        let rr = self.raw_records.as_ref()?;
581        self.r2.map(|(i, _)| rr[i].as_slice())
582    }
583
584    /// Returns all raw records if in raw-byte mode.
585    #[must_use]
586    pub fn all_raw_records(&self) -> Option<&[Vec<u8>]> {
587        self.raw_records.as_deref()
588    }
589
590    /// Consumes self and returns the raw records if in raw-byte mode.
591    #[must_use]
592    pub fn into_raw_records(self) -> Option<Vec<Vec<u8>>> {
593        self.raw_records
594    }
595
596    /// Returns mutable access to all raw records if in raw-byte mode.
597    pub fn all_raw_records_mut(&mut self) -> Option<&mut [Vec<u8>]> {
598        self.raw_records.as_deref_mut()
599    }
600
601    /// Build a Template from raw BAM byte records, categorizing by flags.
602    ///
603    /// The records are categorized using `bam_fields::flags()` to determine
604    /// R1/R2/supplementary/secondary status, with the same index-pair scheme
605    /// as `Builder::build()`.
606    ///
607    /// # Errors
608    ///
609    /// Returns an error if multiple primary R1s or R2s are found.
610    #[allow(clippy::too_many_lines)]
611    pub fn from_raw_records(mut raw_records: Vec<Vec<u8>>) -> Result<Self> {
612        use crate::sort::bam_fields;
613
614        if raw_records.is_empty() {
615            bail!("No records given to from_raw_records");
616        }
617
618        // Guard against truncated records
619        for (i, r) in raw_records.iter().enumerate() {
620            if r.len() < bam_fields::MIN_BAM_RECORD_LEN {
621                bail!(
622                    "Raw BAM record {i} too short to parse ({} < {})",
623                    r.len(),
624                    bam_fields::MIN_BAM_RECORD_LEN
625                );
626            }
627            let l_rn = r[8] as usize;
628            if r.len() < 32 + l_rn {
629                bail!(
630                    "Raw BAM record {i} truncated: l_read_name={l_rn} but only {} bytes after header",
631                    r.len() - 32
632                );
633            }
634        }
635
636        // Extract name from first record
637        let name = bam_fields::read_name(&raw_records[0]).to_vec();
638
639        // Fast path for common 2-record paired-end case (no supplementals/secondaries)
640        if raw_records.len() == 2 {
641            let f0 = bam_fields::flags(&raw_records[0]);
642            let f1 = bam_fields::flags(&raw_records[1]);
643            let neither_sec_supp =
644                (f0 | f1) & (bam_fields::flags::SECONDARY | bam_fields::flags::SUPPLEMENTARY) == 0;
645            if neither_sec_supp {
646                // Use same R1/R2 logic as general path: R1 = !paired || first_segment
647                let is_r1_0 = (f0 & bam_fields::flags::PAIRED) == 0
648                    || (f0 & bam_fields::flags::FIRST_SEGMENT) != 0;
649                let is_r1_1 = (f1 & bam_fields::flags::PAIRED) == 0
650                    || (f1 & bam_fields::flags::FIRST_SEGMENT) != 0;
651                // Only use fast path if exactly one R1 and one R2
652                if is_r1_0 != is_r1_1 {
653                    if !is_r1_0 {
654                        // rec[0]=R2, rec[1]=R1 — swap so R1 is at index 0
655                        raw_records.swap(0, 1);
656                    }
657                    return Ok(Template {
658                        name,
659                        records: Vec::new(),
660                        raw_records: Some(raw_records),
661                        r1: Some((0, 1)),
662                        r2: Some((1, 2)),
663                        r1_supplementals: None,
664                        r2_supplementals: None,
665                        r1_secondaries: None,
666                        r2_secondaries: None,
667                        mi: MoleculeId::None,
668                    });
669                }
670                // Both R1 or both R2 — fall through to general path for error handling
671            }
672        }
673
674        // Verify all records share the same QNAME (matching Builder behavior)
675        for rec in raw_records.iter().skip(1) {
676            if bam_fields::read_name(rec) != name.as_slice() {
677                bail!("Template name mismatch in from_raw_records");
678            }
679        }
680
681        // Categorize records into: r1, r2, r1_supp, r2_supp, r1_sec, r2_sec
682        let mut r1_idx: Option<usize> = None;
683        let mut r2_idx: Option<usize> = None;
684        let mut r1_supp: Vec<usize> = Vec::new();
685        let mut r2_supp: Vec<usize> = Vec::new();
686        let mut r1_sec: Vec<usize> = Vec::new();
687        let mut r2_sec: Vec<usize> = Vec::new();
688
689        for (i, rec) in raw_records.iter().enumerate() {
690            let flg = bam_fields::flags(rec);
691            let is_secondary = (flg & bam_fields::flags::SECONDARY) != 0;
692            let is_supplementary = (flg & bam_fields::flags::SUPPLEMENTARY) != 0;
693            let is_paired = (flg & bam_fields::flags::PAIRED) != 0;
694            let is_first = (flg & bam_fields::flags::FIRST_SEGMENT) != 0;
695            let is_r1 = !is_paired || is_first;
696
697            if is_r1 {
698                if is_secondary {
699                    r1_sec.push(i);
700                } else if is_supplementary {
701                    r1_supp.push(i);
702                } else if r1_idx.is_some() {
703                    bail!(
704                        "Multiple non-secondary, non-supplemental R1 records for read '{name:?}'"
705                    );
706                } else {
707                    r1_idx = Some(i);
708                }
709            } else if is_secondary {
710                r2_sec.push(i);
711            } else if is_supplementary {
712                r2_supp.push(i);
713            } else if r2_idx.is_some() {
714                bail!("Multiple non-secondary, non-supplemental R2 records for read '{name:?}'");
715            } else {
716                r2_idx = Some(i);
717            }
718        }
719
720        // Build ordered Vec: r1, r2, r1_supplementals, r2_supplementals, r1_secondaries, r2_secondaries
721        // (matching Builder::build() ordering, with supplementals/secondaries reversed)
722        let mut ordered: Vec<Vec<u8>> = Vec::with_capacity(raw_records.len());
723        let mut take = |idx: usize| -> Vec<u8> { std::mem::take(&mut raw_records[idx]) };
724
725        // Track indices
726        let r1_pair = if let Some(idx) = r1_idx {
727            ordered.push(take(idx));
728            Some((ordered.len() - 1, ordered.len()))
729        } else {
730            None
731        };
732
733        let r2_pair = if let Some(idx) = r2_idx {
734            ordered.push(take(idx));
735            Some((ordered.len() - 1, ordered.len()))
736        } else {
737            None
738        };
739
740        let r1_supp_pair = if r1_supp.is_empty() {
741            None
742        } else {
743            r1_supp.reverse();
744            let start = ordered.len();
745            for idx in &r1_supp {
746                ordered.push(take(*idx));
747            }
748            Some((start, ordered.len()))
749        };
750
751        let r2_supp_pair = if r2_supp.is_empty() {
752            None
753        } else {
754            r2_supp.reverse();
755            let start = ordered.len();
756            for idx in &r2_supp {
757                ordered.push(take(*idx));
758            }
759            Some((start, ordered.len()))
760        };
761
762        let r1_sec_pair = if r1_sec.is_empty() {
763            None
764        } else {
765            r1_sec.reverse();
766            let start = ordered.len();
767            for idx in &r1_sec {
768                ordered.push(take(*idx));
769            }
770            Some((start, ordered.len()))
771        };
772
773        let r2_sec_pair = if r2_sec.is_empty() {
774            None
775        } else {
776            r2_sec.reverse();
777            let start = ordered.len();
778            for idx in &r2_sec {
779                ordered.push(take(*idx));
780            }
781            Some((start, ordered.len()))
782        };
783
784        Ok(Template {
785            name,
786            records: Vec::new(),
787            raw_records: Some(ordered),
788            r1: r1_pair,
789            r2: r2_pair,
790            r1_supplementals: r1_supp_pair,
791            r2_supplementals: r2_supp_pair,
792            r1_secondaries: r1_sec_pair,
793            r2_secondaries: r2_sec_pair,
794            mi: MoleculeId::None,
795        })
796    }
797
798    /// Returns the pair orientation if both R1 and R2 are present and mapped to the same chromosome.
799    ///
800    /// This matches htsjdk's `SamPairUtil.getPairOrientation()` and fgbio's `Template.pairOrientation`.
801    ///
802    /// # Returns
803    ///
804    /// - `Some(PairOrientation::FR)` - Forward-Reverse ("innie") orientation
805    /// - `Some(PairOrientation::RF)` - Reverse-Forward ("outie") orientation
806    /// - `Some(PairOrientation::Tandem)` - Both reads on the same strand
807    /// - `None` - If R1 or R2 is missing, unmapped, or they're on different chromosomes
808    ///
809    /// # Examples
810    ///
811    /// ```rust,ignore
812    /// let template = Template::from_records(records)?;
813    /// if let Some(orientation) = template.pair_orientation() {
814    ///     match orientation {
815    ///         PairOrientation::FR => println!("Innie pair"),
816    ///         PairOrientation::RF => println!("Outie pair"),
817    ///         PairOrientation::Tandem => println!("Tandem pair"),
818    ///     }
819    /// }
820    /// ```
821    #[must_use]
822    pub fn pair_orientation(&self) -> Option<PairOrientation> {
823        let r1 = self.r1()?;
824        let r2 = self.r2()?;
825
826        // Both must be mapped
827        if r1.flags().is_unmapped() || r2.flags().is_unmapped() {
828            return None;
829        }
830
831        // Must be on the same reference
832        if r1.reference_sequence_id() != r2.reference_sequence_id() {
833            return None;
834        }
835
836        // Use R1 to determine orientation (htsjdk uses the first record passed)
837        Some(get_pair_orientation(r1))
838    }
839
840    /// Fixes mate information for paired-end reads.
841    ///
842    /// This method follows htsjdk's `SamPairUtil.setMateInfo` behavior exactly:
843    ///
844    /// **For primary R1/R2 pairs:**
845    /// - Case 1 (both mapped): Sets mate ref, mate pos, mate strand flag, mate unmapped=false,
846    ///   MQ tag, MC tag (if valid CIGAR), and computes TLEN
847    /// - Case 2 (both unmapped): Clears ref/pos to unmapped values, sets mate unmapped=true,
848    ///   removes MQ and MC tags, sets TLEN=0
849    /// - Case 3 (one mapped, one unmapped): Places unmapped read at mapped read's coordinates,
850    ///   sets appropriate mate info, MQ only on unmapped read, TLEN=0
851    ///
852    /// **For supplementary alignments:**
853    /// - Sets mate information based on the mate's primary alignment
854    /// - Sets ms (mate score) tag if AS (alignment score) is available
855    ///
856    /// # Errors
857    ///
858    /// Returns an error if the CIGAR operations cannot be parsed (malformed CIGAR data)
859    #[allow(clippy::too_many_lines)]
860    pub fn fix_mate_info(&mut self) -> Result<()> {
861        if self.records.is_empty() {
862            return Ok(());
863        }
864
865        let mapq_tag = Tag::new(b'M', b'Q');
866        let mate_cigar_tag = Tag::new(b'M', b'C');
867        let mate_score_tag = Tag::new(b'm', b's');
868        let align_score_tag = Tag::new(b'A', b'S');
869
870        // Fix mate info for primary R1/R2 pair
871        // Following htsjdk's SamPairUtil.setMateInfo exactly
872        if let (Some((r1_i, _)), Some((r2_i, _))) = (self.r1, self.r2) {
873            let r1_is_unmapped = self.records[r1_i].flags().is_unmapped();
874            let r2_is_unmapped = self.records[r2_i].flags().is_unmapped();
875
876            // Get alignment scores for mate score tags (used in all cases)
877            let r1_as = self.records[r1_i].data().get(&align_score_tag).and_then(extract_int_value);
878            let r2_as = self.records[r2_i].data().get(&align_score_tag).and_then(extract_int_value);
879
880            if !r1_is_unmapped && !r2_is_unmapped {
881                // Case 1: Both reads are mapped
882                self.set_mate_info_both_mapped(r1_i, r2_i, mapq_tag, mate_cigar_tag)?;
883            } else if r1_is_unmapped && r2_is_unmapped {
884                // Case 2: Both reads are unmapped
885                self.set_mate_info_both_unmapped(r1_i, r2_i, mapq_tag, mate_cigar_tag);
886            } else {
887                // Case 3: One mapped, one unmapped
888                self.set_mate_info_one_unmapped(
889                    r1_i,
890                    r2_i,
891                    r1_is_unmapped,
892                    mapq_tag,
893                    mate_cigar_tag,
894                )?;
895            }
896
897            // Set mate score tags (ms) in all cases
898            // Use Int8 to match fgbio's tag type encoding
899            if let Some(as_value) = r2_as {
900                self.records[r1_i]
901                    .data_mut()
902                    .insert(mate_score_tag, to_smallest_signed_int(as_value));
903            }
904            if let Some(as_value) = r1_as {
905                self.records[r2_i]
906                    .data_mut()
907                    .insert(mate_score_tag, to_smallest_signed_int(as_value));
908            }
909        }
910
911        // Fix mate info for R1 supplementals (mate is primary R2)
912        // Following htsjdk's setMateInformationOnSupplementalAlignment:
913        // - Mate reference index (RNEXT)
914        // - Mate alignment start (PNEXT)
915        // - Mate reverse strand flag (0x20)
916        // - Mate unmapped flag (0x8)
917        // - TLEN is set to negative of mate primary's TLEN
918        // - MQ tag is set to mate primary's mapping quality
919        // - MC tag is set to mate primary's CIGAR
920        // - ms tag is set to mate primary's AS value
921        if let Some((r2_i, _)) = self.r2 {
922            let r2_ref_id = self.records[r2_i].reference_sequence_id();
923            let r2_pos = self.records[r2_i].alignment_start();
924            let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
925            let r2_is_unmapped = self.records[r2_i].flags().is_unmapped();
926            let r2_tlen = self.records[r2_i].template_length();
927            let r2_mapq = self.records[r2_i].mapping_quality();
928            let r2_cigar_str = cigar_to_string(self.records[r2_i].cigar())?;
929            let r2_as = self.records[r2_i].data().get(&align_score_tag).and_then(extract_int_value);
930
931            if let Some((start, end)) = self.r1_supplementals {
932                for i in start..end {
933                    // Set mate reference and position
934                    *self.records[i].mate_reference_sequence_id_mut() = r2_ref_id;
935                    *self.records[i].mate_alignment_start_mut() = r2_pos;
936
937                    // Set mate flags (reverse strand and unmapped)
938                    set_mate_flags(&mut self.records[i], r2_is_reverse, r2_is_unmapped);
939
940                    // Set TLEN to negative of mate primary's TLEN
941                    *self.records[i].template_length_mut() = -r2_tlen;
942
943                    // Set MQ tag - use Int8 to match fgbio's tag type encoding
944                    let r2_mapq_value = r2_mapq.map_or(255, |m| i32::from(u8::from(m)));
945                    self.records[i]
946                        .data_mut()
947                        .insert(mapq_tag, to_smallest_signed_int(r2_mapq_value));
948
949                    // Set MC tag if CIGAR is available
950                    if !r2_cigar_str.is_empty() && r2_cigar_str != "*" && !r2_is_unmapped {
951                        self.records[i]
952                            .data_mut()
953                            .insert(mate_cigar_tag, BufValue::from(r2_cigar_str.clone()));
954                    }
955
956                    // Set ms tag if AS is available - use Int8 to match fgbio
957                    if let Some(as_value) = r2_as {
958                        self.records[i]
959                            .data_mut()
960                            .insert(mate_score_tag, to_smallest_signed_int(as_value));
961                    }
962                }
963            }
964        }
965
966        // Fix mate info for R2 supplementals (mate is primary R1)
967        if let Some((r1_i, _)) = self.r1 {
968            let r1_ref_id = self.records[r1_i].reference_sequence_id();
969            let r1_pos = self.records[r1_i].alignment_start();
970            let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
971            let r1_is_unmapped = self.records[r1_i].flags().is_unmapped();
972            let r1_tlen = self.records[r1_i].template_length();
973            let r1_mapq = self.records[r1_i].mapping_quality();
974            let r1_cigar_str = cigar_to_string(self.records[r1_i].cigar())?;
975            let r1_as = self.records[r1_i].data().get(&align_score_tag).and_then(extract_int_value);
976
977            if let Some((start, end)) = self.r2_supplementals {
978                for i in start..end {
979                    // Set mate reference and position
980                    *self.records[i].mate_reference_sequence_id_mut() = r1_ref_id;
981                    *self.records[i].mate_alignment_start_mut() = r1_pos;
982
983                    // Set mate flags (reverse strand and unmapped)
984                    set_mate_flags(&mut self.records[i], r1_is_reverse, r1_is_unmapped);
985
986                    // Set TLEN to negative of mate primary's TLEN
987                    *self.records[i].template_length_mut() = -r1_tlen;
988
989                    // Set MQ tag - use Int8 to match fgbio's tag type encoding
990                    let r1_mapq_value = r1_mapq.map_or(255, |m| i32::from(u8::from(m)));
991                    self.records[i]
992                        .data_mut()
993                        .insert(mapq_tag, to_smallest_signed_int(r1_mapq_value));
994
995                    // Set MC tag if CIGAR is available
996                    if !r1_cigar_str.is_empty() && r1_cigar_str != "*" && !r1_is_unmapped {
997                        self.records[i]
998                            .data_mut()
999                            .insert(mate_cigar_tag, BufValue::from(r1_cigar_str.clone()));
1000                    }
1001
1002                    // Set ms tag if AS is available - use Int8 to match fgbio
1003                    if let Some(as_value) = r1_as {
1004                        self.records[i]
1005                            .data_mut()
1006                            .insert(mate_score_tag, to_smallest_signed_int(as_value));
1007                    }
1008                }
1009            }
1010        }
1011
1012        Ok(())
1013    }
1014
1015    /// Sets mate information when both reads are mapped.
1016    /// Following htsjdk's SamPairUtil.setMateInfo case 1.
1017    fn set_mate_info_both_mapped(
1018        &mut self,
1019        r1_i: usize,
1020        r2_i: usize,
1021        mapq_tag: Tag,
1022        mate_cigar_tag: Tag,
1023    ) -> Result<()> {
1024        // Get R2's info for R1
1025        let r2_ref_id = self.records[r2_i].reference_sequence_id();
1026        let r2_pos = self.records[r2_i].alignment_start();
1027        let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
1028        let r2_mapq = self.records[r2_i].mapping_quality();
1029        let r2_cigar_str = cigar_to_string(self.records[r2_i].cigar())?;
1030
1031        // Get R1's info for R2
1032        let r1_ref_id = self.records[r1_i].reference_sequence_id();
1033        let r1_pos = self.records[r1_i].alignment_start();
1034        let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
1035        let r1_mapq = self.records[r1_i].mapping_quality();
1036        let r1_cigar_str = cigar_to_string(self.records[r1_i].cigar())?;
1037
1038        // Set mate info on R1 from R2
1039        *self.records[r1_i].mate_reference_sequence_id_mut() = r2_ref_id;
1040        *self.records[r1_i].mate_alignment_start_mut() = r2_pos;
1041        set_mate_flags(&mut self.records[r1_i], r2_is_reverse, false);
1042        // Use Int8 for MQ to match fgbio's tag type encoding
1043        let r2_mapq_value = r2_mapq.map_or(255, |m| i32::from(u8::from(m)));
1044        self.records[r1_i].data_mut().insert(mapq_tag, to_smallest_signed_int(r2_mapq_value));
1045        if !r2_cigar_str.is_empty() && r2_cigar_str != "*" {
1046            self.records[r1_i].data_mut().insert(mate_cigar_tag, BufValue::from(r2_cigar_str));
1047        }
1048
1049        // Set mate info on R2 from R1
1050        *self.records[r2_i].mate_reference_sequence_id_mut() = r1_ref_id;
1051        *self.records[r2_i].mate_alignment_start_mut() = r1_pos;
1052        set_mate_flags(&mut self.records[r2_i], r1_is_reverse, false);
1053        // Use Int8 for MQ to match fgbio's tag type encoding
1054        let r1_mapq_value = r1_mapq.map_or(255, |m| i32::from(u8::from(m)));
1055        self.records[r2_i].data_mut().insert(mapq_tag, to_smallest_signed_int(r1_mapq_value));
1056        if !r1_cigar_str.is_empty() && r1_cigar_str != "*" {
1057            self.records[r2_i].data_mut().insert(mate_cigar_tag, BufValue::from(r1_cigar_str));
1058        }
1059
1060        // Compute and set insert size (TLEN)
1061        let insert_size = compute_insert_size(&self.records[r1_i], &self.records[r2_i]);
1062        *self.records[r1_i].template_length_mut() = insert_size;
1063        *self.records[r2_i].template_length_mut() = -insert_size;
1064
1065        Ok(())
1066    }
1067
1068    /// Sets mate information when both reads are unmapped.
1069    /// Following htsjdk's SamPairUtil.setMateInfo case 2.
1070    fn set_mate_info_both_unmapped(
1071        &mut self,
1072        r1_i: usize,
1073        r2_i: usize,
1074        mapq_tag: Tag,
1075        mate_cigar_tag: Tag,
1076    ) {
1077        // Get strand info before modifying
1078        let r1_is_reverse = self.records[r1_i].flags().is_reverse_complemented();
1079        let r2_is_reverse = self.records[r2_i].flags().is_reverse_complemented();
1080
1081        // R1: set to unmapped coordinates
1082        *self.records[r1_i].reference_sequence_id_mut() = None;
1083        *self.records[r1_i].alignment_start_mut() = None;
1084        *self.records[r1_i].mate_reference_sequence_id_mut() = None;
1085        *self.records[r1_i].mate_alignment_start_mut() = None;
1086        set_mate_flags(&mut self.records[r1_i], r2_is_reverse, true);
1087        self.records[r1_i].data_mut().remove(&mapq_tag);
1088        self.records[r1_i].data_mut().remove(&mate_cigar_tag);
1089        *self.records[r1_i].template_length_mut() = 0;
1090
1091        // R2: set to unmapped coordinates
1092        *self.records[r2_i].reference_sequence_id_mut() = None;
1093        *self.records[r2_i].alignment_start_mut() = None;
1094        *self.records[r2_i].mate_reference_sequence_id_mut() = None;
1095        *self.records[r2_i].mate_alignment_start_mut() = None;
1096        set_mate_flags(&mut self.records[r2_i], r1_is_reverse, true);
1097        self.records[r2_i].data_mut().remove(&mapq_tag);
1098        self.records[r2_i].data_mut().remove(&mate_cigar_tag);
1099        *self.records[r2_i].template_length_mut() = 0;
1100    }
1101
1102    /// Sets mate information when one read is mapped and one is unmapped.
1103    /// Following htsjdk's SamPairUtil.setMateInfo case 3.
1104    fn set_mate_info_one_unmapped(
1105        &mut self,
1106        r1_i: usize,
1107        r2_i: usize,
1108        r1_is_unmapped: bool,
1109        mapq_tag: Tag,
1110        mate_cigar_tag: Tag,
1111    ) -> Result<()> {
1112        let (mapped_i, unmapped_i) = if r1_is_unmapped { (r2_i, r1_i) } else { (r1_i, r2_i) };
1113
1114        // Get mapped read's info
1115        let mapped_ref_id = self.records[mapped_i].reference_sequence_id();
1116        let mapped_pos = self.records[mapped_i].alignment_start();
1117        let mapped_is_reverse = self.records[mapped_i].flags().is_reverse_complemented();
1118        let mapped_mapq = self.records[mapped_i].mapping_quality();
1119        let mapped_cigar_str = cigar_to_string(self.records[mapped_i].cigar())?;
1120
1121        // Get unmapped read's strand (for mate strand flag on mapped read)
1122        let unmapped_is_reverse = self.records[unmapped_i].flags().is_reverse_complemented();
1123
1124        // Place unmapped read at mapped read's coordinates (htsjdk behavior)
1125        *self.records[unmapped_i].reference_sequence_id_mut() = mapped_ref_id;
1126        *self.records[unmapped_i].alignment_start_mut() = mapped_pos;
1127
1128        // Set mate info on mapped read (mate is unmapped)
1129        // After placing unmapped at mapped's coords, unmapped's ref/pos = mapped's ref/pos
1130        *self.records[mapped_i].mate_reference_sequence_id_mut() = mapped_ref_id;
1131        *self.records[mapped_i].mate_alignment_start_mut() = mapped_pos;
1132        set_mate_flags(&mut self.records[mapped_i], unmapped_is_reverse, true);
1133        self.records[mapped_i].data_mut().remove(&mapq_tag);
1134        self.records[mapped_i].data_mut().remove(&mate_cigar_tag);
1135        *self.records[mapped_i].template_length_mut() = 0;
1136
1137        // Set mate info on unmapped read (mate is mapped)
1138        *self.records[unmapped_i].mate_reference_sequence_id_mut() = mapped_ref_id;
1139        *self.records[unmapped_i].mate_alignment_start_mut() = mapped_pos;
1140        set_mate_flags(&mut self.records[unmapped_i], mapped_is_reverse, false);
1141        // Use Int8 for MQ to match fgbio's tag type encoding
1142        let mapped_mapq_value = mapped_mapq.map_or(255, |m| i32::from(u8::from(m)));
1143        self.records[unmapped_i]
1144            .data_mut()
1145            .insert(mapq_tag, to_smallest_signed_int(mapped_mapq_value));
1146        if !mapped_cigar_str.is_empty() && mapped_cigar_str != "*" {
1147            self.records[unmapped_i]
1148                .data_mut()
1149                .insert(mate_cigar_tag, BufValue::from(mapped_cigar_str));
1150        }
1151        *self.records[unmapped_i].template_length_mut() = 0;
1152
1153        Ok(())
1154    }
1155}
1156
1157/// Computes the insert size (TLEN) for two reads.
1158/// Following htsjdk's SamPairUtil.computeInsertSize exactly.
1159///
1160/// The insert size is computed using 5' positions:
1161/// - For forward strand reads: alignment start (leftmost position)
1162/// - For reverse strand reads: alignment end (rightmost position)
1163///
1164/// Returns 0 if either read is unmapped or if reads are on different references.
1165fn compute_insert_size(rec1: &RecordBuf, rec2: &RecordBuf) -> i32 {
1166    // If either read is unmapped, return 0
1167    if rec1.flags().is_unmapped() || rec2.flags().is_unmapped() {
1168        return 0;
1169    }
1170
1171    // If reads are on different references, return 0
1172    if rec1.reference_sequence_id() != rec2.reference_sequence_id() {
1173        return 0;
1174    }
1175
1176    // Get alignment start positions
1177    let pos1 = rec1.alignment_start().map_or(0, |p| i32::try_from(usize::from(p)).unwrap_or(0));
1178    let pos2 = rec2.alignment_start().map_or(0, |p| i32::try_from(usize::from(p)).unwrap_or(0));
1179
1180    // Calculate alignment ends (1-based, inclusive)
1181    let end1 = pos1 + alignment_length(rec1.cigar()) - 1;
1182    let end2 = pos2 + alignment_length(rec2.cigar()) - 1;
1183
1184    // Use 5' positions: alignment end for reverse strand, alignment start for forward
1185    // This matches htsjdk's computeInsertSize exactly
1186    let first_5prime = if rec1.flags().is_reverse_complemented() { end1 } else { pos1 };
1187    let second_5prime = if rec2.flags().is_reverse_complemented() { end2 } else { pos2 };
1188
1189    // Compute insert size with adjustment
1190    let adjustment = if second_5prime >= first_5prime { 1 } else { -1 };
1191    second_5prime - first_5prime + adjustment
1192}
1193
1194/// Calculates the alignment length from a CIGAR (reference-consuming operations).
1195fn alignment_length(cigar: &noodles::sam::alignment::record_buf::Cigar) -> i32 {
1196    use noodles::sam::alignment::record::cigar::op::Kind;
1197    let mut len = 0i32;
1198    for op in cigar.iter().flatten() {
1199        match op.kind() {
1200            Kind::Match
1201            | Kind::Deletion
1202            | Kind::Skip
1203            | Kind::SequenceMatch
1204            | Kind::SequenceMismatch => {
1205                len += i32::try_from(op.len()).unwrap_or(0);
1206            }
1207            _ => {}
1208        }
1209    }
1210    len
1211}
1212
1213/// Determines the pair orientation for a paired read.
1214///
1215/// This matches htsjdk's `SamPairUtil.getPairOrientation()` algorithm exactly.
1216///
1217/// # Arguments
1218/// * `record` - A paired, mapped read with a mapped mate on the same reference
1219///
1220/// # Returns
1221/// The pair orientation (FR, RF, or Tandem)
1222///
1223/// # Panics
1224/// The caller must ensure the record is paired, mapped, has a mapped mate,
1225/// and is on the same reference as its mate.
1226#[allow(clippy::cast_possible_wrap)]
1227fn get_pair_orientation(record: &RecordBuf) -> PairOrientation {
1228    let is_reverse = record.flags().is_reverse_complemented();
1229    let mate_reverse = record.flags().is_mate_reverse_complemented();
1230
1231    // Same strand = TANDEM
1232    if is_reverse == mate_reverse {
1233        return PairOrientation::Tandem;
1234    }
1235
1236    // Now determine if FR or RF using htsjdk's logic:
1237    // positiveStrandFivePrimePos = readIsOnReverseStrand ? mateStart : alignmentStart
1238    // negativeStrandFivePrimePos = readIsOnReverseStrand ? alignmentEnd : alignmentStart + insertSize
1239    // FR if positiveStrandFivePrimePos < negativeStrandFivePrimePos
1240
1241    let alignment_start = record.alignment_start().map_or(0, usize::from);
1242    let mate_start = record.mate_alignment_start().map_or(0, usize::from);
1243    let insert_size = record.template_length();
1244
1245    let (positive_five_prime, negative_five_prime) = if is_reverse {
1246        // This read is on reverse strand, mate is on positive strand
1247        // positiveStrandFivePrimePos = mateStart
1248        // negativeStrandFivePrimePos = alignmentEnd (this read's end)
1249        let end = record_utils::alignment_end(record).unwrap_or(alignment_start);
1250        (mate_start as i64, end as i64)
1251    } else {
1252        // This read is on positive strand, mate is on reverse strand
1253        // positiveStrandFivePrimePos = alignmentStart (this read's start)
1254        // negativeStrandFivePrimePos = alignmentStart + insertSize
1255        (alignment_start as i64, alignment_start as i64 + i64::from(insert_size))
1256    };
1257
1258    // FR if positive strand 5' < negative strand 5'
1259    if positive_five_prime < negative_five_prime {
1260        PairOrientation::FR
1261    } else {
1262        PairOrientation::RF
1263    }
1264}
1265
1266/// Sets the mate flags (`MATE_REVERSE_COMPLEMENTED` and `MATE_UNMAPPED`) on a record
1267/// based on the mate's flags.
1268///
1269/// This follows htsjdk's `setMateInformationOnSupplementalAlignment` behavior:
1270/// - Sets `MATE_REVERSE_COMPLEMENTED` (0x20) if mate is reverse complemented
1271/// - Sets `MATE_UNMAPPED` (0x8) if mate is unmapped
1272fn set_mate_flags(record: &mut RecordBuf, mate_is_reverse: bool, mate_is_unmapped: bool) {
1273    use noodles::sam::alignment::record::Flags;
1274
1275    let mut flags = record.flags();
1276
1277    // Clear and then conditionally set MATE_REVERSE_COMPLEMENTED
1278    flags.remove(Flags::MATE_REVERSE_COMPLEMENTED);
1279    if mate_is_reverse {
1280        flags.insert(Flags::MATE_REVERSE_COMPLEMENTED);
1281    }
1282
1283    // Clear and then conditionally set MATE_UNMAPPED
1284    flags.remove(Flags::MATE_UNMAPPED);
1285    if mate_is_unmapped {
1286        flags.insert(Flags::MATE_UNMAPPED);
1287    }
1288
1289    *record.flags_mut() = flags;
1290}
1291
1292/// Extracts an i32 value from any integer `BufValue` variant.
1293///
1294/// SAM tags can store integers in various sizes (Int8, Int16, Int32, `UInt8`, `UInt16`, `UInt32`).
1295/// This function extracts the value as an i32 from any of these variants.
1296///
1297/// # Arguments
1298///
1299/// * `value` - The `BufValue` to extract from
1300///
1301/// # Returns
1302///
1303/// `Some(i32)` if the value is any integer type, `None` otherwise.
1304fn extract_int_value(value: &BufValue) -> Option<i32> {
1305    match value {
1306        BufValue::Int8(i) => Some(i32::from(*i)),
1307        BufValue::Int16(i) => Some(i32::from(*i)),
1308        BufValue::Int32(i) => Some(*i),
1309        BufValue::UInt8(i) => Some(i32::from(*i)),
1310        BufValue::UInt16(i) => Some(i32::from(*i)),
1311        BufValue::UInt32(i) => i32::try_from(*i).ok(),
1312        _ => None,
1313    }
1314}
1315
1316/// Converts a CIGAR operation kind to its SAM character representation.
1317///
1318/// Maps CIGAR operation types to their single-character SAM format codes:
1319/// - M (Match/Mismatch)
1320/// - I (Insertion)
1321/// - D (Deletion)
1322/// - N (Skipped region)
1323/// - S (Soft clip)
1324/// - H (Hard clip)
1325/// - P (Padding)
1326/// - = (Sequence match)
1327/// - X (Sequence mismatch)
1328///
1329/// # Arguments
1330///
1331/// * `kind` - The CIGAR operation kind to convert
1332///
1333/// # Returns
1334///
1335/// Single character representing the operation in SAM format
1336///
1337/// # Panics
1338///
1339/// This function does not panic. The match is exhaustive over all CIGAR operation kinds.
1340fn kind_to_char(kind: noodles::sam::alignment::record::cigar::op::Kind) -> char {
1341    use noodles::sam::alignment::record::cigar::op::Kind;
1342    match kind {
1343        Kind::Match => 'M',
1344        Kind::Insertion => 'I',
1345        Kind::Deletion => 'D',
1346        Kind::Skip => 'N',
1347        Kind::SoftClip => 'S',
1348        Kind::HardClip => 'H',
1349        Kind::Pad => 'P',
1350        Kind::SequenceMatch => '=',
1351        Kind::SequenceMismatch => 'X',
1352    }
1353}
1354
1355/// Converts a CIGAR to its string representation
1356///
1357/// # Arguments
1358///
1359/// * `cigar` - The CIGAR to convert
1360///
1361/// # Returns
1362///
1363/// String representation of the CIGAR (e.g., "100M5I3D") or "*" if empty
1364///
1365/// # Errors
1366///
1367/// Returns an error if the CIGAR operations cannot be parsed (malformed CIGAR data)
1368fn cigar_to_string(cigar: &noodles::sam::alignment::record_buf::Cigar) -> Result<String> {
1369    if cigar.is_empty() {
1370        return Ok(String::from("*"));
1371    }
1372
1373    // Pre-allocate: estimate ~4 chars per op (e.g., "10M" = 3 chars, "100M" = 4 chars)
1374    let mut result = String::with_capacity(cigar.as_ref().len() * 4);
1375    for op in cigar.iter() {
1376        let op = op?;
1377        let _ = write!(result, "{}{}", op.len(), kind_to_char(op.kind()));
1378    }
1379    Ok(result)
1380}
1381
1382/// Iterator that groups consecutive records by query name into `Template` objects.
1383///
1384/// This iterator consumes a stream of SAM/BAM records and yields complete `Template`
1385/// objects. It assumes the input is queryname-sorted or queryname-grouped, meaning all
1386/// records with the same query name appear consecutively in the stream.
1387///
1388/// # Requirements
1389///
1390/// The input records MUST be queryname-sorted or grouped. If records with the same
1391/// name are not consecutive, they will be placed in separate templates.
1392///
1393/// # Examples
1394///
1395/// ```rust,ignore
1396/// let record_iter = bam_reader.records();
1397/// let template_iter = TemplateIterator::new(record_iter);
1398///
1399/// for template in template_iter {
1400///     let template = template?;
1401///     // Process template
1402/// }
1403/// ```
1404pub struct TemplateIterator<I>
1405where
1406    I: Iterator<Item = Result<RecordBuf>>,
1407{
1408    record_iter: I,
1409    builder: Builder,
1410}
1411
1412impl<I> TemplateIterator<I>
1413where
1414    I: Iterator<Item = Result<RecordBuf>>,
1415{
1416    /// Creates a new `TemplateIterator`
1417    ///
1418    /// # Arguments
1419    ///
1420    /// * `record_iter` - Iterator over records to group into templates
1421    ///
1422    /// # Returns
1423    ///
1424    /// A new `TemplateIterator` ready to produce Templates
1425    pub fn new(record_iter: I) -> Self {
1426        TemplateIterator { record_iter, builder: Builder::new() }
1427    }
1428}
1429
1430impl<I> Iterator for TemplateIterator<I>
1431where
1432    I: Iterator<Item = Result<RecordBuf>>,
1433{
1434    type Item = Result<Template>;
1435
1436    /// Returns the next Template from the record iterator
1437    ///
1438    /// Reads records until the query name changes, then returns the completed
1439    /// template. On EOF, returns any remaining template, then None.
1440    ///
1441    /// # Returns
1442    ///
1443    /// * `Some(Ok(Template))` - Next template successfully read
1444    /// * `Some(Err(_))` - Error from underlying iterator
1445    /// * `None` - No more templates (EOF reached)
1446    fn next(&mut self) -> Option<Self::Item> {
1447        loop {
1448            match self.record_iter.next() {
1449                None => {
1450                    // EOF - return any remaining template
1451                    if self.builder.is_empty() {
1452                        return None;
1453                    }
1454                    return Some(self.builder.build());
1455                }
1456                Some(Err(e)) => {
1457                    // Error from underlying iterator
1458                    return Some(Err(e));
1459                }
1460                Some(Ok(record)) => {
1461                    let name: Vec<u8> = if let Some(n) = record.name() {
1462                        Vec::from(<_ as AsRef<[u8]>>::as_ref(n))
1463                    } else {
1464                        Vec::new()
1465                    };
1466
1467                    if self.builder.name.iter().any(|n| *n == name) || self.builder.is_empty() {
1468                        if let Err(e) = self.builder.push(record) {
1469                            return Some(Err(e));
1470                        }
1471                    } else {
1472                        let template: std::result::Result<Template, anyhow::Error> =
1473                            self.builder.build();
1474                        if let Err(e) = self.builder.push(record) {
1475                            return Some(Err(e));
1476                        }
1477                        return Some(template);
1478                    }
1479                }
1480            }
1481        }
1482    }
1483}
1484
1485// ============================================================================
1486// MemoryEstimate Implementations
1487// ============================================================================
1488
1489impl MemoryEstimate for Template {
1490    fn estimate_heap_size(&self) -> usize {
1491        // name: Vec<u8>
1492        let name_size = self.name.capacity();
1493
1494        // records: Vec<RecordBuf>
1495        // Each RecordBuf contains:
1496        // - name: Option<BString> (Vec<u8>)
1497        // - sequence: Sequence (Vec<u8>)
1498        // - quality_scores: QualityScores (Vec<u8>)
1499        // - cigar: Cigar (Vec<Op>)
1500        // - data: Data (IndexMap of tags to values)
1501        let records_size: usize = self.records.iter().map(estimate_record_buf_heap_size).sum();
1502        let records_vec_overhead = self.records.capacity() * std::mem::size_of::<RecordBuf>();
1503
1504        let raw_records_size = self.raw_records.as_ref().map_or(0, |rr| {
1505            rr.iter().map(Vec::capacity).sum::<usize>()
1506                + rr.capacity() * std::mem::size_of::<Vec<u8>>()
1507        });
1508
1509        name_size + records_size + records_vec_overhead + raw_records_size
1510    }
1511}
1512
1513impl MemoryEstimate for TemplateBatch {
1514    fn estimate_heap_size(&self) -> usize {
1515        self.iter().map(MemoryEstimate::estimate_heap_size).sum::<usize>()
1516            + self.capacity() * std::mem::size_of::<Template>()
1517    }
1518}
1519
1520/// Estimate heap size of a `RecordBuf`.
1521///
1522/// This function estimates the heap memory used by a noodles `RecordBuf`,
1523/// including its name, sequence, quality scores, CIGAR, and data fields.
1524///
1525/// noodles `RecordBuf` layout:
1526/// - name: `Option<BString>` (`Vec<u8>`) - heap allocated
1527/// - sequence: `Sequence` (`Vec<u8>`, 1 byte per base, unpacked ASCII)
1528/// - `quality_scores`: `QualityScores` (`Vec<u8>`)
1529/// - cigar: `Cigar` (`Vec<Op>`, each `Op` is 4 bytes)
1530/// - data: Data (`IndexMap`<Tag, Value>) - significant overhead from hash table
1531#[must_use]
1532pub fn estimate_record_buf_heap_size(record: &RecordBuf) -> usize {
1533    // Name: Option<BString> which is a Vec<u8>
1534    // NB: noodles API returns &BStr, so we can only measure len(), not capacity().
1535    // Same constraint applies to sequence and quality_scores below.
1536    let name_size = record.name().map_or(0, |n| n.len());
1537
1538    // Sequence: noodles stores bases as unpacked ASCII (1 byte per base in Vec<u8>)
1539    // .len() returns number of bases, which equals the heap allocation in bytes
1540    let seq_len = record.sequence().len();
1541
1542    // Quality scores: Vec<u8>, one byte per base
1543    let qual_len = record.quality_scores().len();
1544
1545    // CIGAR: Vec<Op> where each Op is 4 bytes (u32)
1546    let cigar_ops = record.cigar().as_ref().len();
1547    let cigar_size = cigar_ops * 4;
1548
1549    // Data fields: IndexMap<Tag, Value>
1550    // IndexMap stores entries in a Vec<Bucket<(K, V)>> plus a hash table Vec<usize>.
1551    // - Each entry: Tag (2 bytes) + Value enum (~40 bytes on stack for noodles Value)
1552    //   + padding = ~48 bytes per entry in the entries vec
1553    // - Hash table: capacity * 8 bytes (indices) + capacity * 8 bytes (hashes)
1554    //   With ~87.5% load factor, capacity ~= count * 1.15
1555    // - Values with heap allocation (String, Array): add ~24 bytes (Vec header) + data
1556    //   Typical BAM tags: RX/QX (string, ~20 bytes), MI (int, no heap), CB/CR (string, ~16 bytes)
1557    //   Estimate ~32 bytes average heap per string-type field, ~50% of fields are strings
1558    let data_fields = record.data().iter().count();
1559    let entry_capacity = (data_fields * 115) / 100 + 1; // ~1.15x for load factor
1560    let entries_size = data_fields * 48; // entry vec storage
1561    let hash_table_size = entry_capacity * 16; // hash + index arrays
1562    let value_heap_size = data_fields * 16; // average heap per field (50% strings × 32 bytes)
1563    let data_size = entries_size + hash_table_size + value_heap_size;
1564
1565    // Note: RecordBuf inline struct overhead (flags, position, etc.) is accounted for
1566    // by the caller via `capacity * size_of::<RecordBuf>()`, so we only count heap allocations here.
1567    name_size + seq_len + qual_len + cigar_size + data_size
1568}
1569
1570#[cfg(test)]
1571#[allow(clippy::similar_names)]
1572mod tests {
1573    use super::*;
1574    use crate::sam::builder::RecordBuilder;
1575    use noodles::sam::alignment::record::Flags;
1576    use noodles::sam::alignment::record::cigar::Op;
1577    use noodles::sam::alignment::record::cigar::op::Kind;
1578    use noodles::sam::alignment::record_buf::Cigar as CigarBuf;
1579
1580    fn create_test_record(name: &[u8], flags: u16) -> RecordBuf {
1581        RecordBuilder::new()
1582            .name(std::str::from_utf8(name).unwrap())
1583            .flags(Flags::from(flags))
1584            .build()
1585    }
1586
1587    // SAM flag constants
1588    const FLAG_PAIRED: u16 = 0x1;
1589    const FLAG_READ1: u16 = 0x40;
1590    const FLAG_READ2: u16 = 0x80;
1591    const FLAG_SECONDARY: u16 = 0x100;
1592    const FLAG_SUPPLEMENTARY: u16 = 0x800;
1593
1594    #[test]
1595    fn test_template_new() {
1596        let template = Template::new(b"read1".to_vec());
1597        assert_eq!(template.name, b"read1");
1598        assert_eq!(template.read_count(), 0);
1599    }
1600
1601    #[test]
1602    fn test_builder_empty() {
1603        let builder = Builder::new();
1604        assert!(builder.is_empty());
1605        assert_eq!(builder.len(), 0);
1606    }
1607
1608    #[test]
1609    fn test_builder_push_r1_only() -> Result<()> {
1610        let mut builder = Builder::new();
1611        let r1 = create_test_record(b"read1", 0);
1612        builder.push(r1)?;
1613        assert_eq!(builder.len(), 1);
1614        assert!(!builder.is_empty());
1615        Ok(())
1616    }
1617
1618    #[test]
1619    fn test_builder_push_paired_end() -> Result<()> {
1620        let mut builder = Builder::new();
1621        let r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1);
1622        let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
1623        builder.push(r1)?;
1624        builder.push(r2)?;
1625        assert_eq!(builder.len(), 2);
1626        Ok(())
1627    }
1628
1629    #[test]
1630    fn test_builder_push_supplementary() -> Result<()> {
1631        let mut builder = Builder::new();
1632        let supp = create_test_record(b"read1", FLAG_SUPPLEMENTARY);
1633        builder.push(supp)?;
1634        assert_eq!(builder.len(), 1);
1635        Ok(())
1636    }
1637
1638    #[test]
1639    fn test_builder_push_secondary() -> Result<()> {
1640        let mut builder = Builder::new();
1641        let sec = create_test_record(b"read1", FLAG_SECONDARY);
1642        builder.push(sec)?;
1643        assert_eq!(builder.len(), 1);
1644        Ok(())
1645    }
1646
1647    #[test]
1648    fn test_builder_error_on_name_mismatch() {
1649        let mut builder = Builder::new();
1650        let r1 = create_test_record(b"read1", 0);
1651        let r2 = create_test_record(b"read2", 0);
1652        builder.push(r1).unwrap();
1653        let result = builder.push(r2);
1654        assert!(result.is_err());
1655        assert!(result.unwrap_err().to_string().contains("mismatch"));
1656    }
1657
1658    #[test]
1659    fn test_builder_error_on_multiple_r1() {
1660        let mut builder = Builder::new();
1661        let r1a = create_test_record(b"read1", 0);
1662        let r1b = create_test_record(b"read1", 0);
1663        builder.push(r1a).unwrap();
1664        let result = builder.push(r1b);
1665        assert!(result.is_err());
1666        assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
1667    }
1668
1669    #[test]
1670    fn test_builder_error_on_multiple_r2() {
1671        let mut builder = Builder::new();
1672        let r2a = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
1673        let r2b = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
1674        builder.push(r2a).unwrap();
1675        let result = builder.push(r2b);
1676        assert!(result.is_err());
1677        assert!(result.unwrap_err().to_string().contains("Multiple non-secondary"));
1678    }
1679
1680    #[test]
1681    fn test_builder_build_empty_error() {
1682        let mut builder = Builder::new();
1683        let result = builder.build();
1684        assert!(result.is_err());
1685        assert!(result.unwrap_err().to_string().contains("No records"));
1686    }
1687
1688    #[test]
1689    fn test_builder_build_success() -> Result<()> {
1690        let mut builder = Builder::new();
1691        let r1 = create_test_record(b"read1", 0);
1692        builder.push(r1)?;
1693        let template = builder.build()?;
1694        assert_eq!(template.name, b"read1");
1695        assert_eq!(template.read_count(), 1);
1696        assert!(template.r1().is_some());
1697        Ok(())
1698    }
1699
1700    #[test]
1701    fn test_template_from_records() -> Result<()> {
1702        let records = vec![
1703            create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
1704            create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
1705        ];
1706        let template = Template::from_records(records)?;
1707        assert_eq!(template.read_count(), 2);
1708        assert!(template.r1().is_some());
1709        assert!(template.r2().is_some());
1710        Ok(())
1711    }
1712
1713    #[test]
1714    fn test_template_primary_reads() -> Result<()> {
1715        let mut builder = Builder::new();
1716        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
1717        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1718        let template = builder.build()?;
1719        let primaries: Vec<_> = template.primary_reads().collect();
1720        assert_eq!(primaries.len(), 2);
1721        Ok(())
1722    }
1723
1724    #[test]
1725    fn test_template_into_primary_reads() -> Result<()> {
1726        let records = vec![
1727            create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
1728            create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
1729        ];
1730        let template = Template::from_records(records)?;
1731
1732        // Verify we have both reads before consuming
1733        assert!(template.r1().is_some());
1734        assert!(template.r2().is_some());
1735
1736        // Consume the template and get owned reads
1737        let (r1, r2) = template.into_primary_reads();
1738
1739        // Both should be Some
1740        assert!(r1.is_some());
1741        assert!(r2.is_some());
1742
1743        // Verify the records have the expected flags
1744        let r1 = r1.unwrap();
1745        let r2 = r2.unwrap();
1746        assert!(r1.flags().is_first_segment());
1747        assert!(r2.flags().is_last_segment());
1748
1749        Ok(())
1750    }
1751
1752    #[test]
1753    fn test_template_into_primary_reads_r1_only() -> Result<()> {
1754        let records = vec![create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1)];
1755        let template = Template::from_records(records)?;
1756
1757        let (r1, r2) = template.into_primary_reads();
1758
1759        assert!(r1.is_some());
1760        assert!(r2.is_none());
1761
1762        Ok(())
1763    }
1764
1765    #[test]
1766    fn test_template_all_reads() -> Result<()> {
1767        let mut builder = Builder::new();
1768        builder.push(create_test_record(b"read1", 0))?;
1769        builder.push(create_test_record(b"read1", FLAG_SUPPLEMENTARY))?;
1770        builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
1771        let template = builder.build()?;
1772        let all: Vec<_> = template.all_reads().collect();
1773        assert_eq!(all.len(), 3);
1774        Ok(())
1775    }
1776
1777    #[test]
1778    fn test_template_all_r1s() -> Result<()> {
1779        let mut builder = Builder::new();
1780        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
1781        builder
1782            .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
1783        let template = builder.build()?;
1784        let all_r1s: Vec<_> = template.all_r1s().collect();
1785        assert_eq!(all_r1s.len(), 2);
1786        Ok(())
1787    }
1788
1789    #[test]
1790    fn test_template_all_r2s() -> Result<()> {
1791        let mut builder = Builder::new();
1792        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1793        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
1794        let template = builder.build()?;
1795        let all_r2s: Vec<_> = template.all_r2s().collect();
1796        assert_eq!(all_r2s.len(), 2);
1797        Ok(())
1798    }
1799
1800    #[test]
1801    fn test_template_supplementals_and_secondaries() -> Result<()> {
1802        let mut builder = Builder::new();
1803        builder.push(create_test_record(b"read1", 0))?;
1804        builder.push(create_test_record(b"read1", FLAG_SUPPLEMENTARY))?;
1805        builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
1806        let template = builder.build()?;
1807        let non_primary: Vec<_> = template.all_supplementary_and_secondary().collect();
1808        assert_eq!(non_primary.len(), 2);
1809        Ok(())
1810    }
1811
1812    #[test]
1813    fn test_cigar_to_string() {
1814        use noodles::sam::alignment::record_buf::Cigar;
1815        let cigar = Cigar::default();
1816        assert_eq!(cigar_to_string(&cigar).unwrap(), "*");
1817    }
1818
1819    #[test]
1820    fn test_cigar_to_string_with_ops() {
1821        let ops =
1822            vec![Op::new(Kind::Match, 10), Op::new(Kind::Deletion, 2), Op::new(Kind::Match, 5)];
1823        let cigar = CigarBuf::from(ops);
1824        let result = cigar_to_string(&cigar).unwrap();
1825        assert_eq!(result, "10M2D5M");
1826    }
1827
1828    #[test]
1829    fn test_template_builder_reuse() -> Result<()> {
1830        let mut builder = Builder::new();
1831        builder.push(create_test_record(b"read1", 0))?;
1832        let template1 = builder.build()?;
1833        assert_eq!(template1.name, b"read1");
1834        builder.push(create_test_record(b"read2", 0))?;
1835        let template2 = builder.build()?;
1836        assert_eq!(template2.name, b"read2");
1837        Ok(())
1838    }
1839
1840    #[test]
1841    fn test_template_r1_supplementals() -> Result<()> {
1842        let mut builder = Builder::new();
1843        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1))?;
1844        builder
1845            .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
1846        builder
1847            .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY))?;
1848        let template = builder.build()?;
1849        let supps = template.r1_supplementals();
1850        assert_eq!(supps.len(), 2);
1851        Ok(())
1852    }
1853
1854    #[test]
1855    fn test_template_r2_supplementals() -> Result<()> {
1856        let mut builder = Builder::new();
1857        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1858        builder
1859            .push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY))?;
1860        let template = builder.build()?;
1861        let supps = template.r2_supplementals();
1862        assert_eq!(supps.len(), 1);
1863        Ok(())
1864    }
1865
1866    #[test]
1867    fn test_template_r1_secondaries() -> Result<()> {
1868        let mut builder = Builder::new();
1869        builder.push(create_test_record(b"read1", 0))?;
1870        builder.push(create_test_record(b"read1", FLAG_SECONDARY))?;
1871        let template = builder.build()?;
1872        let secs = template.r1_secondaries();
1873        assert_eq!(secs.len(), 1);
1874        Ok(())
1875    }
1876
1877    #[test]
1878    fn test_template_r2_secondaries() -> Result<()> {
1879        let mut builder = Builder::new();
1880        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2))?;
1881        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
1882        builder.push(create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY))?;
1883        let template = builder.build()?;
1884        let secs = template.r2_secondaries();
1885        assert_eq!(secs.len(), 2);
1886        Ok(())
1887    }
1888
1889    // Tests for extract_int_value helper function
1890    #[test]
1891    fn test_extract_int_value_int8() {
1892        let value = BufValue::Int8(42);
1893        assert_eq!(extract_int_value(&value), Some(42));
1894    }
1895
1896    #[test]
1897    fn test_extract_int_value_int16() {
1898        let value = BufValue::Int16(1234);
1899        assert_eq!(extract_int_value(&value), Some(1234));
1900    }
1901
1902    #[test]
1903    fn test_extract_int_value_int32() {
1904        let value = BufValue::Int32(123_456);
1905        assert_eq!(extract_int_value(&value), Some(123_456));
1906    }
1907
1908    #[test]
1909    fn test_extract_int_value_uint8() {
1910        let value = BufValue::UInt8(255);
1911        assert_eq!(extract_int_value(&value), Some(255));
1912    }
1913
1914    #[test]
1915    fn test_extract_int_value_uint16() {
1916        let value = BufValue::UInt16(65535);
1917        assert_eq!(extract_int_value(&value), Some(65535));
1918    }
1919
1920    #[test]
1921    fn test_extract_int_value_uint32() {
1922        let value = BufValue::UInt32(100_000);
1923        assert_eq!(extract_int_value(&value), Some(100_000));
1924    }
1925
1926    #[test]
1927    fn test_extract_int_value_uint32_overflow() {
1928        // Value too large to fit in i32
1929        let value = BufValue::UInt32(u32::MAX);
1930        assert_eq!(extract_int_value(&value), None);
1931    }
1932
1933    #[test]
1934    fn test_extract_int_value_string_returns_none() {
1935        let value = BufValue::String("test".into());
1936        assert_eq!(extract_int_value(&value), None);
1937    }
1938
1939    #[test]
1940    fn test_extract_int_value_negative() {
1941        let value = BufValue::Int8(-42);
1942        assert_eq!(extract_int_value(&value), Some(-42));
1943
1944        let value = BufValue::Int16(-1234);
1945        assert_eq!(extract_int_value(&value), Some(-1234));
1946
1947        let value = BufValue::Int32(-123_456);
1948        assert_eq!(extract_int_value(&value), Some(-123_456));
1949    }
1950
1951    /// Helper to create a mapped record with position and CIGAR for `fix_mate_info` tests
1952    fn create_mapped_record(name: &[u8], flags: u16, pos: usize, mapq: u8) -> RecordBuf {
1953        create_mapped_record_with_tlen(name, flags, pos, mapq, 0)
1954    }
1955
1956    /// Helper to create a mapped record with position, CIGAR, and TLEN for `fix_mate_info` tests
1957    fn create_mapped_record_with_tlen(
1958        name: &[u8],
1959        flags: u16,
1960        pos: usize,
1961        mapq: u8,
1962        tlen: i32,
1963    ) -> RecordBuf {
1964        let is_read1 = (flags & FLAG_READ1) != 0;
1965        let is_paired = (flags & FLAG_PAIRED) != 0;
1966
1967        let mut builder = RecordBuilder::new()
1968            .name(std::str::from_utf8(name).unwrap())
1969            .sequence(&"A".repeat(100))
1970            .qualities(&[30u8; 100])
1971            .cigar("100M")
1972            .reference_sequence_id(0)
1973            .alignment_start(pos)
1974            .mapping_quality(mapq)
1975            .template_length(tlen);
1976
1977        if is_paired {
1978            builder = builder.first_segment(is_read1);
1979        }
1980
1981        // Handle secondary/supplementary flags - directly set flags for non-standard combinations
1982        let mut record = builder.build();
1983        // Apply any additional flags not covered by builder methods
1984        *record.flags_mut() = Flags::from(flags);
1985
1986        record
1987    }
1988
1989    /// Tests that `fix_mate_info` correctly sets ms tag when AS is stored as `Int8`.
1990    /// This was a bug where only `Int32` AS values were recognized.
1991    #[test]
1992    fn test_fix_mate_info_sets_ms_tag_with_int8_as() -> Result<()> {
1993        let as_tag = Tag::new(b'A', b'S');
1994        let ms_tag = Tag::new(b'm', b's');
1995
1996        let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
1997        r1.data_mut().insert(as_tag, BufValue::Int8(55)); // AS as Int8 (small value)
1998
1999        let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2000        r2.data_mut().insert(as_tag, BufValue::Int8(44)); // AS as Int8 (small value)
2001
2002        let mut builder = Builder::new();
2003        builder.push(r1)?;
2004        builder.push(r2)?;
2005        let mut template = builder.build()?;
2006
2007        template.fix_mate_info()?;
2008
2009        // R1 should have ms tag with R2's AS value (44)
2010        let r1_ms = template.records[0].data().get(&ms_tag);
2011        assert!(r1_ms.is_some(), "R1 should have ms tag");
2012        assert_eq!(extract_int_value(r1_ms.unwrap()), Some(44));
2013
2014        // R2 should have ms tag with R1's AS value (55)
2015        let r2_ms = template.records[1].data().get(&ms_tag);
2016        assert!(r2_ms.is_some(), "R2 should have ms tag");
2017        assert_eq!(extract_int_value(r2_ms.unwrap()), Some(55));
2018
2019        Ok(())
2020    }
2021
2022    /// Tests that `fix_mate_info` correctly sets ms tag when AS is stored as `UInt8`
2023    #[test]
2024    fn test_fix_mate_info_sets_ms_tag_with_uint8_as() -> Result<()> {
2025        let as_tag = Tag::new(b'A', b'S');
2026        let ms_tag = Tag::new(b'm', b's');
2027
2028        let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2029        r1.data_mut().insert(as_tag, BufValue::UInt8(77)); // AS as UInt8
2030
2031        let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2032        r2.data_mut().insert(as_tag, BufValue::UInt8(88)); // AS as UInt8
2033
2034        let mut builder = Builder::new();
2035        builder.push(r1)?;
2036        builder.push(r2)?;
2037        let mut template = builder.build()?;
2038
2039        template.fix_mate_info()?;
2040
2041        // R1 should have ms tag with R2's AS value (88)
2042        let r1_ms = template.records[0].data().get(&ms_tag);
2043        assert!(r1_ms.is_some(), "R1 should have ms tag");
2044        assert_eq!(extract_int_value(r1_ms.unwrap()), Some(88));
2045
2046        // R2 should have ms tag with R1's AS value (77)
2047        let r2_ms = template.records[1].data().get(&ms_tag);
2048        assert!(r2_ms.is_some(), "R2 should have ms tag");
2049        assert_eq!(extract_int_value(r2_ms.unwrap()), Some(77));
2050
2051        Ok(())
2052    }
2053
2054    /// Tests that `fix_mate_info` correctly sets ms tag when AS is stored as `Int16`
2055    #[test]
2056    fn test_fix_mate_info_sets_ms_tag_with_int16_as() -> Result<()> {
2057        let as_tag = Tag::new(b'A', b'S');
2058        let ms_tag = Tag::new(b'm', b's');
2059
2060        let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2061        r1.data_mut().insert(as_tag, BufValue::Int16(1000)); // AS as Int16
2062
2063        let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2064        r2.data_mut().insert(as_tag, BufValue::Int16(2000)); // AS as Int16
2065
2066        let mut builder = Builder::new();
2067        builder.push(r1)?;
2068        builder.push(r2)?;
2069        let mut template = builder.build()?;
2070
2071        template.fix_mate_info()?;
2072
2073        // R1 should have ms tag with R2's AS value (2000)
2074        let r1_ms = template.records[0].data().get(&ms_tag);
2075        assert!(r1_ms.is_some(), "R1 should have ms tag");
2076        assert_eq!(extract_int_value(r1_ms.unwrap()), Some(2000));
2077
2078        // R2 should have ms tag with R1's AS value (1000)
2079        let r2_ms = template.records[1].data().get(&ms_tag);
2080        assert!(r2_ms.is_some(), "R2 should have ms tag");
2081        assert_eq!(extract_int_value(r2_ms.unwrap()), Some(1000));
2082
2083        Ok(())
2084    }
2085
2086    /// Tests that `fix_mate_info` correctly sets ms tag when AS is stored as `Int32`
2087    #[test]
2088    fn test_fix_mate_info_sets_ms_tag_with_int32_as() -> Result<()> {
2089        let as_tag = Tag::new(b'A', b'S');
2090        let ms_tag = Tag::new(b'm', b's');
2091
2092        let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2093        r1.data_mut().insert(as_tag, BufValue::Int32(100_000)); // AS as Int32
2094
2095        let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2096        r2.data_mut().insert(as_tag, BufValue::Int32(200_000)); // AS as Int32
2097
2098        let mut builder = Builder::new();
2099        builder.push(r1)?;
2100        builder.push(r2)?;
2101        let mut template = builder.build()?;
2102
2103        template.fix_mate_info()?;
2104
2105        // R1 should have ms tag with R2's AS value
2106        let r1_ms = template.records[0].data().get(&ms_tag);
2107        assert!(r1_ms.is_some(), "R1 should have ms tag");
2108        assert_eq!(extract_int_value(r1_ms.unwrap()), Some(200_000));
2109
2110        // R2 should have ms tag with R1's AS value
2111        let r2_ms = template.records[1].data().get(&ms_tag);
2112        assert!(r2_ms.is_some(), "R2 should have ms tag");
2113        assert_eq!(extract_int_value(r2_ms.unwrap()), Some(100_000));
2114
2115        Ok(())
2116    }
2117
2118    /// Tests that `fix_mate_info` does not set ms tag when AS is missing
2119    #[test]
2120    fn test_fix_mate_info_no_ms_tag_without_as() -> Result<()> {
2121        let ms_tag = Tag::new(b'm', b's');
2122
2123        let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2124        let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2125
2126        let mut builder = Builder::new();
2127        builder.push(r1)?;
2128        builder.push(r2)?;
2129        let mut template = builder.build()?;
2130
2131        template.fix_mate_info()?;
2132
2133        // Neither record should have ms tag since AS is missing
2134        assert!(template.records[0].data().get(&ms_tag).is_none(), "R1 should not have ms tag");
2135        assert!(template.records[1].data().get(&ms_tag).is_none(), "R2 should not have ms tag");
2136
2137        Ok(())
2138    }
2139
2140    /// Tests that `fix_mate_info` sets ms tag on supplementary alignments
2141    #[test]
2142    fn test_fix_mate_info_sets_ms_tag_on_supplementals() -> Result<()> {
2143        let as_tag = Tag::new(b'A', b'S');
2144        let ms_tag = Tag::new(b'm', b's');
2145
2146        let mut r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2147        r1.data_mut().insert(as_tag, BufValue::Int8(55));
2148
2149        let mut r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40);
2150        r2.data_mut().insert(as_tag, BufValue::Int8(44));
2151
2152        let mut r1_supp =
2153            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 300, 20);
2154        r1_supp.data_mut().insert(as_tag, BufValue::Int8(33));
2155
2156        let mut builder = Builder::new();
2157        builder.push(r1)?;
2158        builder.push(r2)?;
2159        builder.push(r1_supp)?;
2160        let mut template = builder.build()?;
2161
2162        template.fix_mate_info()?;
2163
2164        // R1 supplementary should have ms tag with R2's AS value (44)
2165        let r1_supp_ms = template.records[2].data().get(&ms_tag);
2166        assert!(r1_supp_ms.is_some(), "R1 supplementary should have ms tag");
2167        assert_eq!(extract_int_value(r1_supp_ms.unwrap()), Some(44));
2168
2169        Ok(())
2170    }
2171
2172    /// Tests that `fix_mate_info` sets TLEN on R1 supplementary alignments.
2173    /// TLEN should be set to negative of mate primary's (R2) TLEN.
2174    #[test]
2175    fn test_fix_mate_info_sets_tlen_on_r1_supplementals() -> Result<()> {
2176        // R1 primary with TLEN=200
2177        let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
2178        // R2 primary with TLEN=-200
2179        let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -200);
2180        // R1 supplementary with original TLEN=0
2181        let r1_supp = create_mapped_record_with_tlen(
2182            b"read1",
2183            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2184            300,
2185            20,
2186            0,
2187        );
2188
2189        let mut builder = Builder::new();
2190        builder.push(r1)?;
2191        builder.push(r2)?;
2192        builder.push(r1_supp)?;
2193        let mut template = builder.build()?;
2194
2195        template.fix_mate_info()?;
2196
2197        // R1 supplementary TLEN should be -(-101) = 101 (negative of R2's recalculated TLEN)
2198        // R1(pos=100,forward) and R2(pos=200,forward) → 5' positions are 100 and 200 → insert_size = 101
2199        assert_eq!(
2200            template.records[2].template_length(),
2201            101,
2202            "R1 supplementary TLEN should be negative of R2's TLEN"
2203        );
2204
2205        Ok(())
2206    }
2207
2208    /// Tests that `fix_mate_info` sets TLEN on R2 supplementary alignments.
2209    /// TLEN should be set to negative of mate primary's (R1) TLEN.
2210    #[test]
2211    fn test_fix_mate_info_sets_tlen_on_r2_supplementals() -> Result<()> {
2212        // R1 primary with TLEN=300
2213        let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 300);
2214        // R2 primary with TLEN=-300
2215        let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -300);
2216        // R2 supplementary with original TLEN=0
2217        let r2_supp = create_mapped_record_with_tlen(
2218            b"read1",
2219            FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2220            400,
2221            25,
2222            0,
2223        );
2224
2225        let mut builder = Builder::new();
2226        builder.push(r1)?;
2227        builder.push(r2)?;
2228        builder.push(r2_supp)?;
2229        let mut template = builder.build()?;
2230
2231        template.fix_mate_info()?;
2232
2233        // R2 supplementary TLEN should be -(101) = -101 (negative of R1's recalculated TLEN)
2234        // R1(pos=100,forward) and R2(pos=200,forward) → 5' positions are 100 and 200 → insert_size = 101
2235        assert_eq!(
2236            template.records[2].template_length(),
2237            -101,
2238            "R2 supplementary TLEN should be negative of R1's TLEN"
2239        );
2240
2241        Ok(())
2242    }
2243
2244    /// Tests that `fix_mate_info` handles multiple supplementary alignments
2245    #[test]
2246    fn test_fix_mate_info_sets_tlen_on_multiple_supplementals() -> Result<()> {
2247        // R1 primary with TLEN=500
2248        let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 500);
2249        // R2 primary with TLEN=-500
2250        let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, -500);
2251        // Two R1 supplementaries
2252        let r1_supp1 = create_mapped_record_with_tlen(
2253            b"read1",
2254            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2255            300,
2256            20,
2257            0,
2258        );
2259        let r1_supp2 = create_mapped_record_with_tlen(
2260            b"read1",
2261            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2262            400,
2263            15,
2264            0,
2265        );
2266        // Two R2 supplementaries
2267        let r2_supp1 = create_mapped_record_with_tlen(
2268            b"read1",
2269            FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2270            500,
2271            25,
2272            0,
2273        );
2274        let r2_supp2 = create_mapped_record_with_tlen(
2275            b"read1",
2276            FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2277            600,
2278            10,
2279            0,
2280        );
2281
2282        let mut builder = Builder::new();
2283        builder.push(r1)?;
2284        builder.push(r2)?;
2285        builder.push(r1_supp1)?;
2286        builder.push(r1_supp2)?;
2287        builder.push(r2_supp1)?;
2288        builder.push(r2_supp2)?;
2289        let mut template = builder.build()?;
2290
2291        template.fix_mate_info()?;
2292
2293        // Order: R1[0], R2[1], R1_supp1[2], R1_supp2[3], R2_supp1[4], R2_supp2[5]
2294        // R1(pos=100,forward) and R2(pos=200,forward) → 5' positions are 100 and 200 → insert_size = 101
2295
2296        // R1 supplementaries should have TLEN = -(-101) = 101
2297        assert_eq!(template.records[2].template_length(), 101, "R1 supp1 TLEN");
2298        assert_eq!(template.records[3].template_length(), 101, "R1 supp2 TLEN");
2299
2300        // R2 supplementaries should have TLEN = -(101) = -101
2301        assert_eq!(template.records[4].template_length(), -101, "R2 supp1 TLEN");
2302        assert_eq!(template.records[5].template_length(), -101, "R2 supp2 TLEN");
2303
2304        Ok(())
2305    }
2306
2307    /// Tests that `fix_mate_info` recalculates TLEN for supplementaries based on primary positions
2308    #[test]
2309    fn test_fix_mate_info_tlen_recalculated() -> Result<()> {
2310        // Primary reads with TLEN=0 (initial value, will be recalculated)
2311        // Note: even if original TLEN is 0, fix_mate_info recalculates based on positions
2312        let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 0);
2313        let r2 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 40, 0);
2314        let r1_supp = create_mapped_record_with_tlen(
2315            b"read1",
2316            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2317            300,
2318            20,
2319            999, // Non-zero original TLEN (will be recalculated)
2320        );
2321
2322        let mut builder = Builder::new();
2323        builder.push(r1)?;
2324        builder.push(r2)?;
2325        builder.push(r1_supp)?;
2326        let mut template = builder.build()?;
2327
2328        template.fix_mate_info()?;
2329
2330        // R1(pos=100,forward) and R2(pos=200,forward) → 5' positions are 100 and 200 → insert_size = 101
2331        // R1 supplementary TLEN = -R2.TLEN = -(-101) = 101
2332        assert_eq!(
2333            template.records[2].template_length(),
2334            101,
2335            "R1 supplementary TLEN should be recalculated from positions"
2336        );
2337
2338        Ok(())
2339    }
2340
2341    /// Tests that `fix_mate_info` doesn't affect supplementaries when mate primary is missing
2342    #[test]
2343    fn test_fix_mate_info_no_mate_primary() -> Result<()> {
2344        // Only R1 primary (no R2 primary)
2345        let r1 = create_mapped_record_with_tlen(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30, 200);
2346        // R1 supplementary
2347        let r1_supp = create_mapped_record_with_tlen(
2348            b"read1",
2349            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2350            300,
2351            20,
2352            777, // Original TLEN
2353        );
2354
2355        let mut builder = Builder::new();
2356        builder.push(r1)?;
2357        builder.push(r1_supp)?;
2358        let mut template = builder.build()?;
2359
2360        template.fix_mate_info()?;
2361
2362        // R1 supplementary TLEN should remain unchanged since there's no R2 primary
2363        assert_eq!(
2364            template.records[1].template_length(),
2365            777,
2366            "R1 supplementary TLEN should be unchanged without R2 primary"
2367        );
2368
2369        Ok(())
2370    }
2371
2372    /// Tests record ordering in Template matches fgbio's ordering
2373    /// Order should be: R1, R2, `r1_supps`, `r2_supps`, `r1_secondaries`, `r2_secondaries`
2374    #[test]
2375    fn test_template_record_ordering() -> Result<()> {
2376        let r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1);
2377        let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2);
2378        let r1_supp1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
2379        let r1_supp2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY);
2380        let r2_supp = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY);
2381        let r1_sec = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY);
2382        let r2_sec1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
2383        let r2_sec2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SECONDARY);
2384
2385        // Push in scrambled order
2386        let mut builder = Builder::new();
2387        builder.push(r2_sec1)?;
2388        builder.push(r1_supp2)?;
2389        builder.push(r1)?;
2390        builder.push(r2_supp)?;
2391        builder.push(r1_sec)?;
2392        builder.push(r2)?;
2393        builder.push(r2_sec2)?;
2394        builder.push(r1_supp1)?;
2395        let template = builder.build()?;
2396
2397        // Verify indices
2398        assert_eq!(template.r1, Some((0, 1)), "R1 should be at index 0");
2399        assert_eq!(template.r2, Some((1, 2)), "R2 should be at index 1");
2400        assert_eq!(template.r1_supplementals, Some((2, 4)), "R1 supps should be at indices 2-3");
2401        assert_eq!(template.r2_supplementals, Some((4, 5)), "R2 supps should be at index 4");
2402        assert_eq!(template.r1_secondaries, Some((5, 6)), "R1 secs should be at index 5");
2403        assert_eq!(template.r2_secondaries, Some((6, 8)), "R2 secs should be at indices 6-7");
2404
2405        // Verify all_reads() returns in correct order
2406        let all_reads: Vec<_> = template.all_reads().collect();
2407        assert_eq!(all_reads.len(), 8);
2408
2409        // Verify flags match expected ordering
2410        assert!(
2411            all_reads[0].flags().is_first_segment() && !all_reads[0].flags().is_supplementary()
2412        );
2413        assert!(all_reads[1].flags().is_last_segment() && !all_reads[1].flags().is_supplementary());
2414        assert!(all_reads[2].flags().is_first_segment() && all_reads[2].flags().is_supplementary());
2415        assert!(all_reads[3].flags().is_first_segment() && all_reads[3].flags().is_supplementary());
2416        assert!(all_reads[4].flags().is_last_segment() && all_reads[4].flags().is_supplementary());
2417        assert!(all_reads[5].flags().is_first_segment() && all_reads[5].flags().is_secondary());
2418        assert!(all_reads[6].flags().is_last_segment() && all_reads[6].flags().is_secondary());
2419        assert!(all_reads[7].flags().is_last_segment() && all_reads[7].flags().is_secondary());
2420
2421        Ok(())
2422    }
2423
2424    /// Tests that records within supplementary/secondary groups are in reverse input order.
2425    /// This matches fgbio's behavior which uses Scala List prepend (r :: list).
2426    #[test]
2427    fn test_record_ordering_within_groups_is_reversed() -> Result<()> {
2428        // Create records with distinct positions so we can verify ordering
2429        let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
2430        let r2 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2, 200, 30);
2431
2432        // R1 supplementaries with positions 1000, 2000, 3000 (input order)
2433        let r1_supp1 =
2434            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 1000, 20);
2435        let r1_supp2 =
2436            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 2000, 20);
2437        let r1_supp3 =
2438            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY, 3000, 20);
2439
2440        // R2 supplementaries with positions 4000, 5000 (input order)
2441        let r2_supp1 =
2442            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 4000, 20);
2443        let r2_supp2 =
2444            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY, 5000, 20);
2445
2446        // R1 secondaries with positions 6000, 7000 (input order)
2447        let r1_sec1 =
2448            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 6000, 10);
2449        let r1_sec2 =
2450            create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_SECONDARY, 7000, 10);
2451
2452        // Push in input order
2453        let mut builder = Builder::new();
2454        builder.push(r1)?;
2455        builder.push(r2)?;
2456        builder.push(r1_supp1)?;
2457        builder.push(r1_supp2)?;
2458        builder.push(r1_supp3)?;
2459        builder.push(r2_supp1)?;
2460        builder.push(r2_supp2)?;
2461        builder.push(r1_sec1)?;
2462        builder.push(r1_sec2)?;
2463
2464        let template = builder.build()?;
2465
2466        // Helper to extract position as usize
2467        let get_pos = |rec: &RecordBuf| -> usize { rec.alignment_start().map_or(0, |p| p.get()) };
2468
2469        // Verify R1 and R2 primaries are first
2470        assert_eq!(get_pos(&template.records[0]), 100, "R1 primary");
2471        assert_eq!(get_pos(&template.records[1]), 200, "R2 primary");
2472
2473        // Verify R1 supplementaries are in REVERSE input order: 3000, 2000, 1000
2474        assert_eq!(get_pos(&template.records[2]), 3000, "R1 supp should be in reverse order");
2475        assert_eq!(get_pos(&template.records[3]), 2000, "R1 supp should be in reverse order");
2476        assert_eq!(get_pos(&template.records[4]), 1000, "R1 supp should be in reverse order");
2477
2478        // Verify R2 supplementaries are in REVERSE input order: 5000, 4000
2479        assert_eq!(get_pos(&template.records[5]), 5000, "R2 supp should be in reverse order");
2480        assert_eq!(get_pos(&template.records[6]), 4000, "R2 supp should be in reverse order");
2481
2482        // Verify R1 secondaries are in REVERSE input order: 7000, 6000
2483        assert_eq!(get_pos(&template.records[7]), 7000, "R1 sec should be in reverse order");
2484        assert_eq!(get_pos(&template.records[8]), 6000, "R1 sec should be in reverse order");
2485
2486        Ok(())
2487    }
2488
2489    /// Helper to create a mapped record with specific flags including reverse strand
2490    fn create_mapped_record_with_flags(
2491        name: &[u8],
2492        flags: u16,
2493        pos: usize,
2494        mapq: u8,
2495        tlen: i32,
2496        mate_ref_id: Option<usize>,
2497        mate_pos: Option<usize>,
2498    ) -> RecordBuf {
2499        let is_read1 = (flags & FLAG_READ1) != 0;
2500        let is_paired = (flags & FLAG_PAIRED) != 0;
2501
2502        let mut builder = RecordBuilder::new()
2503            .name(std::str::from_utf8(name).unwrap())
2504            .sequence(&"A".repeat(100))
2505            .qualities(&[30u8; 100])
2506            .cigar("100M")
2507            .reference_sequence_id(0)
2508            .alignment_start(pos)
2509            .mapping_quality(mapq)
2510            .template_length(tlen);
2511
2512        if is_paired {
2513            builder = builder.first_segment(is_read1);
2514        }
2515
2516        if let Some(mate_ref) = mate_ref_id {
2517            builder = builder.mate_reference_sequence_id(mate_ref);
2518        }
2519        if let Some(mate_p) = mate_pos {
2520            builder = builder.mate_alignment_start(mate_p);
2521        }
2522
2523        // Build and then override with exact flags
2524        let mut record = builder.build();
2525        *record.flags_mut() = Flags::from(flags);
2526
2527        record
2528    }
2529
2530    // Flag constants for reverse strand
2531    const FLAG_REVERSE: u16 = 0x10;
2532    const FLAG_MATE_REVERSE: u16 = 0x20;
2533    const FLAG_UNMAPPED: u16 = 0x4;
2534    const FLAG_MATE_UNMAPPED: u16 = 0x8;
2535
2536    /// Tests that `fix_mate_info` sets mate info on R1 supplementary alignments correctly.
2537    /// The mate is R2 primary, so supplementary should get R2's info.
2538    #[test]
2539    fn test_fix_mate_info_sets_full_mate_info_on_r1_supplementals() -> Result<()> {
2540        let mq_tag = Tag::new(b'M', b'Q');
2541        let mc_tag = Tag::new(b'M', b'C');
2542        let as_tag = Tag::new(b'A', b'S');
2543        let ms_tag = Tag::new(b'm', b's');
2544
2545        // R1 primary at pos=100, forward strand, TLEN=200
2546        let mut r1 = create_mapped_record_with_flags(
2547            b"read1",
2548            FLAG_PAIRED | FLAG_READ1,
2549            100,
2550            30,
2551            200,
2552            Some(0),
2553            Some(200),
2554        );
2555        r1.data_mut().insert(as_tag, BufValue::Int32(100));
2556
2557        // R2 primary at pos=200, reverse strand (0x10), TLEN=-200
2558        let mut r2 = create_mapped_record_with_flags(
2559            b"read1",
2560            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
2561            200,
2562            40,
2563            -200,
2564            Some(0),
2565            Some(100),
2566        );
2567        r2.data_mut().insert(as_tag, BufValue::Int32(150));
2568
2569        // R1 supplementary at pos=500, originally has wrong mate info (will be corrected)
2570        let r1_supp = create_mapped_record_with_flags(
2571            b"read1",
2572            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, // has wrong mate_reverse
2573            500,
2574            25,
2575            0, // wrong TLEN
2576            Some(0),
2577            Some(500), // wrong mate pos (points to self)
2578        );
2579
2580        let mut builder = Builder::new();
2581        builder.push(r1)?;
2582        builder.push(r2)?;
2583        builder.push(r1_supp)?;
2584        let mut template = builder.build()?;
2585
2586        template.fix_mate_info()?;
2587
2588        // Verify R1 supplementary (at index 2) has correct mate info from R2 primary
2589        let supp = &template.records[2];
2590
2591        // Mate position should be R2's position (200)
2592        assert_eq!(
2593            supp.mate_alignment_start().map(|p| p.get()),
2594            Some(200),
2595            "R1 supp should have mate pos from R2"
2596        );
2597
2598        // Mate reference should be R2's reference (0)
2599        assert_eq!(
2600            supp.mate_reference_sequence_id(),
2601            Some(0),
2602            "R1 supp should have mate ref from R2"
2603        );
2604
2605        // Mate reverse flag should be set (R2 is reverse)
2606        assert!(
2607            supp.flags().is_mate_reverse_complemented(),
2608            "R1 supp should have mate_reverse set since R2 is reverse"
2609        );
2610
2611        // Mate unmapped flag should NOT be set (R2 is mapped)
2612        assert!(
2613            !supp.flags().is_mate_unmapped(),
2614            "R1 supp should NOT have mate_unmapped since R2 is mapped"
2615        );
2616
2617        // TLEN should be negative of R2's TLEN
2618        assert_eq!(supp.template_length(), 200, "R1 supp TLEN should be -(-200) = 200");
2619
2620        // MQ tag should be R2's mapping quality (40)
2621        let mq_value = supp.data().get(&mq_tag).and_then(extract_int_value);
2622        assert_eq!(mq_value, Some(40), "R1 supp MQ should be R2's mapq");
2623
2624        // MC tag should be R2's CIGAR (100M)
2625        let mc_value = supp.data().get(&mc_tag);
2626        assert!(mc_value.is_some(), "R1 supp should have MC tag");
2627
2628        // ms tag should be R2's AS value (150)
2629        let ms_value = supp.data().get(&ms_tag).and_then(extract_int_value);
2630        assert_eq!(ms_value, Some(150), "R1 supp ms should be R2's AS value");
2631
2632        Ok(())
2633    }
2634
2635    /// Tests that `fix_mate_info` sets mate info on R2 supplementary alignments correctly.
2636    /// The mate is R1 primary, so supplementary should get R1's info.
2637    #[test]
2638    fn test_fix_mate_info_sets_full_mate_info_on_r2_supplementals() -> Result<()> {
2639        let mq_tag = Tag::new(b'M', b'Q');
2640        let mc_tag = Tag::new(b'M', b'C');
2641        let as_tag = Tag::new(b'A', b'S');
2642        let ms_tag = Tag::new(b'm', b's');
2643
2644        // R1 primary at pos=100, forward strand, TLEN=300
2645        let mut r1 = create_mapped_record_with_flags(
2646            b"read1",
2647            FLAG_PAIRED | FLAG_READ1,
2648            100,
2649            35,
2650            300,
2651            Some(0),
2652            Some(200),
2653        );
2654        r1.data_mut().insert(as_tag, BufValue::Int32(120));
2655
2656        // R2 primary at pos=200, forward strand, TLEN=-300
2657        let mut r2 = create_mapped_record_with_flags(
2658            b"read1",
2659            FLAG_PAIRED | FLAG_READ2,
2660            200,
2661            45,
2662            -300,
2663            Some(0),
2664            Some(100),
2665        );
2666        r2.data_mut().insert(as_tag, BufValue::Int32(180));
2667
2668        // R2 supplementary at pos=600, originally has wrong mate info
2669        let r2_supp = create_mapped_record_with_flags(
2670            b"read1",
2671            FLAG_PAIRED | FLAG_READ2 | FLAG_SUPPLEMENTARY,
2672            600,
2673            20,
2674            0, // wrong TLEN
2675            Some(0),
2676            Some(600), // wrong mate pos
2677        );
2678
2679        let mut builder = Builder::new();
2680        builder.push(r1)?;
2681        builder.push(r2)?;
2682        builder.push(r2_supp)?;
2683        let mut template = builder.build()?;
2684
2685        template.fix_mate_info()?;
2686
2687        // Verify R2 supplementary (at index 2) has correct mate info from R1 primary
2688        let supp = &template.records[2];
2689
2690        // Mate position should be R1's position (100)
2691        assert_eq!(
2692            supp.mate_alignment_start().map(|p| p.get()),
2693            Some(100),
2694            "R2 supp should have mate pos from R1"
2695        );
2696
2697        // Mate reverse flag should NOT be set (R1 is forward)
2698        assert!(
2699            !supp.flags().is_mate_reverse_complemented(),
2700            "R2 supp should NOT have mate_reverse since R1 is forward"
2701        );
2702
2703        // TLEN should be negative of R1's recalculated TLEN
2704        // R1(pos=100,forward) and R2(pos=200,forward) → 5' positions are 100 and 200 → insert_size = 101
2705        assert_eq!(supp.template_length(), -101, "R2 supp TLEN should be -(101) = -101");
2706
2707        // MQ tag should be R1's mapping quality (35)
2708        let mq_value = supp.data().get(&mq_tag).and_then(extract_int_value);
2709        assert_eq!(mq_value, Some(35), "R2 supp MQ should be R1's mapq");
2710
2711        // MC tag should be present
2712        assert!(supp.data().get(&mc_tag).is_some(), "R2 supp should have MC tag");
2713
2714        // ms tag should be R1's AS value (120)
2715        let ms_value = supp.data().get(&ms_tag).and_then(extract_int_value);
2716        assert_eq!(ms_value, Some(120), "R2 supp ms should be R1's AS value");
2717
2718        Ok(())
2719    }
2720
2721    /// Tests that `fix_mate_info` correctly handles unmapped mate.
2722    /// When mate is unmapped, `mate_unmapped` flag should be set and MC tag should not be set.
2723    #[test]
2724    fn test_fix_mate_info_supplemental_with_unmapped_mate() -> Result<()> {
2725        let mc_tag = Tag::new(b'M', b'C');
2726
2727        // R1 primary at pos=100, forward strand
2728        let r1 = create_mapped_record_with_flags(
2729            b"read1",
2730            FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_UNMAPPED, // R1 knows mate is unmapped
2731            100,
2732            30,
2733            0, // TLEN=0 when mate unmapped
2734            Some(0),
2735            Some(100), // unmapped mate placed at R1's position
2736        );
2737
2738        // R2 is unmapped but placed at R1's position (standard convention)
2739        let mut r2 = create_mapped_record_with_flags(
2740            b"read1",
2741            FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
2742            100, // same position as R1 (convention for unmapped mate)
2743            0,   // mapq 0 for unmapped
2744            0,   // TLEN=0
2745            Some(0),
2746            Some(100),
2747        );
2748        // Clear CIGAR for unmapped read
2749        *r2.cigar_mut() = CigarBuf::default();
2750
2751        // R1 supplementary, originally has incorrect mate_reverse flag set
2752        let r1_supp = create_mapped_record_with_flags(
2753            b"read1",
2754            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, // wrong
2755            500,
2756            25,
2757            999, // wrong TLEN
2758            Some(0),
2759            Some(500), // wrong mate pos
2760        );
2761
2762        let mut builder = Builder::new();
2763        builder.push(r1)?;
2764        builder.push(r2)?;
2765        builder.push(r1_supp)?;
2766        let mut template = builder.build()?;
2767
2768        template.fix_mate_info()?;
2769
2770        // Verify R1 supplementary has correct flags for unmapped mate
2771        let supp = &template.records[2];
2772
2773        // Mate unmapped flag should be set
2774        assert!(
2775            supp.flags().is_mate_unmapped(),
2776            "R1 supp should have mate_unmapped since R2 is unmapped"
2777        );
2778
2779        // Mate reverse flag should NOT be set for unmapped mate
2780        assert!(
2781            !supp.flags().is_mate_reverse_complemented(),
2782            "R1 supp should NOT have mate_reverse when R2 is unmapped (unmapped reads have no orientation)"
2783        );
2784
2785        // MC tag should NOT be set for unmapped mate
2786        assert!(
2787            supp.data().get(&mc_tag).is_none(),
2788            "R1 supp should NOT have MC tag when mate is unmapped"
2789        );
2790
2791        // TLEN should be 0 (negative of mate's 0)
2792        assert_eq!(supp.template_length(), 0, "TLEN should be 0 when mate is unmapped");
2793
2794        Ok(())
2795    }
2796
2797    /// Tests that `fix_mate_info` correctly clears `mate_reverse` flag when mate is on forward strand.
2798    /// This tests the scenario where supplementary had incorrect `mate_reverse` flag.
2799    #[test]
2800    fn test_fix_mate_info_clears_incorrect_mate_reverse_flag() -> Result<()> {
2801        // R1 primary at pos=100, forward strand
2802        let r1 = create_mapped_record_with_flags(
2803            b"read1",
2804            FLAG_PAIRED | FLAG_READ1,
2805            100,
2806            30,
2807            200,
2808            Some(0),
2809            Some(200),
2810        );
2811
2812        // R2 primary at pos=200, forward strand (NOT reverse)
2813        let r2 = create_mapped_record_with_flags(
2814            b"read1",
2815            FLAG_PAIRED | FLAG_READ2, // no FLAG_REVERSE
2816            200,
2817            40,
2818            -200,
2819            Some(0),
2820            Some(100),
2821        );
2822
2823        // R1 supplementary incorrectly has MATE_REVERSE set
2824        let r1_supp = create_mapped_record_with_flags(
2825            b"read1",
2826            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY | FLAG_MATE_REVERSE, // incorrectly set
2827            500,
2828            25,
2829            0,
2830            Some(0),
2831            Some(500),
2832        );
2833
2834        let mut builder = Builder::new();
2835        builder.push(r1)?;
2836        builder.push(r2)?;
2837        builder.push(r1_supp)?;
2838        let mut template = builder.build()?;
2839
2840        // Before fix, supplementary has MATE_REVERSE set incorrectly
2841        assert!(
2842            template.records[2].flags().is_mate_reverse_complemented(),
2843            "Before fix: supp incorrectly has mate_reverse"
2844        );
2845
2846        template.fix_mate_info()?;
2847
2848        // After fix, MATE_REVERSE should be cleared since R2 is forward
2849        assert!(
2850            !template.records[2].flags().is_mate_reverse_complemented(),
2851            "After fix: supp should NOT have mate_reverse since R2 is forward"
2852        );
2853
2854        Ok(())
2855    }
2856
2857    /// Tests that `fix_mate_info` handles multiple supplementary alignments.
2858    #[test]
2859    fn test_fix_mate_info_multiple_supplementals() -> Result<()> {
2860        let as_tag = Tag::new(b'A', b'S');
2861
2862        // R1 primary, forward
2863        let r1 = create_mapped_record_with_flags(
2864            b"read1",
2865            FLAG_PAIRED | FLAG_READ1,
2866            100,
2867            30,
2868            400,
2869            Some(0),
2870            Some(300),
2871        );
2872
2873        // R2 primary, reverse
2874        let mut r2 = create_mapped_record_with_flags(
2875            b"read1",
2876            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
2877            300,
2878            50,
2879            -400,
2880            Some(0),
2881            Some(100),
2882        );
2883        r2.data_mut().insert(as_tag, BufValue::Int32(200));
2884
2885        // Two R1 supplementaries
2886        let r1_supp1 = create_mapped_record_with_flags(
2887            b"read1",
2888            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2889            500,
2890            20,
2891            0,
2892            Some(0),
2893            Some(500),
2894        );
2895        let r1_supp2 = create_mapped_record_with_flags(
2896            b"read1",
2897            FLAG_PAIRED | FLAG_READ1 | FLAG_SUPPLEMENTARY,
2898            700,
2899            15,
2900            0,
2901            Some(0),
2902            Some(700),
2903        );
2904
2905        let mut builder = Builder::new();
2906        builder.push(r1)?;
2907        builder.push(r2)?;
2908        builder.push(r1_supp1)?;
2909        builder.push(r1_supp2)?;
2910        let mut template = builder.build()?;
2911
2912        template.fix_mate_info()?;
2913
2914        // Both supplementaries should have correct mate info from R2
2915        // Note: after build(), supplementaries are at indices 2 and 3 (reversed order)
2916        // R1(pos=100,forward) and R2(pos=300,100M,reverse) → 5' positions are 100 and 399 → insert_size = 300
2917        for i in 2..=3 {
2918            let supp = &template.records[i];
2919
2920            assert_eq!(
2921                supp.mate_alignment_start().map(|p| p.get()),
2922                Some(300),
2923                "Supp {i} should have mate pos 300"
2924            );
2925            assert!(
2926                supp.flags().is_mate_reverse_complemented(),
2927                "Supp {i} should have mate_reverse"
2928            );
2929            assert_eq!(supp.template_length(), 300, "Supp {i} TLEN should be 300");
2930        }
2931
2932        Ok(())
2933    }
2934
2935    // ============================================================================
2936    // Tests for compute_insert_size (ported from htsjdk SamPairUtilTest)
2937    // ============================================================================
2938
2939    /// Helper to create a mapped record with specific strand orientation and CIGAR for insert size tests
2940    fn create_insert_size_record(
2941        name: &[u8],
2942        flags: u16,
2943        pos: usize,
2944        read_length: usize,
2945        ref_id: Option<usize>,
2946    ) -> RecordBuf {
2947        let is_read1 = (flags & FLAG_READ1) != 0;
2948        let is_paired = (flags & FLAG_PAIRED) != 0;
2949
2950        let mut builder = RecordBuilder::new()
2951            .name(std::str::from_utf8(name).unwrap())
2952            .sequence(&"A".repeat(read_length))
2953            .qualities(&vec![30u8; read_length])
2954            .cigar(&format!("{read_length}M"))
2955            .alignment_start(pos)
2956            .mapping_quality(30);
2957
2958        if let Some(rid) = ref_id {
2959            builder = builder.reference_sequence_id(rid);
2960        }
2961
2962        if is_paired {
2963            builder = builder.first_segment(is_read1);
2964        }
2965
2966        // Build and then override with exact flags
2967        let mut record = builder.build();
2968        *record.flags_mut() = Flags::from(flags);
2969
2970        record
2971    }
2972
2973    /// Test `compute_insert_size` for "normal innie" FR pair (htsjdk test case)
2974    /// R1: pos=1, length=100, forward
2975    /// R2: pos=500, length=100, reverse
2976    /// Expected: 5' positions are 1 (forward start) and 599 (reverse end)
2977    /// Insert size = 599 - 1 + 1 = 599
2978    #[test]
2979    fn test_compute_insert_size_normal_innie() {
2980        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
2981        let r2 = create_insert_size_record(
2982            b"read1",
2983            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
2984            500,
2985            100,
2986            Some(0),
2987        );
2988
2989        let insert_size = compute_insert_size(&r1, &r2);
2990        // R1 5' = 1 (forward, start), R2 5' = 599 (reverse, end = 500+100-1)
2991        // second_5prime (599) >= first_5prime (1), so adjustment = +1
2992        // 599 - 1 + 1 = 599
2993        assert_eq!(insert_size, 599);
2994    }
2995
2996    /// Test `compute_insert_size` for overlapping innie
2997    /// R1: pos=1, length=100, forward (ends at 100)
2998    /// R2: pos=50, length=100, reverse (ends at 149)
2999    #[test]
3000    fn test_compute_insert_size_overlapping_innie() {
3001        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3002        let r2 = create_insert_size_record(
3003            b"read1",
3004            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3005            50,
3006            100,
3007            Some(0),
3008        );
3009
3010        let insert_size = compute_insert_size(&r1, &r2);
3011        // R1 5' = 1, R2 5' = 149 (50+100-1)
3012        // 149 - 1 + 1 = 149
3013        assert_eq!(insert_size, 149);
3014    }
3015
3016    /// Test `compute_insert_size` for completely overlapping innie
3017    /// R1: pos=1, length=100, forward
3018    /// R2: pos=1, length=100, reverse
3019    #[test]
3020    fn test_compute_insert_size_completely_overlapping_innie() {
3021        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3022        let r2 = create_insert_size_record(
3023            b"read1",
3024            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3025            1,
3026            100,
3027            Some(0),
3028        );
3029
3030        let insert_size = compute_insert_size(&r1, &r2);
3031        // R1 5' = 1, R2 5' = 100 (1+100-1)
3032        // 100 - 1 + 1 = 100
3033        assert_eq!(insert_size, 100);
3034    }
3035
3036    /// Test `compute_insert_size` for outie pair (RF orientation)
3037    /// R1: pos=1, length=100, reverse
3038    /// R2: pos=500, length=100, forward
3039    #[test]
3040    fn test_compute_insert_size_normal_outie() {
3041        let r1 = create_insert_size_record(
3042            b"read1",
3043            FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE,
3044            1,
3045            100,
3046            Some(0),
3047        );
3048        let r2 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ2, 500, 100, Some(0));
3049
3050        let insert_size = compute_insert_size(&r1, &r2);
3051        // R1 5' = 100 (reverse, end = 1+100-1), R2 5' = 500 (forward, start)
3052        // 500 - 100 + 1 = 401
3053        assert_eq!(insert_size, 401);
3054    }
3055
3056    /// Test `compute_insert_size` for forward tandem
3057    /// R1: pos=1, length=100, reverse
3058    /// R2: pos=500, length=100, reverse
3059    #[test]
3060    fn test_compute_insert_size_forward_tandem() {
3061        let r1 = create_insert_size_record(
3062            b"read1",
3063            FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE,
3064            1,
3065            100,
3066            Some(0),
3067        );
3068        let r2 = create_insert_size_record(
3069            b"read1",
3070            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3071            500,
3072            100,
3073            Some(0),
3074        );
3075
3076        let insert_size = compute_insert_size(&r1, &r2);
3077        // R1 5' = 100 (reverse), R2 5' = 599 (reverse)
3078        // 599 - 100 + 1 = 500
3079        assert_eq!(insert_size, 500);
3080    }
3081
3082    /// Test `compute_insert_size` for reverse tandem
3083    /// R1: pos=1, length=100, forward
3084    /// R2: pos=500, length=100, forward
3085    #[test]
3086    fn test_compute_insert_size_reverse_tandem() {
3087        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3088        let r2 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ2, 500, 100, Some(0));
3089
3090        let insert_size = compute_insert_size(&r1, &r2);
3091        // R1 5' = 1 (forward), R2 5' = 500 (forward)
3092        // 500 - 1 + 1 = 500
3093        assert_eq!(insert_size, 500);
3094    }
3095
3096    /// Test `compute_insert_size` when second read is before first (negative insert size)
3097    /// R1: pos=500, length=100, forward
3098    /// R2: pos=1, length=100, reverse
3099    #[test]
3100    fn test_compute_insert_size_negative() {
3101        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 500, 100, Some(0));
3102        let r2 = create_insert_size_record(
3103            b"read1",
3104            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3105            1,
3106            100,
3107            Some(0),
3108        );
3109
3110        let insert_size = compute_insert_size(&r1, &r2);
3111        // R1 5' = 500 (forward), R2 5' = 100 (reverse)
3112        // second_5prime (100) < first_5prime (500), so adjustment = -1
3113        // 100 - 500 + (-1) = -401
3114        assert_eq!(insert_size, -401);
3115    }
3116
3117    /// Test `compute_insert_size` returns 0 when reads are on different references
3118    #[test]
3119    fn test_compute_insert_size_different_references() {
3120        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 100, Some(0));
3121        let r2 = create_insert_size_record(
3122            b"read1",
3123            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3124            200,
3125            100,
3126            Some(1), // different reference
3127        );
3128
3129        let insert_size = compute_insert_size(&r1, &r2);
3130        assert_eq!(insert_size, 0, "Insert size should be 0 for different references");
3131    }
3132
3133    /// Test `compute_insert_size` returns 0 when R1 is unmapped
3134    #[test]
3135    fn test_compute_insert_size_r1_unmapped() {
3136        let r1 = create_insert_size_record(
3137            b"read1",
3138            FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
3139            100,
3140            100,
3141            Some(0),
3142        );
3143        let r2 = create_insert_size_record(
3144            b"read1",
3145            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3146            200,
3147            100,
3148            Some(0),
3149        );
3150
3151        let insert_size = compute_insert_size(&r1, &r2);
3152        assert_eq!(insert_size, 0, "Insert size should be 0 when R1 is unmapped");
3153    }
3154
3155    /// Test `compute_insert_size` returns 0 when R2 is unmapped
3156    #[test]
3157    fn test_compute_insert_size_r2_unmapped() {
3158        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 100, Some(0));
3159        let r2 = create_insert_size_record(
3160            b"read1",
3161            FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
3162            200,
3163            100,
3164            Some(0),
3165        );
3166
3167        let insert_size = compute_insert_size(&r1, &r2);
3168        assert_eq!(insert_size, 0, "Insert size should be 0 when R2 is unmapped");
3169    }
3170
3171    /// Test `compute_insert_size` returns 0 when both reads are unmapped
3172    #[test]
3173    fn test_compute_insert_size_both_unmapped() {
3174        let r1 = create_insert_size_record(
3175            b"read1",
3176            FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
3177            100,
3178            100,
3179            Some(0),
3180        );
3181        let r2 = create_insert_size_record(
3182            b"read1",
3183            FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
3184            200,
3185            100,
3186            Some(0),
3187        );
3188
3189        let insert_size = compute_insert_size(&r1, &r2);
3190        assert_eq!(insert_size, 0, "Insert size should be 0 when both reads are unmapped");
3191    }
3192
3193    /// Test `compute_insert_size` with complex CIGAR (insertions don't affect reference length)
3194    #[test]
3195    fn test_compute_insert_size_with_indels() {
3196        // CIGAR: 50M10I40M = 90M on reference (insertions don't consume reference)
3197        let r1 = RecordBuilder::new()
3198            .name("read1")
3199            .sequence(&"A".repeat(100))
3200            .qualities(&[30u8; 100])
3201            .reference_sequence_id(0)
3202            .alignment_start(1)
3203            .cigar("50M10I40M")
3204            .first_segment(true)
3205            .build();
3206
3207        // CIGAR: 50M5D50M = 105 on reference (deletions consume reference)
3208        let mut r2 = RecordBuilder::new()
3209            .name("read1")
3210            .sequence(&"A".repeat(100))
3211            .qualities(&[30u8; 100])
3212            .reference_sequence_id(0)
3213            .alignment_start(200)
3214            .cigar("50M5D50M")
3215            .first_segment(false)
3216            .reverse_complement(true)
3217            .build();
3218        // Ensure FLAG_REVERSE is set
3219        *r2.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE);
3220
3221        let insert_size = compute_insert_size(&r1, &r2);
3222        // R1: alignment length = 50 + 40 = 90, end = 90, 5' = 1 (forward)
3223        // R2: alignment length = 50 + 5 + 50 = 105, end = 304, 5' = 304 (reverse)
3224        // 304 - 1 + 1 = 304
3225        assert_eq!(insert_size, 304);
3226    }
3227
3228    // ============================================================================
3229    // Tests for setMateInfo (both mapped case) - ported from htsjdk
3230    // ============================================================================
3231
3232    /// Test `fix_mate_info` correctly sets TLEN for normal innie pair
3233    #[test]
3234    fn test_fix_mate_info_sets_tlen_normal_innie() -> Result<()> {
3235        let r1 = create_insert_size_record(b"read1", FLAG_PAIRED | FLAG_READ1, 1, 100, Some(0));
3236        let r2 = create_insert_size_record(
3237            b"read1",
3238            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3239            500,
3240            100,
3241            Some(0),
3242        );
3243
3244        let mut builder = Builder::new();
3245        builder.push(r1)?;
3246        builder.push(r2)?;
3247        let mut template = builder.build()?;
3248
3249        template.fix_mate_info()?;
3250
3251        // Expected insert size: 599
3252        assert_eq!(template.records[0].template_length(), 599, "R1 TLEN");
3253        assert_eq!(template.records[1].template_length(), -599, "R2 TLEN");
3254
3255        Ok(())
3256    }
3257
3258    /// Test `fix_mate_info` correctly sets mate CIGAR (MC tag)
3259    #[test]
3260    fn test_fix_mate_info_sets_mate_cigar() -> Result<()> {
3261        let mc_tag = Tag::new(b'M', b'C');
3262
3263        let r1 = RecordBuilder::new()
3264            .name("read1")
3265            .sequence(&"A".repeat(50))
3266            .qualities(&[30u8; 50])
3267            .reference_sequence_id(0)
3268            .alignment_start(1)
3269            .cigar("50M")
3270            .first_segment(true)
3271            .build();
3272
3273        // Complex CIGAR for R2: 25M5I20M
3274        let mut r2 = RecordBuilder::new()
3275            .name("read1")
3276            .sequence(&"A".repeat(50))
3277            .qualities(&[30u8; 50])
3278            .reference_sequence_id(0)
3279            .alignment_start(500)
3280            .cigar("25M5I20M")
3281            .first_segment(false)
3282            .reverse_complement(true)
3283            .build();
3284        // Ensure FLAG_REVERSE is set
3285        *r2.flags_mut() = Flags::from(FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE);
3286
3287        let mut builder = Builder::new();
3288        builder.push(r1)?;
3289        builder.push(r2)?;
3290        let mut template = builder.build()?;
3291
3292        template.fix_mate_info()?;
3293
3294        // R1's MC should be R2's CIGAR
3295        let r1_mc = template.records[0].data().get(&mc_tag);
3296        assert!(r1_mc.is_some(), "R1 should have MC tag");
3297        if let Some(BufValue::String(mc)) = r1_mc {
3298            assert_eq!(&mc[..], b"25M5I20M", "R1 MC should be R2's CIGAR");
3299        } else {
3300            panic!("MC tag should be a string");
3301        }
3302
3303        // R2's MC should be R1's CIGAR
3304        let r2_mc = template.records[1].data().get(&mc_tag);
3305        assert!(r2_mc.is_some(), "R2 should have MC tag");
3306        if let Some(BufValue::String(mc)) = r2_mc {
3307            assert_eq!(&mc[..], b"50M", "R2 MC should be R1's CIGAR");
3308        } else {
3309            panic!("MC tag should be a string");
3310        }
3311
3312        Ok(())
3313    }
3314
3315    /// Test `fix_mate_info` correctly handles both unmapped case
3316    #[test]
3317    fn test_fix_mate_info_both_unmapped() -> Result<()> {
3318        let mq_tag = Tag::new(b'M', b'Q');
3319        let mc_tag = Tag::new(b'M', b'C');
3320
3321        let mut r1 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED);
3322        *r1.reference_sequence_id_mut() = Some(0); // Originally had a reference
3323        *r1.data_mut() = [(mq_tag, BufValue::Int32(30))].into_iter().collect(); // Has MQ tag
3324
3325        let mut r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED);
3326        *r2.reference_sequence_id_mut() = Some(0);
3327        *r2.data_mut() = [(mc_tag, BufValue::String("100M".into()))].into_iter().collect();
3328
3329        let mut builder = Builder::new();
3330        builder.push(r1)?;
3331        builder.push(r2)?;
3332        let mut template = builder.build()?;
3333
3334        template.fix_mate_info()?;
3335
3336        // Both should have reference cleared (None)
3337        assert!(template.records[0].reference_sequence_id().is_none(), "R1 ref should be cleared");
3338        assert!(template.records[1].reference_sequence_id().is_none(), "R2 ref should be cleared");
3339
3340        // Both should have mate_unmapped flag set
3341        assert!(template.records[0].flags().is_mate_unmapped(), "R1 mate_unmapped");
3342        assert!(template.records[1].flags().is_mate_unmapped(), "R2 mate_unmapped");
3343
3344        // MQ and MC tags should be removed
3345        assert!(template.records[0].data().get(&mq_tag).is_none(), "R1 MQ should be removed");
3346        assert!(template.records[1].data().get(&mc_tag).is_none(), "R2 MC should be removed");
3347
3348        // TLEN should be 0
3349        assert_eq!(template.records[0].template_length(), 0, "R1 TLEN should be 0");
3350        assert_eq!(template.records[1].template_length(), 0, "R2 TLEN should be 0");
3351
3352        Ok(())
3353    }
3354
3355    /// Test `fix_mate_info` correctly handles one mapped, one unmapped case
3356    #[test]
3357    fn test_fix_mate_info_one_unmapped() -> Result<()> {
3358        let mq_tag = Tag::new(b'M', b'Q');
3359        let mc_tag = Tag::new(b'M', b'C');
3360
3361        // R1 is mapped
3362        let r1 = create_mapped_record(b"read1", FLAG_PAIRED | FLAG_READ1, 100, 30);
3363
3364        // R2 is unmapped
3365        let r2 = create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED);
3366
3367        let mut builder = Builder::new();
3368        builder.push(r1)?;
3369        builder.push(r2)?;
3370        let mut template = builder.build()?;
3371
3372        template.fix_mate_info()?;
3373
3374        // Unmapped R2 should be placed at R1's position
3375        assert_eq!(
3376            template.records[1].reference_sequence_id(),
3377            Some(0),
3378            "R2 should be placed at R1's reference"
3379        );
3380        assert_eq!(
3381            template.records[1].alignment_start().map(|p| p.get()),
3382            Some(100),
3383            "R2 should be placed at R1's position"
3384        );
3385
3386        // R1 (mapped) should NOT have MQ tag (mate is unmapped)
3387        assert!(
3388            template.records[0].data().get(&mq_tag).is_none(),
3389            "R1 should NOT have MQ when mate is unmapped"
3390        );
3391
3392        // R2 (unmapped) SHOULD have MQ tag (mate is mapped)
3393        assert!(
3394            template.records[1].data().get(&mq_tag).is_some(),
3395            "R2 should have MQ when mate is mapped"
3396        );
3397
3398        // R1 should have mate_unmapped flag set
3399        assert!(template.records[0].flags().is_mate_unmapped(), "R1 mate_unmapped should be set");
3400
3401        // R2 should NOT have mate_unmapped flag set (R1 is mapped)
3402        assert!(
3403            !template.records[1].flags().is_mate_unmapped(),
3404            "R2 mate_unmapped should NOT be set"
3405        );
3406
3407        // TLEN should be 0 for both
3408        assert_eq!(template.records[0].template_length(), 0, "R1 TLEN");
3409        assert_eq!(template.records[1].template_length(), 0, "R2 TLEN");
3410
3411        // MC tag: R1 should not have it (mate unmapped), R2 should have it (mate mapped)
3412        assert!(
3413            template.records[0].data().get(&mc_tag).is_none(),
3414            "R1 should NOT have MC when mate is unmapped"
3415        );
3416        assert!(
3417            template.records[1].data().get(&mc_tag).is_some(),
3418            "R2 should have MC when mate is mapped"
3419        );
3420
3421        Ok(())
3422    }
3423
3424    // ==================== pair_orientation tests ====================
3425
3426    /// Test FR pair: R1 forward at pos 100, R2 reverse at pos 200 (positive insert size)
3427    /// This matches htsjdk's test case for getPairOrientation
3428    #[test]
3429    fn test_pair_orientation_fr_pair() -> Result<()> {
3430        // R1: forward strand at pos 100, mate (R2) is reverse at pos 200
3431        // positive 5' = 100 (R1 start), negative 5' = 100 + 200 = 300
3432        // 100 < 300 => FR
3433        let r1 = create_mapped_record_with_flags(
3434            b"read1",
3435            FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
3436            100,
3437            30,
3438            200, // positive TLEN for FR
3439            Some(0),
3440            Some(200),
3441        );
3442        let r2 = create_mapped_record_with_flags(
3443            b"read1",
3444            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3445            200,
3446            30,
3447            -200,
3448            Some(0),
3449            Some(100),
3450        );
3451
3452        let template = Template::from_records(vec![r1, r2])?;
3453        assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
3454        Ok(())
3455    }
3456
3457    /// Test RF pair: R1 forward but positioned after mate's 5' end
3458    /// This is an "outie" pair
3459    #[test]
3460    fn test_pair_orientation_rf_pair() -> Result<()> {
3461        // R1: forward at pos 300, R2: reverse at pos 100
3462        // positive 5' = 300 (R1 start), negative 5' = 300 + (-200) = 100
3463        // 300 >= 100 => RF
3464        let r1 = create_mapped_record_with_flags(
3465            b"read1",
3466            FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
3467            300,
3468            30,
3469            -200, // negative TLEN for RF
3470            Some(0),
3471            Some(100),
3472        );
3473        let r2 = create_mapped_record_with_flags(
3474            b"read1",
3475            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3476            100,
3477            30,
3478            200,
3479            Some(0),
3480            Some(300),
3481        );
3482
3483        let template = Template::from_records(vec![r1, r2])?;
3484        assert_eq!(template.pair_orientation(), Some(PairOrientation::RF));
3485        Ok(())
3486    }
3487
3488    /// Test TANDEM pair: both reads on forward strand
3489    #[test]
3490    fn test_pair_orientation_tandem_both_forward() -> Result<()> {
3491        // Both R1 and R2 on forward strand => TANDEM
3492        let r1 = create_mapped_record_with_flags(
3493            b"read1",
3494            FLAG_PAIRED | FLAG_READ1, // forward, mate also forward (no MATE_REVERSE)
3495            100,
3496            30,
3497            100,
3498            Some(0),
3499            Some(200),
3500        );
3501        let r2 = create_mapped_record_with_flags(
3502            b"read1",
3503            FLAG_PAIRED | FLAG_READ2, // forward
3504            200,
3505            30,
3506            -100,
3507            Some(0),
3508            Some(100),
3509        );
3510
3511        let template = Template::from_records(vec![r1, r2])?;
3512        assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
3513        Ok(())
3514    }
3515
3516    /// Test TANDEM pair: both reads on reverse strand
3517    #[test]
3518    fn test_pair_orientation_tandem_both_reverse() -> Result<()> {
3519        // Both R1 and R2 on reverse strand => TANDEM
3520        let r1 = create_mapped_record_with_flags(
3521            b"read1",
3522            FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE | FLAG_MATE_REVERSE,
3523            100,
3524            30,
3525            100,
3526            Some(0),
3527            Some(200),
3528        );
3529        let r2 = create_mapped_record_with_flags(
3530            b"read1",
3531            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE | FLAG_MATE_REVERSE,
3532            200,
3533            30,
3534            -100,
3535            Some(0),
3536            Some(100),
3537        );
3538
3539        let template = Template::from_records(vec![r1, r2])?;
3540        assert_eq!(template.pair_orientation(), Some(PairOrientation::Tandem));
3541        Ok(())
3542    }
3543
3544    /// Test `pair_orientation` returns None when R1 is missing
3545    #[test]
3546    fn test_pair_orientation_no_r1() -> Result<()> {
3547        let r2 = create_mapped_record_with_flags(
3548            b"read1",
3549            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3550            100,
3551            30,
3552            0,
3553            Some(0),
3554            Some(100),
3555        );
3556
3557        let template = Template::from_records(vec![r2])?;
3558        assert_eq!(template.pair_orientation(), None);
3559        Ok(())
3560    }
3561
3562    /// Test `pair_orientation` returns None when R2 is missing
3563    #[test]
3564    fn test_pair_orientation_no_r2() -> Result<()> {
3565        let r1 = create_mapped_record_with_flags(
3566            b"read1",
3567            FLAG_PAIRED | FLAG_READ1,
3568            100,
3569            30,
3570            0,
3571            Some(0),
3572            Some(100),
3573        );
3574
3575        let template = Template::from_records(vec![r1])?;
3576        assert_eq!(template.pair_orientation(), None);
3577        Ok(())
3578    }
3579
3580    /// Test `pair_orientation` returns None when R1 is unmapped
3581    #[test]
3582    fn test_pair_orientation_r1_unmapped() -> Result<()> {
3583        let r1 = create_mapped_record_with_flags(
3584            b"read1",
3585            FLAG_PAIRED | FLAG_READ1 | FLAG_UNMAPPED,
3586            100,
3587            30,
3588            0,
3589            Some(0),
3590            Some(100),
3591        );
3592        let r2 = create_mapped_record_with_flags(
3593            b"read1",
3594            FLAG_PAIRED | FLAG_READ2,
3595            100,
3596            30,
3597            0,
3598            Some(0),
3599            Some(100),
3600        );
3601
3602        let template = Template::from_records(vec![r1, r2])?;
3603        assert_eq!(template.pair_orientation(), None);
3604        Ok(())
3605    }
3606
3607    /// Test `pair_orientation` returns None when R2 is unmapped
3608    #[test]
3609    fn test_pair_orientation_r2_unmapped() -> Result<()> {
3610        let r1 = create_mapped_record_with_flags(
3611            b"read1",
3612            FLAG_PAIRED | FLAG_READ1,
3613            100,
3614            30,
3615            0,
3616            Some(0),
3617            Some(100),
3618        );
3619        let r2 = create_mapped_record_with_flags(
3620            b"read1",
3621            FLAG_PAIRED | FLAG_READ2 | FLAG_UNMAPPED,
3622            100,
3623            30,
3624            0,
3625            Some(0),
3626            Some(100),
3627        );
3628
3629        let template = Template::from_records(vec![r1, r2])?;
3630        assert_eq!(template.pair_orientation(), None);
3631        Ok(())
3632    }
3633
3634    /// Test `pair_orientation` returns None when reads are on different chromosomes
3635    #[test]
3636    fn test_pair_orientation_different_chromosomes() -> Result<()> {
3637        let mut r1 = create_mapped_record_with_flags(
3638            b"read1",
3639            FLAG_PAIRED | FLAG_READ1 | FLAG_MATE_REVERSE,
3640            100,
3641            30,
3642            200,
3643            Some(1), // mate on different chromosome
3644            Some(200),
3645        );
3646        // Override R1's reference to be chr 0
3647        *r1.reference_sequence_id_mut() = Some(0);
3648
3649        let mut r2 = create_mapped_record_with_flags(
3650            b"read1",
3651            FLAG_PAIRED | FLAG_READ2 | FLAG_REVERSE,
3652            200,
3653            30,
3654            -200,
3655            Some(0),
3656            Some(100),
3657        );
3658        // R2 is on chr 1
3659        *r2.reference_sequence_id_mut() = Some(1);
3660
3661        let template = Template::from_records(vec![r1, r2])?;
3662        assert_eq!(template.pair_orientation(), None);
3663        Ok(())
3664    }
3665
3666    /// Test FR pair from reverse strand perspective (R1 is reverse, R2 is forward)
3667    #[test]
3668    fn test_pair_orientation_fr_from_reverse_read() -> Result<()> {
3669        // R1: reverse at pos 200, R2: forward at pos 100
3670        // For R1 (reverse): positive 5' = mate_start = 100, negative 5' = alignment_end = 299 (200 + 100 - 1)
3671        // 100 < 299 => FR
3672        let r1 = create_mapped_record_with_flags(
3673            b"read1",
3674            FLAG_PAIRED | FLAG_READ1 | FLAG_REVERSE, // R1 reverse, mate forward (no MATE_REVERSE)
3675            200,
3676            30,
3677            -200, // negative because R1 is after R2
3678            Some(0),
3679            Some(100),
3680        );
3681        let r2 = create_mapped_record_with_flags(
3682            b"read1",
3683            FLAG_PAIRED | FLAG_READ2 | FLAG_MATE_REVERSE, // R2 forward, mate (R1) reverse
3684            100,
3685            30,
3686            200,
3687            Some(0),
3688            Some(200),
3689        );
3690
3691        let template = Template::from_records(vec![r1, r2])?;
3692        assert_eq!(template.pair_orientation(), Some(PairOrientation::FR));
3693        Ok(())
3694    }
3695
3696    // ========================================================================
3697    // Tests for RecordBuf accessor panic in raw-byte mode
3698    // ========================================================================
3699
3700    /// Helper to create a minimal raw BAM record for testing raw-byte mode.
3701    #[allow(clippy::cast_possible_truncation)]
3702    fn make_minimal_raw_bam(name: &[u8], flags: u16) -> Vec<u8> {
3703        let l_read_name = (name.len() + 1) as u8; // +1 for null terminator
3704        let total = 32 + l_read_name as usize; // minimal: header + name only
3705        let mut buf = vec![0u8; total];
3706
3707        buf[8] = l_read_name;
3708        buf[14..16].copy_from_slice(&flags.to_le_bytes());
3709
3710        let name_start = 32;
3711        buf[name_start..name_start + name.len()].copy_from_slice(name);
3712        buf[name_start + name.len()] = 0; // null terminator
3713
3714        buf
3715    }
3716
3717    #[test]
3718    fn test_from_raw_records_creates_raw_mode_template() {
3719        let raw = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3720        let template = Template::from_raw_records(vec![raw]).unwrap();
3721
3722        // Verify it's in raw-byte mode
3723        assert!(template.is_raw_byte_mode());
3724        // Verify records vec is empty (all data is in raw_records)
3725        assert!(template.records.is_empty());
3726        // Verify raw accessors work
3727        assert!(template.raw_r1().is_some());
3728    }
3729
3730    #[test]
3731    fn test_r1_accessor_returns_none_in_raw_mode() {
3732        // After fix: calling r1() on a raw-mode Template returns None instead of panicking
3733        let raw = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3734        let template = Template::from_raw_records(vec![raw]).unwrap();
3735
3736        // r1() should return None in raw mode (use raw_r1() instead)
3737        assert!(template.r1().is_none());
3738        // raw_r1() should still work
3739        assert!(template.raw_r1().is_some());
3740    }
3741
3742    #[test]
3743    fn test_r2_accessor_returns_none_in_raw_mode() {
3744        // After fix: calling r2() on a raw-mode Template returns None instead of panicking
3745        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3746        let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3747        let template = Template::from_raw_records(vec![r1, r2]).unwrap();
3748
3749        assert!(template.is_raw_byte_mode());
3750
3751        // r2() should return None in raw mode (use raw_r2() instead)
3752        assert!(template.r2().is_none());
3753        // raw_r2() should still work
3754        assert!(template.raw_r2().is_some());
3755    }
3756
3757    // ========================================================================
3758    // from_raw_records fast path tests
3759    // ========================================================================
3760
3761    #[test]
3762    fn test_from_raw_records_fast_path_normal_order() {
3763        // R1 first, R2 second — fast path with no swap
3764        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3765        let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3766        let template = Template::from_raw_records(vec![r1, r2]).unwrap();
3767
3768        assert!(template.is_raw_byte_mode());
3769        assert!(template.raw_r1().is_some());
3770        assert!(template.raw_r2().is_some());
3771        // Verify R1 is at index 0 (has FIRST_SEGMENT flag)
3772        let r1_flags = crate::sort::bam_fields::flags(template.raw_r1().unwrap());
3773        assert_ne!(r1_flags & FLAG_READ1, 0);
3774    }
3775
3776    #[test]
3777    fn test_from_raw_records_fast_path_swap() {
3778        // R2 first, R1 second — fast path should swap them
3779        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3780        let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3781        let template = Template::from_raw_records(vec![r2, r1]).unwrap();
3782
3783        assert!(template.is_raw_byte_mode());
3784        // After swap, R1 should be at index 0
3785        let r1_flags = crate::sort::bam_fields::flags(template.raw_r1().unwrap());
3786        assert_ne!(r1_flags & FLAG_READ1, 0);
3787        let r2_flags = crate::sort::bam_fields::flags(template.raw_r2().unwrap());
3788        assert_ne!(r2_flags & FLAG_READ2, 0);
3789    }
3790
3791    #[test]
3792    fn test_from_raw_records_fast_path_fallthrough_both_r1() {
3793        // Both records are R1 — fast path should fall through to general path (error)
3794        let r1a = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3795        let r1b = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3796        let result = Template::from_raw_records(vec![r1a, r1b]);
3797        assert!(result.is_err());
3798    }
3799
3800    #[test]
3801    fn test_from_raw_records_fast_path_with_secondary() {
3802        // 2 records but one is secondary — should skip fast path
3803        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3804        let sec = make_minimal_raw_bam(
3805            b"read1",
3806            FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SECONDARY,
3807        );
3808        let template = Template::from_raw_records(vec![r1, sec]).unwrap();
3809        assert!(template.is_raw_byte_mode());
3810        assert!(template.raw_r1().is_some());
3811    }
3812
3813    #[test]
3814    fn test_from_raw_records_with_supplementary() {
3815        // R1 primary + R1 supplementary + R2 primary — general path
3816        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3817        let r1_supp = make_minimal_raw_bam(
3818            b"read1",
3819            FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SUPPLEMENTARY,
3820        );
3821        let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3822        let template = Template::from_raw_records(vec![r1, r1_supp, r2]).unwrap();
3823        assert!(template.is_raw_byte_mode());
3824        assert!(template.raw_r1().is_some());
3825        assert!(template.raw_r2().is_some());
3826    }
3827
3828    #[test]
3829    fn test_from_raw_records_single_unpaired() {
3830        // Single unpaired record — should be treated as R1
3831        let r = make_minimal_raw_bam(b"read1", 0); // no PAIRED flag
3832        let template = Template::from_raw_records(vec![r]).unwrap();
3833        assert!(template.is_raw_byte_mode());
3834        assert!(template.raw_r1().is_some());
3835        assert!(template.raw_r2().is_none());
3836    }
3837
3838    #[test]
3839    fn test_from_raw_records_empty() {
3840        let result = Template::from_raw_records(vec![]);
3841        assert!(result.is_err());
3842    }
3843
3844    #[test]
3845    fn test_from_raw_records_name_mismatch() {
3846        // Two records with different names — should error in general path
3847        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3848        let r1_supp = make_minimal_raw_bam(
3849            b"read1",
3850            FLAG_PAIRED | FLAG_READ1 | crate::sort::bam_fields::flags::SUPPLEMENTARY,
3851        );
3852        let r2_wrong = make_minimal_raw_bam(b"read2", FLAG_PAIRED | FLAG_READ2);
3853        let result = Template::from_raw_records(vec![r1, r1_supp, r2_wrong]);
3854        assert!(result.is_err());
3855    }
3856
3857    #[test]
3858    fn test_from_raw_records_all_raw_records_mut() {
3859        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3860        let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3861        let mut template = Template::from_raw_records(vec![r1, r2]).unwrap();
3862        // Should be able to get mutable access
3863        let recs = template.all_raw_records_mut().unwrap();
3864        assert_eq!(recs.len(), 2);
3865    }
3866
3867    #[test]
3868    fn test_from_raw_records_into_raw_records() {
3869        let r1 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ1);
3870        let r2 = make_minimal_raw_bam(b"read1", FLAG_PAIRED | FLAG_READ2);
3871        let template = Template::from_raw_records(vec![r1, r2]).unwrap();
3872        let recs = template.into_raw_records().unwrap();
3873        assert_eq!(recs.len(), 2);
3874    }
3875
3876    #[test]
3877    fn test_from_raw_records_truncated_header() {
3878        // Record too short (< 32 bytes)
3879        let short = vec![0u8; 20];
3880        let err = Template::from_raw_records(vec![short]).unwrap_err();
3881        assert!(err.to_string().contains("too short"), "Error: {err}");
3882    }
3883
3884    #[test]
3885    fn test_from_raw_records_truncated_read_name() {
3886        // Record has 32 bytes but l_read_name claims more bytes than available
3887        let mut buf = vec![0u8; 34]; // 32 header + 2 bytes
3888        buf[8] = 10; // l_read_name=10, but only 2 bytes after header
3889        let err = Template::from_raw_records(vec![buf]).unwrap_err();
3890        assert!(err.to_string().contains("truncated"), "Error: {err}");
3891    }
3892
3893    #[test]
3894    fn test_from_raw_records_valid_l_read_name() {
3895        // Record with l_read_name that exactly fits
3896        let rec = make_minimal_raw_bam(b"test", FLAG_PAIRED | FLAG_READ1);
3897        assert!(Template::from_raw_records(vec![rec]).is_ok());
3898    }
3899
3900    // ========================================================================
3901    // write_with_offset tests
3902    // ========================================================================
3903
3904    #[test]
3905    fn test_write_with_offset_none() {
3906        let mi = MoleculeId::None;
3907        let mut buf = String::new();
3908        let result = mi.write_with_offset(100, &mut buf);
3909        assert!(result.is_empty());
3910    }
3911
3912    #[test]
3913    fn test_write_with_offset_single() {
3914        let mi = MoleculeId::Single(5);
3915        let mut buf = String::new();
3916        let result = mi.write_with_offset(100, &mut buf);
3917        assert_eq!(result, b"105");
3918    }
3919
3920    #[test]
3921    fn test_write_with_offset_paired_a() {
3922        let mi = MoleculeId::PairedA(3);
3923        let mut buf = String::new();
3924        let result = mi.write_with_offset(10, &mut buf);
3925        assert_eq!(result, b"13/A");
3926    }
3927
3928    #[test]
3929    fn test_write_with_offset_paired_b() {
3930        let mi = MoleculeId::PairedB(3);
3931        let mut buf = String::new();
3932        let result = mi.write_with_offset(10, &mut buf);
3933        assert_eq!(result, b"13/B");
3934    }
3935
3936    #[test]
3937    fn test_write_with_offset_reuses_buffer() {
3938        let mut buf = String::new();
3939        let mi1 = MoleculeId::Single(1);
3940        let _ = mi1.write_with_offset(0, &mut buf);
3941        assert_eq!(buf, "1");
3942
3943        // Reuse buffer — should clear and overwrite
3944        let mi2 = MoleculeId::PairedA(99);
3945        let result = mi2.write_with_offset(0, &mut buf);
3946        assert_eq!(result, b"99/A");
3947        assert_eq!(buf, "99/A");
3948    }
3949
3950    #[test]
3951    fn test_write_with_offset_matches_to_string() {
3952        // Verify write_with_offset produces identical output to to_string_with_offset
3953        let mut buf = String::new();
3954        for mi in [
3955            MoleculeId::None,
3956            MoleculeId::Single(0),
3957            MoleculeId::Single(42),
3958            MoleculeId::PairedA(7),
3959            MoleculeId::PairedB(7),
3960        ] {
3961            let expected = mi.to_string_with_offset(100);
3962            let result = mi.write_with_offset(100, &mut buf);
3963            assert_eq!(result, expected.as_bytes(), "Mismatch for {mi:?}");
3964        }
3965    }
3966
3967    #[test]
3968    fn test_records_in_range_invalid_start_gt_end() {
3969        let template = Template {
3970            name: b"read1".to_vec(),
3971            records: vec![
3972                create_test_record(b"read1", FLAG_PAIRED | FLAG_READ1),
3973                create_test_record(b"read1", FLAG_PAIRED | FLAG_READ2),
3974            ],
3975            raw_records: None,
3976            r1: Some((0, 1)),
3977            r2: Some((1, 2)),
3978            r1_supplementals: None,
3979            r2_supplementals: None,
3980            r1_secondaries: None,
3981            r2_secondaries: None,
3982            mi: MoleculeId::None,
3983        };
3984
3985        // start > end should return empty, not panic
3986        assert!(template.records_in_range(Some((2, 1))).is_empty());
3987        // valid range
3988        assert_eq!(template.records_in_range(Some((0, 2))).len(), 2);
3989        // None returns empty
3990        assert!(template.records_in_range(None).is_empty());
3991        // end > len returns empty
3992        assert!(template.records_in_range(Some((0, 10))).is_empty());
3993    }
3994}