Skip to main content

fgumi_sam/
builder.rs

1//! Builder for creating test SAM/BAM records and files.
2//!
3//! This module provides a fluent API for constructing SAM/BAM records for testing,
4//! modeled after fgbio's `SamBuilder`. It supports both building individual records
5//! and accumulating records into a collection that can be written to a file.
6//!
7//! ## Builders
8//!
9//! - [`SamBuilder`]: Accumulates records and manages headers for writing to files
10//! - [`RecordBuilder`]: Creates individual records without header management (standalone)
11//! - [`ConsensusTagsBuilder`]: Creates consensus-specific SAM tags
12//!
13//! ## Examples
14//!
15//! ```rust
16//! use fgumi_sam::builder::{RecordBuilder, ConsensusTagsBuilder};
17//!
18//! // Create a simple record
19//! let record = RecordBuilder::new()
20//!     .name("read1")
21//!     .sequence("ACGT")
22//!     .build();
23//!
24//! // Create a consensus record with tags
25//! let consensus = RecordBuilder::new()
26//!     .name("consensus1")
27//!     .sequence("ACGTACGT")
28//!     .consensus_tags(ConsensusTagsBuilder::new().depth_max(10).depth_min(5))
29//!     .build();
30//! ```
31
32use crate::to_smallest_signed_int;
33use anyhow::Result;
34use bstr::BString;
35use noodles::core::Position;
36use noodles::sam::Header;
37use noodles::sam::alignment::io::Write as AlignmentWrite;
38use noodles::sam::alignment::record::Flags;
39use noodles::sam::alignment::record::cigar::Op;
40use noodles::sam::alignment::record::cigar::op::Kind;
41use noodles::sam::alignment::record::data::field::Tag;
42use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
43use noodles::sam::alignment::record_buf::{QualityScores, RecordBuf, Sequence};
44use noodles::sam::header::record::value::Map;
45use noodles::sam::header::record::value::map::{Program, ReadGroup, ReferenceSequence};
46use std::num::NonZeroUsize;
47use std::path::Path;
48use std::sync::atomic::{AtomicU64, Ordering};
49use tempfile::{NamedTempFile, TempDir};
50
51// Constants
52pub const DEFAULT_READ_LENGTH: usize = 100;
53pub const DEFAULT_BASE_QUALITY: u8 = 30;
54pub const DEFAULT_MAPQ: u8 = 60;
55pub const DEFAULT_SAMPLE_NAME: &str = "Sample";
56pub const DEFAULT_READ_GROUP_ID: &str = "A";
57pub const DEFAULT_REFERENCE_LENGTH: usize = 200_000_000;
58
59/// Strand orientation for reads.
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61pub enum Strand {
62    Plus,
63    Minus,
64}
65
66impl Strand {
67    #[must_use]
68    pub fn is_negative(&self) -> bool {
69        matches!(self, Strand::Minus)
70    }
71}
72
73/// Builder for creating test SAM/BAM records and files.
74///
75/// This builder simplifies test record creation by providing methods to add
76/// paired-end and single-end reads with sensible defaults. Records are accumulated
77/// internally and can be written to a file or iterated over.
78///
79/// # Examples
80///
81/// ```rust
82/// use fgumi_sam::builder::{SamBuilder, Strand};
83///
84/// // Create a builder with defaults
85/// let mut builder = SamBuilder::new();
86///
87/// // Add a mapped pair
88/// let pair = builder.add_pair()
89///     .contig(0)
90///     .start1(100)
91///     .start2(200)
92///     .build();
93///
94/// // Add a fragment
95/// let frag = builder.add_frag()
96///     .name("frag1")
97///     .bases("ACGTACGT")
98///     .contig(0)
99///     .start(150)
100///     .build();
101///
102/// // Write to a temp file
103/// let path = builder.to_temp_file().unwrap();
104/// ```
105#[derive(Debug)]
106pub struct SamBuilder {
107    /// SAM header
108    pub header: Header,
109    /// Accumulated records
110    records: Vec<RecordBuf>,
111    /// Default read length
112    read_length: usize,
113    /// Default base quality
114    base_quality: u8,
115    /// Read group ID
116    read_group_id: String,
117    /// Counter for generating sequential names
118    counter: AtomicU64,
119}
120
121impl Default for SamBuilder {
122    fn default() -> Self {
123        Self::new()
124    }
125}
126
127impl SamBuilder {
128    /// Creates a new builder with default settings.
129    ///
130    /// Default settings:
131    /// - Read length: 100
132    /// - Base quality: 30
133    /// - Read group ID: "A"
134    /// - Sample name: "Sample"
135    /// - Reference sequences: chr1-chr22, chrX, chrY, chrM (200MB each)
136    #[must_use]
137    pub fn new() -> Self {
138        Self::with_defaults(DEFAULT_READ_LENGTH, DEFAULT_BASE_QUALITY)
139    }
140
141    /// Creates a new builder with specified read length and base quality.
142    ///
143    /// # Panics
144    ///
145    /// Panics if `DEFAULT_REFERENCE_LENGTH` is zero (impossible in practice).
146    #[must_use]
147    pub fn with_defaults(read_length: usize, base_quality: u8) -> Self {
148        let mut header = Header::builder();
149
150        // Add reference sequences (chr1-22, X, Y, M)
151        for i in 1..=22 {
152            let name = format!("chr{i}");
153            let map =
154                Map::<ReferenceSequence>::new(NonZeroUsize::new(DEFAULT_REFERENCE_LENGTH).unwrap());
155            header = header.add_reference_sequence(BString::from(name), map);
156        }
157        for chr in ["chrX", "chrY", "chrM"] {
158            let map =
159                Map::<ReferenceSequence>::new(NonZeroUsize::new(DEFAULT_REFERENCE_LENGTH).unwrap());
160            header = header.add_reference_sequence(BString::from(chr), map);
161        }
162
163        // Add read group
164        let rg_map = Map::<ReadGroup>::default();
165        header = header.add_read_group(BString::from(DEFAULT_READ_GROUP_ID), rg_map);
166
167        // Add program record
168        let pg_map = Map::<Program>::default();
169        header = header.add_program(BString::from("SamBuilder"), pg_map);
170
171        let header = header.build();
172
173        Self {
174            header,
175            records: Vec::new(),
176            read_length,
177            base_quality,
178            read_group_id: DEFAULT_READ_GROUP_ID.to_string(),
179            counter: AtomicU64::new(0),
180        }
181    }
182
183    /// Creates a builder for unmapped reads (no reference sequences in header).
184    #[must_use]
185    pub fn new_unmapped() -> Self {
186        let mut header = Header::builder();
187
188        // Add read group
189        let rg_map = Map::<ReadGroup>::default();
190        header = header.add_read_group(BString::from(DEFAULT_READ_GROUP_ID), rg_map);
191
192        // Add program record
193        let pg_map = Map::<Program>::default();
194        header = header.add_program(BString::from("SamBuilder"), pg_map);
195
196        let header = header.build();
197
198        Self {
199            header,
200            records: Vec::new(),
201            read_length: DEFAULT_READ_LENGTH,
202            base_quality: DEFAULT_BASE_QUALITY,
203            read_group_id: DEFAULT_READ_GROUP_ID.to_string(),
204            counter: AtomicU64::new(0),
205        }
206    }
207
208    /// Creates a builder with a single reference sequence.
209    ///
210    /// # Panics
211    ///
212    /// Panics if `ref_length` is zero.
213    #[must_use]
214    pub fn with_single_ref(ref_name: &str, ref_length: usize) -> Self {
215        let mut header = Header::builder();
216
217        let map = Map::<ReferenceSequence>::new(NonZeroUsize::new(ref_length).unwrap());
218        header = header.add_reference_sequence(BString::from(ref_name), map);
219
220        // Add read group
221        let rg_map = Map::<ReadGroup>::default();
222        header = header.add_read_group(BString::from(DEFAULT_READ_GROUP_ID), rg_map);
223
224        // Add program record
225        let pg_map = Map::<Program>::default();
226        header = header.add_program(BString::from("SamBuilder"), pg_map);
227
228        let header = header.build();
229
230        Self {
231            header,
232            records: Vec::new(),
233            read_length: DEFAULT_READ_LENGTH,
234            base_quality: DEFAULT_BASE_QUALITY,
235            read_group_id: DEFAULT_READ_GROUP_ID.to_string(),
236            counter: AtomicU64::new(0),
237        }
238    }
239
240    /// Returns the next sequential name.
241    fn next_name(&self) -> String {
242        format!("{:04}", self.counter.fetch_add(1, Ordering::SeqCst))
243    }
244
245    /// Generates random bases of the configured read length.
246    fn random_bases(&self) -> String {
247        use std::collections::hash_map::DefaultHasher;
248        use std::hash::{Hash, Hasher};
249
250        let bases = [b'A', b'C', b'G', b'T'];
251        let seed = self.counter.load(Ordering::SeqCst);
252
253        let mut result = Vec::with_capacity(self.read_length);
254        for i in 0..self.read_length {
255            let mut hasher = DefaultHasher::new();
256            (seed, i).hash(&mut hasher);
257            let idx = usize::try_from(hasher.finish() % 4).expect("modulo 4 always fits in usize");
258            result.push(bases[idx]);
259        }
260        String::from_utf8(result).unwrap()
261    }
262
263    /// Returns a reference to the accumulated records.
264    #[must_use]
265    pub fn records(&self) -> &[RecordBuf] {
266        &self.records
267    }
268
269    /// Returns the number of accumulated records.
270    #[must_use]
271    pub fn len(&self) -> usize {
272        self.records.len()
273    }
274
275    /// Returns true if no records have been added.
276    #[must_use]
277    pub fn is_empty(&self) -> bool {
278        self.records.is_empty()
279    }
280
281    /// Clears all accumulated records.
282    pub fn clear(&mut self) {
283        self.records.clear();
284    }
285
286    /// Returns an iterator over the accumulated records.
287    pub fn iter(&self) -> impl Iterator<Item = &RecordBuf> {
288        self.records.iter()
289    }
290
291    /// Pushes a raw record to the collection.
292    ///
293    /// This is useful when you need to add pre-built records to the collection.
294    pub fn push_record(&mut self, record: RecordBuf) {
295        self.records.push(record);
296    }
297
298    /// Starts building a paired-end read pair.
299    ///
300    /// Returns a `PairBuilder` that can be configured and then built.
301    /// The resulting records are added to this builder and also returned.
302    #[must_use]
303    pub fn add_pair(&mut self) -> PairBuilder<'_> {
304        PairBuilder::new(self)
305    }
306
307    /// Starts building a single-end (fragment) read.
308    ///
309    /// Returns a `FragBuilder` that can be configured and then built.
310    /// The resulting record is added to this builder and also returned.
311    #[must_use]
312    pub fn add_frag(&mut self) -> FragBuilder<'_> {
313        FragBuilder::new(self)
314    }
315
316    /// Writes accumulated records to a BAM file.
317    ///
318    /// # Errors
319    ///
320    /// Returns an error if the file cannot be created or written.
321    pub fn write_bam(&self, path: &Path) -> Result<()> {
322        let file = std::fs::File::create(path)?;
323        let mut writer = noodles::bam::io::Writer::new(file);
324        writer.write_header(&self.header)?;
325
326        for record in &self.records {
327            writer.write_alignment_record(&self.header, record)?;
328        }
329
330        Ok(())
331    }
332
333    /// Writes accumulated records to a SAM file.
334    ///
335    /// # Errors
336    ///
337    /// Returns an error if the file cannot be created or written.
338    pub fn write_sam(&self, path: &Path) -> Result<()> {
339        let file = std::fs::File::create(path)?;
340        let mut writer = noodles::sam::io::Writer::new(file);
341        writer.write_header(&self.header)?;
342
343        for record in &self.records {
344            writer.write_alignment_record(&self.header, record)?;
345        }
346
347        Ok(())
348    }
349
350    /// Writes to a temporary BAM file and returns the path.
351    ///
352    /// # Errors
353    ///
354    /// Returns an error if the temporary file cannot be created or written.
355    pub fn to_temp_file(&self) -> Result<NamedTempFile> {
356        let temp = NamedTempFile::new()?;
357        self.write_bam(temp.path())?;
358        Ok(temp)
359    }
360}
361
362/// Builder for a paired-end read pair.
363pub struct PairBuilder<'a> {
364    parent: &'a mut SamBuilder,
365    name: Option<String>,
366    bases1: Option<String>,
367    bases2: Option<String>,
368    quals1: Option<Vec<u8>>,
369    quals2: Option<Vec<u8>>,
370    contig: usize,
371    contig2: Option<usize>,
372    start1: Option<usize>,
373    start2: Option<usize>,
374    cigar1: Option<String>,
375    cigar2: Option<String>,
376    mapq1: u8,
377    mapq2: u8,
378    strand1: Strand,
379    strand2: Strand,
380    attrs: Vec<(String, BufValue)>,
381}
382
383impl<'a> PairBuilder<'a> {
384    fn new(parent: &'a mut SamBuilder) -> Self {
385        Self {
386            parent,
387            name: None,
388            bases1: None,
389            bases2: None,
390            quals1: None,
391            quals2: None,
392            contig: 0,
393            contig2: None,
394            start1: None,
395            start2: None,
396            cigar1: None,
397            cigar2: None,
398            mapq1: DEFAULT_MAPQ,
399            mapq2: DEFAULT_MAPQ,
400            strand1: Strand::Plus,
401            strand2: Strand::Minus,
402            attrs: Vec::new(),
403        }
404    }
405
406    /// Sets the read name (same for both reads in pair).
407    #[must_use]
408    pub fn name(mut self, name: &str) -> Self {
409        self.name = Some(name.to_string());
410        self
411    }
412
413    /// Sets the bases for R1.
414    #[must_use]
415    pub fn bases1(mut self, bases: &str) -> Self {
416        self.bases1 = Some(bases.to_string());
417        self
418    }
419
420    /// Sets the bases for R2.
421    #[must_use]
422    pub fn bases2(mut self, bases: &str) -> Self {
423        self.bases2 = Some(bases.to_string());
424        self
425    }
426
427    /// Sets the quality scores for R1.
428    #[must_use]
429    pub fn quals1(mut self, quals: &[u8]) -> Self {
430        self.quals1 = Some(quals.to_vec());
431        self
432    }
433
434    /// Sets the quality scores for R2.
435    #[must_use]
436    pub fn quals2(mut self, quals: &[u8]) -> Self {
437        self.quals2 = Some(quals.to_vec());
438        self
439    }
440
441    /// Sets the reference sequence index for both reads.
442    #[must_use]
443    pub fn contig(mut self, contig: usize) -> Self {
444        self.contig = contig;
445        self
446    }
447
448    /// Sets a different reference sequence index for R2.
449    #[must_use]
450    pub fn contig2(mut self, contig: usize) -> Self {
451        self.contig2 = Some(contig);
452        self
453    }
454
455    /// Sets the alignment start for R1 (1-based). If not set, R1 is unmapped.
456    #[must_use]
457    pub fn start1(mut self, start: usize) -> Self {
458        self.start1 = Some(start);
459        self
460    }
461
462    /// Sets the alignment start for R2 (1-based). If not set, R2 is unmapped.
463    #[must_use]
464    pub fn start2(mut self, start: usize) -> Self {
465        self.start2 = Some(start);
466        self
467    }
468
469    /// Sets the CIGAR string for R1.
470    #[must_use]
471    pub fn cigar1(mut self, cigar: &str) -> Self {
472        self.cigar1 = Some(cigar.to_string());
473        self
474    }
475
476    /// Sets the CIGAR string for R2.
477    #[must_use]
478    pub fn cigar2(mut self, cigar: &str) -> Self {
479        self.cigar2 = Some(cigar.to_string());
480        self
481    }
482
483    /// Sets the mapping quality for R1.
484    #[must_use]
485    pub fn mapq1(mut self, mapq: u8) -> Self {
486        self.mapq1 = mapq;
487        self
488    }
489
490    /// Sets the mapping quality for R2.
491    #[must_use]
492    pub fn mapq2(mut self, mapq: u8) -> Self {
493        self.mapq2 = mapq;
494        self
495    }
496
497    /// Sets the strand for R1.
498    #[must_use]
499    pub fn strand1(mut self, strand: Strand) -> Self {
500        self.strand1 = strand;
501        self
502    }
503
504    /// Sets the strand for R2.
505    #[must_use]
506    pub fn strand2(mut self, strand: Strand) -> Self {
507        self.strand2 = strand;
508        self
509    }
510
511    /// Marks R1 as unmapped.
512    #[must_use]
513    pub fn unmapped1(mut self) -> Self {
514        self.start1 = None;
515        self
516    }
517
518    /// Marks R2 as unmapped.
519    #[must_use]
520    pub fn unmapped2(mut self) -> Self {
521        self.start2 = None;
522        self
523    }
524
525    /// Adds a tag to both reads.
526    #[must_use]
527    pub fn attr<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
528        self.attrs.push((tag.to_string(), value.into()));
529        self
530    }
531
532    /// Builds the pair, adds to parent, and returns references to both records.
533    ///
534    /// # Panics
535    ///
536    /// Panics if the alignment start positions are invalid.
537    #[must_use]
538    #[expect(clippy::too_many_lines, reason = "test builder with many configuration steps")]
539    pub fn build(self) -> (RecordBuf, RecordBuf) {
540        let name = self.name.unwrap_or_else(|| self.parent.next_name());
541        let bases1 = self.bases1.unwrap_or_else(|| self.parent.random_bases());
542        let bases2 = self.bases2.unwrap_or_else(|| self.parent.random_bases());
543        let quals1 = self.quals1.unwrap_or_else(|| vec![self.parent.base_quality; bases1.len()]);
544        let quals2 = self.quals2.unwrap_or_else(|| vec![self.parent.base_quality; bases2.len()]);
545        let cigar1 = self.cigar1.unwrap_or_else(|| format!("{}M", bases1.len()));
546        let cigar2 = self.cigar2.unwrap_or_else(|| format!("{}M", bases2.len()));
547        let contig2 = self.contig2.unwrap_or(self.contig);
548
549        let unmapped1 = self.start1.is_none();
550        let unmapped2 = self.start2.is_none();
551
552        // Build R1
553        let mut first_read = RecordBuf::default();
554        *first_read.name_mut() = Some(BString::from(name.as_bytes()));
555        *first_read.sequence_mut() = Sequence::from(bases1.as_bytes().to_vec());
556        *first_read.quality_scores_mut() = QualityScores::from(quals1);
557
558        let mut flags1 = Flags::SEGMENTED | Flags::FIRST_SEGMENT;
559        if unmapped1 {
560            flags1 |= Flags::UNMAPPED;
561        }
562        if unmapped2 {
563            flags1 |= Flags::MATE_UNMAPPED;
564        }
565        if self.strand1.is_negative() {
566            flags1 |= Flags::REVERSE_COMPLEMENTED;
567        }
568        if self.strand2.is_negative() {
569            flags1 |= Flags::MATE_REVERSE_COMPLEMENTED;
570        }
571        *first_read.flags_mut() = flags1;
572
573        if !unmapped1 {
574            *first_read.reference_sequence_id_mut() = Some(self.contig);
575            *first_read.alignment_start_mut() =
576                Some(Position::try_from(self.start1.unwrap()).unwrap());
577            *first_read.cigar_mut() = parse_cigar(&cigar1).into_iter().collect();
578            *first_read.mapping_quality_mut() = Some(
579                noodles::sam::alignment::record::MappingQuality::try_from(self.mapq1).unwrap(),
580            );
581        }
582
583        if !unmapped2 {
584            *first_read.mate_reference_sequence_id_mut() = Some(contig2);
585            *first_read.mate_alignment_start_mut() =
586                Some(Position::try_from(self.start2.unwrap()).unwrap());
587        }
588
589        // Add read group tag
590        let rg_tag = Tag::new(b'R', b'G');
591        first_read.data_mut().insert(rg_tag, BufValue::from(self.parent.read_group_id.clone()));
592
593        // Add custom attributes
594        for (tag_str, value) in &self.attrs {
595            if tag_str.len() == 2 {
596                let tag = Tag::new(tag_str.as_bytes()[0], tag_str.as_bytes()[1]);
597                first_read.data_mut().insert(tag, value.clone());
598            }
599        }
600
601        // Build R2
602        let mut second_read = RecordBuf::default();
603        *second_read.name_mut() = Some(BString::from(name.as_bytes()));
604        *second_read.sequence_mut() = Sequence::from(bases2.as_bytes().to_vec());
605        *second_read.quality_scores_mut() = QualityScores::from(quals2);
606
607        let mut flags2 = Flags::SEGMENTED | Flags::LAST_SEGMENT;
608        if unmapped2 {
609            flags2 |= Flags::UNMAPPED;
610        }
611        if unmapped1 {
612            flags2 |= Flags::MATE_UNMAPPED;
613        }
614        if self.strand2.is_negative() {
615            flags2 |= Flags::REVERSE_COMPLEMENTED;
616        }
617        if self.strand1.is_negative() {
618            flags2 |= Flags::MATE_REVERSE_COMPLEMENTED;
619        }
620        *second_read.flags_mut() = flags2;
621
622        if !unmapped2 {
623            *second_read.reference_sequence_id_mut() = Some(contig2);
624            *second_read.alignment_start_mut() =
625                Some(Position::try_from(self.start2.unwrap()).unwrap());
626            *second_read.cigar_mut() = parse_cigar(&cigar2).into_iter().collect();
627            *second_read.mapping_quality_mut() = Some(
628                noodles::sam::alignment::record::MappingQuality::try_from(self.mapq2).unwrap(),
629            );
630        }
631
632        if !unmapped1 {
633            *second_read.mate_reference_sequence_id_mut() = Some(self.contig);
634            *second_read.mate_alignment_start_mut() =
635                Some(Position::try_from(self.start1.unwrap()).unwrap());
636        }
637
638        // Add read group tag
639        second_read.data_mut().insert(rg_tag, BufValue::from(self.parent.read_group_id.clone()));
640
641        // Add custom attributes
642        for (tag_str, value) in &self.attrs {
643            if tag_str.len() == 2 {
644                let tag = Tag::new(tag_str.as_bytes()[0], tag_str.as_bytes()[1]);
645                second_read.data_mut().insert(tag, value.clone());
646            }
647        }
648
649        // Calculate template length if both mapped to same contig
650        if !unmapped1 && !unmapped2 && self.contig == contig2 {
651            let pos1 = i32::try_from(self.start1.unwrap()).expect("start1 fits in i32");
652            let pos2 = i32::try_from(self.start2.unwrap()).expect("start2 fits in i32");
653            let end1 = pos1
654                + i32::try_from(cigar_ref_len(&cigar1)).expect("cigar1 ref len fits in i32")
655                - 1;
656            let end2 = pos2
657                + i32::try_from(cigar_ref_len(&cigar2)).expect("cigar2 ref len fits in i32")
658                - 1;
659
660            let (left, right) = if pos1 <= pos2 { (pos1, end2) } else { (pos2, end1) };
661            let tlen = right - left + 1;
662
663            *first_read.template_length_mut() = if pos1 <= pos2 { tlen } else { -tlen };
664            *second_read.template_length_mut() = if pos2 <= pos1 { tlen } else { -tlen };
665        }
666
667        self.parent.records.push(first_read.clone());
668        self.parent.records.push(second_read.clone());
669
670        (first_read, second_read)
671    }
672}
673
674/// Builder for a single-end (fragment) read.
675pub struct FragBuilder<'a> {
676    parent: &'a mut SamBuilder,
677    name: Option<String>,
678    bases: Option<String>,
679    quals: Option<Vec<u8>>,
680    contig: usize,
681    start: Option<usize>,
682    cigar: Option<String>,
683    mapq: u8,
684    strand: Strand,
685    attrs: Vec<(String, BufValue)>,
686}
687
688impl<'a> FragBuilder<'a> {
689    fn new(parent: &'a mut SamBuilder) -> Self {
690        Self {
691            parent,
692            name: None,
693            bases: None,
694            quals: None,
695            contig: 0,
696            start: None,
697            cigar: None,
698            mapq: DEFAULT_MAPQ,
699            strand: Strand::Plus,
700            attrs: Vec::new(),
701        }
702    }
703
704    /// Sets the read name.
705    #[must_use]
706    pub fn name(mut self, name: &str) -> Self {
707        self.name = Some(name.to_string());
708        self
709    }
710
711    /// Sets the bases.
712    #[must_use]
713    pub fn bases(mut self, bases: &str) -> Self {
714        self.bases = Some(bases.to_string());
715        self
716    }
717
718    /// Sets the quality scores.
719    #[must_use]
720    pub fn quals(mut self, quals: &[u8]) -> Self {
721        self.quals = Some(quals.to_vec());
722        self
723    }
724
725    /// Sets the reference sequence index.
726    #[must_use]
727    pub fn contig(mut self, contig: usize) -> Self {
728        self.contig = contig;
729        self
730    }
731
732    /// Sets the alignment start (1-based). If not set, read is unmapped.
733    #[must_use]
734    pub fn start(mut self, start: usize) -> Self {
735        self.start = Some(start);
736        self
737    }
738
739    /// Sets the CIGAR string.
740    #[must_use]
741    pub fn cigar(mut self, cigar: &str) -> Self {
742        self.cigar = Some(cigar.to_string());
743        self
744    }
745
746    /// Sets the mapping quality.
747    #[must_use]
748    pub fn mapq(mut self, mapq: u8) -> Self {
749        self.mapq = mapq;
750        self
751    }
752
753    /// Sets the strand.
754    #[must_use]
755    pub fn strand(mut self, strand: Strand) -> Self {
756        self.strand = strand;
757        self
758    }
759
760    /// Marks the read as unmapped.
761    #[must_use]
762    pub fn unmapped(mut self) -> Self {
763        self.start = None;
764        self
765    }
766
767    /// Adds a tag to the read.
768    #[must_use]
769    pub fn attr<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
770        self.attrs.push((tag.to_string(), value.into()));
771        self
772    }
773
774    /// Builds the record, adds to parent, and returns a clone of the record.
775    ///
776    /// # Panics
777    ///
778    /// Panics if the alignment start position is invalid.
779    #[must_use]
780    pub fn build(self) -> RecordBuf {
781        let name = self.name.unwrap_or_else(|| self.parent.next_name());
782        let bases = self.bases.unwrap_or_else(|| self.parent.random_bases());
783        let quals = self.quals.unwrap_or_else(|| vec![self.parent.base_quality; bases.len()]);
784        let cigar = self.cigar.unwrap_or_else(|| format!("{}M", bases.len()));
785
786        let unmapped = self.start.is_none();
787
788        let mut rec = RecordBuf::default();
789        *rec.name_mut() = Some(BString::from(name.as_bytes()));
790        *rec.sequence_mut() = Sequence::from(bases.as_bytes().to_vec());
791        *rec.quality_scores_mut() = QualityScores::from(quals);
792
793        let mut flags = Flags::empty();
794        if unmapped {
795            flags |= Flags::UNMAPPED;
796        }
797        if self.strand.is_negative() {
798            flags |= Flags::REVERSE_COMPLEMENTED;
799        }
800        *rec.flags_mut() = flags;
801
802        if !unmapped {
803            *rec.reference_sequence_id_mut() = Some(self.contig);
804            *rec.alignment_start_mut() = Some(Position::try_from(self.start.unwrap()).unwrap());
805            *rec.cigar_mut() = parse_cigar(&cigar).into_iter().collect();
806            *rec.mapping_quality_mut() =
807                Some(noodles::sam::alignment::record::MappingQuality::try_from(self.mapq).unwrap());
808        }
809
810        // Add read group tag
811        let rg_tag = Tag::new(b'R', b'G');
812        rec.data_mut().insert(rg_tag, BufValue::from(self.parent.read_group_id.clone()));
813
814        // Add custom attributes
815        for (tag_str, value) in &self.attrs {
816            if tag_str.len() == 2 {
817                let tag = Tag::new(tag_str.as_bytes()[0], tag_str.as_bytes()[1]);
818                rec.data_mut().insert(tag, value.clone());
819            }
820        }
821
822        self.parent.records.push(rec.clone());
823        rec
824    }
825}
826
827/// Parses a CIGAR string into a vector of operations.
828///
829/// # Panics
830///
831/// Panics if the CIGAR string contains invalid characters or formatting.
832#[must_use]
833pub fn parse_cigar(cigar_str: &str) -> Vec<Op> {
834    let mut ops = Vec::new();
835    let mut num_str = String::new();
836
837    for c in cigar_str.chars() {
838        if c.is_ascii_digit() {
839            num_str.push(c);
840        } else {
841            let len: usize = num_str.parse().expect("Invalid CIGAR: expected number");
842            let kind = match c {
843                'M' => Kind::Match,
844                'I' => Kind::Insertion,
845                'D' => Kind::Deletion,
846                'N' => Kind::Skip,
847                'S' => Kind::SoftClip,
848                'H' => Kind::HardClip,
849                'P' => Kind::Pad,
850                '=' => Kind::SequenceMatch,
851                'X' => Kind::SequenceMismatch,
852                _ => panic!("Unknown CIGAR operation: {c}"),
853            };
854            ops.push(Op::new(kind, len));
855            num_str.clear();
856        }
857    }
858
859    ops
860}
861
862// ============================================================================
863// Convenience Methods
864// ============================================================================
865
866/// Default reference length for test data.
867pub const REFERENCE_LENGTH: usize = 1_000;
868
869/// Program ID for mapped BAM files.
870pub const MAPPED_PG_ID: &str = "mapped";
871
872impl SamBuilder {
873    /// Creates a new builder for mapped BAM files with a single reference.
874    ///
875    /// Creates a builder with "chr1" as the reference name and `REFERENCE_LENGTH` as length.
876    #[must_use]
877    pub fn new_mapped() -> Self {
878        Self::with_single_ref("chr1", REFERENCE_LENGTH)
879    }
880
881    /// Adds a pair using positional arguments.
882    ///
883    /// A convenience method for adding pairs with common parameters.
884    ///
885    /// # Arguments
886    /// * `name` - Read name
887    /// * `start1` - Start position for R1 (None = unmapped)
888    /// * `start2` - Start position for R2 (None = unmapped)
889    /// * `strand1` - true = Plus, false = Minus for R1
890    /// * `strand2` - true = Plus, false = Minus for R2
891    /// * `attrs` - Attributes to add to both reads
892    pub fn add_pair_with_attrs(
893        &mut self,
894        name: &str,
895        start1: Option<usize>,
896        start2: Option<usize>,
897        strand1: bool,
898        strand2: bool,
899        attrs: &std::collections::HashMap<&str, BufValue>,
900    ) {
901        let mut pair = self
902            .add_pair()
903            .name(name)
904            .strand1(if strand1 { Strand::Plus } else { Strand::Minus })
905            .strand2(if strand2 { Strand::Plus } else { Strand::Minus });
906
907        if let Some(s) = start1 {
908            pair = pair.start1(s);
909        }
910        if let Some(s) = start2 {
911            pair = pair.start2(s);
912        }
913        for (tag, value) in attrs {
914            pair = pair.attr(tag, value.clone());
915        }
916        let _ = pair.build();
917    }
918
919    /// Adds a fragment using positional arguments.
920    ///
921    /// A convenience method for adding fragments with common parameters.
922    ///
923    /// # Arguments
924    /// * `name` - Read name
925    /// * `start` - Start position (None = unmapped)
926    /// * `strand` - true = Plus, false = Minus
927    /// * `attrs` - Attributes to add
928    pub fn add_frag_with_attrs(
929        &mut self,
930        name: &str,
931        start: Option<usize>,
932        strand: bool,
933        attrs: &std::collections::HashMap<&str, BufValue>,
934    ) {
935        let mut frag =
936            self.add_frag().name(name).strand(if strand { Strand::Plus } else { Strand::Minus });
937
938        if let Some(s) = start {
939            frag = frag.start(s);
940        }
941        for (tag, value) in attrs {
942            frag = frag.attr(tag, value.clone());
943        }
944        let _ = frag.build();
945    }
946
947    /// Writes accumulated records to a BAM file.
948    ///
949    /// Alias for `write_bam`.
950    ///
951    /// # Errors
952    ///
953    /// Returns an error if the BAM file cannot be created or written to.
954    pub fn write(&self, path: &std::path::Path) -> Result<()> {
955        self.write_bam(path)
956    }
957}
958
959/// Helper to create a reference dictionary file.
960///
961/// # Panics
962///
963/// Panics if `ref_length` is zero, since a reference sequence length must be non-zero.
964///
965/// # Errors
966///
967/// Returns an error if the dictionary file cannot be created or written to.
968pub fn create_ref_dict(
969    dir: &TempDir,
970    ref_name: &str,
971    ref_length: usize,
972) -> Result<std::path::PathBuf> {
973    let dict_path = dir.path().join("ref.dict");
974    let mut header = Header::builder();
975
976    let map = Map::<ReferenceSequence>::new(NonZeroUsize::new(ref_length).unwrap());
977    header = header.add_reference_sequence(BString::from(ref_name), map);
978
979    let header = header.build();
980
981    let file = std::fs::File::create(&dict_path)?;
982    let mut writer = noodles::sam::io::Writer::new(file);
983    writer.write_header(&header)?;
984
985    Ok(dict_path)
986}
987
988// ============================================================================
989// Standalone Record Builder
990// ============================================================================
991
992/// Builder for creating individual BAM/SAM records without a parent `SamBuilder`.
993///
994/// This builder simplifies test record creation by providing a chainable interface
995/// for setting all record fields. All fields have sensible defaults. Unlike
996/// `SamBuilder`, this creates records directly without accumulating them.
997///
998/// # Examples
999///
1000/// ```rust
1001/// use fgumi_sam::builder::{RecordBuilder, ConsensusTagsBuilder};
1002/// use noodles::sam::alignment::record::Flags;
1003///
1004/// // Create a simple unmapped read
1005/// let record = RecordBuilder::new()
1006///     .name("read1")
1007///     .sequence("ACGT")
1008///     .qualities(&[30, 30, 30, 30])
1009///     .build();
1010///
1011/// // Create a mapped paired-end R1
1012/// let r1 = RecordBuilder::new()
1013///     .name("read1")
1014///     .sequence("ACGTACGT")
1015///     .qualities(&[30; 8])
1016///     .paired(true)
1017///     .first_segment(true)
1018///     .reference_sequence_id(0)
1019///     .alignment_start(100)
1020///     .cigar("8M")
1021///     .mapping_quality(60)
1022///     .build();
1023///
1024/// // Create a record with tags
1025/// let record = RecordBuilder::new()
1026///     .name("read1")
1027///     .sequence("ACGT")
1028///     .tag("RG", "A")
1029///     .tag("MI", "AAAA-CCCC/A")
1030///     .build();
1031/// ```
1032#[derive(Debug, Default)]
1033pub struct RecordBuilder {
1034    name: Option<Vec<u8>>,
1035    flags: Flags,
1036    reference_sequence_id: Option<usize>,
1037    alignment_start: Option<usize>,
1038    mapping_quality: Option<u8>,
1039    cigar: Option<String>,
1040    sequence: Vec<u8>,
1041    qualities: Vec<u8>,
1042    tags: Vec<(Tag, BufValue)>,
1043    mate_reference_sequence_id: Option<usize>,
1044    mate_alignment_start: Option<usize>,
1045    template_length: Option<i32>,
1046}
1047
1048impl RecordBuilder {
1049    /// Creates a new builder with default values.
1050    #[must_use]
1051    pub fn new() -> Self {
1052        Self {
1053            name: None,
1054            flags: Flags::empty(),
1055            reference_sequence_id: None,
1056            alignment_start: None,
1057            mapping_quality: Some(60), // Default to high quality
1058            cigar: None,
1059            sequence: Vec::new(),
1060            qualities: Vec::new(),
1061            tags: Vec::new(),
1062            mate_reference_sequence_id: None,
1063            mate_alignment_start: None,
1064            template_length: None,
1065        }
1066    }
1067
1068    /// Creates a new builder pre-configured for a typical mapped read.
1069    ///
1070    /// Sets common defaults:
1071    /// - `reference_sequence_id`: 0
1072    /// - `mapping_quality`: 60
1073    /// - CIGAR will be auto-generated as `{len}M` if not explicitly set
1074    ///
1075    /// # Examples
1076    ///
1077    /// ```rust
1078    /// use fgumi_sam::builder::RecordBuilder;
1079    /// use noodles::sam::alignment::record::Cigar;
1080    ///
1081    /// let record = RecordBuilder::mapped_read()
1082    ///     .name("read1")
1083    ///     .sequence("ACGTACGT")
1084    ///     .alignment_start(100)
1085    ///     .build();
1086    ///
1087    /// assert_eq!(record.reference_sequence_id(), Some(0));
1088    /// assert!(!record.cigar().is_empty()); // Auto-generated as "8M"
1089    /// ```
1090    #[must_use]
1091    pub fn mapped_read() -> Self {
1092        Self { reference_sequence_id: Some(0), ..Self::new() }
1093    }
1094
1095    /// Sets the read name.
1096    #[must_use]
1097    pub fn name(mut self, name: &str) -> Self {
1098        self.name = Some(name.as_bytes().to_vec());
1099        self
1100    }
1101
1102    /// Sets the sequence.
1103    #[must_use]
1104    pub fn sequence(mut self, seq: &str) -> Self {
1105        self.sequence = seq.as_bytes().to_vec();
1106        // Auto-generate qualities if not set
1107        if self.qualities.is_empty() {
1108            self.qualities = vec![DEFAULT_BASE_QUALITY; seq.len()];
1109        }
1110        self
1111    }
1112
1113    /// Sets the quality scores (Phred+33 ASCII).
1114    #[must_use]
1115    pub fn qualities(mut self, quals: &[u8]) -> Self {
1116        self.qualities = quals.to_vec();
1117        self
1118    }
1119
1120    /// Sets all flags at once.
1121    #[must_use]
1122    pub fn flags(mut self, flags: Flags) -> Self {
1123        self.flags = flags;
1124        self
1125    }
1126
1127    /// Sets the paired flag.
1128    #[must_use]
1129    pub fn paired(mut self, paired: bool) -> Self {
1130        self.flags.set(Flags::SEGMENTED, paired);
1131        self
1132    }
1133
1134    /// Sets the first segment (R1) flag. Implies paired.
1135    #[must_use]
1136    pub fn first_segment(mut self, is_first: bool) -> Self {
1137        self.flags.set(Flags::SEGMENTED, true);
1138        self.flags.set(Flags::FIRST_SEGMENT, is_first);
1139        if !is_first {
1140            self.flags.set(Flags::LAST_SEGMENT, true);
1141        }
1142        self
1143    }
1144
1145    /// Sets the properly paired flag. Implies paired.
1146    #[must_use]
1147    pub fn properly_paired(mut self, properly_paired: bool) -> Self {
1148        if properly_paired {
1149            self.flags.set(Flags::SEGMENTED, true);
1150        }
1151        self.flags.set(Flags::PROPERLY_SEGMENTED, properly_paired);
1152        self
1153    }
1154
1155    /// Sets the unmapped flag.
1156    #[must_use]
1157    pub fn unmapped(mut self, unmapped: bool) -> Self {
1158        self.flags.set(Flags::UNMAPPED, unmapped);
1159        self
1160    }
1161
1162    /// Sets the reverse complement flag.
1163    #[must_use]
1164    pub fn reverse_complement(mut self, reverse: bool) -> Self {
1165        self.flags.set(Flags::REVERSE_COMPLEMENTED, reverse);
1166        self
1167    }
1168
1169    /// Sets the secondary alignment flag.
1170    #[must_use]
1171    pub fn secondary(mut self, secondary: bool) -> Self {
1172        self.flags.set(Flags::SECONDARY, secondary);
1173        self
1174    }
1175
1176    /// Sets the supplementary alignment flag.
1177    #[must_use]
1178    pub fn supplementary(mut self, supplementary: bool) -> Self {
1179        self.flags.set(Flags::SUPPLEMENTARY, supplementary);
1180        self
1181    }
1182
1183    /// Sets the reference sequence ID (0-based).
1184    #[must_use]
1185    pub fn reference_sequence_id(mut self, id: usize) -> Self {
1186        self.reference_sequence_id = Some(id);
1187        self
1188    }
1189
1190    /// Sets the alignment start position (1-based).
1191    #[must_use]
1192    pub fn alignment_start(mut self, pos: usize) -> Self {
1193        self.alignment_start = Some(pos);
1194        self
1195    }
1196
1197    /// Sets the mapping quality.
1198    #[must_use]
1199    pub fn mapping_quality(mut self, mapq: u8) -> Self {
1200        self.mapping_quality = Some(mapq);
1201        self
1202    }
1203
1204    /// Sets the CIGAR string.
1205    #[must_use]
1206    pub fn cigar(mut self, cigar: &str) -> Self {
1207        self.cigar = Some(cigar.to_string());
1208        self
1209    }
1210
1211    /// Sets the mate reference sequence ID (0-based).
1212    #[must_use]
1213    pub fn mate_reference_sequence_id(mut self, id: usize) -> Self {
1214        self.mate_reference_sequence_id = Some(id);
1215        self
1216    }
1217
1218    /// Sets the mate alignment start position (1-based).
1219    #[must_use]
1220    pub fn mate_alignment_start(mut self, pos: usize) -> Self {
1221        self.mate_alignment_start = Some(pos);
1222        self
1223    }
1224
1225    /// Sets the template length (insert size).
1226    #[must_use]
1227    pub fn template_length(mut self, tlen: i32) -> Self {
1228        self.template_length = Some(tlen);
1229        self
1230    }
1231
1232    /// Sets the mate reverse complement flag.
1233    #[must_use]
1234    pub fn mate_reverse_complement(mut self, reverse: bool) -> Self {
1235        self.flags.set(Flags::MATE_REVERSE_COMPLEMENTED, reverse);
1236        self
1237    }
1238
1239    /// Sets the mate unmapped flag.
1240    #[must_use]
1241    pub fn mate_unmapped(mut self, unmapped: bool) -> Self {
1242        self.flags.set(Flags::MATE_UNMAPPED, unmapped);
1243        self
1244    }
1245
1246    /// Adds a SAM tag.
1247    ///
1248    /// # Examples
1249    ///
1250    /// ```rust
1251    /// use fgumi_sam::builder::RecordBuilder;
1252    /// let record = RecordBuilder::new()
1253    ///     .name("read1")
1254    ///     .tag("RG", "A")
1255    ///     .tag("MI", "AAAA-CCCC/A")
1256    ///     .build();
1257    /// ```
1258    #[must_use]
1259    pub fn tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1260        let tag_bytes = tag.as_bytes();
1261        if tag_bytes.len() == 2 {
1262            let tag = Tag::from([tag_bytes[0], tag_bytes[1]]);
1263            self.tags.push((tag, value.into()));
1264        }
1265        self
1266    }
1267
1268    /// Adds consensus tags using a `ConsensusTagsBuilder`.
1269    ///
1270    /// # Examples
1271    ///
1272    /// ```rust
1273    /// use fgumi_sam::builder::{RecordBuilder, ConsensusTagsBuilder};
1274    /// let record = RecordBuilder::new()
1275    ///     .sequence("ACGT")
1276    ///     .consensus_tags(
1277    ///         ConsensusTagsBuilder::new()
1278    ///             .depth_max(10)
1279    ///             .depth_min(5)
1280    ///             .error_rate(0.01)
1281    ///     )
1282    ///     .build();
1283    /// ```
1284    #[must_use]
1285    pub fn consensus_tags(mut self, builder: ConsensusTagsBuilder) -> Self {
1286        for (tag, value) in builder.build() {
1287            self.tags.push((tag, value));
1288        }
1289        self
1290    }
1291
1292    /// Builds the `RecordBuf`.
1293    ///
1294    /// # Panics
1295    ///
1296    /// Panics if CIGAR string parsing fails (should only happen with invalid CIGAR).
1297    #[must_use]
1298    pub fn build(self) -> RecordBuf {
1299        let mut record = RecordBuf::default();
1300
1301        // Set name
1302        if let Some(name) = self.name {
1303            *record.name_mut() = Some(name.into());
1304        }
1305
1306        // Set flags
1307        *record.flags_mut() = self.flags;
1308
1309        // Set reference and position
1310        if let Some(ref_id) = self.reference_sequence_id {
1311            *record.reference_sequence_id_mut() = Some(ref_id);
1312        }
1313        if let Some(pos) = self.alignment_start {
1314            *record.alignment_start_mut() =
1315                Some(Position::try_from(pos).expect("alignment_start must be >= 1"));
1316        }
1317
1318        // Set mate reference and position
1319        if let Some(mate_ref_id) = self.mate_reference_sequence_id {
1320            *record.mate_reference_sequence_id_mut() = Some(mate_ref_id);
1321        }
1322        if let Some(mate_pos) = self.mate_alignment_start {
1323            *record.mate_alignment_start_mut() =
1324                Some(Position::try_from(mate_pos).expect("mate_alignment_start must be >= 1"));
1325        }
1326        if let Some(tlen) = self.template_length {
1327            *record.template_length_mut() = tlen;
1328        }
1329
1330        // Set mapping quality
1331        if let Some(mapq) = self.mapping_quality {
1332            *record.mapping_quality_mut() = Some(
1333                noodles::sam::alignment::record::MappingQuality::try_from(mapq)
1334                    .expect("mapping_quality must be valid"),
1335            );
1336        }
1337
1338        // Handle CIGAR and sequence auto-generation:
1339        // - If both set: use as-is
1340        // - If only sequence: auto-generate CIGAR as NM
1341        // - If only CIGAR: auto-generate sequence of appropriate length
1342        let (cigar_str, sequence) = match (self.cigar, self.sequence.is_empty()) {
1343            (Some(cigar), true) => {
1344                // CIGAR provided, no sequence: generate sequence from CIGAR
1345                let seq_len = cigar_seq_len(&cigar);
1346                let generated_seq: String =
1347                    (0..seq_len).map(|i| "ACGT".chars().nth(i % 4).unwrap()).collect();
1348                (cigar, generated_seq.into_bytes())
1349            }
1350            (Some(cigar), false) => (cigar, self.sequence),
1351            (None, false) => (format!("{}M", self.sequence.len()), self.sequence),
1352            (None, true) => (String::new(), Vec::new()),
1353        };
1354
1355        if !cigar_str.is_empty() {
1356            let ops = parse_cigar(&cigar_str);
1357            *record.cigar_mut() = ops.into_iter().collect();
1358        }
1359
1360        // Set sequence and qualities (auto-generate qualities if needed)
1361        let qualities = if self.qualities.is_empty() && !sequence.is_empty() {
1362            vec![DEFAULT_BASE_QUALITY; sequence.len()]
1363        } else {
1364            self.qualities
1365        };
1366        *record.sequence_mut() = Sequence::from(sequence);
1367        *record.quality_scores_mut() = QualityScores::from(qualities);
1368
1369        // Add tags
1370        for (tag, value) in self.tags {
1371            record.data_mut().insert(tag, value);
1372        }
1373
1374        record
1375    }
1376}
1377
1378// ============================================================================
1379// Record Pair Builder (Standalone)
1380// ============================================================================
1381
1382/// Builder for creating paired-end read pairs without a parent `SamBuilder`.
1383///
1384/// This builder simplifies test pair creation by providing a chainable interface
1385/// that builds both R1 and R2 with proper mate information and template length.
1386///
1387/// # Examples
1388///
1389/// ```rust
1390/// use fgumi_sam::builder::RecordPairBuilder;
1391///
1392/// let (r1, r2) = RecordPairBuilder::new()
1393///     .name("read1")
1394///     .r1_sequence("ACGTACGT")
1395///     .r1_start(100)
1396///     .r2_sequence("TGCATGCA")
1397///     .r2_start(200)
1398///     .tag("MI", "1/A")
1399///     .build();
1400///
1401/// assert!(r1.flags().is_first_segment());
1402/// assert!(r2.flags().is_last_segment());
1403/// ```
1404#[derive(Debug, Default)]
1405#[expect(
1406    clippy::struct_excessive_bools,
1407    reason = "test builder struct; bools represent independent SAM flags (r1_reverse, r2_reverse, secondary, supplementary)"
1408)]
1409pub struct RecordPairBuilder {
1410    name: Option<String>,
1411    r1_sequence: Option<String>,
1412    r2_sequence: Option<String>,
1413    r1_qualities: Option<Vec<u8>>,
1414    r2_qualities: Option<Vec<u8>>,
1415    r1_start: Option<usize>,
1416    r2_start: Option<usize>,
1417    r1_cigar: Option<String>,
1418    r2_cigar: Option<String>,
1419    reference_sequence_id: usize,
1420    r2_reference_sequence_id: Option<usize>,
1421    mapping_quality: u8,
1422    r1_reverse: bool,
1423    r2_reverse: bool,
1424    secondary: bool,
1425    supplementary: bool,
1426    tags: Vec<(String, BufValue)>,
1427    r1_tags: Vec<(String, BufValue)>,
1428    r2_tags: Vec<(String, BufValue)>,
1429}
1430
1431impl RecordPairBuilder {
1432    /// Creates a new pair builder with defaults.
1433    ///
1434    /// Defaults:
1435    /// - `reference_sequence_id`: 0 (both reads)
1436    /// - `mapping_quality`: 60
1437    /// - R1: forward strand, R2: reverse strand (FR orientation)
1438    #[must_use]
1439    pub fn new() -> Self {
1440        Self { mapping_quality: 60, r2_reverse: true, ..Self::default() }
1441    }
1442
1443    /// Sets the read name (shared by both reads).
1444    #[must_use]
1445    pub fn name(mut self, name: &str) -> Self {
1446        self.name = Some(name.to_string());
1447        self
1448    }
1449
1450    /// Sets the R1 sequence.
1451    #[must_use]
1452    pub fn r1_sequence(mut self, seq: &str) -> Self {
1453        self.r1_sequence = Some(seq.to_string());
1454        self
1455    }
1456
1457    /// Sets the R2 sequence.
1458    #[must_use]
1459    pub fn r2_sequence(mut self, seq: &str) -> Self {
1460        self.r2_sequence = Some(seq.to_string());
1461        self
1462    }
1463
1464    /// Sets the R1 quality scores.
1465    #[must_use]
1466    pub fn r1_qualities(mut self, quals: &[u8]) -> Self {
1467        self.r1_qualities = Some(quals.to_vec());
1468        self
1469    }
1470
1471    /// Sets the R2 quality scores.
1472    #[must_use]
1473    pub fn r2_qualities(mut self, quals: &[u8]) -> Self {
1474        self.r2_qualities = Some(quals.to_vec());
1475        self
1476    }
1477
1478    /// Sets the R1 alignment start (1-based).
1479    #[must_use]
1480    pub fn r1_start(mut self, start: usize) -> Self {
1481        self.r1_start = Some(start);
1482        self
1483    }
1484
1485    /// Sets the R2 alignment start (1-based).
1486    #[must_use]
1487    pub fn r2_start(mut self, start: usize) -> Self {
1488        self.r2_start = Some(start);
1489        self
1490    }
1491
1492    /// Sets the R1 CIGAR string.
1493    #[must_use]
1494    pub fn r1_cigar(mut self, cigar: &str) -> Self {
1495        self.r1_cigar = Some(cigar.to_string());
1496        self
1497    }
1498
1499    /// Sets the R2 CIGAR string.
1500    #[must_use]
1501    pub fn r2_cigar(mut self, cigar: &str) -> Self {
1502        self.r2_cigar = Some(cigar.to_string());
1503        self
1504    }
1505
1506    /// Sets the reference sequence ID for both reads.
1507    #[must_use]
1508    pub fn reference_sequence_id(mut self, id: usize) -> Self {
1509        self.reference_sequence_id = id;
1510        self
1511    }
1512
1513    /// Sets a different reference sequence ID for R2.
1514    #[must_use]
1515    pub fn r2_reference_sequence_id(mut self, id: usize) -> Self {
1516        self.r2_reference_sequence_id = Some(id);
1517        self
1518    }
1519
1520    /// Sets the mapping quality for both reads.
1521    #[must_use]
1522    pub fn mapping_quality(mut self, mapq: u8) -> Self {
1523        self.mapping_quality = mapq;
1524        self
1525    }
1526
1527    /// Sets R1 as reverse complemented.
1528    #[must_use]
1529    pub fn r1_reverse(mut self, reverse: bool) -> Self {
1530        self.r1_reverse = reverse;
1531        self
1532    }
1533
1534    /// Sets R2 as reverse complemented.
1535    #[must_use]
1536    pub fn r2_reverse(mut self, reverse: bool) -> Self {
1537        self.r2_reverse = reverse;
1538        self
1539    }
1540
1541    /// Marks both reads as secondary alignments.
1542    #[must_use]
1543    pub fn secondary(mut self, secondary: bool) -> Self {
1544        self.secondary = secondary;
1545        self
1546    }
1547
1548    /// Marks both reads as supplementary alignments.
1549    #[must_use]
1550    pub fn supplementary(mut self, supplementary: bool) -> Self {
1551        self.supplementary = supplementary;
1552        self
1553    }
1554
1555    /// Adds a tag to both reads.
1556    #[must_use]
1557    pub fn tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1558        self.tags.push((tag.to_string(), value.into()));
1559        self
1560    }
1561
1562    /// Adds a tag to R1 only.
1563    #[must_use]
1564    pub fn r1_tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1565        self.r1_tags.push((tag.to_string(), value.into()));
1566        self
1567    }
1568
1569    /// Adds a tag to R2 only.
1570    #[must_use]
1571    pub fn r2_tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1572        self.r2_tags.push((tag.to_string(), value.into()));
1573        self
1574    }
1575
1576    /// Builds the pair, returning (R1, R2).
1577    ///
1578    /// # Panics
1579    ///
1580    /// Panics if start positions or sequence lengths do not fit in `i32`, which is
1581    /// only possible with unreasonably large test values.
1582    #[must_use]
1583    pub fn build(self) -> (RecordBuf, RecordBuf) {
1584        let name = self.name.unwrap_or_else(|| "pair".to_string());
1585        let r1_seq = self.r1_sequence.unwrap_or_else(|| "ACGT".to_string());
1586        let r2_seq = self.r2_sequence.unwrap_or_else(|| "ACGT".to_string());
1587        let r1_quals =
1588            self.r1_qualities.unwrap_or_else(|| vec![DEFAULT_BASE_QUALITY; r1_seq.len()]);
1589        let r2_quals =
1590            self.r2_qualities.unwrap_or_else(|| vec![DEFAULT_BASE_QUALITY; r2_seq.len()]);
1591        let r1_cigar = self.r1_cigar.unwrap_or_else(|| format!("{}M", r1_seq.len()));
1592        let r2_cigar = self.r2_cigar.unwrap_or_else(|| format!("{}M", r2_seq.len()));
1593        let r2_ref_id = self.r2_reference_sequence_id.unwrap_or(self.reference_sequence_id);
1594
1595        let r1_mapped = self.r1_start.is_some();
1596        let r2_mapped = self.r2_start.is_some();
1597
1598        // Build R1
1599        let mut r1_builder = RecordBuilder::new()
1600            .name(&name)
1601            .sequence(&r1_seq)
1602            .qualities(&r1_quals)
1603            .paired(true)
1604            .first_segment(true)
1605            .reverse_complement(self.r1_reverse)
1606            .mate_reverse_complement(self.r2_reverse)
1607            .secondary(self.secondary)
1608            .supplementary(self.supplementary);
1609
1610        if r1_mapped {
1611            r1_builder = r1_builder
1612                .reference_sequence_id(self.reference_sequence_id)
1613                .alignment_start(self.r1_start.unwrap())
1614                .cigar(&r1_cigar)
1615                .mapping_quality(self.mapping_quality);
1616        } else {
1617            r1_builder = r1_builder.unmapped(true);
1618        }
1619
1620        if r2_mapped {
1621            r1_builder = r1_builder
1622                .mate_reference_sequence_id(r2_ref_id)
1623                .mate_alignment_start(self.r2_start.unwrap());
1624        } else {
1625            r1_builder = r1_builder.mate_unmapped(true);
1626        }
1627
1628        for (tag, value) in &self.tags {
1629            r1_builder = r1_builder.tag(tag, value.clone());
1630        }
1631        for (tag, value) in &self.r1_tags {
1632            r1_builder = r1_builder.tag(tag, value.clone());
1633        }
1634
1635        // Build R2
1636        let mut r2_builder = RecordBuilder::new()
1637            .name(&name)
1638            .sequence(&r2_seq)
1639            .qualities(&r2_quals)
1640            .paired(true)
1641            .first_segment(false) // Sets LAST_SEGMENT
1642            .reverse_complement(self.r2_reverse)
1643            .mate_reverse_complement(self.r1_reverse)
1644            .secondary(self.secondary)
1645            .supplementary(self.supplementary);
1646
1647        if r2_mapped {
1648            r2_builder = r2_builder
1649                .reference_sequence_id(r2_ref_id)
1650                .alignment_start(self.r2_start.unwrap())
1651                .cigar(&r2_cigar)
1652                .mapping_quality(self.mapping_quality);
1653        } else {
1654            r2_builder = r2_builder.unmapped(true);
1655        }
1656
1657        if r1_mapped {
1658            r2_builder = r2_builder
1659                .mate_reference_sequence_id(self.reference_sequence_id)
1660                .mate_alignment_start(self.r1_start.unwrap());
1661        } else {
1662            r2_builder = r2_builder.mate_unmapped(true);
1663        }
1664
1665        for (tag, value) in &self.tags {
1666            r2_builder = r2_builder.tag(tag, value.clone());
1667        }
1668        for (tag, value) in &self.r2_tags {
1669            r2_builder = r2_builder.tag(tag, value.clone());
1670        }
1671
1672        // Add MC (mate CIGAR) tags when both reads are mapped
1673        if r1_mapped && r2_mapped {
1674            r1_builder = r1_builder.tag("MC", r2_cigar.as_str());
1675            r2_builder = r2_builder.tag("MC", r1_cigar.as_str());
1676        }
1677
1678        // Calculate template length if both mapped to same reference
1679        let mut first_read = r1_builder.build();
1680        let mut second_read = r2_builder.build();
1681
1682        if r1_mapped && r2_mapped && self.reference_sequence_id == r2_ref_id {
1683            let pos1 = i32::try_from(self.r1_start.unwrap()).expect("r1_start fits in i32");
1684            let pos2 = i32::try_from(self.r2_start.unwrap()).expect("r2_start fits in i32");
1685            let end1 = pos1
1686                + i32::try_from(cigar_ref_len(&r1_cigar)).expect("r1 cigar ref len fits in i32")
1687                - 1;
1688            let end2 = pos2
1689                + i32::try_from(cigar_ref_len(&r2_cigar)).expect("r2 cigar ref len fits in i32")
1690                - 1;
1691
1692            let (left, right) = if pos1 <= pos2 { (pos1, end2) } else { (pos2, end1) };
1693            let tlen = right - left + 1;
1694
1695            *first_read.template_length_mut() = if pos1 <= pos2 { tlen } else { -tlen };
1696            *second_read.template_length_mut() = if pos2 <= pos1 { tlen } else { -tlen };
1697        }
1698
1699        (first_read, second_read)
1700    }
1701}
1702
1703// ============================================================================
1704// Consensus Tags Builder
1705// ============================================================================
1706
1707/// Builder for consensus-specific SAM tags.
1708///
1709/// This builder creates the standard consensus tags used by fgumi consensus callers:
1710/// - `cD`: Maximum depth (coverage)
1711/// - `cM`: Minimum depth (coverage)
1712/// - `cE`: Error rate
1713/// - `cd`: Per-base depth array
1714/// - `ce`: Per-base error count array
1715///
1716/// For duplex consensus reads, additional strand-specific tags are supported:
1717/// - `aD`: A-strand (AB) depth
1718/// - `bD`: B-strand (BA) depth
1719/// - `aM`: A-strand minimum depth
1720/// - `bM`: B-strand minimum depth
1721/// - `aE`: A-strand error rate
1722/// - `bE`: B-strand error rate
1723#[derive(Debug, Default)]
1724pub struct ConsensusTagsBuilder {
1725    depth_max: Option<i32>,
1726    depth_min: Option<i32>,
1727    error_rate: Option<f32>,
1728    error_count: Option<i32>,
1729    per_base_depths: Option<Vec<u16>>,
1730    per_base_errors: Option<Vec<u16>>,
1731    // Duplex-specific fields
1732    ab_depth: Option<i32>,
1733    ba_depth: Option<i32>,
1734    ab_min_depth: Option<i32>,
1735    ba_min_depth: Option<i32>,
1736    ab_errors: Option<f32>,
1737    ba_errors: Option<f32>,
1738    ab_error_count: Option<i32>,
1739    ba_error_count: Option<i32>,
1740}
1741
1742impl ConsensusTagsBuilder {
1743    /// Creates a new builder.
1744    #[must_use]
1745    pub fn new() -> Self {
1746        Self::default()
1747    }
1748
1749    /// Sets the maximum depth (cD tag).
1750    #[must_use]
1751    pub fn depth_max(mut self, depth: i32) -> Self {
1752        self.depth_max = Some(depth);
1753        self
1754    }
1755
1756    /// Sets the minimum depth (cM tag).
1757    #[must_use]
1758    pub fn depth_min(mut self, depth: i32) -> Self {
1759        self.depth_min = Some(depth);
1760        self
1761    }
1762
1763    /// Sets the error rate (cE tag).
1764    #[must_use]
1765    pub fn error_rate(mut self, rate: f32) -> Self {
1766        self.error_rate = Some(rate);
1767        self
1768    }
1769
1770    /// Sets per-base depths array.
1771    #[must_use]
1772    pub fn per_base_depths(mut self, depths: &[u16]) -> Self {
1773        self.per_base_depths = Some(depths.to_vec());
1774        self
1775    }
1776
1777    /// Sets per-base errors array.
1778    #[must_use]
1779    pub fn per_base_errors(mut self, errors: &[u16]) -> Self {
1780        self.per_base_errors = Some(errors.to_vec());
1781        self
1782    }
1783
1784    /// Sets the A-strand (AB) depth (aD tag) for duplex consensus.
1785    #[must_use]
1786    pub fn ab_depth(mut self, depth: i32) -> Self {
1787        self.ab_depth = Some(depth);
1788        self
1789    }
1790
1791    /// Sets the B-strand (BA) depth (bD tag) for duplex consensus.
1792    #[must_use]
1793    pub fn ba_depth(mut self, depth: i32) -> Self {
1794        self.ba_depth = Some(depth);
1795        self
1796    }
1797
1798    /// Sets the A-strand minimum depth (aM tag) for duplex consensus.
1799    #[must_use]
1800    pub fn ab_min_depth(mut self, depth: i32) -> Self {
1801        self.ab_min_depth = Some(depth);
1802        self
1803    }
1804
1805    /// Sets the B-strand minimum depth (bM tag) for duplex consensus.
1806    #[must_use]
1807    pub fn ba_min_depth(mut self, depth: i32) -> Self {
1808        self.ba_min_depth = Some(depth);
1809        self
1810    }
1811
1812    /// Sets the A-strand error rate (aE tag) for duplex consensus.
1813    #[must_use]
1814    pub fn ab_errors(mut self, rate: f32) -> Self {
1815        self.ab_errors = Some(rate);
1816        self
1817    }
1818
1819    /// Sets the B-strand error rate (bE tag) for duplex consensus.
1820    #[must_use]
1821    pub fn ba_errors(mut self, rate: f32) -> Self {
1822        self.ba_errors = Some(rate);
1823        self
1824    }
1825
1826    /// Sets the consensus error count (cE tag) as an integer.
1827    ///
1828    /// This is an alternative to `error_rate` for tools that store error counts rather than rates.
1829    #[must_use]
1830    pub fn tag_ce_as_int(mut self, count: i32) -> Self {
1831        self.error_count = Some(count);
1832        self
1833    }
1834
1835    /// Sets the A-strand error count (aE tag) as an integer.
1836    #[must_use]
1837    pub fn ab_errors_int(mut self, count: i32) -> Self {
1838        self.ab_error_count = Some(count);
1839        self
1840    }
1841
1842    /// Sets the B-strand error count (bE tag) as an integer.
1843    #[must_use]
1844    pub fn ba_errors_int(mut self, count: i32) -> Self {
1845        self.ba_error_count = Some(count);
1846        self
1847    }
1848
1849    /// Builds the tags as a vector of (Tag, Value) pairs.
1850    #[must_use]
1851    pub fn build(self) -> Vec<(Tag, BufValue)> {
1852        // Guard against conflicting tag sources
1853        debug_assert!(
1854            !(self.depth_max.is_some() && self.per_base_depths.is_some()),
1855            "depth_max (cD) and per_base_depths (cd) are mutually exclusive; use one or the other"
1856        );
1857        debug_assert!(
1858            !(self.error_rate.is_some() && self.per_base_errors.is_some()),
1859            "error_rate (cE) and per_base_errors (ce) are mutually exclusive; use one or the other"
1860        );
1861        debug_assert!(
1862            !(self.error_count.is_some() && self.per_base_errors.is_some()),
1863            "error_count (cE) and per_base_errors (ce) are mutually exclusive; use one or the other"
1864        );
1865
1866        let mut tags = Vec::new();
1867
1868        // Standard consensus tags
1869        if let Some(depth) = self.depth_max {
1870            tags.push((Tag::from([b'c', b'D']), to_smallest_signed_int(depth)));
1871        }
1872        if let Some(depth) = self.depth_min {
1873            tags.push((Tag::from([b'c', b'M']), to_smallest_signed_int(depth)));
1874        }
1875        if let Some(rate) = self.error_rate {
1876            tags.push((Tag::from([b'c', b'E']), BufValue::from(rate)));
1877        }
1878        if let Some(count) = self.error_count {
1879            tags.push((Tag::from([b'c', b'E']), to_smallest_signed_int(count)));
1880        }
1881        if let Some(depths) = self.per_base_depths {
1882            tags.push((Tag::from([b'c', b'd']), BufValue::from(depths)));
1883        }
1884        if let Some(errors) = self.per_base_errors {
1885            tags.push((Tag::from([b'c', b'e']), BufValue::from(errors)));
1886        }
1887
1888        // Duplex consensus tags
1889        if let Some(depth) = self.ab_depth {
1890            tags.push((Tag::from([b'a', b'D']), to_smallest_signed_int(depth)));
1891        }
1892        if let Some(depth) = self.ba_depth {
1893            tags.push((Tag::from([b'b', b'D']), to_smallest_signed_int(depth)));
1894        }
1895        if let Some(depth) = self.ab_min_depth {
1896            tags.push((Tag::from([b'a', b'M']), to_smallest_signed_int(depth)));
1897        }
1898        if let Some(depth) = self.ba_min_depth {
1899            tags.push((Tag::from([b'b', b'M']), to_smallest_signed_int(depth)));
1900        }
1901        if let Some(rate) = self.ab_errors {
1902            tags.push((Tag::from([b'a', b'E']), BufValue::from(rate)));
1903        }
1904        if let Some(rate) = self.ba_errors {
1905            tags.push((Tag::from([b'b', b'E']), BufValue::from(rate)));
1906        }
1907        if let Some(count) = self.ab_error_count {
1908            tags.push((Tag::from([b'a', b'E']), to_smallest_signed_int(count)));
1909        }
1910        if let Some(count) = self.ba_error_count {
1911            tags.push((Tag::from([b'b', b'E']), to_smallest_signed_int(count)));
1912        }
1913
1914        tags
1915    }
1916}
1917
1918#[cfg(test)]
1919#[expect(clippy::similar_names, reason = "test code uses domain-specific tag names like cd/cm/ce")]
1920mod tests {
1921    use super::*;
1922    use noodles::sam::alignment::record::Cigar;
1923
1924    #[test]
1925    fn test_new_builder() {
1926        let builder = SamBuilder::new();
1927        assert!(builder.is_empty());
1928        assert!(!builder.header.reference_sequences().is_empty());
1929    }
1930
1931    #[test]
1932    fn test_add_frag_unmapped() {
1933        let mut builder = SamBuilder::new_unmapped();
1934        let rec = builder.add_frag().name("read1").bases("ACGT").build();
1935
1936        assert_eq!(rec.name().map(std::convert::AsRef::as_ref), Some(b"read1".as_ref()));
1937        assert!(rec.flags().is_unmapped());
1938        assert_eq!(builder.len(), 1);
1939    }
1940
1941    #[test]
1942    fn test_add_frag_mapped() {
1943        let mut builder = SamBuilder::new();
1944        let rec = builder.add_frag().name("read1").bases("ACGT").contig(0).start(100).build();
1945
1946        assert!(!rec.flags().is_unmapped());
1947        assert_eq!(rec.reference_sequence_id(), Some(0));
1948        assert_eq!(rec.alignment_start(), Some(Position::try_from(100).unwrap()));
1949    }
1950
1951    #[test]
1952    fn test_add_pair_unmapped() {
1953        let mut builder = SamBuilder::new_unmapped();
1954        let (read_one, read_two) =
1955            builder.add_pair().name("pair1").bases1("AAAA").bases2("CCCC").build();
1956
1957        assert!(read_one.flags().is_first_segment());
1958        assert!(read_two.flags().is_last_segment());
1959        assert!(read_one.flags().is_unmapped());
1960        assert!(read_two.flags().is_unmapped());
1961        assert_eq!(builder.len(), 2);
1962    }
1963
1964    #[test]
1965    fn test_add_pair_mapped() {
1966        let mut builder = SamBuilder::new();
1967        let (read_one, read_two) = builder
1968            .add_pair()
1969            .name("pair1")
1970            .bases1("AAAA")
1971            .bases2("CCCC")
1972            .start1(100)
1973            .start2(200)
1974            .build();
1975
1976        assert!(!read_one.flags().is_unmapped());
1977        assert!(!read_two.flags().is_unmapped());
1978        assert_eq!(read_one.alignment_start(), Some(Position::try_from(100).unwrap()));
1979        assert_eq!(read_two.alignment_start(), Some(Position::try_from(200).unwrap()));
1980    }
1981
1982    #[test]
1983    fn test_add_pair_with_attrs() {
1984        let mut builder = SamBuilder::new_unmapped();
1985        let (read_one, read_two) =
1986            builder.add_pair().attr("MI", "test_umi").attr("RX", "ACGT").build();
1987
1988        let mi_tag = Tag::new(b'M', b'I');
1989        assert!(read_one.data().get(&mi_tag).is_some());
1990        assert!(read_two.data().get(&mi_tag).is_some());
1991    }
1992
1993    #[test]
1994    fn test_sequential_names() {
1995        let mut builder = SamBuilder::new_unmapped();
1996        let first = builder.add_frag().bases("ACGT").build();
1997        let second = builder.add_frag().bases("ACGT").build();
1998        let third = builder.add_frag().bases("ACGT").build();
1999
2000        assert_eq!(first.name().map(std::convert::AsRef::as_ref), Some(b"0000".as_ref()));
2001        assert_eq!(second.name().map(std::convert::AsRef::as_ref), Some(b"0001".as_ref()));
2002        assert_eq!(third.name().map(std::convert::AsRef::as_ref), Some(b"0002".as_ref()));
2003    }
2004
2005    #[test]
2006    fn test_write_to_temp_file() {
2007        let mut builder = SamBuilder::new();
2008        let _ = builder.add_frag().name("read1").bases("ACGT").contig(0).start(100).build();
2009
2010        let temp = builder.to_temp_file().unwrap();
2011        assert!(temp.path().exists());
2012    }
2013
2014    #[test]
2015    fn test_parse_cigar() {
2016        let ops = parse_cigar("10M2I5M");
2017        assert_eq!(ops.len(), 3);
2018        assert_eq!(ops[0].kind(), Kind::Match);
2019        assert_eq!(ops[0].len(), 10);
2020        assert_eq!(ops[1].kind(), Kind::Insertion);
2021        assert_eq!(ops[1].len(), 2);
2022        assert_eq!(ops[2].kind(), Kind::Match);
2023        assert_eq!(ops[2].len(), 5);
2024    }
2025
2026    #[test]
2027    fn test_strand_setting() {
2028        let mut builder = SamBuilder::new();
2029        let rec =
2030            builder.add_frag().bases("ACGT").contig(0).start(100).strand(Strand::Minus).build();
2031
2032        assert!(rec.flags().is_reverse_complemented());
2033    }
2034
2035    #[test]
2036    fn test_pair_strands() {
2037        let mut builder = SamBuilder::new();
2038        let (read_one, read_two) = builder
2039            .add_pair()
2040            .start1(100)
2041            .start2(200)
2042            .strand1(Strand::Minus)
2043            .strand2(Strand::Plus)
2044            .build();
2045
2046        assert!(read_one.flags().is_reverse_complemented());
2047        assert!(!read_two.flags().is_reverse_complemented());
2048        assert!(!read_one.flags().is_mate_reverse_complemented());
2049        assert!(read_two.flags().is_mate_reverse_complemented());
2050    }
2051
2052    #[test]
2053    fn test_template_length_calculation() {
2054        let mut builder = SamBuilder::new();
2055        let (read_one, read_two) =
2056            builder.add_pair().bases1("AAAA").bases2("CCCC").start1(100).start2(110).build();
2057
2058        // R1 at 100-103, R2 at 110-113
2059        // Template spans 100-113 = 14bp
2060        assert_eq!(read_one.template_length(), 14);
2061        assert_eq!(read_two.template_length(), -14);
2062    }
2063
2064    #[test]
2065    fn test_template_length_with_deletion() {
2066        let mut builder = SamBuilder::new();
2067        // R1: 4bp sequence with 2M2D2M = 6bp ref span (positions 100-105)
2068        // R2: 4bp sequence with 4M = 4bp ref span (positions 110-113)
2069        let (read_one, read_two) = builder
2070            .add_pair()
2071            .bases1("AAAA")
2072            .cigar1("2M2D2M")
2073            .bases2("CCCC")
2074            .start1(100)
2075            .start2(110)
2076            .build();
2077
2078        // Template spans 100-113 = 14bp
2079        assert_eq!(read_one.template_length(), 14);
2080        assert_eq!(read_two.template_length(), -14);
2081    }
2082
2083    #[test]
2084    fn test_template_length_with_insertion() {
2085        let mut builder = SamBuilder::new();
2086        // R1: 6bp sequence with 2M2I2M = 4bp ref span (positions 100-103)
2087        // R2: 4bp sequence at position 110 = 4bp ref span (positions 110-113)
2088        let (read_one, read_two) = builder
2089            .add_pair()
2090            .bases1("AAAAAA")
2091            .cigar1("2M2I2M")
2092            .bases2("CCCC")
2093            .start1(100)
2094            .start2(110)
2095            .build();
2096
2097        // Template spans 100-113 = 14bp (R1 ref span is 4, not 6)
2098        assert_eq!(read_one.template_length(), 14);
2099        assert_eq!(read_two.template_length(), -14);
2100    }
2101
2102    #[test]
2103    fn test_cigar_ref_len_simple() {
2104        assert_eq!(cigar_ref_len("10M"), 10);
2105        assert_eq!(cigar_ref_len("5M3I5M"), 10); // insertion doesn't consume ref
2106        assert_eq!(cigar_ref_len("5M3D5M"), 13); // deletion consumes ref
2107        assert_eq!(cigar_ref_len("2S5M3S"), 5); // soft clips don't consume ref
2108        assert_eq!(cigar_ref_len("5M2N5M"), 12); // skip consumes ref
2109        assert_eq!(cigar_ref_len("5=3X"), 8); // sequence match/mismatch consume ref
2110    }
2111
2112    // ========================================================================
2113    // RecordBuilder tests
2114    // ========================================================================
2115
2116    #[test]
2117    fn test_record_builder_basic() {
2118        let record = RecordBuilder::new()
2119            .name("read1")
2120            .sequence("ACGT")
2121            .qualities(&[30, 30, 30, 30])
2122            .build();
2123
2124        assert_eq!(record.name().map(std::convert::AsRef::as_ref), Some(b"read1".as_ref()));
2125        assert_eq!(record.sequence().as_ref(), b"ACGT");
2126        assert_eq!(record.quality_scores().as_ref(), &[30, 30, 30, 30]);
2127    }
2128
2129    #[test]
2130    fn test_record_builder_paired() {
2131        let record =
2132            RecordBuilder::new().name("read1").sequence("ACGT").first_segment(true).build();
2133
2134        assert!(record.flags().is_segmented());
2135        assert!(record.flags().is_first_segment());
2136    }
2137
2138    #[test]
2139    fn test_record_builder_mapped() {
2140        let record = RecordBuilder::new()
2141            .name("read1")
2142            .sequence("ACGT")
2143            .reference_sequence_id(0)
2144            .alignment_start(100)
2145            .cigar("4M")
2146            .mapping_quality(60)
2147            .build();
2148
2149        assert_eq!(record.reference_sequence_id(), Some(0));
2150        assert_eq!(record.alignment_start(), Some(Position::try_from(100).unwrap()));
2151    }
2152
2153    #[test]
2154    fn test_record_builder_with_tags() {
2155        let record = RecordBuilder::new()
2156            .name("read1")
2157            .sequence("ACGT")
2158            .tag("RG", "A")
2159            .tag("MI", "AAAA-CCCC/A")
2160            .build();
2161
2162        let rg_tag = Tag::from([b'R', b'G']);
2163        let mi_tag = Tag::from([b'M', b'I']);
2164
2165        assert!(record.data().get(&rg_tag).is_some());
2166        assert!(record.data().get(&mi_tag).is_some());
2167    }
2168
2169    #[test]
2170    fn test_record_builder_auto_qualities() {
2171        let record = RecordBuilder::new().name("read1").sequence("ACGTACGT").build();
2172
2173        // Should auto-generate Q30 qualities
2174        assert_eq!(record.quality_scores().as_ref(), &[30; 8]);
2175    }
2176
2177    #[test]
2178    fn test_record_builder_secondary_supplementary() {
2179        let secondary = RecordBuilder::new().sequence("ACGT").secondary(true).build();
2180        assert!(secondary.flags().is_secondary());
2181
2182        let supplementary = RecordBuilder::new().sequence("ACGT").supplementary(true).build();
2183        assert!(supplementary.flags().is_supplementary());
2184    }
2185
2186    // ========================================================================
2187    // ConsensusTagsBuilder tests
2188    // ========================================================================
2189
2190    #[test]
2191    fn test_consensus_tags_builder() {
2192        let record = RecordBuilder::new()
2193            .sequence("ACGT")
2194            .consensus_tags(ConsensusTagsBuilder::new().depth_max(10).depth_min(5).error_rate(0.01))
2195            .build();
2196
2197        let cd_tag = Tag::from([b'c', b'D']);
2198        let cm_tag = Tag::from([b'c', b'M']);
2199        let ce_tag = Tag::from([b'c', b'E']);
2200
2201        assert!(record.data().get(&cd_tag).is_some());
2202        assert!(record.data().get(&cm_tag).is_some());
2203        assert!(record.data().get(&ce_tag).is_some());
2204    }
2205
2206    #[test]
2207    fn test_consensus_tags_per_base() {
2208        let tags = ConsensusTagsBuilder::new()
2209            .per_base_depths(&[10, 20, 30])
2210            .per_base_errors(&[1, 2, 3])
2211            .build();
2212
2213        assert_eq!(tags.len(), 2);
2214        assert_eq!(tags[0].0, Tag::from([b'c', b'd']));
2215        assert_eq!(tags[1].0, Tag::from([b'c', b'e']));
2216    }
2217
2218    // ========================================================================
2219    // New builder tests (Phase 0)
2220    // ========================================================================
2221
2222    #[test]
2223    fn test_mapped_read_convenience() {
2224        let record = RecordBuilder::mapped_read()
2225            .name("read1")
2226            .sequence("ACGTACGT")
2227            .alignment_start(100)
2228            .build();
2229
2230        assert_eq!(record.reference_sequence_id(), Some(0));
2231        assert_eq!(record.mapping_quality().map(u8::from), Some(60));
2232        // CIGAR should be auto-generated as 8M
2233        assert!(!record.cigar().is_empty());
2234    }
2235
2236    #[test]
2237    fn test_record_builder_auto_cigar() {
2238        let record = RecordBuilder::new().sequence("ACGTACGT").build();
2239
2240        // CIGAR should be auto-generated as 8M when sequence is set
2241        let ops: Vec<_> = record.cigar().iter().map(|r| r.unwrap()).collect();
2242        assert_eq!(ops.len(), 1);
2243        assert_eq!(ops[0].kind(), Kind::Match);
2244        assert_eq!(ops[0].len(), 8);
2245    }
2246
2247    #[test]
2248    fn test_record_builder_explicit_cigar_overrides_auto() {
2249        let record = RecordBuilder::new().sequence("ACGTACGT").cigar("4M2I2M").build();
2250
2251        let ops: Vec<_> = record.cigar().iter().map(|r| r.unwrap()).collect();
2252        assert_eq!(ops.len(), 3);
2253        assert_eq!(ops[0].kind(), Kind::Match);
2254        assert_eq!(ops[0].len(), 4);
2255        assert_eq!(ops[1].kind(), Kind::Insertion);
2256        assert_eq!(ops[1].len(), 2);
2257    }
2258
2259    #[test]
2260    fn test_record_pair_builder_basic() {
2261        let (read_one, read_two) = RecordPairBuilder::new()
2262            .name("pair1")
2263            .r1_sequence("AAAA")
2264            .r2_sequence("TTTT")
2265            .r1_start(100)
2266            .r2_start(200)
2267            .build();
2268
2269        assert!(read_one.flags().is_first_segment());
2270        assert!(read_two.flags().is_last_segment());
2271        assert!(!read_one.flags().is_reverse_complemented());
2272        assert!(read_two.flags().is_reverse_complemented()); // FR default
2273    }
2274
2275    #[test]
2276    fn test_record_pair_builder_fr_orientation() {
2277        let (read_one, read_two) = RecordPairBuilder::new()
2278            .r1_sequence("ACGT")
2279            .r2_sequence("TGCA")
2280            .r1_start(100)
2281            .r2_start(200)
2282            .build();
2283
2284        // Default is FR: R1 forward, R2 reverse
2285        assert!(!read_one.flags().is_reverse_complemented());
2286        assert!(read_two.flags().is_reverse_complemented());
2287
2288        // Mate flags should also be correct
2289        assert!(read_one.flags().is_mate_reverse_complemented());
2290        assert!(!read_two.flags().is_mate_reverse_complemented());
2291    }
2292
2293    #[test]
2294    fn test_record_pair_builder_with_tags() {
2295        let (read_one, read_two) = RecordPairBuilder::new()
2296            .name("pair1")
2297            .r1_sequence("ACGT")
2298            .r2_sequence("TGCA")
2299            .r1_start(100)
2300            .r2_start(200)
2301            .tag("MI", "1/A")
2302            .tag("RX", "AAAA-TTTT")
2303            .build();
2304
2305        let mi_tag = Tag::from([b'M', b'I']);
2306        let rx_tag = Tag::from([b'R', b'X']);
2307        assert!(read_one.data().get(&mi_tag).is_some());
2308        assert!(read_two.data().get(&mi_tag).is_some());
2309        assert!(read_one.data().get(&rx_tag).is_some());
2310        assert!(read_two.data().get(&rx_tag).is_some());
2311    }
2312
2313    #[test]
2314    fn test_record_pair_builder_template_length() {
2315        let (read_one, read_two) = RecordPairBuilder::new()
2316            .r1_sequence("AAAA")
2317            .r2_sequence("CCCC")
2318            .r1_start(100)
2319            .r2_start(110)
2320            .build();
2321
2322        // R1 at 100-103, R2 at 110-113
2323        // Template spans 100-113 = 14bp
2324        assert_eq!(read_one.template_length(), 14);
2325        assert_eq!(read_two.template_length(), -14);
2326    }
2327
2328    #[test]
2329    fn test_record_pair_builder_unmapped() {
2330        let (read_one, read_two) = RecordPairBuilder::new()
2331            .name("unmapped_pair")
2332            .r1_sequence("ACGT")
2333            .r2_sequence("TGCA")
2334            .build();
2335
2336        // When no start positions are set, reads are unmapped
2337        assert!(read_one.flags().is_unmapped());
2338        assert!(read_two.flags().is_unmapped());
2339        assert!(read_one.flags().is_mate_unmapped());
2340        assert!(read_two.flags().is_mate_unmapped());
2341    }
2342
2343    #[test]
2344    fn test_record_pair_builder_per_read_tags() {
2345        let (read_one, read_two) = RecordPairBuilder::new()
2346            .name("pair1")
2347            .r1_sequence("ACGT")
2348            .r2_sequence("TGCA")
2349            .r1_start(100)
2350            .r2_start(200)
2351            .tag("MI", "UMI123") // Shared tag
2352            .r1_tag("MQ", 0i32) // R1-only: mate quality 0 (unmapped mate)
2353            .r2_tag("MQ", 60i32) // R2-only: mate quality 60
2354            .build();
2355
2356        let mi_tag = Tag::from([b'M', b'I']);
2357        let mq_tag = Tag::from([b'M', b'Q']);
2358
2359        // Both have MI tag
2360        assert!(read_one.data().get(&mi_tag).is_some());
2361        assert!(read_two.data().get(&mi_tag).is_some());
2362
2363        // Each has different MQ value
2364        let read_one_mq = read_one.data().get(&mq_tag).expect("R1 should have MQ tag");
2365        let read_two_mq = read_two.data().get(&mq_tag).expect("R2 should have MQ tag");
2366
2367        // Values can be stored as different int types, so check the value directly
2368        match read_one_mq {
2369            BufValue::Int8(v) => assert_eq!(*v, 0),
2370            BufValue::UInt8(v) => assert_eq!(*v, 0),
2371            BufValue::Int16(v) => assert_eq!(*v, 0),
2372            BufValue::UInt16(v) => assert_eq!(*v, 0),
2373            BufValue::Int32(v) => assert_eq!(*v, 0),
2374            BufValue::UInt32(v) => assert_eq!(*v, 0),
2375            _ => panic!("Expected integer type for MQ tag, got {read_one_mq:?}"),
2376        }
2377        match read_two_mq {
2378            BufValue::Int8(v) => assert_eq!(*v, 60),
2379            BufValue::UInt8(v) => assert_eq!(*v, 60),
2380            BufValue::Int16(v) => assert_eq!(*v, 60),
2381            BufValue::UInt16(v) => assert_eq!(*v, 60),
2382            BufValue::Int32(v) => assert_eq!(*v, 60),
2383            BufValue::UInt32(v) => assert_eq!(*v, 60),
2384            _ => panic!("Expected integer type for MQ tag, got {read_two_mq:?}"),
2385        }
2386    }
2387
2388    #[test]
2389    fn test_record_pair_builder_secondary() {
2390        let (read_one, read_two) = RecordPairBuilder::new()
2391            .name("sec_pair")
2392            .r1_sequence("ACGT")
2393            .r2_sequence("TGCA")
2394            .r1_start(100)
2395            .r2_start(200)
2396            .secondary(true)
2397            .build();
2398
2399        assert!(read_one.flags().is_secondary());
2400        assert!(read_two.flags().is_secondary());
2401        assert!(!read_one.flags().is_supplementary());
2402        assert!(!read_two.flags().is_supplementary());
2403    }
2404
2405    #[test]
2406    fn test_record_pair_builder_supplementary() {
2407        let (read_one, read_two) = RecordPairBuilder::new()
2408            .name("sup_pair")
2409            .r1_sequence("ACGT")
2410            .r2_sequence("TGCA")
2411            .r1_start(100)
2412            .r2_start(200)
2413            .supplementary(true)
2414            .build();
2415
2416        assert!(!read_one.flags().is_secondary());
2417        assert!(!read_two.flags().is_secondary());
2418        assert!(read_one.flags().is_supplementary());
2419        assert!(read_two.flags().is_supplementary());
2420    }
2421
2422    #[test]
2423    fn test_duplex_consensus_tags() {
2424        let record = RecordBuilder::new()
2425            .sequence("ACGT")
2426            .consensus_tags(
2427                ConsensusTagsBuilder::new()
2428                    .depth_max(10)
2429                    .depth_min(5)
2430                    .error_rate(0.01)
2431                    .ab_depth(6)
2432                    .ba_depth(4)
2433                    .ab_min_depth(3)
2434                    .ba_min_depth(2)
2435                    .ab_errors(0.005)
2436                    .ba_errors(0.008),
2437            )
2438            .build();
2439
2440        // Standard consensus tags
2441        let cd_tag = Tag::from([b'c', b'D']);
2442        let cm_tag = Tag::from([b'c', b'M']);
2443        let ce_tag = Tag::from([b'c', b'E']);
2444        assert!(record.data().get(&cd_tag).is_some());
2445        assert!(record.data().get(&cm_tag).is_some());
2446        assert!(record.data().get(&ce_tag).is_some());
2447
2448        // Duplex-specific tags
2449        let ab_depth_tag = Tag::from([b'a', b'D']);
2450        let ba_depth_tag = Tag::from([b'b', b'D']);
2451        let ab_min_tag = Tag::from([b'a', b'M']);
2452        let ba_min_tag = Tag::from([b'b', b'M']);
2453        let ab_error_tag = Tag::from([b'a', b'E']);
2454        let ba_error_tag = Tag::from([b'b', b'E']);
2455        assert!(record.data().get(&ab_depth_tag).is_some());
2456        assert!(record.data().get(&ba_depth_tag).is_some());
2457        assert!(record.data().get(&ab_min_tag).is_some());
2458        assert!(record.data().get(&ba_min_tag).is_some());
2459        assert!(record.data().get(&ab_error_tag).is_some());
2460        assert!(record.data().get(&ba_error_tag).is_some());
2461    }
2462}
2463
2464// ============================================================================
2465// Test Data Generators
2466// ============================================================================
2467
2468/// Calculates the sequence length required for a CIGAR string.
2469///
2470/// This counts the number of bases that consume the query sequence:
2471/// M (match), I (insertion), S (soft clip), = (sequence match), X (mismatch).
2472///
2473/// # Examples
2474///
2475/// ```rust
2476/// use fgumi_sam::builder::cigar_seq_len;
2477/// assert_eq!(cigar_seq_len("50M"), 50);
2478/// assert_eq!(cigar_seq_len("10S40M"), 50);
2479/// assert_eq!(cigar_seq_len("10H50M5S"), 55);
2480/// assert_eq!(cigar_seq_len("10M5I30M5D10M"), 55); // Deletions don't consume query
2481/// ```
2482#[must_use]
2483pub fn cigar_seq_len(cigar: &str) -> usize {
2484    let ops = parse_cigar(cigar);
2485    ops.iter()
2486        .filter(|op| {
2487            matches!(
2488                op.kind(),
2489                noodles::sam::alignment::record::cigar::op::Kind::Match
2490                    | noodles::sam::alignment::record::cigar::op::Kind::Insertion
2491                    | noodles::sam::alignment::record::cigar::op::Kind::SoftClip
2492                    | noodles::sam::alignment::record::cigar::op::Kind::SequenceMatch
2493                    | noodles::sam::alignment::record::cigar::op::Kind::SequenceMismatch
2494            )
2495        })
2496        .map(|op| op.len())
2497        .sum()
2498}
2499
2500/// Returns the reference (alignment) span consumed by a CIGAR string.
2501///
2502/// Counts bases consumed by reference-consuming operations: M, D, N, =, X.
2503#[must_use]
2504pub fn cigar_ref_len(cigar: &str) -> usize {
2505    let ops = parse_cigar(cigar);
2506    ops.iter()
2507        .filter(|op| {
2508            matches!(
2509                op.kind(),
2510                Kind::Match
2511                    | Kind::Deletion
2512                    | Kind::Skip
2513                    | Kind::SequenceMatch
2514                    | Kind::SequenceMismatch
2515            )
2516        })
2517        .map(|op| op.len())
2518        .sum()
2519}
2520
2521/// Creates a vector with `n` copies of an item.
2522///
2523/// This is useful for quickly generating repeated test data.
2524///
2525/// # Examples
2526///
2527/// ```rust
2528/// use fgumi_sam::builder::repeat_n;
2529/// let umis = repeat_n("AAAA".to_string(), 5);
2530/// assert_eq!(umis.len(), 5);
2531/// assert!(umis.iter().all(|u| u == "AAAA"));
2532/// ```
2533#[must_use]
2534pub fn repeat_n<T: Clone>(item: T, n: usize) -> Vec<T> {
2535    vec![item; n]
2536}
2537
2538/// Generates quality scores with a specified mean and length.
2539///
2540/// All quality scores will be set to the same value (uniform distribution).
2541///
2542/// # Examples
2543///
2544/// ```rust
2545/// use fgumi_sam::builder::uniform_qualities;
2546/// let quals = uniform_qualities(30, 100);
2547/// assert_eq!(quals.len(), 100);
2548/// assert!(quals.iter().all(|&q| q == 30));
2549/// ```
2550#[must_use]
2551pub fn uniform_qualities(quality: u8, length: usize) -> Vec<u8> {
2552    vec![quality; length]
2553}
2554
2555/// Generates quality scores that degrade linearly from start to end.
2556///
2557/// Useful for simulating read quality degradation along the length of a read.
2558///
2559/// # Panics
2560///
2561/// Panics if an interpolated quality value does not fit in `u8`, which cannot happen
2562/// when `start` and `end` are both valid `u8` quality scores.
2563///
2564/// # Examples
2565///
2566/// ```rust
2567/// use fgumi_sam::builder::degrading_qualities;
2568/// let quals = degrading_qualities(40, 20, 10);
2569/// assert_eq!(quals.len(), 10);
2570/// assert_eq!(quals[0], 40);
2571/// assert_eq!(quals[9], 20);
2572/// ```
2573#[must_use]
2574#[expect(
2575    clippy::cast_possible_truncation,
2576    reason = "f64 -> i64 cast is safe: values are interpolated between two u8 inputs (0-255)"
2577)]
2578#[expect(
2579    clippy::cast_precision_loss,
2580    reason = "usize -> f64 cast is acceptable: length values in tests are small enough that precision loss is negligible"
2581)]
2582pub fn degrading_qualities(start: u8, end: u8, length: usize) -> Vec<u8> {
2583    if length == 0 {
2584        return Vec::new();
2585    }
2586    if length == 1 {
2587        return vec![start];
2588    }
2589
2590    let start_f = f64::from(start);
2591    let end_f = f64::from(end);
2592    let step = (end_f - start_f) / (length - 1) as f64;
2593
2594    (0..length)
2595        .map(|i| {
2596            let quality = (start_f + step * i as f64).round();
2597            u8::try_from(quality as i64).expect("quality score fits in u8")
2598        })
2599        .collect()
2600}
2601
2602/// Creates a temporary FASTA file with the given sequences.
2603///
2604/// Each tuple in `sequences` contains (name, sequence).
2605///
2606/// # Examples
2607///
2608/// ```rust,no_run
2609/// use fgumi_sam::builder::create_test_fasta;
2610/// let fasta = create_test_fasta(&[
2611///     ("chr1", "ACGTACGTACGT"),
2612///     ("chr2", "GGGGCCCCAAAA"),
2613/// ]).unwrap();
2614/// assert!(fasta.path().exists());
2615/// ```
2616///
2617/// # Errors
2618///
2619/// Returns an error if the temporary file cannot be created or written to.
2620pub fn create_test_fasta(sequences: &[(&str, &str)]) -> std::io::Result<NamedTempFile> {
2621    use std::io::Write;
2622    let mut file = NamedTempFile::new()?;
2623    for (name, seq) in sequences {
2624        writeln!(file, ">{name}")?;
2625        writeln!(file, "{seq}")?;
2626    }
2627    file.flush()?;
2628    Ok(file)
2629}
2630
2631/// Creates a simple two-chromosome test FASTA file.
2632///
2633/// Creates chr1 with "ACGTACGTACGT" and chr2 with "GGGGCCCCAAAA".
2634/// This matches the default test FASTA used in multiple test modules.
2635///
2636/// # Examples
2637///
2638/// ```rust,no_run
2639/// use fgumi_sam::builder::create_default_test_fasta;
2640/// let fasta = create_default_test_fasta().unwrap();
2641/// assert!(fasta.path().exists());
2642/// ```
2643///
2644/// # Errors
2645///
2646/// Returns an error if the temporary file cannot be created or written to.
2647pub fn create_default_test_fasta() -> std::io::Result<NamedTempFile> {
2648    create_test_fasta(&[("chr1", "ACGTACGTACGT"), ("chr2", "GGGGCCCCAAAA")])
2649}
2650
2651#[cfg(test)]
2652mod generator_tests {
2653    use super::*;
2654
2655    #[test]
2656    fn test_repeat_n() {
2657        let items = repeat_n(42, 5);
2658        assert_eq!(items, vec![42, 42, 42, 42, 42]);
2659    }
2660
2661    #[test]
2662    fn test_repeat_n_strings() {
2663        let items = repeat_n("ACGT".to_string(), 3);
2664        assert_eq!(items.len(), 3);
2665        assert!(items.iter().all(|s| s == "ACGT"));
2666    }
2667
2668    #[test]
2669    fn test_uniform_qualities() {
2670        let quals = uniform_qualities(30, 10);
2671        assert_eq!(quals, vec![30; 10]);
2672    }
2673
2674    #[test]
2675    fn test_degrading_qualities() {
2676        let quals = degrading_qualities(40, 20, 5);
2677        assert_eq!(quals.len(), 5);
2678        assert_eq!(quals[0], 40);
2679        assert_eq!(quals[4], 20);
2680        // Check intermediate values are monotonically decreasing
2681        for i in 0..quals.len() - 1 {
2682            assert!(quals[i] >= quals[i + 1]);
2683        }
2684    }
2685
2686    #[test]
2687    fn test_degrading_qualities_edge_cases() {
2688        // Length 0
2689        assert_eq!(degrading_qualities(40, 20, 0), Vec::<u8>::new());
2690
2691        // Length 1
2692        assert_eq!(degrading_qualities(40, 20, 1), vec![40]);
2693
2694        // Same start and end
2695        let quals = degrading_qualities(30, 30, 5);
2696        assert!(quals.iter().all(|&q| q == 30));
2697    }
2698
2699    #[test]
2700    fn test_create_test_fasta() {
2701        let fasta = create_test_fasta(&[("chr1", "ACGT"), ("chr2", "GGGG")]).unwrap();
2702        assert!(fasta.path().exists());
2703
2704        // Read the file and verify contents
2705        let contents = std::fs::read_to_string(fasta.path()).unwrap();
2706        assert!(contents.contains(">chr1"));
2707        assert!(contents.contains("ACGT"));
2708        assert!(contents.contains(">chr2"));
2709        assert!(contents.contains("GGGG"));
2710    }
2711
2712    #[test]
2713    fn test_create_default_test_fasta() {
2714        let fasta = create_default_test_fasta().unwrap();
2715        assert!(fasta.path().exists());
2716
2717        let contents = std::fs::read_to_string(fasta.path()).unwrap();
2718        assert!(contents.contains(">chr1"));
2719        assert!(contents.contains("ACGTACGTACGT"));
2720        assert!(contents.contains(">chr2"));
2721        assert!(contents.contains("GGGGCCCCAAAA"));
2722    }
2723}