1use crate::to_smallest_signed_int;
33use anyhow::Result;
34use bstr::BString;
35use noodles::core::Position;
36use noodles::sam::Header;
37use noodles::sam::alignment::io::Write as AlignmentWrite;
38use noodles::sam::alignment::record::Flags;
39use noodles::sam::alignment::record::cigar::Op;
40use noodles::sam::alignment::record::cigar::op::Kind;
41use noodles::sam::alignment::record::data::field::Tag;
42use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
43use noodles::sam::alignment::record_buf::{QualityScores, RecordBuf, Sequence};
44use noodles::sam::header::record::value::Map;
45use noodles::sam::header::record::value::map::{Program, ReadGroup, ReferenceSequence};
46use std::num::NonZeroUsize;
47use std::path::Path;
48use std::sync::atomic::{AtomicU64, Ordering};
49use tempfile::{NamedTempFile, TempDir};
50
51pub const DEFAULT_READ_LENGTH: usize = 100;
53pub const DEFAULT_BASE_QUALITY: u8 = 30;
54pub const DEFAULT_MAPQ: u8 = 60;
55pub const DEFAULT_SAMPLE_NAME: &str = "Sample";
56pub const DEFAULT_READ_GROUP_ID: &str = "A";
57pub const DEFAULT_REFERENCE_LENGTH: usize = 200_000_000;
58
59#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61pub enum Strand {
62 Plus,
63 Minus,
64}
65
66impl Strand {
67 #[must_use]
68 pub fn is_negative(&self) -> bool {
69 matches!(self, Strand::Minus)
70 }
71}
72
73#[derive(Debug)]
106pub struct SamBuilder {
107 pub header: Header,
109 records: Vec<RecordBuf>,
111 read_length: usize,
113 base_quality: u8,
115 read_group_id: String,
117 counter: AtomicU64,
119}
120
121impl Default for SamBuilder {
122 fn default() -> Self {
123 Self::new()
124 }
125}
126
127impl SamBuilder {
128 #[must_use]
137 pub fn new() -> Self {
138 Self::with_defaults(DEFAULT_READ_LENGTH, DEFAULT_BASE_QUALITY)
139 }
140
141 #[must_use]
147 pub fn with_defaults(read_length: usize, base_quality: u8) -> Self {
148 let mut header = Header::builder();
149
150 for i in 1..=22 {
152 let name = format!("chr{i}");
153 let map =
154 Map::<ReferenceSequence>::new(NonZeroUsize::new(DEFAULT_REFERENCE_LENGTH).unwrap());
155 header = header.add_reference_sequence(BString::from(name), map);
156 }
157 for chr in ["chrX", "chrY", "chrM"] {
158 let map =
159 Map::<ReferenceSequence>::new(NonZeroUsize::new(DEFAULT_REFERENCE_LENGTH).unwrap());
160 header = header.add_reference_sequence(BString::from(chr), map);
161 }
162
163 let rg_map = Map::<ReadGroup>::default();
165 header = header.add_read_group(BString::from(DEFAULT_READ_GROUP_ID), rg_map);
166
167 let pg_map = Map::<Program>::default();
169 header = header.add_program(BString::from("SamBuilder"), pg_map);
170
171 let header = header.build();
172
173 Self {
174 header,
175 records: Vec::new(),
176 read_length,
177 base_quality,
178 read_group_id: DEFAULT_READ_GROUP_ID.to_string(),
179 counter: AtomicU64::new(0),
180 }
181 }
182
183 #[must_use]
185 pub fn new_unmapped() -> Self {
186 let mut header = Header::builder();
187
188 let rg_map = Map::<ReadGroup>::default();
190 header = header.add_read_group(BString::from(DEFAULT_READ_GROUP_ID), rg_map);
191
192 let pg_map = Map::<Program>::default();
194 header = header.add_program(BString::from("SamBuilder"), pg_map);
195
196 let header = header.build();
197
198 Self {
199 header,
200 records: Vec::new(),
201 read_length: DEFAULT_READ_LENGTH,
202 base_quality: DEFAULT_BASE_QUALITY,
203 read_group_id: DEFAULT_READ_GROUP_ID.to_string(),
204 counter: AtomicU64::new(0),
205 }
206 }
207
208 #[must_use]
214 pub fn with_single_ref(ref_name: &str, ref_length: usize) -> Self {
215 let mut header = Header::builder();
216
217 let map = Map::<ReferenceSequence>::new(NonZeroUsize::new(ref_length).unwrap());
218 header = header.add_reference_sequence(BString::from(ref_name), map);
219
220 let rg_map = Map::<ReadGroup>::default();
222 header = header.add_read_group(BString::from(DEFAULT_READ_GROUP_ID), rg_map);
223
224 let pg_map = Map::<Program>::default();
226 header = header.add_program(BString::from("SamBuilder"), pg_map);
227
228 let header = header.build();
229
230 Self {
231 header,
232 records: Vec::new(),
233 read_length: DEFAULT_READ_LENGTH,
234 base_quality: DEFAULT_BASE_QUALITY,
235 read_group_id: DEFAULT_READ_GROUP_ID.to_string(),
236 counter: AtomicU64::new(0),
237 }
238 }
239
240 fn next_name(&self) -> String {
242 format!("{:04}", self.counter.fetch_add(1, Ordering::SeqCst))
243 }
244
245 fn random_bases(&self) -> String {
247 use std::collections::hash_map::DefaultHasher;
248 use std::hash::{Hash, Hasher};
249
250 let bases = [b'A', b'C', b'G', b'T'];
251 let seed = self.counter.load(Ordering::SeqCst);
252
253 let mut result = Vec::with_capacity(self.read_length);
254 for i in 0..self.read_length {
255 let mut hasher = DefaultHasher::new();
256 (seed, i).hash(&mut hasher);
257 let idx = usize::try_from(hasher.finish() % 4).expect("modulo 4 always fits in usize");
258 result.push(bases[idx]);
259 }
260 String::from_utf8(result).unwrap()
261 }
262
263 #[must_use]
265 pub fn records(&self) -> &[RecordBuf] {
266 &self.records
267 }
268
269 #[must_use]
271 pub fn len(&self) -> usize {
272 self.records.len()
273 }
274
275 #[must_use]
277 pub fn is_empty(&self) -> bool {
278 self.records.is_empty()
279 }
280
281 pub fn clear(&mut self) {
283 self.records.clear();
284 }
285
286 pub fn iter(&self) -> impl Iterator<Item = &RecordBuf> {
288 self.records.iter()
289 }
290
291 pub fn push_record(&mut self, record: RecordBuf) {
295 self.records.push(record);
296 }
297
298 #[must_use]
303 pub fn add_pair(&mut self) -> PairBuilder<'_> {
304 PairBuilder::new(self)
305 }
306
307 #[must_use]
312 pub fn add_frag(&mut self) -> FragBuilder<'_> {
313 FragBuilder::new(self)
314 }
315
316 pub fn write_bam(&self, path: &Path) -> Result<()> {
322 let file = std::fs::File::create(path)?;
323 let mut writer = noodles::bam::io::Writer::new(file);
324 writer.write_header(&self.header)?;
325
326 for record in &self.records {
327 writer.write_alignment_record(&self.header, record)?;
328 }
329
330 Ok(())
331 }
332
333 pub fn write_sam(&self, path: &Path) -> Result<()> {
339 let file = std::fs::File::create(path)?;
340 let mut writer = noodles::sam::io::Writer::new(file);
341 writer.write_header(&self.header)?;
342
343 for record in &self.records {
344 writer.write_alignment_record(&self.header, record)?;
345 }
346
347 Ok(())
348 }
349
350 pub fn to_temp_file(&self) -> Result<NamedTempFile> {
356 let temp = NamedTempFile::new()?;
357 self.write_bam(temp.path())?;
358 Ok(temp)
359 }
360}
361
362pub struct PairBuilder<'a> {
364 parent: &'a mut SamBuilder,
365 name: Option<String>,
366 bases1: Option<String>,
367 bases2: Option<String>,
368 quals1: Option<Vec<u8>>,
369 quals2: Option<Vec<u8>>,
370 contig: usize,
371 contig2: Option<usize>,
372 start1: Option<usize>,
373 start2: Option<usize>,
374 cigar1: Option<String>,
375 cigar2: Option<String>,
376 mapq1: u8,
377 mapq2: u8,
378 strand1: Strand,
379 strand2: Strand,
380 attrs: Vec<(String, BufValue)>,
381}
382
383impl<'a> PairBuilder<'a> {
384 fn new(parent: &'a mut SamBuilder) -> Self {
385 Self {
386 parent,
387 name: None,
388 bases1: None,
389 bases2: None,
390 quals1: None,
391 quals2: None,
392 contig: 0,
393 contig2: None,
394 start1: None,
395 start2: None,
396 cigar1: None,
397 cigar2: None,
398 mapq1: DEFAULT_MAPQ,
399 mapq2: DEFAULT_MAPQ,
400 strand1: Strand::Plus,
401 strand2: Strand::Minus,
402 attrs: Vec::new(),
403 }
404 }
405
406 #[must_use]
408 pub fn name(mut self, name: &str) -> Self {
409 self.name = Some(name.to_string());
410 self
411 }
412
413 #[must_use]
415 pub fn bases1(mut self, bases: &str) -> Self {
416 self.bases1 = Some(bases.to_string());
417 self
418 }
419
420 #[must_use]
422 pub fn bases2(mut self, bases: &str) -> Self {
423 self.bases2 = Some(bases.to_string());
424 self
425 }
426
427 #[must_use]
429 pub fn quals1(mut self, quals: &[u8]) -> Self {
430 self.quals1 = Some(quals.to_vec());
431 self
432 }
433
434 #[must_use]
436 pub fn quals2(mut self, quals: &[u8]) -> Self {
437 self.quals2 = Some(quals.to_vec());
438 self
439 }
440
441 #[must_use]
443 pub fn contig(mut self, contig: usize) -> Self {
444 self.contig = contig;
445 self
446 }
447
448 #[must_use]
450 pub fn contig2(mut self, contig: usize) -> Self {
451 self.contig2 = Some(contig);
452 self
453 }
454
455 #[must_use]
457 pub fn start1(mut self, start: usize) -> Self {
458 self.start1 = Some(start);
459 self
460 }
461
462 #[must_use]
464 pub fn start2(mut self, start: usize) -> Self {
465 self.start2 = Some(start);
466 self
467 }
468
469 #[must_use]
471 pub fn cigar1(mut self, cigar: &str) -> Self {
472 self.cigar1 = Some(cigar.to_string());
473 self
474 }
475
476 #[must_use]
478 pub fn cigar2(mut self, cigar: &str) -> Self {
479 self.cigar2 = Some(cigar.to_string());
480 self
481 }
482
483 #[must_use]
485 pub fn mapq1(mut self, mapq: u8) -> Self {
486 self.mapq1 = mapq;
487 self
488 }
489
490 #[must_use]
492 pub fn mapq2(mut self, mapq: u8) -> Self {
493 self.mapq2 = mapq;
494 self
495 }
496
497 #[must_use]
499 pub fn strand1(mut self, strand: Strand) -> Self {
500 self.strand1 = strand;
501 self
502 }
503
504 #[must_use]
506 pub fn strand2(mut self, strand: Strand) -> Self {
507 self.strand2 = strand;
508 self
509 }
510
511 #[must_use]
513 pub fn unmapped1(mut self) -> Self {
514 self.start1 = None;
515 self
516 }
517
518 #[must_use]
520 pub fn unmapped2(mut self) -> Self {
521 self.start2 = None;
522 self
523 }
524
525 #[must_use]
527 pub fn attr<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
528 self.attrs.push((tag.to_string(), value.into()));
529 self
530 }
531
532 #[must_use]
538 #[expect(clippy::too_many_lines, reason = "test builder with many configuration steps")]
539 pub fn build(self) -> (RecordBuf, RecordBuf) {
540 let name = self.name.unwrap_or_else(|| self.parent.next_name());
541 let bases1 = self.bases1.unwrap_or_else(|| self.parent.random_bases());
542 let bases2 = self.bases2.unwrap_or_else(|| self.parent.random_bases());
543 let quals1 = self.quals1.unwrap_or_else(|| vec![self.parent.base_quality; bases1.len()]);
544 let quals2 = self.quals2.unwrap_or_else(|| vec![self.parent.base_quality; bases2.len()]);
545 let cigar1 = self.cigar1.unwrap_or_else(|| format!("{}M", bases1.len()));
546 let cigar2 = self.cigar2.unwrap_or_else(|| format!("{}M", bases2.len()));
547 let contig2 = self.contig2.unwrap_or(self.contig);
548
549 let unmapped1 = self.start1.is_none();
550 let unmapped2 = self.start2.is_none();
551
552 let mut first_read = RecordBuf::default();
554 *first_read.name_mut() = Some(BString::from(name.as_bytes()));
555 *first_read.sequence_mut() = Sequence::from(bases1.as_bytes().to_vec());
556 *first_read.quality_scores_mut() = QualityScores::from(quals1);
557
558 let mut flags1 = Flags::SEGMENTED | Flags::FIRST_SEGMENT;
559 if unmapped1 {
560 flags1 |= Flags::UNMAPPED;
561 }
562 if unmapped2 {
563 flags1 |= Flags::MATE_UNMAPPED;
564 }
565 if self.strand1.is_negative() {
566 flags1 |= Flags::REVERSE_COMPLEMENTED;
567 }
568 if self.strand2.is_negative() {
569 flags1 |= Flags::MATE_REVERSE_COMPLEMENTED;
570 }
571 *first_read.flags_mut() = flags1;
572
573 if !unmapped1 {
574 *first_read.reference_sequence_id_mut() = Some(self.contig);
575 *first_read.alignment_start_mut() =
576 Some(Position::try_from(self.start1.unwrap()).unwrap());
577 *first_read.cigar_mut() = parse_cigar(&cigar1).into_iter().collect();
578 *first_read.mapping_quality_mut() = Some(
579 noodles::sam::alignment::record::MappingQuality::try_from(self.mapq1).unwrap(),
580 );
581 }
582
583 if !unmapped2 {
584 *first_read.mate_reference_sequence_id_mut() = Some(contig2);
585 *first_read.mate_alignment_start_mut() =
586 Some(Position::try_from(self.start2.unwrap()).unwrap());
587 }
588
589 let rg_tag = Tag::new(b'R', b'G');
591 first_read.data_mut().insert(rg_tag, BufValue::from(self.parent.read_group_id.clone()));
592
593 for (tag_str, value) in &self.attrs {
595 if tag_str.len() == 2 {
596 let tag = Tag::new(tag_str.as_bytes()[0], tag_str.as_bytes()[1]);
597 first_read.data_mut().insert(tag, value.clone());
598 }
599 }
600
601 let mut second_read = RecordBuf::default();
603 *second_read.name_mut() = Some(BString::from(name.as_bytes()));
604 *second_read.sequence_mut() = Sequence::from(bases2.as_bytes().to_vec());
605 *second_read.quality_scores_mut() = QualityScores::from(quals2);
606
607 let mut flags2 = Flags::SEGMENTED | Flags::LAST_SEGMENT;
608 if unmapped2 {
609 flags2 |= Flags::UNMAPPED;
610 }
611 if unmapped1 {
612 flags2 |= Flags::MATE_UNMAPPED;
613 }
614 if self.strand2.is_negative() {
615 flags2 |= Flags::REVERSE_COMPLEMENTED;
616 }
617 if self.strand1.is_negative() {
618 flags2 |= Flags::MATE_REVERSE_COMPLEMENTED;
619 }
620 *second_read.flags_mut() = flags2;
621
622 if !unmapped2 {
623 *second_read.reference_sequence_id_mut() = Some(contig2);
624 *second_read.alignment_start_mut() =
625 Some(Position::try_from(self.start2.unwrap()).unwrap());
626 *second_read.cigar_mut() = parse_cigar(&cigar2).into_iter().collect();
627 *second_read.mapping_quality_mut() = Some(
628 noodles::sam::alignment::record::MappingQuality::try_from(self.mapq2).unwrap(),
629 );
630 }
631
632 if !unmapped1 {
633 *second_read.mate_reference_sequence_id_mut() = Some(self.contig);
634 *second_read.mate_alignment_start_mut() =
635 Some(Position::try_from(self.start1.unwrap()).unwrap());
636 }
637
638 second_read.data_mut().insert(rg_tag, BufValue::from(self.parent.read_group_id.clone()));
640
641 for (tag_str, value) in &self.attrs {
643 if tag_str.len() == 2 {
644 let tag = Tag::new(tag_str.as_bytes()[0], tag_str.as_bytes()[1]);
645 second_read.data_mut().insert(tag, value.clone());
646 }
647 }
648
649 if !unmapped1 && !unmapped2 && self.contig == contig2 {
651 let pos1 = i32::try_from(self.start1.unwrap()).expect("start1 fits in i32");
652 let pos2 = i32::try_from(self.start2.unwrap()).expect("start2 fits in i32");
653 let end1 = pos1
654 + i32::try_from(cigar_ref_len(&cigar1)).expect("cigar1 ref len fits in i32")
655 - 1;
656 let end2 = pos2
657 + i32::try_from(cigar_ref_len(&cigar2)).expect("cigar2 ref len fits in i32")
658 - 1;
659
660 let (left, right) = if pos1 <= pos2 { (pos1, end2) } else { (pos2, end1) };
661 let tlen = right - left + 1;
662
663 *first_read.template_length_mut() = if pos1 <= pos2 { tlen } else { -tlen };
664 *second_read.template_length_mut() = if pos2 <= pos1 { tlen } else { -tlen };
665 }
666
667 self.parent.records.push(first_read.clone());
668 self.parent.records.push(second_read.clone());
669
670 (first_read, second_read)
671 }
672}
673
674pub struct FragBuilder<'a> {
676 parent: &'a mut SamBuilder,
677 name: Option<String>,
678 bases: Option<String>,
679 quals: Option<Vec<u8>>,
680 contig: usize,
681 start: Option<usize>,
682 cigar: Option<String>,
683 mapq: u8,
684 strand: Strand,
685 attrs: Vec<(String, BufValue)>,
686}
687
688impl<'a> FragBuilder<'a> {
689 fn new(parent: &'a mut SamBuilder) -> Self {
690 Self {
691 parent,
692 name: None,
693 bases: None,
694 quals: None,
695 contig: 0,
696 start: None,
697 cigar: None,
698 mapq: DEFAULT_MAPQ,
699 strand: Strand::Plus,
700 attrs: Vec::new(),
701 }
702 }
703
704 #[must_use]
706 pub fn name(mut self, name: &str) -> Self {
707 self.name = Some(name.to_string());
708 self
709 }
710
711 #[must_use]
713 pub fn bases(mut self, bases: &str) -> Self {
714 self.bases = Some(bases.to_string());
715 self
716 }
717
718 #[must_use]
720 pub fn quals(mut self, quals: &[u8]) -> Self {
721 self.quals = Some(quals.to_vec());
722 self
723 }
724
725 #[must_use]
727 pub fn contig(mut self, contig: usize) -> Self {
728 self.contig = contig;
729 self
730 }
731
732 #[must_use]
734 pub fn start(mut self, start: usize) -> Self {
735 self.start = Some(start);
736 self
737 }
738
739 #[must_use]
741 pub fn cigar(mut self, cigar: &str) -> Self {
742 self.cigar = Some(cigar.to_string());
743 self
744 }
745
746 #[must_use]
748 pub fn mapq(mut self, mapq: u8) -> Self {
749 self.mapq = mapq;
750 self
751 }
752
753 #[must_use]
755 pub fn strand(mut self, strand: Strand) -> Self {
756 self.strand = strand;
757 self
758 }
759
760 #[must_use]
762 pub fn unmapped(mut self) -> Self {
763 self.start = None;
764 self
765 }
766
767 #[must_use]
769 pub fn attr<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
770 self.attrs.push((tag.to_string(), value.into()));
771 self
772 }
773
774 #[must_use]
780 pub fn build(self) -> RecordBuf {
781 let name = self.name.unwrap_or_else(|| self.parent.next_name());
782 let bases = self.bases.unwrap_or_else(|| self.parent.random_bases());
783 let quals = self.quals.unwrap_or_else(|| vec![self.parent.base_quality; bases.len()]);
784 let cigar = self.cigar.unwrap_or_else(|| format!("{}M", bases.len()));
785
786 let unmapped = self.start.is_none();
787
788 let mut rec = RecordBuf::default();
789 *rec.name_mut() = Some(BString::from(name.as_bytes()));
790 *rec.sequence_mut() = Sequence::from(bases.as_bytes().to_vec());
791 *rec.quality_scores_mut() = QualityScores::from(quals);
792
793 let mut flags = Flags::empty();
794 if unmapped {
795 flags |= Flags::UNMAPPED;
796 }
797 if self.strand.is_negative() {
798 flags |= Flags::REVERSE_COMPLEMENTED;
799 }
800 *rec.flags_mut() = flags;
801
802 if !unmapped {
803 *rec.reference_sequence_id_mut() = Some(self.contig);
804 *rec.alignment_start_mut() = Some(Position::try_from(self.start.unwrap()).unwrap());
805 *rec.cigar_mut() = parse_cigar(&cigar).into_iter().collect();
806 *rec.mapping_quality_mut() =
807 Some(noodles::sam::alignment::record::MappingQuality::try_from(self.mapq).unwrap());
808 }
809
810 let rg_tag = Tag::new(b'R', b'G');
812 rec.data_mut().insert(rg_tag, BufValue::from(self.parent.read_group_id.clone()));
813
814 for (tag_str, value) in &self.attrs {
816 if tag_str.len() == 2 {
817 let tag = Tag::new(tag_str.as_bytes()[0], tag_str.as_bytes()[1]);
818 rec.data_mut().insert(tag, value.clone());
819 }
820 }
821
822 self.parent.records.push(rec.clone());
823 rec
824 }
825}
826
827#[must_use]
833pub fn parse_cigar(cigar_str: &str) -> Vec<Op> {
834 let mut ops = Vec::new();
835 let mut num_str = String::new();
836
837 for c in cigar_str.chars() {
838 if c.is_ascii_digit() {
839 num_str.push(c);
840 } else {
841 let len: usize = num_str.parse().expect("Invalid CIGAR: expected number");
842 let kind = match c {
843 'M' => Kind::Match,
844 'I' => Kind::Insertion,
845 'D' => Kind::Deletion,
846 'N' => Kind::Skip,
847 'S' => Kind::SoftClip,
848 'H' => Kind::HardClip,
849 'P' => Kind::Pad,
850 '=' => Kind::SequenceMatch,
851 'X' => Kind::SequenceMismatch,
852 _ => panic!("Unknown CIGAR operation: {c}"),
853 };
854 ops.push(Op::new(kind, len));
855 num_str.clear();
856 }
857 }
858
859 ops
860}
861
862pub const REFERENCE_LENGTH: usize = 1_000;
868
869pub const MAPPED_PG_ID: &str = "mapped";
871
872impl SamBuilder {
873 #[must_use]
877 pub fn new_mapped() -> Self {
878 Self::with_single_ref("chr1", REFERENCE_LENGTH)
879 }
880
881 pub fn add_pair_with_attrs(
893 &mut self,
894 name: &str,
895 start1: Option<usize>,
896 start2: Option<usize>,
897 strand1: bool,
898 strand2: bool,
899 attrs: &std::collections::HashMap<&str, BufValue>,
900 ) {
901 let mut pair = self
902 .add_pair()
903 .name(name)
904 .strand1(if strand1 { Strand::Plus } else { Strand::Minus })
905 .strand2(if strand2 { Strand::Plus } else { Strand::Minus });
906
907 if let Some(s) = start1 {
908 pair = pair.start1(s);
909 }
910 if let Some(s) = start2 {
911 pair = pair.start2(s);
912 }
913 for (tag, value) in attrs {
914 pair = pair.attr(tag, value.clone());
915 }
916 let _ = pair.build();
917 }
918
919 pub fn add_frag_with_attrs(
929 &mut self,
930 name: &str,
931 start: Option<usize>,
932 strand: bool,
933 attrs: &std::collections::HashMap<&str, BufValue>,
934 ) {
935 let mut frag =
936 self.add_frag().name(name).strand(if strand { Strand::Plus } else { Strand::Minus });
937
938 if let Some(s) = start {
939 frag = frag.start(s);
940 }
941 for (tag, value) in attrs {
942 frag = frag.attr(tag, value.clone());
943 }
944 let _ = frag.build();
945 }
946
947 pub fn write(&self, path: &std::path::Path) -> Result<()> {
955 self.write_bam(path)
956 }
957}
958
959pub fn create_ref_dict(
969 dir: &TempDir,
970 ref_name: &str,
971 ref_length: usize,
972) -> Result<std::path::PathBuf> {
973 let dict_path = dir.path().join("ref.dict");
974 let mut header = Header::builder();
975
976 let map = Map::<ReferenceSequence>::new(NonZeroUsize::new(ref_length).unwrap());
977 header = header.add_reference_sequence(BString::from(ref_name), map);
978
979 let header = header.build();
980
981 let file = std::fs::File::create(&dict_path)?;
982 let mut writer = noodles::sam::io::Writer::new(file);
983 writer.write_header(&header)?;
984
985 Ok(dict_path)
986}
987
988#[derive(Debug, Default)]
1033pub struct RecordBuilder {
1034 name: Option<Vec<u8>>,
1035 flags: Flags,
1036 reference_sequence_id: Option<usize>,
1037 alignment_start: Option<usize>,
1038 mapping_quality: Option<u8>,
1039 cigar: Option<String>,
1040 sequence: Vec<u8>,
1041 qualities: Vec<u8>,
1042 tags: Vec<(Tag, BufValue)>,
1043 mate_reference_sequence_id: Option<usize>,
1044 mate_alignment_start: Option<usize>,
1045 template_length: Option<i32>,
1046}
1047
1048impl RecordBuilder {
1049 #[must_use]
1051 pub fn new() -> Self {
1052 Self {
1053 name: None,
1054 flags: Flags::empty(),
1055 reference_sequence_id: None,
1056 alignment_start: None,
1057 mapping_quality: Some(60), cigar: None,
1059 sequence: Vec::new(),
1060 qualities: Vec::new(),
1061 tags: Vec::new(),
1062 mate_reference_sequence_id: None,
1063 mate_alignment_start: None,
1064 template_length: None,
1065 }
1066 }
1067
1068 #[must_use]
1091 pub fn mapped_read() -> Self {
1092 Self { reference_sequence_id: Some(0), ..Self::new() }
1093 }
1094
1095 #[must_use]
1097 pub fn name(mut self, name: &str) -> Self {
1098 self.name = Some(name.as_bytes().to_vec());
1099 self
1100 }
1101
1102 #[must_use]
1104 pub fn sequence(mut self, seq: &str) -> Self {
1105 self.sequence = seq.as_bytes().to_vec();
1106 if self.qualities.is_empty() {
1108 self.qualities = vec![DEFAULT_BASE_QUALITY; seq.len()];
1109 }
1110 self
1111 }
1112
1113 #[must_use]
1115 pub fn qualities(mut self, quals: &[u8]) -> Self {
1116 self.qualities = quals.to_vec();
1117 self
1118 }
1119
1120 #[must_use]
1122 pub fn flags(mut self, flags: Flags) -> Self {
1123 self.flags = flags;
1124 self
1125 }
1126
1127 #[must_use]
1129 pub fn paired(mut self, paired: bool) -> Self {
1130 self.flags.set(Flags::SEGMENTED, paired);
1131 self
1132 }
1133
1134 #[must_use]
1136 pub fn first_segment(mut self, is_first: bool) -> Self {
1137 self.flags.set(Flags::SEGMENTED, true);
1138 self.flags.set(Flags::FIRST_SEGMENT, is_first);
1139 if !is_first {
1140 self.flags.set(Flags::LAST_SEGMENT, true);
1141 }
1142 self
1143 }
1144
1145 #[must_use]
1147 pub fn properly_paired(mut self, properly_paired: bool) -> Self {
1148 if properly_paired {
1149 self.flags.set(Flags::SEGMENTED, true);
1150 }
1151 self.flags.set(Flags::PROPERLY_SEGMENTED, properly_paired);
1152 self
1153 }
1154
1155 #[must_use]
1157 pub fn unmapped(mut self, unmapped: bool) -> Self {
1158 self.flags.set(Flags::UNMAPPED, unmapped);
1159 self
1160 }
1161
1162 #[must_use]
1164 pub fn reverse_complement(mut self, reverse: bool) -> Self {
1165 self.flags.set(Flags::REVERSE_COMPLEMENTED, reverse);
1166 self
1167 }
1168
1169 #[must_use]
1171 pub fn secondary(mut self, secondary: bool) -> Self {
1172 self.flags.set(Flags::SECONDARY, secondary);
1173 self
1174 }
1175
1176 #[must_use]
1178 pub fn supplementary(mut self, supplementary: bool) -> Self {
1179 self.flags.set(Flags::SUPPLEMENTARY, supplementary);
1180 self
1181 }
1182
1183 #[must_use]
1185 pub fn reference_sequence_id(mut self, id: usize) -> Self {
1186 self.reference_sequence_id = Some(id);
1187 self
1188 }
1189
1190 #[must_use]
1192 pub fn alignment_start(mut self, pos: usize) -> Self {
1193 self.alignment_start = Some(pos);
1194 self
1195 }
1196
1197 #[must_use]
1199 pub fn mapping_quality(mut self, mapq: u8) -> Self {
1200 self.mapping_quality = Some(mapq);
1201 self
1202 }
1203
1204 #[must_use]
1206 pub fn cigar(mut self, cigar: &str) -> Self {
1207 self.cigar = Some(cigar.to_string());
1208 self
1209 }
1210
1211 #[must_use]
1213 pub fn mate_reference_sequence_id(mut self, id: usize) -> Self {
1214 self.mate_reference_sequence_id = Some(id);
1215 self
1216 }
1217
1218 #[must_use]
1220 pub fn mate_alignment_start(mut self, pos: usize) -> Self {
1221 self.mate_alignment_start = Some(pos);
1222 self
1223 }
1224
1225 #[must_use]
1227 pub fn template_length(mut self, tlen: i32) -> Self {
1228 self.template_length = Some(tlen);
1229 self
1230 }
1231
1232 #[must_use]
1234 pub fn mate_reverse_complement(mut self, reverse: bool) -> Self {
1235 self.flags.set(Flags::MATE_REVERSE_COMPLEMENTED, reverse);
1236 self
1237 }
1238
1239 #[must_use]
1241 pub fn mate_unmapped(mut self, unmapped: bool) -> Self {
1242 self.flags.set(Flags::MATE_UNMAPPED, unmapped);
1243 self
1244 }
1245
1246 #[must_use]
1259 pub fn tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1260 let tag_bytes = tag.as_bytes();
1261 if tag_bytes.len() == 2 {
1262 let tag = Tag::from([tag_bytes[0], tag_bytes[1]]);
1263 self.tags.push((tag, value.into()));
1264 }
1265 self
1266 }
1267
1268 #[must_use]
1285 pub fn consensus_tags(mut self, builder: ConsensusTagsBuilder) -> Self {
1286 for (tag, value) in builder.build() {
1287 self.tags.push((tag, value));
1288 }
1289 self
1290 }
1291
1292 #[must_use]
1298 pub fn build(self) -> RecordBuf {
1299 let mut record = RecordBuf::default();
1300
1301 if let Some(name) = self.name {
1303 *record.name_mut() = Some(name.into());
1304 }
1305
1306 *record.flags_mut() = self.flags;
1308
1309 if let Some(ref_id) = self.reference_sequence_id {
1311 *record.reference_sequence_id_mut() = Some(ref_id);
1312 }
1313 if let Some(pos) = self.alignment_start {
1314 *record.alignment_start_mut() =
1315 Some(Position::try_from(pos).expect("alignment_start must be >= 1"));
1316 }
1317
1318 if let Some(mate_ref_id) = self.mate_reference_sequence_id {
1320 *record.mate_reference_sequence_id_mut() = Some(mate_ref_id);
1321 }
1322 if let Some(mate_pos) = self.mate_alignment_start {
1323 *record.mate_alignment_start_mut() =
1324 Some(Position::try_from(mate_pos).expect("mate_alignment_start must be >= 1"));
1325 }
1326 if let Some(tlen) = self.template_length {
1327 *record.template_length_mut() = tlen;
1328 }
1329
1330 if let Some(mapq) = self.mapping_quality {
1332 *record.mapping_quality_mut() = Some(
1333 noodles::sam::alignment::record::MappingQuality::try_from(mapq)
1334 .expect("mapping_quality must be valid"),
1335 );
1336 }
1337
1338 let (cigar_str, sequence) = match (self.cigar, self.sequence.is_empty()) {
1343 (Some(cigar), true) => {
1344 let seq_len = cigar_seq_len(&cigar);
1346 let generated_seq: String =
1347 (0..seq_len).map(|i| "ACGT".chars().nth(i % 4).unwrap()).collect();
1348 (cigar, generated_seq.into_bytes())
1349 }
1350 (Some(cigar), false) => (cigar, self.sequence),
1351 (None, false) => (format!("{}M", self.sequence.len()), self.sequence),
1352 (None, true) => (String::new(), Vec::new()),
1353 };
1354
1355 if !cigar_str.is_empty() {
1356 let ops = parse_cigar(&cigar_str);
1357 *record.cigar_mut() = ops.into_iter().collect();
1358 }
1359
1360 let qualities = if self.qualities.is_empty() && !sequence.is_empty() {
1362 vec![DEFAULT_BASE_QUALITY; sequence.len()]
1363 } else {
1364 self.qualities
1365 };
1366 *record.sequence_mut() = Sequence::from(sequence);
1367 *record.quality_scores_mut() = QualityScores::from(qualities);
1368
1369 for (tag, value) in self.tags {
1371 record.data_mut().insert(tag, value);
1372 }
1373
1374 record
1375 }
1376}
1377
1378#[derive(Debug, Default)]
1405#[expect(
1406 clippy::struct_excessive_bools,
1407 reason = "test builder struct; bools represent independent SAM flags (r1_reverse, r2_reverse, secondary, supplementary)"
1408)]
1409pub struct RecordPairBuilder {
1410 name: Option<String>,
1411 r1_sequence: Option<String>,
1412 r2_sequence: Option<String>,
1413 r1_qualities: Option<Vec<u8>>,
1414 r2_qualities: Option<Vec<u8>>,
1415 r1_start: Option<usize>,
1416 r2_start: Option<usize>,
1417 r1_cigar: Option<String>,
1418 r2_cigar: Option<String>,
1419 reference_sequence_id: usize,
1420 r2_reference_sequence_id: Option<usize>,
1421 mapping_quality: u8,
1422 r1_reverse: bool,
1423 r2_reverse: bool,
1424 secondary: bool,
1425 supplementary: bool,
1426 tags: Vec<(String, BufValue)>,
1427 r1_tags: Vec<(String, BufValue)>,
1428 r2_tags: Vec<(String, BufValue)>,
1429}
1430
1431impl RecordPairBuilder {
1432 #[must_use]
1439 pub fn new() -> Self {
1440 Self { mapping_quality: 60, r2_reverse: true, ..Self::default() }
1441 }
1442
1443 #[must_use]
1445 pub fn name(mut self, name: &str) -> Self {
1446 self.name = Some(name.to_string());
1447 self
1448 }
1449
1450 #[must_use]
1452 pub fn r1_sequence(mut self, seq: &str) -> Self {
1453 self.r1_sequence = Some(seq.to_string());
1454 self
1455 }
1456
1457 #[must_use]
1459 pub fn r2_sequence(mut self, seq: &str) -> Self {
1460 self.r2_sequence = Some(seq.to_string());
1461 self
1462 }
1463
1464 #[must_use]
1466 pub fn r1_qualities(mut self, quals: &[u8]) -> Self {
1467 self.r1_qualities = Some(quals.to_vec());
1468 self
1469 }
1470
1471 #[must_use]
1473 pub fn r2_qualities(mut self, quals: &[u8]) -> Self {
1474 self.r2_qualities = Some(quals.to_vec());
1475 self
1476 }
1477
1478 #[must_use]
1480 pub fn r1_start(mut self, start: usize) -> Self {
1481 self.r1_start = Some(start);
1482 self
1483 }
1484
1485 #[must_use]
1487 pub fn r2_start(mut self, start: usize) -> Self {
1488 self.r2_start = Some(start);
1489 self
1490 }
1491
1492 #[must_use]
1494 pub fn r1_cigar(mut self, cigar: &str) -> Self {
1495 self.r1_cigar = Some(cigar.to_string());
1496 self
1497 }
1498
1499 #[must_use]
1501 pub fn r2_cigar(mut self, cigar: &str) -> Self {
1502 self.r2_cigar = Some(cigar.to_string());
1503 self
1504 }
1505
1506 #[must_use]
1508 pub fn reference_sequence_id(mut self, id: usize) -> Self {
1509 self.reference_sequence_id = id;
1510 self
1511 }
1512
1513 #[must_use]
1515 pub fn r2_reference_sequence_id(mut self, id: usize) -> Self {
1516 self.r2_reference_sequence_id = Some(id);
1517 self
1518 }
1519
1520 #[must_use]
1522 pub fn mapping_quality(mut self, mapq: u8) -> Self {
1523 self.mapping_quality = mapq;
1524 self
1525 }
1526
1527 #[must_use]
1529 pub fn r1_reverse(mut self, reverse: bool) -> Self {
1530 self.r1_reverse = reverse;
1531 self
1532 }
1533
1534 #[must_use]
1536 pub fn r2_reverse(mut self, reverse: bool) -> Self {
1537 self.r2_reverse = reverse;
1538 self
1539 }
1540
1541 #[must_use]
1543 pub fn secondary(mut self, secondary: bool) -> Self {
1544 self.secondary = secondary;
1545 self
1546 }
1547
1548 #[must_use]
1550 pub fn supplementary(mut self, supplementary: bool) -> Self {
1551 self.supplementary = supplementary;
1552 self
1553 }
1554
1555 #[must_use]
1557 pub fn tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1558 self.tags.push((tag.to_string(), value.into()));
1559 self
1560 }
1561
1562 #[must_use]
1564 pub fn r1_tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1565 self.r1_tags.push((tag.to_string(), value.into()));
1566 self
1567 }
1568
1569 #[must_use]
1571 pub fn r2_tag<V: Into<BufValue>>(mut self, tag: &str, value: V) -> Self {
1572 self.r2_tags.push((tag.to_string(), value.into()));
1573 self
1574 }
1575
1576 #[must_use]
1583 pub fn build(self) -> (RecordBuf, RecordBuf) {
1584 let name = self.name.unwrap_or_else(|| "pair".to_string());
1585 let r1_seq = self.r1_sequence.unwrap_or_else(|| "ACGT".to_string());
1586 let r2_seq = self.r2_sequence.unwrap_or_else(|| "ACGT".to_string());
1587 let r1_quals =
1588 self.r1_qualities.unwrap_or_else(|| vec![DEFAULT_BASE_QUALITY; r1_seq.len()]);
1589 let r2_quals =
1590 self.r2_qualities.unwrap_or_else(|| vec![DEFAULT_BASE_QUALITY; r2_seq.len()]);
1591 let r1_cigar = self.r1_cigar.unwrap_or_else(|| format!("{}M", r1_seq.len()));
1592 let r2_cigar = self.r2_cigar.unwrap_or_else(|| format!("{}M", r2_seq.len()));
1593 let r2_ref_id = self.r2_reference_sequence_id.unwrap_or(self.reference_sequence_id);
1594
1595 let r1_mapped = self.r1_start.is_some();
1596 let r2_mapped = self.r2_start.is_some();
1597
1598 let mut r1_builder = RecordBuilder::new()
1600 .name(&name)
1601 .sequence(&r1_seq)
1602 .qualities(&r1_quals)
1603 .paired(true)
1604 .first_segment(true)
1605 .reverse_complement(self.r1_reverse)
1606 .mate_reverse_complement(self.r2_reverse)
1607 .secondary(self.secondary)
1608 .supplementary(self.supplementary);
1609
1610 if r1_mapped {
1611 r1_builder = r1_builder
1612 .reference_sequence_id(self.reference_sequence_id)
1613 .alignment_start(self.r1_start.unwrap())
1614 .cigar(&r1_cigar)
1615 .mapping_quality(self.mapping_quality);
1616 } else {
1617 r1_builder = r1_builder.unmapped(true);
1618 }
1619
1620 if r2_mapped {
1621 r1_builder = r1_builder
1622 .mate_reference_sequence_id(r2_ref_id)
1623 .mate_alignment_start(self.r2_start.unwrap());
1624 } else {
1625 r1_builder = r1_builder.mate_unmapped(true);
1626 }
1627
1628 for (tag, value) in &self.tags {
1629 r1_builder = r1_builder.tag(tag, value.clone());
1630 }
1631 for (tag, value) in &self.r1_tags {
1632 r1_builder = r1_builder.tag(tag, value.clone());
1633 }
1634
1635 let mut r2_builder = RecordBuilder::new()
1637 .name(&name)
1638 .sequence(&r2_seq)
1639 .qualities(&r2_quals)
1640 .paired(true)
1641 .first_segment(false) .reverse_complement(self.r2_reverse)
1643 .mate_reverse_complement(self.r1_reverse)
1644 .secondary(self.secondary)
1645 .supplementary(self.supplementary);
1646
1647 if r2_mapped {
1648 r2_builder = r2_builder
1649 .reference_sequence_id(r2_ref_id)
1650 .alignment_start(self.r2_start.unwrap())
1651 .cigar(&r2_cigar)
1652 .mapping_quality(self.mapping_quality);
1653 } else {
1654 r2_builder = r2_builder.unmapped(true);
1655 }
1656
1657 if r1_mapped {
1658 r2_builder = r2_builder
1659 .mate_reference_sequence_id(self.reference_sequence_id)
1660 .mate_alignment_start(self.r1_start.unwrap());
1661 } else {
1662 r2_builder = r2_builder.mate_unmapped(true);
1663 }
1664
1665 for (tag, value) in &self.tags {
1666 r2_builder = r2_builder.tag(tag, value.clone());
1667 }
1668 for (tag, value) in &self.r2_tags {
1669 r2_builder = r2_builder.tag(tag, value.clone());
1670 }
1671
1672 if r1_mapped && r2_mapped {
1674 r1_builder = r1_builder.tag("MC", r2_cigar.as_str());
1675 r2_builder = r2_builder.tag("MC", r1_cigar.as_str());
1676 }
1677
1678 let mut first_read = r1_builder.build();
1680 let mut second_read = r2_builder.build();
1681
1682 if r1_mapped && r2_mapped && self.reference_sequence_id == r2_ref_id {
1683 let pos1 = i32::try_from(self.r1_start.unwrap()).expect("r1_start fits in i32");
1684 let pos2 = i32::try_from(self.r2_start.unwrap()).expect("r2_start fits in i32");
1685 let end1 = pos1
1686 + i32::try_from(cigar_ref_len(&r1_cigar)).expect("r1 cigar ref len fits in i32")
1687 - 1;
1688 let end2 = pos2
1689 + i32::try_from(cigar_ref_len(&r2_cigar)).expect("r2 cigar ref len fits in i32")
1690 - 1;
1691
1692 let (left, right) = if pos1 <= pos2 { (pos1, end2) } else { (pos2, end1) };
1693 let tlen = right - left + 1;
1694
1695 *first_read.template_length_mut() = if pos1 <= pos2 { tlen } else { -tlen };
1696 *second_read.template_length_mut() = if pos2 <= pos1 { tlen } else { -tlen };
1697 }
1698
1699 (first_read, second_read)
1700 }
1701}
1702
1703#[derive(Debug, Default)]
1724pub struct ConsensusTagsBuilder {
1725 depth_max: Option<i32>,
1726 depth_min: Option<i32>,
1727 error_rate: Option<f32>,
1728 error_count: Option<i32>,
1729 per_base_depths: Option<Vec<u16>>,
1730 per_base_errors: Option<Vec<u16>>,
1731 ab_depth: Option<i32>,
1733 ba_depth: Option<i32>,
1734 ab_min_depth: Option<i32>,
1735 ba_min_depth: Option<i32>,
1736 ab_errors: Option<f32>,
1737 ba_errors: Option<f32>,
1738 ab_error_count: Option<i32>,
1739 ba_error_count: Option<i32>,
1740}
1741
1742impl ConsensusTagsBuilder {
1743 #[must_use]
1745 pub fn new() -> Self {
1746 Self::default()
1747 }
1748
1749 #[must_use]
1751 pub fn depth_max(mut self, depth: i32) -> Self {
1752 self.depth_max = Some(depth);
1753 self
1754 }
1755
1756 #[must_use]
1758 pub fn depth_min(mut self, depth: i32) -> Self {
1759 self.depth_min = Some(depth);
1760 self
1761 }
1762
1763 #[must_use]
1765 pub fn error_rate(mut self, rate: f32) -> Self {
1766 self.error_rate = Some(rate);
1767 self
1768 }
1769
1770 #[must_use]
1772 pub fn per_base_depths(mut self, depths: &[u16]) -> Self {
1773 self.per_base_depths = Some(depths.to_vec());
1774 self
1775 }
1776
1777 #[must_use]
1779 pub fn per_base_errors(mut self, errors: &[u16]) -> Self {
1780 self.per_base_errors = Some(errors.to_vec());
1781 self
1782 }
1783
1784 #[must_use]
1786 pub fn ab_depth(mut self, depth: i32) -> Self {
1787 self.ab_depth = Some(depth);
1788 self
1789 }
1790
1791 #[must_use]
1793 pub fn ba_depth(mut self, depth: i32) -> Self {
1794 self.ba_depth = Some(depth);
1795 self
1796 }
1797
1798 #[must_use]
1800 pub fn ab_min_depth(mut self, depth: i32) -> Self {
1801 self.ab_min_depth = Some(depth);
1802 self
1803 }
1804
1805 #[must_use]
1807 pub fn ba_min_depth(mut self, depth: i32) -> Self {
1808 self.ba_min_depth = Some(depth);
1809 self
1810 }
1811
1812 #[must_use]
1814 pub fn ab_errors(mut self, rate: f32) -> Self {
1815 self.ab_errors = Some(rate);
1816 self
1817 }
1818
1819 #[must_use]
1821 pub fn ba_errors(mut self, rate: f32) -> Self {
1822 self.ba_errors = Some(rate);
1823 self
1824 }
1825
1826 #[must_use]
1830 pub fn tag_ce_as_int(mut self, count: i32) -> Self {
1831 self.error_count = Some(count);
1832 self
1833 }
1834
1835 #[must_use]
1837 pub fn ab_errors_int(mut self, count: i32) -> Self {
1838 self.ab_error_count = Some(count);
1839 self
1840 }
1841
1842 #[must_use]
1844 pub fn ba_errors_int(mut self, count: i32) -> Self {
1845 self.ba_error_count = Some(count);
1846 self
1847 }
1848
1849 #[must_use]
1851 pub fn build(self) -> Vec<(Tag, BufValue)> {
1852 debug_assert!(
1854 !(self.depth_max.is_some() && self.per_base_depths.is_some()),
1855 "depth_max (cD) and per_base_depths (cd) are mutually exclusive; use one or the other"
1856 );
1857 debug_assert!(
1858 !(self.error_rate.is_some() && self.per_base_errors.is_some()),
1859 "error_rate (cE) and per_base_errors (ce) are mutually exclusive; use one or the other"
1860 );
1861 debug_assert!(
1862 !(self.error_count.is_some() && self.per_base_errors.is_some()),
1863 "error_count (cE) and per_base_errors (ce) are mutually exclusive; use one or the other"
1864 );
1865
1866 let mut tags = Vec::new();
1867
1868 if let Some(depth) = self.depth_max {
1870 tags.push((Tag::from([b'c', b'D']), to_smallest_signed_int(depth)));
1871 }
1872 if let Some(depth) = self.depth_min {
1873 tags.push((Tag::from([b'c', b'M']), to_smallest_signed_int(depth)));
1874 }
1875 if let Some(rate) = self.error_rate {
1876 tags.push((Tag::from([b'c', b'E']), BufValue::from(rate)));
1877 }
1878 if let Some(count) = self.error_count {
1879 tags.push((Tag::from([b'c', b'E']), to_smallest_signed_int(count)));
1880 }
1881 if let Some(depths) = self.per_base_depths {
1882 tags.push((Tag::from([b'c', b'd']), BufValue::from(depths)));
1883 }
1884 if let Some(errors) = self.per_base_errors {
1885 tags.push((Tag::from([b'c', b'e']), BufValue::from(errors)));
1886 }
1887
1888 if let Some(depth) = self.ab_depth {
1890 tags.push((Tag::from([b'a', b'D']), to_smallest_signed_int(depth)));
1891 }
1892 if let Some(depth) = self.ba_depth {
1893 tags.push((Tag::from([b'b', b'D']), to_smallest_signed_int(depth)));
1894 }
1895 if let Some(depth) = self.ab_min_depth {
1896 tags.push((Tag::from([b'a', b'M']), to_smallest_signed_int(depth)));
1897 }
1898 if let Some(depth) = self.ba_min_depth {
1899 tags.push((Tag::from([b'b', b'M']), to_smallest_signed_int(depth)));
1900 }
1901 if let Some(rate) = self.ab_errors {
1902 tags.push((Tag::from([b'a', b'E']), BufValue::from(rate)));
1903 }
1904 if let Some(rate) = self.ba_errors {
1905 tags.push((Tag::from([b'b', b'E']), BufValue::from(rate)));
1906 }
1907 if let Some(count) = self.ab_error_count {
1908 tags.push((Tag::from([b'a', b'E']), to_smallest_signed_int(count)));
1909 }
1910 if let Some(count) = self.ba_error_count {
1911 tags.push((Tag::from([b'b', b'E']), to_smallest_signed_int(count)));
1912 }
1913
1914 tags
1915 }
1916}
1917
1918#[cfg(test)]
1919#[expect(clippy::similar_names, reason = "test code uses domain-specific tag names like cd/cm/ce")]
1920mod tests {
1921 use super::*;
1922 use noodles::sam::alignment::record::Cigar;
1923
1924 #[test]
1925 fn test_new_builder() {
1926 let builder = SamBuilder::new();
1927 assert!(builder.is_empty());
1928 assert!(!builder.header.reference_sequences().is_empty());
1929 }
1930
1931 #[test]
1932 fn test_add_frag_unmapped() {
1933 let mut builder = SamBuilder::new_unmapped();
1934 let rec = builder.add_frag().name("read1").bases("ACGT").build();
1935
1936 assert_eq!(rec.name().map(std::convert::AsRef::as_ref), Some(b"read1".as_ref()));
1937 assert!(rec.flags().is_unmapped());
1938 assert_eq!(builder.len(), 1);
1939 }
1940
1941 #[test]
1942 fn test_add_frag_mapped() {
1943 let mut builder = SamBuilder::new();
1944 let rec = builder.add_frag().name("read1").bases("ACGT").contig(0).start(100).build();
1945
1946 assert!(!rec.flags().is_unmapped());
1947 assert_eq!(rec.reference_sequence_id(), Some(0));
1948 assert_eq!(rec.alignment_start(), Some(Position::try_from(100).unwrap()));
1949 }
1950
1951 #[test]
1952 fn test_add_pair_unmapped() {
1953 let mut builder = SamBuilder::new_unmapped();
1954 let (read_one, read_two) =
1955 builder.add_pair().name("pair1").bases1("AAAA").bases2("CCCC").build();
1956
1957 assert!(read_one.flags().is_first_segment());
1958 assert!(read_two.flags().is_last_segment());
1959 assert!(read_one.flags().is_unmapped());
1960 assert!(read_two.flags().is_unmapped());
1961 assert_eq!(builder.len(), 2);
1962 }
1963
1964 #[test]
1965 fn test_add_pair_mapped() {
1966 let mut builder = SamBuilder::new();
1967 let (read_one, read_two) = builder
1968 .add_pair()
1969 .name("pair1")
1970 .bases1("AAAA")
1971 .bases2("CCCC")
1972 .start1(100)
1973 .start2(200)
1974 .build();
1975
1976 assert!(!read_one.flags().is_unmapped());
1977 assert!(!read_two.flags().is_unmapped());
1978 assert_eq!(read_one.alignment_start(), Some(Position::try_from(100).unwrap()));
1979 assert_eq!(read_two.alignment_start(), Some(Position::try_from(200).unwrap()));
1980 }
1981
1982 #[test]
1983 fn test_add_pair_with_attrs() {
1984 let mut builder = SamBuilder::new_unmapped();
1985 let (read_one, read_two) =
1986 builder.add_pair().attr("MI", "test_umi").attr("RX", "ACGT").build();
1987
1988 let mi_tag = Tag::new(b'M', b'I');
1989 assert!(read_one.data().get(&mi_tag).is_some());
1990 assert!(read_two.data().get(&mi_tag).is_some());
1991 }
1992
1993 #[test]
1994 fn test_sequential_names() {
1995 let mut builder = SamBuilder::new_unmapped();
1996 let first = builder.add_frag().bases("ACGT").build();
1997 let second = builder.add_frag().bases("ACGT").build();
1998 let third = builder.add_frag().bases("ACGT").build();
1999
2000 assert_eq!(first.name().map(std::convert::AsRef::as_ref), Some(b"0000".as_ref()));
2001 assert_eq!(second.name().map(std::convert::AsRef::as_ref), Some(b"0001".as_ref()));
2002 assert_eq!(third.name().map(std::convert::AsRef::as_ref), Some(b"0002".as_ref()));
2003 }
2004
2005 #[test]
2006 fn test_write_to_temp_file() {
2007 let mut builder = SamBuilder::new();
2008 let _ = builder.add_frag().name("read1").bases("ACGT").contig(0).start(100).build();
2009
2010 let temp = builder.to_temp_file().unwrap();
2011 assert!(temp.path().exists());
2012 }
2013
2014 #[test]
2015 fn test_parse_cigar() {
2016 let ops = parse_cigar("10M2I5M");
2017 assert_eq!(ops.len(), 3);
2018 assert_eq!(ops[0].kind(), Kind::Match);
2019 assert_eq!(ops[0].len(), 10);
2020 assert_eq!(ops[1].kind(), Kind::Insertion);
2021 assert_eq!(ops[1].len(), 2);
2022 assert_eq!(ops[2].kind(), Kind::Match);
2023 assert_eq!(ops[2].len(), 5);
2024 }
2025
2026 #[test]
2027 fn test_strand_setting() {
2028 let mut builder = SamBuilder::new();
2029 let rec =
2030 builder.add_frag().bases("ACGT").contig(0).start(100).strand(Strand::Minus).build();
2031
2032 assert!(rec.flags().is_reverse_complemented());
2033 }
2034
2035 #[test]
2036 fn test_pair_strands() {
2037 let mut builder = SamBuilder::new();
2038 let (read_one, read_two) = builder
2039 .add_pair()
2040 .start1(100)
2041 .start2(200)
2042 .strand1(Strand::Minus)
2043 .strand2(Strand::Plus)
2044 .build();
2045
2046 assert!(read_one.flags().is_reverse_complemented());
2047 assert!(!read_two.flags().is_reverse_complemented());
2048 assert!(!read_one.flags().is_mate_reverse_complemented());
2049 assert!(read_two.flags().is_mate_reverse_complemented());
2050 }
2051
2052 #[test]
2053 fn test_template_length_calculation() {
2054 let mut builder = SamBuilder::new();
2055 let (read_one, read_two) =
2056 builder.add_pair().bases1("AAAA").bases2("CCCC").start1(100).start2(110).build();
2057
2058 assert_eq!(read_one.template_length(), 14);
2061 assert_eq!(read_two.template_length(), -14);
2062 }
2063
2064 #[test]
2065 fn test_template_length_with_deletion() {
2066 let mut builder = SamBuilder::new();
2067 let (read_one, read_two) = builder
2070 .add_pair()
2071 .bases1("AAAA")
2072 .cigar1("2M2D2M")
2073 .bases2("CCCC")
2074 .start1(100)
2075 .start2(110)
2076 .build();
2077
2078 assert_eq!(read_one.template_length(), 14);
2080 assert_eq!(read_two.template_length(), -14);
2081 }
2082
2083 #[test]
2084 fn test_template_length_with_insertion() {
2085 let mut builder = SamBuilder::new();
2086 let (read_one, read_two) = builder
2089 .add_pair()
2090 .bases1("AAAAAA")
2091 .cigar1("2M2I2M")
2092 .bases2("CCCC")
2093 .start1(100)
2094 .start2(110)
2095 .build();
2096
2097 assert_eq!(read_one.template_length(), 14);
2099 assert_eq!(read_two.template_length(), -14);
2100 }
2101
2102 #[test]
2103 fn test_cigar_ref_len_simple() {
2104 assert_eq!(cigar_ref_len("10M"), 10);
2105 assert_eq!(cigar_ref_len("5M3I5M"), 10); assert_eq!(cigar_ref_len("5M3D5M"), 13); assert_eq!(cigar_ref_len("2S5M3S"), 5); assert_eq!(cigar_ref_len("5M2N5M"), 12); assert_eq!(cigar_ref_len("5=3X"), 8); }
2111
2112 #[test]
2117 fn test_record_builder_basic() {
2118 let record = RecordBuilder::new()
2119 .name("read1")
2120 .sequence("ACGT")
2121 .qualities(&[30, 30, 30, 30])
2122 .build();
2123
2124 assert_eq!(record.name().map(std::convert::AsRef::as_ref), Some(b"read1".as_ref()));
2125 assert_eq!(record.sequence().as_ref(), b"ACGT");
2126 assert_eq!(record.quality_scores().as_ref(), &[30, 30, 30, 30]);
2127 }
2128
2129 #[test]
2130 fn test_record_builder_paired() {
2131 let record =
2132 RecordBuilder::new().name("read1").sequence("ACGT").first_segment(true).build();
2133
2134 assert!(record.flags().is_segmented());
2135 assert!(record.flags().is_first_segment());
2136 }
2137
2138 #[test]
2139 fn test_record_builder_mapped() {
2140 let record = RecordBuilder::new()
2141 .name("read1")
2142 .sequence("ACGT")
2143 .reference_sequence_id(0)
2144 .alignment_start(100)
2145 .cigar("4M")
2146 .mapping_quality(60)
2147 .build();
2148
2149 assert_eq!(record.reference_sequence_id(), Some(0));
2150 assert_eq!(record.alignment_start(), Some(Position::try_from(100).unwrap()));
2151 }
2152
2153 #[test]
2154 fn test_record_builder_with_tags() {
2155 let record = RecordBuilder::new()
2156 .name("read1")
2157 .sequence("ACGT")
2158 .tag("RG", "A")
2159 .tag("MI", "AAAA-CCCC/A")
2160 .build();
2161
2162 let rg_tag = Tag::from([b'R', b'G']);
2163 let mi_tag = Tag::from([b'M', b'I']);
2164
2165 assert!(record.data().get(&rg_tag).is_some());
2166 assert!(record.data().get(&mi_tag).is_some());
2167 }
2168
2169 #[test]
2170 fn test_record_builder_auto_qualities() {
2171 let record = RecordBuilder::new().name("read1").sequence("ACGTACGT").build();
2172
2173 assert_eq!(record.quality_scores().as_ref(), &[30; 8]);
2175 }
2176
2177 #[test]
2178 fn test_record_builder_secondary_supplementary() {
2179 let secondary = RecordBuilder::new().sequence("ACGT").secondary(true).build();
2180 assert!(secondary.flags().is_secondary());
2181
2182 let supplementary = RecordBuilder::new().sequence("ACGT").supplementary(true).build();
2183 assert!(supplementary.flags().is_supplementary());
2184 }
2185
2186 #[test]
2191 fn test_consensus_tags_builder() {
2192 let record = RecordBuilder::new()
2193 .sequence("ACGT")
2194 .consensus_tags(ConsensusTagsBuilder::new().depth_max(10).depth_min(5).error_rate(0.01))
2195 .build();
2196
2197 let cd_tag = Tag::from([b'c', b'D']);
2198 let cm_tag = Tag::from([b'c', b'M']);
2199 let ce_tag = Tag::from([b'c', b'E']);
2200
2201 assert!(record.data().get(&cd_tag).is_some());
2202 assert!(record.data().get(&cm_tag).is_some());
2203 assert!(record.data().get(&ce_tag).is_some());
2204 }
2205
2206 #[test]
2207 fn test_consensus_tags_per_base() {
2208 let tags = ConsensusTagsBuilder::new()
2209 .per_base_depths(&[10, 20, 30])
2210 .per_base_errors(&[1, 2, 3])
2211 .build();
2212
2213 assert_eq!(tags.len(), 2);
2214 assert_eq!(tags[0].0, Tag::from([b'c', b'd']));
2215 assert_eq!(tags[1].0, Tag::from([b'c', b'e']));
2216 }
2217
2218 #[test]
2223 fn test_mapped_read_convenience() {
2224 let record = RecordBuilder::mapped_read()
2225 .name("read1")
2226 .sequence("ACGTACGT")
2227 .alignment_start(100)
2228 .build();
2229
2230 assert_eq!(record.reference_sequence_id(), Some(0));
2231 assert_eq!(record.mapping_quality().map(u8::from), Some(60));
2232 assert!(!record.cigar().is_empty());
2234 }
2235
2236 #[test]
2237 fn test_record_builder_auto_cigar() {
2238 let record = RecordBuilder::new().sequence("ACGTACGT").build();
2239
2240 let ops: Vec<_> = record.cigar().iter().map(|r| r.unwrap()).collect();
2242 assert_eq!(ops.len(), 1);
2243 assert_eq!(ops[0].kind(), Kind::Match);
2244 assert_eq!(ops[0].len(), 8);
2245 }
2246
2247 #[test]
2248 fn test_record_builder_explicit_cigar_overrides_auto() {
2249 let record = RecordBuilder::new().sequence("ACGTACGT").cigar("4M2I2M").build();
2250
2251 let ops: Vec<_> = record.cigar().iter().map(|r| r.unwrap()).collect();
2252 assert_eq!(ops.len(), 3);
2253 assert_eq!(ops[0].kind(), Kind::Match);
2254 assert_eq!(ops[0].len(), 4);
2255 assert_eq!(ops[1].kind(), Kind::Insertion);
2256 assert_eq!(ops[1].len(), 2);
2257 }
2258
2259 #[test]
2260 fn test_record_pair_builder_basic() {
2261 let (read_one, read_two) = RecordPairBuilder::new()
2262 .name("pair1")
2263 .r1_sequence("AAAA")
2264 .r2_sequence("TTTT")
2265 .r1_start(100)
2266 .r2_start(200)
2267 .build();
2268
2269 assert!(read_one.flags().is_first_segment());
2270 assert!(read_two.flags().is_last_segment());
2271 assert!(!read_one.flags().is_reverse_complemented());
2272 assert!(read_two.flags().is_reverse_complemented()); }
2274
2275 #[test]
2276 fn test_record_pair_builder_fr_orientation() {
2277 let (read_one, read_two) = RecordPairBuilder::new()
2278 .r1_sequence("ACGT")
2279 .r2_sequence("TGCA")
2280 .r1_start(100)
2281 .r2_start(200)
2282 .build();
2283
2284 assert!(!read_one.flags().is_reverse_complemented());
2286 assert!(read_two.flags().is_reverse_complemented());
2287
2288 assert!(read_one.flags().is_mate_reverse_complemented());
2290 assert!(!read_two.flags().is_mate_reverse_complemented());
2291 }
2292
2293 #[test]
2294 fn test_record_pair_builder_with_tags() {
2295 let (read_one, read_two) = RecordPairBuilder::new()
2296 .name("pair1")
2297 .r1_sequence("ACGT")
2298 .r2_sequence("TGCA")
2299 .r1_start(100)
2300 .r2_start(200)
2301 .tag("MI", "1/A")
2302 .tag("RX", "AAAA-TTTT")
2303 .build();
2304
2305 let mi_tag = Tag::from([b'M', b'I']);
2306 let rx_tag = Tag::from([b'R', b'X']);
2307 assert!(read_one.data().get(&mi_tag).is_some());
2308 assert!(read_two.data().get(&mi_tag).is_some());
2309 assert!(read_one.data().get(&rx_tag).is_some());
2310 assert!(read_two.data().get(&rx_tag).is_some());
2311 }
2312
2313 #[test]
2314 fn test_record_pair_builder_template_length() {
2315 let (read_one, read_two) = RecordPairBuilder::new()
2316 .r1_sequence("AAAA")
2317 .r2_sequence("CCCC")
2318 .r1_start(100)
2319 .r2_start(110)
2320 .build();
2321
2322 assert_eq!(read_one.template_length(), 14);
2325 assert_eq!(read_two.template_length(), -14);
2326 }
2327
2328 #[test]
2329 fn test_record_pair_builder_unmapped() {
2330 let (read_one, read_two) = RecordPairBuilder::new()
2331 .name("unmapped_pair")
2332 .r1_sequence("ACGT")
2333 .r2_sequence("TGCA")
2334 .build();
2335
2336 assert!(read_one.flags().is_unmapped());
2338 assert!(read_two.flags().is_unmapped());
2339 assert!(read_one.flags().is_mate_unmapped());
2340 assert!(read_two.flags().is_mate_unmapped());
2341 }
2342
2343 #[test]
2344 fn test_record_pair_builder_per_read_tags() {
2345 let (read_one, read_two) = RecordPairBuilder::new()
2346 .name("pair1")
2347 .r1_sequence("ACGT")
2348 .r2_sequence("TGCA")
2349 .r1_start(100)
2350 .r2_start(200)
2351 .tag("MI", "UMI123") .r1_tag("MQ", 0i32) .r2_tag("MQ", 60i32) .build();
2355
2356 let mi_tag = Tag::from([b'M', b'I']);
2357 let mq_tag = Tag::from([b'M', b'Q']);
2358
2359 assert!(read_one.data().get(&mi_tag).is_some());
2361 assert!(read_two.data().get(&mi_tag).is_some());
2362
2363 let read_one_mq = read_one.data().get(&mq_tag).expect("R1 should have MQ tag");
2365 let read_two_mq = read_two.data().get(&mq_tag).expect("R2 should have MQ tag");
2366
2367 match read_one_mq {
2369 BufValue::Int8(v) => assert_eq!(*v, 0),
2370 BufValue::UInt8(v) => assert_eq!(*v, 0),
2371 BufValue::Int16(v) => assert_eq!(*v, 0),
2372 BufValue::UInt16(v) => assert_eq!(*v, 0),
2373 BufValue::Int32(v) => assert_eq!(*v, 0),
2374 BufValue::UInt32(v) => assert_eq!(*v, 0),
2375 _ => panic!("Expected integer type for MQ tag, got {read_one_mq:?}"),
2376 }
2377 match read_two_mq {
2378 BufValue::Int8(v) => assert_eq!(*v, 60),
2379 BufValue::UInt8(v) => assert_eq!(*v, 60),
2380 BufValue::Int16(v) => assert_eq!(*v, 60),
2381 BufValue::UInt16(v) => assert_eq!(*v, 60),
2382 BufValue::Int32(v) => assert_eq!(*v, 60),
2383 BufValue::UInt32(v) => assert_eq!(*v, 60),
2384 _ => panic!("Expected integer type for MQ tag, got {read_two_mq:?}"),
2385 }
2386 }
2387
2388 #[test]
2389 fn test_record_pair_builder_secondary() {
2390 let (read_one, read_two) = RecordPairBuilder::new()
2391 .name("sec_pair")
2392 .r1_sequence("ACGT")
2393 .r2_sequence("TGCA")
2394 .r1_start(100)
2395 .r2_start(200)
2396 .secondary(true)
2397 .build();
2398
2399 assert!(read_one.flags().is_secondary());
2400 assert!(read_two.flags().is_secondary());
2401 assert!(!read_one.flags().is_supplementary());
2402 assert!(!read_two.flags().is_supplementary());
2403 }
2404
2405 #[test]
2406 fn test_record_pair_builder_supplementary() {
2407 let (read_one, read_two) = RecordPairBuilder::new()
2408 .name("sup_pair")
2409 .r1_sequence("ACGT")
2410 .r2_sequence("TGCA")
2411 .r1_start(100)
2412 .r2_start(200)
2413 .supplementary(true)
2414 .build();
2415
2416 assert!(!read_one.flags().is_secondary());
2417 assert!(!read_two.flags().is_secondary());
2418 assert!(read_one.flags().is_supplementary());
2419 assert!(read_two.flags().is_supplementary());
2420 }
2421
2422 #[test]
2423 fn test_duplex_consensus_tags() {
2424 let record = RecordBuilder::new()
2425 .sequence("ACGT")
2426 .consensus_tags(
2427 ConsensusTagsBuilder::new()
2428 .depth_max(10)
2429 .depth_min(5)
2430 .error_rate(0.01)
2431 .ab_depth(6)
2432 .ba_depth(4)
2433 .ab_min_depth(3)
2434 .ba_min_depth(2)
2435 .ab_errors(0.005)
2436 .ba_errors(0.008),
2437 )
2438 .build();
2439
2440 let cd_tag = Tag::from([b'c', b'D']);
2442 let cm_tag = Tag::from([b'c', b'M']);
2443 let ce_tag = Tag::from([b'c', b'E']);
2444 assert!(record.data().get(&cd_tag).is_some());
2445 assert!(record.data().get(&cm_tag).is_some());
2446 assert!(record.data().get(&ce_tag).is_some());
2447
2448 let ab_depth_tag = Tag::from([b'a', b'D']);
2450 let ba_depth_tag = Tag::from([b'b', b'D']);
2451 let ab_min_tag = Tag::from([b'a', b'M']);
2452 let ba_min_tag = Tag::from([b'b', b'M']);
2453 let ab_error_tag = Tag::from([b'a', b'E']);
2454 let ba_error_tag = Tag::from([b'b', b'E']);
2455 assert!(record.data().get(&ab_depth_tag).is_some());
2456 assert!(record.data().get(&ba_depth_tag).is_some());
2457 assert!(record.data().get(&ab_min_tag).is_some());
2458 assert!(record.data().get(&ba_min_tag).is_some());
2459 assert!(record.data().get(&ab_error_tag).is_some());
2460 assert!(record.data().get(&ba_error_tag).is_some());
2461 }
2462}
2463
2464#[must_use]
2483pub fn cigar_seq_len(cigar: &str) -> usize {
2484 let ops = parse_cigar(cigar);
2485 ops.iter()
2486 .filter(|op| {
2487 matches!(
2488 op.kind(),
2489 noodles::sam::alignment::record::cigar::op::Kind::Match
2490 | noodles::sam::alignment::record::cigar::op::Kind::Insertion
2491 | noodles::sam::alignment::record::cigar::op::Kind::SoftClip
2492 | noodles::sam::alignment::record::cigar::op::Kind::SequenceMatch
2493 | noodles::sam::alignment::record::cigar::op::Kind::SequenceMismatch
2494 )
2495 })
2496 .map(|op| op.len())
2497 .sum()
2498}
2499
2500#[must_use]
2504pub fn cigar_ref_len(cigar: &str) -> usize {
2505 let ops = parse_cigar(cigar);
2506 ops.iter()
2507 .filter(|op| {
2508 matches!(
2509 op.kind(),
2510 Kind::Match
2511 | Kind::Deletion
2512 | Kind::Skip
2513 | Kind::SequenceMatch
2514 | Kind::SequenceMismatch
2515 )
2516 })
2517 .map(|op| op.len())
2518 .sum()
2519}
2520
2521#[must_use]
2534pub fn repeat_n<T: Clone>(item: T, n: usize) -> Vec<T> {
2535 vec![item; n]
2536}
2537
2538#[must_use]
2551pub fn uniform_qualities(quality: u8, length: usize) -> Vec<u8> {
2552 vec![quality; length]
2553}
2554
2555#[must_use]
2574#[expect(
2575 clippy::cast_possible_truncation,
2576 reason = "f64 -> i64 cast is safe: values are interpolated between two u8 inputs (0-255)"
2577)]
2578#[expect(
2579 clippy::cast_precision_loss,
2580 reason = "usize -> f64 cast is acceptable: length values in tests are small enough that precision loss is negligible"
2581)]
2582pub fn degrading_qualities(start: u8, end: u8, length: usize) -> Vec<u8> {
2583 if length == 0 {
2584 return Vec::new();
2585 }
2586 if length == 1 {
2587 return vec![start];
2588 }
2589
2590 let start_f = f64::from(start);
2591 let end_f = f64::from(end);
2592 let step = (end_f - start_f) / (length - 1) as f64;
2593
2594 (0..length)
2595 .map(|i| {
2596 let quality = (start_f + step * i as f64).round();
2597 u8::try_from(quality as i64).expect("quality score fits in u8")
2598 })
2599 .collect()
2600}
2601
2602pub fn create_test_fasta(sequences: &[(&str, &str)]) -> std::io::Result<NamedTempFile> {
2621 use std::io::Write;
2622 let mut file = NamedTempFile::new()?;
2623 for (name, seq) in sequences {
2624 writeln!(file, ">{name}")?;
2625 writeln!(file, "{seq}")?;
2626 }
2627 file.flush()?;
2628 Ok(file)
2629}
2630
2631pub fn create_default_test_fasta() -> std::io::Result<NamedTempFile> {
2648 create_test_fasta(&[("chr1", "ACGTACGTACGT"), ("chr2", "GGGGCCCCAAAA")])
2649}
2650
2651#[cfg(test)]
2652mod generator_tests {
2653 use super::*;
2654
2655 #[test]
2656 fn test_repeat_n() {
2657 let items = repeat_n(42, 5);
2658 assert_eq!(items, vec![42, 42, 42, 42, 42]);
2659 }
2660
2661 #[test]
2662 fn test_repeat_n_strings() {
2663 let items = repeat_n("ACGT".to_string(), 3);
2664 assert_eq!(items.len(), 3);
2665 assert!(items.iter().all(|s| s == "ACGT"));
2666 }
2667
2668 #[test]
2669 fn test_uniform_qualities() {
2670 let quals = uniform_qualities(30, 10);
2671 assert_eq!(quals, vec![30; 10]);
2672 }
2673
2674 #[test]
2675 fn test_degrading_qualities() {
2676 let quals = degrading_qualities(40, 20, 5);
2677 assert_eq!(quals.len(), 5);
2678 assert_eq!(quals[0], 40);
2679 assert_eq!(quals[4], 20);
2680 for i in 0..quals.len() - 1 {
2682 assert!(quals[i] >= quals[i + 1]);
2683 }
2684 }
2685
2686 #[test]
2687 fn test_degrading_qualities_edge_cases() {
2688 assert_eq!(degrading_qualities(40, 20, 0), Vec::<u8>::new());
2690
2691 assert_eq!(degrading_qualities(40, 20, 1), vec![40]);
2693
2694 let quals = degrading_qualities(30, 30, 5);
2696 assert!(quals.iter().all(|&q| q == 30));
2697 }
2698
2699 #[test]
2700 fn test_create_test_fasta() {
2701 let fasta = create_test_fasta(&[("chr1", "ACGT"), ("chr2", "GGGG")]).unwrap();
2702 assert!(fasta.path().exists());
2703
2704 let contents = std::fs::read_to_string(fasta.path()).unwrap();
2706 assert!(contents.contains(">chr1"));
2707 assert!(contents.contains("ACGT"));
2708 assert!(contents.contains(">chr2"));
2709 assert!(contents.contains("GGGG"));
2710 }
2711
2712 #[test]
2713 fn test_create_default_test_fasta() {
2714 let fasta = create_default_test_fasta().unwrap();
2715 assert!(fasta.path().exists());
2716
2717 let contents = std::fs::read_to_string(fasta.path()).unwrap();
2718 assert!(contents.contains(">chr1"));
2719 assert!(contents.contains("ACGTACGTACGT"));
2720 assert!(contents.contains(">chr2"));
2721 assert!(contents.contains("GGGGCCCCAAAA"));
2722 }
2723}