1#![deny(unsafe_code)]
2
3pub mod alignment_tags;
36pub mod builder;
37pub mod clipper;
38pub mod record_utils;
39
40pub trait ReferenceProvider {
49 fn fetch(
59 &self,
60 chrom: &str,
61 start: noodles::core::Position,
62 end: noodles::core::Position,
63 ) -> anyhow::Result<Vec<u8>>;
64}
65
66impl<T: std::ops::Deref> ReferenceProvider for T
67where
68 T::Target: ReferenceProvider,
69{
70 fn fetch(
71 &self,
72 chrom: &str,
73 start: noodles::core::Position,
74 end: noodles::core::Position,
75 ) -> anyhow::Result<Vec<u8>> {
76 self.deref().fetch(chrom, start, end)
77 }
78}
79
80pub use builder::{
82 ConsensusTagsBuilder, FragBuilder, MAPPED_PG_ID, PairBuilder, REFERENCE_LENGTH, RecordBuilder,
83 SamBuilder, Strand, create_default_test_fasta, create_ref_dict, create_test_fasta,
84 degrading_qualities, parse_cigar, repeat_n, uniform_qualities,
85};
86pub use clipper::{ClippingMode, SamRecordClipper};
87pub use record_utils::{
88 PairOrientation, alignment_end, cigar_reference_length, get_pair_orientation, is_fr_pair,
89 is_fr_pair_from_tags, leading_clipping, leading_soft_clipping, mate_unclipped_end,
90 mate_unclipped_start, num_bases_extending_past_mate, parse_cigar_string, read_pos_at_ref_pos,
91 reference_length, trailing_clipping, trailing_soft_clipping, unclipped_end,
92 unclipped_five_prime_position, unclipped_start,
93};
94
95use bstr::ByteSlice;
96use log::warn;
97use noodles::sam::Header;
98
99use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
100use noodles::sam::header::record::value::map::header::sort_order::{QUERY_NAME, UNSORTED};
101use std::path::Path;
102
103#[must_use]
128pub fn is_sorted(header: &Header, sort_order: &[u8]) -> bool {
129 if let Some(hdr_map) = header.header() {
130 hdr_map
131 .other_fields()
132 .get(b"SO")
133 .is_some_and(|so| <_ as AsRef<[u8]>>::as_ref(so) == sort_order)
134 } else {
135 false
136 }
137}
138
139#[must_use]
168pub fn is_template_coordinate_sorted(header: &Header) -> bool {
169 if let Some(hdr_map) = header.header() {
170 let other_fields = hdr_map.other_fields();
171
172 let is_unsorted =
174 other_fields.get(b"SO").is_some_and(|so| <_ as AsRef<[u8]>>::as_ref(so) == UNSORTED);
175
176 let is_query_grouped =
178 other_fields.get(b"GO").is_some_and(|go| <_ as AsRef<[u8]>>::as_ref(go) == b"query");
179
180 let ss_matches = other_fields.get(b"SS").is_none_or(|ss| {
184 let ss_bytes = <_ as AsRef<[u8]>>::as_ref(ss);
185 if let Some(colon_pos) = ss_bytes.iter().position(|&b| b == b':') {
187 &ss_bytes[colon_pos + 1..] == b"template-coordinate"
188 } else {
189 ss_bytes == b"template-coordinate"
190 }
191 }); is_unsorted && is_query_grouped && ss_matches
194 } else {
195 false
196 }
197}
198
199pub fn check_sort(header: &Header, path: &Path, name: &str) {
222 if !is_sorted(header, QUERY_NAME) {
223 warn!(
224 "{name} file {} does not appear to be queryname sorted per the SAM header.",
225 path.display()
226 );
227 warn!("Continuing, but your output may be incorrect.");
228 }
229}
230
231#[must_use]
259pub fn reverse_buf_value(value: &BufValue) -> BufValue {
260 use noodles::sam::alignment::record_buf::data::field::value::Array;
261
262 macro_rules! clone_reverse {
264 ($variant:ident, $vec:expr) => {{
265 let mut values = $vec.clone();
266 values.reverse();
267 Array::$variant(values)
268 }};
269 }
270
271 match value {
272 BufValue::Array(arr) => {
273 let new_arr = match arr {
274 Array::Int8(v) => clone_reverse!(Int8, v),
275 Array::UInt8(v) => clone_reverse!(UInt8, v),
276 Array::Int16(v) => clone_reverse!(Int16, v),
277 Array::UInt16(v) => clone_reverse!(UInt16, v),
278 Array::Int32(v) => clone_reverse!(Int32, v),
279 Array::UInt32(v) => clone_reverse!(UInt32, v),
280 Array::Float(v) => clone_reverse!(Float, v),
281 };
282 BufValue::Array(new_arr)
283 }
284 BufValue::String(s) => {
285 let mut bytes = s.as_bytes().to_vec();
286 bytes.reverse();
287 BufValue::from(String::from_utf8_lossy(&bytes).to_string())
288 }
289 _ => value.clone(),
290 }
291}
292
293#[must_use]
322pub fn revcomp_buf_value(value: &BufValue) -> BufValue {
323 match value {
324 BufValue::String(s) => {
325 let revcomp = fgumi_dna::reverse_complement(s.as_bytes());
326 BufValue::from(String::from_utf8_lossy(&revcomp).into_owned())
327 }
328 _ => value.clone(),
329 }
330}
331
332#[must_use]
361pub fn to_smallest_signed_int(value: i32) -> BufValue {
362 if let Ok(v) = i8::try_from(value) {
363 BufValue::Int8(v)
364 } else if let Ok(v) = i16::try_from(value) {
365 BufValue::Int16(v)
366 } else {
367 BufValue::Int32(value)
368 }
369}
370
371#[must_use]
385pub fn buf_value_to_smallest_signed_int(value: &BufValue) -> Option<BufValue> {
386 let int_value = match value {
387 BufValue::Int8(i) => i32::from(*i),
388 BufValue::Int16(i) => i32::from(*i),
389 BufValue::Int32(i) => *i,
390 BufValue::UInt8(i) => i32::from(*i),
391 BufValue::UInt16(i) => i32::from(*i),
392 BufValue::UInt32(i) => i32::try_from(*i).ok()?,
393 _ => return None,
394 };
395 Some(to_smallest_signed_int(int_value))
396}
397
398#[cfg(test)]
399mod tests {
400 use super::*;
401 use noodles::sam::alignment::record_buf::data::field::value::Array;
402
403 #[test]
404 fn test_reverse_buf_value_int8_array() {
405 let value = BufValue::Array(Array::Int8(vec![1, 2, 3, 4, 5]));
406 let reversed = reverse_buf_value(&value);
407
408 if let BufValue::Array(Array::Int8(vals)) = reversed {
409 assert_eq!(vals, vec![5, 4, 3, 2, 1]);
410 } else {
411 panic!("Expected Int8 array");
412 }
413 }
414
415 #[test]
416 fn test_reverse_buf_value_uint8_array() {
417 let value = BufValue::Array(Array::UInt8(vec![10, 20, 30]));
418 let reversed = reverse_buf_value(&value);
419
420 if let BufValue::Array(Array::UInt8(vals)) = reversed {
421 assert_eq!(vals, vec![30, 20, 10]);
422 } else {
423 panic!("Expected UInt8 array");
424 }
425 }
426
427 #[test]
428 fn test_reverse_buf_value_int16_array() {
429 let value = BufValue::Array(Array::Int16(vec![100, 200, 300]));
430 let reversed = reverse_buf_value(&value);
431
432 if let BufValue::Array(Array::Int16(vals)) = reversed {
433 assert_eq!(vals, vec![300, 200, 100]);
434 } else {
435 panic!("Expected Int16 array");
436 }
437 }
438
439 #[test]
440 fn test_reverse_buf_value_uint16_array() {
441 let value = BufValue::Array(Array::UInt16(vec![1000, 2000]));
442 let reversed = reverse_buf_value(&value);
443
444 if let BufValue::Array(Array::UInt16(vals)) = reversed {
445 assert_eq!(vals, vec![2000, 1000]);
446 } else {
447 panic!("Expected UInt16 array");
448 }
449 }
450
451 #[test]
452 fn test_reverse_buf_value_int32_array() {
453 let value = BufValue::Array(Array::Int32(vec![10000, 20000, 30000]));
454 let reversed = reverse_buf_value(&value);
455
456 if let BufValue::Array(Array::Int32(vals)) = reversed {
457 assert_eq!(vals, vec![30000, 20000, 10000]);
458 } else {
459 panic!("Expected Int32 array");
460 }
461 }
462
463 #[test]
464 fn test_reverse_buf_value_uint32_array() {
465 let value = BufValue::Array(Array::UInt32(vec![100_000, 200_000]));
466 let reversed = reverse_buf_value(&value);
467
468 if let BufValue::Array(Array::UInt32(vals)) = reversed {
469 assert_eq!(vals, vec![200_000, 100_000]);
470 } else {
471 panic!("Expected UInt32 array");
472 }
473 }
474
475 #[test]
476 fn test_reverse_buf_value_float_array() {
477 let value = BufValue::Array(Array::Float(vec![1.1, 2.2, 3.3]));
478 let reversed = reverse_buf_value(&value);
479
480 if let BufValue::Array(Array::Float(vals)) = reversed {
481 assert!((vals[0] - 3.3).abs() < 0.001);
482 assert!((vals[1] - 2.2).abs() < 0.001);
483 assert!((vals[2] - 1.1).abs() < 0.001);
484 } else {
485 panic!("Expected Float array");
486 }
487 }
488
489 #[test]
490 fn test_reverse_buf_value_string() {
491 let value = BufValue::from("abcde".to_string());
492 let reversed = reverse_buf_value(&value);
493
494 if let BufValue::String(s) = reversed {
495 assert_eq!(s.to_string(), "edcba");
496 } else {
497 panic!("Expected String");
498 }
499 }
500
501 #[test]
502 fn test_revcomp_buf_value_simple() {
503 let value = BufValue::from("ACGT".to_string());
504 let revcomp = revcomp_buf_value(&value);
505
506 if let BufValue::String(s) = revcomp {
507 assert_eq!(s.to_string(), "ACGT"); } else {
509 panic!("Expected String");
510 }
511 }
512
513 #[test]
514 fn test_revcomp_buf_value_lowercase() {
515 let value = BufValue::from("acgt".to_string());
516 let revcomp = revcomp_buf_value(&value);
517
518 if let BufValue::String(s) = revcomp {
520 assert_eq!(s.to_string(), "ACGT");
521 } else {
522 panic!("Expected String");
523 }
524 }
525
526 #[test]
527 fn test_revcomp_buf_value_mixed_case() {
528 let value = BufValue::from("AcGt".to_string());
529 let revcomp = revcomp_buf_value(&value);
530
531 if let BufValue::String(s) = revcomp {
533 assert_eq!(s.to_string(), "ACGT");
534 } else {
535 panic!("Expected String");
536 }
537 }
538
539 #[test]
540 fn test_revcomp_buf_value_with_n() {
541 let value = BufValue::from("ACGTN".to_string());
542 let revcomp = revcomp_buf_value(&value);
543
544 if let BufValue::String(s) = revcomp {
545 assert_eq!(s.to_string(), "NACGT");
546 } else {
547 panic!("Expected String");
548 }
549 }
550
551 #[test]
552 fn test_revcomp_buf_value_complex() {
553 let value = BufValue::from("AAAGG".to_string());
554 let revcomp = revcomp_buf_value(&value);
555
556 if let BufValue::String(s) = revcomp {
557 assert_eq!(s.to_string(), "CCTTT");
558 } else {
559 panic!("Expected String");
560 }
561 }
562
563 #[test]
564 fn test_revcomp_buf_value_array() {
565 let value = BufValue::Array(Array::UInt8(vec![1, 2, 3]));
566 let revcomp = revcomp_buf_value(&value);
567
568 if let BufValue::Array(Array::UInt8(vals)) = revcomp {
570 assert_eq!(vals, vec![1, 2, 3]);
571 } else {
572 panic!("Expected UInt8 array");
573 }
574 }
575
576 #[test]
577 fn test_reverse_and_revcomp_combined() {
578 let original = BufValue::from("ACGT".to_string());
580
581 let reversed = reverse_buf_value(&original);
583 if let BufValue::String(s) = reversed {
584 assert_eq!(s.to_string(), "TGCA");
585 }
586
587 let revcomped = revcomp_buf_value(&original);
589 if let BufValue::String(s) = revcomped {
590 assert_eq!(s.to_string(), "ACGT");
591 }
592 }
593
594 #[test]
595 fn test_empty_string_operations() {
596 let value = BufValue::from(String::new());
597
598 let reversed = reverse_buf_value(&value);
599 if let BufValue::String(s) = reversed {
600 assert_eq!(s.to_string(), "");
601 }
602
603 let revcomped = revcomp_buf_value(&value);
604 if let BufValue::String(s) = revcomped {
605 assert_eq!(s.to_string(), "");
606 }
607 }
608
609 #[test]
610 fn test_empty_array_operations() {
611 let value = BufValue::Array(Array::UInt8(vec![]));
612 let reversed = reverse_buf_value(&value);
613
614 if let BufValue::Array(Array::UInt8(vals)) = reversed {
615 assert!(vals.is_empty());
616 } else {
617 panic!("Expected empty UInt8 array");
618 }
619 }
620
621 fn create_header_with_so(sort_order: &str) -> Header {
626 let header_str = format!("@HD\tVN:1.6\tSO:{sort_order}\n");
627 header_str.parse().unwrap()
628 }
629
630 fn create_header_without_so() -> Header {
631 let header_str = "@HD\tVN:1.6\n";
632 header_str.parse().unwrap()
633 }
634
635 fn create_empty_header() -> Header {
636 Header::default()
637 }
638
639 #[test]
640 fn test_is_sorted_queryname_matches() {
641 use noodles::sam::header::record::value::map::header::sort_order::QUERY_NAME;
642 let header = create_header_with_so("queryname");
643 assert!(is_sorted(&header, QUERY_NAME));
644 }
645
646 #[test]
647 fn test_is_sorted_coordinate_matches() {
648 use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
649 let header = create_header_with_so("coordinate");
650 assert!(is_sorted(&header, COORDINATE));
651 }
652
653 #[test]
654 fn test_is_sorted_unsorted_matches() {
655 let header = create_header_with_so("unsorted");
656 assert!(is_sorted(&header, UNSORTED));
657 }
658
659 #[test]
660 fn test_is_sorted_mismatch() {
661 use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
662 let header = create_header_with_so("queryname");
663 assert!(!is_sorted(&header, COORDINATE));
664 }
665
666 #[test]
667 fn test_is_sorted_no_so_tag() {
668 use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
669 let header = create_header_without_so();
670 assert!(!is_sorted(&header, COORDINATE));
671 }
672
673 #[test]
674 fn test_is_sorted_empty_header() {
675 use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
676 let header = create_empty_header();
677 assert!(!is_sorted(&header, COORDINATE));
678 }
679
680 fn create_template_coord_header(so: &str, go: Option<&str>, ss: Option<&str>) -> Header {
685 use std::fmt::Write;
686 let mut header_str = format!("@HD\tVN:1.6\tSO:{so}");
687 if let Some(go_val) = go {
688 write!(header_str, "\tGO:{go_val}").unwrap();
689 }
690 if let Some(ss_val) = ss {
691 write!(header_str, "\tSS:{ss_val}").unwrap();
692 }
693 header_str.push('\n');
694 header_str.parse().unwrap()
695 }
696
697 #[test]
698 fn test_is_template_coordinate_sorted_valid_minimal() {
699 let header = create_template_coord_header("unsorted", Some("query"), None);
701 assert!(is_template_coordinate_sorted(&header));
702 }
703
704 #[test]
705 fn test_is_template_coordinate_sorted_valid_with_ss() {
706 let header =
708 create_template_coord_header("unsorted", Some("query"), Some("template-coordinate"));
709 assert!(is_template_coordinate_sorted(&header));
710 }
711
712 #[test]
713 fn test_is_template_coordinate_sorted_valid_with_prefixed_ss() {
714 let header = create_template_coord_header(
716 "unsorted",
717 Some("query"),
718 Some("unsorted:template-coordinate"),
719 );
720 assert!(is_template_coordinate_sorted(&header));
721 }
722
723 #[test]
724 fn test_is_template_coordinate_sorted_invalid_so_coordinate() {
725 let header = create_template_coord_header("coordinate", Some("query"), None);
727 assert!(!is_template_coordinate_sorted(&header));
728 }
729
730 #[test]
731 fn test_is_template_coordinate_sorted_invalid_so_queryname() {
732 let header = create_template_coord_header("queryname", Some("query"), None);
734 assert!(!is_template_coordinate_sorted(&header));
735 }
736
737 #[test]
738 fn test_is_template_coordinate_sorted_invalid_go_none() {
739 let header = create_template_coord_header("unsorted", Some("none"), None);
741 assert!(!is_template_coordinate_sorted(&header));
742 }
743
744 #[test]
745 fn test_is_template_coordinate_sorted_invalid_go_reference() {
746 let header = create_template_coord_header("unsorted", Some("reference"), None);
748 assert!(!is_template_coordinate_sorted(&header));
749 }
750
751 #[test]
752 fn test_is_template_coordinate_sorted_missing_go() {
753 let header = create_template_coord_header("unsorted", None, None);
755 assert!(!is_template_coordinate_sorted(&header));
756 }
757
758 #[test]
759 fn test_is_template_coordinate_sorted_invalid_ss() {
760 let header = create_template_coord_header("unsorted", Some("query"), Some("coordinate"));
762 assert!(!is_template_coordinate_sorted(&header));
763 }
764
765 #[test]
766 fn test_is_template_coordinate_sorted_empty_header() {
767 let header = create_empty_header();
768 assert!(!is_template_coordinate_sorted(&header));
769 }
770
771 #[test]
776 fn test_reverse_buf_value_integer_unchanged() {
777 let value = BufValue::from(42_i32);
778 let reversed = reverse_buf_value(&value);
779 assert_eq!(reversed, BufValue::from(42_i32));
780 }
781
782 #[test]
783 fn test_reverse_buf_value_character_unchanged() {
784 let value = BufValue::Character(b'X');
785 let reversed = reverse_buf_value(&value);
786 assert_eq!(reversed, BufValue::Character(b'X'));
787 }
788
789 #[test]
794 fn test_revcomp_buf_value_ambiguous_bases_unchanged() {
795 let value = BufValue::from("RYSWKM".to_string());
797 let revcomp = revcomp_buf_value(&value);
798 if let BufValue::String(s) = revcomp {
799 assert_eq!(s.to_string(), "MKWSYR");
801 } else {
802 panic!("Expected String");
803 }
804 }
805
806 #[test]
807 fn test_revcomp_buf_value_integer_unchanged() {
808 let value = BufValue::from(42_i32);
809 let revcomp = revcomp_buf_value(&value);
810 assert_eq!(revcomp, BufValue::from(42_i32));
811 }
812
813 #[test]
818 fn test_to_smallest_signed_int_fits_in_i8() {
819 assert_eq!(to_smallest_signed_int(0), BufValue::Int8(0));
821 assert_eq!(to_smallest_signed_int(60), BufValue::Int8(60));
822 assert_eq!(to_smallest_signed_int(127), BufValue::Int8(127));
823 assert_eq!(to_smallest_signed_int(-128), BufValue::Int8(-128));
824 assert_eq!(to_smallest_signed_int(-1), BufValue::Int8(-1));
825 }
826
827 #[test]
828 fn test_to_smallest_signed_int_fits_in_i16() {
829 assert_eq!(to_smallest_signed_int(128), BufValue::Int16(128));
831 assert_eq!(to_smallest_signed_int(1000), BufValue::Int16(1000));
832 assert_eq!(to_smallest_signed_int(32767), BufValue::Int16(32767));
833 assert_eq!(to_smallest_signed_int(-129), BufValue::Int16(-129));
834 assert_eq!(to_smallest_signed_int(-32768), BufValue::Int16(-32768));
835 }
836
837 #[test]
838 fn test_to_smallest_signed_int_requires_i32() {
839 assert_eq!(to_smallest_signed_int(32768), BufValue::Int32(32768));
841 assert_eq!(to_smallest_signed_int(100_000), BufValue::Int32(100_000));
842 assert_eq!(to_smallest_signed_int(-32769), BufValue::Int32(-32769));
843 }
844
845 #[test]
850 fn test_buf_value_to_smallest_signed_int_from_uint8() {
851 let value = BufValue::UInt8(60);
853 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
854
855 let value = BufValue::UInt8(200);
856 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int16(200)));
857 }
858
859 #[test]
860 fn test_buf_value_to_smallest_signed_int_from_int8() {
861 let value = BufValue::Int8(60);
863 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
864
865 let value = BufValue::Int8(-50);
866 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(-50)));
867 }
868
869 #[test]
870 fn test_buf_value_to_smallest_signed_int_from_int16() {
871 let value = BufValue::Int16(60);
873 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
874
875 let value = BufValue::Int16(1000);
876 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int16(1000)));
877 }
878
879 #[test]
880 fn test_buf_value_to_smallest_signed_int_from_int32() {
881 let value = BufValue::Int32(60);
883 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
884
885 let value = BufValue::Int32(1000);
886 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int16(1000)));
887
888 let value = BufValue::Int32(100_000);
889 assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int32(100_000)));
890 }
891
892 #[test]
893 fn test_buf_value_to_smallest_signed_int_non_integer_returns_none() {
894 let value = BufValue::from("hello".to_string());
896 assert_eq!(buf_value_to_smallest_signed_int(&value), None);
897
898 let value = BufValue::Character(b'X');
899 assert_eq!(buf_value_to_smallest_signed_int(&value), None);
900 }
901}