Skip to main content

fgumi_sam/
lib.rs

1#![deny(unsafe_code)]
2
3//! SAM/BAM file utilities and header manipulation.
4//!
5//! This module provides utilities for working with SAM/BAM files, including:
6//! - Checking and validating SAM header sort orders
7//! - Reversing and reverse-complementing per-base tag values
8//! - Template-coordinate sorting validation
9//! - Test utilities for building SAM/BAM records
10//! - Record-level utilities (position mapping, FR pair detection, CIGAR parsing)
11//! - Read-pair clipping utilities
12//!
13//! # Sort Orders
14//!
15//! The module supports several important sort orders:
16//! - **queryname** - Reads sorted by query name (required for grouping by UMI)
17//! - **template-coordinate** - Special sort order from fgbio where reads are grouped
18//!   by query name but ordered by genomic position within each template
19//!
20//! # Tag Manipulation
21//!
22//! Functions are provided to reverse or reverse-complement per-base tag values when
23//! reads are mapped to the negative strand, ensuring tag values match the orientation
24//! of the read sequence.
25//!
26//! # Record Utilities
27//!
28//! The [`record_utils`] submodule provides utilities for working with individual records:
29//! - [`record_utils::read_pos_at_ref_pos`] - Map reference position to read position
30//! - [`record_utils::is_fr_pair_from_tags`] - Check if read is part of FR pair using tags
31//! - [`record_utils::mate_unclipped_start`] / [`record_utils::mate_unclipped_end`] - Get mate boundaries from MC tag
32//! - [`record_utils::num_bases_extending_past_mate`] - Calculate overlap with mate
33//! - [`record_utils::parse_cigar_string`] - Parse CIGAR string to operations
34
35pub mod alignment_tags;
36pub mod builder;
37pub mod clipper;
38pub mod record_utils;
39
/// Abstraction over reference genome access for alignment tag calculation.
///
/// This trait allows alignment tag functions to work with any reference
/// provider without pulling in heavy dependencies like FASTA file I/O.
///
/// A blanket implementation is provided for all types that dereference to a
/// `ReferenceProvider` (e.g., `Arc<T>`, `Box<T>`, `&T`), so callers can pass
/// smart pointers directly.
pub trait ReferenceProvider {
    /// Fetches a subsequence from the reference genome.
    ///
    /// # Arguments
    /// * `chrom` - Chromosome/sequence name (e.g., "chr1")
    /// * `start` - Start position (1-based, inclusive)
    /// * `end` - End position (1-based, inclusive)
    ///
    /// # Returns
    /// The requested subsequence as an owned vector of bytes.
    ///
    /// # Errors
    /// Returns an error if the chromosome is not found or the region is out of bounds.
    fn fetch(
        &self,
        chrom: &str,
        start: noodles::core::Position,
        end: noodles::core::Position,
    ) -> anyhow::Result<Vec<u8>>;
}
65
66impl<T: std::ops::Deref> ReferenceProvider for T
67where
68    T::Target: ReferenceProvider,
69{
70    fn fetch(
71        &self,
72        chrom: &str,
73        start: noodles::core::Position,
74        end: noodles::core::Position,
75    ) -> anyhow::Result<Vec<u8>> {
76        self.deref().fetch(chrom, start, end)
77    }
78}
79
80// Re-export commonly used items from submodules for convenience
81pub use builder::{
82    ConsensusTagsBuilder, FragBuilder, MAPPED_PG_ID, PairBuilder, REFERENCE_LENGTH, RecordBuilder,
83    SamBuilder, Strand, create_default_test_fasta, create_ref_dict, create_test_fasta,
84    degrading_qualities, parse_cigar, repeat_n, uniform_qualities,
85};
86pub use clipper::{ClippingMode, SamRecordClipper};
87pub use record_utils::{
88    PairOrientation, alignment_end, cigar_reference_length, get_pair_orientation, is_fr_pair,
89    is_fr_pair_from_tags, leading_clipping, leading_soft_clipping, mate_unclipped_end,
90    mate_unclipped_start, num_bases_extending_past_mate, parse_cigar_string, read_pos_at_ref_pos,
91    reference_length, trailing_clipping, trailing_soft_clipping, unclipped_end,
92    unclipped_five_prime_position, unclipped_start,
93};
94
95use bstr::ByteSlice;
96use log::warn;
97use noodles::sam::Header;
98
99use noodles::sam::alignment::record_buf::data::field::Value as BufValue;
100use noodles::sam::header::record::value::map::header::sort_order::{QUERY_NAME, UNSORTED};
101use std::path::Path;
102
103/// Checks if a BAM file has a specified sort order according to its header.
104///
105/// Examines the SAM header for the SO (sort order) tag and compares it to the
106/// specified sort order. This is useful for validating that input files are
107/// properly sorted before processing.
108///
109/// # Arguments
110///
111/// * `header` - SAM header to check
112/// * `sort_order` - The expected sort order (e.g., `QUERY_NAME`, `COORDINATE`)
113///
114/// # Returns
115///
116/// `true` if the header's SO tag matches the specified sort order, `false` otherwise
117///
118/// # Examples
119///
120/// ```rust,ignore
121/// use noodles::sam::header::record::value::map::header::sort_order::QUERY_NAME;
122///
123/// if is_sorted(&header, QUERY_NAME) {
124///     // Process queryname-sorted data
125/// }
126/// ```
127#[must_use]
128pub fn is_sorted(header: &Header, sort_order: &[u8]) -> bool {
129    if let Some(hdr_map) = header.header() {
130        hdr_map
131            .other_fields()
132            .get(b"SO")
133            .is_some_and(|so| <_ as AsRef<[u8]>>::as_ref(so) == sort_order)
134    } else {
135        false
136    }
137}
138
139/// Checks if a BAM file is template-coordinate sorted according to its header.
140///
141/// Template-coordinate sorting is a special sort order used by fgbio where reads are:
142/// 1. Grouped by query name (all reads with same name together)
143/// 2. Ordered by the genomic position of the template's lower coordinate
144///
145/// This sort order is indicated in the SAM header by:
146/// - SO:unsorted (not coordinate sorted)
147/// - GO:query (grouped by query name)
148/// - SS:template-coordinate (optional, but must match if present)
149///
150/// This matches the behavior of fgbio's `GroupReadsByUmi` output format.
151///
152/// # Arguments
153///
154/// * `header` - SAM header to check
155///
156/// # Returns
157///
158/// `true` if the header indicates template-coordinate sorting, `false` otherwise
159///
160/// # Examples
161///
162/// ```rust,ignore
163/// if is_template_coordinate_sorted(&header) {
164///     // Process template-coordinate sorted data efficiently
165/// }
166/// ```
167#[must_use]
168pub fn is_template_coordinate_sorted(header: &Header) -> bool {
169    if let Some(hdr_map) = header.header() {
170        let other_fields = hdr_map.other_fields();
171
172        // Check SO tag - must be "unsorted"
173        let is_unsorted =
174            other_fields.get(b"SO").is_some_and(|so| <_ as AsRef<[u8]>>::as_ref(so) == UNSORTED);
175
176        // Check GO tag - must be "query"
177        let is_query_grouped =
178            other_fields.get(b"GO").is_some_and(|go| <_ as AsRef<[u8]>>::as_ref(go) == b"query");
179
180        // Check SS tag - if present, must be "template-coordinate" (or "SO:template-coordinate"), but it's optional
181        // The SS tag may be prefixed with the sort order (e.g., "unsorted:template-coordinate")
182        // per the SAM spec, so we need to extract the part after the colon
183        let ss_matches = other_fields.get(b"SS").is_none_or(|ss| {
184            let ss_bytes = <_ as AsRef<[u8]>>::as_ref(ss);
185            // Find the last colon and take everything after it, or use the whole value if no colon
186            if let Some(colon_pos) = ss_bytes.iter().position(|&b| b == b':') {
187                &ss_bytes[colon_pos + 1..] == b"template-coordinate"
188            } else {
189                ss_bytes == b"template-coordinate"
190            }
191        }); // If SS is missing, that's acceptable
192
193        is_unsorted && is_query_grouped && ss_matches
194    } else {
195        false
196    }
197}
198
199/// Checks if a BAM file is queryname sorted and logs a warning if not.
200///
201/// This function validates that the input file has the correct sort order for
202/// processing (typically queryname). If the sort order is incorrect, it logs
203/// a warning but does not fail, allowing processing to continue with potentially
204/// incorrect results.
205///
206/// Use this for non-critical validation where you want to warn users but not
207/// prevent them from proceeding.
208///
209/// # Arguments
210///
211/// * `header` - SAM header to check
212/// * `path` - Path to the BAM file (used in warning messages)
213/// * `name` - Descriptive name of the file (e.g., "unmapped", "input")
214///
215/// # Examples
216///
217/// ```rust,ignore
218/// check_sort(&header, input_path, "input");
219/// // Will log: "input file 'foo.bam' does not appear to be queryname sorted..."
220/// ```
221pub fn check_sort(header: &Header, path: &Path, name: &str) {
222    if !is_sorted(header, QUERY_NAME) {
223        warn!(
224            "{name} file {} does not appear to be queryname sorted per the SAM header.",
225            path.display()
226        );
227        warn!("Continuing, but your output may be incorrect.");
228    }
229}
230
231/// Reverses a `BufValue` (array or string).
232///
233/// This function reverses per-base tag values to match read orientation when reads
234/// are mapped to the negative strand. Arrays and strings are reversed element-wise.
235///
236/// Supported types:
237/// - Arrays of any numeric type (i8, u8, i16, u16, i32, u32, f32)
238/// - Strings (character order reversed)
239///
240/// Other value types (integers, floats, characters) are returned unchanged.
241///
242/// # Arguments
243///
244/// * `value` - The value to reverse
245///
246/// # Returns
247///
248/// A new `BufValue` with reversed contents, or a clone if not reversible
249///
250/// # Examples
251///
252/// ```rust,ignore
253/// // Reverse per-base quality scores for negative strand read
254/// let quals = BufValue::Array(UInt8(vec![30, 25, 20, 15]));
255/// let reversed = reverse_buf_value(&quals);
256/// // Result: Array(UInt8(vec![15, 20, 25, 30]))
257/// ```
258#[must_use]
259pub fn reverse_buf_value(value: &BufValue) -> BufValue {
260    use noodles::sam::alignment::record_buf::data::field::value::Array;
261
262    /// Clones a vec and reverses it, wrapping back in the same Array variant.
263    macro_rules! clone_reverse {
264        ($variant:ident, $vec:expr) => {{
265            let mut values = $vec.clone();
266            values.reverse();
267            Array::$variant(values)
268        }};
269    }
270
271    match value {
272        BufValue::Array(arr) => {
273            let new_arr = match arr {
274                Array::Int8(v) => clone_reverse!(Int8, v),
275                Array::UInt8(v) => clone_reverse!(UInt8, v),
276                Array::Int16(v) => clone_reverse!(Int16, v),
277                Array::UInt16(v) => clone_reverse!(UInt16, v),
278                Array::Int32(v) => clone_reverse!(Int32, v),
279                Array::UInt32(v) => clone_reverse!(UInt32, v),
280                Array::Float(v) => clone_reverse!(Float, v),
281            };
282            BufValue::Array(new_arr)
283        }
284        BufValue::String(s) => {
285            let mut bytes = s.as_bytes().to_vec();
286            bytes.reverse();
287            BufValue::from(String::from_utf8_lossy(&bytes).to_string())
288        }
289        _ => value.clone(),
290    }
291}
292
293/// Reverse complements a DNA sequence `BufValue`.
294///
295/// This function performs a full reverse complement operation on DNA sequences stored
296/// in per-base tags. It reverses the order of bases and complements each base:
297/// - A <-> T
298/// - C <-> G
299/// - Normalizes to uppercase (a -> T, t -> A, c -> G, g -> C)
300/// - Other characters are left unchanged
301///
302/// This is essential for per-base sequence tags (like consensus bases) when reads
303/// are mapped to the negative strand, ensuring the tag sequences match the read orientation.
304///
305/// # Arguments
306///
307/// * `value` - The value to reverse complement (must be a String)
308///
309/// # Returns
310///
311/// A new `BufValue` with reverse complemented sequence, or a clone if not a string
312///
313/// # Examples
314///
315/// ```rust,ignore
316/// // Reverse complement per-base consensus sequences for negative strand
317/// let bases = BufValue::String("ACGT".to_string());
318/// let revcomp = revcomp_buf_value(&bases);
319/// // Result: String("ACGT") [reverse of TGCA]
320/// ```
321#[must_use]
322pub fn revcomp_buf_value(value: &BufValue) -> BufValue {
323    match value {
324        BufValue::String(s) => {
325            let revcomp = fgumi_dna::reverse_complement(s.as_bytes());
326            BufValue::from(String::from_utf8_lossy(&revcomp).into_owned())
327        }
328        _ => value.clone(),
329    }
330}
331
332/// Converts an integer value to the smallest signed integer `BufValue` that fits.
333///
334/// This is used to ensure integer tags are written with the same type encoding as fgbio.
335/// fgbio uses signed integer types for tags like MQ, AS, XS, and ms, while noodles
336/// may choose unsigned types when writing positive values.
337///
338/// The function chooses the smallest signed type that fits:
339/// - Int8 for values in [-128, 127]
340/// - Int16 for values in [-32768, 32767]
341/// - Int32 for larger values
342///
343/// # Arguments
344///
345/// * `value` - The integer value to convert
346///
347/// # Returns
348///
349/// A `BufValue` using the smallest signed type that fits the value
350///
351/// # Examples
352///
353/// ```rust,ignore
354/// let mq_value = to_smallest_signed_int(60);
355/// // Result: BufValue::Int8(60)
356///
357/// let as_value = to_smallest_signed_int(1000);
358/// // Result: BufValue::Int16(1000)
359/// ```
360#[must_use]
361pub fn to_smallest_signed_int(value: i32) -> BufValue {
362    if let Ok(v) = i8::try_from(value) {
363        BufValue::Int8(v)
364    } else if let Ok(v) = i16::try_from(value) {
365        BufValue::Int16(v)
366    } else {
367        BufValue::Int32(value)
368    }
369}
370
371/// Converts an integer `BufValue` from any integer type to the smallest signed type.
372///
373/// This is used to normalize integer tags that may have been read with varying types
374/// (`Int32`, `UInt8`, etc.) to signed integer format that matches fgbio's encoding.
375///
376/// # Arguments
377///
378/// * `value` - The `BufValue` to convert
379///
380/// # Returns
381///
382/// `Some(BufValue)` with the smallest signed type if the value is an integer type,
383/// `None` if not an integer type
384#[must_use]
385pub fn buf_value_to_smallest_signed_int(value: &BufValue) -> Option<BufValue> {
386    let int_value = match value {
387        BufValue::Int8(i) => i32::from(*i),
388        BufValue::Int16(i) => i32::from(*i),
389        BufValue::Int32(i) => *i,
390        BufValue::UInt8(i) => i32::from(*i),
391        BufValue::UInt16(i) => i32::from(*i),
392        BufValue::UInt32(i) => i32::try_from(*i).ok()?,
393        _ => return None,
394    };
395    Some(to_smallest_signed_int(int_value))
396}
397
398#[cfg(test)]
399mod tests {
400    use super::*;
401    use noodles::sam::alignment::record_buf::data::field::value::Array;
402
403    #[test]
404    fn test_reverse_buf_value_int8_array() {
405        let value = BufValue::Array(Array::Int8(vec![1, 2, 3, 4, 5]));
406        let reversed = reverse_buf_value(&value);
407
408        if let BufValue::Array(Array::Int8(vals)) = reversed {
409            assert_eq!(vals, vec![5, 4, 3, 2, 1]);
410        } else {
411            panic!("Expected Int8 array");
412        }
413    }
414
415    #[test]
416    fn test_reverse_buf_value_uint8_array() {
417        let value = BufValue::Array(Array::UInt8(vec![10, 20, 30]));
418        let reversed = reverse_buf_value(&value);
419
420        if let BufValue::Array(Array::UInt8(vals)) = reversed {
421            assert_eq!(vals, vec![30, 20, 10]);
422        } else {
423            panic!("Expected UInt8 array");
424        }
425    }
426
427    #[test]
428    fn test_reverse_buf_value_int16_array() {
429        let value = BufValue::Array(Array::Int16(vec![100, 200, 300]));
430        let reversed = reverse_buf_value(&value);
431
432        if let BufValue::Array(Array::Int16(vals)) = reversed {
433            assert_eq!(vals, vec![300, 200, 100]);
434        } else {
435            panic!("Expected Int16 array");
436        }
437    }
438
439    #[test]
440    fn test_reverse_buf_value_uint16_array() {
441        let value = BufValue::Array(Array::UInt16(vec![1000, 2000]));
442        let reversed = reverse_buf_value(&value);
443
444        if let BufValue::Array(Array::UInt16(vals)) = reversed {
445            assert_eq!(vals, vec![2000, 1000]);
446        } else {
447            panic!("Expected UInt16 array");
448        }
449    }
450
451    #[test]
452    fn test_reverse_buf_value_int32_array() {
453        let value = BufValue::Array(Array::Int32(vec![10000, 20000, 30000]));
454        let reversed = reverse_buf_value(&value);
455
456        if let BufValue::Array(Array::Int32(vals)) = reversed {
457            assert_eq!(vals, vec![30000, 20000, 10000]);
458        } else {
459            panic!("Expected Int32 array");
460        }
461    }
462
463    #[test]
464    fn test_reverse_buf_value_uint32_array() {
465        let value = BufValue::Array(Array::UInt32(vec![100_000, 200_000]));
466        let reversed = reverse_buf_value(&value);
467
468        if let BufValue::Array(Array::UInt32(vals)) = reversed {
469            assert_eq!(vals, vec![200_000, 100_000]);
470        } else {
471            panic!("Expected UInt32 array");
472        }
473    }
474
475    #[test]
476    fn test_reverse_buf_value_float_array() {
477        let value = BufValue::Array(Array::Float(vec![1.1, 2.2, 3.3]));
478        let reversed = reverse_buf_value(&value);
479
480        if let BufValue::Array(Array::Float(vals)) = reversed {
481            assert!((vals[0] - 3.3).abs() < 0.001);
482            assert!((vals[1] - 2.2).abs() < 0.001);
483            assert!((vals[2] - 1.1).abs() < 0.001);
484        } else {
485            panic!("Expected Float array");
486        }
487    }
488
489    #[test]
490    fn test_reverse_buf_value_string() {
491        let value = BufValue::from("abcde".to_string());
492        let reversed = reverse_buf_value(&value);
493
494        if let BufValue::String(s) = reversed {
495            assert_eq!(s.to_string(), "edcba");
496        } else {
497            panic!("Expected String");
498        }
499    }
500
501    #[test]
502    fn test_revcomp_buf_value_simple() {
503        let value = BufValue::from("ACGT".to_string());
504        let revcomp = revcomp_buf_value(&value);
505
506        if let BufValue::String(s) = revcomp {
507            assert_eq!(s.to_string(), "ACGT"); // reverse of "TGCA" -> "ACGT"
508        } else {
509            panic!("Expected String");
510        }
511    }
512
513    #[test]
514    fn test_revcomp_buf_value_lowercase() {
515        let value = BufValue::from("acgt".to_string());
516        let revcomp = revcomp_buf_value(&value);
517
518        // Lowercase is normalized to uppercase
519        if let BufValue::String(s) = revcomp {
520            assert_eq!(s.to_string(), "ACGT");
521        } else {
522            panic!("Expected String");
523        }
524    }
525
526    #[test]
527    fn test_revcomp_buf_value_mixed_case() {
528        let value = BufValue::from("AcGt".to_string());
529        let revcomp = revcomp_buf_value(&value);
530
531        // Mixed case is normalized to uppercase
532        if let BufValue::String(s) = revcomp {
533            assert_eq!(s.to_string(), "ACGT");
534        } else {
535            panic!("Expected String");
536        }
537    }
538
539    #[test]
540    fn test_revcomp_buf_value_with_n() {
541        let value = BufValue::from("ACGTN".to_string());
542        let revcomp = revcomp_buf_value(&value);
543
544        if let BufValue::String(s) = revcomp {
545            assert_eq!(s.to_string(), "NACGT");
546        } else {
547            panic!("Expected String");
548        }
549    }
550
551    #[test]
552    fn test_revcomp_buf_value_complex() {
553        let value = BufValue::from("AAAGG".to_string());
554        let revcomp = revcomp_buf_value(&value);
555
556        if let BufValue::String(s) = revcomp {
557            assert_eq!(s.to_string(), "CCTTT");
558        } else {
559            panic!("Expected String");
560        }
561    }
562
563    #[test]
564    fn test_revcomp_buf_value_array() {
565        let value = BufValue::Array(Array::UInt8(vec![1, 2, 3]));
566        let revcomp = revcomp_buf_value(&value);
567
568        // Should return unchanged for non-string types
569        if let BufValue::Array(Array::UInt8(vals)) = revcomp {
570            assert_eq!(vals, vec![1, 2, 3]);
571        } else {
572            panic!("Expected UInt8 array");
573        }
574    }
575
576    #[test]
577    fn test_reverse_and_revcomp_combined() {
578        // Test that reverse and revcomp work correctly together
579        let original = BufValue::from("ACGT".to_string());
580
581        // Reverse: "ACGT" -> "TGCA"
582        let reversed = reverse_buf_value(&original);
583        if let BufValue::String(s) = reversed {
584            assert_eq!(s.to_string(), "TGCA");
585        }
586
587        // Revcomp: "ACGT" -> "ACGT"
588        let revcomped = revcomp_buf_value(&original);
589        if let BufValue::String(s) = revcomped {
590            assert_eq!(s.to_string(), "ACGT");
591        }
592    }
593
594    #[test]
595    fn test_empty_string_operations() {
596        let value = BufValue::from(String::new());
597
598        let reversed = reverse_buf_value(&value);
599        if let BufValue::String(s) = reversed {
600            assert_eq!(s.to_string(), "");
601        }
602
603        let revcomped = revcomp_buf_value(&value);
604        if let BufValue::String(s) = revcomped {
605            assert_eq!(s.to_string(), "");
606        }
607    }
608
609    #[test]
610    fn test_empty_array_operations() {
611        let value = BufValue::Array(Array::UInt8(vec![]));
612        let reversed = reverse_buf_value(&value);
613
614        if let BufValue::Array(Array::UInt8(vals)) = reversed {
615            assert!(vals.is_empty());
616        } else {
617            panic!("Expected empty UInt8 array");
618        }
619    }
620
621    // =========================================================================
622    // Tests for is_sorted()
623    // =========================================================================
624
625    fn create_header_with_so(sort_order: &str) -> Header {
626        let header_str = format!("@HD\tVN:1.6\tSO:{sort_order}\n");
627        header_str.parse().unwrap()
628    }
629
630    fn create_header_without_so() -> Header {
631        let header_str = "@HD\tVN:1.6\n";
632        header_str.parse().unwrap()
633    }
634
635    fn create_empty_header() -> Header {
636        Header::default()
637    }
638
639    #[test]
640    fn test_is_sorted_queryname_matches() {
641        use noodles::sam::header::record::value::map::header::sort_order::QUERY_NAME;
642        let header = create_header_with_so("queryname");
643        assert!(is_sorted(&header, QUERY_NAME));
644    }
645
646    #[test]
647    fn test_is_sorted_coordinate_matches() {
648        use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
649        let header = create_header_with_so("coordinate");
650        assert!(is_sorted(&header, COORDINATE));
651    }
652
653    #[test]
654    fn test_is_sorted_unsorted_matches() {
655        let header = create_header_with_so("unsorted");
656        assert!(is_sorted(&header, UNSORTED));
657    }
658
659    #[test]
660    fn test_is_sorted_mismatch() {
661        use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
662        let header = create_header_with_so("queryname");
663        assert!(!is_sorted(&header, COORDINATE));
664    }
665
666    #[test]
667    fn test_is_sorted_no_so_tag() {
668        use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
669        let header = create_header_without_so();
670        assert!(!is_sorted(&header, COORDINATE));
671    }
672
673    #[test]
674    fn test_is_sorted_empty_header() {
675        use noodles::sam::header::record::value::map::header::sort_order::COORDINATE;
676        let header = create_empty_header();
677        assert!(!is_sorted(&header, COORDINATE));
678    }
679
680    // =========================================================================
681    // Tests for is_template_coordinate_sorted()
682    // =========================================================================
683
684    fn create_template_coord_header(so: &str, go: Option<&str>, ss: Option<&str>) -> Header {
685        use std::fmt::Write;
686        let mut header_str = format!("@HD\tVN:1.6\tSO:{so}");
687        if let Some(go_val) = go {
688            write!(header_str, "\tGO:{go_val}").unwrap();
689        }
690        if let Some(ss_val) = ss {
691            write!(header_str, "\tSS:{ss_val}").unwrap();
692        }
693        header_str.push('\n');
694        header_str.parse().unwrap()
695    }
696
697    #[test]
698    fn test_is_template_coordinate_sorted_valid_minimal() {
699        // SO:unsorted + GO:query (no SS)
700        let header = create_template_coord_header("unsorted", Some("query"), None);
701        assert!(is_template_coordinate_sorted(&header));
702    }
703
704    #[test]
705    fn test_is_template_coordinate_sorted_valid_with_ss() {
706        // SO:unsorted + GO:query + SS:template-coordinate
707        let header =
708            create_template_coord_header("unsorted", Some("query"), Some("template-coordinate"));
709        assert!(is_template_coordinate_sorted(&header));
710    }
711
712    #[test]
713    fn test_is_template_coordinate_sorted_valid_with_prefixed_ss() {
714        // SO:unsorted + GO:query + SS:unsorted:template-coordinate
715        let header = create_template_coord_header(
716            "unsorted",
717            Some("query"),
718            Some("unsorted:template-coordinate"),
719        );
720        assert!(is_template_coordinate_sorted(&header));
721    }
722
723    #[test]
724    fn test_is_template_coordinate_sorted_invalid_so_coordinate() {
725        // SO:coordinate is wrong
726        let header = create_template_coord_header("coordinate", Some("query"), None);
727        assert!(!is_template_coordinate_sorted(&header));
728    }
729
730    #[test]
731    fn test_is_template_coordinate_sorted_invalid_so_queryname() {
732        // SO:queryname is wrong
733        let header = create_template_coord_header("queryname", Some("query"), None);
734        assert!(!is_template_coordinate_sorted(&header));
735    }
736
737    #[test]
738    fn test_is_template_coordinate_sorted_invalid_go_none() {
739        // GO:none is wrong
740        let header = create_template_coord_header("unsorted", Some("none"), None);
741        assert!(!is_template_coordinate_sorted(&header));
742    }
743
744    #[test]
745    fn test_is_template_coordinate_sorted_invalid_go_reference() {
746        // GO:reference is wrong
747        let header = create_template_coord_header("unsorted", Some("reference"), None);
748        assert!(!is_template_coordinate_sorted(&header));
749    }
750
751    #[test]
752    fn test_is_template_coordinate_sorted_missing_go() {
753        // Missing GO tag
754        let header = create_template_coord_header("unsorted", None, None);
755        assert!(!is_template_coordinate_sorted(&header));
756    }
757
758    #[test]
759    fn test_is_template_coordinate_sorted_invalid_ss() {
760        // Wrong SS value
761        let header = create_template_coord_header("unsorted", Some("query"), Some("coordinate"));
762        assert!(!is_template_coordinate_sorted(&header));
763    }
764
765    #[test]
766    fn test_is_template_coordinate_sorted_empty_header() {
767        let header = create_empty_header();
768        assert!(!is_template_coordinate_sorted(&header));
769    }
770
771    // =========================================================================
772    // Tests for reverse_buf_value with scalar types
773    // =========================================================================
774
775    #[test]
776    fn test_reverse_buf_value_integer_unchanged() {
777        let value = BufValue::from(42_i32);
778        let reversed = reverse_buf_value(&value);
779        assert_eq!(reversed, BufValue::from(42_i32));
780    }
781
782    #[test]
783    fn test_reverse_buf_value_character_unchanged() {
784        let value = BufValue::Character(b'X');
785        let reversed = reverse_buf_value(&value);
786        assert_eq!(reversed, BufValue::Character(b'X'));
787    }
788
789    // =========================================================================
790    // Tests for revcomp_buf_value edge cases
791    // =========================================================================
792
793    #[test]
794    fn test_revcomp_buf_value_ambiguous_bases_unchanged() {
795        // IUPAC ambiguity codes are not complemented
796        let value = BufValue::from("RYSWKM".to_string());
797        let revcomp = revcomp_buf_value(&value);
798        if let BufValue::String(s) = revcomp {
799            // Reversed but not complemented (our function only complements ACGTN)
800            assert_eq!(s.to_string(), "MKWSYR");
801        } else {
802            panic!("Expected String");
803        }
804    }
805
806    #[test]
807    fn test_revcomp_buf_value_integer_unchanged() {
808        let value = BufValue::from(42_i32);
809        let revcomp = revcomp_buf_value(&value);
810        assert_eq!(revcomp, BufValue::from(42_i32));
811    }
812
813    // =========================================================================
814    // Tests for to_smallest_signed_int()
815    // =========================================================================
816
817    #[test]
818    fn test_to_smallest_signed_int_fits_in_i8() {
819        // Values that fit in i8 (-128 to 127) should be Int8
820        assert_eq!(to_smallest_signed_int(0), BufValue::Int8(0));
821        assert_eq!(to_smallest_signed_int(60), BufValue::Int8(60));
822        assert_eq!(to_smallest_signed_int(127), BufValue::Int8(127));
823        assert_eq!(to_smallest_signed_int(-128), BufValue::Int8(-128));
824        assert_eq!(to_smallest_signed_int(-1), BufValue::Int8(-1));
825    }
826
827    #[test]
828    fn test_to_smallest_signed_int_fits_in_i16() {
829        // Values that fit in i16 but not i8 should be Int16
830        assert_eq!(to_smallest_signed_int(128), BufValue::Int16(128));
831        assert_eq!(to_smallest_signed_int(1000), BufValue::Int16(1000));
832        assert_eq!(to_smallest_signed_int(32767), BufValue::Int16(32767));
833        assert_eq!(to_smallest_signed_int(-129), BufValue::Int16(-129));
834        assert_eq!(to_smallest_signed_int(-32768), BufValue::Int16(-32768));
835    }
836
837    #[test]
838    fn test_to_smallest_signed_int_requires_i32() {
839        // Values that require i32 should be Int32
840        assert_eq!(to_smallest_signed_int(32768), BufValue::Int32(32768));
841        assert_eq!(to_smallest_signed_int(100_000), BufValue::Int32(100_000));
842        assert_eq!(to_smallest_signed_int(-32769), BufValue::Int32(-32769));
843    }
844
845    // =========================================================================
846    // Tests for buf_value_to_smallest_signed_int()
847    // =========================================================================
848
849    #[test]
850    fn test_buf_value_to_smallest_signed_int_from_uint8() {
851        // UInt8 values should be converted to the smallest signed type
852        let value = BufValue::UInt8(60);
853        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
854
855        let value = BufValue::UInt8(200);
856        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int16(200)));
857    }
858
859    #[test]
860    fn test_buf_value_to_smallest_signed_int_from_int8() {
861        // Int8 values should stay as Int8
862        let value = BufValue::Int8(60);
863        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
864
865        let value = BufValue::Int8(-50);
866        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(-50)));
867    }
868
869    #[test]
870    fn test_buf_value_to_smallest_signed_int_from_int16() {
871        // Int16 values should be converted appropriately
872        let value = BufValue::Int16(60);
873        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
874
875        let value = BufValue::Int16(1000);
876        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int16(1000)));
877    }
878
879    #[test]
880    fn test_buf_value_to_smallest_signed_int_from_int32() {
881        // Int32 values should be converted appropriately
882        let value = BufValue::Int32(60);
883        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int8(60)));
884
885        let value = BufValue::Int32(1000);
886        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int16(1000)));
887
888        let value = BufValue::Int32(100_000);
889        assert_eq!(buf_value_to_smallest_signed_int(&value), Some(BufValue::Int32(100_000)));
890    }
891
892    #[test]
893    fn test_buf_value_to_smallest_signed_int_non_integer_returns_none() {
894        // Non-integer types should return None
895        let value = BufValue::from("hello".to_string());
896        assert_eq!(buf_value_to_smallest_signed_int(&value), None);
897
898        let value = BufValue::Character(b'X');
899        assert_eq!(buf_value_to_smallest_signed_int(&value), None);
900    }
901}