Skip to main content

fgumi_sam/
clipper.rs

1//! Read clipping utilities for BAM/SAM records.
2//!
3//! This module provides functionality for clipping reads in various ways (soft, hard, etc.)
4//! and is used by tools like `clip` and consensus calling tools.
5
6use anyhow::Result;
7use noodles::core::Position;
8use noodles::sam::alignment::RecordBuf;
9use noodles::sam::alignment::record::cigar::Cigar;
10use noodles::sam::alignment::record::cigar::Op as CigarOp;
11use noodles::sam::alignment::record::cigar::op::Kind;
12use noodles::sam::alignment::record_buf::data::field::Value;
13use noodles::sam::alignment::record_buf::{Cigar as CigarBuf, QualityScores, Sequence};
14
15use crate::record_utils;
16use fgumi_dna::{MIN_PHRED, NO_CALL_BASE};
17
18/// Helper macro to get array length for any Array variant
19macro_rules! array_len {
20    ($arr:expr) => {
21        match $arr {
22            Array::Int8(a) => a.len(),
23            Array::UInt8(a) => a.len(),
24            Array::Int16(a) => a.len(),
25            Array::UInt16(a) => a.len(),
26            Array::Int32(a) => a.len(),
27            Array::UInt32(a) => a.len(),
28            Array::Float(a) => a.len(),
29        }
30    };
31}
32
33/// Helper macro to slice an array and create a new Value
34macro_rules! slice_array {
35    ($arr:expr, $start:expr, $end:expr) => {
36        match $arr {
37            Array::Int8(a) => Value::from(a[$start..$end].to_vec()),
38            Array::UInt8(a) => Value::from(a[$start..$end].to_vec()),
39            Array::Int16(a) => Value::from(a[$start..$end].to_vec()),
40            Array::UInt16(a) => Value::from(a[$start..$end].to_vec()),
41            Array::Int32(a) => Value::from(a[$start..$end].to_vec()),
42            Array::UInt32(a) => Value::from(a[$start..$end].to_vec()),
43            Array::Float(a) => Value::from(a[$start..$end].to_vec()),
44        }
45    };
46}
47
48/// Modes of clipping that can be applied to reads
49#[derive(Debug, Clone, Copy, PartialEq, Eq)]
50pub enum ClippingMode {
51    /// Soft clip: convert bases to S operators in CIGAR, keep bases and qualities
52    Soft,
53    /// Soft clip with masking: convert to S operators and mask bases to N, qualities to min
54    SoftWithMask,
55    /// Hard clip: remove bases, qualities, and convert to H operators in CIGAR
56    Hard,
57}
58
59/// Utility for clipping BAM/SAM records in various ways
60pub struct SamRecordClipper {
61    /// The clipping mode to use
62    mode: ClippingMode,
63    /// Whether to automatically clip extended attributes that match read length
64    auto_clip_attributes: bool,
65}
66
67impl SamRecordClipper {
68    /// Creates a new clipper with the specified mode
69    #[must_use]
70    pub fn new(mode: ClippingMode) -> Self {
71        Self { mode, auto_clip_attributes: false }
72    }
73
74    /// Creates a new clipper with auto-clip attributes enabled
75    ///
76    /// When enabled with hard clipping mode, any tags that are the same length as the
77    /// read's sequence will be automatically clipped to match. This ensures per-base
78    /// tags (like quality arrays, per-base depths, etc.) stay synchronized with the sequence.
79    #[must_use]
80    pub fn with_auto_clip(mode: ClippingMode, auto_clip_attributes: bool) -> Self {
81        Self { mode, auto_clip_attributes }
82    }
83
84    /// Clips extended attributes (per-base tags) when hard clipping
85    ///
86    /// When auto-clipping is enabled and using hard clipping mode, this method
87    /// automatically trims any tag values (strings or arrays) that are the same length
88    /// as the original read sequence to match the new clipped length.
89    ///
90    /// # Arguments
91    /// * `record` - The record to modify
92    /// * `remove` - Number of bases being removed
93    /// * `from_start` - If true, clip from start (5' end); if false, clip from end (3' end)
94    fn clip_extended_attributes(&self, record: &mut RecordBuf, remove: usize, from_start: bool) {
95        use noodles::sam::alignment::record_buf::data::field::value::Array;
96
97        // Only clip attributes when using hard clipping mode with auto-clip enabled
98        if !matches!(self.mode, ClippingMode::Hard) || remove == 0 || !self.auto_clip_attributes {
99            return;
100        }
101
102        let new_length = record.sequence().len();
103        let old_length = new_length + remove;
104
105        // Collect tags to update (we can't modify while iterating)
106        let mut tags_to_update: Vec<(Vec<u8>, Value)> = Vec::new();
107
108        // Iterate through all data fields
109        for (tag, value) in record.data().iter() {
110            // Check if this is a String or Array that matches the old length
111            let should_clip = match value {
112                Value::String(s) => {
113                    let bytes: &[u8] = s.as_ref();
114                    bytes.len() == old_length
115                }
116                Value::Array(arr) => array_len!(arr) == old_length,
117                _ => false,
118            };
119
120            if should_clip {
121                // Create clipped version of the value
122                let (start, end) = if from_start { (remove, old_length) } else { (0, new_length) };
123                let new_value = match value {
124                    Value::String(s) => {
125                        let bytes: &[u8] = s.as_ref();
126                        Value::from(std::str::from_utf8(&bytes[start..end]).unwrap_or(""))
127                    }
128                    Value::Array(arr) => slice_array!(arr, start, end),
129                    _ => continue, // Should not reach here due to should_clip check
130                };
131
132                tags_to_update.push((tag.as_ref().to_vec(), new_value));
133            }
134        }
135
136        // Now update the tags
137        for (tag, value) in tags_to_update {
138            let tag_array: [u8; 2] = [tag[0], tag[1]];
139            record.data_mut().insert(tag_array.into(), value);
140        }
141    }
142
143    /// Clips a specified number of bases from the start (left side) of the alignment
144    ///
145    /// Returns the number of bases actually clipped
146    #[expect(
147        clippy::too_many_lines,
148        reason = "clipping logic with multiple modes requires handling many CIGAR edge cases"
149    )]
150    pub fn clip_start_of_alignment(&self, record: &mut RecordBuf, bases_to_clip: usize) -> usize {
151        if bases_to_clip == 0 {
152            return 0;
153        }
154
155        // Don't clip unmapped reads
156        if record.flags().is_unmapped() {
157            return 0;
158        }
159
160        let sequence_len = record.sequence();
161        if sequence_len.is_empty() {
162            return 0;
163        }
164
165        // Collect existing CIGAR ops
166        let old_ops: Vec<CigarOp> = record.cigar().iter().filter_map(Result::ok).collect();
167
168        // Extract existing hard and soft clips from the start
169        let existing_hard_clip = old_ops
170            .iter()
171            .take_while(|op| op.kind() == Kind::HardClip)
172            .map(|op| op.len())
173            .sum::<usize>();
174
175        let existing_soft_clip = old_ops
176            .iter()
177            .skip_while(|op| op.kind() == Kind::HardClip)
178            .take_while(|op| op.kind() == Kind::SoftClip)
179            .map(|op| op.len())
180            .sum::<usize>();
181
182        // Skip to operations after existing clips
183        let post_clip_ops: Vec<CigarOp> = old_ops
184            .into_iter()
185            .skip_while(|op| matches!(op.kind(), Kind::HardClip | Kind::SoftClip))
186            .collect();
187
188        let mut read_bases_clipped = 0;
189        let mut ref_bases_clipped = 0;
190        let mut new_ops = Vec::new();
191        let mut iter = post_clip_ops.iter().peekable();
192
193        // Clip operations from the start
194        // Continue until we've clipped enough read bases, OR if we've clipped the right amount
195        // but the next operation is a deletion (which should be removed)
196        while read_bases_clipped < bases_to_clip
197            || (read_bases_clipped == bases_to_clip
198                && new_ops.is_empty()
199                && iter.peek().map(|op| op.kind()) == Some(Kind::Deletion))
200        {
201            let Some(op) = iter.next() else { break };
202
203            let kind = op.kind();
204            let len = op.len();
205
206            let consumes_read = matches!(
207                kind,
208                Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::Insertion
209            );
210            let consumes_ref = matches!(
211                kind,
212                Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::Deletion
213            );
214
215            if consumes_read && len > (bases_to_clip - read_bases_clipped) {
216                // This operation extends past our clip point
217                if kind == Kind::Insertion {
218                    // Consume entire insertion at clip boundary
219                    read_bases_clipped += len;
220                } else {
221                    // Split the operation
222                    let remaining_clip = bases_to_clip - read_bases_clipped;
223                    let remaining_length = len - remaining_clip;
224                    read_bases_clipped += remaining_clip;
225                    ref_bases_clipped += remaining_clip;
226                    new_ops.push(CigarOp::new(kind, remaining_length));
227                }
228            } else {
229                // Consume entire operation
230                if consumes_read {
231                    read_bases_clipped += len;
232                }
233                if consumes_ref {
234                    ref_bases_clipped += len;
235                }
236            }
237        }
238
239        // Add remaining operations
240        new_ops.extend(iter.copied());
241
242        // Prepend appropriate clipping operators
243        let (final_ops, bases_to_remove) = match self.mode {
244            ClippingMode::Hard => {
245                // Match fgbio's behavior: convert ALL existing soft clips to hard clips
246                let added_hard_clip = existing_soft_clip + read_bases_clipped;
247                let total_hard_clip = existing_hard_clip + added_hard_clip;
248                let mut result = Vec::new();
249                result.push(CigarOp::new(Kind::HardClip, total_hard_clip));
250                result.extend(new_ops);
251                (result, added_hard_clip)
252            }
253            ClippingMode::Soft | ClippingMode::SoftWithMask => {
254                let total_soft_clip = existing_soft_clip + read_bases_clipped;
255                let mut result = Vec::new();
256                if existing_hard_clip > 0 {
257                    result.push(CigarOp::new(Kind::HardClip, existing_hard_clip));
258                }
259                result.push(CigarOp::new(Kind::SoftClip, total_soft_clip));
260                result.extend(new_ops);
261                (result, 0)
262            }
263        };
264
265        // Update CIGAR
266        *record.cigar_mut() = CigarBuf::from(final_ops);
267
268        // Update alignment start position
269        if ref_bases_clipped > 0 {
270            if let Some(start_pos) = record.alignment_start() {
271                if let Some(new_start) = Position::new(usize::from(start_pos) + ref_bases_clipped) {
272                    *record.alignment_start_mut() = Some(new_start);
273                }
274            }
275        }
276
277        // Handle sequence and quality updates based on mode
278        match self.mode {
279            ClippingMode::Soft => {
280                // Keep sequence and qualities as-is
281            }
282            ClippingMode::SoftWithMask => {
283                // Mask clipped bases to N and quality to min
284                let seq = record.sequence();
285                let qual = record.quality_scores();
286                let mut new_seq: Vec<u8> = seq.as_ref().to_vec();
287                let mut new_qual: Vec<u8> = qual.as_ref().to_vec();
288
289                let total_soft_clip = existing_soft_clip + read_bases_clipped;
290                for i in 0..total_soft_clip.min(new_seq.len()) {
291                    new_seq[i] = NO_CALL_BASE;
292                    new_qual[i] = MIN_PHRED;
293                }
294
295                *record.sequence_mut() = Sequence::from(new_seq);
296                *record.quality_scores_mut() = QualityScores::from(new_qual);
297            }
298            ClippingMode::Hard => {
299                // Remove clipped bases and qualities using direct slice indexing
300                let seq = record.sequence();
301                let qual = record.quality_scores();
302                let new_seq = seq.as_ref()[bases_to_remove..].to_vec();
303                let new_qual = qual.as_ref()[bases_to_remove..].to_vec();
304
305                *record.sequence_mut() = Sequence::from(new_seq);
306                *record.quality_scores_mut() = QualityScores::from(new_qual);
307
308                // Clip extended attributes if auto-clipping is enabled
309                self.clip_extended_attributes(record, bases_to_remove, true);
310            }
311        }
312
313        read_bases_clipped
314    }
315
316    /// Clips a specified number of bases from the end (right side) of the alignment
317    ///
318    /// Returns the number of bases actually clipped
319    #[expect(
320        clippy::too_many_lines,
321        reason = "mirrors clip_start_of_alignment with symmetric end-clipping logic"
322    )]
323    pub fn clip_end_of_alignment(&self, record: &mut RecordBuf, bases_to_clip: usize) -> usize {
324        if bases_to_clip == 0 {
325            return 0;
326        }
327
328        // Don't clip unmapped reads
329        if record.flags().is_unmapped() {
330            return 0;
331        }
332
333        let sequence_len = record.sequence();
334        if sequence_len.is_empty() {
335            return 0;
336        }
337
338        // Collect existing CIGAR ops
339        let old_ops: Vec<CigarOp> = record.cigar().iter().filter_map(Result::ok).collect();
340
341        // Extract existing hard and soft clips from the end (process in reverse)
342        let existing_hard_clip = old_ops
343            .iter()
344            .rev()
345            .take_while(|op| op.kind() == Kind::HardClip)
346            .map(|op| op.len())
347            .sum::<usize>();
348
349        let existing_soft_clip = old_ops
350            .iter()
351            .rev()
352            .skip_while(|op| op.kind() == Kind::HardClip)
353            .take_while(|op| op.kind() == Kind::SoftClip)
354            .map(|op| op.len())
355            .sum::<usize>();
356
357        // Skip to operations before existing clips (reverse order, so use rev/skip_while/rev pattern)
358        let mut post_clip_ops: Vec<CigarOp> = old_ops
359            .into_iter()
360            .rev()
361            .skip_while(|op| matches!(op.kind(), Kind::HardClip | Kind::SoftClip))
362            .collect();
363        post_clip_ops.reverse(); // Un-reverse to get normal order
364
365        let mut read_bases_clipped = 0;
366        let mut new_ops = Vec::new();
367        let mut iter = post_clip_ops.iter().rev().peekable();
368
369        // Clip operations from the end (working backwards)
370        // Continue until we've clipped enough read bases, OR if we've clipped the right amount
371        // but the next operation (when working backwards) is a deletion (which should be removed)
372        while read_bases_clipped < bases_to_clip
373            || (read_bases_clipped == bases_to_clip
374                && new_ops.is_empty()
375                && iter.peek().map(|op| op.kind()) == Some(Kind::Deletion))
376        {
377            let Some(op) = iter.next() else { break };
378
379            let kind = op.kind();
380            let len = op.len();
381
382            let consumes_read = matches!(
383                kind,
384                Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::Insertion
385            );
386
387            if consumes_read && len > (bases_to_clip - read_bases_clipped) {
388                // This operation extends past our clip point
389                if kind == Kind::Insertion {
390                    // Consume entire insertion at clip boundary
391                    read_bases_clipped += len;
392                } else {
393                    // Split the operation
394                    let remaining_clip = bases_to_clip - read_bases_clipped;
395                    let remaining_length = len - remaining_clip;
396                    read_bases_clipped += remaining_clip;
397                    new_ops.push(CigarOp::new(kind, remaining_length));
398                }
399            } else {
400                // Consume entire operation
401                if consumes_read {
402                    read_bases_clipped += len;
403                }
404            }
405        }
406
407        // Add remaining operations (iter is already reversed, so collect as-is)
408        let remaining: Vec<CigarOp> = iter.copied().collect();
409        new_ops.extend(remaining.iter());
410
411        // new_ops is in reverse order, so reverse it
412        new_ops.reverse();
413
414        // Append appropriate clipping operators
415        let (final_ops, bases_to_remove) = match self.mode {
416            ClippingMode::Hard => {
417                // Match fgbio's behavior: convert ALL existing soft clips to hard clips
418                let added_hard_clip = existing_soft_clip + read_bases_clipped;
419                let total_hard_clip = existing_hard_clip + added_hard_clip;
420                let mut result = new_ops;
421                result.push(CigarOp::new(Kind::HardClip, total_hard_clip));
422                (result, added_hard_clip)
423            }
424            ClippingMode::Soft | ClippingMode::SoftWithMask => {
425                let total_soft_clip = existing_soft_clip + read_bases_clipped;
426                let mut result = new_ops;
427                result.push(CigarOp::new(Kind::SoftClip, total_soft_clip));
428                if existing_hard_clip > 0 {
429                    result.push(CigarOp::new(Kind::HardClip, existing_hard_clip));
430                }
431                (result, 0)
432            }
433        };
434
435        // Update CIGAR
436        *record.cigar_mut() = CigarBuf::from(final_ops);
437
438        // Handle sequence and quality updates based on mode
439        let seq_len = record.sequence().len();
440        match self.mode {
441            ClippingMode::Soft => {
442                // Keep sequence and qualities as-is
443            }
444            ClippingMode::SoftWithMask => {
445                // Mask clipped bases to N and quality to min
446                let seq = record.sequence();
447                let qual = record.quality_scores();
448                let mut new_seq: Vec<u8> = seq.as_ref().to_vec();
449                let mut new_qual: Vec<u8> = qual.as_ref().to_vec();
450
451                let total_soft_clip = existing_soft_clip + read_bases_clipped;
452                let start_mask = seq_len.saturating_sub(total_soft_clip);
453                for i in start_mask..seq_len {
454                    new_seq[i] = NO_CALL_BASE;
455                    new_qual[i] = MIN_PHRED;
456                }
457
458                *record.sequence_mut() = Sequence::from(new_seq);
459                *record.quality_scores_mut() = QualityScores::from(new_qual);
460            }
461            ClippingMode::Hard => {
462                // Remove clipped bases and qualities using direct slice indexing
463                let seq = record.sequence();
464                let qual = record.quality_scores();
465                let keep_len = seq_len.saturating_sub(bases_to_remove);
466                let new_seq = seq.as_ref()[..keep_len].to_vec();
467                let new_qual = qual.as_ref()[..keep_len].to_vec();
468
469                *record.sequence_mut() = Sequence::from(new_seq);
470                *record.quality_scores_mut() = QualityScores::from(new_qual);
471
472                // Clip extended attributes if auto-clipping is enabled
473                self.clip_extended_attributes(record, bases_to_remove, false);
474            }
475        }
476
477        read_bases_clipped
478    }
479
480    /// Clips bases from the 5' end of the read (strand-aware)
481    ///
482    /// For positive strand reads, clips from the start of alignment.
483    /// For negative strand reads, clips from the end of alignment.
484    ///
485    /// Returns the number of bases actually clipped
486    pub fn clip_5_prime_end_of_alignment(
487        &self,
488        record: &mut RecordBuf,
489        bases_to_clip: usize,
490    ) -> usize {
491        if record.flags().is_reverse_complemented() {
492            self.clip_end_of_alignment(record, bases_to_clip)
493        } else {
494            self.clip_start_of_alignment(record, bases_to_clip)
495        }
496    }
497
498    /// Clips bases from the 3' end of the read (strand-aware)
499    ///
500    /// For positive strand reads, clips from the end of alignment.
501    /// For negative strand reads, clips from the start of alignment.
502    ///
503    /// Returns the number of bases actually clipped
504    pub fn clip_3_prime_end_of_alignment(
505        &self,
506        record: &mut RecordBuf,
507        bases_to_clip: usize,
508    ) -> usize {
509        if record.flags().is_reverse_complemented() {
510            self.clip_start_of_alignment(record, bases_to_clip)
511        } else {
512            self.clip_end_of_alignment(record, bases_to_clip)
513        }
514    }
515
516    /// Clips overlapping portions of an FR read pair
517    ///
518    /// **Important:** Only clips reads that are in FR (forward-reverse) orientation.
519    /// Non-FR pairs (FF, RR, RF) are not clipped.
520    ///
521    /// This implementation matches fgbio's midpoint approach: calculates the midpoint
522    /// between the 5' ends of the two reads and clips both reads at that position.
523    ///
524    /// Returns (`bases_clipped_r1`, `bases_clipped_r2`)
525    pub fn clip_overlapping_reads(&self, r1: &mut RecordBuf, r2: &mut RecordBuf) -> (usize, usize) {
526        // Check if this is a valid FR pair before clipping
527        if !record_utils::is_fr_pair(r1, r2) {
528            return (0, 0);
529        }
530
531        // Get alignment positions
532        let r1_start = match r1.alignment_start() {
533            Some(pos) => usize::from(pos),
534            None => return (0, 0),
535        };
536        let r2_start = match r2.alignment_start() {
537            Some(pos) => usize::from(pos),
538            None => return (0, 0),
539        };
540
541        // Calculate reference end positions using CIGAR
542        let r1_end = r1_start + cigar_utils::reference_length(&r1.cigar()) - 1; // -1 for inclusive end
543        let r2_end = r2_start + cigar_utils::reference_length(&r2.cigar()) - 1; // -1 for inclusive end
544
545        // Check if they overlap on the reference
546        let overlap_start = r1_start.max(r2_start);
547        let overlap_end = r1_end.min(r2_end);
548
549        if overlap_start > overlap_end {
550            // No overlap
551            return (0, 0);
552        }
553
554        // Calculate midpoint between the 5' ends (r1.start and r2.end)
555        // In FR orientation: r1 is forward (5' at start), r2 is reverse (5' at end)
556        let mut midpoint = usize::midpoint(r1_start, r2_end);
557
558        // Adjust midpoint if it falls outside the overlap region
559        if midpoint > r1_end {
560            // midpoint is past r1's end, trim only the reverse strand read (r2)
561            midpoint = r1_end;
562        } else if midpoint < r2_start {
563            // midpoint is before r2's start, trim only the positive strand read (r1)
564            // Use r2_start - 1 to ensure we clip at least one base
565            midpoint = r2_start.saturating_sub(1);
566        }
567
568        // Calculate how many bases to clip from each read
569        // R1 (forward): clip from 3' end everything after midpoint
570        let r1_bases_to_clip = if r1_end > midpoint {
571            let ref_bases_to_clip = r1_end - midpoint;
572            // Convert reference bases to query bases
573            self.calculate_query_bases_for_ref_region(r1, ref_bases_to_clip, false)
574        } else {
575            0
576        };
577
578        // R2 (reverse): clip from 3' end (which is the 5' end in reference coordinates)
579        // everything before midpoint + 1
580        let r2_bases_to_clip = if midpoint + 1 > r2_start {
581            let ref_bases_to_clip = midpoint + 1 - r2_start;
582            // Convert reference bases to query bases, clipping from start (5' in ref = 3' in read)
583            self.calculate_query_bases_for_ref_region(r2, ref_bases_to_clip, true)
584        } else {
585            0
586        };
587
588        let clipped_r1 =
589            if r1_bases_to_clip > 0 { self.clip_end_of_alignment(r1, r1_bases_to_clip) } else { 0 };
590
591        // For R2 (reverse read), we clip from the beginning of the alignment (5' in reference),
592        // which corresponds to the beginning of the stored sequence, so use clip_5_prime
593        let clipped_r2 = if r2_bases_to_clip > 0 {
594            self.clip_start_of_alignment(r2, r2_bases_to_clip)
595        } else {
596            0
597        };
598
599        // If in Hard mode, upgrade any remaining soft clips to hard clips
600        if matches!(self.mode, ClippingMode::Hard) {
601            let _ = self.upgrade_all_clipping(r1);
602            let _ = self.upgrade_all_clipping(r2);
603        }
604
605        (clipped_r1, clipped_r2)
606    }
607
608    /// Calculates the number of bases in a read that extend past the mate's unclipped boundary
609    ///
610    /// This matches fgbio's `numBasesExtendingPastMate` function exactly.
611    ///
612    /// # Arguments
613    /// * `record` - The SAM record to check
614    /// * `mate_unclipped_start` - The mate's unclipped start position
615    /// * `mate_unclipped_end` - The mate's unclipped end position
616    ///
617    /// # Returns
618    /// The number of bases that extend past the mate's boundary, or 0 if not applicable
619    #[must_use]
620    pub fn num_bases_extending_past_mate(
621        record: &RecordBuf,
622        mate_unclipped_start: usize,
623        mate_unclipped_end: usize,
624    ) -> usize {
625        // Note: FR pair check is done in the caller (clip_extending_past_mate_ends)
626        // so we don't need to check it here
627
628        let is_positive_strand = !record.flags().is_reverse_complemented();
629
630        // Get read length (total bases in the read, excluding hard clips)
631        let read_length: usize = record
632            .cigar()
633            .iter()
634            .filter_map(Result::ok)
635            .filter(|op| {
636                matches!(
637                    op.kind(),
638                    Kind::Match
639                        | Kind::SequenceMatch
640                        | Kind::SequenceMismatch
641                        | Kind::Insertion
642                        | Kind::SoftClip
643                )
644            })
645            .map(CigarOp::len)
646            .sum();
647
648        if is_positive_strand {
649            // Positive strand: check if read extends past mate's unclipped end
650            let Some(alignment_start) = record.alignment_start().map(usize::from) else {
651                return 0;
652            };
653            let alignment_end =
654                alignment_start + cigar_utils::reference_length(&record.cigar()).saturating_sub(1);
655
656            if alignment_end >= mate_unclipped_end {
657                // Aligned portion reaches or extends past mate's unclipped end
658                // Use readPosAtRefPos to find where in the read the mate ends
659                // fgbio: Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateUnclippedEnd, returnLastBaseIfDeleted=false))
660                let pos_at_mate_end =
661                    record_utils::read_pos_at_ref_pos(record, mate_unclipped_end, false);
662                // When pos_at_mate_end is 0 (position in deletion or outside alignment),
663                // fgbio's formula gives: max(0, read_length - 0) = read_length (clip all)
664                read_length.saturating_sub(pos_at_mate_end)
665            } else {
666                // Aligned portion ends before mate's unclipped end
667                // Only clip excess soft-clipped bases that extend past the mate
668                let trailing_soft_clip = Self::trailing_soft_clips(record.cigar());
669                let gap = mate_unclipped_end - alignment_end;
670                trailing_soft_clip.saturating_sub(gap)
671            }
672        } else {
673            // Negative strand: check if read extends before mate's unclipped start
674            let Some(alignment_start) = record.alignment_start().map(usize::from) else {
675                return 0;
676            };
677
678            if alignment_start > mate_unclipped_start {
679                // Aligned portion starts after mate's unclipped start
680                // Only clip excess soft-clipped bases that extend before the mate
681                let leading_soft_clip = Self::leading_soft_clips(record.cigar());
682                let gap = alignment_start - mate_unclipped_start;
683                leading_soft_clip.saturating_sub(gap)
684            } else {
685                // Aligned portion starts at or before mate's unclipped start
686                // Use readPosAtRefPos to find where in the read the mate starts
687                // fgbio: Math.max(0, rec.readPosAtRefPos(pos=mateUnclippedStart, returnLastBaseIfDeleted=false) - 1)
688                let pos_at_mate_start =
689                    record_utils::read_pos_at_ref_pos(record, mate_unclipped_start, false);
690                // When pos_at_mate_start is 0 (position in deletion or outside alignment),
691                // fgbio's formula gives: max(0, 0 - 1) = 0 (clip nothing)
692                // pos_at_mate_start is 1-based, so subtract 1 to get bases before it
693                pos_at_mate_start.saturating_sub(1)
694            }
695        }
696    }
697
698    /// Clips reads that extend beyond their mate's alignment ends
699    ///
700    /// This matches fgbio's `clipExtendingPastMateEnds` function exactly.
701    ///
702    /// **Important:** Only clips reads that are in FR (forward-reverse) orientation.
703    /// Non-FR pairs (FF, RR, RF) are not clipped.
704    ///
705    /// Returns (`bases_clipped_r1`, `bases_clipped_r2`)
706    pub fn clip_extending_past_mate_ends(
707        &self,
708        r1: &mut RecordBuf,
709        r2: &mut RecordBuf,
710    ) -> (usize, usize) {
711        // Check if this is a valid FR pair before clipping
712        if !record_utils::is_fr_pair(r1, r2) {
713            return (0, 0);
714        }
715
716        // Get unclipped positions for both reads
717        let r1_unclipped_start = Self::unclipped_start(r1);
718        let r1_unclipped_end = Self::unclipped_end(r1);
719        let r2_unclipped_start = Self::unclipped_start(r2);
720        let r2_unclipped_end = Self::unclipped_end(r2);
721
722        let (Some(r1_start), Some(r1_end), Some(r2_start), Some(r2_end)) =
723            (r1_unclipped_start, r1_unclipped_end, r2_unclipped_start, r2_unclipped_end)
724        else {
725            return (0, 0);
726        };
727
728        // Clip each read individually using fgbio's algorithm
729        let clipped_r1 = self.clip_single_read_extending_past_mate(r1, r2_start, r2_end);
730        let clipped_r2 = self.clip_single_read_extending_past_mate(r2, r1_start, r1_end);
731
732        (clipped_r1, clipped_r2)
733    }
734
735    /// Clips a single read if it extends past its mate's alignment boundaries.
736    ///
737    /// This matches fgbio's `clipExtendingPastMateEnd` private method exactly.
738    ///
739    /// - Positive strand reads: clips from the 3' end (end of read)
740    /// - Negative strand reads: clips from the 5' end (start of read)
741    fn clip_single_read_extending_past_mate(
742        &self,
743        rec: &mut RecordBuf,
744        mate_unclipped_start: usize,
745        mate_unclipped_end: usize,
746    ) -> usize {
747        // Calculate how many bases extend past the mate
748        let total_clipped_bases =
749            Self::num_bases_extending_past_mate(rec, mate_unclipped_start, mate_unclipped_end);
750
751        if total_clipped_bases == 0 {
752            return 0;
753        }
754
755        // Clip based on strand
756        let is_positive = !rec.flags().is_reverse_complemented();
757        if is_positive {
758            // Positive strand: clip from end of read (3' in sequencing order)
759            self.clip_end_of_read(rec, total_clipped_bases)
760        } else {
761            // Negative strand: clip from start of read (5' in sequencing order, which is 3' in read orientation)
762            self.clip_start_of_read(rec, total_clipped_bases)
763        }
764    }
765
766    /// Get unclipped start position (delegates to `record_utils`)
767    fn unclipped_start(rec: &RecordBuf) -> Option<usize> {
768        record_utils::unclipped_start(rec)
769    }
770
771    /// Get unclipped end position (delegates to `record_utils`)
772    fn unclipped_end(rec: &RecordBuf) -> Option<usize> {
773        record_utils::unclipped_end(rec)
774    }
775
776    /// Count leading soft clips
777    fn leading_soft_clips(cigar: &noodles::sam::alignment::record_buf::Cigar) -> usize {
778        let ops: Vec<_> = cigar.as_ref().iter().map(|op| (op.kind(), op.len())).collect();
779        record_utils::leading_soft_clipping(&ops)
780    }
781
782    /// Count trailing soft clips
783    fn trailing_soft_clips(cigar: &noodles::sam::alignment::record_buf::Cigar) -> usize {
784        let ops: Vec<_> = cigar.as_ref().iter().map(|op| (op.kind(), op.len())).collect();
785        record_utils::trailing_soft_clipping(&ops)
786    }
787
788    /// Helper function to calculate query bases corresponding to a reference region
789    ///
790    /// Given a reference length, calculates how many query bases correspond to that region
791    /// starting from either the 5' end (`from_start=true`) or 3' end (`from_start=false`)
792    #[expect(
793        clippy::unused_self,
794        reason = "kept as a method for consistency with other clipper operations"
795    )]
796    fn calculate_query_bases_for_ref_region(
797        &self,
798        record: &RecordBuf,
799        ref_bases: usize,
800        from_start: bool,
801    ) -> usize {
802        let cigar = record.cigar();
803        let ops: Vec<_> = cigar.iter().filter_map(Result::ok).collect();
804
805        let mut remaining_ref = ref_bases;
806        let mut query_bases = 0;
807
808        // Process CIGAR operations in the appropriate direction
809        let iter: Box<dyn Iterator<Item = &CigarOp>> =
810            if from_start { Box::new(ops.iter()) } else { Box::new(ops.iter().rev()) };
811
812        for op in iter {
813            if remaining_ref == 0 {
814                break;
815            }
816
817            let kind = op.kind();
818            let len = op.len();
819
820            let consumes_ref = matches!(
821                kind,
822                Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::Deletion
823            );
824            let consumes_query = matches!(
825                kind,
826                Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::Insertion
827            );
828
829            if consumes_ref {
830                let ref_consumed = len.min(remaining_ref);
831                remaining_ref -= ref_consumed;
832
833                // Add corresponding query bases
834                if consumes_query {
835                    query_bases += ref_consumed;
836                }
837            } else if consumes_query && remaining_ref > 0 {
838                // Insertion - consumes query but not reference
839                // If we haven't finished consuming reference, include these bases
840                query_bases += len;
841            }
842        }
843
844        query_bases
845    }
846
847    /// Upgrades soft clipping to hard clipping for a specified number of bases
848    ///
849    /// This matches fgbio's private `upgradeClipping` method.
850    ///
851    /// # Arguments
852    /// * `record` - The record to modify
853    /// * `length` - The total number of clipped bases requested
854    /// * `from_start` - If true, upgrade from the start; if false, from the end
855    fn upgrade_clipping(&self, record: &mut RecordBuf, length: usize, from_start: bool) {
856        // Only upgrade if not in Soft mode and length > 0
857        if self.mode == ClippingMode::Soft || length == 0 {
858            return;
859        }
860
861        let old_ops: Vec<CigarOp> = record.cigar().iter().filter_map(Result::ok).collect();
862
863        // Count existing hard and soft clips in the appropriate direction
864        let (hard_clipped, soft_clipped) = if from_start {
865            let hard: usize = old_ops
866                .iter()
867                .take_while(|op| op.kind() == Kind::HardClip)
868                .map(|op| op.len())
869                .sum();
870            let soft: usize = old_ops
871                .iter()
872                .skip_while(|op| op.kind() == Kind::HardClip)
873                .take_while(|op| op.kind() == Kind::SoftClip)
874                .map(|op| op.len())
875                .sum();
876            (hard, soft)
877        } else {
878            let hard: usize = old_ops
879                .iter()
880                .rev()
881                .take_while(|op| op.kind() == Kind::HardClip)
882                .map(|op| op.len())
883                .sum();
884            let soft: usize = old_ops
885                .iter()
886                .rev()
887                .skip_while(|op| op.kind() == Kind::HardClip)
888                .take_while(|op| op.kind() == Kind::SoftClip)
889                .map(|op| op.len())
890                .sum();
891            (hard, soft)
892        };
893
894        // If requested length is already hard-clipped, or there are no soft clips, nothing to do
895        if hard_clipped >= length || soft_clipped == 0 {
896            return;
897        }
898
899        // Calculate how many soft clips to upgrade
900        let length_to_upgrade = soft_clipped.min(length - hard_clipped);
901
902        // Build new CIGAR and update sequence/quals
903        let mut new_ops = Vec::new();
904        let ops_to_process =
905            if from_start { old_ops.clone() } else { old_ops.iter().rev().copied().collect() };
906
907        // Handle Hard mode
908        if self.mode == ClippingMode::Hard {
909            let remaining_to_upgrade = length_to_upgrade;
910
911            // Process leading clips
912            let mut i = 0;
913            let mut existing_hard = 0;
914            let mut existing_soft = 0;
915
916            // Count leading hard clips
917            while i < ops_to_process.len() && ops_to_process[i].kind() == Kind::HardClip {
918                existing_hard += ops_to_process[i].len();
919                i += 1;
920            }
921
922            // Count leading soft clips
923            while i < ops_to_process.len() && ops_to_process[i].kind() == Kind::SoftClip {
924                existing_soft += ops_to_process[i].len();
925                i += 1;
926            }
927
928            // Build new clipping ops
929            let new_hard_count = existing_hard + remaining_to_upgrade;
930            new_ops.push(CigarOp::new(Kind::HardClip, new_hard_count));
931
932            if existing_soft > remaining_to_upgrade {
933                // Some soft clips remain
934                new_ops.push(CigarOp::new(Kind::SoftClip, existing_soft - remaining_to_upgrade));
935            }
936
937            // Add remaining ops
938            new_ops.extend_from_slice(&ops_to_process[i..]);
939
940            // Update CIGAR
941            let final_ops = if from_start { new_ops } else { new_ops.into_iter().rev().collect() };
942            *record.cigar_mut() = CigarBuf::from(final_ops);
943
944            // Update sequence and quals by dropping the upgraded bases
945            let seq = record.sequence().as_ref().to_vec();
946            let quals = record.quality_scores().as_ref().to_vec();
947
948            if from_start {
949                *record.sequence_mut() = Sequence::from(seq[length_to_upgrade..].to_vec());
950                *record.quality_scores_mut() =
951                    QualityScores::from(quals[length_to_upgrade..].to_vec());
952            } else {
953                let new_len = seq.len() - length_to_upgrade;
954                *record.sequence_mut() = Sequence::from(seq[..new_len].to_vec());
955                *record.quality_scores_mut() = QualityScores::from(quals[..new_len].to_vec());
956            }
957        }
958        // Note: SoftWithMask mode would use hard_mask_start_of_read/hard_mask_end_of_read
959        // but we don't implement that yet
960    }
961
962    /// Ensures at least `clip_length` bases are clipped at the start of the read
963    ///
964    /// This includes any existing hard and soft clipping. If more clipping is needed,
965    /// delegates to `clip_start_of_alignment`.
966    ///
967    /// Returns the number of additional bases clipped (not including existing clips)
968    pub fn clip_start_of_read(&self, record: &mut RecordBuf, clip_length: usize) -> usize {
969        // Count existing clipping at the start
970        let existing_clipping: usize = record
971            .cigar()
972            .iter()
973            .filter_map(Result::ok)
974            .take_while(|op| matches!(op.kind(), Kind::HardClip | Kind::SoftClip))
975            .map(CigarOp::len)
976            .sum();
977
978        if clip_length > existing_clipping {
979            self.clip_start_of_alignment(record, clip_length - existing_clipping)
980        } else {
981            self.upgrade_clipping(record, clip_length, true);
982            0
983        }
984    }
985
986    /// Ensures at least `clip_length` bases are clipped at the end of the read
987    ///
988    /// This includes any existing hard and soft clipping. If more clipping is needed,
989    /// delegates to `clip_end_of_alignment`.
990    ///
991    /// Returns the number of additional bases clipped (not including existing clips)
992    pub fn clip_end_of_read(&self, record: &mut RecordBuf, clip_length: usize) -> usize {
993        // Count existing clipping at the end
994        let ops: Vec<_> = record.cigar().iter().filter_map(Result::ok).collect();
995        let existing_clipping: usize = ops
996            .iter()
997            .rev()
998            .take_while(|op| matches!(op.kind(), Kind::HardClip | Kind::SoftClip))
999            .map(|op| op.len())
1000            .sum();
1001
1002        if clip_length > existing_clipping {
1003            self.clip_end_of_alignment(record, clip_length - existing_clipping)
1004        } else {
1005            self.upgrade_clipping(record, clip_length, false);
1006            0
1007        }
1008    }
1009
1010    /// Ensures at least `clip_length` bases are clipped at the 5' end (strand-aware)
1011    ///
1012    /// For positive strand: clips from start. For negative strand: clips from end.
1013    ///
1014    /// Returns the number of additional bases clipped
1015    pub fn clip_5_prime_end_of_read(&self, record: &mut RecordBuf, clip_length: usize) -> usize {
1016        if record.flags().is_reverse_complemented() {
1017            self.clip_end_of_read(record, clip_length)
1018        } else {
1019            self.clip_start_of_read(record, clip_length)
1020        }
1021    }
1022
1023    /// Ensures at least `clip_length` bases are clipped at the 3' end (strand-aware)
1024    ///
1025    /// For positive strand: clips from end. For negative strand: clips from start.
1026    ///
1027    /// Returns the number of additional bases clipped
1028    pub fn clip_3_prime_end_of_read(&self, record: &mut RecordBuf, clip_length: usize) -> usize {
1029        if record.flags().is_reverse_complemented() {
1030            self.clip_start_of_read(record, clip_length)
1031        } else {
1032            self.clip_end_of_read(record, clip_length)
1033        }
1034    }
1035
1036    /// Upgrades all existing clipping in a read to the current clipping mode
1037    ///
1038    /// E.g., converts soft clips to hard clips if mode is Hard,
1039    /// or soft clips to soft-with-mask if mode is `SoftWithMask`
1040    ///
1041    /// Returns `(leading_clipped, trailing_clipped)` - the number of soft-clipped
1042    /// bases that were converted to hard clips at the 5' and 3' ends respectively.
1043    /// Returns `(0, 0)` if no conversion occurred.
1044    ///
1045    /// # Panics
1046    ///
1047    /// Panics if an adjacent hard-clip CIGAR op disappears between the `last()` check
1048    /// and the subsequent access (should not happen in practice), or if a BAM tag
1049    /// is not exactly 2 bytes.
1050    ///
1051    /// # Errors
1052    ///
1053    /// Returns `Result` for API compatibility, but the current implementation is
1054    /// infallible and always returns `Ok`.
1055    #[expect(
1056        clippy::too_many_lines,
1057        reason = "CIGAR rewriting with attribute clipping requires many branches"
1058    )]
1059    pub fn upgrade_all_clipping(&self, record: &mut RecordBuf) -> Result<(usize, usize)> {
1060        // Only upgrade in Hard mode
1061        if !matches!(self.mode, ClippingMode::Hard) {
1062            return Ok((0, 0));
1063        }
1064
1065        // Don't upgrade unmapped reads
1066        if record.flags().is_unmapped() {
1067            return Ok((0, 0));
1068        }
1069        let cigar = record.cigar();
1070        let ops: Vec<CigarOp> = cigar.iter().filter_map(Result::ok).collect();
1071
1072        // Check if there are any soft clips to convert
1073        let has_soft_clips = ops.iter().any(|op| op.kind() == Kind::SoftClip);
1074        if !has_soft_clips {
1075            return Ok((0, 0));
1076        }
1077
1078        // Count leading and trailing soft clips (and existing hard clips)
1079        let mut leading_hard = 0;
1080        let mut leading_soft = 0;
1081        let mut trailing_soft = 0;
1082
1083        // Count leading clips
1084        for op in &ops {
1085            match op.kind() {
1086                Kind::HardClip => leading_hard += op.len(),
1087                Kind::SoftClip => {
1088                    leading_soft += op.len();
1089                    break;
1090                }
1091                _ => break,
1092            }
1093        }
1094
1095        // Count trailing soft clips (from the end)
1096        for op in ops.iter().rev() {
1097            match op.kind() {
1098                Kind::HardClip => {}
1099                Kind::SoftClip => {
1100                    trailing_soft += op.len();
1101                    break;
1102                }
1103                _ => break,
1104            }
1105        }
1106
1107        // Build new CIGAR, converting soft clips to hard clips
1108        let mut new_cigar_ops = Vec::new();
1109        let sequence = record.sequence();
1110        let qualities = record.quality_scores();
1111        let old_seq_len = sequence.len(); // Capture this before any mutations
1112        let mut seq_pos = 0;
1113        let mut new_sequence = Vec::new();
1114        let mut new_qualities = Vec::new();
1115        let mut is_leading = true;
1116
1117        for op in &ops {
1118            let kind = op.kind();
1119            let len = op.len();
1120
1121            match kind {
1122                Kind::SoftClip => {
1123                    // Convert to hard clip
1124                    // Merge with existing hard clips at this position
1125                    if is_leading && new_cigar_ops.is_empty() && leading_hard > 0 {
1126                        // Merge with preceding hard clip
1127                        new_cigar_ops.push(CigarOp::new(Kind::HardClip, leading_hard + len));
1128                    } else if new_cigar_ops.last().map(|o| o.kind()) == Some(Kind::HardClip) {
1129                        // Merge with adjacent hard clip
1130                        // SAFETY: We just checked that last() is Some(HardClip)
1131                        let last_len = new_cigar_ops.last().expect("last() checked above").len();
1132                        new_cigar_ops.pop();
1133                        new_cigar_ops.push(CigarOp::new(Kind::HardClip, last_len + len));
1134                    } else {
1135                        new_cigar_ops.push(CigarOp::new(Kind::HardClip, len));
1136                    }
1137                    seq_pos += len;
1138                }
1139                Kind::HardClip => {
1140                    // Merge with preceding hard clip if present
1141                    if new_cigar_ops.last().map(|o| o.kind()) == Some(Kind::HardClip) {
1142                        // SAFETY: We just checked that last() is Some(HardClip)
1143                        let last_len = new_cigar_ops.last().expect("last() checked above").len();
1144                        new_cigar_ops.pop();
1145                        new_cigar_ops.push(CigarOp::new(Kind::HardClip, last_len + len));
1146                    } else if !is_leading || new_cigar_ops.is_empty() {
1147                        // Keep hard clips but don't add duplicates if we already merged leading
1148                        new_cigar_ops.push(*op);
1149                    }
1150                }
1151                _ => {
1152                    is_leading = false;
1153                    new_cigar_ops.push(*op);
1154                    // Copy bases/quals for query-consuming operations
1155                    let consumes_query = matches!(
1156                        kind,
1157                        Kind::Match
1158                            | Kind::SequenceMatch
1159                            | Kind::SequenceMismatch
1160                            | Kind::Insertion
1161                    );
1162                    if consumes_query {
1163                        for _ in 0..len {
1164                            if seq_pos < sequence.len() {
1165                                new_sequence.push(sequence.as_ref()[seq_pos]);
1166                                new_qualities.push(qualities.as_ref()[seq_pos]);
1167                            }
1168                            seq_pos += 1;
1169                        }
1170                    }
1171                }
1172            }
1173        }
1174
1175        // Update the record
1176        *record.cigar_mut() = CigarBuf::from(new_cigar_ops);
1177
1178        // Clip extended attributes if auto-clipping is enabled (do this BEFORE updating sequence)
1179        if self.auto_clip_attributes && (leading_soft > 0 || trailing_soft > 0) {
1180            // Manually clip attributes that match the original sequence length
1181            use noodles::sam::alignment::record::data::field::Tag;
1182            use noodles::sam::alignment::record_buf::data::field::value::Array;
1183            let mut tags_to_update: Vec<(Vec<u8>, Value)> = Vec::new();
1184
1185            for (tag, value) in record.data().iter() {
1186                let should_clip = match value {
1187                    Value::String(s) => {
1188                        let bytes: &[u8] = s.as_ref();
1189                        bytes.len() == old_seq_len
1190                    }
1191                    Value::Array(arr) => array_len!(arr) == old_seq_len,
1192                    _ => false,
1193                };
1194
1195                if should_clip {
1196                    // Clip from both ends: remove leading_soft from start, trailing_soft from end
1197                    let start = leading_soft;
1198                    let end = old_seq_len - trailing_soft;
1199                    let new_value = match value {
1200                        Value::String(s) => {
1201                            let bytes: &[u8] = s.as_ref();
1202                            Value::from(String::from_utf8_lossy(&bytes[start..end]).to_string())
1203                        }
1204                        Value::Array(arr) => slice_array!(arr, start, end),
1205                        _ => value.clone(),
1206                    };
1207                    tags_to_update.push((tag.as_ref().to_vec(), new_value));
1208                }
1209            }
1210
1211            // Apply updates
1212            for (tag_bytes, value) in tags_to_update {
1213                // SAFETY: Tags are always 2 bytes, as tag_bytes came from tag.as_ref().to_vec() where tag is a 2-byte tag
1214                let tag = Tag::from(
1215                    <[u8; 2]>::try_from(tag_bytes.as_slice())
1216                        .expect("tag bytes are always 2 bytes"),
1217                );
1218                record.data_mut().insert(tag, value);
1219            }
1220        }
1221
1222        *record.sequence_mut() = Sequence::from(new_sequence);
1223        *record.quality_scores_mut() = QualityScores::from(new_qualities);
1224
1225        Ok((leading_soft, trailing_soft))
1226    }
1227
1228    /// Returns the number of bases that are currently clipped in the read
1229    #[must_use]
1230    pub fn clipped_bases(record: &RecordBuf) -> usize {
1231        cigar_utils::clipped_bases(&record.cigar())
1232    }
1233}
1234
1235/// Helper functions for CIGAR manipulation
1236pub mod cigar_utils {
1237    use super::Kind;
1238    use noodles::sam::alignment::record::Cigar as CigarTrait;
1239
1240    /// Counts the number of aligned bases in a CIGAR string
1241    #[must_use]
1242    #[expect(
1243        clippy::redundant_closure_for_method_calls,
1244        reason = "Op::len is not a method on the trait"
1245    )]
1246    pub fn aligned_bases(cigar: &impl CigarTrait) -> usize {
1247        cigar
1248            .iter()
1249            .filter_map(Result::ok)
1250            .filter(|op| {
1251                matches!(op.kind(), Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch)
1252            })
1253            .map(|op| op.len())
1254            .sum()
1255    }
1256
1257    /// Counts the number of clipped bases in a CIGAR string
1258    #[must_use]
1259    #[expect(
1260        clippy::redundant_closure_for_method_calls,
1261        reason = "Op::len is not a method on the trait"
1262    )]
1263    pub fn clipped_bases(cigar: &impl CigarTrait) -> usize {
1264        cigar
1265            .iter()
1266            .filter_map(Result::ok)
1267            .filter(|op| matches!(op.kind(), Kind::SoftClip | Kind::HardClip))
1268            .map(|op| op.len())
1269            .sum()
1270    }
1271
1272    /// Counts reference-consuming operations.
1273    ///
1274    /// Re-exported from [`crate::record_utils::reference_length`] to avoid duplication.
1275    pub use crate::record_utils::reference_length;
1276
1277    /// Simplified CIGAR representation: a vector of (operation kind, length) pairs.
1278    pub type SimplifiedCigar = Vec<(Kind, usize)>;
1279
1280    /// Simplifies a CIGAR string by:
1281    /// 1. Converting S, EQ, X, H operations to M (match)
1282    /// 2. Coalescing adjacent operations of the same type
1283    ///
1284    /// This is useful for comparing read alignments where only indel positions matter.
1285    #[must_use]
1286    pub fn simplify_cigar(cigar: &noodles::sam::alignment::record_buf::Cigar) -> SimplifiedCigar {
1287        let mut simplified: SimplifiedCigar = Vec::new();
1288
1289        for op in cigar.as_ref() {
1290            let (kind, len) = (op.kind(), op.len());
1291
1292            // Convert S, EQ, X, H to M; keep I, D, and M as-is
1293            let new_kind = match kind {
1294                Kind::SoftClip | Kind::SequenceMatch | Kind::SequenceMismatch | Kind::HardClip => {
1295                    Kind::Match
1296                }
1297                _ => kind,
1298            };
1299
1300            // Coalesce adjacent operations of the same type
1301            if let Some((last_kind, last_len)) = simplified.last_mut() {
1302                if *last_kind == new_kind {
1303                    *last_len += len;
1304                    continue;
1305                }
1306            }
1307
1308            simplified.push((new_kind, len));
1309        }
1310
1311        simplified
1312    }
1313
1314    /// Checks if `cigar_a` is a prefix of `cigar_b`.
1315    ///
1316    /// This allows for reads of different lengths to be grouped together if they share
1317    /// the same alignment pattern (same indel positions and lengths).
1318    ///
1319    /// For example, 10M is a prefix of 20M, but 10M1I is not a prefix of 10M1D.
1320    #[must_use]
1321    pub fn is_cigar_prefix(a: &[(Kind, usize)], b: &[(Kind, usize)]) -> bool {
1322        if a.len() > b.len() {
1323            return false;
1324        }
1325
1326        let last_index = a.len().saturating_sub(1);
1327
1328        for (i, &(op_a, len_a)) in a.iter().enumerate() {
1329            let (op_b, len_b) = b[i];
1330            if op_a != op_b {
1331                return false;
1332            }
1333            // For the last element, a's length can be <= b's length (prefix)
1334            // For other elements, lengths must match exactly
1335            if i == last_index {
1336                if len_a > len_b {
1337                    return false;
1338                }
1339            } else if len_a != len_b {
1340                return false;
1341            }
1342        }
1343        true
1344    }
1345}
1346
1347#[cfg(test)]
1348#[expect(clippy::similar_names, reason = "test code uses clipper/clipped naming pattern")]
1349mod tests {
1350    use super::*;
1351    use crate::builder::RecordBuilder;
1352
1353    #[test]
1354    fn test_clipping_mode() {
1355        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1356        assert_eq!(clipper.mode, ClippingMode::Soft);
1357
1358        let clipper = SamRecordClipper::new(ClippingMode::Hard);
1359        assert_eq!(clipper.mode, ClippingMode::Hard);
1360    }
1361
1362    #[test]
1363    fn test_auto_clip() {
1364        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Soft, true);
1365        assert!(clipper.auto_clip_attributes);
1366
1367        let clipper_disabled = SamRecordClipper::with_auto_clip(ClippingMode::Hard, false);
1368        assert!(!clipper_disabled.auto_clip_attributes);
1369    }
1370
1371    use noodles::core::Position;
1372    use noodles::sam::alignment::record::Cigar as CigarTrait;
1373    use noodles::sam::alignment::record::Flags;
1374    use noodles::sam::alignment::record_buf::RecordBuf;
1375
1376    /// Helper to format a CIGAR string for comparison
1377    fn format_cigar(cigar: &impl CigarTrait) -> String {
1378        use std::fmt::Write;
1379        cigar.iter().filter_map(Result::ok).fold(String::new(), |mut acc, op| {
1380            let kind_char = match op.kind() {
1381                Kind::Match => 'M',
1382                Kind::Insertion => 'I',
1383                Kind::Deletion => 'D',
1384                Kind::Skip => 'N',
1385                Kind::SoftClip => 'S',
1386                Kind::HardClip => 'H',
1387                Kind::Pad => 'P',
1388                Kind::SequenceMatch => '=',
1389                Kind::SequenceMismatch => 'X',
1390            };
1391            let _ = write!(acc, "{}{}", op.len(), kind_char);
1392            acc
1393        })
1394    }
1395
1396    /// Helper to create a simple test record with given CIGAR, sequence, and position
1397    fn create_test_record(cigar_str: &str, seq: &str, start_pos: usize) -> RecordBuf {
1398        RecordBuilder::mapped_read()
1399            .sequence(seq)
1400            .cigar(cigar_str)
1401            .alignment_start(start_pos)
1402            .build()
1403    }
1404
1405    // ===================================================================
1406    // clip_5_prime (clipStartOfAlignment) tests - Basic functionality
1407    // ===================================================================
1408
1409    #[test]
1410    fn test_clip_start_of_alignment_soft_matched_bases() {
1411        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1412        let mut record = create_test_record("50M", "ACGTACGTACGT", 10);
1413
1414        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1415        assert_eq!(clipped, 10);
1416        assert_eq!(record.alignment_start(), Position::new(20));
1417
1418        let cigar_str = format_cigar(&record.cigar());
1419        assert_eq!(cigar_str, "10S40M");
1420    }
1421
1422    #[test]
1423    fn test_clip_start_of_alignment_soft_with_insertion() {
1424        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1425        let mut record = create_test_record("4M2I44M", "ACGTACGTACGT", 10);
1426
1427        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1428        assert_eq!(clipped, 10);
1429        assert_eq!(record.alignment_start(), Position::new(18)); // 10 + (4+2 ref consumed)
1430
1431        let cigar_str = format_cigar(&record.cigar());
1432        assert_eq!(cigar_str, "10S40M");
1433    }
1434
1435    #[test]
1436    fn test_clip_start_of_alignment_soft_with_deletion() {
1437        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1438        let mut record = create_test_record("6M2D44M", "ACGTACGTACGT", 10);
1439
1440        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1441        assert_eq!(clipped, 10);
1442        assert_eq!(record.alignment_start(), Position::new(22)); // 10 + 6 + 2 (deletion) + 4
1443
1444        let cigar_str = format_cigar(&record.cigar());
1445        assert_eq!(cigar_str, "10S40M");
1446    }
1447
1448    #[test]
1449    fn test_clip_start_of_alignment_soft_additional_bases() {
1450        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1451        let mut record = create_test_record("10S40M", "ACGTACGTACGT", 10);
1452
1453        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1454        assert_eq!(clipped, 10);
1455        assert_eq!(record.alignment_start(), Position::new(20));
1456
1457        let cigar_str = format_cigar(&record.cigar());
1458        assert_eq!(cigar_str, "20S30M");
1459    }
1460
1461    #[test]
1462    fn test_clip_start_of_alignment_preserve_hard_clip() {
1463        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1464        let mut record = create_test_record("10H40M", "ACGTACGTACGT", 10);
1465
1466        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1467        assert_eq!(clipped, 10);
1468        assert_eq!(record.alignment_start(), Position::new(20));
1469
1470        let cigar_str = format_cigar(&record.cigar());
1471        assert_eq!(cigar_str, "10H10S30M");
1472    }
1473
1474    #[test]
1475    fn test_clip_start_of_alignment_complex_cigar() {
1476        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1477        // 2H4S16M10I5M5I10M
1478        let mut record = create_test_record("2H4S16M10I5M5I10M", "ACGTACGTACGT", 10);
1479
1480        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1481        assert_eq!(clipped, 10);
1482        assert_eq!(record.alignment_start(), Position::new(20));
1483
1484        let cigar_str = format_cigar(&record.cigar());
1485        assert_eq!(cigar_str, "2H14S6M10I5M5I10M");
1486    }
1487
1488    #[test]
1489    fn test_clip_start_of_alignment_consumes_trailing_insertion() {
1490        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1491        let mut record = create_test_record("8M4I38M", "ACGTACGTACGT", 10);
1492
1493        // Ask to clip 10 bases, but should consume 12 bases (8M + 4I) because
1494        // the insertion at the clip boundary is consumed entirely
1495        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1496        assert_eq!(clipped, 12);
1497        assert_eq!(record.alignment_start(), Position::new(18));
1498
1499        let cigar_str = format_cigar(&record.cigar());
1500        assert_eq!(cigar_str, "12S38M");
1501    }
1502
1503    #[test]
1504    fn test_clip_start_of_alignment_preserve_insertion_after_clip() {
1505        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1506        let mut record = create_test_record("10M4I36M", "ACGTACGTACGT", 10);
1507
1508        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1509        assert_eq!(clipped, 10);
1510        assert_eq!(record.alignment_start(), Position::new(20));
1511
1512        let cigar_str = format_cigar(&record.cigar());
1513        assert_eq!(cigar_str, "10S4I36M");
1514    }
1515
1516    #[test]
1517    fn test_clip_start_of_alignment_remove_deletion_after_clip() {
1518        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1519        let mut record = create_test_record("10M4D40M", "ACGTACGTACGT", 10);
1520
1521        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1522        assert_eq!(clipped, 10);
1523        assert_eq!(record.alignment_start(), Position::new(24)); // 10 + 10 + 4 (deletion)
1524
1525        let cigar_str = format_cigar(&record.cigar());
1526        assert_eq!(cigar_str, "10S40M");
1527    }
1528
1529    #[test]
1530    fn test_clip_start_of_alignment_preserve_distant_deletion() {
1531        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1532        let mut record = create_test_record("25M4D25M", "ACGTACGTACGT", 10);
1533
1534        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1535        assert_eq!(clipped, 10);
1536        assert_eq!(record.alignment_start(), Position::new(20));
1537
1538        let cigar_str = format_cigar(&record.cigar());
1539        assert_eq!(cigar_str, "10S15M4D25M");
1540    }
1541
1542    #[test]
1543    fn test_clip_start_of_alignment_soft_with_mask() {
1544        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
1545        let seq = "ACGTACGTAC"; // 10 bases
1546        let mut record = create_test_record("10M", seq, 10);
1547
1548        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
1549        assert_eq!(clipped, 5);
1550
1551        let cigar_str = format_cigar(&record.cigar());
1552        assert_eq!(cigar_str, "5S5M");
1553
1554        // Check first 5 bases are masked to N
1555        let bases = record.sequence();
1556        for i in 0..5 {
1557            assert_eq!(bases.as_ref()[i], b'N', "Base at position {i} should be N");
1558        }
1559
1560        // Check first 5 quals are set to min
1561        let quals = record.quality_scores();
1562        for i in 0..5 {
1563            assert_eq!(quals.as_ref()[i], 2, "Quality at position {i} should be 2");
1564        }
1565    }
1566
1567    #[test]
1568    fn test_clip_start_of_alignment_soft_with_mask_existing_soft_clip() {
1569        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
1570        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
1571        let mut record = create_test_record("10S10M", seq, 10);
1572
1573        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1574        assert_eq!(clipped, 10);
1575
1576        let cigar_str = format_cigar(&record.cigar());
1577        assert_eq!(cigar_str, "20S"); // All bases now soft-clipped (0M operations are removed)
1578
1579        // Check all 20 bases are masked to N
1580        let bases = record.sequence();
1581        for i in 0..20 {
1582            assert_eq!(bases.as_ref()[i], b'N');
1583        }
1584    }
1585
1586    #[test]
1587    fn test_clip_start_of_alignment_unmapped_read_does_nothing() {
1588        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1589        let mut record = create_test_record("50M", "ACGTACGTACGTACGTACGT", 1000);
1590
1591        // Set unmapped flag
1592        *record.flags_mut() = Flags::UNMAPPED;
1593
1594        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
1595
1596        // Should not clip unmapped reads
1597        assert_eq!(clipped, 0);
1598        assert_eq!(record.alignment_start(), Position::new(1000));
1599
1600        // CIGAR should remain unchanged
1601        let cigar_str = format_cigar(&record.cigar());
1602        assert_eq!(cigar_str, "50M");
1603    }
1604
1605    #[test]
1606    fn test_clip_start_of_alignment_soft() {
1607        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1608        let mut record = create_test_record("100M", "ACGTACGTACGT", 1000);
1609
1610        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
1611        assert_eq!(clipped, 5);
1612
1613        // Check that CIGAR starts with soft clip
1614        let cigar = record.cigar();
1615        let first_op = cigar.iter().next().unwrap().unwrap();
1616        assert_eq!(first_op.kind(), Kind::SoftClip);
1617        assert_eq!(first_op.len(), 5);
1618    }
1619
1620    #[test]
1621    fn test_clip_end_of_alignment_soft() {
1622        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1623        let mut record = create_test_record("100M", "ACGTACGTACGT", 1000);
1624
1625        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
1626        assert_eq!(clipped, 5);
1627
1628        // Check that CIGAR ends with soft clip
1629        let cigar = record.cigar();
1630        let ops: Vec<_> = cigar.iter().filter_map(Result::ok).collect();
1631        let last_op = ops.last().unwrap();
1632        assert_eq!(last_op.kind(), Kind::SoftClip);
1633        assert_eq!(last_op.len(), 5);
1634    }
1635
1636    // ===================================================================
1637    // clip_3_prime (clipEndOfAlignment) tests - Extended coverage
1638    // ===================================================================
1639
1640    #[test]
1641    fn test_clip_end_of_alignment_soft_matched_bases() {
1642        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1643        // 50 bases to match 50M CIGAR
1644        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC";
1645        assert_eq!(seq.len(), 50, "Sequence length should be 50");
1646        let mut record = create_test_record("50M", seq, 10);
1647
1648        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
1649        assert_eq!(clipped, 10);
1650        assert_eq!(record.alignment_start(), Position::new(10));
1651
1652        let cigar_str = format_cigar(&record.cigar());
1653        assert_eq!(cigar_str, "40M10S");
1654    }
1655
1656    #[test]
1657    fn test_clip_end_of_alignment_hard_existing_soft_clip() {
1658        let clipper = SamRecordClipper::new(ClippingMode::Hard);
1659        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
1660        assert_eq!(seq.len(), 50, "Sequence length should be 50");
1661        let mut record = create_test_record("40M10S", seq, 10);
1662
1663        let orig_seq: Vec<u8> = record.sequence().as_ref().to_vec();
1664        let orig_qual: Vec<u8> = record.quality_scores().as_ref().to_vec();
1665
1666        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
1667        assert_eq!(clipped, 10);
1668        assert_eq!(record.alignment_start(), Position::new(10));
1669
1670        let cigar_str = format_cigar(&record.cigar());
1671        assert_eq!(cigar_str, "30M20H");
1672
1673        // Check sequence and qualities are truncated
1674        assert_eq!(record.sequence().as_ref(), &orig_seq[..30]);
1675        assert_eq!(record.quality_scores().as_ref(), &orig_qual[..30]);
1676    }
1677
1678    #[test]
1679    fn test_clip_start_of_alignment_hard() {
1680        let clipper = SamRecordClipper::new(ClippingMode::Hard);
1681        let mut record = create_test_record("12M", "ACGTACGTACGT", 1000);
1682
1683        let original_len = record.sequence().len();
1684        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
1685        assert_eq!(clipped, 5);
1686
1687        // Check that sequence is shortened
1688        assert_eq!(record.sequence().len(), original_len - 5);
1689
1690        // Check that CIGAR starts with hard clip
1691        let cigar = record.cigar();
1692        let first_op = cigar.iter().next().unwrap().unwrap();
1693        assert_eq!(first_op.kind(), Kind::HardClip);
1694        assert_eq!(first_op.len(), 5);
1695    }
1696
1697    #[test]
1698    fn test_clip_overlapping_reads_no_overlap() {
1699        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1700
1701        // R1: 1000-1100, R2: 1200-1300 (no overlap)
1702        let mut r1 = create_test_record("100M", "ACGTACGTACGT", 1000);
1703        let mut r2 = create_test_record("100M", "TGCATGCATGCA", 1200);
1704
1705        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
1706
1707        // No clipping should occur
1708        assert_eq!(clipped_r1, 0);
1709        assert_eq!(clipped_r2, 0);
1710    }
1711
1712    #[test]
1713    fn test_clip_overlapping_reads_with_overlap() {
1714        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1715
1716        // R1: 1000-1099 (forward), R2: 1050-1149 (reverse) - 50bp overlap
1717        // Create proper FR pair
1718        let mut r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, true, 1050, "100M");
1719        let mut r2 = create_paired_record("100M", "TGCATGCATGCA", 1050, true, false, 1000, "100M");
1720
1721        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
1722
1723        // With midpoint algorithm, both reads should be clipped
1724        // Midpoint = (1000 + 1149) / 2 = 1074
1725        // R1 clips from 1075 onwards (approximately 25 bases)
1726        // R2 clips from start to 1074 (approximately 25 bases)
1727        assert!(clipped_r1 > 0, "R1 should be clipped");
1728        assert!(clipped_r2 > 0, "R2 should be clipped");
1729
1730        // Both should clip roughly equal amounts (within a few bases due to rounding)
1731        let diff = (i32::try_from(clipped_r1).unwrap() - i32::try_from(clipped_r2).unwrap()).abs();
1732        assert!(
1733            diff <= 2,
1734            "Clipping should be roughly equal, but got {clipped_r1} vs {clipped_r2}"
1735        );
1736    }
1737
1738    #[test]
1739    fn test_clip_extending_past_mate_ends() {
1740        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1741
1742        // R1: 1000-1149, R2: 1100-1199
1743        // R1 does NOT extend past R2's end (1149 < 1199)
1744        // R2 does NOT start before R1's start (1100 > 1000)
1745        // So neither should be clipped
1746        let mut r1 = create_paired_record("150M", "ACGTACGTACGT", 1000, false, true, 1100, "100M");
1747        let mut r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, false, 1000, "150M");
1748
1749        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
1750
1751        // Neither should be clipped
1752        assert_eq!(clipped_r1, 0);
1753        assert_eq!(clipped_r2, 0);
1754    }
1755
1756    #[test]
1757    fn test_cigar_utils_reference_length() {
1758        let record = create_test_record("50M10I40M", "ACGTACGTACGT", 1000);
1759        let ref_len = cigar_utils::reference_length(&record.cigar());
1760
1761        // 50M + 40M = 90 (insertion doesn't consume reference)
1762        assert_eq!(ref_len, 90);
1763    }
1764
1765    #[test]
1766    fn test_cigar_utils_aligned_bases() {
1767        let record = create_test_record("50M10I40M", "ACGTACGTACGT", 1000);
1768        let aligned = cigar_utils::aligned_bases(&record.cigar());
1769
1770        // 50M + 40M = 90 (insertion is not aligned)
1771        assert_eq!(aligned, 90);
1772    }
1773
1774    #[test]
1775    fn test_cigar_utils_clipped_bases() {
1776        let record = create_test_record("5S90M5S", "ACGTACGTACGT", 1000);
1777        let clipped = cigar_utils::clipped_bases(&record.cigar());
1778
1779        // 5S + 5S = 10
1780        assert_eq!(clipped, 10);
1781    }
1782
1783    #[test]
1784    fn test_clip_start_of_alignment_with_existing_soft_clip() {
1785        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1786        let mut record = create_test_record("5S95M", "ACGTACGTACGT", 1000);
1787
1788        // Clip additional 3 bases from 5' end
1789        let clipped = clipper.clip_start_of_alignment(&mut record, 3);
1790        assert_eq!(clipped, 3);
1791
1792        // Total soft clip should be 5 + 3 = 8
1793        let cigar = record.cigar();
1794        let first_op = cigar.iter().next().unwrap().unwrap();
1795        assert_eq!(first_op.kind(), Kind::SoftClip);
1796    }
1797
1798    #[test]
1799    fn test_clip_entire_read() {
1800        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1801        let seq = "ACGTACGTACGT";
1802        let mut record = create_test_record("12M", seq, 1000);
1803
1804        // Try to clip more than the read length
1805        let clipped = clipper.clip_start_of_alignment(&mut record, 20);
1806
1807        // Should only clip up to sequence length
1808        assert_eq!(clipped, seq.len());
1809    }
1810
1811    #[test]
1812    fn test_clip_zero_bases() {
1813        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1814        let mut record = create_test_record("100M", "ACGTACGTACGT", 1000);
1815
1816        let clipped = clipper.clip_start_of_alignment(&mut record, 0);
1817        assert_eq!(clipped, 0);
1818
1819        // CIGAR should remain unchanged
1820        let cigar = record.cigar();
1821        let first_op = cigar.iter().next().unwrap().unwrap();
1822        assert_eq!(first_op.kind(), Kind::Match);
1823    }
1824
1825    /// Helper to create a paired read with specific flags and positions
1826    fn create_paired_record(
1827        cigar_str: &str,
1828        seq: &str,
1829        start_pos: usize,
1830        is_reverse: bool,
1831        mate_reverse: bool,
1832        mate_pos: usize,
1833        mate_cigar_str: &str,
1834    ) -> RecordBuf {
1835        let mut record = create_test_record(cigar_str, seq, start_pos);
1836
1837        // Set paired flags
1838        let mut flags = Flags::SEGMENTED;
1839        if is_reverse {
1840            flags |= Flags::REVERSE_COMPLEMENTED;
1841        }
1842        if mate_reverse {
1843            flags |= Flags::MATE_REVERSE_COMPLEMENTED;
1844        }
1845        *record.flags_mut() = flags;
1846
1847        // Set mate position
1848        *record.mate_alignment_start_mut() = Position::new(mate_pos);
1849
1850        // Set same reference for both reads
1851        *record.mate_reference_sequence_id_mut() = record.reference_sequence_id();
1852
1853        // Calculate and set template_length for FR pair detection
1854        // Parse mate CIGAR to calculate its reference length
1855        let mate_cigar_ops: Vec<CigarOp> = mate_cigar_str
1856            .split(|c: char| !c.is_numeric())
1857            .filter(|s| !s.is_empty())
1858            .zip(mate_cigar_str.chars().filter(|c| c.is_alphabetic()))
1859            .map(|(len_str, kind_char)| {
1860                let len: usize = len_str.parse().unwrap();
1861                let kind = match kind_char {
1862                    'M' => Kind::Match,
1863                    'I' => Kind::Insertion,
1864                    'D' => Kind::Deletion,
1865                    'S' => Kind::SoftClip,
1866                    'H' => Kind::HardClip,
1867                    'N' => Kind::Skip,
1868                    _ => panic!("Invalid CIGAR operation: {kind_char}"),
1869                };
1870                CigarOp::new(kind, len)
1871            })
1872            .collect();
1873        let mate_cigar = CigarBuf::from(mate_cigar_ops);
1874        let mate_ref_len = cigar_utils::reference_length(&mate_cigar);
1875        let ref_len = cigar_utils::reference_length(&record.cigar());
1876
1877        let tlen = if is_reverse {
1878            // This read is reverse, mate is forward
1879            // tlen = -(this_end - mate_start + 1)
1880            let this_end = start_pos + ref_len - 1;
1881            if this_end >= mate_pos {
1882                -i32::try_from(this_end - mate_pos + 1).unwrap()
1883            } else {
1884                i32::try_from(mate_pos - this_end - 1).unwrap()
1885            }
1886        } else {
1887            // This read is forward, mate is reverse
1888            // tlen = mate_end - this_start + 1
1889            let mate_end = mate_pos + mate_ref_len - 1;
1890            if mate_end >= start_pos {
1891                i32::try_from(mate_end - start_pos + 1).unwrap()
1892            } else {
1893                -i32::try_from(start_pos - mate_end - 1).unwrap()
1894            }
1895        };
1896
1897        *record.template_length_mut() = tlen;
1898
1899        record
1900    }
1901
1902    #[test]
1903    fn test_is_fr_pair_valid() {
1904        // Create valid FR pair: R1 forward, R2 reverse
1905        let r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, true, 1100, "100M");
1906        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, false, 1000, "100M");
1907
1908        assert!(record_utils::is_fr_pair(&r1, &r2));
1909    }
1910
1911    #[test]
1912    fn test_is_fr_pair_both_forward() {
1913        // FF orientation - should not be considered FR
1914        let r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, false, 1100, "100M");
1915        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, false, false, 1000, "100M");
1916
1917        assert!(!record_utils::is_fr_pair(&r1, &r2));
1918    }
1919
1920    #[test]
1921    fn test_is_fr_pair_both_reverse() {
1922        // RR orientation - should not be considered FR
1923        let r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, true, true, 1100, "100M");
1924        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, true, 1000, "100M");
1925
1926        assert!(!record_utils::is_fr_pair(&r1, &r2));
1927    }
1928
1929    #[test]
1930    fn test_is_fr_pair_rf_orientation() {
1931        // RF orientation (reverse of FR) - should not be considered FR
1932        let r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, true, false, 1100, "100M");
1933        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, false, true, 1000, "100M");
1934
1935        assert!(!record_utils::is_fr_pair(&r1, &r2));
1936    }
1937
1938    #[test]
1939    fn test_is_fr_pair_unmapped() {
1940        let mut r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, true, 1100, "100M");
1941        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, false, 1000, "150M");
1942
1943        // Set R1 as unmapped
1944        *r1.flags_mut() |= Flags::UNMAPPED;
1945
1946        assert!(!record_utils::is_fr_pair(&r1, &r2));
1947    }
1948
1949    #[test]
1950    fn test_is_fr_pair_mate_unmapped() {
1951        let mut r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, true, 1100, "100M");
1952        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, false, 1000, "150M");
1953
1954        // Set R1's mate as unmapped
1955        *r1.flags_mut() |= Flags::MATE_UNMAPPED;
1956
1957        assert!(!record_utils::is_fr_pair(&r1, &r2));
1958    }
1959
1960    #[test]
1961    fn test_is_fr_pair_different_chromosomes() {
1962        let r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, true, 1100, "100M");
1963        let mut r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, false, 1000, "150M");
1964
1965        // Set R2 to different reference sequence
1966        *r2.reference_sequence_id_mut() = Some(1);
1967
1968        assert!(!record_utils::is_fr_pair(&r1, &r2));
1969    }
1970
1971    #[test]
1972    fn test_is_fr_pair_not_paired() {
1973        let mut r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, true, 1100, "100M");
1974        let r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, false, 1000, "150M");
1975
1976        // Remove paired flag from R1
1977        *r1.flags_mut() = Flags::empty();
1978
1979        assert!(!record_utils::is_fr_pair(&r1, &r2));
1980    }
1981
1982    #[test]
1983    fn test_clip_overlapping_reads_non_fr_pair() {
1984        let clipper = SamRecordClipper::new(ClippingMode::Soft);
1985
1986        // Create FF pair (both forward) with overlap
1987        // R1: 1000-1100, R2: 1050-1150 (50bp overlap)
1988        let mut r1 = create_paired_record("100M", "ACGTACGTACGT", 1000, false, false, 1050, "100M");
1989        let mut r2 = create_paired_record("100M", "TGCATGCATGCA", 1050, false, false, 1000, "100M");
1990
1991        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
1992
1993        // Should NOT clip because this is not an FR pair
1994        assert_eq!(clipped_r1, 0);
1995        assert_eq!(clipped_r2, 0);
1996    }
1997
1998    #[test]
1999    fn test_clip_extending_past_mate_ends_non_fr_pair() {
2000        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2001
2002        // Create RR pair (both reverse) with extension
2003        // R1: 1000-1150, R2: 1100-1200
2004        let mut r1 = create_paired_record("150M", "ACGTACGTACGT", 1000, true, true, 1100, "100M");
2005        let mut r2 = create_paired_record("100M", "TGCATGCATGCA", 1100, true, true, 1000, "150M");
2006
2007        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
2008
2009        // Should NOT clip because this is not an FR pair
2010        assert_eq!(clipped_r1, 0);
2011        assert_eq!(clipped_r2, 0);
2012    }
2013
2014    #[test]
2015    fn test_auto_clip_attributes_string_5_prime() {
2016        use noodles::sam::alignment::record::data::field::Tag;
2017
2018        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2019        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2020
2021        // Add a string attribute that matches the read length
2022        let tag = Tag::from([b'X', b'S']);
2023        let value = Value::from("0123456789");
2024        record.data_mut().insert(tag, value);
2025
2026        // Clip 3 bases from 5' end
2027        let clipped = clipper.clip_start_of_alignment(&mut record, 3);
2028        assert_eq!(clipped, 3);
2029
2030        // Check that the attribute was clipped
2031        if let Some(Value::String(s)) = record.data().get(&tag) {
2032            let bytes: &[u8] = s.as_ref();
2033            assert_eq!(bytes, b"3456789");
2034        } else {
2035            panic!("Tag XS not found or wrong type");
2036        }
2037    }
2038
2039    #[test]
2040    fn test_auto_clip_attributes_string_3_prime() {
2041        use noodles::sam::alignment::record::data::field::Tag;
2042
2043        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2044        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2045
2046        // Add a string attribute that matches the read length
2047        let tag = Tag::from([b'X', b'S']);
2048        let value = Value::from("0123456789");
2049        record.data_mut().insert(tag, value);
2050
2051        // Clip 3 bases from 3' end
2052        let clipped = clipper.clip_end_of_alignment(&mut record, 3);
2053        assert_eq!(clipped, 3);
2054
2055        // Check that the attribute was clipped
2056        if let Some(Value::String(s)) = record.data().get(&tag) {
2057            let bytes: &[u8] = s.as_ref();
2058            assert_eq!(bytes, b"0123456");
2059        } else {
2060            panic!("Tag XS not found or wrong type");
2061        }
2062    }
2063
2064    #[test]
2065    fn test_auto_clip_attributes_array_5_prime() {
2066        use noodles::sam::alignment::record::data::field::Tag;
2067        use noodles::sam::alignment::record_buf::data::field::value::Array;
2068
2069        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2070        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2071
2072        // Add a UInt8Array attribute that matches the read length
2073        let tag = Tag::from([b'X', b'A']);
2074        let array: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
2075        let value = Value::from(array);
2076        record.data_mut().insert(tag, value);
2077
2078        // Clip 3 bases from 5' end
2079        let clipped = clipper.clip_start_of_alignment(&mut record, 3);
2080        assert_eq!(clipped, 3);
2081
2082        // Check that the attribute was clipped
2083        if let Some(Value::Array(Array::UInt8(arr))) = record.data().get(&tag) {
2084            let vec_data: Vec<u8> = arr.clone();
2085            assert_eq!(vec_data, vec![3, 4, 5, 6, 7, 8, 9]);
2086        } else {
2087            panic!("Tag XA not found or wrong type");
2088        }
2089    }
2090
2091    #[test]
2092    fn test_auto_clip_attributes_array_3_prime() {
2093        use noodles::sam::alignment::record::data::field::Tag;
2094        use noodles::sam::alignment::record_buf::data::field::value::Array;
2095
2096        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2097        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2098
2099        // Add a UInt8Array attribute that matches the read length
2100        let tag = Tag::from([b'X', b'A']);
2101        let array: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
2102        let value = Value::from(array);
2103        record.data_mut().insert(tag, value);
2104
2105        // Clip 3 bases from 3' end
2106        let clipped = clipper.clip_end_of_alignment(&mut record, 3);
2107        assert_eq!(clipped, 3);
2108
2109        // Check that the attribute was clipped
2110        if let Some(Value::Array(Array::UInt8(arr))) = record.data().get(&tag) {
2111            let vec_data: Vec<u8> = arr.clone();
2112            assert_eq!(vec_data, vec![0, 1, 2, 3, 4, 5, 6]);
2113        } else {
2114            panic!("Tag XA not found or wrong type");
2115        }
2116    }
2117
2118    #[test]
2119    fn test_auto_clip_attributes_only_in_hard_mode() {
2120        use noodles::sam::alignment::record::data::field::Tag;
2121
2122        // Test with Soft mode - attributes should NOT be clipped
2123        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Soft, true);
2124        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2125
2126        let tag = Tag::from([b'X', b'S']);
2127        let value = Value::from("0123456789");
2128        record.data_mut().insert(tag, value);
2129
2130        clipper.clip_start_of_alignment(&mut record, 3);
2131
2132        // Attribute should remain unchanged in Soft mode
2133        if let Some(Value::String(s)) = record.data().get(&tag) {
2134            let bytes: &[u8] = s.as_ref();
2135            assert_eq!(bytes, b"0123456789");
2136        }
2137    }
2138
2139    #[test]
2140    fn test_auto_clip_attributes_only_when_enabled() {
2141        use noodles::sam::alignment::record::data::field::Tag;
2142
2143        // Test with auto_clip_attributes disabled - attributes should NOT be clipped
2144        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, false);
2145        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2146
2147        let tag = Tag::from([b'X', b'S']);
2148        let value = Value::from("0123456789");
2149        record.data_mut().insert(tag, value);
2150
2151        clipper.clip_start_of_alignment(&mut record, 3);
2152
2153        // Attribute should remain unchanged when auto-clip is disabled
2154        if let Some(Value::String(s)) = record.data().get(&tag) {
2155            let bytes: &[u8] = s.as_ref();
2156            assert_eq!(bytes, b"0123456789");
2157        }
2158    }
2159
2160    #[test]
2161    fn test_auto_clip_attributes_only_matching_length() {
2162        use noodles::sam::alignment::record::data::field::Tag;
2163
2164        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2165        let mut record = create_test_record("10M", "ACGTACGTAC", 1000);
2166
2167        // Add two attributes: one matching length, one not
2168        let tag1 = Tag::from([b'X', b'1']);
2169        let value1 = Value::from("0123456789"); // Matches length (10)
2170        record.data_mut().insert(tag1, value1);
2171
2172        let tag2 = Tag::from([b'X', b'2']);
2173        let value2 = Value::from("01234"); // Does not match length (5)
2174        record.data_mut().insert(tag2, value2);
2175
2176        clipper.clip_start_of_alignment(&mut record, 3);
2177
2178        // Check tag1 was clipped
2179        if let Some(Value::String(s)) = record.data().get(&tag1) {
2180            let bytes: &[u8] = s.as_ref();
2181            assert_eq!(bytes, b"3456789");
2182        }
2183
2184        // Check tag2 was NOT clipped
2185        if let Some(Value::String(s)) = record.data().get(&tag2) {
2186            let bytes: &[u8] = s.as_ref();
2187            assert_eq!(bytes, b"01234");
2188        }
2189    }
2190
2191    // ===================================================================
2192    // clip_3_prime tests - Additional coverage matching Scala
2193    // ===================================================================
2194
2195    #[test]
2196    fn test_clip_end_of_alignment_soft_with_insertion() {
2197        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2198        // 44M + 2I + 4M = 50 bases
2199        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2200        assert_eq!(seq.len(), 50);
2201        let mut record = create_test_record("44M2I4M", seq, 10);
2202
2203        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2204        assert_eq!(clipped, 10);
2205        assert_eq!(record.alignment_start(), Position::new(10));
2206
2207        let cigar_str = format_cigar(&record.cigar());
2208        assert_eq!(cigar_str, "40M10S");
2209    }
2210
2211    #[test]
2212    fn test_clip_end_of_alignment_soft_with_deletion() {
2213        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2214        // 44M + 2D + 6M = 50 bases in query
2215        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2216        assert_eq!(seq.len(), 50);
2217        let mut record = create_test_record("44M2D6M", seq, 10);
2218
2219        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2220        assert_eq!(clipped, 10);
2221        assert_eq!(record.alignment_start(), Position::new(10));
2222
2223        let cigar_str = format_cigar(&record.cigar());
2224        assert_eq!(cigar_str, "40M10S");
2225    }
2226
2227    #[test]
2228    fn test_clip_end_of_alignment_soft_additional_bases() {
2229        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2230        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2231        let mut record = create_test_record("40M10S", seq, 10);
2232
2233        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2234        assert_eq!(clipped, 10);
2235        assert_eq!(record.alignment_start(), Position::new(10));
2236
2237        let cigar_str = format_cigar(&record.cigar());
2238        assert_eq!(cigar_str, "30M20S");
2239    }
2240
2241    #[test]
2242    fn test_clip_end_of_alignment_preserve_hard_clip() {
2243        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2244        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 40 bases (no hard clip in sequence)
2245        let mut record = create_test_record("40M10H", &seq[..40], 10);
2246
2247        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2248        assert_eq!(clipped, 10);
2249        assert_eq!(record.alignment_start(), Position::new(10));
2250
2251        let cigar_str = format_cigar(&record.cigar());
2252        assert_eq!(cigar_str, "30M10S10H");
2253    }
2254
2255    #[test]
2256    fn test_clip_end_of_alignment_complex_cigar() {
2257        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2258        // 10M + 5I + 5M + 10I + 16M + 4S + 2H = 50 query bases
2259        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2260        let mut record = create_test_record("10M5I5M10I16M4S2H", seq, 10);
2261
2262        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2263        assert_eq!(clipped, 10);
2264        assert_eq!(record.alignment_start(), Position::new(10));
2265
2266        let cigar_str = format_cigar(&record.cigar());
2267        assert_eq!(cigar_str, "10M5I5M10I6M14S2H");
2268    }
2269
2270    #[test]
2271    fn test_clip_end_of_alignment_consumes_leading_insertion() {
2272        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2273        // 38M + 4I + 8M = 50 bases
2274        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2275        let mut record = create_test_record("38M4I8M", seq, 10);
2276
2277        // Ask to clip 10 bases, but should consume 12 bases (4I + 8M) because
2278        // the insertion at the clip boundary is consumed entirely
2279        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2280        assert_eq!(clipped, 12);
2281        assert_eq!(record.alignment_start(), Position::new(10));
2282
2283        let cigar_str = format_cigar(&record.cigar());
2284        assert_eq!(cigar_str, "38M12S");
2285    }
2286
2287    #[test]
2288    fn test_clip_end_of_alignment_preserve_insertion_after_clip() {
2289        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2290        // 36M + 4I + 10M = 50 bases
2291        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2292        let mut record = create_test_record("36M4I10M", seq, 10);
2293
2294        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2295        assert_eq!(clipped, 10);
2296        assert_eq!(record.alignment_start(), Position::new(10));
2297
2298        let cigar_str = format_cigar(&record.cigar());
2299        assert_eq!(cigar_str, "36M4I10S");
2300    }
2301
2302    #[test]
2303    fn test_clip_end_of_alignment_remove_deletion_before_clip() {
2304        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2305        // 40M + 4D + 10M = 50 query bases
2306        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2307        let mut record = create_test_record("40M4D10M", seq, 10);
2308
2309        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2310        assert_eq!(clipped, 10);
2311        assert_eq!(record.alignment_start(), Position::new(10));
2312
2313        let cigar_str = format_cigar(&record.cigar());
2314        assert_eq!(cigar_str, "40M10S");
2315    }
2316
2317    #[test]
2318    fn test_clip_end_of_alignment_preserve_distant_deletion() {
2319        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2320        // 25M + 4D + 25M = 50 query bases
2321        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2322        let mut record = create_test_record("25M4D25M", seq, 10);
2323
2324        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2325        assert_eq!(clipped, 10);
2326        assert_eq!(record.alignment_start(), Position::new(10));
2327
2328        let cigar_str = format_cigar(&record.cigar());
2329        assert_eq!(cigar_str, "25M4D15M10S");
2330    }
2331
2332    #[test]
2333    fn test_clip_end_of_alignment_unmapped_read_does_nothing() {
2334        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2335        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2336        let mut record = create_test_record("50M", seq, 10);
2337
2338        // Set unmapped flag
2339        *record.flags_mut() = Flags::UNMAPPED;
2340
2341        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2342
2343        // Should not clip unmapped reads
2344        assert_eq!(clipped, 0);
2345        assert_eq!(record.alignment_start(), Position::new(10));
2346
2347        // CIGAR should remain unchanged
2348        let cigar_str = format_cigar(&record.cigar());
2349        assert_eq!(cigar_str, "50M");
2350    }
2351
2352    #[test]
2353    fn test_clip_end_of_alignment_soft_with_mask() {
2354        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
2355        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2356        let mut record = create_test_record("50M", seq, 10);
2357
2358        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2359        assert_eq!(clipped, 10);
2360        assert_eq!(record.alignment_start(), Position::new(10));
2361
2362        let cigar_str = format_cigar(&record.cigar());
2363        assert_eq!(cigar_str, "40M10S");
2364
2365        // Check last 10 bases are masked to N
2366        let bases = record.sequence();
2367        for i in 40..50 {
2368            assert_eq!(bases.as_ref()[i], b'N', "Base at position {i} should be N");
2369        }
2370
2371        // Check last 10 quals are set to min
2372        let quals = record.quality_scores();
2373        for i in 40..50 {
2374            assert_eq!(quals.as_ref()[i], 2, "Quality at position {i} should be 2");
2375        }
2376    }
2377
2378    #[test]
2379    fn test_clip_end_of_alignment_soft_with_mask_existing_soft_clip() {
2380        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
2381        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2382        let mut record = create_test_record("40M10S", seq, 10);
2383
2384        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2385        assert_eq!(clipped, 10);
2386        assert_eq!(record.alignment_start(), Position::new(10));
2387
2388        let cigar_str = format_cigar(&record.cigar());
2389        assert_eq!(cigar_str, "30M20S");
2390
2391        // Check last 20 bases are masked to N
2392        let bases = record.sequence();
2393        for i in 30..50 {
2394            assert_eq!(bases.as_ref()[i], b'N', "Base at position {i} should be N");
2395        }
2396
2397        // Check last 20 quals are set to min
2398        let quals = record.quality_scores();
2399        for i in 30..50 {
2400            assert_eq!(quals.as_ref()[i], 2, "Quality at position {i} should be 2");
2401        }
2402    }
2403
2404    #[test]
2405    fn test_clip_end_of_alignment_hard() {
2406        let clipper = SamRecordClipper::new(ClippingMode::Hard);
2407        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2408        let mut record = create_test_record("50M", seq, 10);
2409
2410        let orig_seq: Vec<u8> = record.sequence().as_ref().to_vec();
2411        let orig_qual: Vec<u8> = record.quality_scores().as_ref().to_vec();
2412
2413        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
2414        assert_eq!(clipped, 10);
2415        assert_eq!(record.alignment_start(), Position::new(10));
2416
2417        let cigar_str = format_cigar(&record.cigar());
2418        assert_eq!(cigar_str, "40M10H");
2419
2420        // Check sequence and qualities are truncated
2421        assert_eq!(record.sequence().as_ref(), &orig_seq[..40]);
2422        assert_eq!(record.quality_scores().as_ref(), &orig_qual[..40]);
2423    }
2424
2425    #[test]
2426    fn test_clip_start_of_alignment_hard_existing_soft_clip() {
2427        let clipper = SamRecordClipper::new(ClippingMode::Hard);
2428        let seq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC"; // 50 bases
2429        assert_eq!(seq.len(), 50);
2430        let mut record = create_test_record("10S40M", seq, 10);
2431
2432        let orig_seq: Vec<u8> = record.sequence().as_ref().to_vec();
2433        let orig_qual: Vec<u8> = record.quality_scores().as_ref().to_vec();
2434
2435        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
2436        assert_eq!(clipped, 10);
2437        assert_eq!(record.alignment_start(), Position::new(20));
2438
2439        let cigar_str = format_cigar(&record.cigar());
2440        assert_eq!(cigar_str, "20H30M");
2441
2442        // Check sequence and qualities are truncated (removed 10S + 10M = 20 bases)
2443        assert_eq!(record.sequence().as_ref(), &orig_seq[20..]);
2444        assert_eq!(record.quality_scores().as_ref(), &orig_qual[20..]);
2445    }
2446
2447    // ===================================================================
2448    // Auto-trim attribute tests - all mode combinations
2449    // ===================================================================
2450
2451    #[test]
2452    fn test_clip_start_of_alignment_auto_trim_soft_mode_false() {
2453        use noodles::sam::alignment::record::data::field::Tag;
2454
2455        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Soft, false);
2456        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2457        let mut record = create_test_record("20M", seq, 10);
2458
2459        let a1_tag = Tag::from([b'A', b'1']);
2460        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2461
2462        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
2463        assert_eq!(clipped, 5);
2464
2465        // In Soft mode with auto=false, attributes should NOT be modified
2466        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2467            let bytes: &[u8] = s.as_ref();
2468            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2469        }
2470    }
2471
2472    #[test]
2473    fn test_clip_start_of_alignment_auto_trim_soft_mode_true() {
2474        use noodles::sam::alignment::record::data::field::Tag;
2475
2476        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Soft, true);
2477        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2478        let mut record = create_test_record("20M", seq, 10);
2479
2480        let a1_tag = Tag::from([b'A', b'1']);
2481        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2482
2483        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
2484        assert_eq!(clipped, 5);
2485
2486        // In Soft mode, even with auto=true, attributes should NOT be modified
2487        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2488            let bytes: &[u8] = s.as_ref();
2489            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2490        }
2491    }
2492
2493    #[test]
2494    fn test_clip_start_of_alignment_auto_trim_soft_with_mask_mode_false() {
2495        use noodles::sam::alignment::record::data::field::Tag;
2496
2497        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::SoftWithMask, false);
2498        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2499        let mut record = create_test_record("20M", seq, 10);
2500
2501        let a1_tag = Tag::from([b'A', b'1']);
2502        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2503
2504        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
2505        assert_eq!(clipped, 5);
2506
2507        // In SoftWithMask mode with auto=false, attributes should NOT be modified
2508        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2509            let bytes: &[u8] = s.as_ref();
2510            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2511        }
2512    }
2513
2514    #[test]
2515    fn test_clip_start_of_alignment_auto_trim_soft_with_mask_mode_true() {
2516        use noodles::sam::alignment::record::data::field::Tag;
2517
2518        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::SoftWithMask, true);
2519        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2520        let mut record = create_test_record("20M", seq, 10);
2521
2522        let a1_tag = Tag::from([b'A', b'1']);
2523        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2524
2525        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
2526        assert_eq!(clipped, 5);
2527
2528        // In SoftWithMask mode, even with auto=true, attributes should NOT be modified
2529        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2530            let bytes: &[u8] = s.as_ref();
2531            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2532        }
2533    }
2534
2535    #[test]
2536    fn test_clip_start_of_alignment_auto_trim_hard_mode_false() {
2537        use noodles::sam::alignment::record::data::field::Tag;
2538        use noodles::sam::alignment::record_buf::data::field::value::Array;
2539
2540        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, false);
2541        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2542        let mut record = create_test_record("20M", seq, 10);
2543
2544        let a1_tag = Tag::from([b'A', b'1']);
2545        let a2_tag = Tag::from([b'A', b'2']);
2546
2547        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2548        record.data_mut().insert(a2_tag, Value::from((1..=20).collect::<Vec<i32>>()));
2549
2550        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
2551        assert_eq!(clipped, 5);
2552
2553        // In Hard mode with auto=false, attributes should NOT be modified
2554        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2555            let bytes: &[u8] = s.as_ref();
2556            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2557        }
2558        if let Some(Value::Array(Array::Int32(arr))) = record.data().get(&a2_tag) {
2559            let vec: Vec<i32> = arr.clone();
2560            assert_eq!(vec, (1..=20).collect::<Vec<i32>>());
2561        }
2562    }
2563
2564    #[test]
2565    fn test_clip_start_of_alignment_auto_trim_hard_mode_true() {
2566        use noodles::sam::alignment::record::data::field::Tag;
2567        use noodles::sam::alignment::record_buf::data::field::value::Array;
2568
2569        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2570        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2571        let mut record = create_test_record("20M", seq, 10);
2572
2573        let a1_tag = Tag::from([b'A', b'1']);
2574        let a2_tag = Tag::from([b'A', b'2']);
2575        let b1_tag = Tag::from([b'B', b'1']);
2576        let b2_tag = Tag::from([b'B', b'2']);
2577
2578        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2579        record.data_mut().insert(a2_tag, Value::from((1..=20).collect::<Vec<i32>>()));
2580        record.data_mut().insert(b1_tag, Value::from("A".repeat(10)));
2581        record.data_mut().insert(b2_tag, Value::from((1..=10).collect::<Vec<i32>>()));
2582
2583        let clipped = clipper.clip_start_of_alignment(&mut record, 5);
2584        assert_eq!(clipped, 5);
2585
2586        // In Hard mode with auto=true, attributes matching read length should be clipped
2587        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2588            // "ABABABABABABABABABAB" -> remove first 5 -> "BABABABABABABAB"
2589            let bytes: &[u8] = s.as_ref();
2590            assert_eq!(bytes, "BABABABABABABAB".as_bytes());
2591        }
2592        if let Some(Value::Array(Array::Int32(arr))) = record.data().get(&a2_tag) {
2593            let vec: Vec<i32> = arr.clone();
2594            assert_eq!(vec, (6..=20).collect::<Vec<i32>>());
2595        }
2596        // B1 and B2 should NOT be modified (length doesn't match)
2597        if let Some(Value::String(s)) = record.data().get(&b1_tag) {
2598            let bytes: &[u8] = s.as_ref();
2599            assert_eq!(bytes, "A".repeat(10).as_bytes());
2600        }
2601        if let Some(Value::Array(Array::Int32(arr))) = record.data().get(&b2_tag) {
2602            let vec: Vec<i32> = arr.clone();
2603            assert_eq!(vec, (1..=10).collect::<Vec<i32>>());
2604        }
2605    }
2606
2607    #[test]
2608    fn test_clip_end_of_alignment_auto_trim_soft_mode_false() {
2609        use noodles::sam::alignment::record::data::field::Tag;
2610
2611        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Soft, false);
2612        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2613        let mut record = create_test_record("20M", seq, 10);
2614
2615        let a1_tag = Tag::from([b'A', b'1']);
2616        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2617
2618        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
2619        assert_eq!(clipped, 5);
2620
2621        // In Soft mode with auto=false, attributes should NOT be modified
2622        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2623            let bytes: &[u8] = s.as_ref();
2624            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2625        }
2626    }
2627
2628    #[test]
2629    fn test_clip_end_of_alignment_auto_trim_soft_mode_true() {
2630        use noodles::sam::alignment::record::data::field::Tag;
2631
2632        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Soft, true);
2633        let seq = "ACGTACGTACGTACGTACGTACGT"; // 20 bases
2634        let mut record = create_test_record("20M", seq, 10);
2635
2636        let a1_tag = Tag::from([b'A', b'1']);
2637        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2638
2639        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
2640        assert_eq!(clipped, 5);
2641
2642        // In Soft mode, even with auto=true, attributes should NOT be modified
2643        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2644            let bytes: &[u8] = s.as_ref();
2645            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2646        }
2647    }
2648
2649    #[test]
2650    fn test_clip_end_of_alignment_auto_trim_soft_with_mask_mode_false() {
2651        use noodles::sam::alignment::record::data::field::Tag;
2652
2653        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::SoftWithMask, false);
2654        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2655        let mut record = create_test_record("20M", seq, 10);
2656
2657        let a1_tag = Tag::from([b'A', b'1']);
2658        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2659
2660        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
2661        assert_eq!(clipped, 5);
2662
2663        // In SoftWithMask mode with auto=false, attributes should NOT be modified
2664        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2665            let bytes: &[u8] = s.as_ref();
2666            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2667        }
2668    }
2669
2670    #[test]
2671    fn test_clip_end_of_alignment_auto_trim_soft_with_mask_mode_true() {
2672        use noodles::sam::alignment::record::data::field::Tag;
2673
2674        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::SoftWithMask, true);
2675        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2676        let mut record = create_test_record("20M", seq, 10);
2677
2678        let a1_tag = Tag::from([b'A', b'1']);
2679        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2680
2681        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
2682        assert_eq!(clipped, 5);
2683
2684        // In SoftWithMask mode, even with auto=true, attributes should NOT be modified
2685        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2686            let bytes: &[u8] = s.as_ref();
2687            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2688        }
2689    }
2690
2691    #[test]
2692    fn test_clip_end_of_alignment_auto_trim_hard_mode_false() {
2693        use noodles::sam::alignment::record::data::field::Tag;
2694        use noodles::sam::alignment::record_buf::data::field::value::Array;
2695
2696        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, false);
2697        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2698        let mut record = create_test_record("20M", seq, 10);
2699
2700        let a1_tag = Tag::from([b'A', b'1']);
2701        let a2_tag = Tag::from([b'A', b'2']);
2702
2703        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2704        record.data_mut().insert(a2_tag, Value::from((1..=20).collect::<Vec<i32>>()));
2705
2706        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
2707        assert_eq!(clipped, 5);
2708
2709        // In Hard mode with auto=false, attributes should NOT be modified
2710        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2711            let bytes: &[u8] = s.as_ref();
2712            assert_eq!(bytes, "AB".repeat(10).as_bytes());
2713        }
2714        if let Some(Value::Array(Array::Int32(arr))) = record.data().get(&a2_tag) {
2715            let vec: Vec<i32> = arr.clone();
2716            assert_eq!(vec, (1..=20).collect::<Vec<i32>>());
2717        }
2718    }
2719
2720    #[test]
2721    fn test_clip_end_of_alignment_auto_trim_hard_mode_true() {
2722        use noodles::sam::alignment::record::data::field::Tag;
2723        use noodles::sam::alignment::record_buf::data::field::value::Array;
2724
2725        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2726        let seq = "ACGTACGTACGTACGTACGT"; // 20 bases
2727        let mut record = create_test_record("20M", seq, 10);
2728
2729        let a1_tag = Tag::from([b'A', b'1']);
2730        let a2_tag = Tag::from([b'A', b'2']);
2731        let b1_tag = Tag::from([b'B', b'1']);
2732        let b2_tag = Tag::from([b'B', b'2']);
2733
2734        record.data_mut().insert(a1_tag, Value::from("AB".repeat(10)));
2735        record.data_mut().insert(a2_tag, Value::from((1..=20).collect::<Vec<i32>>()));
2736        record.data_mut().insert(b1_tag, Value::from("A".repeat(10)));
2737        record.data_mut().insert(b2_tag, Value::from((1..=10).collect::<Vec<i32>>()));
2738
2739        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
2740        assert_eq!(clipped, 5);
2741
2742        // In Hard mode with auto=true, attributes matching read length should be clipped
2743        if let Some(Value::String(s)) = record.data().get(&a1_tag) {
2744            // "ABABABABABABABABABAB" -> remove last 5 -> "ABABABABABABABA"
2745            let bytes: &[u8] = s.as_ref();
2746            assert_eq!(bytes, "ABABABABABABABA".as_bytes());
2747        }
2748        if let Some(Value::Array(Array::Int32(arr))) = record.data().get(&a2_tag) {
2749            let vec: Vec<i32> = arr.clone();
2750            assert_eq!(vec, (1..=15).collect::<Vec<i32>>());
2751        }
2752        // B1 and B2 should NOT be modified (length doesn't match)
2753        if let Some(Value::String(s)) = record.data().get(&b1_tag) {
2754            let bytes: &[u8] = s.as_ref();
2755            assert_eq!(bytes, "A".repeat(10).as_bytes());
2756        }
2757        if let Some(Value::Array(Array::Int32(arr))) = record.data().get(&b2_tag) {
2758            let vec: Vec<i32> = arr.clone();
2759            assert_eq!(vec, (1..=10).collect::<Vec<i32>>());
2760        }
2761    }
2762
2763    // ===================================================================
2764    // upgrade_clipping tests
2765    // ===================================================================
2766
2767    #[test]
2768    fn test_upgrade_all_clipping_convert_leading_trailing() {
2769        use noodles::sam::alignment::record::data::field::Tag;
2770
2771        // Test without auto-clip
2772        let clipper = SamRecordClipper::new(ClippingMode::Hard);
2773        let seq = "12345678901234567890123456789012345678901234567890"; // 50 bases
2774        let mut no_auto = create_test_record("5S35M10S", seq, 10);
2775        let az_tag = Tag::from([b'a', b'z']);
2776        no_auto
2777            .data_mut()
2778            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2779
2780        let result = clipper.upgrade_all_clipping(&mut no_auto).unwrap();
2781        assert_eq!(result, (5, 10));
2782
2783        let cigar_str = format_cigar(&no_auto.cigar());
2784        assert_eq!(cigar_str, "5H35M10H");
2785        assert_eq!(no_auto.sequence().len(), 35);
2786
2787        // Attributes should NOT be modified without auto-clip
2788        if let Some(Value::String(s)) = no_auto.data().get(&az_tag) {
2789            let bytes: &[u8] = s.as_ref();
2790            assert_eq!(bytes, "12345678901234567890123456789012345678901234567890".as_bytes());
2791        }
2792
2793        // Test with auto-clip
2794        let clipper_auto = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2795        let mut with_auto = create_test_record("5S35M10S", seq, 10);
2796        with_auto
2797            .data_mut()
2798            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2799
2800        let result = clipper_auto.upgrade_all_clipping(&mut with_auto).unwrap();
2801        assert_eq!(result, (5, 10));
2802
2803        let cigar_str = format_cigar(&with_auto.cigar());
2804        assert_eq!(cigar_str, "5H35M10H");
2805        assert_eq!(with_auto.sequence().len(), 35);
2806
2807        // Attributes SHOULD be modified with auto-clip (remove first 5 and last 10)
2808        if let Some(Value::String(s)) = with_auto.data().get(&az_tag) {
2809            let bytes: &[u8] = s.as_ref();
2810            assert_eq!(bytes, "67890123456789012345678901234567890".as_bytes());
2811        }
2812    }
2813
2814    #[test]
2815    fn test_upgrade_all_clipping_soft_clips_after_hard_clips() {
2816        use noodles::sam::alignment::record::data::field::Tag;
2817
2818        // Test without auto-clip
2819        let clipper = SamRecordClipper::new(ClippingMode::Hard);
2820        let seq = "12345678901234567890123456789012345678901234567890"; // 50 bases
2821        let mut no_auto = create_test_record("5H5S35M10S5H", seq, 10);
2822        let az_tag = Tag::from([b'a', b'z']);
2823        no_auto
2824            .data_mut()
2825            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2826
2827        let result = clipper.upgrade_all_clipping(&mut no_auto).unwrap();
2828        assert_eq!(result, (5, 10));
2829
2830        let cigar_str = format_cigar(&no_auto.cigar());
2831        assert_eq!(cigar_str, "10H35M15H");
2832        assert_eq!(no_auto.sequence().len(), 35);
2833
2834        // Attributes should NOT be modified without auto-clip
2835        if let Some(Value::String(s)) = no_auto.data().get(&az_tag) {
2836            let bytes: &[u8] = s.as_ref();
2837            assert_eq!(bytes, "12345678901234567890123456789012345678901234567890".as_bytes());
2838        }
2839
2840        // Test with auto-clip
2841        let clipper_auto = SamRecordClipper::with_auto_clip(ClippingMode::Hard, true);
2842        let mut with_auto = create_test_record("5H5S35M10S5H", seq, 10);
2843        with_auto
2844            .data_mut()
2845            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2846
2847        let result = clipper_auto.upgrade_all_clipping(&mut with_auto).unwrap();
2848        assert_eq!(result, (5, 10));
2849
2850        let cigar_str = format_cigar(&with_auto.cigar());
2851        assert_eq!(cigar_str, "10H35M15H");
2852        assert_eq!(with_auto.sequence().len(), 35);
2853
2854        // Attributes SHOULD be modified with auto-clip
2855        if let Some(Value::String(s)) = with_auto.data().get(&az_tag) {
2856            let bytes: &[u8] = s.as_ref();
2857            assert_eq!(bytes, "67890123456789012345678901234567890".as_bytes());
2858        }
2859    }
2860
2861    #[test]
2862    fn test_upgrade_all_clipping_no_soft_clipping() {
2863        use noodles::sam::alignment::record::data::field::Tag;
2864
2865        let clipper = SamRecordClipper::new(ClippingMode::Hard);
2866        let az_tag = Tag::from([b'a', b'z']);
2867
2868        // Test with no soft clips
2869        let seq1 = "1234567890123456789012345678901234567890123456789012345"; // 55 bases
2870        let mut no_soft = create_test_record("55M", seq1, 10);
2871        no_soft
2872            .data_mut()
2873            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2874
2875        let result = clipper.upgrade_all_clipping(&mut no_soft).unwrap();
2876        assert_eq!(result, (0, 0));
2877
2878        let cigar_str = format_cigar(&no_soft.cigar());
2879        assert_eq!(cigar_str, "55M");
2880        assert_eq!(no_soft.sequence().len(), 55);
2881
2882        // Test with only hard clips (no soft clips)
2883        let mut hard_only = create_test_record("5H55M10H", seq1, 10);
2884        hard_only
2885            .data_mut()
2886            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2887
2888        let result = clipper.upgrade_all_clipping(&mut hard_only).unwrap();
2889        assert_eq!(result, (0, 0));
2890
2891        let cigar_str = format_cigar(&hard_only.cigar());
2892        assert_eq!(cigar_str, "5H55M10H");
2893        assert_eq!(hard_only.sequence().len(), 55);
2894    }
2895
2896    #[test]
2897    fn test_upgrade_all_clipping_unmapped_or_wrong_mode() {
2898        use noodles::sam::alignment::record::data::field::Tag;
2899
2900        let az_tag = Tag::from([b'a', b'z']);
2901        let seq = "1234567890123456789012345678901234567890123456789012345"; // 55 bases
2902
2903        // Test Soft mode (should not convert)
2904        let clipper_soft = SamRecordClipper::new(ClippingMode::Soft);
2905        let mut mapped = create_test_record("55M", seq, 10);
2906        mapped
2907            .data_mut()
2908            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2909
2910        let result = clipper_soft.upgrade_all_clipping(&mut mapped).unwrap();
2911        assert_eq!(result, (0, 0));
2912
2913        // Test SoftWithMask mode (should not convert)
2914        let clipper_mask = SamRecordClipper::new(ClippingMode::SoftWithMask);
2915        let mut mapped2 = create_test_record("55M", seq, 10);
2916        mapped2
2917            .data_mut()
2918            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2919
2920        let result = clipper_mask.upgrade_all_clipping(&mut mapped2).unwrap();
2921        assert_eq!(result, (0, 0));
2922
2923        // Test unmapped read (should not convert)
2924        let clipper_hard = SamRecordClipper::new(ClippingMode::Hard);
2925        let mut unmapped = create_test_record("55M", seq, 10);
2926        *unmapped.flags_mut() = Flags::UNMAPPED;
2927        unmapped
2928            .data_mut()
2929            .insert(az_tag, Value::from("12345678901234567890123456789012345678901234567890"));
2930
2931        let result = clipper_hard.upgrade_all_clipping(&mut unmapped).unwrap();
2932        assert_eq!(result, (0, 0));
2933    }
2934
2935    // ===================================================================
2936    // clipOverlappingReads tests - complex overlap scenarios
2937    // ===================================================================
2938
2939    fn create_pair(
2940        r1_start: usize,
2941        r1_cigar: &str,
2942        r1_seq: &str,
2943        r2_start: usize,
2944        r2_cigar: &str,
2945        r2_seq: &str,
2946    ) -> (RecordBuf, RecordBuf) {
2947        let mut r1 = create_test_record(r1_cigar, r1_seq, r1_start);
2948        let mut r2 = create_test_record(r2_cigar, r2_seq, r2_start);
2949
2950        // Calculate template length (insert size)
2951        // For FR pair: template length = r2_end - r1_start + 1
2952        let r2_len = cigar_utils::reference_length(&r2.cigar());
2953        let r2_end = r2_start + r2_len - 1;
2954        let tlen = i32::try_from(r2_end).unwrap() - i32::try_from(r1_start).unwrap() + 1;
2955
2956        // Set up FR pair flags
2957        // R1 is forward strand, R2 is reverse strand (typical FR pair)
2958        *r1.flags_mut() = Flags::SEGMENTED | Flags::MATE_REVERSE_COMPLEMENTED;
2959        *r2.flags_mut() = Flags::SEGMENTED | Flags::REVERSE_COMPLEMENTED;
2960
2961        // Set mate information
2962        *r1.mate_reference_sequence_id_mut() = Some(0);
2963        *r1.mate_alignment_start_mut() = Position::new(r2_start);
2964        *r1.template_length_mut() = tlen;
2965        *r2.mate_reference_sequence_id_mut() = Some(0);
2966        *r2.mate_alignment_start_mut() = Position::new(r1_start);
2967        *r2.template_length_mut() = -tlen;
2968
2969        (r1, r2)
2970    }
2971
2972    #[test]
2973    fn test_clip_overlapping_reads_one_base_overlap() {
2974        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2975        let seq = "A".repeat(100);
2976        let (mut r1, mut r2) = create_pair(1, "100M", &seq, 100, "100M", &seq);
2977
2978        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
2979        assert_eq!(clipped_r1, 0);
2980        assert_eq!(clipped_r2, 1);
2981
2982        assert_eq!(r1.alignment_start(), Position::new(1));
2983        let cigar_r1 = format_cigar(&r1.cigar());
2984        assert_eq!(cigar_r1, "100M");
2985
2986        assert_eq!(r2.alignment_start(), Position::new(101));
2987        let cigar_r2 = format_cigar(&r2.cigar());
2988        assert_eq!(cigar_r2, "1S99M");
2989    }
2990
2991    #[test]
2992    fn test_clip_overlapping_reads_two_base_overlap() {
2993        let clipper = SamRecordClipper::new(ClippingMode::Soft);
2994        let seq = "A".repeat(100);
2995        let (mut r1, mut r2) = create_pair(2, "100M", &seq, 100, "100M", &seq);
2996
2997        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
2998        assert_eq!(clipped_r1, 1);
2999        assert_eq!(clipped_r2, 1);
3000
3001        assert_eq!(r1.alignment_start(), Position::new(2));
3002        let cigar_r1 = format_cigar(&r1.cigar());
3003        assert_eq!(cigar_r1, "99M1S");
3004
3005        assert_eq!(r2.alignment_start(), Position::new(101));
3006        let cigar_r2 = format_cigar(&r2.cigar());
3007        assert_eq!(cigar_r2, "1S99M");
3008    }
3009
3010    #[test]
3011    fn test_clip_overlapping_reads_with_deletion() {
3012        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3013        let seq = "A".repeat(90);
3014        let (mut r1, mut r2) = create_pair(2, "80M10D10M", &seq, 70, "10M10D80M", &seq);
3015
3016        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3017        assert_eq!(clipped_r1, 10);
3018        assert_eq!(clipped_r2, 10);
3019
3020        assert_eq!(r1.alignment_start(), Position::new(2));
3021        let cigar_r1 = format_cigar(&r1.cigar());
3022        assert_eq!(cigar_r1, "80M10S");
3023
3024        assert_eq!(r2.alignment_start(), Position::new(90));
3025        let cigar_r2 = format_cigar(&r2.cigar());
3026        assert_eq!(cigar_r2, "10S80M");
3027    }
3028
3029    #[test]
3030    fn test_clip_overlapping_reads_full_overlap() {
3031        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3032        let seq = "A".repeat(100);
3033        let (mut r1, mut r2) = create_pair(1, "100M", &seq, 1, "100M", &seq);
3034
3035        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3036        assert_eq!(clipped_r1, 50);
3037        assert_eq!(clipped_r2, 50);
3038
3039        assert_eq!(r1.alignment_start(), Position::new(1));
3040        let cigar_r1 = format_cigar(&r1.cigar());
3041        assert_eq!(cigar_r1, "50M50S");
3042
3043        assert_eq!(r2.alignment_start(), Position::new(51));
3044        let cigar_r2 = format_cigar(&r2.cigar());
3045        assert_eq!(cigar_r2, "50S50M");
3046    }
3047
3048    #[test]
3049    fn test_clip_overlapping_reads_extend_past_each_other() {
3050        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3051        let seq = "A".repeat(100);
3052        let (mut r1, mut r2) = create_pair(50, "100M", &seq, 1, "100M", &seq);
3053
3054        // Midpoint should be (50+100)/2 = 75
3055        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3056        assert_eq!(clipped_r1, 74);
3057        assert_eq!(clipped_r2, 75);
3058
3059        assert_eq!(r1.alignment_start(), Position::new(50));
3060        let cigar_r1 = format_cigar(&r1.cigar());
3061        assert_eq!(cigar_r1, "26M74S");
3062
3063        assert_eq!(r2.alignment_start(), Position::new(76));
3064        let cigar_r2 = format_cigar(&r2.cigar());
3065        assert_eq!(cigar_r2, "75S25M");
3066    }
3067
3068    #[test]
3069    fn test_clip_overlapping_reads_forward_much_longer() {
3070        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, false);
3071        let r1_seq = "A".repeat(100);
3072        let r2_seq = "A".repeat(100);
3073        let (mut r1, mut r2) = create_pair(1, "100M", &r1_seq, 30, "80S20M", &r2_seq);
3074
3075        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3076        assert_eq!(clipped_r1, 71);
3077        assert_eq!(clipped_r2, 0);
3078
3079        assert_eq!(r1.alignment_start(), Position::new(1));
3080        let cigar_r1 = format_cigar(&r1.cigar());
3081        assert_eq!(cigar_r1, "29M71H");
3082
3083        assert_eq!(r2.alignment_start(), Position::new(30));
3084        let cigar_r2 = format_cigar(&r2.cigar());
3085        assert_eq!(cigar_r2, "80H20M");
3086    }
3087
3088    #[test]
3089    fn test_clip_overlapping_reads_reverse_much_longer() {
3090        let clipper = SamRecordClipper::with_auto_clip(ClippingMode::Hard, false);
3091        let r1_seq = "A".repeat(100);
3092        let r2_seq = "A".repeat(100);
3093        let (mut r1, mut r2) = create_pair(50, "20M80S", &r1_seq, 1, "100M", &r2_seq);
3094
3095        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3096        assert_eq!(clipped_r1, 0);
3097        assert_eq!(clipped_r2, 69);
3098
3099        assert_eq!(r1.alignment_start(), Position::new(50));
3100        let cigar_r1 = format_cigar(&r1.cigar());
3101        assert_eq!(cigar_r1, "20M80H");
3102
3103        assert_eq!(r2.alignment_start(), Position::new(70));
3104        let cigar_r2 = format_cigar(&r2.cigar());
3105        assert_eq!(cigar_r2, "69H31M");
3106    }
3107
3108    #[test]
3109    fn test_clip_overlapping_reads_with_one_end_deletion() {
3110        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3111        let r1_seq = "A".repeat(100);
3112        let r2_seq = "A".repeat(110);
3113        let (mut r1, mut r2) = create_pair(1, "60M10D40M", &r1_seq, 50, "10M10D80M10D10M", &r2_seq);
3114
3115        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3116        assert_eq!(clipped_r1, 25);
3117        assert_eq!(clipped_r2, 26);
3118
3119        assert_eq!(r1.alignment_start(), Position::new(1));
3120        let cigar_r1 = format_cigar(&r1.cigar());
3121        assert_eq!(cigar_r1, "60M10D15M25S");
3122
3123        assert_eq!(r2.alignment_start(), Position::new(86));
3124        let cigar_r2 = format_cigar(&r2.cigar());
3125        assert_eq!(cigar_r2, "26S64M10D10M");
3126    }
3127
3128    #[test]
3129    fn test_clip_overlapping_reads_both_ends_deletion() {
3130        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3131        let seq = "A".repeat(100);
3132        let (mut r1, mut r2) = create_pair(1, "50M10D50M", &seq, 3, "47M10D53M", &seq);
3133
3134        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3135        assert_eq!(clipped_r1, 50);
3136        assert_eq!(clipped_r2, 47);
3137
3138        assert_eq!(r1.alignment_start(), Position::new(1));
3139        let cigar_r1 = format_cigar(&r1.cigar());
3140        assert_eq!(cigar_r1, "50M50S");
3141
3142        // R2 clips through the 47M and 10D, so the new start is at position 60 (where 53M begins)
3143        assert_eq!(r2.alignment_start(), Position::new(60));
3144        let cigar_r2 = format_cigar(&r2.cigar());
3145        assert_eq!(cigar_r2, "47S53M");
3146    }
3147
3148    #[test]
3149    fn test_clip_overlapping_reads_no_overlap_far_apart() {
3150        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3151        let seq = "A".repeat(100);
3152        let (mut r1, mut r2) = create_pair(1000, "100M", &seq, 1, "100M", &seq);
3153
3154        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3155        assert_eq!(clipped_r1, 0);
3156        assert_eq!(clipped_r2, 0);
3157
3158        assert_eq!(r1.alignment_start(), Position::new(1000));
3159        let cigar_r1 = format_cigar(&r1.cigar());
3160        assert_eq!(cigar_r1, "100M");
3161
3162        assert_eq!(r2.alignment_start(), Position::new(1));
3163        let cigar_r2 = format_cigar(&r2.cigar());
3164        assert_eq!(cigar_r2, "100M");
3165    }
3166
3167    // Tests for clip_extending_past_mate
3168
3169    #[test]
3170    fn test_clip_extending_past_mate_ends_basic() {
3171        // Based on the Scala test: r1 at 100-149, r2 at 90-139
3172        // After clipping, both should have same start and end
3173        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3174        let seq = "A".repeat(50);
3175        let (mut r1, mut r2) = create_pair(100, "50M", &seq, 90, "50M", &seq);
3176
3177        // Before: R1: 100-149, R2: 90-139
3178        // R1 extends 10 bases past R2's end (149 vs 139), so clip 10 from R1's 3' end
3179        // R2 starts 10 bases before R1's start (90 vs 100), so clip 10 from R2's 5' end (reference)
3180
3181        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3182
3183        // Both reads should be clipped
3184        assert_eq!(clipped_r1, 10); // R1 clipped from 3' end
3185        assert_eq!(clipped_r2, 10); // R2 clipped from 5' reference end (3' read end for reverse strand)
3186
3187        // After clipping: R1: 100-139, R2: 100-139 (same start and end)
3188        assert_eq!(r1.alignment_start(), Position::new(100));
3189        let cigar_r1 = format_cigar(&r1.cigar());
3190        assert_eq!(cigar_r1, "40M10S");
3191
3192        assert_eq!(r2.alignment_start(), Position::new(100));
3193        let cigar_r2 = format_cigar(&r2.cigar());
3194        assert_eq!(cigar_r2, "10S40M");
3195    }
3196
3197    #[test]
3198    fn test_clip_extending_past_mate_ends_both_extend() {
3199        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3200        let seq = "A".repeat(60);
3201        // R1: 100-159, R2: 110-169
3202        let (mut r1, mut r2) = create_pair(100, "60M", &seq, 110, "60M", &seq);
3203
3204        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3205
3206        // R1 extends 10 bases past R2's end (159 vs 169)
3207        assert_eq!(clipped_r1, 0); // R1 doesn't extend past R2
3208        // R2 starts 10 bases before R1's start (110 vs 100) - wait, that's backwards
3209        assert_eq!(clipped_r2, 0); // R2 starts after R1
3210
3211        let cigar_r1 = format_cigar(&r1.cigar());
3212        assert_eq!(cigar_r1, "60M");
3213
3214        let cigar_r2 = format_cigar(&r2.cigar());
3215        assert_eq!(cigar_r2, "60M");
3216    }
3217
3218    #[test]
3219    fn test_clip_extending_past_mate_ends_with_soft_clips() {
3220        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3221        let seq = "A".repeat(60);
3222        // R1: `10S50M` at 100 → unclipped 90-149, alignment 100-149
3223        // R2: `50M` at 90 → unclipped 90-139, alignment 90-139
3224        // R1's alignment end (149) extends 10 bases past R2's unclipped end (139)
3225        // R2's alignment start (90) is 10 bases before R1's unclipped start (90) - no extension
3226        let (mut r1, mut r2) = create_pair(100, "10S50M", &seq, 90, "50M", &seq);
3227
3228        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3229
3230        // R1 should be clipped by 10 from the end
3231        assert_eq!(clipped_r1, 10);
3232        // R2 should not be clipped (its start doesn't extend past R1's unclipped start)
3233        assert_eq!(clipped_r2, 0);
3234
3235        let cigar_r1 = format_cigar(&r1.cigar());
3236        assert_eq!(cigar_r1, "10S40M10S");
3237
3238        // R2 unchanged
3239        assert_eq!(r2.alignment_start(), Position::new(90));
3240        let cigar_r2 = format_cigar(&r2.cigar());
3241        assert_eq!(cigar_r2, "50M");
3242    }
3243
3244    #[test]
3245    fn test_clip_extending_past_mate_ends_no_extension() {
3246        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3247        let seq = "A".repeat(50);
3248        // R1: 100-149, R2: 200-249 (no extension)
3249        let (mut r1, mut r2) = create_pair(100, "50M", &seq, 200, "50M", &seq);
3250
3251        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3252
3253        assert_eq!(clipped_r1, 0);
3254        assert_eq!(clipped_r2, 0);
3255
3256        let cigar_r1 = format_cigar(&r1.cigar());
3257        assert_eq!(cigar_r1, "50M");
3258
3259        let cigar_r2 = format_cigar(&r2.cigar());
3260        assert_eq!(cigar_r2, "50M");
3261    }
3262
3263    #[test]
3264    fn test_clip_extending_past_mate_ends_ff_pair() {
3265        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3266        let seq = "A".repeat(50);
3267        let mut r1 = create_test_record("50M", &seq, 100);
3268        let mut r2 = create_test_record("50M", &seq, 90);
3269
3270        // Make them both forward strand (not FR pair)
3271        *r1.flags_mut() = Flags::SEGMENTED;
3272        *r2.flags_mut() = Flags::SEGMENTED; // Both forward, not FR
3273
3274        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3275
3276        // Should not clip non-FR pairs
3277        assert_eq!(clipped_r1, 0);
3278        assert_eq!(clipped_r2, 0);
3279    }
3280
3281    #[test]
3282    fn test_clip_extending_past_mate_ends_with_deletion() {
3283        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3284        let seq = "A".repeat(50);
3285        // R1: 100-159 (50M + 10D)
3286        // R2: 110-159
3287        let (mut r1, mut r2) = create_pair(100, "50M10D10M", &seq, 110, "50M", &seq);
3288
3289        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3290
3291        // R1 ends at 169, extends past R2's end at 159
3292        // Should clip ~10 bases worth
3293        assert!(clipped_r1 > 0);
3294        assert_eq!(clipped_r2, 0);
3295    }
3296
3297    #[test]
3298    fn test_clip_extending_past_mate_ends_hard_mode() {
3299        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3300        let seq = "A".repeat(50);
3301        let (mut r1, mut r2) = create_pair(100, "50M", &seq, 90, "50M", &seq);
3302
3303        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3304
3305        assert_eq!(clipped_r1, 10);
3306        assert_eq!(clipped_r2, 10);
3307
3308        let cigar_r1 = format_cigar(&r1.cigar());
3309        assert_eq!(cigar_r1, "40M10H");
3310
3311        let cigar_r2 = format_cigar(&r2.cigar());
3312        assert_eq!(cigar_r2, "10H40M");
3313
3314        // Sequence should be trimmed in hard mode
3315        assert_eq!(r1.sequence().len(), 40);
3316        assert_eq!(r2.sequence().len(), 40);
3317    }
3318
3319    // Tests for overlapping reads with insertions
3320
3321    #[test]
3322    fn test_clip_overlapping_reads_with_insertions() {
3323        // Based on Scala test: "handle reads that contain insertions"
3324        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3325        let r1_seq = "A".repeat(50);
3326        let r2_seq = "A".repeat(50);
3327        // R1: 100-147 with 2bp insertion (40M2I8M)
3328        // R2: 130-169 with 2bp insertion (10M2I38M)
3329        let (mut r1, mut r2) = create_pair(100, "40M2I8M", &r1_seq, 130, "10M2I38M", &r2_seq);
3330
3331        // Before clipping, r1.end >= r2.start (they overlap)
3332        let r1_end_before = 100 + 48; // 40M + 8M = 48 reference bases
3333        let r2_start_before = 130;
3334        assert!(r1_end_before >= r2_start_before);
3335
3336        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3337
3338        // Both should be clipped
3339        assert!(clipped_r1 > 0 || clipped_r2 > 0);
3340
3341        // After clipping, they should not overlap (abutting is OK)
3342        let r1_end_after =
3343            usize::from(r1.alignment_start().unwrap()) + cigar_utils::reference_length(&r1.cigar());
3344        let r2_start_after = usize::from(r2.alignment_start().unwrap());
3345        assert!(
3346            r1_end_after <= r2_start_after,
3347            "After clipping, r1 end ({r1_end_after}) should be at or before r2 start ({r2_start_after})"
3348        );
3349    }
3350
3351    #[test]
3352    fn test_clip_overlapping_reads_with_multiple_insertions() {
3353        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3354        let seq = "A".repeat(60);
3355        // R1: Multiple insertions
3356        // R2: Multiple insertions
3357        let (mut r1, mut r2) = create_pair(100, "20M2I20M3I10M", &seq, 120, "15M2I25M3I10M", &seq);
3358
3359        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3360
3361        // Should clip without errors
3362        assert!(clipped_r1 > 0 || clipped_r2 > 0);
3363    }
3364
3365    #[test]
3366    fn test_clip_overlapping_reads_insertion_at_overlap_boundary() {
3367        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3368        let seq = "A".repeat(55);
3369        // R1: 100-149, with insertion near the end
3370        // R2: 145-194
3371        let (mut r1, mut r2) = create_pair(100, "48M2I5M", &seq, 145, "50M", &seq);
3372
3373        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3374
3375        // Should handle insertion at overlap boundary
3376        assert!(clipped_r1 > 0 || clipped_r2 > 0);
3377    }
3378
3379    // Tests for SoftWithMask mode
3380
3381    #[test]
3382    fn test_clip_end_of_alignment_soft_with_mask_masking() {
3383        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
3384        let mut record = create_test_record("50M", &"A".repeat(50), 1000);
3385
3386        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
3387        assert_eq!(clipped, 10);
3388
3389        let cigar = format_cigar(&record.cigar());
3390        assert_eq!(cigar, "40M10S");
3391
3392        // Check that the last 10 bases are masked to N
3393        let seq = record.sequence();
3394        let seq_bytes: Vec<u8> = seq.as_ref().to_vec();
3395        for (i, &base) in seq_bytes.iter().enumerate().skip(40).take(10) {
3396            assert_eq!(base, b'N', "Base at position {i} should be N");
3397        }
3398
3399        // Check that qualities are set to min (2)
3400        let quals = record.quality_scores();
3401        let qual_bytes: Vec<u8> = quals.as_ref().to_vec();
3402        for (i, &qual) in qual_bytes.iter().enumerate().skip(40).take(10) {
3403            assert_eq!(qual, 2, "Quality at position {i} should be 2");
3404        }
3405    }
3406
3407    #[test]
3408    fn test_clip_start_of_alignment_soft_with_mask_masking() {
3409        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
3410        let mut record = create_test_record("50M", &"A".repeat(50), 1000);
3411
3412        let clipped = clipper.clip_start_of_alignment(&mut record, 10);
3413        assert_eq!(clipped, 10);
3414
3415        let cigar = format_cigar(&record.cigar());
3416        assert_eq!(cigar, "10S40M");
3417
3418        // Check that the first 10 bases are masked to N
3419        let seq = record.sequence();
3420        let seq_bytes: Vec<u8> = seq.as_ref().to_vec();
3421        for (i, &base) in seq_bytes.iter().enumerate().take(10) {
3422            assert_eq!(base, b'N', "Base at position {i} should be N");
3423        }
3424
3425        // Check that qualities are set to min (2)
3426        let quals = record.quality_scores();
3427        let qual_bytes: Vec<u8> = quals.as_ref().to_vec();
3428        for (i, &qual) in qual_bytes.iter().enumerate().take(10) {
3429            assert_eq!(qual, 2, "Quality at position {i} should be 2");
3430        }
3431    }
3432
3433    #[test]
3434    fn test_clip_overlapping_reads_soft_with_mask() {
3435        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
3436        let seq = "A".repeat(100);
3437        let (mut r1, mut r2) = create_pair(1, "100M", &seq, 50, "100M", &seq);
3438
3439        let (clipped_r1, clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3440
3441        assert!(clipped_r1 > 0 || clipped_r2 > 0);
3442
3443        // Check that clipped bases are masked
3444        if clipped_r1 > 0 {
3445            let seq = r1.sequence();
3446            let seq_bytes: Vec<u8> = seq.as_ref().to_vec();
3447            let start_mask = 100 - clipped_r1;
3448            for (i, &base) in seq_bytes.iter().enumerate().skip(start_mask).take(100 - start_mask) {
3449                assert_eq!(base, b'N', "R1 base at position {i} should be N");
3450            }
3451        }
3452    }
3453
3454    // Edge cases
3455
3456    #[test]
3457    fn test_clip_very_short_read() {
3458        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3459        let mut record = create_test_record("5M", "ACGTA", 1000);
3460
3461        // Try to clip more than the read length
3462        let clipped = clipper.clip_end_of_alignment(&mut record, 3);
3463        assert_eq!(clipped, 3);
3464
3465        let cigar = format_cigar(&record.cigar());
3466        assert_eq!(cigar, "2M3S");
3467    }
3468
3469    #[test]
3470    fn test_clip_entire_read_3_prime() {
3471        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3472        let mut record = create_test_record("10M", &"A".repeat(10), 1000);
3473
3474        let clipped = clipper.clip_end_of_alignment(&mut record, 10);
3475        assert_eq!(clipped, 10);
3476
3477        let cigar = format_cigar(&record.cigar());
3478        assert_eq!(cigar, "10S");
3479    }
3480
3481    #[test]
3482    fn test_clip_overlapping_reads_complex_cigar() {
3483        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3484        let r1_seq = "A".repeat(70);
3485        let r2_seq = "A".repeat(70);
3486        // Complex CIGAR with multiple operation types
3487        let (mut r1, mut r2) =
3488            create_pair(100, "20M5I10M3D15M2I10M", &r1_seq, 130, "15M2I20M5D10M3I10M", &r2_seq);
3489
3490        let (_clipped_r1, _clipped_r2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3491
3492        // Should handle complex CIGAR without panicking (test passes if no panic occurs)
3493    }
3494
3495    #[test]
3496    fn test_clip_with_leading_and_trailing_soft_clips() {
3497        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3498        let mut record = create_test_record("10S30M10S", &"A".repeat(50), 1000);
3499
3500        // Clip additional 5 bases from 3' end
3501        let clipped = clipper.clip_end_of_alignment(&mut record, 5);
3502        assert_eq!(clipped, 5);
3503
3504        let cigar = format_cigar(&record.cigar());
3505        assert_eq!(cigar, "10S25M15S");
3506    }
3507
3508    #[test]
3509    fn test_clip_overlapping_reads_one_base_different_positions() {
3510        // Test overlapping by exactly one base at different reference positions
3511        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3512        let seq = "A".repeat(50);
3513
3514        // Test at low position
3515        let (mut r1, mut r2) = create_pair(10, "50M", &seq, 59, "50M", &seq);
3516        let (c1, c2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3517        assert!(c1 > 0 || c2 > 0);
3518
3519        // Test at high position
3520        let (mut r1, mut r2) = create_pair(10000, "50M", &seq, 10049, "50M", &seq);
3521        let (c1, c2) = clipper.clip_overlapping_reads(&mut r1, &mut r2);
3522        assert!(c1 > 0 || c2 > 0);
3523    }
3524
3525    // Tests for NOT upgrading clipping (same mode or going backwards)
3526
3527    #[test]
3528    fn test_not_upgrade_soft_to_soft() {
3529        // Same mode - should not upgrade
3530        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3531        let seq = "12345678901234567890123456789012345678901234567890";
3532        let mut record = create_test_record("5S35M10S", seq, 10);
3533
3534        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3535        assert_eq!(result, (0, 0));
3536
3537        let cigar_str = format_cigar(&record.cigar());
3538        assert_eq!(cigar_str, "5S35M10S"); // No change
3539        assert_eq!(record.sequence().len(), 50); // No sequence change
3540    }
3541
3542    #[test]
3543    fn test_not_upgrade_soft_with_mask_to_soft() {
3544        // Going backwards - should not upgrade
3545        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3546        let seq = "12345678901234567890123456789012345678901234567890";
3547        let mut record = create_test_record("5S35M10S", seq, 10);
3548
3549        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3550        assert_eq!(result, (0, 0));
3551
3552        let cigar_str = format_cigar(&record.cigar());
3553        assert_eq!(cigar_str, "5S35M10S");
3554    }
3555
3556    #[test]
3557    fn test_not_upgrade_soft_with_mask_to_soft_with_mask() {
3558        // Same mode - should not upgrade
3559        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
3560        let seq = "12345678901234567890123456789012345678901234567890";
3561        let mut record = create_test_record("5S35M10S", seq, 10);
3562
3563        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3564        assert_eq!(result, (0, 0));
3565
3566        let cigar_str = format_cigar(&record.cigar());
3567        assert_eq!(cigar_str, "5S35M10S");
3568    }
3569
3570    #[test]
3571    fn test_not_upgrade_hard_to_hard() {
3572        // Same mode - should not upgrade
3573        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3574        let seq = "12345678901234567890123456789012345"; // 35 bases
3575        let mut record = create_test_record("5H35M10H", seq, 10);
3576
3577        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3578        assert_eq!(result, (0, 0));
3579
3580        let cigar_str = format_cigar(&record.cigar());
3581        assert_eq!(cigar_str, "5H35M10H");
3582        assert_eq!(record.sequence().len(), 35);
3583    }
3584
3585    #[test]
3586    fn test_not_upgrade_hard_to_soft_with_mask() {
3587        // Going backwards - should not upgrade (Hard is already most restrictive)
3588        let clipper = SamRecordClipper::new(ClippingMode::SoftWithMask);
3589        let seq = "12345678901234567890123456789012345";
3590        let mut record = create_test_record("5H35M10H", seq, 10);
3591
3592        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3593        assert_eq!(result, (0, 0));
3594
3595        let cigar_str = format_cigar(&record.cigar());
3596        assert_eq!(cigar_str, "5H35M10H");
3597    }
3598
3599    #[test]
3600    fn test_not_upgrade_hard_to_soft() {
3601        // Going backwards - should not upgrade
3602        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3603        let seq = "12345678901234567890123456789012345";
3604        let mut record = create_test_record("5H35M10H", seq, 10);
3605
3606        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3607        assert_eq!(result, (0, 0));
3608
3609        let cigar_str = format_cigar(&record.cigar());
3610        assert_eq!(cigar_str, "5H35M10H");
3611    }
3612
3613    #[test]
3614    fn test_not_upgrade_unmapped_read() {
3615        // Unmapped reads should not be upgraded
3616        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3617        let seq = "12345678901234567890123456789012345678901234567890";
3618        let mut record = create_test_record("5S35M10S", seq, 10);
3619
3620        // Mark as unmapped
3621        *record.flags_mut() = Flags::UNMAPPED;
3622
3623        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3624        assert_eq!(result, (0, 0));
3625
3626        let cigar_str = format_cigar(&record.cigar());
3627        assert_eq!(cigar_str, "5S35M10S");
3628    }
3629
3630    #[test]
3631    fn test_upgrade_soft_to_hard_with_existing_hard_clips() {
3632        // Soft with existing hard clips -> Hard mode
3633        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3634        let seq = "1234567890123456789012345678901234567890"; // 40 bases (2H + 5S + 30M + 3S)
3635        let mut record = create_test_record("2H5S30M3S", seq, 10);
3636
3637        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3638        assert_eq!(result, (5, 3));
3639
3640        let cigar_str = format_cigar(&record.cigar());
3641        assert_eq!(cigar_str, "7H30M3H"); // 2H + 5S -> 7H, 3S -> 3H
3642        assert_eq!(record.sequence().len(), 30);
3643    }
3644
3645    #[test]
3646    fn test_upgrade_soft_with_mask_to_hard() {
3647        // SoftWithMask -> Hard should upgrade
3648        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3649        let seq = "12345678901234567890123456789012345678901234567890";
3650        let mut record = create_test_record("5S35M10S", seq, 10);
3651
3652        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3653        assert_eq!(result, (5, 10));
3654
3655        let cigar_str = format_cigar(&record.cigar());
3656        assert_eq!(cigar_str, "5H35M10H");
3657        assert_eq!(record.sequence().len(), 35);
3658    }
3659
3660    #[test]
3661    fn test_upgrade_clipping_with_only_leading_soft_clip() {
3662        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3663        let seq = "12345678901234567890123456789012345678901234567890";
3664        let mut record = create_test_record("10S40M", seq, 10);
3665
3666        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3667        assert_eq!(result, (10, 0));
3668
3669        let cigar_str = format_cigar(&record.cigar());
3670        assert_eq!(cigar_str, "10H40M");
3671        assert_eq!(record.sequence().len(), 40);
3672    }
3673
3674    #[test]
3675    fn test_upgrade_clipping_with_only_trailing_soft_clip() {
3676        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3677        let seq = "12345678901234567890123456789012345678901234567890";
3678        let mut record = create_test_record("40M10S", seq, 10);
3679
3680        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3681        assert_eq!(result, (0, 10));
3682
3683        let cigar_str = format_cigar(&record.cigar());
3684        assert_eq!(cigar_str, "40M10H");
3685        assert_eq!(record.sequence().len(), 40);
3686    }
3687
3688    #[test]
3689    fn test_upgrade_clipping_complex_cigar_with_indels() {
3690        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3691        let seq = "123456789012345678901234567890123456789012345"; // 45 bases (5S + 20M + 5I + 10M + 5S)
3692        let mut record = create_test_record("5S20M5I10M5S", seq, 10);
3693
3694        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3695        assert_eq!(result, (5, 5));
3696
3697        let cigar_str = format_cigar(&record.cigar());
3698        assert_eq!(cigar_str, "5H20M5I10M5H");
3699        assert_eq!(record.sequence().len(), 35); // 20 + 5 + 10
3700    }
3701
3702    #[test]
3703    fn test_upgrade_clipping_no_soft_clips_present() {
3704        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3705        let seq = "12345678901234567890123456789012345678901234567890";
3706        let mut record = create_test_record("50M", seq, 10);
3707
3708        let result = clipper.upgrade_all_clipping(&mut record).unwrap();
3709        assert_eq!(result, (0, 0));
3710
3711        let cigar_str = format_cigar(&record.cigar());
3712        assert_eq!(cigar_str, "50M");
3713        assert_eq!(record.sequence().len(), 50);
3714    }
3715
3716    // ===== Additional tests ported from fgbio to match clipExtendingPastMateEnds coverage =====
3717
3718    #[test]
3719    fn test_clip_extending_past_mate_ends_one_base_extension() {
3720        // fgbio test: "clip reads that extend one base past their mate's start"
3721        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3722        let seq = "A".repeat(100);
3723        let (mut r1, mut r2) = create_pair(2, "100M", &seq, 1, "100M", &seq);
3724
3725        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3726
3727        assert_eq!(clipped_r1, 1);
3728        assert_eq!(clipped_r2, 1);
3729        assert_eq!(r1.alignment_start(), Some(Position::new(2).unwrap()));
3730        assert_eq!(format_cigar(&r1.cigar()), "99M1S");
3731        assert_eq!(r2.alignment_start(), Some(Position::new(2).unwrap()));
3732        assert_eq!(format_cigar(&r2.cigar()), "1S99M");
3733    }
3734
3735    #[test]
3736    fn test_clip_extending_past_mate_ends_two_base_extension() {
3737        // fgbio test: "clip reads that extend two bases past their mate's start"
3738        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3739        let seq = "A".repeat(100);
3740        let (mut r1, mut r2) = create_pair(3, "100M", &seq, 1, "100M", &seq);
3741
3742        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3743
3744        assert_eq!(clipped_r1, 2);
3745        assert_eq!(clipped_r2, 2);
3746        assert_eq!(r1.alignment_start(), Some(Position::new(3).unwrap()));
3747        assert_eq!(format_cigar(&r1.cigar()), "98M2S");
3748        assert_eq!(r2.alignment_start(), Some(Position::new(3).unwrap()));
3749        assert_eq!(format_cigar(&r2.cigar()), "2S98M");
3750    }
3751
3752    #[test]
3753    fn test_clip_extending_past_mate_ends_only_one_end_extends() {
3754        // fgbio test: "only one end extends their mate's start"
3755        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3756        let seq = "A".repeat(100);
3757        let (mut r1, mut r2) = create_pair(1, "100M", &seq, 1, "50S50M", &seq);
3758
3759        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3760
3761        assert_eq!(clipped_r1, 50); // R1 gets clipped
3762        assert_eq!(clipped_r2, 0); // R2 already has clipping
3763        assert_eq!(r1.alignment_start(), Some(Position::new(1).unwrap()));
3764        assert_eq!(format_cigar(&r1.cigar()), "50M50S");
3765        assert_eq!(r2.alignment_start(), Some(Position::new(1).unwrap()));
3766        assert_eq!(format_cigar(&r2.cigar()), "50S50M"); // unchanged
3767    }
3768
3769    #[test]
3770    fn test_clip_extending_past_mate_ends_with_insertions() {
3771        // fgbio test: "only one end extends with insertions"
3772        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3773        let seq = "A".repeat(100);
3774        let (mut r1, mut r2) = create_pair(1, "40M10I50M", &seq, 1, "50S50M", &seq);
3775
3776        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3777
3778        assert_eq!(clipped_r1, 40); // 40 bases clipped (insertion doesn't count toward ref)
3779        assert_eq!(clipped_r2, 0);
3780        assert_eq!(r1.alignment_start(), Some(Position::new(1).unwrap()));
3781        assert_eq!(format_cigar(&r1.cigar()), "40M10I10M40S");
3782        assert_eq!(r2.alignment_start(), Some(Position::new(1).unwrap()));
3783        assert_eq!(format_cigar(&r2.cigar()), "50S50M"); // unchanged
3784    }
3785
3786    #[test]
3787    fn test_clip_extending_past_mate_ends_soft_clips_extend_hard_mode() {
3788        // fgbio test: "forward read soft-clips extend past (hard mode)"
3789        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3790        let seq = "A".repeat(50);
3791        let (mut r1, mut r2) = create_pair(20, "30M20S", &seq, 20, "10S40M", &seq);
3792
3793        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3794
3795        assert_eq!(clipped_r1, 0); // No aligned bases clipped
3796        assert_eq!(clipped_r2, 0); // No aligned bases clipped
3797        assert_eq!(r1.alignment_start(), Some(Position::new(20).unwrap()));
3798        assert_eq!(format_cigar(&r1.cigar()), "30M10S10H"); // 10 of 20S hard-clipped
3799        assert_eq!(r2.alignment_start(), Some(Position::new(20).unwrap()));
3800        assert_eq!(format_cigar(&r2.cigar()), "10H40M"); // 10S hard-clipped
3801    }
3802
3803    #[test]
3804    fn test_clip_extending_past_mate_ends_soft_clips_extend_with_deletion_hard_mode() {
3805        // fgbio test: "forward read soft-clips extend past with deletion (hard mode)"
3806        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3807        let seq = "A".repeat(50);
3808        let (mut r1, mut r2) = create_pair(20, "15M1D15M20S", &seq, 20, "10S15M1D25M", &seq);
3809
3810        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3811
3812        assert_eq!(clipped_r1, 0); // No aligned bases clipped
3813        assert_eq!(clipped_r2, 0); // No aligned bases clipped
3814        assert_eq!(r1.alignment_start(), Some(Position::new(20).unwrap()));
3815        assert_eq!(format_cigar(&r1.cigar()), "15M1D15M10S10H");
3816        assert_eq!(r2.alignment_start(), Some(Position::new(20).unwrap()));
3817        assert_eq!(format_cigar(&r2.cigar()), "10H15M1D25M");
3818    }
3819
3820    #[test]
3821    fn test_clip_extending_past_mate_ends_soft_clips_extend_with_insertion_hard_mode() {
3822        // fgbio test: "forward read soft-clips extend past with insertion (hard mode)"
3823        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3824        let seq = "A".repeat(51);
3825        let (mut r1, mut r2) = create_pair(20, "15M1I15M20S", &seq, 20, "10S15M1I25M", &seq);
3826
3827        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3828
3829        assert_eq!(clipped_r1, 0); // No aligned bases clipped
3830        assert_eq!(clipped_r2, 0); // No aligned bases clipped
3831        assert_eq!(r1.alignment_start(), Some(Position::new(20).unwrap()));
3832        assert_eq!(format_cigar(&r1.cigar()), "15M1I15M10S10H");
3833        assert_eq!(r2.alignment_start(), Some(Position::new(20).unwrap()));
3834        assert_eq!(format_cigar(&r2.cigar()), "10H15M1I25M");
3835    }
3836
3837    #[test]
3838    fn test_clip_extending_past_mate_ends_reverse_soft_clips_extend_hard_mode() {
3839        // fgbio test: "reverse read soft-clips extend past (hard mode)"
3840        let clipper = SamRecordClipper::new(ClippingMode::Hard);
3841        let seq = "A".repeat(50);
3842        let (mut r1, mut r2) = create_pair(20, "40M10S", &seq, 30, "20S30M", &seq);
3843
3844        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3845
3846        assert_eq!(clipped_r1, 0); // No aligned bases clipped
3847        assert_eq!(clipped_r2, 0); // No aligned bases clipped
3848        assert_eq!(r1.alignment_start(), Some(Position::new(20).unwrap()));
3849        assert_eq!(format_cigar(&r1.cigar()), "40M10H");
3850        assert_eq!(r2.alignment_start(), Some(Position::new(30).unwrap()));
3851        assert_eq!(format_cigar(&r2.cigar()), "10H10S30M");
3852    }
3853
3854    #[test]
3855    fn test_clip_extending_past_mate_ends_no_overlap_far_apart() {
3856        // fgbio test: "not clip when mapped +/- with start(R1) > end(R2) but no overlap"
3857        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3858        let seq = "A".repeat(100);
3859        let (mut r1, mut r2) = create_pair(1000, "100M", &seq, 1, "100M", &seq);
3860
3861        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3862
3863        assert_eq!(clipped_r1, 0);
3864        assert_eq!(clipped_r2, 0);
3865        assert_eq!(r1.alignment_start(), Some(Position::new(1000).unwrap()));
3866        assert_eq!(format_cigar(&r1.cigar()), "100M");
3867        assert_eq!(r2.alignment_start(), Some(Position::new(1).unwrap()));
3868        assert_eq!(format_cigar(&r2.cigar()), "100M");
3869    }
3870
3871    #[test]
3872    fn test_clip_extending_past_mate_ends_no_extension_with_insertions() {
3873        // fgbio test: "not clip when no extension with insertions"
3874        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3875        let seq = "A".repeat(100);
3876        let (mut r1, mut r2) = create_pair(1, "40M20I40M", &seq, 1, "40M20I40M", &seq);
3877
3878        let (clipped_r1, clipped_r2) = clipper.clip_extending_past_mate_ends(&mut r1, &mut r2);
3879
3880        assert_eq!(clipped_r1, 0);
3881        assert_eq!(clipped_r2, 0);
3882        assert_eq!(r1.alignment_start(), Some(Position::new(1).unwrap()));
3883        assert_eq!(format_cigar(&r1.cigar()), "40M20I40M");
3884        assert_eq!(r2.alignment_start(), Some(Position::new(1).unwrap()));
3885        assert_eq!(format_cigar(&r2.cigar()), "40M20I40M");
3886    }
3887
3888    #[test]
3889    fn test_clip_5_prime_end_of_alignment_positive_strand() {
3890        // fgbio test: "add more clipping to the 5' end" - positive strand
3891        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3892
3893        // Positive strand, no existing clipping
3894        let mut rec1 = create_test_record("50M", &"A".repeat(50), 10);
3895        *rec1.flags_mut() = Flags::empty(); // positive strand
3896        let clipped = clipper.clip_5_prime_end_of_alignment(&mut rec1, 10);
3897        assert_eq!(clipped, 10);
3898        assert_eq!(format_cigar(&rec1.cigar()), "10S40M");
3899
3900        // Positive strand, existing soft clipping
3901        let mut rec2 = create_test_record("10S40M", &"A".repeat(50), 10);
3902        *rec2.flags_mut() = Flags::empty(); // positive strand
3903        let clipped = clipper.clip_5_prime_end_of_alignment(&mut rec2, 10);
3904        assert_eq!(clipped, 10);
3905        assert_eq!(format_cigar(&rec2.cigar()), "20S30M");
3906    }
3907
3908    #[test]
3909    fn test_clip_5_prime_end_of_alignment_negative_strand() {
3910        // fgbio test: "add more clipping to the 5' end" - negative strand
3911        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3912
3913        // Negative strand, no existing clipping
3914        let mut rec3 = create_test_record("50M", &"A".repeat(50), 10);
3915        *rec3.flags_mut() = Flags::REVERSE_COMPLEMENTED;
3916        let clipped = clipper.clip_5_prime_end_of_alignment(&mut rec3, 10);
3917        assert_eq!(clipped, 10);
3918        assert_eq!(format_cigar(&rec3.cigar()), "40M10S");
3919
3920        // Negative strand, existing soft clipping
3921        let mut rec4 = create_test_record("40M10S", &"A".repeat(50), 10);
3922        *rec4.flags_mut() = Flags::REVERSE_COMPLEMENTED;
3923        let clipped = clipper.clip_5_prime_end_of_alignment(&mut rec4, 10);
3924        assert_eq!(clipped, 10);
3925        assert_eq!(format_cigar(&rec4.cigar()), "30M20S");
3926    }
3927
3928    #[test]
3929    fn test_clip_3_prime_end_of_alignment_negative_strand() {
3930        // fgbio test: "add more clipping to the 3' end" - negative strand
3931        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3932
3933        // Negative strand, no existing clipping
3934        let mut rec1 = create_test_record("50M", &"A".repeat(50), 10);
3935        *rec1.flags_mut() = Flags::REVERSE_COMPLEMENTED;
3936        let clipped = clipper.clip_3_prime_end_of_alignment(&mut rec1, 10);
3937        assert_eq!(clipped, 10);
3938        assert_eq!(format_cigar(&rec1.cigar()), "10S40M");
3939
3940        // Negative strand, existing soft clipping
3941        let mut rec2 = create_test_record("10S40M", &"A".repeat(50), 10);
3942        *rec2.flags_mut() = Flags::REVERSE_COMPLEMENTED;
3943        let clipped = clipper.clip_3_prime_end_of_alignment(&mut rec2, 10);
3944        assert_eq!(clipped, 10);
3945        assert_eq!(format_cigar(&rec2.cigar()), "20S30M");
3946    }
3947
3948    #[test]
3949    fn test_clip_3_prime_end_of_alignment_positive_strand() {
3950        // fgbio test: "add more clipping to the 3' end" - positive strand
3951        let clipper = SamRecordClipper::new(ClippingMode::Soft);
3952
3953        // Positive strand, no existing clipping
3954        let mut rec3 = create_test_record("50M", &"A".repeat(50), 10);
3955        *rec3.flags_mut() = Flags::empty(); // positive strand
3956        let clipped = clipper.clip_3_prime_end_of_alignment(&mut rec3, 10);
3957        assert_eq!(clipped, 10);
3958        assert_eq!(format_cigar(&rec3.cigar()), "40M10S");
3959
3960        // Positive strand, existing soft clipping
3961        let mut rec4 = create_test_record("40M10S", &"A".repeat(50), 10);
3962        *rec4.flags_mut() = Flags::empty(); // positive strand
3963        let clipped = clipper.clip_3_prime_end_of_alignment(&mut rec4, 10);
3964        assert_eq!(clipped, 10);
3965        assert_eq!(format_cigar(&rec4.cigar()), "30M20S");
3966    }
3967}