1use ahash::AHashMap;
7use anyhow::Result;
8use noodles::sam::alignment::record::Sequence as SequenceTrait;
9use noodles::sam::alignment::record_buf::data::field::Value;
10use noodles::sam::alignment::record_buf::{QualityScores, RecordBuf, Sequence};
11
12use crate::phred::{MIN_PHRED, NO_CALL_BASE};
13use crate::tags::{per_base, per_read};
14use fgumi_metrics::rejection::RejectionReason;
15
16#[derive(Debug, Clone)]
18pub struct FilterThresholds {
19 pub min_reads: usize,
21
22 pub max_read_error_rate: f64,
24
25 pub max_base_error_rate: f64,
27}
28
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
31pub enum ConsensusType {
32 SingleStrand,
34
35 Duplex,
37}
38
39#[derive(Debug, Clone)]
41pub struct FilterConfig {
42 pub duplex_thresholds: Option<FilterThresholds>,
44
45 pub ab_thresholds: Option<FilterThresholds>,
47
48 pub ba_thresholds: Option<FilterThresholds>,
50
51 pub single_strand_thresholds: Option<FilterThresholds>,
53
54 pub min_base_quality: Option<u8>,
56
57 pub min_mean_base_quality: Option<f64>,
59
60 pub max_no_call_fraction: f64,
62}
63
64impl FilterConfig {
65 #[must_use]
75 pub fn for_single_strand(
76 thresholds: FilterThresholds,
77 min_base_quality: Option<u8>,
78 min_mean_base_quality: Option<f64>,
79 max_no_call_fraction: f64,
80 ) -> Self {
81 Self {
82 duplex_thresholds: Some(thresholds.clone()),
83 ab_thresholds: Some(thresholds.clone()),
84 ba_thresholds: Some(thresholds.clone()),
85 single_strand_thresholds: Some(thresholds),
86 min_base_quality,
87 min_mean_base_quality,
88 max_no_call_fraction,
89 }
90 }
91
92 #[must_use]
107 pub fn for_duplex(
108 duplex: FilterThresholds,
109 strand: FilterThresholds,
110 min_base_quality: Option<u8>,
111 min_mean_base_quality: Option<f64>,
112 max_no_call_fraction: f64,
113 ) -> Self {
114 Self::for_duplex_asymmetric(
115 duplex,
116 strand.clone(),
117 strand,
118 min_base_quality,
119 min_mean_base_quality,
120 max_no_call_fraction,
121 )
122 }
123
124 #[must_use]
142 pub fn for_duplex_asymmetric(
143 duplex: FilterThresholds,
144 ab: FilterThresholds,
145 ba: FilterThresholds,
146 min_base_quality: Option<u8>,
147 min_mean_base_quality: Option<f64>,
148 max_no_call_fraction: f64,
149 ) -> Self {
150 assert!(
152 ab.min_reads <= duplex.min_reads,
153 "min-reads values must be specified high to low: AB ({}) > duplex ({})",
154 ab.min_reads,
155 duplex.min_reads
156 );
157 assert!(
158 ba.min_reads <= ab.min_reads,
159 "min-reads values must be specified high to low: BA ({}) > AB ({})",
160 ba.min_reads,
161 ab.min_reads
162 );
163 assert!(
164 ab.max_read_error_rate <= ba.max_read_error_rate,
165 "max-read-error-rate for AB ({}) must be <= BA ({})",
166 ab.max_read_error_rate,
167 ba.max_read_error_rate
168 );
169 assert!(
170 ab.max_base_error_rate <= ba.max_base_error_rate,
171 "max-base-error-rate for AB ({}) must be <= BA ({})",
172 ab.max_base_error_rate,
173 ba.max_base_error_rate
174 );
175
176 Self {
177 duplex_thresholds: Some(duplex.clone()),
178 ab_thresholds: Some(ab),
179 ba_thresholds: Some(ba),
180 single_strand_thresholds: Some(duplex),
181 min_base_quality,
182 min_mean_base_quality,
183 max_no_call_fraction,
184 }
185 }
186
187 #[must_use]
189 pub fn duplex_thresholds(
190 &self,
191 ) -> Option<(&FilterThresholds, &FilterThresholds, &FilterThresholds)> {
192 match (
193 self.duplex_thresholds.as_ref(),
194 self.ab_thresholds.as_ref(),
195 self.ba_thresholds.as_ref(),
196 ) {
197 (Some(cc), Some(ab), Some(ba)) => Some((cc, ab, ba)),
198 _ => None,
199 }
200 }
201
202 #[must_use]
204 pub fn effective_single_strand_thresholds(&self) -> Option<&FilterThresholds> {
205 self.single_strand_thresholds.as_ref().or(self.duplex_thresholds.as_ref())
206 }
207
208 #[must_use]
224 pub fn new(
225 min_reads: &[usize],
226 max_read_error_rate: &[f64],
227 max_base_error_rate: &[f64],
228 min_base_quality: Option<u8>,
229 min_mean_base_quality: Option<f64>,
230 max_no_call_fraction: f64,
231 ) -> Self {
232 let get_threshold = |values: &[usize], index: usize| -> usize {
234 match values.len() {
235 2 => {
236 if index == 0 {
237 values[0]
238 } else {
239 values[1]
240 }
241 }
242 3 => values[index],
243 _ => values[0],
244 }
245 };
246
247 let get_threshold_f64 = |values: &[f64], index: usize| -> f64 {
248 match values.len() {
249 2 => {
250 if index == 0 {
251 values[0]
252 } else {
253 values[1]
254 }
255 }
256 3 => values[index],
257 _ => values[0],
258 }
259 };
260
261 let duplex_thresholds = Some(FilterThresholds {
264 min_reads: get_threshold(min_reads, 0),
265 max_read_error_rate: get_threshold_f64(max_read_error_rate, 0),
266 max_base_error_rate: get_threshold_f64(max_base_error_rate, 0),
267 });
268
269 let ab_thresholds = Some(FilterThresholds {
270 min_reads: get_threshold(min_reads, 1),
271 max_read_error_rate: get_threshold_f64(max_read_error_rate, 1),
272 max_base_error_rate: get_threshold_f64(max_base_error_rate, 1),
273 });
274
275 let ba_thresholds = Some(FilterThresholds {
276 min_reads: get_threshold(min_reads, 2),
277 max_read_error_rate: get_threshold_f64(max_read_error_rate, 2),
278 max_base_error_rate: get_threshold_f64(max_base_error_rate, 2),
279 });
280
281 let single_strand_thresholds = Some(FilterThresholds {
283 min_reads: min_reads[0],
284 max_read_error_rate: if max_read_error_rate.is_empty() {
285 1.0
286 } else {
287 max_read_error_rate[0]
288 },
289 max_base_error_rate: if max_base_error_rate.is_empty() {
290 1.0
291 } else {
292 max_base_error_rate[0]
293 },
294 });
295
296 if let (Some(cc), Some(ab), Some(ba)) = (&duplex_thresholds, &ab_thresholds, &ba_thresholds)
300 {
301 assert!(
303 ab.min_reads <= cc.min_reads,
304 "min-reads values must be specified high to low: AB ({}) > CC ({})",
305 ab.min_reads,
306 cc.min_reads
307 );
308 assert!(
309 ba.min_reads <= ab.min_reads,
310 "min-reads values must be specified high to low: BA ({}) > AB ({})",
311 ba.min_reads,
312 ab.min_reads
313 );
314
315 assert!(
317 ab.max_read_error_rate <= ba.max_read_error_rate,
318 "max-read-error-rate for AB ({}) must be <= BA ({})",
319 ab.max_read_error_rate,
320 ba.max_read_error_rate
321 );
322
323 assert!(
325 ab.max_base_error_rate <= ba.max_base_error_rate,
326 "max-base-error-rate for AB ({}) must be <= BA ({})",
327 ab.max_base_error_rate,
328 ba.max_base_error_rate
329 );
330 }
331
332 Self {
333 duplex_thresholds,
334 ab_thresholds,
335 ba_thresholds,
336 single_strand_thresholds,
337 min_base_quality,
338 min_mean_base_quality,
339 max_no_call_fraction,
340 }
341 }
342}
343
344#[derive(Debug, Clone, Copy, PartialEq, Eq)]
346pub enum FilterResult {
347 Pass,
349
350 InsufficientReads,
352
353 ExcessiveErrorRate,
355
356 LowQuality,
358
359 TooManyNoCalls,
361}
362
363impl FilterResult {
364 #[must_use]
369 pub fn to_rejection_reason(&self) -> Option<RejectionReason> {
370 match self {
371 Self::Pass => None,
372 Self::InsufficientReads => Some(RejectionReason::InsufficientSupport),
373 Self::ExcessiveErrorRate => Some(RejectionReason::ExcessiveErrorRate),
374 Self::LowQuality => Some(RejectionReason::LowMeanQuality),
375 Self::TooManyNoCalls => Some(RejectionReason::ExcessiveNBases),
376 }
377 }
378}
379
380fn extract_int_tag(data: &noodles::sam::alignment::record_buf::Data, tag_str: &str) -> Option<i32> {
382 let tag = per_read::tag(tag_str);
383 match data.get(&tag)? {
384 Value::Int8(v) => Some(i32::from(*v)),
385 Value::UInt8(v) => Some(i32::from(*v)),
386 Value::Int16(v) => Some(i32::from(*v)),
387 Value::UInt16(v) => Some(i32::from(*v)),
388 Value::Int32(v) => Some(*v),
389 #[expect(
390 clippy::cast_possible_wrap,
391 reason = "BAM tag values in practice never exceed i32::MAX"
392 )]
393 Value::UInt32(v) => Some(*v as i32),
394 _ => None,
395 }
396}
397
398fn extract_float_tag(
400 data: &noodles::sam::alignment::record_buf::Data,
401 tag_str: &str,
402) -> Option<f32> {
403 let tag = per_read::tag(tag_str);
404 match data.get(&tag)? {
405 Value::Float(v) => Some(*v),
406 _ => None,
407 }
408}
409
410pub fn filter_read(record: &RecordBuf, thresholds: &FilterThresholds) -> Result<FilterResult> {
427 let depth_tag = per_read::tag("cD");
429 if let Some(Value::UInt8(depth)) = record.data().get(&depth_tag) {
430 if (*depth as usize) < thresholds.min_reads {
431 return Ok(FilterResult::InsufficientReads);
432 }
433 }
434
435 let error_tag = per_read::tag("cE");
438 if let Some(Value::Float(error_rate)) = record.data().get(&error_tag) {
439 if f64::from(*error_rate) > thresholds.max_read_error_rate {
440 return Ok(FilterResult::ExcessiveErrorRate);
441 }
442 }
443
444 Ok(FilterResult::Pass)
445}
446
447pub fn filter_duplex_read(
462 record: &RecordBuf,
463 cc_thresholds: &FilterThresholds,
464 ab_thresholds: &FilterThresholds,
465 ba_thresholds: &FilterThresholds,
466) -> Result<FilterResult> {
467 let data = record.data();
468
469 let result = filter_read(record, cc_thresholds)?;
471 if result != FilterResult::Pass {
472 return Ok(result);
473 }
474
475 let ab_depth = extract_int_tag(data, "aD").or_else(|| extract_int_tag(data, "aM"));
477 let ba_depth = extract_int_tag(data, "bD").or_else(|| extract_int_tag(data, "bM"));
478 let ab_error = extract_float_tag(data, "aE");
479 let ba_error = extract_float_tag(data, "bE");
480
481 let (min_depth, max_depth) = match (ab_depth, ba_depth) {
484 (Some(a), Some(b)) => {
485 if a < b {
486 (a, b)
487 } else {
488 (b, a)
489 }
490 }
491 (Some(a), None) => (0, a),
492 (None, Some(b)) => (0, b),
493 (None, None) => return Ok(FilterResult::Pass), };
495
496 let (min_error, max_error) = match (ab_error, ba_error) {
497 (Some(a), Some(b)) => {
498 if a < b {
499 (a, b)
500 } else {
501 (b, a)
502 }
503 }
504 (Some(a), None) => (a, a),
505 (None, Some(b)) => (b, b),
506 (None, None) => (0.0, 0.0),
507 };
508
509 #[expect(clippy::cast_sign_loss, reason = "depth values are non-negative in BAM tags")]
512 if (max_depth as usize) < ab_thresholds.min_reads {
513 return Ok(FilterResult::InsufficientReads);
514 }
515 if f64::from(min_error) > ab_thresholds.max_read_error_rate {
516 return Ok(FilterResult::ExcessiveErrorRate);
517 }
518
519 #[expect(clippy::cast_sign_loss, reason = "depth values are non-negative in BAM tags")]
521 if (min_depth as usize) < ba_thresholds.min_reads {
522 return Ok(FilterResult::InsufficientReads);
523 }
524 if f64::from(max_error) > ba_thresholds.max_read_error_rate {
525 return Ok(FilterResult::ExcessiveErrorRate);
526 }
527
528 Ok(FilterResult::Pass)
529}
530
531fn extract_per_base_array(
533 data: &noodles::sam::alignment::record_buf::Data,
534 tag_str: &str,
535 len: usize,
536) -> Vec<u16> {
537 let tag = per_base::tag(tag_str);
538 if let Some(Value::Array(arr)) = data.get(&tag) {
539 use noodles::sam::alignment::record_buf::data::field::value::Array;
540 match arr {
541 #[expect(clippy::cast_sign_loss, reason = "clamped to non-negative via .max(0)")]
542 Array::Int16(values) => values.iter().map(|&v| v.max(0) as u16).collect(),
543 Array::UInt16(values) => values.clone(),
544 Array::UInt8(values) => values.iter().map(|&v| u16::from(v)).collect(),
545 _ => vec![0; len],
546 }
547 } else {
548 vec![0; len]
549 }
550}
551
552fn extract_consensus_bases(
554 data: &noodles::sam::alignment::record_buf::Data,
555 tag_str: &str,
556 len: usize,
557) -> Vec<u8> {
558 let tag = per_base::tag(tag_str);
559 if let Some(value) = data.get(&tag) {
560 match value {
561 Value::String(s) => {
562 let bytes: &[u8] = s.as_ref();
563 bytes.to_vec()
564 }
565 Value::Array(arr) => {
566 use noodles::sam::alignment::record_buf::data::field::value::Array;
567 match arr {
568 Array::UInt8(bytes) => bytes.clone(),
569 _ => vec![NO_CALL_BASE; len],
570 }
571 }
572 _ => vec![NO_CALL_BASE; len],
573 }
574 } else {
575 vec![NO_CALL_BASE; len]
576 }
577}
578
579pub fn mask_bases(
593 record: &mut RecordBuf,
594 thresholds: &FilterThresholds,
595 min_base_quality: Option<u8>,
596) -> Result<usize> {
597 let mut new_seq: Vec<u8> = record.sequence().iter().collect();
599 let mut new_quals: Vec<u8> = record.quality_scores().iter().collect();
600 let mut masked_count = 0;
601 let len = new_seq.len();
602
603 let data = record.data();
605 let depths = extract_per_base_array(data, "cd", len);
606 let errors = extract_per_base_array(data, "ce", len);
607
608 for i in 0..len {
610 let should_mask =
611 min_base_quality.is_some_and(|min_qual| new_quals[i] < min_qual) ||
613 (depths[i] as usize) < thresholds.min_reads ||
615 (depths[i] > 0 && (f64::from(errors[i]) / f64::from(depths[i])) > thresholds.max_base_error_rate);
617
618 if should_mask {
619 if new_seq[i] != NO_CALL_BASE {
621 masked_count += 1;
622 }
623 new_seq[i] = NO_CALL_BASE;
624 new_quals[i] = MIN_PHRED;
625 }
626 }
627
628 *record.sequence_mut() = Sequence::from(new_seq);
630 *record.quality_scores_mut() = QualityScores::from(new_quals);
631
632 Ok(masked_count)
633}
634
635pub fn mask_duplex_bases(
664 record: &mut RecordBuf,
665 cc_thresholds: &FilterThresholds,
666 ab_thresholds: &FilterThresholds,
667 ba_thresholds: &FilterThresholds,
668 min_base_quality: Option<u8>,
669 require_ss_agreement: bool,
670) -> Result<usize> {
671 let mut new_seq: Vec<u8> = record.sequence().iter().collect();
673 let mut new_quals: Vec<u8> = record.quality_scores().iter().collect();
674 let len = new_seq.len();
675 let mut masked_count = 0;
676
677 let data = record.data();
678
679 let ab_depths = extract_per_base_array(data, "ad", len);
681 let ab_errors = extract_per_base_array(data, "ae", len);
682
683 let ba_depths = extract_per_base_array(data, "bd", len);
685 let ba_errors = extract_per_base_array(data, "be", len);
686
687 let ab_bases = if require_ss_agreement {
689 extract_consensus_bases(data, "ac", len)
690 } else {
691 vec![NO_CALL_BASE; len]
692 };
693 let ba_bases = if require_ss_agreement {
694 extract_consensus_bases(data, "bc", len)
695 } else {
696 vec![NO_CALL_BASE; len]
697 };
698
699 for i in 0..len {
701 if new_seq[i] == NO_CALL_BASE {
702 continue;
703 }
704
705 let ab_depth_i = ab_depths[i];
707 let ba_depth_i = ba_depths[i];
708 let ab_error_i = ab_errors[i];
709 let ba_error_i = ba_errors[i];
710
711 let max_depth = std::cmp::max(ab_depth_i, ba_depth_i);
713 let min_depth = std::cmp::min(ab_depth_i, ba_depth_i);
714
715 let ab_error_rate =
717 if ab_depth_i > 0 { f64::from(ab_error_i) / f64::from(ab_depth_i) } else { 0.0 };
718 let ba_error_rate =
719 if ba_depth_i > 0 { f64::from(ba_error_i) / f64::from(ba_depth_i) } else { 0.0 };
720
721 let min_error_rate = ab_error_rate.min(ba_error_rate);
722 let max_error_rate = ab_error_rate.max(ba_error_rate);
723
724 let total_depth = ab_depth_i + ba_depth_i;
726 let total_error_rate = if total_depth > 0 {
727 f64::from(ab_error_i + ba_error_i) / f64::from(total_depth)
728 } else {
729 0.0
730 };
731
732 let should_mask = min_base_quality.is_some_and(|min_qual| new_quals[i] < min_qual)
734 || (total_depth as usize) < cc_thresholds.min_reads
735 || total_error_rate > cc_thresholds.max_base_error_rate
736 || (max_depth as usize) < ab_thresholds.min_reads
737 || min_error_rate > ab_thresholds.max_base_error_rate
738 || (min_depth as usize) < ba_thresholds.min_reads
739 || max_error_rate > ba_thresholds.max_base_error_rate;
740
741 let ss_disagree =
743 require_ss_agreement && ab_depth_i > 0 && ba_depth_i > 0 && ab_bases[i] != ba_bases[i];
744
745 if should_mask || ss_disagree {
746 if new_seq[i] != NO_CALL_BASE {
748 masked_count += 1;
749 }
750 new_seq[i] = NO_CALL_BASE;
751 new_quals[i] = MIN_PHRED;
752 }
753 }
754
755 *record.sequence_mut() = Sequence::from(new_seq);
757 *record.quality_scores_mut() = QualityScores::from(new_quals);
758
759 Ok(masked_count)
760}
761
762#[must_use]
764pub fn count_no_calls(record: &RecordBuf) -> usize {
765 record.sequence().iter().filter(|b| *b == NO_CALL_BASE).count()
766}
767
768#[must_use]
770pub fn mean_base_quality(record: &RecordBuf) -> f64 {
771 let (sum, count) = record
773 .sequence()
774 .iter()
775 .zip(record.quality_scores().iter())
776 .filter(|&(base, _)| base != NO_CALL_BASE)
777 .fold((0u64, 0usize), |(sum, count), (_, qual)| (sum + u64::from(qual), count + 1));
778
779 #[expect(
780 clippy::cast_precision_loss,
781 reason = "precision loss is acceptable for quality averaging"
782 )]
783 if count == 0 { 0.0 } else { sum as f64 / count as f64 }
784}
785
786#[must_use]
794pub fn compute_read_stats(record: &RecordBuf) -> (usize, f64) {
795 let (qual_sum, non_n_count, n_count) = record
796 .sequence()
797 .iter()
798 .zip(record.quality_scores().iter())
799 .fold((0u64, 0usize, 0usize), |(sum, non_n, n), (base, qual)| {
800 if base == NO_CALL_BASE {
801 (sum, non_n, n + 1)
802 } else {
803 (sum + u64::from(qual), non_n + 1, n)
804 }
805 });
806
807 #[expect(
808 clippy::cast_precision_loss,
809 reason = "precision loss is acceptable for quality averaging"
810 )]
811 let mean_qual = if non_n_count == 0 { 0.0 } else { qual_sum as f64 / non_n_count as f64 };
812 (n_count, mean_qual)
813}
814
815#[must_use]
827pub fn template_passes(template_records: &[RecordBuf], pass_map: &AHashMap<usize, bool>) -> bool {
828 let mut has_primary = false;
830 let mut all_primary_pass = true;
831
832 for (idx, record) in template_records.iter().enumerate() {
833 let flags = record.flags();
834 let is_primary = !flags.is_secondary() && !flags.is_supplementary();
835
836 if is_primary {
837 has_primary = true;
838 if let Some(&passes) = pass_map.get(&idx) {
839 if !passes {
840 all_primary_pass = false;
841 break;
842 }
843 } else {
844 all_primary_pass = false;
846 break;
847 }
848 }
849 }
850
851 has_primary && all_primary_pass
854}
855
856#[must_use]
860pub fn template_passes_raw(raw_records: &[Vec<u8>], pass_map: &AHashMap<usize, bool>) -> bool {
861 let mut has_primary = false;
862 let mut all_primary_pass = true;
863
864 for (idx, record) in raw_records.iter().enumerate() {
865 let flags = bam_fields::flags(record);
866 let is_primary = (flags & bam_fields::flags::SECONDARY) == 0
867 && (flags & bam_fields::flags::SUPPLEMENTARY) == 0;
868
869 if is_primary {
870 has_primary = true;
871 if let Some(&passes) = pass_map.get(&idx) {
872 if !passes {
873 all_primary_pass = false;
874 break;
875 }
876 } else {
877 all_primary_pass = false;
878 break;
879 }
880 }
881 }
882
883 has_primary && all_primary_pass
884}
885
886#[must_use]
896pub fn is_duplex_consensus(record: &RecordBuf) -> bool {
897 let ad_tag = per_read::tag("aD");
898 let bd_tag = per_read::tag("bD");
899
900 record.data().get(&ad_tag).is_some() || record.data().get(&bd_tag).is_some()
901}
902
903use fgumi_raw_bam as bam_fields;
908
909#[must_use]
913pub fn is_duplex_consensus_raw(aux_data: &[u8]) -> bool {
914 bam_fields::find_tag_type(aux_data, b"aD").is_some()
915 || bam_fields::find_tag_type(aux_data, b"bD").is_some()
916}
917
918pub fn filter_read_raw(aux_data: &[u8], thresholds: &FilterThresholds) -> Result<FilterResult> {
924 if let Some(depth) = bam_fields::find_uint8_tag(aux_data, b"cD") {
926 if (depth as usize) < thresholds.min_reads {
927 return Ok(FilterResult::InsufficientReads);
928 }
929 }
930
931 if let Some(error_rate) = bam_fields::find_float_tag(aux_data, b"cE") {
933 if f64::from(error_rate) > thresholds.max_read_error_rate {
934 return Ok(FilterResult::ExcessiveErrorRate);
935 }
936 }
937
938 Ok(FilterResult::Pass)
939}
940
941pub fn filter_duplex_read_raw(
947 aux_data: &[u8],
948 cc_thresholds: &FilterThresholds,
949 ab_thresholds: &FilterThresholds,
950 ba_thresholds: &FilterThresholds,
951) -> Result<FilterResult> {
952 let result = filter_read_raw(aux_data, cc_thresholds)?;
954 if result != FilterResult::Pass {
955 return Ok(result);
956 }
957
958 let ab_depth = bam_fields::find_int_tag(aux_data, b"aD")
960 .or_else(|| bam_fields::find_int_tag(aux_data, b"aM"));
961 let ba_depth = bam_fields::find_int_tag(aux_data, b"bD")
962 .or_else(|| bam_fields::find_int_tag(aux_data, b"bM"));
963 let ab_error = bam_fields::find_float_tag(aux_data, b"aE");
964 let ba_error = bam_fields::find_float_tag(aux_data, b"bE");
965
966 let (min_depth, max_depth) = match (ab_depth, ba_depth) {
968 (Some(a), Some(b)) => {
969 if a < b {
970 (a, b)
971 } else {
972 (b, a)
973 }
974 }
975 (Some(a), None) => (0, a),
976 (None, Some(b)) => (0, b),
977 (None, None) => return Ok(FilterResult::Pass),
978 };
979
980 let (min_error, max_error) = match (ab_error, ba_error) {
981 (Some(a), Some(b)) => {
982 if a < b {
983 (a, b)
984 } else {
985 (b, a)
986 }
987 }
988 (Some(a), None) => (a, a),
989 (None, Some(b)) => (b, b),
990 (None, None) => (0.0, 0.0),
991 };
992
993 #[expect(
995 clippy::cast_sign_loss,
996 clippy::cast_possible_truncation,
997 reason = "depth values are non-negative and fit in usize on all supported platforms"
998 )]
999 if (max_depth as usize) < ab_thresholds.min_reads {
1000 return Ok(FilterResult::InsufficientReads);
1001 }
1002 if f64::from(min_error) > ab_thresholds.max_read_error_rate {
1003 return Ok(FilterResult::ExcessiveErrorRate);
1004 }
1005
1006 #[expect(
1008 clippy::cast_sign_loss,
1009 clippy::cast_possible_truncation,
1010 reason = "depth values are non-negative and fit in usize on all supported platforms"
1011 )]
1012 if (min_depth as usize) < ba_thresholds.min_reads {
1013 return Ok(FilterResult::InsufficientReads);
1014 }
1015 if f64::from(max_error) > ba_thresholds.max_read_error_rate {
1016 return Ok(FilterResult::ExcessiveErrorRate);
1017 }
1018
1019 Ok(FilterResult::Pass)
1020}
1021
1022#[must_use]
1029pub fn compute_read_stats_raw(bam: &[u8]) -> (usize, f64) {
1030 assert!(bam.len() >= bam_fields::MIN_BAM_RECORD_LEN, "BAM record too short");
1031 let seq_off = bam_fields::seq_offset(bam);
1032 let qual_off = bam_fields::qual_offset(bam);
1033 let len = bam_fields::l_seq(bam) as usize;
1034
1035 let mut n_count = 0usize;
1036 let mut qual_sum = 0u64;
1037 let mut non_n_count = 0usize;
1038
1039 for i in 0..len {
1040 if bam_fields::is_base_n(bam, seq_off, i) {
1041 n_count += 1;
1042 } else {
1043 qual_sum += u64::from(bam_fields::get_qual(bam, qual_off, i));
1044 non_n_count += 1;
1045 }
1046 }
1047
1048 #[expect(
1049 clippy::cast_precision_loss,
1050 reason = "precision loss is acceptable for quality averaging"
1051 )]
1052 let mean_qual = if non_n_count == 0 { 0.0 } else { qual_sum as f64 / non_n_count as f64 };
1053 (n_count, mean_qual)
1054}
1055
1056fn find_string_or_uint8_array(aux_data: &[u8], tag: [u8; 2]) -> Option<Vec<u8>> {
1060 if let Some(s) = bam_fields::find_string_tag(aux_data, &tag) {
1061 Some(s.to_vec())
1062 } else {
1063 let arr = bam_fields::find_array_tag(aux_data, &tag)?;
1064 if matches!(arr.elem_type, b'C' | b'c') {
1065 #[expect(
1066 clippy::cast_possible_truncation,
1067 reason = "UInt8 array elements are guaranteed to fit in u8"
1068 )]
1069 Some((0..arr.count).map(|i| bam_fields::array_tag_element_u16(&arr, i) as u8).collect())
1070 } else {
1071 None
1072 }
1073 }
1074}
1075
1076#[expect(
1085 clippy::similar_names,
1086 reason = "threshold variable names mirror the consensus tag names they check"
1087)]
1088pub fn mask_bases_raw(
1089 record: &mut [u8],
1090 thresholds: &FilterThresholds,
1091 min_base_quality: Option<u8>,
1092) -> Result<usize> {
1093 anyhow::ensure!(record.len() >= bam_fields::MIN_BAM_RECORD_LEN, "BAM record too short");
1094 let seq_off = bam_fields::seq_offset(record);
1095 let qual_off = bam_fields::qual_offset(record);
1096 let len = bam_fields::l_seq(record) as usize;
1097 let aux_off = bam_fields::aux_data_offset_from_record(record).unwrap_or(record.len());
1098
1099 let cd_vals = bam_fields::find_array_tag(&record[aux_off..], b"cd")
1101 .map(|r| bam_fields::array_tag_to_vec_u16(&r));
1102 let ce_vals = bam_fields::find_array_tag(&record[aux_off..], b"ce")
1103 .map(|r| bam_fields::array_tag_to_vec_u16(&r));
1104
1105 let mut masked_count = 0;
1106 for i in 0..len {
1107 let depth = cd_vals.as_ref().map_or(0u16, |v| v.get(i).copied().unwrap_or(0));
1108 let errors = ce_vals.as_ref().map_or(0u16, |v| v.get(i).copied().unwrap_or(0));
1109 let qual = bam_fields::get_qual(record, qual_off, i);
1110
1111 let should_mask = min_base_quality.is_some_and(|min_qual| qual < min_qual)
1112 || (depth as usize) < thresholds.min_reads
1113 || (depth > 0
1114 && (f64::from(errors) / f64::from(depth)) > thresholds.max_base_error_rate);
1115
1116 if should_mask {
1117 if !bam_fields::is_base_n(record, seq_off, i) {
1119 masked_count += 1;
1120 }
1121 bam_fields::mask_base(record, seq_off, i);
1122 bam_fields::set_qual(record, qual_off, i, MIN_PHRED);
1123 }
1124 }
1125
1126 Ok(masked_count)
1127}
1128
1129#[expect(
1137 clippy::similar_names,
1138 reason = "threshold variable names mirror the duplex strand tag names"
1139)]
1140pub fn mask_duplex_bases_raw(
1141 record: &mut [u8],
1142 cc_thresholds: &FilterThresholds,
1143 ab_thresholds: &FilterThresholds,
1144 ba_thresholds: &FilterThresholds,
1145 min_base_quality: Option<u8>,
1146 require_ss_agreement: bool,
1147) -> Result<usize> {
1148 anyhow::ensure!(record.len() >= bam_fields::MIN_BAM_RECORD_LEN, "BAM record too short");
1149 let seq_off = bam_fields::seq_offset(record);
1150 let qual_off = bam_fields::qual_offset(record);
1151 let len = bam_fields::l_seq(record) as usize;
1152 let aux_off = bam_fields::aux_data_offset_from_record(record).unwrap_or(record.len());
1153
1154 let ad_vals = bam_fields::find_array_tag(&record[aux_off..], b"ad")
1156 .map(|r| bam_fields::array_tag_to_vec_u16(&r));
1157 let ae_vals = bam_fields::find_array_tag(&record[aux_off..], b"ae")
1158 .map(|r| bam_fields::array_tag_to_vec_u16(&r));
1159 let bd_vals = bam_fields::find_array_tag(&record[aux_off..], b"bd")
1160 .map(|r| bam_fields::array_tag_to_vec_u16(&r));
1161 let be_vals = bam_fields::find_array_tag(&record[aux_off..], b"be")
1162 .map(|r| bam_fields::array_tag_to_vec_u16(&r));
1163
1164 let ac_owned: Option<Vec<u8>> = if require_ss_agreement {
1167 find_string_or_uint8_array(&record[aux_off..], *b"ac")
1168 } else {
1169 None
1170 };
1171 let bc_owned: Option<Vec<u8>> = if require_ss_agreement {
1172 find_string_or_uint8_array(&record[aux_off..], *b"bc")
1173 } else {
1174 None
1175 };
1176
1177 let mut masked_count = 0;
1178 for i in 0..len {
1179 if bam_fields::is_base_n(record, seq_off, i) {
1181 continue;
1182 }
1183
1184 let ab_depth = ad_vals.as_ref().map_or(0u16, |v| v.get(i).copied().unwrap_or(0));
1185 let ba_depth = bd_vals.as_ref().map_or(0u16, |v| v.get(i).copied().unwrap_or(0));
1186 let ab_errors = ae_vals.as_ref().map_or(0u16, |v| v.get(i).copied().unwrap_or(0));
1187 let ba_errors = be_vals.as_ref().map_or(0u16, |v| v.get(i).copied().unwrap_or(0));
1188
1189 let max_depth = std::cmp::max(ab_depth, ba_depth);
1190 let min_depth = std::cmp::min(ab_depth, ba_depth);
1191
1192 let ab_error_rate =
1193 if ab_depth > 0 { f64::from(ab_errors) / f64::from(ab_depth) } else { 0.0 };
1194 let ba_error_rate =
1195 if ba_depth > 0 { f64::from(ba_errors) / f64::from(ba_depth) } else { 0.0 };
1196
1197 let min_error_rate = ab_error_rate.min(ba_error_rate);
1198 let max_error_rate = ab_error_rate.max(ba_error_rate);
1199
1200 let total_depth = u32::from(ab_depth) + u32::from(ba_depth);
1201 let total_error_rate = if total_depth > 0 {
1202 f64::from(u32::from(ab_errors) + u32::from(ba_errors)) / f64::from(total_depth)
1203 } else {
1204 0.0
1205 };
1206
1207 let qual = bam_fields::get_qual(record, qual_off, i);
1208
1209 let should_mask = min_base_quality.is_some_and(|min_qual| qual < min_qual)
1210 || (total_depth as usize) < cc_thresholds.min_reads
1211 || total_error_rate > cc_thresholds.max_base_error_rate
1212 || (max_depth as usize) < ab_thresholds.min_reads
1213 || min_error_rate > ab_thresholds.max_base_error_rate
1214 || (min_depth as usize) < ba_thresholds.min_reads
1215 || max_error_rate > ba_thresholds.max_base_error_rate;
1216
1217 let ss_disagree = require_ss_agreement && ab_depth > 0 && ba_depth > 0 && {
1220 let ac_base =
1221 ac_owned.as_ref().and_then(|ac| ac.get(i).copied()).unwrap_or(NO_CALL_BASE);
1222 let bc_base =
1223 bc_owned.as_ref().and_then(|bc| bc.get(i).copied()).unwrap_or(NO_CALL_BASE);
1224 ac_base != bc_base
1225 };
1226
1227 if should_mask || ss_disagree {
1228 masked_count += 1;
1229 bam_fields::mask_base(record, seq_off, i);
1230 bam_fields::set_qual(record, qual_off, i, MIN_PHRED);
1231 }
1232 }
1233
1234 Ok(masked_count)
1235}
1236
1237#[cfg(test)]
1238mod tests {
1239 use super::*;
1240 use fgumi_sam::builder::RecordBuilder;
1241 use noodles::sam::alignment::record::data::field::Tag;
1242
1243 fn create_test_record() -> RecordBuf {
1244 RecordBuilder::new().sequence("ACGTACGT").build()
1246 }
1247
1248 #[test]
1249 fn test_filter_result_to_rejection_reason() {
1250 assert_eq!(FilterResult::Pass.to_rejection_reason(), None);
1251 assert_eq!(
1252 FilterResult::InsufficientReads.to_rejection_reason(),
1253 Some(RejectionReason::InsufficientSupport)
1254 );
1255 assert_eq!(
1256 FilterResult::ExcessiveErrorRate.to_rejection_reason(),
1257 Some(RejectionReason::ExcessiveErrorRate)
1258 );
1259 assert_eq!(
1260 FilterResult::LowQuality.to_rejection_reason(),
1261 Some(RejectionReason::LowMeanQuality)
1262 );
1263 assert_eq!(
1264 FilterResult::TooManyNoCalls.to_rejection_reason(),
1265 Some(RejectionReason::ExcessiveNBases)
1266 );
1267 }
1268
1269 #[test]
1270 fn test_filter_config_single_value() {
1271 let config = FilterConfig::new(&[3], &[0.1], &[0.2], Some(13), None, 0.1);
1272
1273 assert!(config.single_strand_thresholds.is_some());
1275 assert!(config.duplex_thresholds.is_some());
1276
1277 let thresh = config.single_strand_thresholds.unwrap();
1278 assert_eq!(thresh.min_reads, 3);
1279 assert!((thresh.max_read_error_rate - 0.1).abs() < f64::EPSILON);
1280 assert!((thresh.max_base_error_rate - 0.2).abs() < f64::EPSILON);
1281 }
1282
1283 #[test]
1284 fn test_filter_config_three_values() {
1285 let config =
1286 FilterConfig::new(&[5, 3, 3], &[0.05, 0.1, 0.1], &[0.1, 0.2, 0.2], Some(13), None, 0.1);
1287
1288 assert!(config.duplex_thresholds.is_some());
1290 assert!(config.ab_thresholds.is_some());
1291 assert!(config.ba_thresholds.is_some());
1292 assert!(config.single_strand_thresholds.is_some());
1293
1294 let duplex = config.duplex_thresholds.unwrap();
1295 assert_eq!(duplex.min_reads, 5);
1296 assert!((duplex.max_read_error_rate - 0.05).abs() < f64::EPSILON);
1297
1298 let ab = config.ab_thresholds.unwrap();
1299 assert_eq!(ab.min_reads, 3);
1300 assert!((ab.max_read_error_rate - 0.1).abs() < f64::EPSILON);
1301 }
1302
1303 #[test]
1304 fn test_filter_config_valid_threshold_ordering() {
1305 let config = FilterConfig::new(
1308 &[10, 5, 3],
1309 &[0.02, 0.05, 0.1],
1310 &[0.05, 0.1, 0.2],
1311 Some(13),
1312 None,
1313 0.1,
1314 );
1315
1316 let cc = config.duplex_thresholds.unwrap();
1317 let ab = config.ab_thresholds.unwrap();
1318 let ba = config.ba_thresholds.unwrap();
1319
1320 assert_eq!(cc.min_reads, 10);
1321 assert_eq!(ab.min_reads, 5);
1322 assert_eq!(ba.min_reads, 3);
1323 }
1324
1325 #[test]
1328 fn test_filter_config_for_single_strand() {
1329 let thresholds =
1330 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1331
1332 let config = FilterConfig::for_single_strand(thresholds.clone(), Some(13), None, 0.1);
1333
1334 let ss = config.single_strand_thresholds.unwrap();
1336 assert_eq!(ss.min_reads, 3);
1337 assert!((ss.max_read_error_rate - 0.1).abs() < f64::EPSILON);
1338 assert!((ss.max_base_error_rate - 0.2).abs() < f64::EPSILON);
1339
1340 assert!(config.duplex_thresholds.is_some());
1342 assert!(config.ab_thresholds.is_some());
1343 assert!(config.ba_thresholds.is_some());
1344 }
1345
1346 #[test]
1347 fn test_filter_config_for_duplex_symmetric() {
1348 let duplex_thresholds =
1349 FilterThresholds { min_reads: 10, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1350 let strand_thresholds =
1351 FilterThresholds { min_reads: 5, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1352
1353 let config =
1354 FilterConfig::for_duplex(duplex_thresholds, strand_thresholds, Some(13), None, 0.1);
1355
1356 let cc = config.duplex_thresholds.unwrap();
1357 let ab = config.ab_thresholds.unwrap();
1358 let ba = config.ba_thresholds.unwrap();
1359
1360 assert_eq!(cc.min_reads, 10);
1361 assert_eq!(ab.min_reads, 5);
1362 assert_eq!(ba.min_reads, 5); }
1364
1365 #[test]
1366 fn test_filter_config_for_duplex_asymmetric() {
1367 let duplex = FilterThresholds {
1368 min_reads: 10,
1369 max_read_error_rate: 0.02,
1370 max_base_error_rate: 0.05,
1371 };
1372 let ab =
1373 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1374 let ba =
1375 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1376
1377 let config =
1378 FilterConfig::for_duplex_asymmetric(duplex, ab, ba, Some(13), Some(20.0), 0.15);
1379
1380 let cc = config.duplex_thresholds.unwrap();
1381 let ab_t = config.ab_thresholds.unwrap();
1382 let ba_t = config.ba_thresholds.unwrap();
1383
1384 assert_eq!(cc.min_reads, 10);
1385 assert_eq!(ab_t.min_reads, 5);
1386 assert_eq!(ba_t.min_reads, 3);
1387
1388 assert_eq!(config.min_base_quality, Some(13));
1390 assert_eq!(config.min_mean_base_quality, Some(20.0));
1391 assert!((config.max_no_call_fraction - 0.15).abs() < f64::EPSILON);
1392 }
1393
1394 #[test]
1395 #[should_panic(expected = "min-reads values must be specified high to low: AB")]
1396 fn test_filter_config_for_duplex_asymmetric_invalid_ab() {
1397 let duplex =
1398 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1399 let ab =
1400 FilterThresholds { min_reads: 10, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1401 let ba =
1402 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1403
1404 let _ = FilterConfig::for_duplex_asymmetric(duplex, ab, ba, Some(13), None, 0.1);
1406 }
1407
1408 #[test]
1409 #[should_panic(expected = "min-reads values must be specified high to low: AB")]
1410 fn test_filter_config_invalid_ab_greater_than_cc() {
1411 let _ =
1413 FilterConfig::new(&[5, 6, 3], &[0.05, 0.1, 0.1], &[0.1, 0.2, 0.2], Some(13), None, 0.1);
1414 }
1415
1416 #[test]
1417 #[should_panic(expected = "min-reads values must be specified high to low: BA")]
1418 fn test_filter_config_invalid_ba_greater_than_ab() {
1419 let _ =
1421 FilterConfig::new(&[5, 3, 4], &[0.05, 0.1, 0.1], &[0.1, 0.2, 0.2], Some(13), None, 0.1);
1422 }
1423
1424 #[test]
1425 #[should_panic(expected = "max-read-error-rate for AB")]
1426 fn test_filter_config_invalid_ab_read_error_rate_greater_than_ba() {
1427 let _ =
1429 FilterConfig::new(&[5, 3, 3], &[0.05, 0.2, 0.1], &[0.1, 0.2, 0.2], Some(13), None, 0.1);
1430 }
1431
1432 #[test]
1433 #[should_panic(expected = "max-base-error-rate for AB")]
1434 fn test_filter_config_invalid_ab_base_error_rate_greater_than_ba() {
1435 let _ =
1437 FilterConfig::new(&[5, 3, 3], &[0.05, 0.1, 0.1], &[0.1, 0.3, 0.2], Some(13), None, 0.1);
1438 }
1439
1440 #[test]
1441 fn test_count_no_calls() {
1442 let mut record = create_test_record();
1443 *record.sequence_mut() = Sequence::from(b"ACNNGCNN".to_vec());
1444
1445 assert_eq!(count_no_calls(&record), 4);
1446 }
1447
1448 #[test]
1449 fn test_mean_base_quality() {
1450 let mut record = create_test_record();
1451 *record.quality_scores_mut() = QualityScores::from(vec![10, 20, 30, 40, 10, 20, 30, 40]);
1452
1453 let mean = mean_base_quality(&record);
1454 assert!((mean - 25.0).abs() < f64::EPSILON); }
1456
1457 #[test]
1458 fn test_mean_base_quality_with_n() {
1459 let mut record = create_test_record();
1460 *record.sequence_mut() = Sequence::from(b"ACNNACGT".to_vec());
1461 *record.quality_scores_mut() = QualityScores::from(vec![10, 20, 0, 0, 30, 40, 10, 20]);
1462
1463 let mean = mean_base_quality(&record);
1464 assert!((mean - 21.666).abs() < 0.01);
1466 }
1467
1468 #[test]
1469 fn test_compute_read_stats() {
1470 let mut record = create_test_record();
1471 *record.sequence_mut() = Sequence::from(b"ACNNACGT".to_vec());
1472 *record.quality_scores_mut() = QualityScores::from(vec![10, 20, 0, 0, 30, 40, 10, 20]);
1473
1474 let (n_count, mean_qual) = compute_read_stats(&record);
1475
1476 assert_eq!(n_count, count_no_calls(&record));
1478 assert!((mean_qual - mean_base_quality(&record)).abs() < f64::EPSILON);
1479
1480 assert_eq!(n_count, 2);
1482 assert!((mean_qual - 21.666).abs() < 0.01);
1483 }
1484
1485 #[test]
1486 fn test_compute_read_stats_all_n() {
1487 let mut record = create_test_record();
1488 *record.sequence_mut() = Sequence::from(b"NNNNNNNN".to_vec());
1489 *record.quality_scores_mut() = QualityScores::from(vec![0; 8]);
1490
1491 let (n_count, mean_qual) = compute_read_stats(&record);
1492
1493 assert_eq!(n_count, 8);
1494 assert!((mean_qual - 0.0).abs() < f64::EPSILON);
1495 }
1496
1497 #[test]
1498 fn test_compute_read_stats_no_n() {
1499 let mut record = create_test_record();
1500 *record.quality_scores_mut() = QualityScores::from(vec![30, 30, 30, 30, 30, 30, 30, 30]);
1501
1502 let (n_count, mean_qual) = compute_read_stats(&record);
1503
1504 assert_eq!(n_count, 0);
1505 assert!((mean_qual - 30.0).abs() < f64::EPSILON);
1506 }
1507
1508 #[test]
1509 fn test_filter_result() {
1510 let thresholds =
1511 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1512
1513 let mut record = create_test_record();
1514
1515 let tag = Tag::from([b'c', b'D']);
1517 record.data_mut().insert(tag, Value::UInt8(5));
1518
1519 let result = filter_read(&record, &thresholds).unwrap();
1520 assert_eq!(result, FilterResult::Pass);
1521 }
1522
1523 #[test]
1524 fn test_filter_insufficient_reads() {
1525 let thresholds =
1526 FilterThresholds { min_reads: 10, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1527
1528 let mut record = create_test_record();
1529
1530 let tag = Tag::from([b'c', b'D']);
1532 record.data_mut().insert(tag, Value::UInt8(5));
1533
1534 let result = filter_read(&record, &thresholds).unwrap();
1535 assert_eq!(result, FilterResult::InsufficientReads);
1536 }
1537
1538 #[test]
1539 fn test_template_passes_all_pass() {
1540 use noodles::sam::alignment::record::Flags;
1541
1542 let mut rec1 = create_test_record();
1543 *rec1.flags_mut() = Flags::SEGMENTED | Flags::FIRST_SEGMENT;
1544
1545 let mut rec2 = create_test_record();
1546 *rec2.flags_mut() = Flags::SEGMENTED | Flags::LAST_SEGMENT;
1547
1548 let records = vec![rec1, rec2];
1549 let mut pass_map = AHashMap::new();
1550 pass_map.insert(0, true);
1551 pass_map.insert(1, true);
1552
1553 assert!(template_passes(&records, &pass_map));
1554 }
1555
1556 #[test]
1557 fn test_template_passes_one_fails() {
1558 use noodles::sam::alignment::record::Flags;
1559
1560 let mut rec1 = create_test_record();
1561 *rec1.flags_mut() = Flags::SEGMENTED | Flags::FIRST_SEGMENT;
1562
1563 let mut rec2 = create_test_record();
1564 *rec2.flags_mut() = Flags::SEGMENTED | Flags::LAST_SEGMENT;
1565
1566 let records = vec![rec1, rec2];
1567 let mut pass_map = AHashMap::new();
1568 pass_map.insert(0, true);
1569 pass_map.insert(1, false); assert!(!template_passes(&records, &pass_map));
1572 }
1573
1574 #[test]
1575 fn test_template_passes_secondary_ignored() {
1576 use noodles::sam::alignment::record::Flags;
1577
1578 let mut rec1 = create_test_record();
1579 *rec1.flags_mut() = Flags::SEGMENTED | Flags::FIRST_SEGMENT;
1580
1581 let mut rec2 = create_test_record();
1582 *rec2.flags_mut() = Flags::SEGMENTED | Flags::FIRST_SEGMENT | Flags::SECONDARY;
1583
1584 let records = vec![rec1, rec2];
1585 let mut pass_map = AHashMap::new();
1586 pass_map.insert(0, true); pass_map.insert(1, false); assert!(template_passes(&records, &pass_map));
1590 }
1591
1592 #[test]
1593 fn test_is_duplex_consensus() {
1594 let mut record = create_test_record();
1595
1596 assert!(!is_duplex_consensus(&record));
1598
1599 let ad_tag = Tag::from([b'a', b'D']);
1601 record.data_mut().insert(ad_tag, Value::UInt8(3));
1602
1603 assert!(is_duplex_consensus(&record));
1605 }
1606
1607 #[test]
1610 fn test_filter_duplex_read_pass() {
1611 let mut record = create_test_record();
1612
1613 record.data_mut().insert(Tag::from([b'c', b'D']), Value::UInt8(10));
1615 record.data_mut().insert(Tag::from([b'c', b'E']), Value::Float(0.01));
1616
1617 record.data_mut().insert(Tag::from([b'a', b'D']), Value::UInt8(6));
1619 record.data_mut().insert(Tag::from([b'b', b'D']), Value::UInt8(4));
1620 record.data_mut().insert(Tag::from([b'a', b'E']), Value::Float(0.01));
1621 record.data_mut().insert(Tag::from([b'b', b'E']), Value::Float(0.02));
1622
1623 let cc_thresholds =
1624 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1625 let ab_thresholds =
1626 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1627 let ba_thresholds =
1628 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.25 };
1629
1630 let result =
1631 filter_duplex_read(&record, &cc_thresholds, &ab_thresholds, &ba_thresholds).unwrap();
1632 assert_eq!(result, FilterResult::Pass);
1633 }
1634
1635 #[test]
1636 fn test_filter_duplex_read_insufficient_ab_reads() {
1637 let mut record = create_test_record();
1638
1639 record.data_mut().insert(Tag::from([b'c', b'D']), Value::UInt8(10));
1641 record.data_mut().insert(Tag::from([b'c', b'E']), Value::Float(0.01));
1642
1643 record.data_mut().insert(Tag::from([b'a', b'D']), Value::UInt8(2));
1645 record.data_mut().insert(Tag::from([b'b', b'D']), Value::UInt8(2));
1646 record.data_mut().insert(Tag::from([b'a', b'E']), Value::Float(0.01));
1647 record.data_mut().insert(Tag::from([b'b', b'E']), Value::Float(0.02));
1648
1649 let cc_thresholds =
1650 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1651 let ab_thresholds =
1652 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1653 let ba_thresholds =
1654 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.25 };
1655
1656 let result =
1657 filter_duplex_read(&record, &cc_thresholds, &ab_thresholds, &ba_thresholds).unwrap();
1658 assert_eq!(result, FilterResult::InsufficientReads);
1659 }
1660
1661 #[test]
1662 fn test_filter_duplex_read_insufficient_ba_reads() {
1663 let mut record = create_test_record();
1664
1665 record.data_mut().insert(Tag::from([b'c', b'D']), Value::UInt8(10));
1667 record.data_mut().insert(Tag::from([b'c', b'E']), Value::Float(0.01));
1668
1669 record.data_mut().insert(Tag::from([b'a', b'D']), Value::UInt8(5));
1671 record.data_mut().insert(Tag::from([b'b', b'D']), Value::UInt8(1));
1672 record.data_mut().insert(Tag::from([b'a', b'E']), Value::Float(0.01));
1673 record.data_mut().insert(Tag::from([b'b', b'E']), Value::Float(0.02));
1674
1675 let cc_thresholds =
1676 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1677 let ab_thresholds =
1678 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1679 let ba_thresholds =
1680 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.25 };
1681
1682 let result =
1683 filter_duplex_read(&record, &cc_thresholds, &ab_thresholds, &ba_thresholds).unwrap();
1684 assert_eq!(result, FilterResult::InsufficientReads);
1685 }
1686
1687 #[test]
1688 fn test_filter_duplex_read_consensus_fails_first() {
1689 let mut record = create_test_record();
1690
1691 record.data_mut().insert(Tag::from([b'c', b'D']), Value::UInt8(3));
1693 record.data_mut().insert(Tag::from([b'c', b'E']), Value::Float(0.01));
1694
1695 record.data_mut().insert(Tag::from([b'a', b'D']), Value::UInt8(6));
1697 record.data_mut().insert(Tag::from([b'b', b'D']), Value::UInt8(4));
1698 record.data_mut().insert(Tag::from([b'a', b'E']), Value::Float(0.01));
1699 record.data_mut().insert(Tag::from([b'b', b'E']), Value::Float(0.02));
1700
1701 let cc_thresholds =
1702 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1703 let ab_thresholds =
1704 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1705 let ba_thresholds =
1706 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.25 };
1707
1708 let result =
1709 filter_duplex_read(&record, &cc_thresholds, &ab_thresholds, &ba_thresholds).unwrap();
1710 assert_eq!(result, FilterResult::InsufficientReads);
1712 }
1713
1714 #[test]
1715 fn test_filter_duplex_read_with_only_one_strand() {
1716 let mut record = create_test_record();
1717
1718 record.data_mut().insert(Tag::from([b'c', b'D']), Value::UInt8(10));
1720 record.data_mut().insert(Tag::from([b'c', b'E']), Value::Float(0.01));
1721
1722 record.data_mut().insert(Tag::from([b'a', b'D']), Value::UInt8(6));
1724 record.data_mut().insert(Tag::from([b'a', b'E']), Value::Float(0.01));
1725
1726 let cc_thresholds =
1727 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.1 };
1728 let ab_thresholds =
1729 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.2 };
1730 let ba_thresholds =
1731 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.25 };
1732
1733 let result =
1734 filter_duplex_read(&record, &cc_thresholds, &ab_thresholds, &ba_thresholds).unwrap();
1735 assert_eq!(result, FilterResult::InsufficientReads);
1737 }
1738
1739 fn create_duplex_record_with_per_base_tags() -> RecordBuf {
1742 let mut record = create_test_record();
1743
1744 record.data_mut().insert(
1747 Tag::from([b'c', b'd']),
1748 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1749 vec![10, 10, 10, 10, 10, 10, 10, 10],
1750 )),
1751 );
1752 record.data_mut().insert(
1753 Tag::from([b'c', b'e']),
1754 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
1755 vec![1, 1, 1, 1, 1, 1, 1, 1], )),
1757 );
1758
1759 record.data_mut().insert(
1762 Tag::from([b'a', b'd']),
1763 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1764 vec![6, 6, 6, 6, 6, 6, 6, 6],
1765 )),
1766 );
1767 record.data_mut().insert(
1768 Tag::from([b'a', b'e']),
1769 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
1770 vec![0, 0, 0, 0, 0, 0, 0, 0], )),
1772 );
1773
1774 record.data_mut().insert(
1777 Tag::from([b'b', b'd']),
1778 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1779 vec![4, 4, 4, 4, 4, 4, 4, 4],
1780 )),
1781 );
1782 record.data_mut().insert(
1783 Tag::from([b'b', b'e']),
1784 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
1785 vec![1, 1, 1, 1, 1, 1, 1, 1], )),
1787 );
1788
1789 record
1790 }
1791
1792 #[test]
1793 fn test_mask_duplex_bases_all_pass() {
1794 let mut record = create_duplex_record_with_per_base_tags();
1795
1796 let cc_thresholds =
1797 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
1798 let ab_thresholds =
1799 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
1800 let ba_thresholds =
1801 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
1802
1803 mask_duplex_bases(
1804 &mut record,
1805 &cc_thresholds,
1806 &ab_thresholds,
1807 &ba_thresholds,
1808 Some(13),
1809 false,
1810 )
1811 .unwrap();
1812
1813 let seq = record.sequence();
1815 assert_eq!(seq.as_ref(), b"ACGTACGT");
1816 }
1817
1818 #[test]
1819 fn test_mask_duplex_bases_low_ab_depth() {
1820 let mut record = create_duplex_record_with_per_base_tags();
1821
1822 record.data_mut().insert(
1824 Tag::from([b'a', b'd']),
1825 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1826 vec![6, 6, 2, 6, 6, 6, 6, 6], )),
1828 );
1829
1830 let cc_thresholds =
1831 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
1832 let ab_thresholds =
1833 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
1834 let ba_thresholds =
1835 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
1836
1837 mask_duplex_bases(
1838 &mut record,
1839 &cc_thresholds,
1840 &ab_thresholds,
1841 &ba_thresholds,
1842 Some(13),
1843 false,
1844 )
1845 .unwrap();
1846
1847 let seq = record.sequence();
1849 assert_eq!(seq.as_ref(), b"ACNTACGT");
1850 }
1851
1852 #[test]
1853 fn test_mask_duplex_bases_low_ba_depth() {
1854 let mut record = create_duplex_record_with_per_base_tags();
1855
1856 record.data_mut().insert(
1858 Tag::from([b'b', b'd']),
1859 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1860 vec![4, 4, 4, 1, 1, 4, 4, 4], )),
1862 );
1863
1864 let cc_thresholds =
1865 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
1866 let ab_thresholds =
1867 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
1868 let ba_thresholds =
1869 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
1870
1871 mask_duplex_bases(
1872 &mut record,
1873 &cc_thresholds,
1874 &ab_thresholds,
1875 &ba_thresholds,
1876 Some(13),
1877 false,
1878 )
1879 .unwrap();
1880
1881 let seq = record.sequence();
1883 assert_eq!(seq.as_ref(), b"ACGNNCGT");
1884 }
1885
1886 #[test]
1887 fn test_mask_duplex_bases_high_ab_error() {
1888 let mut record = create_duplex_record_with_per_base_tags();
1889
1890 record.data_mut().insert(
1893 Tag::from([b'a', b'e']),
1894 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
1895 vec![0, 0, 0, 0, 0, 2, 0, 0], )),
1897 );
1898
1899 let cc_thresholds =
1900 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
1901 let ab_thresholds =
1902 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
1903 let ba_thresholds =
1904 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
1905
1906 mask_duplex_bases(
1907 &mut record,
1908 &cc_thresholds,
1909 &ab_thresholds,
1910 &ba_thresholds,
1911 Some(13),
1912 false,
1913 )
1914 .unwrap();
1915
1916 let seq = record.sequence();
1918 assert_eq!(seq.as_ref(), b"ACGTANGT");
1919 }
1920
1921 #[test]
1922 fn test_mask_duplex_bases_high_ba_error() {
1923 let mut record = create_duplex_record_with_per_base_tags();
1924
1925 record.data_mut().insert(
1928 Tag::from([b'b', b'e']),
1929 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
1930 vec![1, 1, 1, 1, 1, 1, 2, 2], )),
1932 );
1933
1934 let cc_thresholds =
1935 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
1936 let ab_thresholds =
1937 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
1938 let ba_thresholds =
1939 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
1940
1941 mask_duplex_bases(
1942 &mut record,
1943 &cc_thresholds,
1944 &ab_thresholds,
1945 &ba_thresholds,
1946 Some(13),
1947 false,
1948 )
1949 .unwrap();
1950
1951 let seq = record.sequence();
1953 assert_eq!(seq.as_ref(), b"ACGTACNN");
1954 }
1955
1956 #[test]
1957 fn test_mask_duplex_bases_low_total_depth() {
1958 let mut record = create_duplex_record_with_per_base_tags();
1959
1960 record.data_mut().insert(
1963 Tag::from([b'a', b'd']),
1964 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1965 vec![6, 2, 6, 6, 6, 6, 6, 6],
1966 )),
1967 );
1968 record.data_mut().insert(
1969 Tag::from([b'b', b'd']),
1970 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
1971 vec![4, 1, 4, 4, 4, 4, 4, 4],
1972 )),
1973 );
1974
1975 let cc_thresholds =
1976 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
1977 let ab_thresholds =
1978 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
1979 let ba_thresholds =
1980 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
1981
1982 mask_duplex_bases(
1983 &mut record,
1984 &cc_thresholds,
1985 &ab_thresholds,
1986 &ba_thresholds,
1987 Some(13),
1988 false,
1989 )
1990 .unwrap();
1991
1992 let seq = record.sequence();
1994 assert_eq!(seq.as_ref(), b"ANGTACGT");
1995 }
1996
1997 #[test]
1998 fn test_mask_duplex_bases_high_total_error() {
1999 let mut record = create_duplex_record_with_per_base_tags();
2000
2001 record.data_mut().insert(
2004 Tag::from([b'a', b'e']),
2005 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2006 vec![1, 0, 0, 0, 0, 0, 0, 0],
2007 )),
2008 );
2009 record.data_mut().insert(
2010 Tag::from([b'b', b'e']),
2011 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2012 vec![1, 1, 1, 1, 1, 1, 1, 1],
2013 )),
2014 );
2015
2016 let cc_thresholds =
2017 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
2018 let ab_thresholds =
2019 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
2020 let ba_thresholds =
2021 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
2022
2023 mask_duplex_bases(
2024 &mut record,
2025 &cc_thresholds,
2026 &ab_thresholds,
2027 &ba_thresholds,
2028 Some(13),
2029 false,
2030 )
2031 .unwrap();
2032
2033 let seq = record.sequence();
2035 assert_eq!(seq.as_ref(), b"NCGTACGT");
2036 }
2037
2038 #[test]
2039 fn test_mask_duplex_bases_multiple_failures() {
2040 let mut record = create_duplex_record_with_per_base_tags();
2041
2042 record.data_mut().insert(
2045 Tag::from([b'a', b'd']),
2046 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
2047 vec![2, 6, 6, 6, 6, 6, 6, 6],
2048 )),
2049 );
2050 record.data_mut().insert(
2052 Tag::from([b'b', b'd']),
2053 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
2054 vec![4, 1, 4, 4, 4, 4, 4, 4],
2055 )),
2056 );
2057 record.data_mut().insert(
2059 Tag::from([b'a', b'e']),
2060 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2061 vec![0, 0, 2, 0, 0, 0, 0, 0],
2062 )),
2063 );
2064
2065 let cc_thresholds =
2066 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
2067 let ab_thresholds =
2068 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
2069 let ba_thresholds =
2070 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
2071
2072 mask_duplex_bases(
2073 &mut record,
2074 &cc_thresholds,
2075 &ab_thresholds,
2076 &ba_thresholds,
2077 Some(13),
2078 false,
2079 )
2080 .unwrap();
2081
2082 let seq = record.sequence();
2084 assert_eq!(seq.as_ref(), b"NNNTACGT");
2085 }
2086
2087 #[test]
2090 fn test_mask_duplex_bases_single_strand_agreement_pass() {
2091 let mut record = create_duplex_record_with_per_base_tags();
2092
2093 record.data_mut().insert(
2095 Tag::from([b'a', b'c']),
2096 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2097 vec![b'A', b'C', b'G', b'T', b'A', b'C', b'G', b'T'],
2098 )),
2099 );
2100 record.data_mut().insert(
2101 Tag::from([b'b', b'c']),
2102 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2103 vec![b'A', b'C', b'G', b'T', b'A', b'C', b'G', b'T'],
2104 )),
2105 );
2106
2107 let cc_thresholds =
2108 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
2109 let ab_thresholds =
2110 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
2111 let ba_thresholds =
2112 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
2113
2114 mask_duplex_bases(
2115 &mut record,
2116 &cc_thresholds,
2117 &ab_thresholds,
2118 &ba_thresholds,
2119 Some(13),
2120 true,
2121 )
2122 .unwrap();
2123
2124 let seq = record.sequence();
2126 assert_eq!(seq.as_ref(), b"ACGTACGT");
2127 }
2128
2129 #[test]
2130 fn test_mask_duplex_bases_single_strand_agreement_fail() {
2131 let mut record = create_duplex_record_with_per_base_tags();
2132
2133 record.data_mut().insert(
2135 Tag::from([b'a', b'c']),
2136 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2137 vec![b'A', b'C', b'G', b'T', b'A', b'C', b'G', b'T'],
2138 )),
2139 );
2140 record.data_mut().insert(
2141 Tag::from([b'b', b'c']),
2142 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2143 vec![b'A', b'C', b'T', b'T', b'A', b'G', b'G', b'A'], )),
2145 );
2146
2147 let cc_thresholds =
2148 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
2149 let ab_thresholds =
2150 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
2151 let ba_thresholds =
2152 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
2153
2154 mask_duplex_bases(
2155 &mut record,
2156 &cc_thresholds,
2157 &ab_thresholds,
2158 &ba_thresholds,
2159 Some(13),
2160 true,
2161 )
2162 .unwrap();
2163
2164 let seq = record.sequence();
2166 assert_eq!(seq.as_ref(), b"ACNTANGN");
2167 }
2168
2169 #[test]
2170 fn test_mask_duplex_bases_single_strand_agreement_disabled() {
2171 let mut record = create_duplex_record_with_per_base_tags();
2172
2173 record.data_mut().insert(
2175 Tag::from([b'a', b'c']),
2176 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2177 vec![b'A', b'C', b'G', b'T', b'A', b'C', b'G', b'T'],
2178 )),
2179 );
2180 record.data_mut().insert(
2181 Tag::from([b'b', b'c']),
2182 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2183 vec![b'A', b'C', b'T', b'T', b'A', b'G', b'G', b'A'], )),
2185 );
2186
2187 let cc_thresholds =
2188 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
2189 let ab_thresholds =
2190 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
2191 let ba_thresholds =
2192 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
2193
2194 mask_duplex_bases(
2196 &mut record,
2197 &cc_thresholds,
2198 &ab_thresholds,
2199 &ba_thresholds,
2200 Some(13),
2201 false,
2202 )
2203 .unwrap();
2204
2205 let seq = record.sequence();
2207 assert_eq!(seq.as_ref(), b"ACGTACGT");
2208 }
2209
2210 #[test]
2211 fn test_mask_duplex_bases_combined_failures_with_agreement() {
2212 let mut record = create_duplex_record_with_per_base_tags();
2213
2214 record.data_mut().insert(
2216 Tag::from([b'a', b'd']),
2217 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
2218 vec![2, 6, 6, 6, 6, 6, 6, 6],
2219 )),
2220 );
2221
2222 record.data_mut().insert(
2224 Tag::from([b'a', b'c']),
2225 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2226 vec![b'A', b'C', b'G', b'T', b'A', b'C', b'G', b'T'],
2227 )),
2228 );
2229 record.data_mut().insert(
2230 Tag::from([b'b', b'c']),
2231 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt8(
2232 vec![b'A', b'C', b'G', b'A', b'A', b'G', b'G', b'T'], )),
2234 );
2235
2236 let cc_thresholds =
2237 FilterThresholds { min_reads: 5, max_read_error_rate: 0.05, max_base_error_rate: 0.15 };
2238 let ab_thresholds =
2239 FilterThresholds { min_reads: 3, max_read_error_rate: 0.1, max_base_error_rate: 0.25 };
2240 let ba_thresholds =
2241 FilterThresholds { min_reads: 2, max_read_error_rate: 0.15, max_base_error_rate: 0.3 };
2242
2243 mask_duplex_bases(
2244 &mut record,
2245 &cc_thresholds,
2246 &ab_thresholds,
2247 &ba_thresholds,
2248 Some(13),
2249 true,
2250 )
2251 .unwrap();
2252
2253 let seq = record.sequence();
2255 assert_eq!(seq.as_ref(), b"NCGNANGT");
2256 }
2257
2258 #[test]
2259 fn test_extract_per_base_array_int16() {
2260 use noodles::sam::alignment::record_buf::data::field::value::Array;
2262
2263 let mut data = noodles::sam::alignment::record_buf::Data::default();
2264 let tag = Tag::from([b'c', b'D']);
2265 data.insert(tag, Value::Array(Array::Int16(vec![5, 10, -1, 15, 0])));
2266
2267 let result = extract_per_base_array(&data, "cD", 5);
2268 assert_eq!(result, vec![5, 10, 0, 15, 0]);
2270 }
2271
2272 #[test]
2273 fn test_masked_base_quality_phred_2() {
2274 let mut record = RecordBuilder::new().sequence("ACGT").qualities(&[30, 30, 30, 30]).build();
2276
2277 record.data_mut().insert(
2279 Tag::from([b'c', b'd']),
2280 Value::Array(noodles::sam::alignment::record_buf::data::field::value::Array::UInt16(
2281 vec![1, 10, 1, 10],
2282 )),
2283 );
2284
2285 let thresholds =
2286 FilterThresholds { min_reads: 5, max_read_error_rate: 1.0, max_base_error_rate: 1.0 };
2287
2288 mask_bases(&mut record, &thresholds, Some(10)).unwrap();
2289
2290 let seq: Vec<u8> = record.sequence().iter().collect();
2292 let quals: Vec<u8> = record.quality_scores().iter().collect();
2293 assert_eq!(seq, b"NCNT");
2294 assert_eq!(
2295 quals,
2296 vec![2, 30, 2, 30],
2297 "Masked bases should have quality=2 (Phred MIN_VALUE)"
2298 );
2299 }
2300
2301 #[test]
2302 fn test_error_rate_f64_comparison() {
2303 let thresholds = FilterThresholds {
2306 min_reads: 1,
2307 max_read_error_rate: 0.1, max_base_error_rate: 0.15, };
2310
2311 let error_rate: f32 = 0.099;
2313 assert!(
2314 f64::from(error_rate) <= thresholds.max_read_error_rate,
2315 "f32 -> f64 comparison should work correctly"
2316 );
2317
2318 let error_rate_high: f32 = 0.101;
2320 assert!(
2321 f64::from(error_rate_high) > thresholds.max_read_error_rate,
2322 "f32 -> f64 comparison should catch values over threshold"
2323 );
2324 }
2325
2326 #[test]
2327 fn test_ac_bc_string_tag_handling() {
2328 let mut record = RecordBuilder::new()
2331 .sequence("ACGT")
2332 .qualities(&[30, 30, 30, 30])
2333 .tag("ac", "ACGT")
2334 .tag("bc", "ACGT")
2335 .build();
2336
2337 let cc_thresholds =
2340 FilterThresholds { min_reads: 1, max_read_error_rate: 1.0, max_base_error_rate: 1.0 };
2341 let ab_thresholds =
2342 FilterThresholds { min_reads: 1, max_read_error_rate: 1.0, max_base_error_rate: 1.0 };
2343 let ba_thresholds =
2344 FilterThresholds { min_reads: 1, max_read_error_rate: 1.0, max_base_error_rate: 1.0 };
2345
2346 let result = mask_duplex_bases(
2348 &mut record,
2349 &cc_thresholds,
2350 &ab_thresholds,
2351 &ba_thresholds,
2352 Some(10),
2353 true,
2354 );
2355 assert!(result.is_ok());
2356 }
2357
2358 #[test]
2361 fn test_find_string_or_uint8_array_z_tag() {
2362 let mut aux = Vec::new();
2364 aux.extend_from_slice(b"ac"); aux.push(b'Z'); aux.extend_from_slice(b"ACGT\0"); let result = super::find_string_or_uint8_array(&aux, *b"ac");
2369 assert_eq!(result, Some(b"ACGT".to_vec()));
2370 }
2371
2372 #[test]
2373 fn test_find_string_or_uint8_array_b_uint8_tag() {
2374 let mut aux = Vec::new();
2376 aux.extend_from_slice(b"ac"); aux.push(b'B'); aux.push(b'C'); aux.extend_from_slice(&4u32.to_le_bytes()); aux.extend_from_slice(&[65u8, 67, 71, 84]); let result = super::find_string_or_uint8_array(&aux, *b"ac");
2383 assert_eq!(result, Some(vec![65u8, 67, 71, 84]));
2384 }
2385
2386 #[test]
2387 fn test_find_string_or_uint8_array_missing_tag() {
2388 let aux: Vec<u8> = Vec::new();
2389 let result = super::find_string_or_uint8_array(&aux, *b"ac");
2390 assert!(result.is_none());
2391 }
2392
2393 #[test]
2394 fn test_find_string_or_uint8_array_wrong_array_type() {
2395 let mut aux = Vec::new();
2397 aux.extend_from_slice(b"ac"); aux.push(b'B'); aux.push(b's'); aux.extend_from_slice(&2u32.to_le_bytes()); aux.extend_from_slice(&1i16.to_le_bytes());
2402 aux.extend_from_slice(&2i16.to_le_bytes());
2403
2404 let result = super::find_string_or_uint8_array(&aux, *b"ac");
2405 assert!(result.is_none());
2406 }
2407}