use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::rejection::RejectionReason;
use crate::{Metric, format_float, frac_u64};
/// One metric expressed as a key, an already-formatted value, and a description.
///
/// [`ConsensusMetrics::to_kv_metrics`] returns a list of these rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusKvMetric {
/// Name identifying the metric.
pub key: String,
/// Metric value rendered as text.
pub value: String,
/// Human-readable explanation of the metric.
pub description: String,
}
impl ConsensusKvMetric {
#[must_use]
pub fn new(key: impl Into<String>, value: String, description: impl Into<String>) -> Self {
Self { key: key.into(), value, description: description.into() }
}
}
/// Counters and summary statistics for consensus calling.
///
/// The per-reason `rejected_*` counters are incremented through
/// [`ConsensusMetrics::add_rejection`] and read back by
/// [`ConsensusMetrics::total_rejections`], [`ConsensusMetrics::rejection_summary`]
/// and [`ConsensusMetrics::to_kv_metrics`]. Every field starts at zero in
/// [`ConsensusMetrics::new`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusMetrics {
/// Raw reads considered; emitted as `raw_reads_considered` and returned by
/// `ProcessingMetrics::total_input`.
pub total_input_reads: u64,
/// Consensus reads emitted; emitted as `consensus_reads_emitted` and returned by
/// `ProcessingMetrics::total_output`.
pub consensus_reads: u64,
/// Raw reads rejected before consensus calling; emitted as `raw_reads_rejected`,
/// subtracted from `total_input_reads` to derive `raw_reads_used`, and returned by
/// `ProcessingMetrics::total_filtered`.
pub filtered_reads: u64,
// NOTE(review): the next seven fields are only zero-initialised in this file and are
// never read or updated here; their descriptions are inferred from the field names
// and should be confirmed against the code that populates them.
/// Number of UMI groups encountered.
pub total_umi_groups: u64,
/// Number of UMI groups that yielded a consensus.
pub umi_groups_with_consensus: u64,
/// Number of UMI groups that did not yield a consensus.
pub umi_groups_failed: u64,
/// Average number of input reads per consensus read.
pub avg_input_reads_per_consensus: f64,
/// Average raw read depth.
pub avg_raw_read_depth: f64,
/// Smallest raw read depth observed.
pub min_raw_read_depth: u64,
/// Largest raw read depth observed.
pub max_raw_read_depth: u64,
/// Tally for [`RejectionReason::InsufficientSupport`].
pub rejected_insufficient_support: u64,
/// Tally for [`RejectionReason::MinorityAlignment`].
pub rejected_minority_alignment: u64,
/// Tally for [`RejectionReason::InsufficientStrandSupport`].
pub rejected_insufficient_strand_support: u64,
/// Tally for [`RejectionReason::LowBaseQuality`].
pub rejected_low_base_quality: u64,
/// Tally for [`RejectionReason::ExcessiveNBases`].
pub rejected_excessive_n_bases: u64,
/// Tally for [`RejectionReason::NoValidAlignment`].
pub rejected_no_valid_alignment: u64,
/// Tally for [`RejectionReason::LowMappingQuality`].
pub rejected_low_mapping_quality: u64,
/// Tally for [`RejectionReason::NBasesInUmi`].
pub rejected_n_bases_in_umi: u64,
/// Tally for [`RejectionReason::MissingUmi`].
pub rejected_missing_umi: u64,
/// Tally for [`RejectionReason::NotPassingFilter`].
pub rejected_not_passing_filter: u64,
/// Tally for [`RejectionReason::LowMeanQuality`].
pub rejected_low_mean_quality: u64,
/// Tally for [`RejectionReason::InsufficientMinDepth`].
pub rejected_insufficient_min_depth: u64,
/// Tally for [`RejectionReason::ExcessiveErrorRate`].
pub rejected_excessive_error_rate: u64,
/// Tally for [`RejectionReason::UmiTooShort`].
pub rejected_umi_too_short: u64,
/// Tally for [`RejectionReason::SameStrandOnly`].
pub rejected_same_strand_only: u64,
/// Tally for [`RejectionReason::DuplicateUmi`].
pub rejected_duplicate_umi: u64,
/// Tally for [`RejectionReason::OrphanConsensus`].
pub rejected_orphan_consensus: u64,
/// Tally for [`RejectionReason::ZeroBasesPostTrimming`].
pub rejected_zero_bases_post_trimming: u64,
}
/// Rejection reasons that `ConsensusMetrics::to_kv_metrics` always reports, even when
/// their count is zero. Rows are emitted in this array's order.
const CORE_REJECTIONS: [RejectionReason; 4] = [
RejectionReason::InsufficientSupport,
RejectionReason::MinorityAlignment,
RejectionReason::OrphanConsensus,
RejectionReason::ZeroBasesPostTrimming,
];
/// Rejection reasons that `ConsensusMetrics::to_kv_metrics` reports only when their
/// count is greater than zero. Rows are emitted in this array's order, after the
/// core reasons.
const OPTIONAL_REJECTIONS: [RejectionReason; 14] = [
RejectionReason::InsufficientStrandSupport,
RejectionReason::LowBaseQuality,
RejectionReason::ExcessiveNBases,
RejectionReason::NoValidAlignment,
RejectionReason::LowMappingQuality,
RejectionReason::NBasesInUmi,
RejectionReason::MissingUmi,
RejectionReason::NotPassingFilter,
RejectionReason::LowMeanQuality,
RejectionReason::InsufficientMinDepth,
RejectionReason::ExcessiveErrorRate,
RejectionReason::UmiTooShort,
RejectionReason::SameStrandOnly,
RejectionReason::DuplicateUmi,
];
/// Yields every rejection reason by value: the core reasons first, then the optional
/// ones, each group in its array's declared order.
fn all_rejections() -> impl Iterator<Item = RejectionReason> {
    let core = IntoIterator::into_iter(CORE_REJECTIONS);
    let optional = IntoIterator::into_iter(OPTIONAL_REJECTIONS);
    core.chain(optional)
}
impl ConsensusMetrics {
#[must_use]
pub fn new() -> Self {
Self {
total_input_reads: 0,
consensus_reads: 0,
filtered_reads: 0,
total_umi_groups: 0,
umi_groups_with_consensus: 0,
umi_groups_failed: 0,
avg_input_reads_per_consensus: 0.0,
avg_raw_read_depth: 0.0,
min_raw_read_depth: 0,
max_raw_read_depth: 0,
rejected_insufficient_support: 0,
rejected_minority_alignment: 0,
rejected_insufficient_strand_support: 0,
rejected_low_base_quality: 0,
rejected_excessive_n_bases: 0,
rejected_no_valid_alignment: 0,
rejected_low_mapping_quality: 0,
rejected_n_bases_in_umi: 0,
rejected_missing_umi: 0,
rejected_not_passing_filter: 0,
rejected_low_mean_quality: 0,
rejected_insufficient_min_depth: 0,
rejected_excessive_error_rate: 0,
rejected_umi_too_short: 0,
rejected_same_strand_only: 0,
rejected_duplicate_umi: 0,
rejected_orphan_consensus: 0,
rejected_zero_bases_post_trimming: 0,
}
}
#[must_use]
fn rejection_count(&self, reason: RejectionReason) -> u64 {
match reason {
RejectionReason::InsufficientSupport => self.rejected_insufficient_support,
RejectionReason::MinorityAlignment => self.rejected_minority_alignment,
RejectionReason::InsufficientStrandSupport => self.rejected_insufficient_strand_support,
RejectionReason::LowBaseQuality => self.rejected_low_base_quality,
RejectionReason::ExcessiveNBases => self.rejected_excessive_n_bases,
RejectionReason::NoValidAlignment => self.rejected_no_valid_alignment,
RejectionReason::LowMappingQuality => self.rejected_low_mapping_quality,
RejectionReason::NBasesInUmi => self.rejected_n_bases_in_umi,
RejectionReason::MissingUmi => self.rejected_missing_umi,
RejectionReason::NotPassingFilter => self.rejected_not_passing_filter,
RejectionReason::LowMeanQuality => self.rejected_low_mean_quality,
RejectionReason::InsufficientMinDepth => self.rejected_insufficient_min_depth,
RejectionReason::ExcessiveErrorRate => self.rejected_excessive_error_rate,
RejectionReason::UmiTooShort => self.rejected_umi_too_short,
RejectionReason::SameStrandOnly => self.rejected_same_strand_only,
RejectionReason::DuplicateUmi => self.rejected_duplicate_umi,
RejectionReason::OrphanConsensus => self.rejected_orphan_consensus,
RejectionReason::ZeroBasesPostTrimming => self.rejected_zero_bases_post_trimming,
}
}
pub fn add_rejection(&mut self, reason: RejectionReason, count: u64) {
match reason {
RejectionReason::InsufficientSupport => self.rejected_insufficient_support += count,
RejectionReason::MinorityAlignment => self.rejected_minority_alignment += count,
RejectionReason::InsufficientStrandSupport => {
self.rejected_insufficient_strand_support += count;
}
RejectionReason::LowBaseQuality => self.rejected_low_base_quality += count,
RejectionReason::ExcessiveNBases => self.rejected_excessive_n_bases += count,
RejectionReason::NoValidAlignment => self.rejected_no_valid_alignment += count,
RejectionReason::LowMappingQuality => self.rejected_low_mapping_quality += count,
RejectionReason::NBasesInUmi => self.rejected_n_bases_in_umi += count,
RejectionReason::MissingUmi => self.rejected_missing_umi += count,
RejectionReason::NotPassingFilter => self.rejected_not_passing_filter += count,
RejectionReason::LowMeanQuality => self.rejected_low_mean_quality += count,
RejectionReason::InsufficientMinDepth => self.rejected_insufficient_min_depth += count,
RejectionReason::ExcessiveErrorRate => self.rejected_excessive_error_rate += count,
RejectionReason::UmiTooShort => self.rejected_umi_too_short += count,
RejectionReason::SameStrandOnly => self.rejected_same_strand_only += count,
RejectionReason::DuplicateUmi => self.rejected_duplicate_umi += count,
RejectionReason::OrphanConsensus => self.rejected_orphan_consensus += count,
RejectionReason::ZeroBasesPostTrimming => {
self.rejected_zero_bases_post_trimming += count;
}
}
}
#[must_use]
pub fn total_rejections(&self) -> u64 {
all_rejections().map(|r| self.rejection_count(r)).sum()
}
#[must_use]
pub fn rejection_summary(&self) -> HashMap<RejectionReason, u64> {
all_rejections()
.filter_map(|reason| {
let count = self.rejection_count(reason);
if count > 0 { Some((reason, count)) } else { None }
})
.collect()
}
#[must_use]
pub fn to_kv_metrics(&self) -> Vec<ConsensusKvMetric> {
let mut metrics = Vec::new();
let raw_reads_used = self.total_input_reads.saturating_sub(self.filtered_reads);
let frac_used = frac_u64(raw_reads_used, self.total_input_reads);
metrics.push(ConsensusKvMetric::new(
"raw_reads_considered",
self.total_input_reads.to_string(),
"Total raw reads considered from input file",
));
metrics.push(ConsensusKvMetric::new(
"raw_reads_rejected",
self.filtered_reads.to_string(),
"Total number of raw reads rejected before consensus calling",
));
metrics.push(ConsensusKvMetric::new(
"raw_reads_used",
raw_reads_used.to_string(),
"Total count of raw reads used in consensus reads",
));
metrics.push(ConsensusKvMetric::new(
"frac_raw_reads_used",
format_float(frac_used),
"Fraction of raw reads used in consensus reads",
));
for reason in &CORE_REJECTIONS {
metrics.push(ConsensusKvMetric::new(
reason.tsv_key(),
self.rejection_count(*reason).to_string(),
reason.kv_description(),
));
}
self.push_optional_rejections(&mut metrics);
metrics.push(ConsensusKvMetric::new(
"consensus_reads_emitted",
self.consensus_reads.to_string(),
"Total number of consensus reads (R1+R2=2) emitted.",
));
metrics
}
fn push_optional_rejections(&self, metrics: &mut Vec<ConsensusKvMetric>) {
for reason in &OPTIONAL_REJECTIONS {
let count = self.rejection_count(*reason);
if count > 0 {
metrics.push(ConsensusKvMetric::new(
reason.tsv_key(),
count.to_string(),
reason.kv_description(),
));
}
}
}
}
impl Default for ConsensusMetrics {
/// Delegates to [`ConsensusMetrics::new`], so the default value has every field zeroed.
fn default() -> Self {
Self::new()
}
}
impl Metric for ConsensusMetrics {
/// Returns the fixed name `"consensus"` for this metric type.
fn metric_name() -> &'static str {
"consensus"
}
}
impl crate::ProcessingMetrics for ConsensusMetrics {
/// Returns `total_input_reads`.
fn total_input(&self) -> u64 {
self.total_input_reads
}
/// Returns `consensus_reads`.
fn total_output(&self) -> u64 {
self.consensus_reads
}
/// Returns `filtered_reads`.
fn total_filtered(&self) -> u64 {
self.filtered_reads
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Looks up the rendered value of the row named `key`, or `None` when no such row exists.
    fn value_of<'a>(rows: &'a [ConsensusKvMetric], key: &str) -> Option<&'a str> {
        rows.iter().find(|row| row.key == key).map(|row| row.value.as_str())
    }

    #[test]
    fn test_consensus_metrics_new() {
        let metrics = ConsensusMetrics::new();
        assert_eq!(metrics.total_input_reads, 0);
        assert_eq!(metrics.consensus_reads, 0);
        assert_eq!(metrics.filtered_reads, 0);
        assert_eq!(metrics.total_rejections(), 0);
    }

    #[test]
    fn test_consensus_metrics_rejection_summary() {
        let mut metrics = ConsensusMetrics::new();
        metrics.rejected_insufficient_support = 10;
        metrics.rejected_low_base_quality = 5;
        metrics.rejected_excessive_n_bases = 3;
        let summary = metrics.rejection_summary();
        assert_eq!(summary.len(), 3);
        assert_eq!(summary.get(&RejectionReason::InsufficientSupport), Some(&10));
        assert_eq!(summary.get(&RejectionReason::LowBaseQuality), Some(&5));
        assert_eq!(summary.get(&RejectionReason::ExcessiveNBases), Some(&3));
    }

    #[test]
    fn test_consensus_metrics_total_rejections() {
        let mut metrics = ConsensusMetrics::new();
        metrics.rejected_insufficient_support = 1;
        metrics.rejected_minority_alignment = 2;
        metrics.rejected_low_base_quality = 3;
        metrics.rejected_excessive_n_bases = 4;
        assert_eq!(metrics.total_rejections(), 10);
    }

    #[test]
    fn test_consensus_metrics_default() {
        let metrics = ConsensusMetrics::default();
        assert_eq!(metrics.total_input_reads, 0);
        assert_eq!(metrics.consensus_reads, 0);
        assert!(metrics.rejection_summary().is_empty());
    }

    /// Sets every one of the 18 rejection fields directly, so the summary is checked
    /// independently of `add_rejection`.
    #[test]
    fn test_consensus_metrics_rejection_summary_all_types() {
        let mut metrics = ConsensusMetrics::new();
        metrics.rejected_insufficient_support = 1;
        metrics.rejected_minority_alignment = 2;
        metrics.rejected_insufficient_strand_support = 3;
        metrics.rejected_low_base_quality = 4;
        metrics.rejected_excessive_n_bases = 5;
        metrics.rejected_no_valid_alignment = 6;
        metrics.rejected_low_mapping_quality = 7;
        metrics.rejected_n_bases_in_umi = 8;
        metrics.rejected_not_passing_filter = 9;
        metrics.rejected_low_mean_quality = 10;
        metrics.rejected_insufficient_min_depth = 11;
        metrics.rejected_excessive_error_rate = 12;
        metrics.rejected_umi_too_short = 13;
        metrics.rejected_same_strand_only = 14;
        metrics.rejected_duplicate_umi = 15;
        metrics.rejected_missing_umi = 16;
        metrics.rejected_orphan_consensus = 17;
        metrics.rejected_zero_bases_post_trimming = 18;
        let summary = metrics.rejection_summary();
        assert_eq!(summary.len(), 18);
        assert_eq!(summary.get(&RejectionReason::InsufficientSupport), Some(&1));
        assert_eq!(summary.get(&RejectionReason::MinorityAlignment), Some(&2));
        assert_eq!(summary.get(&RejectionReason::InsufficientStrandSupport), Some(&3));
        assert_eq!(summary.get(&RejectionReason::LowBaseQuality), Some(&4));
        assert_eq!(summary.get(&RejectionReason::ExcessiveNBases), Some(&5));
        assert_eq!(summary.get(&RejectionReason::NoValidAlignment), Some(&6));
        assert_eq!(summary.get(&RejectionReason::LowMappingQuality), Some(&7));
        assert_eq!(summary.get(&RejectionReason::NBasesInUmi), Some(&8));
        assert_eq!(summary.get(&RejectionReason::NotPassingFilter), Some(&9));
        assert_eq!(summary.get(&RejectionReason::LowMeanQuality), Some(&10));
        assert_eq!(summary.get(&RejectionReason::InsufficientMinDepth), Some(&11));
        assert_eq!(summary.get(&RejectionReason::ExcessiveErrorRate), Some(&12));
        assert_eq!(summary.get(&RejectionReason::UmiTooShort), Some(&13));
        assert_eq!(summary.get(&RejectionReason::SameStrandOnly), Some(&14));
        assert_eq!(summary.get(&RejectionReason::DuplicateUmi), Some(&15));
        assert_eq!(summary.get(&RejectionReason::MissingUmi), Some(&16));
        assert_eq!(summary.get(&RejectionReason::OrphanConsensus), Some(&17));
        assert_eq!(summary.get(&RejectionReason::ZeroBasesPostTrimming), Some(&18));
    }

    #[test]
    fn test_consensus_metrics_rejection_summary_empty() {
        let metrics = ConsensusMetrics::new();
        let summary = metrics.rejection_summary();
        assert!(summary.is_empty());
    }

    /// One rejection in each of the 18 fields must add up to 18.
    #[test]
    fn test_consensus_metrics_total_rejections_all() {
        let mut metrics = ConsensusMetrics::new();
        metrics.rejected_insufficient_support = 1;
        metrics.rejected_minority_alignment = 1;
        metrics.rejected_insufficient_strand_support = 1;
        metrics.rejected_low_base_quality = 1;
        metrics.rejected_excessive_n_bases = 1;
        metrics.rejected_no_valid_alignment = 1;
        metrics.rejected_low_mapping_quality = 1;
        metrics.rejected_n_bases_in_umi = 1;
        metrics.rejected_missing_umi = 1;
        metrics.rejected_not_passing_filter = 1;
        metrics.rejected_low_mean_quality = 1;
        metrics.rejected_insufficient_min_depth = 1;
        metrics.rejected_excessive_error_rate = 1;
        metrics.rejected_umi_too_short = 1;
        metrics.rejected_same_strand_only = 1;
        metrics.rejected_duplicate_umi = 1;
        metrics.rejected_orphan_consensus = 1;
        metrics.rejected_zero_bases_post_trimming = 1;
        assert_eq!(metrics.total_rejections(), 18);
    }

    #[test]
    fn test_add_rejection_accumulates() {
        let mut metrics = ConsensusMetrics::new();
        metrics.add_rejection(RejectionReason::InsufficientSupport, 3);
        metrics.add_rejection(RejectionReason::InsufficientSupport, 4);
        metrics.add_rejection(RejectionReason::MissingUmi, 0);
        assert_eq!(metrics.rejected_insufficient_support, 7);
        assert_eq!(metrics.rejected_missing_umi, 0);
        assert_eq!(metrics.rejected_minority_alignment, 0);
        assert_eq!(metrics.total_rejections(), 7);
    }

    /// Each reason must land in the field that carries its name.
    #[test]
    fn test_add_rejection_updates_matching_field() {
        let mut metrics = ConsensusMetrics::new();
        metrics.add_rejection(RejectionReason::InsufficientSupport, 1);
        metrics.add_rejection(RejectionReason::MinorityAlignment, 2);
        metrics.add_rejection(RejectionReason::InsufficientStrandSupport, 3);
        metrics.add_rejection(RejectionReason::LowBaseQuality, 4);
        metrics.add_rejection(RejectionReason::ExcessiveNBases, 5);
        metrics.add_rejection(RejectionReason::NoValidAlignment, 6);
        metrics.add_rejection(RejectionReason::LowMappingQuality, 7);
        metrics.add_rejection(RejectionReason::NBasesInUmi, 8);
        metrics.add_rejection(RejectionReason::MissingUmi, 9);
        metrics.add_rejection(RejectionReason::NotPassingFilter, 10);
        metrics.add_rejection(RejectionReason::LowMeanQuality, 11);
        metrics.add_rejection(RejectionReason::InsufficientMinDepth, 12);
        metrics.add_rejection(RejectionReason::ExcessiveErrorRate, 13);
        metrics.add_rejection(RejectionReason::UmiTooShort, 14);
        metrics.add_rejection(RejectionReason::SameStrandOnly, 15);
        metrics.add_rejection(RejectionReason::DuplicateUmi, 16);
        metrics.add_rejection(RejectionReason::OrphanConsensus, 17);
        metrics.add_rejection(RejectionReason::ZeroBasesPostTrimming, 18);
        assert_eq!(metrics.rejected_insufficient_support, 1);
        assert_eq!(metrics.rejected_minority_alignment, 2);
        assert_eq!(metrics.rejected_insufficient_strand_support, 3);
        assert_eq!(metrics.rejected_low_base_quality, 4);
        assert_eq!(metrics.rejected_excessive_n_bases, 5);
        assert_eq!(metrics.rejected_no_valid_alignment, 6);
        assert_eq!(metrics.rejected_low_mapping_quality, 7);
        assert_eq!(metrics.rejected_n_bases_in_umi, 8);
        assert_eq!(metrics.rejected_missing_umi, 9);
        assert_eq!(metrics.rejected_not_passing_filter, 10);
        assert_eq!(metrics.rejected_low_mean_quality, 11);
        assert_eq!(metrics.rejected_insufficient_min_depth, 12);
        assert_eq!(metrics.rejected_excessive_error_rate, 13);
        assert_eq!(metrics.rejected_umi_too_short, 14);
        assert_eq!(metrics.rejected_same_strand_only, 15);
        assert_eq!(metrics.rejected_duplicate_umi, 16);
        assert_eq!(metrics.rejected_orphan_consensus, 17);
        assert_eq!(metrics.rejected_zero_bases_post_trimming, 18);
    }

    /// Whatever `add_rejection` writes must be what `rejection_summary` reads back.
    #[test]
    fn test_add_rejection_round_trips_through_summary() {
        let mut metrics = ConsensusMetrics::new();
        for (reason, count) in all_rejections().zip(1u64..) {
            metrics.add_rejection(reason, count);
        }
        let summary = metrics.rejection_summary();
        assert_eq!(summary.len(), CORE_REJECTIONS.len() + OPTIONAL_REJECTIONS.len());
        for (reason, count) in all_rejections().zip(1u64..) {
            assert_eq!(summary.get(&reason), Some(&count));
        }
        // 1 + 2 + ... + 18
        assert_eq!(metrics.total_rejections(), 171);
    }

    #[test]
    fn test_metric_trait_impl() {
        assert_eq!(ConsensusMetrics::metric_name(), "consensus");
    }

    #[test]
    fn test_processing_metrics_trait_impl() {
        let mut metrics = ConsensusMetrics::new();
        metrics.total_input_reads = 10;
        metrics.consensus_reads = 4;
        metrics.filtered_reads = 3;
        assert_eq!(crate::ProcessingMetrics::total_input(&metrics), 10);
        assert_eq!(crate::ProcessingMetrics::total_output(&metrics), 4);
        assert_eq!(crate::ProcessingMetrics::total_filtered(&metrics), 3);
    }

    #[test]
    fn test_to_kv_metrics_basic() {
        let mut metrics = ConsensusMetrics::new();
        metrics.total_input_reads = 1000;
        metrics.filtered_reads = 100;
        metrics.consensus_reads = 450;
        metrics.rejected_insufficient_support = 50;
        metrics.rejected_minority_alignment = 50;
        let kv_metrics = metrics.to_kv_metrics();

        let considered = kv_metrics
            .iter()
            .find(|row| row.key == "raw_reads_considered")
            .expect("raw_reads_considered metric should be present");
        assert_eq!(considered.value, "1000");
        assert_eq!(considered.description, "Total raw reads considered from input file");

        assert_eq!(value_of(&kv_metrics, "raw_reads_rejected"), Some("100"));
        assert_eq!(value_of(&kv_metrics, "raw_reads_used"), Some("900"));
        assert_eq!(value_of(&kv_metrics, "frac_raw_reads_used"), Some("0.900000"));
        assert_eq!(value_of(&kv_metrics, "consensus_reads_emitted"), Some("450"));
        assert_eq!(
            value_of(&kv_metrics, "raw_reads_rejected_for_insufficient_support"),
            Some("50")
        );
        assert_eq!(
            value_of(&kv_metrics, "raw_reads_rejected_for_minority_alignment"),
            Some("50")
        );
    }

    #[test]
    fn test_to_kv_metrics_zero_reads() {
        let metrics = ConsensusMetrics::new();
        let kv_metrics = metrics.to_kv_metrics();
        assert_eq!(value_of(&kv_metrics, "frac_raw_reads_used"), Some("0.000000"));
    }

    /// `filtered_reads` larger than `total_input_reads` must clamp to zero, not underflow.
    #[test]
    fn test_to_kv_metrics_filtered_exceeds_input() {
        let mut metrics = ConsensusMetrics::new();
        metrics.total_input_reads = 5;
        metrics.filtered_reads = 10;
        let kv_metrics = metrics.to_kv_metrics();
        assert_eq!(value_of(&kv_metrics, "raw_reads_used"), Some("0"));
    }

    /// Core rejection rows are emitted even at zero, and nothing optional sneaks in.
    #[test]
    fn test_to_kv_metrics_core_rejections_always_present() {
        let kv_metrics = ConsensusMetrics::new().to_kv_metrics();
        for reason in CORE_REJECTIONS {
            let key: String = reason.tsv_key().into();
            assert_eq!(value_of(&kv_metrics, &key), Some("0"));
        }
        // Four read-usage rows, the core rejection rows, and the trailing emitted-reads row.
        assert_eq!(kv_metrics.len(), 4 + CORE_REJECTIONS.len() + 1);
        assert_eq!(
            kv_metrics.last().map(|row| row.key.as_str()),
            Some("consensus_reads_emitted")
        );
    }

    #[test]
    fn test_to_kv_metrics_only_nonzero_optional_rejections() {
        let mut metrics = ConsensusMetrics::new();
        metrics.total_input_reads = 100;
        metrics.rejected_low_base_quality = 5;
        let kv_metrics = metrics.to_kv_metrics();
        assert_eq!(value_of(&kv_metrics, "raw_reads_rejected_for_low_base_quality"), Some("5"));
        assert_eq!(value_of(&kv_metrics, "raw_reads_rejected_for_excessive_n_bases"), None);
    }

    #[test]
    fn test_consensus_kv_metric_new() {
        let kv = ConsensusKvMetric::new("test_key", 42u64.to_string(), "A test metric");
        assert_eq!(kv.key, "test_key");
        assert_eq!(kv.value, "42");
        assert_eq!(kv.description, "A test metric");
    }
}