use serde::{Deserialize, Serialize};
use crate::Metric;
/// Builds a size distribution, ordered by ascending size, from `(size, count)` pairs.
///
/// `ctor` is called once per input pair with `(size, count, fraction, cumulative)`:
///
/// * `fraction` is `count` divided by the sum of all counts.
/// * `cumulative` is the summed count of this entry and every entry after it in sorted
///   order (i.e. sizes greater than or equal to this one), divided by the sum of all counts.
///
/// Pairs that share a size are not merged; the sort is stable, so they keep their input order.
///
/// Returns an empty vector when no pairs are supplied or when all counts are zero.
// Counts are converted to `f64` to express fractions; precision can only be lost for
// values above 2^53.
#[allow(clippy::cast_precision_loss)]
fn build_size_distribution<T>(
    counts: impl IntoIterator<Item = (usize, u64)>,
    ctor: impl Fn(usize, u64, f64, f64) -> T,
) -> Vec<T> {
    let mut sorted: Vec<_> = counts.into_iter().collect();
    sorted.sort_by_key(|&(size, _)| size);

    // Summing in `u128` cannot overflow for any number of `u64` counts.
    let total_count: u128 = sorted.iter().map(|&(_, count)| u128::from(count)).sum();
    if total_count == 0 {
        return Vec::new();
    }
    let total = total_count as f64;

    // Walk from the largest size down while accumulating integer counts. Dividing the
    // running count once per entry (instead of adding up already-rounded fractions) avoids
    // floating-point drift, so the smallest size always reports a cumulative fraction of
    // exactly 1.0.
    let mut running: u128 = 0;
    let mut metrics: Vec<T> = sorted
        .iter()
        .rev()
        .map(|&(size, count)| {
            running += u128::from(count);
            ctor(size, count, count as f64 / total, running as f64 / total)
        })
        .collect();

    // Entries were produced largest-first; restore ascending size order.
    metrics.reverse();
    metrics
}
/// Summary counters and reads-per-molecule statistics reported for UMI grouping.
///
/// Every field is zero in the value produced by the derived `Default`. The fields are
/// public and are populated by code outside this file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UmiGroupingMetrics {
/// Total number of records; returned by `ProcessingMetrics::total_input`.
pub total_records: u64,
/// Number of accepted records; returned by `ProcessingMetrics::total_output`.
pub accepted_records: u64,
/// Records discarded as non-PF; part of `ProcessingMetrics::total_filtered`.
/// NOTE(review): "PF" presumably means "passing filter" — confirm.
pub discarded_non_pf: u64,
/// Records discarded for poor alignment; part of `ProcessingMetrics::total_filtered`.
pub discarded_poor_alignment: u64,
/// Records discarded because the UMI contained Ns; part of
/// `ProcessingMetrics::total_filtered`.
pub discarded_ns_in_umi: u64,
/// Records discarded because the UMI was too short; part of
/// `ProcessingMetrics::total_filtered`.
pub discarded_umi_too_short: u64,
/// Number of unique molecule IDs.
pub unique_molecule_ids: u64,
/// Total number of families.
/// NOTE(review): relationship to `unique_molecule_ids` is not visible here — confirm.
pub total_families: u64,
/// Average number of reads per molecule.
pub avg_reads_per_molecule: f64,
/// Median number of reads per molecule.
pub median_reads_per_molecule: u64,
/// Minimum number of reads per molecule.
pub min_reads_per_molecule: u64,
/// Maximum number of reads per molecule.
pub max_reads_per_molecule: u64,
}
impl UmiGroupingMetrics {
    /// Creates a metrics value with every counter and statistic set to zero.
    ///
    /// Equivalent to [`UmiGroupingMetrics::default`].
    #[must_use]
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}
impl Metric for UmiGroupingMetrics {
    /// Returns the display name of this metric type.
    fn metric_name() -> &'static str {
        const NAME: &str = "UMI grouping";
        NAME
    }
}
impl crate::ProcessingMetrics for UmiGroupingMetrics {
    /// Returns `total_records`.
    fn total_input(&self) -> u64 {
        let Self { total_records, .. } = self;
        *total_records
    }

    /// Returns `accepted_records`.
    fn total_output(&self) -> u64 {
        let Self { accepted_records, .. } = self;
        *accepted_records
    }

    /// Returns the sum of the four `discarded_*` counters.
    fn total_filtered(&self) -> u64 {
        let Self {
            discarded_non_pf,
            discarded_poor_alignment,
            discarded_ns_in_umi,
            discarded_umi_too_short,
            ..
        } = self;
        discarded_non_pf + discarded_poor_alignment + discarded_ns_in_umi + discarded_umi_too_short
    }
}
/// One entry of a family-size distribution, as produced by
/// `FamilySizeMetrics::from_size_counts`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FamilySizeMetrics {
/// The family size this entry describes.
pub family_size: usize,
/// The count recorded for this family size.
pub count: u64,
/// `count` divided by the sum of the counts of all sizes in the distribution.
pub fraction: f64,
/// Cumulative fraction covering this family size and all larger sizes.
pub fraction_gt_or_eq_family_size: f64,
}
impl FamilySizeMetrics {
    /// Creates an entry for `family_size` with a zero count and zero fractions.
    #[must_use]
    pub fn new(family_size: usize) -> Self {
        Self {
            family_size,
            count: 0,
            fraction: 0.0,
            fraction_gt_or_eq_family_size: 0.0,
        }
    }

    /// Builds the family-size distribution from `(family_size, count)` pairs.
    ///
    /// Delegates to `build_size_distribution`, which orders the entries by ascending size
    /// and supplies each entry's fraction and cumulative fraction.
    #[must_use]
    pub fn from_size_counts(counts: impl IntoIterator<Item = (usize, u64)>) -> Vec<Self> {
        let to_entry = |family_size: usize, count: u64, fraction: f64, at_least: f64| Self {
            family_size,
            count,
            fraction,
            fraction_gt_or_eq_family_size: at_least,
        };
        build_size_distribution(counts, to_entry)
    }
}
impl Default for FamilySizeMetrics {
    /// Returns the zeroed entry for family size `0`.
    fn default() -> Self {
        let family_size = 0;
        Self::new(family_size)
    }
}
impl Metric for FamilySizeMetrics {
    /// Returns the display name of this metric type.
    fn metric_name() -> &'static str {
        const NAME: &str = "family size";
        NAME
    }
}
/// One entry of a position-group-size distribution, as produced by
/// `PositionGroupSizeMetrics::from_size_counts`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PositionGroupSizeMetrics {
/// The position group size this entry describes.
pub position_group_size: usize,
/// The count recorded for this position group size.
pub count: u64,
/// `count` divided by the sum of the counts of all sizes in the distribution.
pub fraction: f64,
/// Cumulative fraction covering this position group size and all larger sizes.
pub fraction_gt_or_eq_position_group_size: f64,
}
impl PositionGroupSizeMetrics {
    /// Creates an entry for `position_group_size` with a zero count and zero fractions.
    #[must_use]
    pub fn new(position_group_size: usize) -> Self {
        let (count, fraction, fraction_gt_or_eq_position_group_size) = (0, 0.0, 0.0);
        Self { position_group_size, count, fraction, fraction_gt_or_eq_position_group_size }
    }

    /// Builds the position-group-size distribution from `(position_group_size, count)` pairs.
    ///
    /// Delegates to `build_size_distribution`, which orders the entries by ascending size
    /// and supplies each entry's fraction and cumulative fraction.
    #[must_use]
    pub fn from_size_counts(counts: impl IntoIterator<Item = (usize, u64)>) -> Vec<Self> {
        let to_entry = |position_group_size: usize, count: u64, fraction: f64, at_least: f64| {
            Self {
                position_group_size,
                count,
                fraction,
                fraction_gt_or_eq_position_group_size: at_least,
            }
        };
        build_size_distribution(counts, to_entry)
    }
}
impl Default for PositionGroupSizeMetrics {
    /// Returns the zeroed entry for position group size `0`.
    fn default() -> Self {
        let position_group_size = 0;
        Self::new(position_group_size)
    }
}
impl Metric for PositionGroupSizeMetrics {
    /// Returns the display name of this metric type.
    fn metric_name() -> &'static str {
        const NAME: &str = "position group size";
        NAME
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing fractions computed from counts.
    const FRACTION_TOLERANCE: f64 = 1e-10;

    /// Asserts that `actual` lies strictly within `tolerance` of `expected`.
    fn assert_close(actual: f64, expected: f64, tolerance: f64) {
        assert!((actual - expected).abs() < tolerance);
    }

    /// Unsorted input shared by the distribution tests: three sizes, one count each.
    fn unsorted_unit_counts() -> Vec<(usize, u64)> {
        vec![(3, 1u64), (1, 1), (2, 1)]
    }

    // `new` must yield all-zero counters.
    #[test]
    fn test_umi_grouping_metrics_new() {
        let fresh = UmiGroupingMetrics::new();
        assert_eq!(fresh.total_records, 0);
        assert_eq!(fresh.accepted_records, 0);
        assert_eq!(fresh.unique_molecule_ids, 0);
    }

    // The derived `Default` must also yield zero counters.
    #[test]
    fn test_umi_grouping_metrics_default() {
        let fresh = UmiGroupingMetrics::default();
        assert_eq!(fresh.total_records, 0);
        assert_eq!(fresh.accepted_records, 0);
    }

    // `new` keeps the requested size and zeroes everything else.
    #[test]
    fn test_family_size_metrics_new() {
        let entry = FamilySizeMetrics::new(5);
        assert_eq!(entry.family_size, 5);
        assert_eq!(entry.count, 0);
        assert_close(entry.fraction, 0.0, f64::EPSILON);
    }

    #[test]
    fn test_metric_trait_impl() {
        assert_eq!(UmiGroupingMetrics::metric_name(), "UMI grouping");
        assert_eq!(FamilySizeMetrics::metric_name(), "family size");
    }

    // Entries come back sorted by size, with fractions and cumulative fractions filled in.
    #[test]
    fn test_from_size_counts() {
        let entries = FamilySizeMetrics::from_size_counts(unsorted_unit_counts());
        let sizes: Vec<usize> = entries.iter().map(|entry| entry.family_size).collect();
        assert_eq!(sizes, vec![1, 2, 3]);

        let (smallest, largest) = (&entries[0], &entries[2]);
        assert_close(smallest.fraction, 1.0 / 3.0, FRACTION_TOLERANCE);
        assert_close(smallest.fraction_gt_or_eq_family_size, 1.0, FRACTION_TOLERANCE);
        assert_close(largest.fraction_gt_or_eq_family_size, 1.0 / 3.0, FRACTION_TOLERANCE);
    }

    #[test]
    fn test_from_size_counts_empty() {
        let entries = FamilySizeMetrics::from_size_counts(std::iter::empty());
        assert!(entries.is_empty());
    }

    // `new` keeps the requested size and zeroes everything else.
    #[test]
    fn test_position_group_size_metrics_new() {
        let entry = PositionGroupSizeMetrics::new(5);
        assert_eq!(entry.position_group_size, 5);
        assert_eq!(entry.count, 0);
        assert_close(entry.fraction, 0.0, f64::EPSILON);
    }

    #[test]
    fn test_position_group_size_metric_name() {
        assert_eq!(PositionGroupSizeMetrics::metric_name(), "position group size");
    }

    // Entries come back sorted by size, with fractions and cumulative fractions filled in.
    #[test]
    fn test_position_group_size_from_size_counts() {
        let entries = PositionGroupSizeMetrics::from_size_counts(unsorted_unit_counts());
        let sizes: Vec<usize> = entries.iter().map(|entry| entry.position_group_size).collect();
        assert_eq!(sizes, vec![1, 2, 3]);

        let (smallest, largest) = (&entries[0], &entries[2]);
        assert_close(smallest.fraction, 1.0 / 3.0, FRACTION_TOLERANCE);
        assert_close(smallest.fraction_gt_or_eq_position_group_size, 1.0, FRACTION_TOLERANCE);
        assert_close(largest.fraction_gt_or_eq_position_group_size, 1.0 / 3.0, FRACTION_TOLERANCE);
    }

    #[test]
    fn test_position_group_size_from_size_counts_empty() {
        let entries = PositionGroupSizeMetrics::from_size_counts(std::iter::empty());
        assert!(entries.is_empty());
    }
}