use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::shared::{UmiCountTracker, UmiMetric};
use crate::{Metric, frac};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimplexFamilySizeMetric {
pub family_size: usize,
pub cs_count: usize,
pub cs_fraction: f64,
pub cs_fraction_gt_or_eq_size: f64,
pub ss_count: usize,
pub ss_fraction: f64,
pub ss_fraction_gt_or_eq_size: f64,
}
impl SimplexFamilySizeMetric {
#[must_use]
pub fn new(family_size: usize) -> Self {
Self {
family_size,
cs_count: 0,
cs_fraction: 0.0,
cs_fraction_gt_or_eq_size: 0.0,
ss_count: 0,
ss_fraction: 0.0,
ss_fraction_gt_or_eq_size: 0.0,
}
}
}
impl Default for SimplexFamilySizeMetric {
fn default() -> Self {
Self::new(0)
}
}
impl Metric for SimplexFamilySizeMetric {
fn metric_name() -> &'static str {
"simplex family size"
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimplexYieldMetric {
pub fraction: f64,
pub read_pairs: usize,
pub cs_families: usize,
pub ss_families: usize,
pub mean_ss_family_size: f64,
pub ss_singletons: usize,
pub ss_singleton_fraction: f64,
pub ss_consensus_families: usize,
}
impl SimplexYieldMetric {
#[must_use]
pub fn new(fraction: f64) -> Self {
Self {
fraction,
read_pairs: 0,
cs_families: 0,
ss_families: 0,
mean_ss_family_size: 0.0,
ss_singletons: 0,
ss_singleton_fraction: 0.0,
ss_consensus_families: 0,
}
}
}
impl Default for SimplexYieldMetric {
fn default() -> Self {
Self::new(0.0)
}
}
impl Metric for SimplexYieldMetric {
fn metric_name() -> &'static str {
"simplex yield"
}
}
pub struct SimplexMetricsCollector {
cs_family_sizes: HashMap<usize, usize>,
ss_family_sizes: HashMap<usize, usize>,
umi_counts: UmiCountTracker,
}
impl SimplexMetricsCollector {
#[must_use]
pub fn new() -> Self {
Self {
cs_family_sizes: HashMap::new(),
ss_family_sizes: HashMap::new(),
umi_counts: UmiCountTracker::new(),
}
}
pub fn record_cs_family(&mut self, size: usize) {
*self.cs_family_sizes.entry(size).or_insert(0) += 1;
}
pub fn record_ss_family(&mut self, size: usize) {
*self.ss_family_sizes.entry(size).or_insert(0) += 1;
}
pub fn record_umi(&mut self, umi: &str, raw_count: usize, error_count: usize, is_unique: bool) {
self.umi_counts.record(umi, raw_count, error_count, is_unique);
}
#[must_use]
pub fn family_size_metrics(&self) -> Vec<SimplexFamilySizeMetric> {
let max_size = self
.cs_family_sizes
.keys()
.copied()
.max()
.unwrap_or(0)
.max(self.ss_family_sizes.keys().copied().max().unwrap_or(0));
if max_size == 0 {
return Vec::new();
}
let cs_total: usize = self.cs_family_sizes.values().sum();
let ss_total: usize = self.ss_family_sizes.values().sum();
let mut metrics = Vec::new();
for size in 1..=max_size {
let mut metric = SimplexFamilySizeMetric::new(size);
metric.cs_count = *self.cs_family_sizes.get(&size).unwrap_or(&0);
metric.cs_fraction = frac(metric.cs_count, cs_total);
metric.ss_count = *self.ss_family_sizes.get(&size).unwrap_or(&0);
metric.ss_fraction = frac(metric.ss_count, ss_total);
metrics.push(metric);
}
for i in (0..metrics.len()).rev() {
let next_coord_strand =
if i + 1 < metrics.len() { metrics[i + 1].cs_fraction_gt_or_eq_size } else { 0.0 };
let next_single_strand =
if i + 1 < metrics.len() { metrics[i + 1].ss_fraction_gt_or_eq_size } else { 0.0 };
metrics[i].cs_fraction_gt_or_eq_size = metrics[i].cs_fraction + next_coord_strand;
metrics[i].ss_fraction_gt_or_eq_size = metrics[i].ss_fraction + next_single_strand;
}
metrics
}
#[must_use]
pub fn umi_metrics(&self) -> Vec<UmiMetric> {
self.umi_counts.to_metrics()
}
}
impl Default for SimplexMetricsCollector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simplex_family_size_metric_new() {
let metric = SimplexFamilySizeMetric::new(5);
assert_eq!(metric.family_size, 5);
assert_eq!(metric.cs_count, 0);
assert!(metric.cs_fraction.abs() < f64::EPSILON);
assert!(metric.cs_fraction_gt_or_eq_size.abs() < f64::EPSILON);
assert_eq!(metric.ss_count, 0);
assert!(metric.ss_fraction.abs() < f64::EPSILON);
assert!(metric.ss_fraction_gt_or_eq_size.abs() < f64::EPSILON);
}
#[test]
fn test_simplex_yield_metric_new() {
let metric = SimplexYieldMetric::new(0.5);
assert!((metric.fraction - 0.5).abs() < f64::EPSILON);
assert_eq!(metric.read_pairs, 0);
assert_eq!(metric.cs_families, 0);
assert_eq!(metric.ss_families, 0);
assert!(metric.mean_ss_family_size.abs() < f64::EPSILON);
assert_eq!(metric.ss_singletons, 0);
assert!(metric.ss_singleton_fraction.abs() < f64::EPSILON);
assert_eq!(metric.ss_consensus_families, 0);
}
#[test]
fn test_metric_trait_impl() {
assert_eq!(SimplexFamilySizeMetric::metric_name(), "simplex family size");
assert_eq!(SimplexYieldMetric::metric_name(), "simplex yield");
}
#[test]
fn test_record_cs_family() {
let mut collector = SimplexMetricsCollector::new();
collector.record_cs_family(5);
collector.record_cs_family(5);
collector.record_cs_family(10);
let metrics = collector.family_size_metrics();
let size_5 = metrics
.iter()
.find(|m| m.family_size == 5)
.expect("family_size 5 metric should be present");
assert_eq!(size_5.cs_count, 2);
let size_10 = metrics
.iter()
.find(|m| m.family_size == 10)
.expect("family_size 10 metric should be present");
assert_eq!(size_10.cs_count, 1);
}
#[test]
fn test_record_ss_family() {
let mut collector = SimplexMetricsCollector::new();
collector.record_ss_family(3);
collector.record_ss_family(3);
collector.record_ss_family(3);
let metrics = collector.family_size_metrics();
let size_3 = metrics
.iter()
.find(|m| m.family_size == 3)
.expect("family_size 3 metric should be present");
assert_eq!(size_3.ss_count, 3);
}
#[test]
fn test_family_size_metrics_fractions() {
let mut collector = SimplexMetricsCollector::new();
collector.record_ss_family(1);
collector.record_ss_family(1);
collector.record_ss_family(2);
collector.record_ss_family(3);
let metrics = collector.family_size_metrics();
let size_1 = metrics
.iter()
.find(|m| m.family_size == 1)
.expect("family_size 1 metric should be present");
assert_eq!(size_1.ss_count, 2);
assert!((size_1.ss_fraction - 0.5).abs() < 0.001); assert!((size_1.ss_fraction_gt_or_eq_size - 1.0).abs() < 0.001);
let size_2 = metrics
.iter()
.find(|m| m.family_size == 2)
.expect("family_size 2 metric should be present");
assert_eq!(size_2.ss_count, 1);
assert!((size_2.ss_fraction - 0.25).abs() < 0.001); assert!((size_2.ss_fraction_gt_or_eq_size - 0.5).abs() < 0.001);
let size_3 = metrics
.iter()
.find(|m| m.family_size == 3)
.expect("family_size 3 metric should be present");
assert_eq!(size_3.ss_count, 1);
assert!((size_3.ss_fraction - 0.25).abs() < 0.001); assert!((size_3.ss_fraction_gt_or_eq_size - 0.25).abs() < 0.001); }
#[test]
fn test_empty_collector() {
let collector = SimplexMetricsCollector::new();
let family_metrics = collector.family_size_metrics();
assert!(family_metrics.is_empty());
let umi_metrics = collector.umi_metrics();
assert!(umi_metrics.is_empty());
}
#[test]
fn test_record_umi() {
let mut collector = SimplexMetricsCollector::new();
collector.record_umi("AAAA", 10, 2, true);
collector.record_umi("AAAA", 5, 1, false);
collector.record_umi("CCCC", 8, 0, true);
let metrics = collector.umi_metrics();
assert_eq!(metrics.len(), 2);
let aaaa =
metrics.iter().find(|m| m.umi == "AAAA").expect("AAAA UMI metric should be present");
assert_eq!(aaaa.raw_observations, 15); assert_eq!(aaaa.raw_observations_with_errors, 3); assert_eq!(aaaa.unique_observations, 1);
let cccc =
metrics.iter().find(|m| m.umi == "CCCC").expect("CCCC UMI metric should be present");
assert_eq!(cccc.raw_observations, 8);
assert_eq!(cccc.unique_observations, 1);
}
}