Skip to main content

fgumi_metrics/
lib.rs

1#![deny(unsafe_code)]
2
3//! Structured metric types and TSV writer for fgumi operations.
4//!
5//! This crate provides:
6//! - [`Metric`] and [`ProcessingMetrics`] traits for extensible metric types
//! - Metric structs for consensus, grouping, correction, duplex, and simplex operations
//!   (plus clipping metrics when the `clip` feature is enabled)
8//! - [`rejection`] module for rejection reason tracking
9//! - [`writer`] module for TSV file output
10
11#[cfg(feature = "clip")]
12pub mod clip;
13pub mod consensus;
14pub mod correct;
15pub mod duplex;
16pub mod group;
17pub mod rejection;
18pub mod shared;
19pub mod simplex;
20pub mod writer;
21
22use serde::{Deserialize, Serialize};
23
/// Number of decimal places used for float metrics (matches fgbio).
pub const FLOAT_PRECISION: usize = 6;

/// Renders a float as a fixed-point string with [`FLOAT_PRECISION`] decimal places.
///
/// Every float written to metrics output should go through this function so
/// that all files share the same formatting (fgbio's 6 decimal places).
///
/// # Example
/// ```
/// use fgumi_metrics::format_float;
/// assert_eq!(format_float(0.9), "0.900000");
/// assert_eq!(format_float(0.0), "0.000000");
/// ```
#[must_use]
pub fn format_float(value: f64) -> String {
    // Bind the precision locally so the format string reads as "value to N decimals".
    let decimals = FLOAT_PRECISION;
    format!("{value:.decimals$}")
}
42
/// Divides `numerator` by `denominator` as floating point.
///
/// A zero denominator yields `0.0` rather than a NaN or infinite result.
#[must_use]
#[expect(clippy::cast_precision_loss, reason = "metric counts never exceed 2^53")]
pub fn frac(numerator: usize, denominator: usize) -> f64 {
    // Guard the degenerate case first so the division below is always well-defined.
    if denominator == 0 {
        return 0.0;
    }
    numerator as f64 / denominator as f64
}
49
/// Divides `numerator` by `denominator` as floating point, for `u64` counts.
///
/// A zero denominator yields `0.0` rather than a NaN or infinite result.
#[must_use]
#[expect(clippy::cast_precision_loss, reason = "metric counts never exceed 2^53")]
pub fn frac_u64(numerator: u64, denominator: u64) -> f64 {
    // Guard the degenerate case first so the division below is always well-defined.
    if denominator == 0 {
        return 0.0;
    }
    numerator as f64 / denominator as f64
}
56
/// A metric type that can be serialized to TSV files.
///
/// All metric types in fgumi implement this trait, providing a consistent
/// interface for serialization and identification.
///
/// The supertrait bounds require every implementor to be serializable,
/// deserializable for any input lifetime (`for<'de> Deserialize<'de>`),
/// cloneable, and constructible via [`Default`].
pub trait Metric: Serialize + for<'de> Deserialize<'de> + Clone + Default {
    /// Human-readable name for this metric type.
    ///
    /// Used in error messages and logging when writing metrics files.
    ///
    /// This is an associated function (it takes no `self`), so the name is a
    /// property of the type rather than of any particular value.
    fn metric_name() -> &'static str;
}
67
68/// Common interface for metrics that track processing pipeline counts.
69///
70/// This trait provides a consistent way to access input, output, and filtered
71/// counts across different metric types, enabling generic summary output.
72pub trait ProcessingMetrics {
73    /// Total number of input items (reads, records, etc.) processed.
74    fn total_input(&self) -> u64;
75
76    /// Total number of output items (consensus reads, accepted records, etc.) produced.
77    fn total_output(&self) -> u64;
78
79    /// Total number of items filtered out or rejected.
80    fn total_filtered(&self) -> u64;
81
82    /// Processing efficiency as a percentage (output / input * 100).
83    fn efficiency(&self) -> f64 {
84        frac_u64(self.total_output(), self.total_input()) * 100.0
85    }
86}
87
88// Re-export commonly used types
89#[cfg(feature = "clip")]
90pub use clip::{ClipCounts, ClippingMetrics, ClippingMetricsCollection, ReadType};
91pub use consensus::{ConsensusKvMetric, ConsensusMetrics};
92pub use correct::UmiCorrectionMetrics;
93pub use duplex::{
94    DuplexFamilySizeMetric, DuplexMetricsCollector, DuplexUmiMetric, DuplexYieldMetric,
95    FamilySizeMetric,
96};
97pub use group::{FamilySizeMetrics, PositionGroupSizeMetrics, UmiGroupingMetrics};
98pub use rejection::{RejectionReason, format_count};
99pub use shared::UmiMetric;
100pub use simplex::{SimplexFamilySizeMetric, SimplexMetricsCollector, SimplexYieldMetric};
101pub use writer::{read_metrics, read_metrics_auto, write_metrics};
102
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when `actual` is within `f64::EPSILON` of `expected`.
    fn close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    // --- `frac` (usize) ---

    #[test]
    fn test_frac_normal() {
        assert!(close(frac(3, 4), 0.75));
    }

    #[test]
    fn test_frac_zero_denominator() {
        assert!(close(frac(5, 0), 0.0));
    }

    #[test]
    fn test_frac_zero_numerator() {
        assert!(close(frac(0, 10), 0.0));
    }

    // --- `frac_u64` ---

    #[test]
    fn test_frac_u64_normal() {
        assert!(close(frac_u64(3, 4), 0.75));
    }

    #[test]
    fn test_frac_u64_zero_denominator() {
        assert!(close(frac_u64(5, 0), 0.0));
    }

    #[test]
    fn test_frac_u64_zero_numerator() {
        assert!(close(frac_u64(0, 10), 0.0));
    }

    // --- `ProcessingMetrics` implementations ---

    #[test]
    fn test_processing_metrics_consensus() {
        let consensus = ConsensusMetrics {
            total_input_reads: 1000,
            consensus_reads: 800,
            filtered_reads: 200,
            ..Default::default()
        };

        assert_eq!(consensus.total_input(), 1000);
        assert_eq!(consensus.total_output(), 800);
        assert_eq!(consensus.total_filtered(), 200);
        assert!(close(consensus.efficiency(), 80.0));
    }

    #[test]
    fn test_processing_metrics_grouping() {
        let grouping = UmiGroupingMetrics {
            total_records: 1000,
            accepted_records: 900,
            discarded_non_pf: 50,
            discarded_poor_alignment: 30,
            discarded_ns_in_umi: 20,
            ..Default::default()
        };

        assert_eq!(grouping.total_input(), 1000);
        assert_eq!(grouping.total_output(), 900);
        // The three discard counters above (50 + 30 + 20) are expected to sum here.
        assert_eq!(grouping.total_filtered(), 100);
        assert!(close(grouping.efficiency(), 90.0));
    }

    #[test]
    fn test_processing_metrics_zero_input() {
        let empty = ConsensusMetrics::default();

        assert_eq!(empty.total_input(), 0);
        assert_eq!(empty.total_output(), 0);
        assert_eq!(empty.total_filtered(), 0);
        assert!(close(empty.efficiency(), 0.0));
    }

    #[test]
    fn test_processing_metrics_generic_usage() {
        // Exercises the trait through a generic bound rather than a concrete type.
        fn log_efficiency<M: ProcessingMetrics>(m: &M) -> f64 {
            m.efficiency()
        }

        let consensus =
            ConsensusMetrics { total_input_reads: 100, consensus_reads: 50, ..Default::default() };

        let grouping =
            UmiGroupingMetrics { total_records: 100, accepted_records: 75, ..Default::default() };

        assert!(close(log_efficiency(&consensus), 50.0));
        assert!(close(log_efficiency(&grouping), 75.0));
    }
}