Skip to main content

fgumi_lib/
logging.rs

1//! Enhanced logging utilities for formatted output.
2//!
3//! This module provides consistent, user-friendly logging utilities for metrics,
4//! progress tracking, and operation summaries.
5
6use std::time::{Duration, Instant};
7
8use crate::metrics::{ConsensusMetrics, UmiGroupingMetrics};
9use crate::rejection::format_count;
10
11/// Formats a percentage with specified decimal places.
12///
13/// # Arguments
14///
15/// * `value` - The fraction (0.0-1.0) to format as percentage
16/// * `decimals` - Number of decimal places to include
17///
18/// # Returns
19///
20/// A string formatted as "XX.XX%" (e.g., "95.43%")
21///
22/// # Examples
23///
24/// ```
25/// use fgumi_lib::logging::format_percent;
26///
27/// assert_eq!(format_percent(0.9543, 2), "95.43%");
28/// assert_eq!(format_percent(0.5, 1), "50.0%");
29/// assert_eq!(format_percent(1.0, 0), "100%");
30/// ```
31#[must_use]
32pub fn format_percent(value: f64, decimals: usize) -> String {
33    format!("{:.decimals$}%", value * 100.0, decimals = decimals)
34}
35
36/// Formats a duration in human-readable form.
37///
38/// # Arguments
39///
40/// * `duration` - The duration to format
41///
42/// # Returns
43///
44/// A human-readable string (e.g., "2m 15s", "1h 30m", "45s")
45///
46/// # Examples
47///
48/// ```
49/// use fgumi_lib::logging::format_duration;
50/// use std::time::Duration;
51///
52/// assert_eq!(format_duration(Duration::from_secs(45)), "45s");
53/// assert_eq!(format_duration(Duration::from_secs(135)), "2m 15s");
54/// assert_eq!(format_duration(Duration::from_secs(5400)), "1h 30m");
55/// ```
56#[must_use]
57pub fn format_duration(duration: Duration) -> String {
58    let secs = duration.as_secs();
59    if secs < 60 {
60        format!("{secs}s")
61    } else if secs < 3600 {
62        let mins = secs / 60;
63        let remaining_secs = secs % 60;
64        if remaining_secs == 0 { format!("{mins}m") } else { format!("{mins}m {remaining_secs}s") }
65    } else {
66        let hours = secs / 3600;
67        let mins = (secs % 3600) / 60;
68        if mins == 0 { format!("{hours}h") } else { format!("{hours}h {mins}m") }
69    }
70}
71
72/// Formats a rate (items per second) with appropriate units.
73///
74/// # Arguments
75///
76/// * `count` - Number of items processed
77/// * `duration` - Time taken to process items
78///
79/// # Returns
80///
81/// A formatted rate string (e.g., "1,234 reads/s", "50 reads/min")
82///
83/// # Examples
84///
85/// ```
86/// use fgumi_lib::logging::format_rate;
87/// use std::time::Duration;
88///
89/// assert_eq!(format_rate(1000, Duration::from_secs(1)), "1,000 items/s");
90/// assert_eq!(format_rate(600, Duration::from_secs(60)), "10 items/s");
91/// ```
92#[must_use]
93#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation, clippy::cast_sign_loss)]
94pub fn format_rate(count: u64, duration: Duration) -> String {
95    let secs = duration.as_secs_f64();
96    if secs < 0.001 {
97        return format!("{} items/s", format_count(count));
98    }
99
100    let rate = count as f64 / secs;
101    if rate >= 1.0 {
102        format!("{} items/s", format_count(rate as u64))
103    } else {
104        let items_per_min = count as f64 / (secs / 60.0);
105        format!("{items_per_min:.1} items/min")
106    }
107}
108
109/// Logs a formatted summary of consensus metrics.
110///
111/// Outputs key metrics including input/output counts, rejection breakdown,
112/// and quality statistics.
113///
114/// # Arguments
115///
116/// * `metrics` - The consensus metrics to summarize
117///
118/// # Examples
119///
120/// ```no_run
121/// use fgumi_lib::logging::log_consensus_summary;
122/// use fgumi_lib::metrics::ConsensusMetrics;
123///
124/// let mut metrics = ConsensusMetrics::new();
125/// metrics.total_input_reads = 10_000;
126/// metrics.consensus_reads = 8_000;
127/// metrics.filtered_reads = 2_000;
128///
129/// log_consensus_summary(&metrics);
130/// ```
131#[allow(clippy::cast_precision_loss)]
132pub fn log_consensus_summary(metrics: &ConsensusMetrics) {
133    log::info!("Consensus Calling Summary:");
134    log::info!("  Input reads: {}", format_count(metrics.total_input_reads));
135    log::info!("  Consensus reads: {}", format_count(metrics.consensus_reads));
136    log::info!("  Filtered reads: {}", format_count(metrics.filtered_reads));
137
138    if metrics.total_input_reads > 0 {
139        let pass_rate = metrics.consensus_reads as f64 / metrics.total_input_reads as f64;
140        log::info!("  Pass rate: {}", format_percent(pass_rate, 2));
141    }
142
143    if metrics.consensus_reads > 0 {
144        log::info!("  Avg reads per consensus: {:.1}", metrics.avg_input_reads_per_consensus);
145        log::info!("  Avg depth: {:.1}", metrics.avg_raw_read_depth);
146    }
147
148    if metrics.total_rejections() > 0 {
149        log::info!("  Top rejection reasons:");
150        let summary = metrics.rejection_summary();
151        let mut sorted: Vec<_> = summary.iter().collect();
152        sorted.sort_by(|a, b| b.1.cmp(a.1));
153
154        for (reason, count) in sorted.iter().take(5) {
155            log::info!("    {}: {}", reason.description(), format_count(**count));
156        }
157    }
158}
159
160/// Logs a formatted summary of UMI grouping metrics.
161///
162/// Outputs grouping statistics including molecule counts and family sizes.
163///
164/// # Arguments
165///
166/// * `metrics` - The UMI grouping metrics to summarize
167///
168/// # Examples
169///
170/// ```no_run
171/// use fgumi_lib::logging::log_umi_grouping_summary;
172/// use fgumi_lib::metrics::UmiGroupingMetrics;
173///
174/// let mut metrics = UmiGroupingMetrics::new();
175/// metrics.total_records = 10_000;
176/// metrics.accepted_records = 9_500;
177/// metrics.unique_molecule_ids = 1_000;
178///
179/// log_umi_grouping_summary(&metrics);
180/// ```
181#[allow(clippy::cast_precision_loss)]
182pub fn log_umi_grouping_summary(metrics: &UmiGroupingMetrics) {
183    log::info!("UMI Grouping Summary:");
184    log::info!("  Total records: {}", format_count(metrics.total_records));
185    log::info!("  Accepted records: {}", format_count(metrics.accepted_records));
186
187    if metrics.total_records > 0 {
188        let accept_rate = metrics.accepted_records as f64 / metrics.total_records as f64;
189        log::info!("  Accept rate: {}", format_percent(accept_rate, 2));
190    }
191
192    log::info!("  Unique molecules: {}", format_count(metrics.unique_molecule_ids));
193    log::info!("  Total families: {}", format_count(metrics.total_families));
194
195    if metrics.total_families > 0 {
196        log::info!("  Avg reads/molecule: {:.1}", metrics.avg_reads_per_molecule);
197        log::info!("  Median reads/molecule: {}", metrics.median_reads_per_molecule);
198        log::info!("  Min reads/molecule: {}", metrics.min_reads_per_molecule);
199        log::info!("  Max reads/molecule: {}", metrics.max_reads_per_molecule);
200    }
201
202    // Log filter counts (matching fgbio's logging style)
203    // Non-PF only logged if > 0
204    if metrics.discarded_non_pf > 0 {
205        log::info!("Filtered out {} non-PF records.", format_count(metrics.discarded_non_pf));
206    }
207    // Poor alignment always logged (like fgbio)
208    log::info!(
209        "Filtered out {} records due to mapping issues.",
210        format_count(metrics.discarded_poor_alignment)
211    );
212    // Ns in UMI always logged (like fgbio)
213    log::info!(
214        "Filtered out {} records that contained one or more Ns in their UMIs.",
215        format_count(metrics.discarded_ns_in_umi)
216    );
217    // UMI too short only logged if > 0
218    if metrics.discarded_umi_too_short > 0 {
219        log::info!(
220            "Filtered out {} records that contained UMIs that were too short.",
221            format_count(metrics.discarded_umi_too_short)
222        );
223    }
224}
225
226/// Operation timing and summary helper.
227///
228/// Tracks operation timing and provides formatted summary output.
229///
230/// # Examples
231///
232/// ```no_run
233/// use fgumi_lib::logging::OperationTimer;
234///
235/// let timer = OperationTimer::new("Processing reads");
236///
237/// // ... do work ...
238///
239/// timer.log_completion(10_000); // Log with item count
240/// ```
241pub struct OperationTimer {
242    operation: String,
243    start_time: Instant,
244}
245
246impl OperationTimer {
247    /// Creates a new operation timer and logs the start.
248    #[must_use]
249    pub fn new(operation: &str) -> Self {
250        log::info!("{operation} ...");
251        Self { operation: operation.to_string(), start_time: Instant::now() }
252    }
253
254    /// Logs the completion with item count and rate.
255    pub fn log_completion(&self, count: u64) {
256        let duration = self.start_time.elapsed();
257        log::info!(
258            "{} completed: {} in {} ({})",
259            self.operation,
260            format_count(count),
261            format_duration(duration),
262            format_rate(count, duration)
263        );
264    }
265}
266
267#[cfg(test)]
268#[allow(clippy::cast_precision_loss)]
269mod tests {
270    use super::*;
271
272    #[test]
273    fn test_format_percent() {
274        assert_eq!(format_percent(0.9543, 2), "95.43%");
275        assert_eq!(format_percent(0.5, 1), "50.0%");
276        assert_eq!(format_percent(1.0, 0), "100%");
277        assert_eq!(format_percent(0.0, 2), "0.00%");
278    }
279
280    #[test]
281    fn test_format_duration() {
282        assert_eq!(format_duration(Duration::from_secs(0)), "0s");
283        assert_eq!(format_duration(Duration::from_secs(45)), "45s");
284        assert_eq!(format_duration(Duration::from_secs(60)), "1m");
285        assert_eq!(format_duration(Duration::from_secs(135)), "2m 15s");
286        assert_eq!(format_duration(Duration::from_secs(3600)), "1h");
287        assert_eq!(format_duration(Duration::from_secs(5400)), "1h 30m");
288    }
289
290    #[test]
291    fn test_format_rate() {
292        assert_eq!(format_rate(1000, Duration::from_secs(1)), "1,000 items/s");
293        assert_eq!(format_rate(60, Duration::from_secs(60)), "1 items/s");
294        assert_eq!(format_rate(30, Duration::from_secs(60)), "30.0 items/min");
295        // Near-zero duration
296        assert!(format_rate(1000, Duration::from_nanos(1)).contains("items/s"));
297    }
298
299    #[test]
300    fn test_operation_timer() {
301        let timer = OperationTimer::new("Test");
302        timer.log_completion(1000);
303    }
304
305    #[test]
306    fn test_log_consensus_summary() {
307        // Empty metrics
308        log_consensus_summary(&ConsensusMetrics::new());
309
310        // With data
311        let mut metrics = ConsensusMetrics::new();
312        metrics.total_input_reads = 10000;
313        metrics.consensus_reads = 8000;
314        metrics.avg_input_reads_per_consensus = 1.25;
315        metrics.rejected_insufficient_support = 1000;
316        log_consensus_summary(&metrics);
317    }
318
319    #[test]
320    fn test_log_umi_grouping_summary() {
321        // Empty metrics
322        log_umi_grouping_summary(&UmiGroupingMetrics::new());
323
324        // With data and discards
325        let mut metrics = UmiGroupingMetrics::new();
326        metrics.total_records = 10000;
327        metrics.accepted_records = 8000;
328        metrics.total_families = 950;
329        metrics.discarded_non_pf = 500;
330        metrics.discarded_ns_in_umi = 200;
331        log_umi_grouping_summary(&metrics);
332    }
333}