use crate::commands::common::parse_bool;
use crate::logging::OperationTimer;
use crate::progress::ProgressTracker;
use crate::validation::validate_file_exists;
use anyhow::Result;
use clap::Parser;
use log::info;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use crate::commands::command::Command;
// Command-line arguments for the `metrics` comparison subcommand (invoked as
// `fgumi compare metrics <file1> <file2>` according to the usage examples below).
//
// NOTE: the comments in this struct are deliberately plain `//` comments. With clap's
// derive API, `///` doc comments on fields are picked up as help text, so using them
// here would change the CLI's `--help` output.
#[derive(Debug, Parser)]
#[command(
name = "metrics",
about = "Compare two TSV metrics files",
long_about = r#"
Compare two TSV metrics files with optional float precision rounding.
This tool compares TSV files line by line and field by field:
- Integers are compared exactly
- Floats are optionally rounded to a specified precision before comparison
- Float comparison uses both relative and absolute tolerance
- Strings are compared exactly
The tool is designed to compare metrics files from fgbio and fgumi, which
may produce slightly different floating-point representations.
Example usage:
fgumi compare metrics file1.txt file2.txt
fgumi compare metrics file1.txt file2.txt --precision 6
fgumi compare metrics file1.txt file2.txt --rel-tol 1e-6 --abs-tol 1e-9
fgumi compare metrics file1.txt file2.txt --quiet
"#
)]
pub struct CompareMetrics {
// First positional argument: path of the first TSV file.
#[arg(index = 1)]
pub file1: PathBuf,
// Second positional argument: path of the second TSV file.
#[arg(index = 2)]
pub file2: PathBuf,
// Number of decimal places floats are rounded to before comparison (default 6).
// A negative value disables rounding: `execute` maps it to `None`.
#[arg(long = "precision", default_value = "6")]
pub precision: i32,
// Relative tolerance passed to `values_equal` for numeric fields (default 1e-9).
#[arg(long = "rel-tol", default_value = "1e-9")]
pub rel_tol: f64,
// Absolute tolerance passed to `values_equal` for numeric fields (default 1e-9).
#[arg(long = "abs-tol", default_value = "1e-9")]
pub abs_tol: f64,
// Maximum number of individual differences printed in the report (default 20);
// any further differences are only summarised as a count.
#[arg(short = 'm', long = "max-diffs", default_value = "20")]
pub max_diffs: usize,
// Suppresses everything `execute` prints to stdout. The flag takes zero or one value:
// a bare `-q`/`--quiet` supplies "true", otherwise the given text is converted by
// `parse_bool` (defined in `crate::commands::common`; accepted spellings not visible here).
#[arg(short = 'q', long = "quiet", default_value = "false", num_args = 0..=1, default_missing_value = "true", action = clap::ArgAction::Set, value_parser = parse_bool)]
pub quiet: bool,
// When set (and `quiet` is not), prints a confirmation line if the files match.
// Accepts an optional value in the same way as `quiet`.
#[arg(short = 'v', long = "verbose", default_value = "false", num_args = 0..=1, default_missing_value = "true", action = clap::ArgAction::Set, value_parser = parse_bool)]
pub verbose: bool,
}
/// A single TSV field after type detection.
///
/// Produced by `parse_value` and consumed by `values_equal`; the variant decides
/// which comparison rule applies to the field.
#[derive(Debug, Clone)]
enum ParsedValue {
    /// The field parsed as a signed 64-bit integer.
    Int(i64),
    /// The field parsed as a float (this includes NaN and the infinities).
    Float(f64),
    /// The field is not numeric; the raw text is kept as-is.
    String(String),
}

/// Renders the payload with its default formatting, without any variant tag.
///
/// Width, fill and precision flags on the caller's formatter are not applied to
/// the payload: each variant is written with a fresh, default format spec.
impl std::fmt::Display for ParsedValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Int(number) => f.write_fmt(format_args!("{number}")),
            Self::Float(number) => f.write_fmt(format_args!("{number}")),
            Self::String(text) => f.write_str(text),
        }
    }
}
/// Classifies a raw TSV field as an integer, a float, or an opaque string.
///
/// Detection order matters: a field that parses as `i64` is always an `Int`,
/// even though it would also parse as `f64`. Anything that parses as `f64`
/// (including spellings such as `NaN`, `inf` and `Infinity`) becomes a `Float`;
/// everything else is kept verbatim as a `String`.
///
/// When `precision` is `Some(p)`, floats are rounded to `p` decimal places
/// before being returned. `None` leaves floats untouched.
fn parse_value(value: &str, precision: Option<u32>) -> ParsedValue {
    if let Ok(i) = value.parse::<i64>() {
        return ParsedValue::Int(i);
    }
    let Ok(f) = value.parse::<f64>() else {
        return ParsedValue::String(value.to_string());
    };
    ParsedValue::Float(match precision {
        Some(p) => round_to_precision(f, p),
        None => f,
    })
}

/// Rounds `value` to `precision` decimal places, leaving it unchanged when the
/// scaling step would leave the finite `f64` range.
fn round_to_precision(value: f64, precision: u32) -> f64 {
    // NaN and the infinities have nothing to round.
    if !value.is_finite() {
        return value;
    }
    // `powi` takes an `i32`; clamp rather than wrap for absurdly large precisions.
    let multiplier = 10_f64.powi(precision.min(i32::MAX as u32) as i32);
    let scaled = value * multiplier;
    // If either the multiplier or the scaled value overflowed, the naive
    // `round() / multiplier` would turn a finite input into `inf` or `NaN`.
    // In both cases the requested precision is finer than (or far below) what the
    // value can represent, so rounding is a no-op and the input is returned as-is.
    if !multiplier.is_finite() || !scaled.is_finite() {
        return value;
    }
    scaled.round() / multiplier
}
/// Decides whether two parsed fields should be treated as equal.
///
/// * `Int` vs `Int` and `String` vs `String` are compared exactly.
/// * `Float` vs `Float`, and `Int` vs `Float` in either order (the integer is
///   widened to `f64`), are compared numerically with `rel_tol` / `abs_tol`.
/// * A `String` never equals a numeric value.
fn values_equal(val1: &ParsedValue, val2: &ParsedValue, rel_tol: f64, abs_tol: f64) -> bool {
    match (val1, val2) {
        (ParsedValue::Int(i1), ParsedValue::Int(i2)) => i1 == i2,
        (ParsedValue::Float(f1), ParsedValue::Float(f2)) => {
            floats_close(*f1, *f2, rel_tol, abs_tol)
        }
        // Mixed numeric fields (e.g. "1" vs "1.0") go through the same float
        // comparison, so NaN and infinity get the same treatment as above.
        (ParsedValue::Int(i), ParsedValue::Float(f))
        | (ParsedValue::Float(f), ParsedValue::Int(i)) => {
            floats_close(*i as f64, *f, rel_tol, abs_tol)
        }
        (ParsedValue::String(s1), ParsedValue::String(s2)) => s1 == s2,
        (ParsedValue::String(_), _) | (_, ParsedValue::String(_)) => false,
    }
}

/// Tolerance-based float comparison: two NaNs are equal, infinities are equal
/// only to an infinity of the same sign, and finite values are equal when their
/// difference is within `max(rel_tol * max(|a|, |b|), abs_tol)`.
fn floats_close(a: f64, b: f64, rel_tol: f64, abs_tol: f64) -> bool {
    if a.is_nan() && b.is_nan() {
        return true;
    }
    // Infinities must be handled before the tolerance check: with an infinite
    // operand both the difference and the relative bound are infinite, so
    // `diff <= bound` would wrongly accept e.g. 5 vs inf.
    if a.is_infinite() || b.is_infinite() {
        return a == b;
    }
    let diff = (a - b).abs();
    let max_val = a.abs().max(b.abs());
    // A lone NaN makes `diff` NaN, and every comparison with NaN is false.
    diff <= (rel_tol * max_val).max(abs_tol)
}
impl CompareMetrics {
    /// Reports (unless `quiet`) that `path` has `extra_lines` lines beyond `line_num`, then exits with status 1.
    fn exit_on_line_count_mismatch(
        &self,
        path: &std::path::Path,
        extra_lines: usize,
        line_num: usize,
    ) -> ! {
        if !self.quiet {
            println!(
                "Line count mismatch: {} has {} extra lines after line {}",
                path.display(),
                extra_lines,
                line_num
            );
        }
        std::process::exit(1);
    }
}

impl Command for CompareMetrics {
    /// Compares the two TSV files line by line and field by field.
    ///
    /// Both files are read in lockstep. Each pair of lines is split on tabs and
    /// every pair of fields is parsed with `parse_value` and compared with
    /// `values_equal`. Unless `quiet` is set, up to `max_diffs` differences are
    /// printed to stdout followed by a count of the remainder.
    ///
    /// Returns `Ok(())` only when the files match. When they differ (including
    /// a differing number of lines) the process is terminated with exit status 1
    /// rather than returning, so callers never observe an `Err` for "files differ".
    ///
    /// # Errors
    ///
    /// Returns an error if either input fails `validate_file_exists`, cannot be
    /// opened, or yields a read error (for example invalid UTF-8) on any line.
    fn execute(&self, _command_line: &str) -> Result<()> {
        validate_file_exists(&self.file1, "First file")?;
        validate_file_exists(&self.file2, "Second file")?;
        let timer = OperationTimer::new("Comparing metrics");

        // A negative `--precision` disables rounding altogether.
        let precision = if self.precision >= 0 { Some(self.precision as u32) } else { None };

        let reader1 = BufReader::new(File::open(&self.file1)?);
        let reader2 = BufReader::new(File::open(&self.file2)?);
        let mut lines1 = reader1.lines();
        let mut lines2 = reader2.lines();

        // Only the first `max_diffs` messages are ever printed, so only those are
        // formatted and stored; every further difference is just counted.
        let mut differences: Vec<String> = Vec::new();
        let mut diff_count = 0usize;
        let mut line_num = 0usize;
        let progress = ProgressTracker::new("Compared lines").with_interval(100_000);

        loop {
            match (lines1.next(), lines2.next()) {
                (None, None) => break,
                // Read errors are checked before the length-mismatch arms so that
                // an error in one file on the same step where the other file ends
                // is reported as an error, not as a line-count mismatch.
                (Some(Err(e)), _) | (_, Some(Err(e))) => {
                    return Err(anyhow::anyhow!(
                        "Error reading file at line {}: {}",
                        line_num + 1,
                        e
                    ));
                }
                (Some(Ok(l1)), Some(Ok(l2))) => {
                    line_num += 1;
                    progress.log_if_needed(1);
                    let fields1: Vec<&str> =
                        l1.trim_end_matches(['\n', '\r']).split('\t').collect();
                    let fields2: Vec<&str> =
                        l2.trim_end_matches(['\n', '\r']).split('\t').collect();

                    // Columns cannot be paired up reliably when the counts differ,
                    // so the whole line is reported as a single difference.
                    if fields1.len() != fields2.len() {
                        diff_count += 1;
                        if differences.len() < self.max_diffs {
                            differences.push(format!(
                                "Line {line_num}: field count mismatch ({} vs {})",
                                fields1.len(),
                                fields2.len()
                            ));
                        }
                        continue;
                    }

                    for (col_idx, (f1, f2)) in fields1.iter().zip(fields2.iter()).enumerate() {
                        // Columns are reported 1-based.
                        let col_num = col_idx + 1;
                        let val1 = parse_value(f1, precision);
                        let val2 = parse_value(f2, precision);
                        if !values_equal(&val1, &val2, self.rel_tol, self.abs_tol) {
                            diff_count += 1;
                            if differences.len() < self.max_diffs {
                                differences.push(format!(
                                    "Line {line_num}, col {col_num}: {f1:?} != {f2:?} (parsed: {val1} != {val2})"
                                ));
                            }
                        }
                    }
                }
                // One file ended first: count the line just read plus whatever
                // remains, report it, and exit with status 1.
                (Some(Ok(_)), None) => {
                    self.exit_on_line_count_mismatch(&self.file1, 1 + lines1.count(), line_num)
                }
                (None, Some(Ok(_))) => {
                    self.exit_on_line_count_mismatch(&self.file2, 1 + lines2.count(), line_num)
                }
            }
        }
        progress.log_final();

        let is_equal = diff_count == 0;
        if !self.quiet {
            if !is_equal {
                println!("Found {} difference(s) between files:", diff_count);
                for diff in &differences {
                    println!(" {diff}");
                }
                if diff_count > self.max_diffs {
                    println!(" ... and {} more", diff_count - self.max_diffs);
                }
            } else if self.verbose {
                println!("Files match ({line_num} lines compared)");
            }
        }

        if is_equal {
            info!("Metrics files are identical");
            timer.log_completion(line_num as u64);
            Ok(())
        } else {
            info!("Metrics files differ");
            std::process::exit(1);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used for both the relative and the absolute bound in every comparison.
    const TOL: f64 = 1e-9;

    /// Runs `values_equal` on two raw floats with `TOL` for both tolerances.
    fn floats_equal(lhs: f64, rhs: f64) -> bool {
        values_equal(&ParsedValue::Float(lhs), &ParsedValue::Float(rhs), TOL, TOL)
    }

    /// Parses `text` with precision 6 and returns the float payload, failing
    /// the test if any other variant comes back.
    fn parse_as_float(text: &str) -> f64 {
        match parse_value(text, Some(6)) {
            ParsedValue::Float(value) => value,
            _ => unreachable!("Expected Float variant for {}", text),
        }
    }

    /// Two NaN fields count as equal, unlike IEEE `==`.
    #[test]
    fn test_nan_equality() {
        assert!(floats_equal(f64::NAN, f64::NAN));
    }

    /// +inf equals +inf.
    #[test]
    fn test_positive_infinity_equality() {
        assert!(floats_equal(f64::INFINITY, f64::INFINITY));
    }

    /// -inf equals -inf.
    #[test]
    fn test_negative_infinity_equality() {
        assert!(floats_equal(f64::NEG_INFINITY, f64::NEG_INFINITY));
    }

    /// Infinities of opposite sign differ.
    #[test]
    fn test_mixed_infinity_not_equal() {
        assert!(!floats_equal(f64::INFINITY, f64::NEG_INFINITY));
    }

    /// NaN never equals an ordinary number.
    #[test]
    fn test_nan_not_equal_to_number() {
        assert!(!floats_equal(f64::NAN, 1.0));
    }

    /// The text "NaN" is recognised as a float NaN rather than a string.
    #[test]
    fn test_parse_nan_string() {
        assert!(parse_as_float("NaN").is_nan());
    }

    /// Both "Infinity" and "inf" parse to positive infinity.
    #[test]
    fn test_parse_infinity_string() {
        for text in ["Infinity", "inf"] {
            let value = parse_as_float(text);
            assert!(value.is_infinite() && value.is_sign_positive());
        }
    }
}