#![allow(clippy::cast_precision_loss)]
use fgumi_lib::logging::{format_duration, format_percent, format_rate};
use fgumi_lib::metrics::{ConsensusMetrics, FamilySizeMetrics, UmiGroupingMetrics};
use fgumi_lib::umi::assigner::{AdjacencyUmiAssigner, IdentityUmiAssigner, UmiAssigner};
use std::collections::HashSet;
use std::time::Duration;
/// Builds a small UMI fixture of six reads: three copies of `ACGTACGT`,
/// two copies of `TGCATGCA`, and a single `GGGGGGGG`, in that order.
fn create_test_umis_simple() -> Vec<String> {
    ["ACGTACGT", "ACGTACGT", "ACGTACGT", "TGCATGCA", "TGCATGCA", "GGGGGGGG"]
        .iter()
        .copied()
        .map(String::from)
        .collect()
}
/// Verifies the identity assigner on the simple six-read fixture.
///
/// The fixture holds three distinct UMIs, so exactly three molecule IDs are expected,
/// and two reads must share a molecule ID if and only if their UMI strings are equal.
#[test]
fn test_identity_assigner_basic_workflow() {
    let umis = create_test_umis_simple();
    let assigner = IdentityUmiAssigner::default();
    let assignments = assigner.assign(&umis);

    let unique_ids: HashSet<_> = assignments.iter().collect();
    assert_eq!(unique_ids.len(), 3, "Should have 3 unique molecule IDs for 3 different UMIs");

    // Compare every pair of reads rather than only the first occurrence of each UMI:
    // looking a UMI up against itself is trivially equal and would never detect
    // duplicate reads that were (incorrectly) given different molecule IDs.
    for (i, left) in umis.iter().enumerate() {
        for (j, right) in umis.iter().enumerate().skip(i + 1) {
            if left == right {
                assert_eq!(
                    assignments[i], assignments[j],
                    "Same UMI should get same molecule ID"
                );
            } else {
                assert_ne!(
                    assignments[i], assignments[j],
                    "Different UMIs should get different molecule IDs"
                );
            }
        }
    }
}
/// Checks that a low-count UMI differing in one base from a high-count UMI receives
/// the high-count UMI's molecule ID, while an unrelated UMI keeps a separate ID.
#[test]
fn test_adjacency_assigner_with_error_correction() {
    // 50 reads of the dominant UMI, 5 reads differing only in the last base,
    // then 30 reads of an unrelated UMI.
    let umis: Vec<String> = [("AAAAAAAA", 50), ("AAAAAAAC", 5), ("TTTTTTTT", 30)]
        .iter()
        .flat_map(|&(umi, copies)| (0..copies).map(move |_| umi.to_string()))
        .collect();

    let assigner = AdjacencyUmiAssigner::new(1, 1, 100);
    let assignments = assigner.assign(&umis);

    // Molecule ID given to the first read carrying the requested UMI, if any.
    let first_id_for = |wanted: &str| -> Option<&fgumi_lib::template::MoleculeId> {
        let idx = umis.iter().position(|candidate| candidate == wanted)?;
        Some(&assignments[idx])
    };

    let dominant = first_id_for("AAAAAAAA");
    let error_variant = first_id_for("AAAAAAAC");
    let unrelated = first_id_for("TTTTTTTT");

    assert_eq!(
        dominant, error_variant,
        "Error UMI should be captured by high-abundance base UMI"
    );
    assert_ne!(dominant, unrelated, "Distinct UMI family should not be merged");

    let distinct_ids: HashSet<_> = assignments.iter().collect();
    assert_eq!(
        distinct_ids.len(),
        2,
        "Should have 2 molecule IDs: merged AAAA* family and TTTT* family"
    );
}
/// Checks that two UMIs one base apart are kept as separate molecules when their
/// read counts are close to each other (30 vs 20 reads).
#[test]
fn test_adjacency_respects_count_gradient() {
    let umis: Vec<String> = [("GGGGGGGG", 30), ("GGGGGGGC", 20)]
        .iter()
        .flat_map(|&(umi, copies)| (0..copies).map(move |_| umi.to_string()))
        .collect();

    let assigner = AdjacencyUmiAssigner::new(1, 1, 100);
    let assignments = assigner.assign(&umis);

    // Molecule ID given to the first read carrying the requested UMI, if any.
    let first_id_for = |wanted: &str| -> Option<&fgumi_lib::template::MoleculeId> {
        let idx = umis.iter().position(|candidate| candidate == wanted)?;
        Some(&assignments[idx])
    };

    let larger_family = first_id_for("GGGGGGGG");
    let smaller_family = first_id_for("GGGGGGGC");
    assert_ne!(
        larger_family, smaller_family,
        "Similar-abundance UMIs should not be merged due to count gradient"
    );

    let distinct_ids: HashSet<_> = assignments.iter().collect();
    assert_eq!(distinct_ids.len(), 2, "Should have 2 separate molecule IDs due to count gradient");
}
/// Populates a `UmiGroupingMetrics` value field by field and reads back a few of the
/// stored values.
#[test]
fn test_umi_grouping_metrics_workflow() {
    let mut grouping = UmiGroupingMetrics::new();

    // Read-level counters.
    grouping.total_records = 10_000;
    grouping.accepted_records = 9_500;

    // Discard counters.
    grouping.discarded_non_pf = 200;
    grouping.discarded_ns_in_umi = 150;
    grouping.discarded_umi_too_short = 150;

    // Molecule/family-level summary values.
    grouping.unique_molecule_ids = 1_000;
    grouping.total_families = 1_000;
    grouping.avg_reads_per_molecule = 9.5;

    assert_eq!(grouping.total_records, 10_000);
    assert_eq!(grouping.unique_molecule_ids, 1_000);

    // Floating-point field: compare with a tolerance rather than `==`.
    let deviation = (grouping.avg_reads_per_molecule - 9.5).abs();
    assert!(deviation < f64::EPSILON);
}
/// Builds `FamilySizeMetrics` rows for family sizes 1, 2 and 5, fills in their
/// distribution fields, and checks that each row reports the size it was created with.
#[test]
fn test_family_size_metrics_distribution() {
    // (family size, count, fraction, fraction of families with size >= this one)
    let rows = [(1, 100, 0.5, 1.0), (2, 50, 0.25, 0.5), (5, 50, 0.25, 0.25)];

    for (size, count, fraction, fraction_at_least) in rows {
        let mut row = FamilySizeMetrics::new(size);
        row.count = count;
        row.fraction = fraction;
        row.fraction_gt_or_eq_family_size = fraction_at_least;

        assert_eq!(row.family_size, size);
    }
}
/// Writes the three basic read counters on a fresh `ConsensusMetrics` and reads them back.
#[test]
fn test_metrics_basic_fields() {
    let mut consensus = ConsensusMetrics::new();

    consensus.total_input_reads = 1000;
    assert_eq!(consensus.total_input_reads, 1000);

    consensus.consensus_reads = 800;
    assert_eq!(consensus.consensus_reads, 800);

    consensus.filtered_reads = 200;
    assert_eq!(consensus.filtered_reads, 200);
}
/// Checks `format_percent` output for a typical pass rate, a small rate, and exactly 1.0.
#[test]
fn test_format_percent_integration() {
    // (input fraction, second argument passed to `format_percent`, expected text)
    let cases = [(0.9543, 2, "95.43%"), (0.0123, 2, "1.23%"), (1.0, 1, "100.0%")];

    for (fraction, decimals, expected) in cases {
        assert_eq!(format_percent(fraction, decimals), expected);
    }
}
/// Checks `format_duration` output for a sub-minute, a multi-minute, and a whole-hour duration.
#[test]
fn test_format_duration_realistic() {
    // (duration in seconds, expected text)
    let cases = [(45, "45s"), (125, "2m 5s"), (7200, "2h")];

    for (seconds, expected) in cases {
        assert_eq!(format_duration(Duration::from_secs(seconds)), expected);
    }
}
/// Checks the text `format_rate` produces for a fast job (100,000 items in 10 s)
/// and a slow job (50 items in 60 s).
#[test]
fn test_format_rate_with_realistic_data() {
    let fast = format_rate(100_000, Duration::from_secs(10));
    assert!(fast.contains("10,000 items/s"));

    let slow = format_rate(50, Duration::from_secs(60));
    assert!(slow.contains("items/min"));
}