use anyhow::Result;
use log::warn;
// Strategy used to turn a base scale into the final factor applied to counts;
// see `NormalizationMethod::scale_factor` for the exact formulas.
//
// NOTE(review): plain `//` comments are used here on purpose. `///` doc
// comments are passed to the `clap::ValueEnum` derive and may surface as CLI
// help text for the possible values — confirm before converting them.
#[derive(Debug, Clone, clap::ValueEnum)]
pub enum NormalizationMethod {
    // No normalization: the base scale is used unchanged.
    Raw,
    // Base scale multiplied by 1e6 / n_reads and by 1e3 / bin_size.
    RPKM,
    // Base scale multiplied by 1e6 / n_reads.
    CPM,
}
impl NormalizationMethod {
    /// Parses a normalization method name, ignoring case.
    ///
    /// Recognised names are `"raw"`, `"rpkm"` and `"cpm"`. Any other input
    /// logs a warning and falls back to [`NormalizationMethod::Raw`], so this
    /// function currently always returns `Ok`.
    // An inherent `from_str` is kept (instead of `std::str::FromStr`) so the
    // existing call sites and return type stay as they are.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Result<NormalizationMethod> {
        let lowered = s.to_lowercase();
        let method = match lowered.as_str() {
            "cpm" => Self::CPM,
            "rpkm" => Self::RPKM,
            "raw" => Self::Raw,
            _ => {
                warn!("Unknown normalization method: {s}. Defaulting to Raw");
                Self::Raw
            }
        };
        Ok(method)
    }

    /// Computes the multiplicative factor for this normalization method.
    ///
    /// * `base_scale` - user-supplied scale, widened to `f64` before use.
    /// * `bin_size` - divisor of the per-kilobase term (only used by `RPKM`);
    ///   presumably the bin width in base pairs.
    /// * `n_reads` - divisor of the per-million term (used by `CPM` and
    ///   `RPKM`); presumably the total read count.
    ///
    /// Returns:
    /// * `Raw`  -> `base_scale`
    /// * `CPM`  -> `base_scale * (1e6 / n_reads)`
    /// * `RPKM` -> `base_scale * (1e6 / n_reads) * (1e3 / bin_size)`
    ///
    /// There is no guard against `n_reads == 0` or `bin_size == 0`; the
    /// floating-point division then produces a non-finite factor.
    pub fn scale_factor(&self, base_scale: f32, bin_size: u64, n_reads: u64) -> f64 {
        let base = f64::from(base_scale);
        // Shared by CPM and RPKM; evaluated lazily so Raw never divides.
        let per_million = || 1_000_000.0 / n_reads as f64;

        match self {
            Self::RPKM => {
                let per_kb = 1_000.0 / bin_size as f64;
                base * per_million() * per_kb
            }
            Self::CPM => base * per_million(),
            Self::Raw => base,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point results.
    const EPS: f64 = 1e-9;

    /// Asserts that `actual` is within `EPS` of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < EPS,
            "expected {expected}, got {actual}"
        );
    }

    /// Baseline: with a 1 kb bin and 10M reads, CPM and RPKM coincide.
    #[test]
    fn test_scaling_factors() {
        let bin_size = 1000;
        let n_reads = 10_000_000;
        let base_scale = 1.0;

        let raw_factor = NormalizationMethod::Raw.scale_factor(base_scale, bin_size, n_reads);
        let cpm_factor = NormalizationMethod::CPM.scale_factor(base_scale, bin_size, n_reads);
        let rpkm_factor = NormalizationMethod::RPKM.scale_factor(base_scale, bin_size, n_reads);

        assert_close(1000.0 * raw_factor, 1000.0);
        assert_close(1000.0 * cpm_factor, 100.0);
        assert_close(1000.0 * rpkm_factor, 100.0);
    }

    /// A bin size other than 1000 makes the per-kilobase term visible, so a
    /// regression that drops it from RPKM (or adds it to CPM) is caught.
    #[test]
    fn test_rpkm_depends_on_bin_size() {
        let n_reads = 10_000_000;

        let rpkm_factor = NormalizationMethod::RPKM.scale_factor(1.0, 500, n_reads);
        let cpm_factor = NormalizationMethod::CPM.scale_factor(1.0, 500, n_reads);
        let raw_factor = NormalizationMethod::Raw.scale_factor(1.0, 500, n_reads);

        assert_close(rpkm_factor, 0.2);
        assert_close(cpm_factor, 0.1);
        assert_close(raw_factor, 1.0);
    }

    /// The base scale multiplies every method's factor.
    #[test]
    fn test_base_scale_is_applied() {
        let raw_factor = NormalizationMethod::Raw.scale_factor(2.0, 1000, 10_000_000);
        let cpm_factor = NormalizationMethod::CPM.scale_factor(2.0, 1000, 10_000_000);
        let rpkm_factor = NormalizationMethod::RPKM.scale_factor(2.0, 1000, 10_000_000);

        assert_close(raw_factor, 2.0);
        assert_close(cpm_factor, 0.2);
        assert_close(rpkm_factor, 0.2);
    }

    /// Names are matched case-insensitively; unknown names fall back to `Raw`.
    #[test]
    fn test_from_str() {
        assert!(matches!(
            NormalizationMethod::from_str("raw").unwrap(),
            NormalizationMethod::Raw
        ));
        assert!(matches!(
            NormalizationMethod::from_str("RPKM").unwrap(),
            NormalizationMethod::RPKM
        ));
        assert!(matches!(
            NormalizationMethod::from_str("Cpm").unwrap(),
            NormalizationMethod::CPM
        ));
        assert!(matches!(
            NormalizationMethod::from_str("not-a-method").unwrap(),
            NormalizationMethod::Raw
        ));
    }
}