use std::sync::Arc;
use arrow::{
array::Float64Array,
datatypes::{DataType, Field, Schema},
record_batch::RecordBatch,
};
use transcriptomic_rs::{ExpressionMatrix, Normalize};
/// Builds an `ExpressionMatrix` fixture from gene-major rows of optional values.
///
/// `values` is indexed as `values[gene][sample]`. Each sample becomes one nullable
/// `Float64` column (named after the sample) of the backing `RecordBatch`, so the
/// batch has one row per entry of `values`.
///
/// # Panics
///
/// Panics if any row of `values` has fewer entries than `samples`, or if the
/// `RecordBatch` cannot be constructed from the generated columns.
fn create_test_matrix(
    genes: Vec<&str>,
    samples: Vec<&str>,
    values: Vec<Vec<Option<f64>>>,
) -> ExpressionMatrix {
    // One nullable Float64 field per sample, in the order given.
    let schema = Schema::new(
        samples
            .iter()
            .map(|name| Field::new(name.to_string(), DataType::Float64, true))
            .collect::<Vec<Field>>(),
    );

    // Transpose the gene-major input into one Arrow array per sample.
    let columns: Vec<Arc<dyn arrow::array::Array>> = (0..samples.len())
        .map(|column_idx| {
            let cells: Vec<Option<f64>> = values.iter().map(|row| row[column_idx]).collect();
            Arc::new(Float64Array::from(cells)) as Arc<dyn arrow::array::Array>
        })
        .collect();

    let batch =
        RecordBatch::try_new(Arc::new(schema), columns).expect("Failed to create RecordBatch");

    ExpressionMatrix {
        genes: genes.iter().map(|name| name.to_string()).collect(),
        samples: samples.iter().map(|name| name.to_string()).collect(),
        values: batch,
    }
}
/// Checks `Normalize::log2` on a 2x2 matrix.
///
/// The expected outputs (0 -> 0, 1 -> 1, 3 -> 2, 7 -> 3) correspond to
/// `log2(x + 1)`. Results are compared with an absolute tolerance rather than
/// exact equality, matching the tolerance used by the z-score test in this file:
/// a logarithm computed through floating-point arithmetic is not guaranteed to
/// be bit-identical to the hand-written constant.
#[test]
fn test_log2_transformation() {
    const TOLERANCE: f64 = 1e-10;

    let matrix = create_test_matrix(
        vec!["GENE_A", "GENE_B"],
        vec!["SAMPLE_1", "SAMPLE_2"],
        vec![
            vec![Some(0.0), Some(1.0)],
            vec![Some(3.0), Some(7.0)],
        ],
    );
    let normalized = Normalize::log2(&matrix).expect("Failed to apply log2");

    // The transformation must not rename or reorder genes/samples.
    assert_eq!(normalized.genes, matrix.genes);
    assert_eq!(normalized.samples, matrix.samples);

    // (gene, sample, expected transformed value)
    let expected_cells = [
        ("GENE_A", "SAMPLE_1", 0.0_f64),
        ("GENE_A", "SAMPLE_2", 1.0_f64),
        ("GENE_B", "SAMPLE_1", 2.0_f64),
        ("GENE_B", "SAMPLE_2", 3.0_f64),
    ];
    for &(gene, sample, expected) in expected_cells.iter() {
        let actual = normalized.get(gene, sample);
        match actual {
            Some(value) => assert!(
                (value - expected).abs() < TOLERANCE,
                "Expected {} for {}, {}, got {}",
                expected,
                gene,
                sample,
                value
            ),
            None => panic!("Missing value for {}, {}", gene, sample),
        }
    }
}
/// Checks per-gene z-scores against hand-computed references.
///
/// The reference standard deviation is the population form (sum of squared
/// deviations divided by the number of samples, 3). All three GENE_A cells and
/// the first GENE_B cell are verified to within `1e-10`.
#[test]
fn test_z_score_per_gene() {
    let expression_rows = vec![
        vec![Some(1.0), Some(2.0), Some(3.0)],
        vec![Some(10.0), Some(20.0), Some(30.0)],
    ];
    let matrix = create_test_matrix(
        vec!["GENE_A", "GENE_B"],
        vec!["S1", "S2", "S3"],
        expression_rows,
    );
    let normalized = Normalize::z_score_per_gene(&matrix).expect("Failed to apply z-score");

    // Population standard deviation of three observations around a known mean.
    let population_std = |observations: [f64; 3], mean: f64| -> f64 {
        let squared_deviations: f64 = observations.iter().map(|x| (x - mean).powi(2)).sum();
        (squared_deviations / 3.0_f64).sqrt()
    };
    // Shared closeness check; the message mirrors "expected, then actual".
    let assert_close = |actual: f64, expected: f64| {
        assert!(
            (actual - expected).abs() < 1e-10,
            "Expected {}, got {}",
            expected,
            actual
        );
    };

    // --- GENE_A: values 1, 2, 3 with mean 2 ---
    let mean_a = 2.0_f64;
    let std_a = population_std([1.0_f64, 2.0_f64, 3.0_f64], mean_a);

    let z_a1 = normalized
        .get("GENE_A", "S1")
        .expect("Should have GENE_A, S1");
    assert_close(z_a1, (1.0_f64 - mean_a) / std_a);

    // The middle sample equals the mean, so its z-score must be ~0.
    let z_a2 = normalized
        .get("GENE_A", "S2")
        .expect("Should have GENE_A, S2");
    assert!(z_a2.abs() < 1e-10, "Expected 0, got {}", z_a2);

    let z_a3 = normalized
        .get("GENE_A", "S3")
        .expect("Should have GENE_A, S3");
    assert_close(z_a3, (3.0_f64 - mean_a) / std_a);

    // --- GENE_B: values 10, 20, 30 with mean 20 ---
    let mean_b = 20.0_f64;
    let std_b = population_std([10.0_f64, 20.0_f64, 30.0_f64], mean_b);

    let z_b1 = normalized
        .get("GENE_B", "S1")
        .expect("Should have GENE_B, S1");
    assert_close(z_b1, (10.0_f64 - mean_b) / std_b);
}
/// Checks z-scoring of a gene whose value is identical in every sample.
///
/// CONST_GENE is 5.0 in all three samples (zero variance); the test expects each
/// of its cells to come back as exactly `Some(5.0)`. VAR_GENE is present in the
/// input but is not asserted on here.
#[test]
fn test_z_score_zero_variance_gene() {
    let constant_row = vec![Some(5.0), Some(5.0), Some(5.0)];
    let varying_row = vec![Some(1.0), Some(2.0), Some(3.0)];
    let matrix = create_test_matrix(
        vec!["CONST_GENE", "VAR_GENE"],
        vec!["S1", "S2", "S3"],
        vec![constant_row, varying_row],
    );

    let normalized = Normalize::z_score_per_gene(&matrix).expect("Failed to apply z-score");

    for &sample in ["S1", "S2", "S3"].iter() {
        assert_eq!(normalized.get("CONST_GENE", sample), Some(5.0));
    }
}
/// Checks `Normalize::quantile` on a 2x3 matrix with no missing values and no ties.
///
/// Verifies that:
/// - gene and sample labels are unchanged;
/// - every cell of the result is present and finite (the input has no nulls);
/// - within each sample, GENE_A stays less than or equal to GENE_B, as in the input.
///
/// Exact output values are deliberately not pinned.
/// NOTE(review): the normalization axis and tie-handling convention of
/// `Normalize::quantile` are not visible from this file; if they are
/// documented, add exact expected values here.
#[test]
fn test_quantile_normalization() {
    const TOLERANCE: f64 = 1e-10;

    let matrix = create_test_matrix(
        vec!["GENE_A", "GENE_B"],
        vec!["S1", "S2", "S3"],
        vec![
            vec![Some(1.0), Some(2.0), Some(5.0)],
            vec![Some(3.0), Some(4.0), Some(6.0)],
        ],
    );
    let normalized = Normalize::quantile(&matrix).expect("Failed to apply quantile normalization");

    assert_eq!(normalized.genes, matrix.genes);
    assert_eq!(normalized.samples, matrix.samples);

    // Every cell must be retrievable and hold a real number; previously the
    // looked-up value was discarded, so the loop could never fail.
    for gene in &normalized.genes {
        for sample in &normalized.samples {
            let val = normalized.get(gene, sample);
            assert!(
                val.map_or(false, f64::is_finite),
                "Expected a finite value for {}, {}, got {:?}",
                gene,
                sample,
                val
            );
        }
    }

    // In the input GENE_A < GENE_B in every sample; a rank-based normalization
    // must not invert that order (equality is tolerated).
    for &sample in ["S1", "S2", "S3"].iter() {
        let lower = normalized
            .get("GENE_A", sample)
            .expect("Should have GENE_A value");
        let upper = normalized
            .get("GENE_B", sample)
            .expect("Should have GENE_B value");
        assert!(
            lower <= upper + TOLERANCE,
            "Gene order inverted in {}: GENE_A = {}, GENE_B = {}",
            sample,
            lower,
            upper
        );
    }
}
/// Chains `log2` and then `z_score_per_gene` on a single-gene, three-sample matrix.
///
/// Only structure is checked: labels must match the input, and the resulting
/// batch must have 1 row and 3 columns.
#[test]
fn test_composable_normalization() {
    let single_gene_rows = vec![vec![Some(1.0), Some(3.0), Some(7.0)]];
    let matrix = create_test_matrix(vec!["GENE_A"], vec!["S1", "S2", "S3"], single_gene_rows);

    // Feed the output of the first normalization straight into the second.
    let log_scaled = Normalize::log2(&matrix).expect("Failed log2");
    let standardized =
        Normalize::z_score_per_gene(&log_scaled).expect("Failed z-score after log2");

    assert_eq!(standardized.genes, matrix.genes);
    assert_eq!(standardized.samples, matrix.samples);

    let row_count = standardized.values.num_rows();
    let column_count = standardized.values.num_columns();
    assert_eq!(row_count, 1);
    assert_eq!(column_count, 3);
}
/// Checks that `log2`, `z_score_per_gene` and `quantile` each return a matrix
/// whose gene and sample labels equal those of the 3x2 input.
#[test]
fn test_normalization_preserves_structure() {
    let rows = vec![
        vec![Some(1.0), Some(2.0)],
        vec![Some(3.0), Some(4.0)],
        vec![Some(5.0), Some(6.0)],
    ];
    let matrix = create_test_matrix(
        vec!["GENE_1", "GENE_2", "GENE_3"],
        vec!["SAMPLE_A", "SAMPLE_B"],
        rows,
    );

    // Label comparison shared by all three normalizations.
    let assert_labels_match = |result: &ExpressionMatrix| {
        assert_eq!(result.genes, matrix.genes);
        assert_eq!(result.samples, matrix.samples);
    };

    let log_scaled = Normalize::log2(&matrix).expect("log2");
    assert_labels_match(&log_scaled);

    let standardized = Normalize::z_score_per_gene(&matrix).expect("z_score");
    assert_labels_match(&standardized);

    let quantile_normalized = Normalize::quantile(&matrix).expect("quantile");
    assert_labels_match(&quantile_normalized);
}