pub const LOWER_QUANTILE: f32 = 0.15;
pub const UPPER_QUANTILE: f32 = 0.65;
pub struct EstimateResult {
pub lower: Option<f32>,
pub estimate: Option<f32>,
pub upper: Option<f32>,
pub no_mapping_count: u32,
}
pub trait Estimate {
fn generate_estimates(&mut self) -> crate::Result<(Vec<f32>, u32)>;
fn estimate(
&mut self,
finite: bool,
lower_quant: Option<f32>,
upper_quant: Option<f32>,
) -> crate::Result<EstimateResult> {
let (estimates, no_mapping_count) = self.generate_estimates()?;
let iter: Box<dyn Iterator<Item = f32>> = if finite {
Box::new(estimates.iter().filter(|&x| x.is_finite()).copied())
} else {
Box::new(estimates.iter().copied())
};
let (lower, median, upper) = median(iter, lower_quant, upper_quant);
Ok(EstimateResult {
lower,
estimate: median,
upper,
no_mapping_count,
})
}
}
fn median(
iter: impl Iterator<Item = f32>,
lower_quant: Option<f32>,
upper_quant: Option<f32>,
) -> (Option<f32>, Option<f32>, Option<f32>) {
let mut values: Vec<f32> = iter.collect();
let len = values.len();
if len == 0 {
return (None, None, None);
}
values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mut quantiles = vec![0.5];
if let Some(lower) = lower_quant {
quantiles.push(lower);
}
if let Some(upper) = upper_quant {
quantiles.push(upper);
}
let quantiles: Vec<_> = quantiles
.iter()
.map(|&q| calculate_quantile(&values, q))
.collect();
match (lower_quant, upper_quant) {
(Some(_), Some(_)) => (quantiles[1], quantiles[0], quantiles[2]),
(Some(_), None) => (quantiles[1], quantiles[0], None),
(None, Some(_)) => (quantiles[0], quantiles[1], quantiles[2]),
(None, None) => (None, quantiles[0], None),
}
}
fn calculate_quantile(data: &[f32], quantile: f32) -> Option<f32> {
if data.is_empty() {
return None;
}
if !(0.0..=1.0).contains(&quantile) {
panic!("Quantile must be between 0.0 and 1.0");
}
let n = data.len();
let pos = quantile * (n - 1) as f32;
let idx = pos.floor() as usize;
let frac = pos - idx as f32;
if idx + 1 < n {
Some(data[idx] * (1.0 - frac) + data[idx + 1] * frac)
} else {
Some(data[idx])
}
}
pub(crate) fn per_read_estimate(
read_len: usize,
avg_target_len: f32,
n_target_reads: usize,
n_ovlaps: usize,
ovlap_thresh: u32,
) -> f32 {
if n_ovlaps == 0 {
return f32::INFINITY;
}
let ovlap_ratio: f32 = n_target_reads as f32 / n_ovlaps as f32;
read_len as f32
+ ovlap_ratio * (read_len as f32 + avg_target_len - 2.0 * ovlap_thresh as f32 + 1.0)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_median_odd_length() {
let data = vec![1.0f32, 3.0, 5.0, 7.0, 9.0];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(5.0), None)
);
}
#[test]
fn test_median_unsorted() {
let data = vec![3.0f32, 1.0, 7.0, 5.0, 9.0];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(5.0), None)
);
}
#[test]
fn test_median_even_length() {
let data = vec![1.0f32, 3.0, 5.0, 7.0];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(4.0), None)
);
}
#[test]
fn test_median_single_element() {
let data = vec![10.0f32];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(10.0), None)
);
}
#[test]
fn test_median_empty() {
let data: Vec<f32> = vec![];
assert_eq!(median(data.into_iter(), None, None), (None, None, None));
}
#[test]
fn test_median_with_negative_numbers() {
let data = vec![-3.0f32, 1.0, 0.0, 3.0, -1.0];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(0.0), None)
);
}
#[test]
fn test_median_with_positive_infinity() {
let data = vec![1.0f32, 2.0, 3.0, f32::INFINITY];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(2.5), None)
);
}
#[test]
fn test_median_with_negative_infinity() {
let data = vec![f32::NEG_INFINITY, 1.0, 2.0, 3.0];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(1.5), None)
);
}
#[test]
fn test_median_with_both_infinities() {
let data = vec![f32::NEG_INFINITY, 1.0, 2.0, f32::INFINITY];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(1.5), None)
);
}
#[test]
fn test_median_with_only_infinity() {
let data = vec![f32::INFINITY, f32::INFINITY];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(f32::INFINITY), None)
);
}
#[test]
fn test_median_with_only_negative_infinity() {
let data = vec![f32::NEG_INFINITY, f32::NEG_INFINITY];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(f32::NEG_INFINITY), None)
);
}
#[test]
fn test_median_with_inf_and_regular_values() {
let data = vec![-1.0, f32::NEG_INFINITY, 0.0, 1.0, f32::INFINITY];
assert_eq!(
median(data.into_iter(), None, None),
(None, Some(0.0), None)
);
}
#[test]
fn test_median_with_quantiles() {
let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
assert_eq!(
median(data.into_iter(), Some(0.15), Some(0.65)),
(Some(2.35), Some(5.5), Some(6.85))
);
}
#[test]
fn test_calculate_quantile_with_infinity_in_quantile() {
let data = vec![
1.0f32,
2.0,
3.0,
4.0,
5.0,
6.0,
f32::INFINITY,
f32::INFINITY,
f32::INFINITY,
f32::INFINITY,
];
assert_eq!(
median(data.into_iter(), Some(0.15), Some(0.65)),
(Some(2.35), Some(5.5), Some(f32::INFINITY))
);
}
#[test]
#[should_panic(expected = "Quantile must be between 0.0 and 1.0")]
fn test_calculate_quantile_panics() {
let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
calculate_quantile(&data, 1.1);
}
#[test]
fn test_per_read_estimate() {
let read_len = 100;
let avg_target_len = 200.0;
let n_target_reads = 1000;
let n_ovlaps = 100;
let ovlap_thresh = 10;
let expected = 2910.0;
assert_eq!(
per_read_estimate(
read_len,
avg_target_len,
n_target_reads,
n_ovlaps,
ovlap_thresh
),
expected
);
}
#[test]
fn test_per_read_estimate_zero_ovlaps() {
let read_len = 100;
let avg_target_len = 200.0;
let n_target_reads = 1000;
let n_ovlaps = 0;
let ovlap_thresh = 10;
let expected = f32::INFINITY;
assert_eq!(
per_read_estimate(
read_len,
avg_target_len,
n_target_reads,
n_ovlaps,
ovlap_thresh
),
expected
);
}
}