use core::f64;
use crate::data_structs::typedef::CountType;
use crate::prelude::BsxBatch;
pub trait SegmentationData {
fn cost_function(
&self,
r: usize,
t: usize,
) -> f64;
fn len(&self) -> usize;
fn is_empty(&self) -> bool {
self.len() == 0
}
}
pub struct MethDataBinom {
count_m_cumsum: Vec<u32>,
count_total_cumsum: Vec<u32>,
}
impl From<&BsxBatch> for MethDataBinom {
fn from(value: &BsxBatch) -> Self {
assert_eq!(
value.count_m().null_count(),
0,
"'count_m' column cannot contain nulls"
);
assert_eq!(
value.count_total().null_count(),
0,
"'count_total' column cannot contain nulls"
);
let (count_m, count_total) = unsafe {
(
value
.count_m()
.to_vec_null_aware()
.left()
.unwrap_unchecked(),
value
.count_total()
.to_vec_null_aware()
.left()
.unwrap_unchecked(),
)
};
Self::new(&count_m, &count_total)
}
}
impl MethDataBinom {
pub fn new(
count_m: &[CountType],
count_total: &[CountType],
) -> Self {
assert_eq!(
count_m.len(),
count_total.len(),
"count_m and count_total must have the same length"
);
let count_m_cumsum = count_m.iter().fold(vec![0], |mut acc, new| {
acc.push(acc.last().unwrap() + *new as u32);
acc
});
let count_total_cumsum = count_total.iter().fold(vec![0], |mut acc, new| {
acc.push(acc.last().unwrap() + *new as u32);
acc
});
Self {
count_m_cumsum,
count_total_cumsum,
}
}
}
impl SegmentationData for MethDataBinom {
fn cost_function(
&self,
r: usize,
t: usize,
) -> f64 {
let m = self.count_m_cumsum[t + 1] - self.count_m_cumsum[r];
let n = self.count_total_cumsum[t + 1] - self.count_total_cumsum[r];
if n == 0 {
return 0.0; }
let p = m as f64 / n as f64;
if p == 0.0 || p == 1.0 {
return 0.0;
}
-2.0 * (m as f64 * p.ln() + (n - m) as f64 * (1.0 - p).ln())
}
fn len(&self) -> usize {
self.count_m_cumsum.len() - 1
}
}
pub fn pelt<T: SegmentationData>(
data: &T,
beta: f64,
min_size: usize,
) -> (Vec<usize>, f64) {
let n = data.len();
#[allow(non_snake_case)]
let mut F = vec![f64::INFINITY; n + 1];
let mut prev = vec![-1; n + 1];
let mut candidate_set = vec![0];
F[0] = -beta;
for t in 0..n {
let mut best_cost = f64::INFINITY;
let mut best_r = -1;
for r in candidate_set.iter() {
if (t + 1 - *r) >= min_size {
let c = F[*r] + data.cost_function(*r, t) + beta;
if c < best_cost {
best_cost = c;
best_r = *r as i32;
}
}
}
F[t + 1] = best_cost;
prev[t + 1] = best_r;
candidate_set.retain(|r| {
F[*r] + data.cost_function(*r, t) <= F[t + 1]
});
candidate_set.push(t + 1); }
let mut cps = Vec::new();
let mut t = n as i32;
while t > 0 {
let r = prev[t as usize]; if r < 0 {
break; }
cps.push(r as usize); t = r; }
cps.retain(|&x| x > 0);
cps.sort();
(cps, F[n])
}
pub enum SegmentAlgorithm {
Pelt(Option<f64>, usize),
}