use crate::shared::sequence::{nucleotides_inv, NUCLEOTIDES};
use crate::shared::utils::normalize_transition_matrix;
use crate::shared::Dna;
use anyhow::{anyhow, Result};
use ndarray::{Array1, Array2, Axis};
use rand::Rng;
use rand_distr::{Distribution, Uniform, WeightedAliasIndex};
#[derive(Clone, Debug)]
pub struct DiscreteDistribution {
distribution: WeightedAliasIndex<f64>,
}
impl DiscreteDistribution {
pub fn new(weights: Vec<f64>) -> Result<Self> {
if !weights.iter().all(|&x| x >= 0.) {
return Err(anyhow!(
"Error when creating distribution: negative weights"
))?;
}
let distribution = match weights.iter().sum::<f64>().abs() < 1e-10 {
true => WeightedAliasIndex::new(vec![1.; weights.len()]) .map_err(|e| anyhow!(format!("Error when creating distribution: {}", e)))?,
false => WeightedAliasIndex::new(weights)
.map_err(|e| anyhow!(format!("Error when creating distribution: {}", e)))?
};
Ok(DiscreteDistribution { distribution })
}
pub fn generate<R: Rng>(&mut self, rng: &mut R) -> usize {
self.distribution.sample(rng)
}
}
impl Default for DiscreteDistribution {
fn default() -> Self {
DiscreteDistribution {
distribution: WeightedAliasIndex::new(vec![1.]).unwrap(),
}
}
}
#[derive(Clone, Debug)]
pub struct ErrorDistribution {
pub is_error: Uniform<f64>,
pub nucleotide: Uniform<usize>,
}
impl Default for ErrorDistribution {
fn default() -> Self {
ErrorDistribution {
is_error: Uniform::new(0.0, 1.0),
nucleotide: Uniform::new_inclusive(0, 3),
}
}
}
#[derive(Default, Clone, Debug)]
pub struct MarkovDNA {
transition_matrix: Vec<DiscreteDistribution>, }
impl MarkovDNA {
pub fn new(transition_probs: Array2<f64>) -> Result<Self> {
let mut transition_matrix = Vec::with_capacity(transition_probs.dim().0);
for probs in transition_probs.axis_iter(Axis(0)) {
transition_matrix.push(DiscreteDistribution::new(probs.to_vec())?);
}
Ok(MarkovDNA { transition_matrix })
}
pub fn generate<R: Rng>(&mut self, length: usize, previous_nucleotide: u8, rng: &mut R) -> Dna {
let mut dna = Dna {
seq: Vec::with_capacity(length),
};
let mut current_state = nucleotides_inv(previous_nucleotide);
for _ in 0..length {
current_state = self.transition_matrix[current_state].generate(rng);
dna.seq.push(NUCLEOTIDES[current_state]);
}
dna
}
}
pub fn calc_steady_state_dist(transition_matrix: &Array2<f64>) -> Result<Vec<f64>> {
let mat = normalize_transition_matrix(transition_matrix)?;
let epsilon: f64 = 1e-10;
if mat.sum() == 0.0 {
return Ok(vec![0.; mat.dim().0]);
}
let n = mat.nrows();
let mut vec = Array1::from_elem(n, 1.0 / n as f64);
for _ in 0..10000 {
let vec_next = mat.dot(&vec);
let norm = vec_next.sum();
let vec_next = vec_next / norm;
if (&vec_next - &vec).mapv(|a| a.abs()).sum() < epsilon {
return Ok(vec_next.to_vec());
}
vec = vec_next;
}
Err(anyhow!("No suitable eigenvector found"))?
}