use std::borrow::Borrow;
use itertools::Itertools;
use ndarray::prelude::*;
mod dnamotif;
pub mod errors;
mod protmotif;
pub use self::dnamotif::DNAMotif;
pub use self::errors::{Error, Result};
pub use self::protmotif::ProtMotif;
pub const DEF_PSEUDO: f32 = 0.5;
pub const EPSILON: f32 = 1e-5;
pub const INVALID_MONO: u8 = 255;
#[derive(Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)]
pub struct ScoredPos {
pub loc: usize,
pub sum: f32,
pub scores: Vec<f32>,
}
impl Default for ScoredPos {
fn default() -> ScoredPos {
ScoredPos {
loc: 0,
sum: f32::NEG_INFINITY,
scores: Vec::new(),
}
}
}
pub trait Motif {
const LK: [u8; 127] = [INVALID_MONO; 127];
const MONOS: &'static [u8] = b"";
const MONO_CT: usize = 0;
fn seqs_to_weights(seqs: &[Vec<u8>], _pseudos: Option<&[f32]>) -> Result<Array2<f32>> {
if _pseudos.is_some() {
if _pseudos.unwrap().len() != Self::MONO_CT {
return Err(Error::InvalidPseudos {
expected: Self::MONO_CT as u8,
received: _pseudos.unwrap().len() as u8,
});
}
}
let pseudos: Array1<f32> = match _pseudos {
Some(p) => Array::from_vec(p.iter().cloned().collect()),
None => Array::from_vec(vec![DEF_PSEUDO; Self::MONO_CT]),
};
if seqs.is_empty() {
return Err(Error::EmptyMotif);
}
let seqlen = seqs[0].len();
let mut counts = Array2::zeros((seqlen, Self::MONO_CT));
for mut row in counts.axis_iter_mut(Axis(0)) {
row += &pseudos;
}
for seq in seqs {
if seq.len() != seqlen {
return Err(Error::InconsistentLen);
}
for (mut ctr, base) in counts.axis_iter_mut(Axis(0)).zip(seq.iter()) {
ctr += &Self::incr(*base)?;
}
}
Ok(counts)
}
fn lookup(mono: u8) -> Result<usize> {
if mono >= 127 {
Err(Error::InvalidMonomer { mono })
} else {
let idx = Self::LK[mono as usize];
if idx == INVALID_MONO {
Err(Error::InvalidMonomer { mono })
} else {
Ok(idx as usize)
}
}
}
fn incr(mono: u8) -> Result<Array1<f32>>;
fn rev_lk(idx: usize) -> u8;
fn len(&self) -> usize;
fn is_empty(&self) -> bool {
self.len() == 0usize
}
fn degenerate_consensus(&self) -> Vec<u8>;
fn get_scores(&self) -> &Array2<f32>;
fn get_min_score(&self) -> f32;
fn get_max_score(&self) -> f32;
fn get_bits() -> f32;
fn raw_score<C, T>(&self, seq_it: T) -> Result<(usize, f32, Vec<f32>)>
where
C: Borrow<u8>,
T: IntoIterator<Item = C>,
{
let pssm_len = self.len();
let mut best_start = 0;
let mut best_score = -1.0;
let mut best_m = Vec::new();
let seq = seq_it.into_iter().map(|c| *c.borrow()).collect_vec();
let scores = self.get_scores();
for start in 0..=seq.len() - pssm_len {
let m: Vec<f32> = match (0..pssm_len)
.map(|i| match Self::lookup(seq[start + i]) {
Err(e) => Err(e),
Ok(pos) => Ok(scores[[i, pos]]),
})
.collect()
{
Ok(m) => m,
Err(e) => return Err(e),
};
let tot = m.iter().sum();
if tot > best_score {
best_score = tot;
best_start = start;
best_m = m;
}
}
Ok((best_start, best_score, best_m))
}
fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos>
where
C: Borrow<u8>,
T: IntoIterator<Item = C>,
{
let pssm_len = self.len();
let seq = seq_it.into_iter().map(|c| *c.borrow()).collect_vec();
if seq.len() < pssm_len {
return Err(Error::QueryTooShort {
motif_len: pssm_len,
query_len: seq.len(),
});
}
let min_score = self.get_min_score();
let max_score = self.get_max_score();
if abs_diff_eq!(max_score, min_score) {
return Err(Error::NullMotif);
}
let (best_start, best_score, best_m) = self.raw_score(&seq)?;
Ok(ScoredPos {
loc: best_start,
sum: (best_score - min_score) / (max_score - min_score),
scores: best_m,
})
}
fn info_content(&self) -> f32 {
fn ent<'a, I>(probs: I) -> f32
where
I: Iterator<Item = &'a f32>,
{
probs
.map(|p| {
if *p == 0.0 {
0.0
} else {
-1.0 * *p * p.log(2.0)
}
})
.sum()
}
let bits = Self::get_bits();
let scores = self.get_scores();
let mut tot = 0.0;
for row in scores.rows() {
tot += bits - ent(row.iter());
}
tot
}
}