use crate::feat::Meros;
use crate::feat::{canonical_representation, char_to_value, fmix64 as murmur_hash3};
use crate::Base;
use crate::OptionPair;
use crate::BITS_PER_CHAR;
use std::collections::VecDeque;
/// Converts a raw l-mer into the candidate value handed to the minimizer window.
///
/// The l-mer is first passed through `canonical_representation` together with
/// `meros.l_mer`. If `meros.spaced_seed_mask` is non-zero the result is ANDed
/// with that mask. The outcome is finally XORed with `meros.toggle_mask`.
#[inline]
fn to_candidate_lmer(meros: &Meros, lmer: u64) -> u64 {
    let canonical = canonical_representation(lmer, meros.l_mer);
    // A zero spaced-seed mask means "no mask": the canonical value is used unchanged.
    let masked = if meros.spaced_seed_mask > 0 {
        canonical & meros.spaced_seed_mask
    } else {
        canonical
    };
    masked ^ meros.toggle_mask
}
// Unit tests for the private helpers of this file.
#[cfg(test)]
mod tests {
use super::*;
// Pins the output of `to_candidate_lmer` for one hand-written 20-bit input.
// NOTE(review): the meaning of the `Meros::new` arguments is not visible in
// this file — confirm against the `Meros` constructor before changing them.
#[test]
fn test_to_candidate_lmer() {
let meros = Meros::new(11, 3, Some(0), None, None);
let lmer = 0b11001100110011001100u64;
let candidate = to_candidate_lmer(&meros, lmer);
assert_eq!(candidate, 0b11110u64);
}
}
/// One entry of the minimizer queue: a candidate value and the position it was
/// inserted at.
#[derive(Debug)]
pub struct MinimizerData {
/// Position recorded when the entry was created. `MinimizerWindow::next`
/// passes its running `count` here and later uses it to expire old entries.
pub pos: usize,
/// Candidate l-mer value; this is what the queue compares.
pub candidate_lmer: u64,
}
impl MinimizerData {
pub fn new(candidate_lmer: u64, pos: usize) -> Self {
Self {
candidate_lmer,
pos,
}
}
}
/// Sliding window over candidate l-mers that reports the front (smallest)
/// candidate whenever it changes.
pub struct MinimizerWindow {
/// Live candidates. `next` removes every back entry strictly greater than
/// the incoming candidate before pushing, so values never decrease from
/// front to back.
queue: VecDeque<MinimizerData>,
/// Only ever set to 0 (in `new` and `clear`) in the code visible here.
queue_pos: usize,
/// Window size: entries whose `pos` is below `count - capacity` are expired.
capacity: usize,
/// Number of candidates pushed through the queue since creation or the last
/// `clear`; it is not advanced by the `capacity == 1` shortcut in `next`.
count: usize,
}
impl MinimizerWindow {
pub fn new(capacity: usize) -> Self {
Self {
queue: VecDeque::with_capacity(capacity),
capacity,
count: 0,
queue_pos: 0,
}
}
#[inline]
pub fn next(&mut self, candidate_lmer: u64) -> Option<u64> {
if self.capacity == 1 {
return Some(candidate_lmer);
}
let data = MinimizerData::new(candidate_lmer, self.count);
while let Some(m_data) = self.queue.back() {
if m_data.candidate_lmer > candidate_lmer {
self.queue.pop_back();
} else {
break;
}
}
let mut changed = false;
if (self.queue.is_empty() && self.count >= self.capacity) || self.count == self.capacity {
changed = true
}
self.queue.push_back(data);
while !self.queue.is_empty()
&& self.queue.front().map_or(false, |front| {
self.count >= self.capacity && front.pos < self.count - self.capacity
})
{
self.queue.pop_front();
changed = true;
}
self.count += 1;
if changed {
self.queue.front().map(|front| front.candidate_lmer)
} else {
None
}
}
fn clear(&mut self) {
self.count = 0;
self.queue_pos = 0;
self.queue.clear();
}
}
/// Rolling accumulator that packs successive encoded characters into a `u64`
/// l-mer.
#[derive(Clone, Copy)]
pub struct Cursor {
/// Number of characters consumed since creation or the last `clear`.
pos: usize,
/// Number of characters that must be consumed before an l-mer is reported;
/// taken from `meros.l_mer`.
capacity: usize,
/// Current packed value, always masked with `mask`.
value: u64,
/// Bit mask applied after every shift; taken from `meros.mask`.
mask: u64,
}
impl Cursor {
pub fn new(meros: &Meros) -> Self {
Self {
pos: 0,
value: 0,
capacity: meros.l_mer,
mask: meros.mask,
}
}
fn next_lmer(&mut self, item: u64) -> Option<u64> {
self.value = ((self.value << BITS_PER_CHAR) | item) & self.mask;
self.pos += 1;
if self.pos >= self.capacity {
return Some(self.value);
}
None
}
#[inline]
fn clear(&mut self) {
self.pos = 0;
self.value = 0;
}
}
/// Iterator state for scanning one byte sequence and yielding hashed
/// minimizers.
pub struct MinimizerIterator<'a> {
/// Rolling l-mer accumulator, fed one encoded character at a time.
cursor: Cursor,
/// Sliding window that decides when a new minimizer is reported.
window: MinimizerWindow,
/// The bytes being scanned.
seq: &'a [u8],
/// Shared parameters used to build candidates and to hash minimizers.
meros: &'a Meros,
/// Index of the next byte of `seq` to read.
pos: usize,
/// One past the last index to read; `new` sets it to `seq.len()`.
end: usize,
/// Number of items yielded so far; also the first element of each yielded
/// tuple.
pub size: usize,
}
impl<'a> MinimizerIterator<'a> {
pub fn new(seq: &'a [u8], cursor: Cursor, window: MinimizerWindow, meros: &'a Meros) -> Self {
MinimizerIterator {
cursor,
window,
seq,
meros,
pos: 0,
size: 0,
end: seq.len(),
}
}
fn clear_state(&mut self) {
self.cursor.clear();
self.window.clear();
}
pub fn seq_size(&self) -> usize {
self.end
}
}
impl<'a> Iterator for MinimizerIterator<'a> {
    /// `(running count, hashed minimizer)`; the count starts at 1.
    type Item = (usize, u64);

    /// Advances through the sequence until the window reports a new minimizer.
    ///
    /// * `\n` and `\r` bytes are skipped without touching any state.
    /// * A byte for which `char_to_value` returns `None` resets the cursor and
    ///   the window, so no l-mer spans it.
    /// * Every other byte is fed to the cursor; once an l-mer is available its
    ///   candidate is pushed into the window, and a reported minimizer is
    ///   XORed with `meros.toggle_mask` and hashed before being yielded.
    ///
    /// Returns `None` once the whole sequence has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        while self.pos < self.end {
            let ch = self.seq[self.pos];
            self.pos += 1;

            if ch == b'\n' || ch == b'\r' {
                continue;
            }

            let code = match char_to_value(ch) {
                Some(code) => code,
                None => {
                    self.clear_state();
                    continue;
                }
            };

            if let Some(lmer) = self.cursor.next_lmer(code) {
                let candidate_lmer = to_candidate_lmer(self.meros, lmer);
                if let Some(minimizer) = self.window.next(candidate_lmer) {
                    // Undo the toggle applied to the candidate before hashing.
                    let hashed = murmur_hash3(minimizer ^ self.meros.toggle_mask);
                    self.size += 1;
                    return Some((self.size, hashed));
                }
            }
        }
        None
    }
}
impl<'a> Base<MinimizerIterator<'a>> {
    /// Sequence length of each contained iterator, rendered as a string.
    pub fn seq_size_str(&self) -> OptionPair<String> {
        self.body.apply(|it| it.seq_size().to_string())
    }

    /// Sequence lengths of the contained iterators, combined by
    /// `reduce_str` with `"|"`.
    pub fn fmt_seq_size(&self) -> String {
        self.body.reduce_str("|", |it| it.seq_size().to_string())
    }

    /// Yielded-item counts (`size`) of the contained iterators, combined by
    /// `reduce_str` with `"|"`.
    pub fn fmt_size(&self) -> String {
        self.body.reduce_str("|", |it| it.size.to_string())
    }

    /// Runs `f` over each contained iterator, collecting into one vector.
    ///
    /// `f` receives the shared output vector, the iterator, and an offset. The
    /// first (or only) iterator gets offset 0; for a pair, the second iterator
    /// gets whatever `f` returned for the first.
    pub fn fold<F, T>(&mut self, mut f: F) -> Vec<T>
    where
        F: FnMut(&mut Vec<T>, &mut MinimizerIterator<'a>, usize) -> usize,
        T: Clone,
    {
        let mut collected = Vec::new();
        match &mut self.body {
            OptionPair::Pair(first, second) => {
                let offset = f(&mut collected, first, 0);
                f(&mut collected, second, offset);
            }
            OptionPair::Single(only) => {
                f(&mut collected, only, 0);
            }
        }
        collected
    }

    /// Half-open `(start, end)` ranges built from the current `size` of each
    /// iterator; the second range of a pair starts where the first ends.
    pub fn range(&self) -> OptionPair<(usize, usize)> {
        match &self.body {
            OptionPair::Pair(first, second) => {
                let split = first.size;
                OptionPair::Pair((0, split), (split, second.size + split))
            }
            OptionPair::Single(only) => OptionPair::Single((0, only.size)),
        }
    }
}
/// Wraps every sequence in `sequence` in a fresh [`MinimizerIterator`].
///
/// Each iterator gets its own `Cursor` and a `MinimizerWindow` sized by
/// `meros.window_size()`. The header is cloned from the input and the
/// single/pair shape of the body is preserved.
pub fn scan_sequence<'a>(
    sequence: &'a Base<Vec<u8>>,
    meros: &'a Meros,
) -> Base<MinimizerIterator<'a>> {
    let make_iter = |seq: &'a Vec<u8>| {
        MinimizerIterator::new(
            seq,
            Cursor::new(meros),
            MinimizerWindow::new(meros.window_size()),
            meros,
        )
    };

    let header = sequence.header.clone();
    let body = match &sequence.body {
        OptionPair::Single(only) => OptionPair::Single(make_iter(only)),
        OptionPair::Pair(first, second) => OptionPair::Pair(make_iter(first), make_iter(second)),
    };
    Base::new(header, body)
}