use std::borrow::Cow;
use std::cmp::Ordering;
use super::super::QualFilter;
use super::bit_encoding::*;
use super::nthash::NtHashIterator;
#[derive(Debug)]
pub struct SplitKmer<'a, IntT> {
k: usize,
upper_mask: IntT,
lower_mask: IntT,
seq: Cow<'a, [u8]>,
seq_len: usize,
qual: Option<&'a [u8]>,
qual_filter: QualFilter,
min_qual: u8,
index: usize,
upper: IntT,
lower: IntT,
middle_base: u8,
rc: bool,
rc_upper: IntT,
rc_lower: IntT,
rc_middle_base: u8,
hash_gen: Option<NtHashIterator>,
}
impl<'a, IntT: for<'b> UInt<'b>> SplitKmer<'a, IntT> {
#[inline(always)]
fn valid_qual(idx: usize, qual: Option<&'a [u8]>, min_qual: u8) -> bool {
match qual {
Some(qual_seq) => (qual_seq[idx] - 33) > min_qual, None => true,
}
}
#[allow(clippy::too_many_arguments)]
fn build(
seq: &[u8],
seq_len: usize,
qual: Option<&'a [u8]>,
k: usize,
idx: &mut usize,
qual_filter: &QualFilter,
min_qual: u8,
is_reads: bool,
rc: bool,
) -> Option<(IntT, IntT, u8, Option<NtHashIterator>)> {
if *idx + k >= seq_len {
return None;
}
let mut upper = IntT::zero_init();
let mut lower = IntT::zero_init();
let mut middle_base: u8 = 0;
let middle_idx = k.div_ceil(2) - 1;
let mut i = 0;
while i < k {
if valid_base(seq[i + *idx])
&& (*qual_filter != QualFilter::Strict
|| Self::valid_qual(i + *idx, qual, min_qual))
{
let next_base = encode_base(seq[i + *idx]);
match i.cmp(&middle_idx) {
Ordering::Greater => {
lower <<= 2;
lower |= IntT::from_encoded_base(next_base);
}
Ordering::Less => {
upper <<= 2;
upper |= IntT::from_encoded_base(next_base) << (middle_idx * 2);
}
Ordering::Equal => {
middle_base = next_base;
}
}
i += 1;
} else {
*idx += i + 1;
if *idx + k >= seq_len {
return None;
}
upper = IntT::zero_init();
lower = IntT::zero_init();
middle_base = 0;
i = 0;
}
}
let hash_gen = if is_reads {
Some(NtHashIterator::new(&seq[*idx..(*idx + k)], k, rc))
} else {
None
};
*idx += k - 1;
Some((upper, lower, middle_base, hash_gen))
}
pub fn self_palindrome(&mut self) -> bool {
self.rc && self.upper == self.rc_upper && self.lower == self.rc_lower
}
fn update_rc(&mut self) {
self.rc_upper = self.lower.rev_comp(self.k - 1) & self.upper_mask;
self.rc_middle_base = rc_base(self.middle_base);
self.rc_lower = self.upper.rev_comp(self.k - 1) & self.lower_mask;
}
fn roll_fwd(&mut self) -> bool {
let mut success = false;
self.index += 1;
if self.index >= self.seq_len {
return success;
}
let base = self.seq[self.index];
if !valid_base(base)
|| (self.qual_filter == QualFilter::Strict
&& !Self::valid_qual(self.index, self.qual, self.min_qual))
{
let new_kmer = Self::build(
&self.seq,
self.seq_len,
self.qual,
self.k,
&mut self.index,
&self.qual_filter,
self.min_qual,
self.hash_gen.is_some(),
self.rc,
);
if let Some(kmer_tuple) = new_kmer {
(self.upper, self.lower, self.middle_base, self.hash_gen) = kmer_tuple;
if self.rc {
self.update_rc();
}
success = true;
}
} else {
let half_k: usize = (self.k - 1) / 2;
let new_base = encode_base(base);
if let Some(ref mut roll_hash) = self.hash_gen {
let old_base = self.upper >> ((self.k - 2) * 2);
roll_hash.roll_fwd(old_base.as_u8(), new_base);
}
self.upper = (self.upper << 2
| (IntT::from_encoded_base(self.middle_base) << (half_k * 2)))
& self.upper_mask;
self.middle_base = (self.lower >> (2 * (half_k - 1))).as_u8();
self.lower =
((self.lower << 2) | (IntT::from_encoded_base(new_base))) & self.lower_mask;
if self.rc {
self.rc_lower = (self.rc_lower >> 2
| ((IntT::from_encoded_base(self.rc_middle_base)) << (2 * (half_k - 1))))
& self.lower_mask;
self.rc_middle_base = rc_base(self.middle_base);
self.rc_upper = (self.rc_upper >> 2
| (IntT::from_encoded_base(rc_base(new_base))) << (2 * ((half_k * 2) - 1)))
& self.upper_mask;
}
success = true;
}
success
}
#[allow(clippy::too_many_arguments)]
pub fn new(
seq: Cow<'a, [u8]>,
seq_len: usize,
qual: Option<&'a [u8]>,
k: usize,
rc: bool,
min_qual: u8,
qual_filter: QualFilter,
is_reads: bool,
) -> Option<Self> {
let mut index = 0;
let first_kmer = Self::build(
&seq,
seq_len,
qual,
k,
&mut index,
&qual_filter,
min_qual,
is_reads,
rc,
);
if let Some((upper, lower, middle_base, hash_gen)) = first_kmer {
let (lower_mask, upper_mask) = IntT::generate_masks(k);
let mut split_kmer = Self {
k,
upper_mask,
lower_mask,
seq_len,
seq,
qual,
qual_filter,
min_qual,
upper,
lower,
middle_base,
rc,
rc_upper: IntT::zero_init(),
rc_lower: IntT::zero_init(),
rc_middle_base: 0,
index,
hash_gen,
};
if rc {
split_kmer.update_rc();
}
Some(split_kmer)
} else {
None
}
}
pub fn get_curr_kmer(&self) -> (IntT, u8, bool) {
let split_kmer = self.upper | self.lower;
if self.rc {
let rc_split_kmer = self.rc_upper | self.rc_lower;
if split_kmer > rc_split_kmer {
return (rc_split_kmer, self.rc_middle_base, true);
}
}
(split_kmer, self.middle_base, false)
}
pub fn get_hash(&self) -> u64 {
self.hash_gen
.as_ref()
.expect("Trying to get unitialised hash")
.curr_hash()
}
pub fn get_next_kmer(&mut self) -> Option<(IntT, u8, bool)> {
let next = self.roll_fwd();
match next {
true => Some(self.get_curr_kmer()),
false => None,
}
}
pub fn get_middle_pos(&self) -> usize {
let middle_idx = self.k.div_ceil(2) - 1;
self.index - middle_idx
}
pub fn middle_base_qual(&self) -> bool {
if self.qual.is_some() {
match self.qual_filter {
QualFilter::Middle | QualFilter::Strict => {
Self::valid_qual(self.get_middle_pos(), self.qual, self.min_qual)
}
QualFilter::NoFilter => true,
}
} else {
true
}
}
}