use crate::kmer::{Kmer, KmerMode};
use ragc_common::Contig;
pub fn enumerate_kmers(contig: &Contig, k: usize) -> Vec<u64> {
let mut vec = Vec::new();
if contig.len() < k {
return vec;
}
vec.reserve(contig.len() + 1 - k);
let mut kmer = Kmer::new(k as u32, KmerMode::Canonical);
for &base in contig {
if base > 3 {
kmer.reset();
} else {
kmer.insert(base as u64);
if kmer.is_full() {
vec.push(kmer.data());
}
}
}
vec
}
pub fn remove_non_singletons(vec: &mut Vec<u64>, virtual_begin: usize) {
let mut curr_end = virtual_begin;
let mut i = virtual_begin;
while i < vec.len() {
let mut j = i + 1;
while j < vec.len() && vec[i] == vec[j] {
j += 1;
}
if i + 1 == j {
vec[curr_end] = vec[i];
curr_end += 1;
}
i = j;
}
vec.truncate(curr_end);
}
pub fn remove_non_singletons_with_duplicates(
vec: &mut Vec<u64>,
duplicated: &mut Vec<u64>,
virtual_begin: usize,
) {
duplicated.clear();
let mut curr_end = virtual_begin;
let mut i = virtual_begin;
while i < vec.len() {
let mut j = i + 1;
while j < vec.len() && vec[i] == vec[j] {
j += 1;
}
if i + 1 == j {
vec[curr_end] = vec[i];
curr_end += 1;
} else {
duplicated.push(vec[i]);
}
i = j;
}
vec.truncate(curr_end);
}
pub fn find_candidate_kmers(contig: &Contig, k: usize) -> Vec<u64> {
let mut kmers = enumerate_kmers(contig, k);
kmers.sort_unstable();
remove_non_singletons(&mut kmers, 0);
kmers
}
fn set_difference_sorted(left: &[u64], right: &[u64]) -> Vec<u64> {
let mut result = Vec::new();
let mut i = 0;
let mut j = 0;
while i < left.len() && j < right.len() {
if left[i] < right[j] {
result.push(left[i]);
i += 1;
} else if left[i] > right[j] {
j += 1;
} else {
i += 1;
j += 1;
}
}
while i < left.len() {
result.push(left[i]);
i += 1;
}
result
}
pub fn find_new_splitters_kmers(
contig: &[u8],
k: u32,
candidate_kmers: &[u64], candidate_kmers_offset: usize,
duplicated_kmers: &[u64], ) -> Vec<u64> {
let contig_vec = contig.to_vec();
let mut v_contig_kmers = enumerate_kmers(&contig_vec, k as usize);
v_contig_kmers.sort_unstable();
remove_non_singletons(&mut v_contig_kmers, 0);
let ref_singletons = &candidate_kmers[candidate_kmers_offset..];
let v_tmp = set_difference_sorted(&v_contig_kmers, ref_singletons);
let result = set_difference_sorted(&v_tmp, duplicated_kmers);
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_enumerate_kmers_simple() {
let contig = vec![0, 1, 2, 3]; let kmers = enumerate_kmers(&contig, 3);
assert_eq!(kmers.len(), 2);
}
#[test]
fn test_enumerate_kmers_with_n() {
let contig = vec![0, 1, 4, 2, 3]; let kmers = enumerate_kmers(&contig, 3);
assert_eq!(kmers.len(), 0);
}
#[test]
fn test_enumerate_kmers_short_sequence() {
let contig = vec![0, 1]; let kmers = enumerate_kmers(&contig, 3);
assert_eq!(kmers.len(), 0);
}
#[test]
fn test_remove_non_singletons() {
let mut vec = vec![1, 2, 2, 3, 3, 3, 4, 5, 5, 6];
remove_non_singletons(&mut vec, 0);
assert_eq!(vec, vec![1, 4, 6]);
}
#[test]
fn test_remove_non_singletons_with_virtual_begin() {
let mut vec = vec![1, 1, 2, 3, 3, 4, 5, 5];
remove_non_singletons(&mut vec, 2);
assert_eq!(vec, vec![1, 1, 2, 4]);
}
#[test]
fn test_find_candidate_kmers() {
let contig = vec![0, 1, 2, 3, 0, 1, 2, 3];
let candidates = find_candidate_kmers(&contig, 3);
assert_eq!(candidates.len(), 0);
}
}