use std::iter::zip;
use crate::{
canonical,
sliding_min::{Cache, sliding_lr_min_mapper_scalar, sliding_min_mapper_scalar},
};
use super::{
canonical::canonical_mapper_simd,
sliding_min::{sliding_lr_min_mapper_simd, sliding_min_mapper_simd},
};
use itertools::{Itertools, izip};
use packed_seq::{Advance, ChunkIt, Delay, PaddedIt, Seq};
use seq_hash::KmerHasher;
use wide::u32x8;
pub const SKIPPED: u32 = u32::MAX - 1;
pub(crate) const SIMD_SKIPPED: u32x8 = u32x8::new([SKIPPED; 8]);
pub fn one_minimizer<'s>(seq: impl Seq<'s>, hasher: &impl KmerHasher) -> usize {
hasher
.hash_kmers_scalar(seq)
.map(|x| x & 0xffff_0000)
.position_min()
.unwrap()
}
#[inline(always)]
pub fn minimizers_seq_scalar<'s>(
seq: impl Seq<'s>,
hasher: &impl KmerHasher,
w: usize,
cache: &mut Cache,
) -> impl ExactSizeIterator<Item = u32> {
let kmer_hashes = hasher.hash_kmers_scalar(seq);
let len = kmer_hashes.len();
kmer_hashes
.map(sliding_min_mapper_scalar::<true>(w, len, cache))
.advance(w - 1)
}
#[inline(always)]
pub fn minimizers_seq_simd<'s>(
seq: impl Seq<'s>,
hasher: &impl KmerHasher,
w: usize,
cache: &mut Cache,
) -> PaddedIt<impl ChunkIt<u32x8>> {
let kmer_hashes = hasher.hash_kmers_simd(seq, w);
let len = kmer_hashes.it.len();
kmer_hashes
.map(sliding_min_mapper_simd::<true>(w, len, cache))
.advance(w - 1)
}
#[inline(always)]
pub fn canonical_minimizers_seq_scalar<'s>(
seq: impl Seq<'s>,
hasher: &impl KmerHasher,
w: usize,
cache: &mut Cache,
) -> impl ExactSizeIterator<Item = u32> {
assert!(hasher.is_canonical());
let k = hasher.k();
let delay1 = hasher.delay().0;
let mut hash_mapper = hasher.in_out_mapper_scalar(seq);
let mut sliding_min_mapper = sliding_lr_min_mapper_scalar(w, seq.len(), cache);
let (Delay(delay2), mut canonical_mapper) = canonical::canonical_mapper_scalar(k + w - 1);
assert!(delay1 < k);
assert!(k - 1 <= delay2);
assert!(delay2 == k + w - 2);
let mut a = seq.iter_bp();
let mut rh = seq.iter_bp();
let rc = seq.iter_bp();
a.by_ref().take(delay1).for_each(|a| {
hash_mapper((a, 0));
canonical_mapper((a, 0));
});
zip(a.by_ref(), rh.by_ref())
.take((k - 1) - delay1)
.for_each(|(a, rh)| {
hash_mapper((a, rh));
canonical_mapper((a, 0));
});
zip(a.by_ref(), rh.by_ref())
.take(delay2 - (k - 1))
.for_each(|(a, rh)| {
let hash = hash_mapper((a, rh));
canonical_mapper((a, 0));
sliding_min_mapper(hash);
});
izip!(a, rh, rc).map(
#[inline(always)]
#[allow(clippy::let_and_return)]
move |(a, rh, rc)| {
let hash = hash_mapper((a, rh));
let canonical = canonical_mapper((a, rc));
let (left, right) = sliding_min_mapper(hash);
let x = if canonical { left } else { right };
x
},
)
}
#[inline(always)]
pub fn canonical_minimizers_seq_simd<'s>(
seq: impl Seq<'s>,
hasher: &impl KmerHasher,
w: usize,
cache: &mut Cache,
) -> PaddedIt<impl ChunkIt<u32x8>> {
assert!(hasher.is_canonical());
let k = hasher.k();
let l = k + w - 1;
let mut hash_mapper = hasher.in_out_mapper_simd(seq);
let (c_delay, mut canonical_mapper) = canonical_mapper_simd(l);
let mut padded_it = seq.par_iter_bp_delayed_2(l, hasher.delay(), c_delay);
padded_it.advance_with(k - 1, |(a, rh, rc)| {
hash_mapper((a, rh));
canonical_mapper((a, rc));
});
let mut sliding_min_mapper = sliding_lr_min_mapper_simd(w, padded_it.it.len(), cache);
padded_it.advance_with(w - 1, |(a, rh, rc)| {
let hash = hash_mapper((a, rh));
canonical_mapper((a, rc));
sliding_min_mapper(hash);
});
padded_it.map(move |(a, rh, rc)| {
let hash = hash_mapper((a, rh));
let canonical = canonical_mapper((a, rc));
let (lmin, rmin) = sliding_min_mapper(hash);
canonical.blend(lmin, rmin)
})
}
#[inline(always)]
pub fn canonical_minimizers_skip_ambiguous_windows<'s>(
nseq: packed_seq::PackedNSeq<'s>,
hasher: &impl KmerHasher,
w: usize,
cache: &'s mut (Cache, Vec<u32x8>, Vec<u32x8>),
) -> PaddedIt<impl ChunkIt<u32x8>> {
assert!(hasher.is_canonical());
let k = hasher.k();
let l = k + w - 1;
let mut hash_mapper = hasher.in_out_mapper_simd(nseq.seq);
let (c_delay, mut canonical_mapper) = canonical_mapper_simd(l);
let mut padded_it = nseq.seq.par_iter_bp_delayed_2_with_factor_and_buf(
l,
hasher.delay(),
c_delay,
2,
&mut cache.1,
);
padded_it.advance_with(k - 1, |(a, rh, rc)| {
hash_mapper((a, rh));
canonical_mapper((a, rc));
});
let mut sliding_min_mapper = sliding_lr_min_mapper_simd(w, padded_it.it.len(), &mut cache.0);
padded_it.advance_with(w - 1, |(a, rh, rc)| {
let hash = hash_mapper((a, rh));
canonical_mapper((a, rc));
sliding_min_mapper(hash);
});
padded_it
.zip(
nseq.ambiguous
.par_iter_kmer_ambiguity_with_buf(l, l, l - 1, &mut cache.2),
)
.map(move |((a, rh, rc), ambi)| {
let hash = hash_mapper((a, rh));
let canonical = canonical_mapper((a, rc));
let (lmin, rmin) = sliding_min_mapper(hash);
ambi.blend(SIMD_SKIPPED, canonical.blend(lmin, rmin))
})
}