1use std::iter::zip;
3
4use crate::{
5 canonical,
6 sliding_min::{Cache, sliding_lr_min_mapper_scalar, sliding_min_mapper_scalar},
7};
8
9use super::{
10 canonical::canonical_mapper_simd,
11 sliding_min::{sliding_lr_min_mapper_simd, sliding_min_mapper_simd},
12};
13use itertools::{Itertools, izip};
14use packed_seq::{Advance, ChunkIt, Delay, PaddedIt, Seq};
15use seq_hash::KmerHasher;
16use wide::u32x8;
17
/// Sentinel emitted for windows that are skipped (blended in for ambiguous
/// windows in `canonical_minimizers_skip_ambiguous_windows`).
/// NOTE(review): `u32::MAX - 1` rather than `u32::MAX` — presumably
/// `u32::MAX` is reserved for another sentinel (e.g. padding); confirm.
pub const SKIPPED: u32 = u32::MAX - 1;
/// `SKIPPED` broadcast across all 8 lanes of a SIMD word.
pub(crate) const SIMD_SKIPPED: u32x8 = u32x8::new([SKIPPED; 8]);
20
21pub fn one_minimizer<'s>(seq: impl Seq<'s>, hasher: &impl KmerHasher) -> usize {
23 hasher
24 .hash_kmers_scalar(seq)
25 .map(|x| x & 0xffff_0000)
26 .position_min()
27 .unwrap()
28}
29
30#[inline(always)]
38pub fn minimizers_seq_scalar<'s>(
39 seq: impl Seq<'s>,
40 hasher: &impl KmerHasher,
41 w: usize,
42 cache: &mut Cache,
43) -> impl ExactSizeIterator<Item = u32> {
44 let kmer_hashes = hasher.hash_kmers_scalar(seq);
45 let len = kmer_hashes.len();
46 kmer_hashes
47 .map(sliding_min_mapper_scalar::<true>(w, len, cache))
48 .advance(w - 1)
49}
50
51#[inline(always)]
55pub fn minimizers_seq_simd<'s>(
56 seq: impl Seq<'s>,
57 hasher: &impl KmerHasher,
58 w: usize,
59 cache: &mut Cache,
60) -> PaddedIt<impl ChunkIt<u32x8>> {
61 let kmer_hashes = hasher.hash_kmers_simd(seq, w);
62 let len = kmer_hashes.it.len();
63 kmer_hashes
64 .map(sliding_min_mapper_simd::<true>(w, len, cache))
65 .advance(w - 1)
66}
67
/// Scalar canonical minimizers of `seq`: one `u32` per window of `w` k-mers,
/// choosing the leftmost minimum for canonical windows and the rightmost for
/// non-canonical ones (so forward and reverse-complement strands agree).
///
/// The sequence is streamed once through three stateful mappers:
/// - `hash_mapper`: k-mer hashes; its second input `rh` is the same base
///   stream delayed by `delay1` positions,
/// - `canonical_mapper`: canonicality of each l-mer window (l = k + w - 1);
///   its second input `rc` is the base stream delayed by `delay2` positions,
/// - `sliding_min_mapper`: (leftmost, rightmost) minimum over `w` hashes.
///
/// NOTE(review): the left/right-min tie-breaking semantics above are inferred
/// from names; confirm in `sliding_min`. Also note this passes `seq.len()` to
/// `sliding_lr_min_mapper_scalar` where `minimizers_seq_scalar` passes the
/// k-mer count — presumably the lr variant wants the base count; confirm.
#[inline(always)]
pub fn canonical_minimizers_seq_scalar<'s>(
    seq: impl Seq<'s>,
    hasher: &impl KmerHasher,
    w: usize,
    cache: &mut Cache,
) -> impl ExactSizeIterator<Item = u32> {
    assert!(hasher.is_canonical());

    let k = hasher.k();
    let delay1 = hasher.delay().0;
    let mut hash_mapper = hasher.in_out_mapper_scalar(seq);
    let mut sliding_min_mapper = sliding_lr_min_mapper_scalar(w, seq.len(), cache);
    let (Delay(delay2), mut canonical_mapper) = canonical::canonical_mapper_scalar(k + w - 1);

    // Phase arithmetic below relies on delay1 < k - 1 + 1 and
    // delay2 == k + w - 2, so the three warm-up phases consume exactly
    // delay1 + (k-1-delay1) + (delay2-(k-1)) = k + w - 2 bases in total.
    assert!(delay1 < k);
    assert!(k - 1 <= delay2);
    assert!(delay2 == k + w - 2);

    // Three cursors over the same base stream: `a` is the leading ("add")
    // cursor; `rh` and `rc` lag behind it by delay1 / delay2 positions
    // respectively, implemented by starting them later.
    let mut a = seq.iter_bp();
    let mut rh = seq.iter_bp();
    let rc = seq.iter_bp();

    // Warm-up 1: first `delay1` bases. The delayed inputs are not available
    // yet, so feed 0 placeholders; outputs are discarded.
    a.by_ref().take(delay1).for_each(|a| {
        hash_mapper((a, 0));
        canonical_mapper((a, 0));
    });

    // Warm-up 2: up to base k-1. `rh` is now live, but no k-mer is complete,
    // so the hash output is still discarded.
    zip(a.by_ref(), rh.by_ref())
        .take((k - 1) - delay1)
        .for_each(|(a, rh)| {
            hash_mapper((a, rh));
            canonical_mapper((a, 0));
        });

    // Warm-up 3: up to base delay2 = k+w-2. K-mers are complete, so their
    // hashes feed the sliding minimum, but the first window of `w` hashes is
    // not full yet — the min output is discarded.
    zip(a.by_ref(), rh.by_ref())
        .take(delay2 - (k - 1))
        .for_each(|(a, rh)| {
            let hash = hash_mapper((a, rh));
            canonical_mapper((a, 0));
            sliding_min_mapper(hash);
        });

    // Steady state: all three cursors are live (`rc` now starts at base 0,
    // i.e. delayed by delay2 relative to `a`); emit one minimizer per base.
    izip!(a, rh, rc).map(
        #[inline(always)]
        #[allow(clippy::let_and_return)]
        move |(a, rh, rc)| {
            let hash = hash_mapper((a, rh));
            let canonical = canonical_mapper((a, rc));
            let (left, right) = sliding_min_mapper(hash);
            // Canonical windows take the leftmost min, others the rightmost,
            // so both strands of a window select the same position.
            let x = if canonical { left } else { right };
            x
        },
    )
}
130
/// SIMD canonical minimizers of `seq`; 8-lane counterpart of
/// `canonical_minimizers_seq_scalar`.
///
/// One pass over the sequence drives three stateful mappers: `hash_mapper`
/// (k-mer hashes), `canonical_mapper` (canonicality of each l-mer window,
/// l = k + w - 1), and `sliding_min_mapper` (left/right minimum over `w`
/// hashes). The delayed companion streams `rh` and `rc` are produced by
/// `par_iter_bp_delayed_2`, delayed by `hasher.delay()` and `c_delay`
/// respectively.
///
/// NOTE(review): left/right-min semantics inferred from names — confirm in
/// `sliding_min`.
#[inline(always)]
pub fn canonical_minimizers_seq_simd<'s>(
    seq: impl Seq<'s>,
    hasher: &impl KmerHasher,
    w: usize,
    cache: &mut Cache,
) -> PaddedIt<impl ChunkIt<u32x8>> {
    assert!(hasher.is_canonical());

    let k = hasher.k();
    // Window length in bases: a window of w k-mers spans k+w-1 bases.
    let l = k + w - 1;
    let mut hash_mapper = hasher.in_out_mapper_simd(seq);
    let (c_delay, mut canonical_mapper) = canonical_mapper_simd(l);

    // Triples (a, rh, rc): leading base plus the two delayed streams.
    let mut padded_it = seq.par_iter_bp_delayed_2(l, hasher.delay(), c_delay);

    // Warm-up 1: first k-1 bases complete no k-mer; only advance the hash
    // and canonicality state, discard outputs.
    padded_it.advance_with(k - 1, |(a, rh, rc)| {
        hash_mapper((a, rh));
        canonical_mapper((a, rc));
    });
    // Built after the advance — NOTE(review): assuming `advance_with`
    // consumes from `it`, `it.len()` here is the remaining stream length.
    let mut sliding_min_mapper = sliding_lr_min_mapper_simd(w, padded_it.it.len(), cache);
    // Warm-up 2: next w-1 bases complete k-mers, so feed the sliding min,
    // but the first window of w hashes is not full yet — no output.
    padded_it.advance_with(w - 1, |(a, rh, rc)| {
        let hash = hash_mapper((a, rh));
        canonical_mapper((a, rc));
        sliding_min_mapper(hash);
    });

    // Steady state: per lane, pick the left min for canonical windows and
    // the right min otherwise (`blend` selects per-lane by the mask).
    padded_it.map(move |(a, rh, rc)| {
        let hash = hash_mapper((a, rh));
        let canonical = canonical_mapper((a, rc));
        let (lmin, rmin) = sliding_min_mapper(hash);
        canonical.blend(lmin, rmin)
    })
}
167
/// Like `canonical_minimizers_seq_simd`, but for a sequence with ambiguity
/// information (`PackedNSeq`): windows flagged ambiguous yield the `SKIPPED`
/// sentinel instead of a minimizer.
///
/// `cache` bundles the sliding-min cache plus two reusable SIMD buffers for
/// the delayed base iterator (`cache.1`) and the ambiguity iterator
/// (`cache.2`), so repeated calls avoid reallocation.
///
/// NOTE(review): the meanings of the `factor = 2` argument and of the
/// `(l, l, l-1)` arguments to `par_iter_kmer_ambiguity_with_buf` are not
/// visible here — confirm against `packed_seq`.
#[inline(always)]
pub fn canonical_minimizers_skip_ambiguous_windows<'s>(
    nseq: packed_seq::PackedNSeq<'s>,
    hasher: &impl KmerHasher,
    w: usize,
    cache: &'s mut (Cache, Vec<u32x8>, Vec<u32x8>),
) -> PaddedIt<impl ChunkIt<u32x8>> {
    assert!(hasher.is_canonical());

    let k = hasher.k();
    // Window length in bases: a window of w k-mers spans k+w-1 bases.
    let l = k + w - 1;
    let mut hash_mapper = hasher.in_out_mapper_simd(nseq.seq);
    let (c_delay, mut canonical_mapper) = canonical_mapper_simd(l);

    // Triples (a, rh, rc): leading base plus two delayed streams, backed by
    // the caller-provided buffer `cache.1`.
    let mut padded_it = nseq.seq.par_iter_bp_delayed_2_with_factor_and_buf(
        l,
        hasher.delay(),
        c_delay,
        2,
        &mut cache.1,
    );

    // Warm-up 1: first k-1 bases complete no k-mer; advance hash and
    // canonicality state only.
    padded_it.advance_with(k - 1, |(a, rh, rc)| {
        hash_mapper((a, rh));
        canonical_mapper((a, rc));
    });
    // NOTE(review): assuming `advance_with` consumes from `it`, `it.len()`
    // here is the remaining stream length after the k-1 advance.
    let mut sliding_min_mapper = sliding_lr_min_mapper_simd(w, padded_it.it.len(), &mut cache.0);
    // Warm-up 2: next w-1 bases feed the sliding minimum until the first
    // window of w hashes is full; outputs still discarded.
    padded_it.advance_with(w - 1, |(a, rh, rc)| {
        let hash = hash_mapper((a, rh));
        canonical_mapper((a, rc));
        sliding_min_mapper(hash);
    });

    // Steady state: pair each base triple with its window's ambiguity mask.
    // Per lane: ambiguous windows emit SKIPPED; otherwise canonical windows
    // take the left min and non-canonical ones the right min.
    padded_it
        .zip(
            nseq.ambiguous
                .par_iter_kmer_ambiguity_with_buf(l, l, l - 1, &mut cache.2),
        )
        .map(move |((a, rh, rc), ambi)| {
            let hash = hash_mapper((a, rh));
            let canonical = canonical_mapper((a, rc));
            let (lmin, rmin) = sliding_min_mapper(hash);
            ambi.blend(SIMD_SKIPPED, canonical.blend(lmin, rmin))
        })
}