1use crate::error::DryIceError;
4use simd_minimizers::packed_seq::{PackedSeqVec, SeqVec};
5
/// A totally ordered key with a fixed-width byte encoding.
///
/// Implementors declare the encoded width and a 16-byte type tag, and provide
/// conversions between the in-memory key and its encoded bytes.
pub trait RecordKey: Ord + Sized {
    /// Number of bytes occupied by the encoded key.
    const WIDTH: u16;

    /// 16-byte tag naming the key type.
    // NOTE(review): presumably persisted alongside the data so a reader can
    // check which key type was used — confirm against the file-format code.
    const TYPE_TAG: [u8; 16];

    /// Writes the encoded form of this key into `out`.
    ///
    /// The implementations in this file expect `out` to be exactly
    /// [`Self::WIDTH`] bytes long.
    fn encode_into(&self, out: &mut [u8]);

    /// Rebuilds a key from its encoded bytes.
    ///
    /// # Errors
    ///
    /// The implementations in this file return
    /// `DryIceError::InvalidRecordKeyEncoding` when `bytes` does not have the
    /// expected length.
    fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError>;
}
39
/// A [`RecordKey`] that is associated with a k-mer length.
pub trait KmerKey: RecordKey {
    /// The k-mer length `k` this key type is parameterised with.
    const K: u8;
}
62
/// Unit marker type carrying no key data.
// NOTE(review): presumably used where a key type is expected but no record key
// is stored; it does not implement `RecordKey` in the code visible here —
// confirm against its call sites.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct NoRecordKey;
66
/// An opaque 8-byte key.
///
/// Ordering and equality are derived from the wrapped array, so keys compare
/// lexicographically byte by byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Bytes8Key(pub [u8; 8]);

impl From<[u8; 8]> for Bytes8Key {
    /// Wraps a raw 8-byte array as a key without altering it.
    fn from(raw: [u8; 8]) -> Self {
        Bytes8Key(raw)
    }
}
76
77impl RecordKey for Bytes8Key {
78 const WIDTH: u16 = 8;
79 const TYPE_TAG: [u8; 16] = *b"dryi:bytes8:key!";
80
81 fn encode_into(&self, out: &mut [u8]) {
82 debug_assert_eq!(out.len(), usize::from(Self::WIDTH));
83 out.copy_from_slice(&self.0);
84 }
85
86 fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
87 let arr: [u8; 8] = bytes
88 .try_into()
89 .map_err(|_| DryIceError::InvalidRecordKeyEncoding {
90 message: "invalid bytes8 key length",
91 })?;
92 Ok(Self(arr))
93 }
94}
95
/// An opaque 16-byte key.
///
/// Ordering and equality are derived from the wrapped array, so keys compare
/// lexicographically byte by byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Bytes16Key(pub [u8; 16]);

impl From<[u8; 16]> for Bytes16Key {
    /// Wraps a raw 16-byte array as a key without altering it.
    fn from(raw: [u8; 16]) -> Self {
        Bytes16Key(raw)
    }
}
105
106impl RecordKey for Bytes16Key {
107 const WIDTH: u16 = 16;
108 const TYPE_TAG: [u8; 16] = *b"dryi:bytes16:key";
109
110 fn encode_into(&self, out: &mut [u8]) {
111 debug_assert_eq!(out.len(), usize::from(Self::WIDTH));
112 out.copy_from_slice(&self.0);
113 }
114
115 fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
116 let arr: [u8; 16] =
117 bytes
118 .try_into()
119 .map_err(|_| DryIceError::InvalidRecordKeyEncoding {
120 message: "invalid bytes16 key length",
121 })?;
122 Ok(Self(arr))
123 }
124}
125
/// A 64-bit key parameterised by the k-mer length `K`.
///
/// `PrefixKmer64::try_from_sequence` in this file builds the value from the
/// first `K` bases of a sequence, packed at two bits per base.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PrefixKmer64<const K: u8>(pub u64);
137
// Exposes the const generic parameter `K` through the `KmerKey` trait.
impl<const K: u8> KmerKey for PrefixKmer64<K> {
    const K: u8 = K;
}
141
142impl<const K: u8> RecordKey for PrefixKmer64<K> {
143 const WIDTH: u16 = 8;
144 const TYPE_TAG: [u8; 16] = *b"dryi:kmer:pref64";
145
146 fn encode_into(&self, out: &mut [u8]) {
147 debug_assert_eq!(out.len(), usize::from(Self::WIDTH));
148 out.copy_from_slice(&self.0.to_le_bytes());
149 }
150
151 fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
152 let arr: [u8; 8] = bytes
153 .try_into()
154 .map_err(|_| DryIceError::InvalidRecordKeyEncoding {
155 message: "invalid prefix kmer64 key length",
156 })?;
157 Ok(Self(u64::from_le_bytes(arr)))
158 }
159}
160
161impl<const K: u8> PrefixKmer64<K> {
162 const ASSERT_VALID: () = {
163 assert!(K > 0, "PrefixKmer64 requires K > 0");
164 assert!(K <= 32, "PrefixKmer64 requires K <= 32");
165 };
166
167 pub fn try_from_sequence(seq: &[u8]) -> Result<Option<Self>, DryIceError> {
178 let () = Self::ASSERT_VALID;
179
180 if seq.len() < usize::from(K) {
181 return Ok(None);
182 }
183
184 let prefix = &seq[..usize::from(K)];
185 let mut forward = 0u64;
186 let mut revcomp = 0u64;
187
188 for &base in prefix {
189 let bits = match base {
190 b'A' | b'a' => 0u64,
191 b'C' | b'c' => 1u64,
192 b'G' | b'g' => 2u64,
193 b'T' | b't' => 3u64,
194 _ => return Ok(None),
195 };
196 forward = (forward << 2) | bits;
197 }
198
199 for &base in prefix.iter().rev() {
200 let bits = match base {
201 b'A' | b'a' => 0u64,
202 b'C' | b'c' => 1u64,
203 b'G' | b'g' => 2u64,
204 b'T' | b't' => 3u64,
205 _ => return Ok(None),
206 };
207 revcomp = (revcomp << 2) | (3 - bits);
208 }
209
210 Ok(Some(Self(forward.min(revcomp))))
211 }
212}
213
/// A 64-bit key parameterised by a k-mer length `K` and a window size `W`.
///
/// `Minimizer64::try_from_sequence` in this file fills it with the smallest
/// canonical minimizer value computed over a sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Minimizer64<const K: u8, const W: u8>(pub u64);
227
// Exposes the const generic parameter `K` through the `KmerKey` trait.
impl<const K: u8, const W: u8> KmerKey for Minimizer64<K, W> {
    const K: u8 = K;
}
231
232impl<const K: u8, const W: u8> RecordKey for Minimizer64<K, W> {
233 const WIDTH: u16 = 8;
234 const TYPE_TAG: [u8; 16] = *b"dryi:kmer:mini64";
235
236 fn encode_into(&self, out: &mut [u8]) {
237 debug_assert_eq!(out.len(), usize::from(Self::WIDTH));
238 out.copy_from_slice(&self.0.to_le_bytes());
239 }
240
241 fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
242 let arr: [u8; 8] = bytes
243 .try_into()
244 .map_err(|_| DryIceError::InvalidRecordKeyEncoding {
245 message: "invalid minimizer64 key length",
246 })?;
247 Ok(Self(u64::from_le_bytes(arr)))
248 }
249}
250
impl<const K: u8, const W: u8> Minimizer64<K, W> {
    /// Compile-time check that `K` lies in `1..=32` and that `W` is non-zero.
    ///
    /// The constant is only evaluated when it is referenced, which is why
    /// `try_from_sequence` binds it explicitly.
    // NOTE(review): simd_minimizers may place extra constraints on canonical
    // minimizers (e.g. on the parity of `K + W - 1`) that are not checked
    // here — confirm against the crate documentation.
    const ASSERT_VALID: () = {
        assert!(K > 0, "Minimizer64 requires K > 0");
        assert!(K <= 32, "Minimizer64 requires K <= 32");
        assert!(W > 0, "Minimizer64 requires W > 0");
    };

    /// Builds the key from the smallest canonical minimizer value of `seq`.
    ///
    /// Returns `Ok(None)` when `seq` is shorter than `K + W - 1` bytes, when
    /// any byte of the *whole* sequence is not one of `ACGTacgt`, or when the
    /// minimizer computation yields no values.
    ///
    /// # Errors
    ///
    /// No code path visible in this function returns `Err`.
    pub fn try_from_sequence(seq: &[u8]) -> Result<Option<Self>, DryIceError> {
        // Force evaluation of the const so invalid parameters fail the build.
        let () = Self::ASSERT_VALID;

        // Minimum sequence length needed for one window of `W` k-mers.
        // `W > 0` is asserted above, so the subtraction cannot underflow.
        let l = usize::from(K) + usize::from(W) - 1;
        if seq.len() < l {
            return Ok(None);
        }
        // Reject the sequence if any byte is outside ACGT (either case).
        if !seq
            .iter()
            .all(|base| matches!(base, b'A' | b'a' | b'C' | b'c' | b'G' | b'g' | b'T' | b't'))
        {
            return Ok(None);
        }

        let packed = PackedSeqVec::from_ascii(seq);
        // Output buffer required by `run`; only the values are used below.
        let mut positions = Vec::new();
        // NOTE(review): `values_u64` presumably yields the packed k-mer value
        // of each minimizer — confirm against the simd_minimizers docs.
        let values: Vec<u64> =
            simd_minimizers::canonical_minimizers(usize::from(K), usize::from(W))
                .run(packed.as_slice(), &mut positions)
                .values_u64()
                .collect();

        // The key is the numerically smallest value; `None` if there are none.
        Ok(values.into_iter().min().map(Self))
    }
}