use crate::error::DryIceError;
use simd_minimizers::packed_seq::{PackedSeqVec, SeqVec};
/// A totally ordered key with a fixed-width byte encoding.
///
/// Every implementation in this file writes exactly `WIDTH` bytes in
/// `encode_into` and rejects input of any other length in `decode_from`.
pub trait RecordKey: Ord + Sized {
/// Number of bytes in the encoded form of the key.
const WIDTH: u16;
/// 16-byte tag naming the key type.
///
/// NOTE(review): presumably stored next to encoded keys so a reader can
/// verify the key type — confirm against the reader/writer code.
const TYPE_TAG: [u8; 16];
/// Writes the encoded key into `out`.
///
/// The implementations in this file expect `out.len() == WIDTH` and panic
/// otherwise.
fn encode_into(&self, out: &mut [u8]);
/// Reconstructs a key from its encoded bytes.
///
/// # Errors
///
/// The implementations in this file return
/// `DryIceError::InvalidRecordKeyEncoding` when `bytes` does not have the
/// expected length.
fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError>;
}
/// A `RecordKey` that is parameterised by a k-mer length.
pub trait KmerKey: RecordKey {
/// The k-mer length, in bases, associated with the key type.
const K: u8;
}
/// Unit marker type carrying no data.
///
/// It does not implement `RecordKey` in this part of the file.
/// NOTE(review): presumably selects "records have no key" at the type level —
/// confirm against the code that consumes it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct NoRecordKey;
/// An opaque 8-byte key.
///
/// The derived ordering compares the wrapped array lexicographically, byte by
/// byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Bytes8Key(pub [u8; 8]);

impl From<[u8; 8]> for Bytes8Key {
    /// Wraps the raw byte array without modifying it.
    fn from(bytes: [u8; 8]) -> Self {
        Bytes8Key(bytes)
    }
}
/// Fixed-width encoding for `Bytes8Key`: the eight wrapped bytes, verbatim.
impl RecordKey for Bytes8Key {
    const WIDTH: u16 = 8;
    const TYPE_TAG: [u8; 16] = *b"dryi:bytes8:key!";

    /// Copies the wrapped bytes into `out`.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly `WIDTH` bytes long.
    fn encode_into(&self, out: &mut [u8]) {
        let expected_len = usize::from(Self::WIDTH);
        debug_assert_eq!(out.len(), expected_len);
        out.copy_from_slice(&self.0);
    }

    /// Rebuilds the key from exactly eight bytes.
    ///
    /// # Errors
    ///
    /// Returns `DryIceError::InvalidRecordKeyEncoding` when `bytes` is not
    /// eight bytes long.
    fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
        <[u8; 8]>::try_from(bytes)
            .map(Self)
            .map_err(|_| DryIceError::InvalidRecordKeyEncoding {
                message: "invalid bytes8 key length",
            })
    }
}
/// An opaque 16-byte key.
///
/// The derived ordering compares the wrapped array lexicographically, byte by
/// byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Bytes16Key(pub [u8; 16]);

impl From<[u8; 16]> for Bytes16Key {
    /// Wraps the raw byte array without modifying it.
    fn from(bytes: [u8; 16]) -> Self {
        Bytes16Key(bytes)
    }
}
/// Fixed-width encoding for `Bytes16Key`: the sixteen wrapped bytes, verbatim.
impl RecordKey for Bytes16Key {
    const WIDTH: u16 = 16;
    const TYPE_TAG: [u8; 16] = *b"dryi:bytes16:key";

    /// Copies the wrapped bytes into `out`.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly `WIDTH` bytes long.
    fn encode_into(&self, out: &mut [u8]) {
        let expected_len = usize::from(Self::WIDTH);
        debug_assert_eq!(out.len(), expected_len);
        out.copy_from_slice(&self.0);
    }

    /// Rebuilds the key from exactly sixteen bytes.
    ///
    /// # Errors
    ///
    /// Returns `DryIceError::InvalidRecordKeyEncoding` when `bytes` is not
    /// sixteen bytes long.
    fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
        match <[u8; 16]>::try_from(bytes) {
            Ok(raw) => Ok(Self(raw)),
            Err(_) => Err(DryIceError::InvalidRecordKeyEncoding {
                message: "invalid bytes16 key length",
            }),
        }
    }
}
/// A 64-bit key derived from the first `K` bases of a sequence.
///
/// `try_from_sequence` packs the prefix at two bits per base
/// (A=0, C=1, G=2, T=3) and stores the smaller of that value and the value of
/// its reverse complement. The derived ordering is the numeric order of the
/// wrapped `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PrefixKmer64<const K: u8>(pub u64);
/// Exposes the const generic `K` through the `KmerKey` trait.
impl<const K: u8> KmerKey for PrefixKmer64<K> {
const K: u8 = K;
}
/// Fixed-width encoding for `PrefixKmer64`: the wrapped `u64` as eight
/// little-endian bytes.
///
/// The type tag is the same for every `K`.
/// NOTE(review): little-endian bytes do not sort bytewise in the same order as
/// the numeric `Ord` on the key — confirm that consumers compare decoded keys.
impl<const K: u8> RecordKey for PrefixKmer64<K> {
    const WIDTH: u16 = 8;
    const TYPE_TAG: [u8; 16] = *b"dryi:kmer:pref64";

    /// Writes the key as little-endian bytes into `out`.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly `WIDTH` bytes long.
    fn encode_into(&self, out: &mut [u8]) {
        debug_assert_eq!(out.len(), usize::from(Self::WIDTH));
        let little_endian = self.0.to_le_bytes();
        out.copy_from_slice(&little_endian);
    }

    /// Reads a key from eight little-endian bytes.
    ///
    /// # Errors
    ///
    /// Returns `DryIceError::InvalidRecordKeyEncoding` when `bytes` is not
    /// eight bytes long.
    fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
        match <[u8; 8]>::try_from(bytes) {
            Ok(raw) => Ok(Self(u64::from_le_bytes(raw))),
            Err(_) => Err(DryIceError::InvalidRecordKeyEncoding {
                message: "invalid prefix kmer64 key length",
            }),
        }
    }
}
impl<const K: u8> PrefixKmer64<K> {
    /// Compile-time guard: `K` bases at two bits each must fit in a `u64`,
    /// and an empty k-mer is rejected.
    const ASSERT_VALID: () = {
        assert!(K > 0, "PrefixKmer64 requires K > 0");
        assert!(K <= 32, "PrefixKmer64 requires K <= 32");
    };

    /// Builds the key from the first `K` bases of `seq`.
    ///
    /// Bases are packed two bits each (A=0, C=1, G=2, T=3, case-insensitive)
    /// with the first base in the most significant position. The key is the
    /// smaller of that value and the equally packed reverse complement, so a
    /// prefix and its reverse complement produce the same key.
    ///
    /// Returns `Ok(None)` when `seq` has fewer than `K` bases or when the
    /// prefix contains a byte other than `ACGTacgt`.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the `Result` is part of the
    /// signature only.
    pub fn try_from_sequence(seq: &[u8]) -> Result<Option<Self>, DryIceError> {
        // Referencing the constant forces the compile-time checks on `K`.
        let () = Self::ASSERT_VALID;

        let prefix = match seq.get(..usize::from(K)) {
            Some(prefix) => prefix,
            None => return Ok(None),
        };

        // Bit offset of the most significant base slot in a K-base word.
        // `K >= 1` is guaranteed by `ASSERT_VALID`, so this cannot underflow,
        // and `K <= 32` keeps the shift at 62 or below.
        let top_shift = 2 * (u32::from(K) - 1);

        let mut forward = 0u64;
        let mut revcomp = 0u64;
        for &base in prefix {
            let bits = match base {
                b'A' | b'a' => 0u64,
                b'C' | b'c' => 1u64,
                b'G' | b'g' => 2u64,
                b'T' | b't' => 3u64,
                _ => return Ok(None),
            };
            // Forward strand: earlier bases end up in higher bits.
            forward = (forward << 2) | bits;
            // Reverse complement: the complement (3 - bits) of each base
            // enters at the top and is pushed down by later bases, so the
            // last base of the prefix ends up most significant.
            revcomp = (revcomp >> 2) | ((3 - bits) << top_shift);
        }

        Ok(Some(Self(forward.min(revcomp))))
    }
}
/// A 64-bit key holding a minimizer value for k-mer length `K` and window
/// size `W`.
///
/// `try_from_sequence` sets it to the smallest value yielded by
/// `simd_minimizers::canonical_minimizers(K, W)` over the whole sequence. The
/// derived ordering is the numeric order of the wrapped `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Minimizer64<const K: u8, const W: u8>(pub u64);
/// Exposes the const generic `K` through the `KmerKey` trait; `W` is not
/// exposed by this trait.
impl<const K: u8, const W: u8> KmerKey for Minimizer64<K, W> {
const K: u8 = K;
}
/// Fixed-width encoding for `Minimizer64`: the wrapped `u64` as eight
/// little-endian bytes.
///
/// The type tag is the same for every `K` and `W`.
/// NOTE(review): little-endian bytes do not sort bytewise in the same order as
/// the numeric `Ord` on the key — confirm that consumers compare decoded keys.
impl<const K: u8, const W: u8> RecordKey for Minimizer64<K, W> {
    const WIDTH: u16 = 8;
    const TYPE_TAG: [u8; 16] = *b"dryi:kmer:mini64";

    /// Writes the key as little-endian bytes into `out`.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly `WIDTH` bytes long.
    fn encode_into(&self, out: &mut [u8]) {
        debug_assert_eq!(out.len(), usize::from(Self::WIDTH));
        let little_endian = self.0.to_le_bytes();
        out.copy_from_slice(&little_endian);
    }

    /// Reads a key from eight little-endian bytes.
    ///
    /// # Errors
    ///
    /// Returns `DryIceError::InvalidRecordKeyEncoding` when `bytes` is not
    /// eight bytes long.
    fn decode_from(bytes: &[u8]) -> Result<Self, DryIceError> {
        <[u8; 8]>::try_from(bytes)
            .map(|raw| Self(u64::from_le_bytes(raw)))
            .map_err(|_| DryIceError::InvalidRecordKeyEncoding {
                message: "invalid minimizer64 key length",
            })
    }
}
impl<const K: u8, const W: u8> Minimizer64<K, W> {
    /// Compile-time guard on the const parameters.
    ///
    /// NOTE(review): check the simd_minimizers documentation for further
    /// constraints on canonical minimizers (for example on `K + W - 1`);
    /// none is enforced here.
    const ASSERT_VALID: () = {
        assert!(K > 0, "Minimizer64 requires K > 0");
        assert!(K <= 32, "Minimizer64 requires K <= 32");
        assert!(W > 0, "Minimizer64 requires W > 0");
    };

    /// Builds the key from the smallest canonical minimizer value of `seq`.
    ///
    /// Returns `Ok(None)` when `seq` is shorter than one full window
    /// (`K + W - 1` bases), when it contains a byte other than `ACGTacgt`,
    /// or when the minimizer scan yields no values.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the `Result` is part of the
    /// signature only.
    pub fn try_from_sequence(seq: &[u8]) -> Result<Option<Self>, DryIceError> {
        // Referencing the constant forces the compile-time checks on `K`/`W`.
        let () = Self::ASSERT_VALID;

        // `K > 0` and `W > 0` are guaranteed above, so this cannot underflow.
        let window_len = usize::from(K) + usize::from(W) - 1;
        if seq.len() < window_len {
            return Ok(None);
        }

        // Unlike a prefix key, the whole sequence must be unambiguous DNA
        // because every position can contribute a minimizer.
        let is_plain_dna = seq
            .iter()
            .all(|base| matches!(base, b'A' | b'a' | b'C' | b'c' | b'G' | b'g' | b'T' | b't'));
        if !is_plain_dna {
            return Ok(None);
        }

        let packed = PackedSeqVec::from_ascii(seq);
        // The scan API requires a position buffer; only the values are used.
        let mut positions = Vec::new();
        // Fold straight to the minimum instead of collecting every value
        // into a temporary vector first. Bound in a `let` so the borrows of
        // `packed` and `positions` end before the function returns.
        let smallest = simd_minimizers::canonical_minimizers(usize::from(K), usize::from(W))
            .run(packed.as_slice(), &mut positions)
            .values_u64()
            .min();

        Ok(smallest.map(Self))
    }
}