1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
use crate::codec::{Codec, ParseBioErr};
use crate::{Seq, SeqSlice};
use bitvec::prelude::*;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::marker::PhantomData;
/// A k-mer of `K` symbols, stored as a single packed `usize`.
///
/// The codec `C` is only a type-level tag (see `_p`); the data itself lives
/// in `bs`. Equality and ordering are derived field by field.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Kmer<C, const K: usize>
where
    C: Codec,
{
    /// Zero-sized marker binding this k-mer to its codec `C`.
    pub _p: PhantomData<C>,
    /// Packed bit representation of the k-mer.
    pub bs: usize,
}
impl<A: Codec, const K: usize> From<usize> for Kmer<A, K> {
    /// Wraps a raw packed value as a k-mer.
    ///
    /// The value is stored as given: no masking or validation is performed
    /// here.
    fn from(i: usize) -> Self {
        let bs = i;
        Self {
            bs,
            _p: PhantomData,
        }
    }
}
impl<A: Codec, const K: usize> From<&Kmer<A, K>> for usize {
fn from(kmer: &Kmer<A, K>) -> usize {
kmer.bs
}
}
impl<A: Codec, const K: usize> From<Kmer<A, K>> for usize {
fn from(kmer: Kmer<A, K>) -> usize {
kmer.bs
}
}
impl<A: Codec, const K: usize> fmt::Display for Kmer<A, K> {
    /// Writes the k-mer as `K` consecutive symbols.
    ///
    /// The lowest `K * A::WIDTH` bits of `bs` are split into `A::WIDTH`-bit
    /// chunks, starting from the least significant bit. Each chunk is decoded
    /// with `A::unsafe_from_bits` and written as that symbol's `to_char()`.
    ///
    /// Symbols are streamed straight into the formatter, so no intermediate
    /// `String` is allocated.
    ///
    /// # Panics
    ///
    /// The slice operation panics if `K * A::WIDTH` exceeds the number of
    /// bits in a `usize`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let width = A::WIDTH as usize;
        let bits = BitArray::<usize, Lsb0>::from(self.bs);
        for chunk in bits[..K * width].chunks(width) {
            // NOTE(review): `unsafe_from_bits` presumably trusts the chunk to
            // be a valid encoding for `A` — confirm against the codec.
            let symbol = A::unsafe_from_bits(chunk.load::<u8>());
            write!(f, "{}", symbol.to_char())?;
        }
        Ok(())
    }
}
impl<A: Codec, const K: usize> Hash for Kmer<A, K> {
    /// Feeds the packed value and then the length `K` into the hasher.
    ///
    /// Because `K` is included, k-mers of different lengths with the same
    /// packed value contribute different input to the hasher.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Order matters for the resulting hash: packed bits first, then K.
        Hash::hash(&self.bs, state);
        Hash::hash(&K, state);
    }
}
impl<A: Codec, const K: usize> TryFrom<Seq<A>> for Kmer<A, K> {
    type Error = ParseBioErr;

    /// Converts an owned sequence into a k-mer.
    ///
    /// # Errors
    ///
    /// Returns `ParseBioErr` when the sequence length is not exactly `K`.
    fn try_from(seq: Seq<A>) -> Result<Self, Self::Error> {
        if seq.len() == K {
            // Length already verified, so the slice conversion's own length
            // assertion cannot fire here.
            return Ok(Self::from(&seq[0..K]));
        }
        Err(ParseBioErr)
    }
}
impl<A: Codec, const K: usize> From<&SeqSlice<A>> for Kmer<A, K> {
    /// Builds a k-mer from a sequence slice of exactly `K` symbols.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len()` is not equal to `K`.
    fn from(slice: &SeqSlice<A>) -> Self {
        assert_eq!(K, slice.len());
        // The slice's own conversion to `usize` supplies the packed value.
        let bs: usize = slice.into();
        Self {
            bs,
            _p: PhantomData,
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::codec::amino::Amino;
    use crate::codec::dna::Dna;
    use crate::Seq;
    use core::str::FromStr;

    /// Each DNA 2-mer of "AACTT" converts to the expected packed value.
    #[test]
    fn kmer_to_usize() {
        let expected: [usize; 4] = [0, 4, 13, 15];
        for (kmer, want) in dna!("AACTT").kmers::<2>().zip(expected) {
            assert_eq!(want, usize::from(&kmer));
        }
    }

    /// Each amino-acid 2-mer of "SRY" converts to the expected packed value.
    #[test]
    fn amino_kmer_to_usize() {
        let expected: [usize; 2] = [0b001000_011000, 0b010011_001000];
        for (kmer, want) in amino!("SRY").kmers::<2>().zip(expected) {
            assert_eq!(want, usize::from(&kmer));
        }
    }

    /// Iterating amino-acid 3-mers yields the expected windows, checked
    /// through their `Display` output.
    #[test]
    fn amino_kmer_iter() {
        let expected = ["SSL", "SLM", "LMN", "MNH", "NHK", "HKK", "KKL"];
        for (kmer, want) in amino!("SSLMNHKKL").kmers::<3>().zip(expected) {
            assert_eq!(kmer.to_string(), want);
        }
    }
}