bio_seq/
kmer.rs

1// Copyright 2021-2024 Jeff Knaggs
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Encoded sequences of static length
7//!
8//! Generally, the underlying storage type of `Kmer` should lend itself to optimisation. The default `Kmer` instance is packed into a `usize`, which can be efficiently `Copy`ed on the stack.
9//!
10//! `k * codec::BITS` must fit in the storage type, e.g. `usize` (64 bits).
11//!
12//! ```
13//! use bio_seq::prelude::*;
14//!
15//! for (amino_kmer, amino_string) in Seq::<Amino>::try_from("SSLMNHKKL").unwrap()
16//!         .kmers::<3>()
17//!         .zip(["SSL", "SLM", "LMN", "MNH", "NHK", "HKK", "KKL"])
18//!     {
19//!         assert_eq!(amino_kmer, amino_string);
20//!     }
21//! ```
22//!
23//! Kmers can be copied from other sequence types:
24//!
25//! ```
26//! # use bio_seq::prelude::*;
27//! let kmer: Kmer<Dna, 8> = dna!("AGTTGGCA").try_into().unwrap();
28//! ```
29
30// permit truncations that may happen on 32-bit platforms which are unsupported anyway
31#![allow(clippy::cast_possible_truncation)]
32
33use crate::Bs;
34use crate::codec::{self, Codec};
35use crate::prelude::ParseBioError;
36use crate::seq::{Seq, SeqArray, SeqSlice};
37use crate::{
38    Complement, ComplementMut, Reverse, ReverseComplement, ReverseComplementMut, ReverseMut,
39};
40use bitvec::field::BitField;
41use bitvec::view::BitView;
42use core::fmt;
43use core::hash::{Hash, Hasher};
44use core::marker::PhantomData;
45use core::ops::Deref;
46use core::ptr;
47use core::str::FromStr;
48
49//#[cfg(target_feature(enable = "avx2,bmi2"))]
50//pub mod avx2;
51
52//#[cfg(target_arch = "wasm32")]
53//pub mod wasm;
54
55#[cfg(target_pointer_width = "64")]
56pub(crate) mod integral64;
57
58#[cfg(target_pointer_width = "32")]
59pub(crate) mod integral32;
60
61#[cfg(feature = "serde")]
62use serde_derive::{Deserialize, Serialize};
63
/// Builds a 256-entry lookup table that maps every byte to the byte with its
/// four 2-bit blocks in reverse order (block 0 <-> block 3, block 1 <-> block 2).
///
/// The bits *inside* each 2-bit block keep their order; only the position of
/// the blocks within the byte is mirrored.
const fn make_2bit_table() -> [u8; 256] {
    let mut table = [0u8; 256];
    let mut byte: usize = 0;
    while byte < table.len() {
        // `byte` is always < 256 here, so the cast is lossless.
        let v = byte as u8;

        // Move each pair of bits to its mirrored slot and merge the results.
        table[byte] = (v >> 6)
            | ((v >> 2) & 0b00_00_11_00)
            | ((v << 2) & 0b00_11_00_00)
            | (v << 6);

        byte += 1;
    }
    table
}
78
/// Compile-time lookup table produced by `make_2bit_table`: `REV_2BIT[b]` is the
/// byte `b` with the order of its four 2-bit blocks reversed.
const REV_2BIT: [u8; 256] = make_2bit_table();
80
/// Crate-private home of the low-level storage trait.
///
/// Because this module is `pub(crate)`, code outside the crate cannot name
/// `sealed::KmerStorage` and therefore cannot implement the public
/// `KmerStorage` trait that requires it as a supertrait.
pub(crate) mod sealed {
    use crate::Bs;

    /// Operations an integer-like type has to provide to back a `Kmer`.
    ///
    /// The implementations live outside this file; the per-method notes below
    /// are inferred from the signatures and from how this file calls them.
    pub trait KmerStorage: Copy + Clone + PartialEq + std::fmt::Debug {
        /// Total number of bits available in the storage type.
        const BITS: usize;
        /// Bit-array representation that can be viewed as a bit slice (`Bs`).
        type BaN: AsRef<Bs> + AsMut<Bs>;

        /// Converts the stored value into its bit-array form.
        fn to_bitarray(self) -> Self::BaN;
        /// Builds a storage value from a bit slice.
        fn from_bitslice(bs: &Bs) -> Self;

        //        fn rotate_left(self, n: u32) -> Self;
        //        fn rotate_right(self, n: u32) -> Self;

        /// Presumably shifts the stored bits right by `n` positions, in place.
        fn shiftr(&mut self, n: u32);

        /// Presumably shifts the stored bits left by `n` positions, in place.
        fn shiftl(&mut self, n: u32);

        /// Presumably keeps only the lowest `bits` bits — TODO confirm against the implementations.
        fn mask(&mut self, bits: usize);

        /// In-place complement. NOTE(review): the parameter is named `mask`, but the
        /// caller in this file passes a bit count (`K * A::BITS`) — confirm which is meant.
        fn complement(&mut self, mask: usize);
        /// Presumably reverses the order of the 2-bit blocks of the whole storage word, in place.
        fn rev_blocks_2(&mut self);
    }
}
104
/// Public marker for types that can back a `Kmer`.
///
/// All functionality comes from the crate-private `sealed::KmerStorage`
/// supertrait, so only the types listed below can be used as storage.
pub trait KmerStorage: sealed::KmerStorage {}

// Pointer-sized storage; this is the default for `Kmer`.
impl KmerStorage for usize {}

// Fixed 64-bit storage.
impl KmerStorage for u64 {}

// 128-bit storage for k-mers that do not fit in 64 bits.
impl KmerStorage for u128 {}
112
/// A sequence of exactly `K` symbols of codec `C`, packed into one value of the storage type `S`.
///
/// By default k-mers are backed by `usize` and `Codec::BITS` * `K` must be <= 64 on 64-bit platforms
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[repr(transparent)]
pub struct Kmer<C: Codec, const K: usize, S: KmerStorage = usize> {
    /// Zero-sized marker tying the k-mer to its codec.
    pub _p: PhantomData<C>,
    /// The packed bits; `K * C::BITS` of them are read back when the k-mer is displayed.
    pub bs: S,
}
121
122impl<A: Codec, const K: usize, S: KmerStorage> Kmer<A, K, S> {
123    // This error message can be formatted with constants in nightly (const_format)
124    const _ASSERT_K: () = assert!(
125        K * A::BITS as usize <= S::BITS,
126        "`KmerStorage` not large enough for `Kmer`",
127    );
128
129    const _ASSERT_K_NONZERO: () = assert!(K > 0, "`K` must be greater than 0");
130
131    const BITS: usize = K * A::BITS as usize;
132
133    pub fn len(&self) -> usize {
134        K
135    }
136
137    pub fn is_empty(&self) -> bool {
138        // This is recommended by clippy since we have `len`
139        // Kmers are never empty if K > 0
140        false
141    }
142
143    pub fn rotated_left(&self, n: u32) -> Self {
144        let n: usize = (n as usize % K) * A::BITS as usize;
145        let mut ba = self.bs.to_bitarray();
146        let bs: &mut Bs = ba.as_mut();
147        bs[..Self::BITS].rotate_left(n);
148
149        Kmer {
150            _p: PhantomData,
151            bs: S::from_bitslice(&bs[..Self::BITS]),
152        }
153    }
154
155    pub fn rotated_right(&self, n: u32) -> Self {
156        let n: usize = (n as usize % K) * A::BITS as usize;
157        let mut ba = self.bs.to_bitarray();
158        let bs: &mut Bs = ba.as_mut();
159        bs[..Self::BITS].rotate_right(n);
160
161        Kmer {
162            _p: PhantomData,
163            bs: S::from_bitslice(&bs[..Self::BITS]),
164        }
165    }
166
167    /// Shift bases to the right and push a base onto the end.
168    ///
169    /// ```
170    /// use bio_seq::prelude::*;
171    /// use bio_seq::codec::dna::Dna;
172    ///
173    /// let k = kmer!("ACGAT");
174    /// assert_eq!(k.pushr(Dna::T).to_string(), "CGATT");
175    /// ```
176    pub fn pushr(self, base: A) -> Self {
177        let mut ba = self.rotated_left(1).bs.to_bitarray();
178        let bs: &mut Bs = ba.as_mut();
179
180        let start = Self::BITS - A::BITS as usize;
181        let end = start + A::BITS as usize;
182
183        bs[start..end].store(base.to_bits());
184
185        Kmer {
186            _p: PhantomData,
187            bs: S::from_bitslice(bs),
188        }
189    }
190
191    /// Push a base from the left
192    pub fn pushl(self, base: A) -> Self {
193        let mut ba = self.rotated_right(1).bs.to_bitarray();
194        let bs: &mut Bs = ba.as_mut();
195
196        bs[..A::BITS as usize].store(base.to_bits());
197
198        Kmer {
199            _p: PhantomData,
200            bs: S::from_bitslice(bs),
201        }
202    }
203
204    /// Create Kmer from sequence without checking length
205    pub fn unsafe_from_seqslice(seq: &SeqSlice<A>) -> Self {
206        debug_assert!(K == seq.len(), "K != seq.len()");
207        Kmer {
208            _p: PhantomData,
209            bs: S::from_bitslice(&seq.bs),
210        }
211    }
212
213    fn complement(&mut self) {
214        self.bs.complement(K * A::BITS as usize);
215    }
216
217    fn rev_blocks_2(&mut self) {
218        // TODO: assert K == 2
219        self.bs.rev_blocks_2();
220        self.bs.shiftr((S::BITS - (A::BITS as usize * K)) as u32);
221    }
222}
223
224impl<A: Codec, const K: usize> From<usize> for Kmer<A, K, usize> {
225    fn from(i: usize) -> Kmer<A, K, usize> {
226        Kmer {
227            _p: PhantomData,
228            bs: i,
229        }
230    }
231}
232
233impl<A: Codec, const K: usize> From<u64> for Kmer<A, K, u64> {
234    fn from(i: u64) -> Kmer<A, K, u64> {
235        Kmer {
236            _p: PhantomData,
237            bs: i,
238        }
239    }
240}
241
242impl<A: Codec, const K: usize> From<usize> for Kmer<A, K, u64> {
243    fn from(i: usize) -> Kmer<A, K, u64> {
244        Kmer {
245            _p: PhantomData,
246            bs: i as u64,
247        }
248    }
249}
250
251/*
252impl<A: Codec, const K: usize, S: KmerStorage> From<&SeqSlice<A>> for Kmer<A, K, S> {
253    fn from(seq: &SeqSlice<A>) -> Self {
254        Kmer {
255            _p: PhantomData,
256            bs: S::from_bitslice(&seq.bs),
257        }
258    }
259}
260*/
261
262impl<S: KmerStorage + Into<usize>, A: Codec, const K: usize> From<&Kmer<A, K, S>> for usize {
263    fn from(kmer: &Kmer<A, K, S>) -> usize {
264        kmer.bs.into()
265    }
266}
267
268impl<A: Codec, const K: usize> Deref for Kmer<A, K, usize> {
269    type Target = SeqSlice<A>;
270
271    fn deref(&self) -> &Self::Target {
272        let bs: &Bs = &self.bs.view_bits()[0..(K * A::BITS as usize)];
273        let bs: *const Bs = ptr::from_ref::<Bs>(bs);
274        unsafe { &*(bs as *const SeqSlice<A>) }
275    }
276}
277
278impl<A: Codec, const K: usize> AsRef<SeqSlice<A>> for Kmer<A, K, usize> {
279    fn as_ref(&self) -> &SeqSlice<A> {
280        self
281    }
282}
283
284impl<A: Codec, const K: usize, S: KmerStorage> fmt::Display for Kmer<A, K, S> {
285    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
286        let mut s = String::new();
287        let ba = self.bs.to_bitarray();
288        let bs: &Bs = &ba.as_ref()[0..(K * A::BITS as usize)];
289
290        bs.chunks(A::BITS as usize).for_each(|chunk| {
291            s.push(A::unsafe_from_bits(chunk.load_le::<u8>()).to_char());
292        });
293        write!(f, "{s}")
294    }
295}
296
/// An iterator over all kmers of a sequence with a specified length
///
/// Yields one `Kmer<A, K>` per window of `K` consecutive symbols, advancing
/// one symbol at a time.
pub struct KmerIter<'a, A: Codec, const K: usize> {
    /// The sequence the windows are taken from.
    pub slice: &'a SeqSlice<A>,
    /// Start position of the next window.
    pub index: usize,
    /// Upper bound for window ends; iteration stops once `index + K > len`.
    /// Presumably equal to `slice.len()` — set by the constructor outside this file.
    pub len: usize,
    /// Zero-sized marker for the codec.
    pub _p: PhantomData<A>,
}
304
305impl<A: Codec, const K: usize, S: KmerStorage> Kmer<A, K, S> {
306    fn unsafe_from(seq: &SeqSlice<A>) -> Self {
307        debug_assert!(K == seq.len(), "K != seq.len()");
308        Kmer {
309            _p: PhantomData,
310            bs: S::from_bitslice(&seq.bs),
311        }
312    }
313}
314
315impl<A: Codec, const K: usize> Iterator for KmerIter<'_, A, K> {
316    type Item = Kmer<A, K>;
317    fn next(&mut self) -> Option<Kmer<A, K>> {
318        let i = self.index;
319        if self.index + K > self.len {
320            return None;
321        }
322        self.index += 1;
323        Some(Kmer::<A, K>::unsafe_from(&self.slice[i..i + K]))
324    }
325}
326
327/// ```
328/// use bio_seq::prelude::*;
329/// use std::hash::{Hash, Hasher, DefaultHasher};
330///
331/// let mut hasher1 = DefaultHasher::new();
332/// kmer!("AAA").hash(&mut hasher1);
333/// let hash1 = hasher1.finish();
334///
335/// let mut hasher2 = DefaultHasher::new();
336/// kmer!("AAAA").hash(&mut hasher2);
337/// let hash2 = hasher2.finish();
338///
339/// assert_ne!(hash1, hash2);
340/// ```
341impl<A: Codec, const K: usize, S: KmerStorage> Hash for Kmer<A, K, S> {
342    fn hash<H: Hasher>(&self, state: &mut H) {
343        let ba = self.bs.to_bitarray();
344        let bs: &Bs = ba.as_ref();
345        bs.hash(state);
346        K.hash(state);
347    }
348}
349
350impl<A: Codec, const K: usize, S: KmerStorage> TryFrom<&SeqSlice<A>> for Kmer<A, K, S> {
351    type Error = ParseBioError;
352
353    fn try_from(seq: &SeqSlice<A>) -> Result<Self, Self::Error> {
354        if seq.len() == K {
355            Ok(Kmer::<A, K, S>::unsafe_from(&seq[0..K]))
356        } else {
357            Err(ParseBioError::MismatchedLength(K, seq.len()))
358        }
359    }
360}
361
362impl<A: Codec, const K: usize> TryFrom<Seq<A>> for Kmer<A, K> {
363    type Error = ParseBioError;
364
365    fn try_from(seq: Seq<A>) -> Result<Self, Self::Error> {
366        Self::try_from(seq.as_ref())
367    }
368}
369
370impl<A: Codec, const K: usize, S: KmerStorage> PartialEq<SeqArray<A, K, 1>> for Kmer<A, K, S> {
371    fn eq(&self, seq: &SeqArray<A, K, 1>) -> bool {
372        if seq.len() != K {
373            return false;
374        }
375        &Kmer::<A, K, S>::unsafe_from(seq.as_ref()) == self
376    }
377}
378
379impl<A: Codec, const K: usize, S: KmerStorage> PartialEq<&SeqArray<A, K, 1>> for Kmer<A, K, S> {
380    fn eq(&self, seq: &&SeqArray<A, K, 1>) -> bool {
381        if seq.len() != K {
382            return false;
383        }
384        &Kmer::<A, K, S>::unsafe_from(seq.as_ref()) == self
385    }
386}
387
388impl<A: Codec, const K: usize> PartialEq<Seq<A>> for Kmer<A, K> {
389    fn eq(&self, seq: &Seq<A>) -> bool {
390        if seq.len() != K {
391            return false;
392        }
393        &Kmer::<A, K>::unsafe_from(seq.as_ref()) == self
394    }
395}
396
397impl<A: Codec, const K: usize, S: KmerStorage> PartialEq<SeqSlice<A>> for Kmer<A, K, S> {
398    fn eq(&self, seq: &SeqSlice<A>) -> bool {
399        if seq.len() != K {
400            return false;
401        }
402        &Kmer::<A, K, S>::unsafe_from(seq) == self
403    }
404}
405
406impl<A: Codec, const K: usize, S: KmerStorage> PartialEq<&SeqSlice<A>> for Kmer<A, K, S> {
407    fn eq(&self, seq: &&SeqSlice<A>) -> bool {
408        if seq.len() != K {
409            return false;
410        }
411        &Kmer::<A, K, S>::unsafe_from(seq) == self
412    }
413}
414
415impl<A: Codec, const K: usize> PartialEq<&str> for Kmer<A, K> {
416    fn eq(&self, seq: &&str) -> bool {
417        &self.to_string() == seq
418    }
419}
420
421impl<A: Codec, const K: usize, S: KmerStorage> FromStr for Kmer<A, K, S> {
422    type Err = ParseBioError;
423
424    fn from_str(s: &str) -> Result<Self, Self::Err> {
425        if s.len() != K {
426            return Err(ParseBioError::MismatchedLength(K, s.len()));
427        }
428        let seq: Seq<A> = Seq::from_str(s)?;
429        Kmer::<A, K, S>::try_from(seq.as_ref())
430    }
431}
432
433impl<A: Codec, const K: usize> From<Kmer<A, K, usize>> for Seq<A> {
434    fn from(kmer: Kmer<A, K, usize>) -> Self {
435        let mut seq: Seq<A> = Seq::with_capacity(K);
436        seq.extend(kmer.iter());
437        seq
438    }
439}
440
/// In-place complement for DNA k-mers in the default `usize` storage;
/// forwards to the private `Kmer::complement`.
impl<const K: usize> ComplementMut for Kmer<codec::dna::Dna, K, usize> {
    fn comp(&mut self) {
        self.complement();
    }
}

// Empty body: only the trait's provided methods are used.
impl<const K: usize> Complement for Kmer<codec::dna::Dna, K, usize> {}

/// In-place reversal for `usize`-backed k-mers of any codec;
/// forwards to the private `Kmer::rev_blocks_2`.
impl<A: Codec, const K: usize> ReverseMut for Kmer<A, K, usize> {
    fn rev(&mut self) {
        self.rev_blocks_2();
    }
}

// Empty body: only the trait's provided methods are used.
impl<A: Codec, const K: usize> Reverse for Kmer<A, K, usize> {}

// Empty body: only the trait's provided methods are used.
impl<const K: usize> ReverseComplementMut for Kmer<codec::dna::Dna, K, usize> {}

// Empty body: only the trait's provided methods are used.
impl<const K: usize> ReverseComplement for Kmer<codec::dna::Dna, K, usize> {}
460
/// Convenient compile time kmer constructor
///
/// This is a wrapper for the `dna!` macro that returns a `Kmer`:
/// ```
/// # use bio_seq::prelude::*;
/// let kmer: Kmer<Dna, 8> = kmer!("ACGTACGT");
/// ```
///
/// `K` is taken from the length of the literal. An optional second argument
/// selects the storage type instead of the default.
///
/// The expansion names `Kmer`, `Dna` and `dna!` without a path, so all three
/// must be in scope where the macro is invoked.
#[macro_export]
macro_rules! kmer {
    // Default storage.
    ($seq:expr) => {
        Kmer::<Dna, { $seq.len() }>::unsafe_from_seqslice(dna!($seq))
    };
    // Explicit storage type.
    ($seq:expr, $storage:ty) => {
        Kmer::<Dna, { $seq.len() }, $storage>::unsafe_from_seqslice(dna!($seq))
    };
}
477
#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use crate::seq::SeqArray;

    /// Each DNA 2-mer converts to the expected packed integer.
    #[test]
    fn kmer_to_usize() {
        let s: &'static SeqSlice<Dna> = dna!("AACTT");
        println!("{}", s.to_string());

        for (kmer, index) in s.kmers::<2>().zip([0b00_00, 0b01_00, 0b11_01, 0b11_11]) {
            println!("{kmer}");
            assert_eq!(index as usize, (&kmer).into());
        }
    }

    /// `pushl` inserts at the front and drops the last base.
    #[test]
    fn pushl_test() {
        let k = kmer!("ACGT");
        let k1 = k.pushl(Dna::G);
        let k2 = k1.pushl(Dna::A);
        let k3 = k2.pushl(Dna::T);
        let k4 = k3.pushl(Dna::C);
        let k5 = k4.pushl(Dna::C);

        assert_eq!(k1, kmer!("GACG"));
        assert_eq!(k2, kmer!("AGAC"));
        assert_eq!(k3, kmer!("TAGA"));
        assert_eq!(k4, kmer!("CTAG"));
        assert_eq!(k5, kmer!("CCTA"));
    }

    /// `pushr` appends at the end and drops the first base.
    #[test]
    fn pushr_test() {
        let k = kmer!("ACGT");
        let k1 = k.pushr(Dna::G);
        let k2 = k1.pushr(Dna::A);
        let k3 = k2.pushr(Dna::T);
        let k4 = k3.pushr(Dna::C);
        let k5 = k4.pushr(Dna::C);

        println!("{}", k1);
        assert_eq!(k1, kmer!("CGTG"));
        assert_eq!(k2, kmer!("GTGA"));
        assert_eq!(k3, kmer!("TGAT"));
        assert_eq!(k4, kmer!("GATC"));
        assert_eq!(k5, kmer!("ATCC"));
    }

    /// Amino acid 2-mers convert to the expected packed integers.
    #[test]
    fn amino_kmer_to_usize() {
        for (kmer, index) in Seq::<Amino>::try_from("SRY")
            .unwrap()
            .kmers::<2>()
            .zip([0b001000_011000, 0b010011_001000])
        {
            assert_eq!(index as usize, usize::from(&kmer));
        }
    }

    /// Repeated `pushr` on a k-mer that fills the whole `u64`.
    #[test]
    fn big_kmer_shiftr() {
        let mut kmer: Kmer<Dna, 32, u64> = kmer!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTA", u64);
        for base in dna!("TACTATGAGGACGATCAGCACCATAAGAACAAA").into_iter() {
            kmer = kmer.pushr(base);
        }
        assert_eq!(kmer!("ACTATGAGGACGATCAGCACCATAAGAACAAA", u64), kmer);
    }

    /// Repeated `pushl` on a k-mer that fills the whole `u64`.
    #[test]
    fn big_kmer_shiftl() {
        let mut kmer: Kmer<Dna, 32, u64> = kmer!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTA", u64);
        for base in dna!("GTACTATGAGGACGATCAGCACCATAAGAACAAA").into_iter() {
            kmer = kmer.pushl(base);
        }
        assert_eq!(kmer!("AAACAAGAATACCACGACTAGCAGGAGTATCA", u64), kmer);
    }

    /// Iterating amino acid 3-mers yields every window in order.
    #[test]
    fn amino_kmer_iter() {
        for (kmer, target) in Seq::<Amino>::try_from("SSLMNHKKL")
            .unwrap()
            .kmers::<3>()
            .zip(["SSL", "SLM", "LMN", "MNH", "NHK", "HKK", "KKL"])
        {
            assert_eq!(kmer, target);
        }
    }

    /// Rotations wrap around and take the shift modulo `K`.
    #[test]
    fn test_rotations() {
        let kmer: Kmer<Dna, 9> = Kmer::try_from(dna!("ACTGCGATG")).unwrap();

        for (shift, rotation) in [
            "ACTGCGATG",
            "CTGCGATGA",
            "TGCGATGAC",
            "GCGATGACT",
            "CGATGACTG",
            "GATGACTGC",
            "ATGACTGCG",
            "TGACTGCGA",
            "GACTGCGAT",
            "ACTGCGATG",
            "CTGCGATGA",
            "TGCGATGAC",
        ]
        .into_iter()
        .enumerate()
        {
            //            println!("{} {} {}", shift, kmer.rotated_left(shift as u32), rotation);
            assert_eq!(kmer.rotated_left(shift as u32), rotation);
        }

        for (shift, rotation) in [
            "ACTGCGATG",
            "GACTGCGAT",
            "TGACTGCGA",
            "ATGACTGCG",
            "GATGACTGC",
            "CGATGACTG",
            "GCGATGACT",
            "TGCGATGAC",
            "CTGCGATGA",
            "ACTGCGATG",
            "GACTGCGAT",
        ]
        .into_iter()
        .enumerate()
        {
            //            println!("{} {} {}", shift, kmer.rotated_right(shift as u32), rotation);
            assert_eq!(kmer.rotated_right(shift as u32), rotation);
        }

        let kmer: Kmer<Dna, 8> = Kmer::try_from(dna!("ACTGCGAT")).unwrap().rotated_left(1);

        assert_ne!(kmer.to_string(), "ACTGCGAT");
        assert_eq!(kmer.to_string(), "CTGCGATA");

        let kmer: Kmer<Dna, 8> = kmer!("ACTGCGAT").rotated_right(1);

        assert_ne!(kmer.to_string(), "ACTGCGAT");
        assert_eq!(kmer.to_string(), "TACTGCGA");

        let kmer: Kmer<Dna, 9> = Kmer::from_str("ACTGCGATG").unwrap().rotated_left(0);

        assert_eq!(kmer.to_string(), "ACTGCGATG");
        assert_ne!(kmer.to_string(), "ACTGCGATGA");

        let kmer: Kmer<Dna, 9> = Kmer::try_from(dna!("ACTGCGATG")).unwrap().rotated_right(0);

        assert_eq!(kmer.to_string(), "ACTGCGATG");
        assert_ne!(kmer.to_string(), "ACTGCGATGA");

        let kmer: Kmer<Dna, 9> = Kmer::from_str("ACTGCGATG").unwrap().rotated_left(9 * 3307);

        assert_eq!(kmer.to_string(), "ACTGCGATG");
        assert_ne!(kmer.to_string(), "ACTGCGATGA");

        let kmer: Kmer<Dna, 9> = kmer!("ACTGCGATG").rotated_right(9 * 3307);

        assert_eq!(kmer.to_string(), "ACTGCGATG");
        assert_ne!(kmer.to_string(), "ACTGCGATGA");
    }

    /// K-mers compare equal to sequence literals with the same content.
    #[test]
    fn eq_functions() {
        assert_eq!(kmer!("ACGT"), dna!("ACGT"));

        // this should be a compiler error:
        // assert_ne!(kmer!("ACGT"), dna!("ACGTA"));

        let kmer: Kmer<Iupac, 4> = Kmer::from_str("ACGT").unwrap();
        assert_eq!(kmer, iupac!("ACGT"));
        assert_ne!(kmer, iupac!("NCGT"));
    }

    /// The k-mer iterator yields every window exactly once.
    #[test]
    fn kmer_iter() {
        //let seq = dna!("ACTGA");
        let cs: Vec<Kmer<Dna, 3>> = dna!("ACTGA").kmers().collect();
        assert_eq!(cs[0], "ACT");
        assert_eq!(cs[1], "CTG");
        assert_eq!(cs[2], "TGA");
        assert_eq!(cs.len(), 3);
    }

    /// K-mers that fit in the default storage can be built from integers.
    #[test]
    fn k_check() {
        let _kmer = Kmer::<Dna, 32>::from(0);
        let _kmer = Kmer::<Amino, 10>::from(0);
        let _kmer = Kmer::<Iupac, 14>::from(0);
    }

    /// Reverse complement of DNA k-mers, including a palindrome.
    #[test]
    fn kmer_revcomp() {
        assert_eq!(kmer!("ACGT"), kmer!("ACGT").to_revcomp());
        assert_ne!(kmer!("GTCGTA"), kmer!("TACGAC"));

        let rc = kmer!("GTCGTA").to_revcomp();

        assert_eq!(rc, kmer!("TACGAC"));

        assert_eq!(
            kmer!("GCTATCGATCTGATCG"),
            kmer!("CGATCAGATCGATAGC").to_revcomp()
        );
    }

    /// A k-mer dereferences to a `SeqSlice` with the same content.
    #[test]
    fn kmer_deref() {
        let kmer: Kmer<Dna, 3> = kmer!("ACG");
        let seq: &SeqSlice<Dna> = &kmer;

        assert_eq!(*seq, *kmer);
        assert_eq!(seq.to_string(), "ACG");

        let kmer: Kmer<Dna, 8> = kmer!("AAAAAAAA");
        let seq: &SeqSlice<Dna> = &kmer;

        assert_eq!(seq.to_string(), "AAAAAAAA");

        let kmer: Kmer<Dna, 8> = kmer!("TTTTTTTT");
        let seq: &SeqSlice<Dna> = &kmer;

        assert_eq!(seq.to_string(), "TTTTTTTT");

        let kmer: Kmer<Dna, 16> = kmer!("AGCTAGCTAGCTAGCT");
        let seq: &SeqSlice<Dna> = &kmer;

        assert_eq!(seq.to_string(), "AGCTAGCTAGCTAGCT");
    }

    /// `as_ref` gives the same view as dereferencing.
    #[test]
    fn kmer_as_ref() {
        let kmer: Kmer<Dna, 4> = kmer!("ACGT");
        let seq: &SeqSlice<Dna> = &kmer.as_ref();

        assert_eq!(seq.to_string(), "ACGT");

        let kmer: Kmer<Dna, 16> = kmer!("AGCTAGCTAGCTAGCT");
        let seq: &SeqSlice<Dna> = &kmer.as_ref();

        assert_eq!(seq.to_string(), "AGCTAGCTAGCTAGCT");
    }

    /// In-place reversal is its own inverse.
    #[test]
    fn kmer_rev() {
        let mut kmer: Kmer<Dna, 4> = kmer!("ACGT");

        kmer.rev();

        assert_eq!(kmer.to_string(), "TGCA");

        kmer.rev();

        assert_eq!(kmer.to_string(), "ACGT");

        kmer.rev();

        assert_eq!(kmer.to_string(), "TGCA");
    }

    /// The same sequences behave identically in `usize`, `u64` and `u128` storage.
    #[test]
    fn kmer_storage_types() {
        let s1 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAAA";
        let s2 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAA";
        let s3 = "ACGTAGCCGCGAACTTACGTAGCCGCGAAAA";

        let s4 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAAAAACGTAGCCGCGAACTTACGTAGCCGCGAAAA";
        let s5 = "AACGTAGCCGCGAACTTACGTAGCCGCGAAAAAACGTAGCCGCGAACTTACGTAGCCGCGAAAAA";

        assert_eq!(s1.len(), 32);
        assert_eq!(s2.len(), 31);
        assert_eq!(s3.len(), 31);
        assert_eq!(s4.len(), 64);
        assert_eq!(s5.len(), 65);

        let kmer1_64 = Kmer::<Dna, 32, u64>::from_str(s1).unwrap();
        let kmer2_64 = Kmer::<Dna, 31, u64>::from_str(s2).unwrap();
        let kmer3_64 = Kmer::<Dna, 31, u64>::from_str(s3).unwrap();

        // Default (`usize`) storage, so that both 64-bit-wide storage types are covered.
        let kmer1 = Kmer::<Dna, 32>::from_str(s1).unwrap();
        let kmer2 = Kmer::<Dna, 31>::from_str(s2).unwrap();
        let kmer3 = Kmer::<Dna, 31>::from_str(s3).unwrap();

        let kmer4_128 = Kmer::<Dna, 64, u128>::from_str(s4).unwrap();

        let seq5: Seq<Dna> = s5.try_into().unwrap();

        assert_eq!(kmer4_128, &seq5[..64]);
        assert_ne!(kmer4_128, &seq5[1..]);

        assert_eq!(kmer1, &seq5[..32]);
        assert_eq!(kmer1, &seq5[32..64]);

        assert_eq!(kmer1_64, &seq5[..32]);
        assert_eq!(kmer1_64, &seq5[32..64]);

        assert_ne!(kmer1, &seq5[..31]);
        assert_ne!(kmer1, &seq5[32..]);

        assert_ne!(kmer1_64, &seq5[1..33]);
        assert_ne!(kmer1_64, &seq5[33..]);

        assert_eq!(kmer4_128, kmer4_128);
        assert_eq!(kmer1_64, kmer1_64);
        assert_eq!(kmer2, kmer2);

        assert_ne!(kmer2, kmer3);
        assert_ne!(kmer2_64, kmer3_64);
        // PartialEq is not implemented for different storage types
        /*
                assert_ne!(kmer2, kmer3_64);
                assert_eq!(kmer2, kmer2_64);
                assert_eq!(kmer1, kmer1_64);
        */
    }

    /// `TryFrom` accepts slices and sequences of length `K` and rejects others.
    #[test]
    fn try_from_seq() {
        let seq: Seq<Dna> = Seq::try_from("ACACACACACACGT").unwrap();
        assert_eq!(
            Kmer::<Dna, 8>::try_from(&seq[..8]).unwrap().to_string(),
            "ACACACAC"
        );
        assert_eq!(
            Kmer::<Dna, 8>::try_from(&seq[1..9]).unwrap().to_string(),
            "CACACACA"
        );

        let err: Result<Kmer<Dna, 8>, ParseBioError> = Kmer::try_from(&seq[2..9]);
        assert_eq!(err, Err(ParseBioError::MismatchedLength(8, 7)));

        let err: Result<Kmer<Dna, 8>, ParseBioError> = Kmer::try_from(seq);
        assert_eq!(err, Err(ParseBioError::MismatchedLength(8, 14)));

        let seq: Seq<Dna> = Seq::try_from("ACACACACACACGT").unwrap();

        assert_eq!(
            Kmer::<Dna, 14>::try_from(seq).unwrap().to_string(),
            "ACACACACACACGT"
        );
    }
}