compact_genome/implementation/
array_kmer.rs

1//! A simple representation of a k-mer as an array.
2
3use crate::implementation::vec_sequence::SliceSubGenome;
4use crate::interface::alphabet::Alphabet;
5use crate::interface::k_mer::{Kmer, OwnedKmer};
6use crate::interface::sequence::{GenomeSequence, GenomeSequenceMut, OwnedGenomeSequence};
7use ref_cast::RefCast;
8use std::ops::{Index, IndexMut, Range};
9use traitsequence::interface::{OwnedSequence, Sequence, SequenceMut};
10
11/// A k-mer stored as array of plain characters.
12#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
13pub struct ArrayKmer<const K: usize, AlphabetType: Alphabet> {
14    array: [AlphabetType::CharacterType; K],
15}
16
17impl<const K: usize, AlphabetType: Alphabet> Kmer<K, AlphabetType, SliceSubGenome<AlphabetType>>
18    for SliceSubGenome<AlphabetType>
19{
20}
21
22impl<const K: usize, AlphabetType: Alphabet>
23    OwnedKmer<K, AlphabetType, SliceSubGenome<AlphabetType>> for ArrayKmer<K, AlphabetType>
24{
25    fn successor(&self, successor: <AlphabetType as Alphabet>::CharacterType) -> Self {
26        let mut array = self.array.clone();
27
28        array.rotate_left(1);
29        array[array.len() - 1] = successor;
30
31        Self { array }
32    }
33}
34
35impl<const K: usize, AlphabetType: Alphabet>
36    GenomeSequence<AlphabetType, SliceSubGenome<AlphabetType>> for ArrayKmer<K, AlphabetType>
37{
38    fn as_genome_subsequence(&self) -> &SliceSubGenome<AlphabetType> {
39        SliceSubGenome::ref_cast(&self.array[..])
40    }
41}
42
43impl<const K: usize, AlphabetType: Alphabet>
44    Sequence<AlphabetType::CharacterType, SliceSubGenome<AlphabetType>>
45    for ArrayKmer<K, AlphabetType>
46{
47    type Iterator<'a>
48        = std::slice::Iter<'a, AlphabetType::CharacterType>
49    where
50        Self: 'a,
51        AlphabetType::CharacterType: 'a;
52
53    fn iter(&self) -> Self::Iterator<'_> {
54        self.array.iter()
55    }
56
57    fn len(&self) -> usize {
58        self.array.len()
59    }
60}
61
62impl<const K: usize, AlphabetType: Alphabet>
63    OwnedGenomeSequence<AlphabetType, SliceSubGenome<AlphabetType>> for ArrayKmer<K, AlphabetType>
64{
65}
66
67impl<const K: usize, AlphabetType: Alphabet>
68    OwnedSequence<AlphabetType::CharacterType, SliceSubGenome<AlphabetType>>
69    for ArrayKmer<K, AlphabetType>
70{
71}
72
73impl<const K: usize, AlphabetType: Alphabet>
74    GenomeSequenceMut<AlphabetType, SliceSubGenome<AlphabetType>> for ArrayKmer<K, AlphabetType>
75{
76    fn as_genome_subsequence_mut(&mut self) -> &mut SliceSubGenome<AlphabetType> {
77        SliceSubGenome::ref_cast_mut(&mut self.array[..])
78    }
79}
80
81impl<const K: usize, AlphabetType: Alphabet>
82    SequenceMut<AlphabetType::CharacterType, SliceSubGenome<AlphabetType>>
83    for ArrayKmer<K, AlphabetType>
84{
85    type IteratorMut<'a>
86        = std::slice::IterMut<'a, AlphabetType::CharacterType>
87    where
88        Self: 'a,
89        AlphabetType::CharacterType: 'a;
90
91    fn iter_mut(&mut self) -> Self::IteratorMut<'_> {
92        self.array.iter_mut()
93    }
94}
95
96impl<const K: usize, AlphabetType: Alphabet> FromIterator<AlphabetType::CharacterType>
97    for ArrayKmer<K, AlphabetType>
98{
99    fn from_iter<T: IntoIterator<Item = AlphabetType::CharacterType>>(iter: T) -> Self {
100        Self {
101            array: iter
102                .into_iter()
103                .collect::<Vec<_>>()
104                .try_into()
105                .unwrap_or_else(|error: Vec<_>| {
106                    panic!("iterator is not of length k = {K}, but {}", error.len())
107                }),
108        }
109    }
110}
111
112impl<const K: usize, AlphabetType: Alphabet> Index<Range<usize>> for ArrayKmer<K, AlphabetType> {
113    type Output = SliceSubGenome<AlphabetType>;
114
115    fn index(&self, index: Range<usize>) -> &Self::Output {
116        self.as_genome_subsequence().index(index)
117    }
118}
119
120impl<const K: usize, AlphabetType: Alphabet> Index<usize> for ArrayKmer<K, AlphabetType> {
121    type Output = AlphabetType::CharacterType;
122
123    fn index(&self, index: usize) -> &Self::Output {
124        self.as_genome_subsequence().index(index)
125    }
126}
127
128impl<const K: usize, AlphabetType: Alphabet> IndexMut<Range<usize>> for ArrayKmer<K, AlphabetType> {
129    fn index_mut(&mut self, index: Range<usize>) -> &mut Self::Output {
130        self.as_genome_subsequence_mut().index_mut(index)
131    }
132}
133
134impl<const K: usize, AlphabetType: Alphabet> IndexMut<usize> for ArrayKmer<K, AlphabetType> {
135    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
136        self.as_genome_subsequence_mut().index_mut(index)
137    }
138}
139
#[cfg(test)]
mod tests {
    use traitsequence::interface::Sequence;

    use crate::{
        implementation::alphabets::dna_alphabet::{DnaAlphabet, DnaCharacter},
        interface::{k_mer::OwnedKmer, sequence::OwnedGenomeSequence},
    };

    use super::ArrayKmer;

    /// Parses ASCII nucleotides into DNA characters, panicking on invalid input.
    fn dna(ascii: &[u8]) -> Vec<DnaCharacter> {
        ascii
            .iter()
            .map(|&byte| DnaCharacter::try_from(byte).unwrap())
            .collect()
    }

    /// Appending any nucleotide to `ACGT` drops the leading `A`, keeps `CGT`
    /// and places the appended nucleotide last; the source k-mer is unchanged.
    #[test]
    fn successor() {
        let kmer = ArrayKmer::<4, DnaAlphabet>::from_slice_u8(b"ACGT").unwrap();

        // Pairs of (appended nucleotide, expected successor).
        let cases: [(u8, &[u8]); 4] = [
            (b'A', b"CGTA"),
            (b'C', b"CGTC"),
            (b'G', b"CGTG"),
            (b'T', b"CGTT"),
        ];

        // All successors are computed before any assertion runs.
        let successors: Vec<_> = cases
            .iter()
            .map(|&(appended, _)| kmer.successor(appended.try_into().unwrap()))
            .collect();

        assert_eq!(kmer.iter().cloned().collect::<Vec<_>>(), dna(b"ACGT"));

        for (successor, &(_, expected)) in successors.iter().zip(cases.iter()) {
            assert_eq!(successor.iter().cloned().collect::<Vec<_>>(), dna(expected));
        }
    }
}