compact_genome/implementation/
bit_array_kmer.rs

1//! A simple representation of a k-mer as an array.
2
3use crate::implementation::bit_vec_sequence::alphabet_character_bit_width;
4use crate::implementation::bit_vec_sequence::{BitVectorSubGenome, BitVectorSubGenomeIterator};
5use crate::interface::alphabet::Alphabet;
6use crate::interface::alphabet::AlphabetCharacter;
7use crate::interface::k_mer::{Kmer, OwnedKmer};
8use crate::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
9use bitvec::array::BitArray;
10use bitvec::field::BitField;
11pub use bitvec::store::BitStore;
12pub use bitvec::view::BitView;
13pub use bitvec::view::BitViewSized;
14use ref_cast::RefCast;
15use std::marker::PhantomData;
16use std::ops::{Index, Range};
17use traitsequence::interface::{OwnedSequence, Sequence};
18
19/// A k-mer stored as array of minimum-bit characters.
20#[derive(Debug)]
21pub struct BitArrayKmer<const K: usize, AlphabetType: Alphabet, BitArrayType = usize>
22where
23    BitArrayType: BitViewSized + BitStore,
24{
25    phantom_data: PhantomData<AlphabetType>,
26    array: BitArray<BitArrayType>,
27}
28
29impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore>
30    Kmer<K, AlphabetType, BitVectorSubGenome<AlphabetType, BitArrayType>>
31    for BitVectorSubGenome<AlphabetType, BitArrayType>
32{
33}
34
35impl<
36        const K: usize,
37        AlphabetType: Alphabet,
38        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
39    > OwnedKmer<K, AlphabetType, BitVectorSubGenome<AlphabetType, BitArrayType>>
40    for BitArrayKmer<K, AlphabetType, BitArrayType>
41{
42    fn successor(&self, successor: <AlphabetType as Alphabet>::CharacterType) -> Self {
43        if K == 0 {
44            return self.clone();
45        }
46
47        let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
48
49        let mut array = self.array.clone();
50        array.shift_left(bit_width);
51
52        let offset = (K - 1) * bit_width;
53        let limit = K * bit_width;
54
55        array[offset..limit].store(successor.index());
56
57        Self {
58            phantom_data: Default::default(),
59            array,
60        }
61    }
62}
63
64impl<
65        const K: usize,
66        AlphabetType: Alphabet,
67        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
68    > GenomeSequence<AlphabetType, BitVectorSubGenome<AlphabetType, BitArrayType>>
69    for BitArrayKmer<K, AlphabetType, BitArrayType>
70{
71    fn as_genome_subsequence(&self) -> &BitVectorSubGenome<AlphabetType, BitArrayType> {
72        BitVectorSubGenome::ref_cast(
73            &self.array.as_bitslice()[..K * alphabet_character_bit_width(AlphabetType::SIZE)],
74        )
75    }
76}
77
78impl<
79        const K: usize,
80        AlphabetType: Alphabet,
81        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
82    > Sequence<AlphabetType::CharacterType, BitVectorSubGenome<AlphabetType, BitArrayType>>
83    for BitArrayKmer<K, AlphabetType, BitArrayType>
84{
85    type Iterator<'a>
86        = BitVectorSubGenomeIterator<'a, AlphabetType, BitArrayType>
87    where
88        Self: 'a,
89        AlphabetType::CharacterType: 'a;
90
91    fn iter(&self) -> Self::Iterator<'_> {
92        self.as_genome_subsequence().iter()
93    }
94
95    fn len(&self) -> usize {
96        K
97    }
98}
99
100impl<
101        const K: usize,
102        AlphabetType: Alphabet,
103        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
104    > OwnedGenomeSequence<AlphabetType, BitVectorSubGenome<AlphabetType, BitArrayType>>
105    for BitArrayKmer<K, AlphabetType, BitArrayType>
106{
107}
108
109impl<
110        const K: usize,
111        AlphabetType: Alphabet,
112        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
113    > OwnedSequence<AlphabetType::CharacterType, BitVectorSubGenome<AlphabetType, BitArrayType>>
114    for BitArrayKmer<K, AlphabetType, BitArrayType>
115{
116}
117
118impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore>
119    FromIterator<AlphabetType::CharacterType> for BitArrayKmer<K, AlphabetType, BitArrayType>
120{
121    fn from_iter<T: IntoIterator<Item = AlphabetType::CharacterType>>(iter: T) -> Self {
122        let mut array: BitArray<BitArrayType> =
123            <BitArrayType as BitViewSized>::ZERO.into_bitarray();
124        let mut iter = iter.into_iter();
125
126        for index in 0..K {
127            let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
128            let offset = index * bit_width;
129            let limit = (index + 1) * bit_width;
130
131            let character = iter.next().unwrap();
132            array[offset..limit].store(character.index());
133        }
134        assert!(iter.next().is_none());
135
136        Self {
137            phantom_data: Default::default(),
138            array,
139        }
140    }
141}
142
143impl<
144        const K: usize,
145        AlphabetType: Alphabet,
146        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
147    > Index<Range<usize>> for BitArrayKmer<K, AlphabetType, BitArrayType>
148{
149    type Output = BitVectorSubGenome<AlphabetType, BitArrayType>;
150
151    fn index(&self, index: Range<usize>) -> &Self::Output {
152        self.as_genome_subsequence().index(index)
153    }
154}
155
156impl<
157        const K: usize,
158        AlphabetType: Alphabet,
159        BitArrayType: BitViewSized + BitStore + BitView<Store = BitArrayType>,
160    > Index<usize> for BitArrayKmer<K, AlphabetType, BitArrayType>
161{
162    type Output = AlphabetType::CharacterType;
163
164    fn index(&self, index: usize) -> &Self::Output {
165        self.as_genome_subsequence().index(index)
166    }
167}
168
169impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore> Clone
170    for BitArrayKmer<K, AlphabetType, BitArrayType>
171{
172    fn clone(&self) -> Self {
173        Self {
174            phantom_data: PhantomData,
175            array: self.array.clone(),
176        }
177    }
178}
179
180impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore + Copy> Copy
181    for BitArrayKmer<K, AlphabetType, BitArrayType>
182{
183}
184
185impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore> PartialEq
186    for BitArrayKmer<K, AlphabetType, BitArrayType>
187{
188    fn eq(&self, other: &Self) -> bool {
189        self.array == other.array
190    }
191}
192
193impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore> Eq
194    for BitArrayKmer<K, AlphabetType, BitArrayType>
195{
196}
197
198impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore> PartialOrd
199    for BitArrayKmer<K, AlphabetType, BitArrayType>
200{
201    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202        Some(self.cmp(other))
203    }
204}
205
206impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore> Ord
207    for BitArrayKmer<K, AlphabetType, BitArrayType>
208{
209    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
210        self.array.cmp(&other.array)
211    }
212}
213
214impl<const K: usize, AlphabetType: Alphabet, BitArrayType: BitViewSized + BitStore> core::hash::Hash
215    for BitArrayKmer<K, AlphabetType, BitArrayType>
216{
217    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
218        self.array.hash(state)
219    }
220}
221
#[cfg(feature = "serde")]
mod serde {
    use bitvec::{array::BitArray, store::BitStore, view::BitViewSized};
    use serde::{Deserialize, Serialize};

    use crate::interface::alphabet::Alphabet;

    use super::BitArrayKmer;

    impl<const K: usize, AlphabetType, BitArrayType> Serialize
        for BitArrayKmer<K, AlphabetType, BitArrayType>
    where
        AlphabetType: Alphabet,
        BitArrayType: BitViewSized + BitStore + Serialize,
    {
        /// Serialises the k-mer as the raw storage value of its bit array.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            Serialize::serialize(&self.array.data, serializer)
        }
    }

    impl<'de, const K: usize, AlphabetType, BitArrayType> Deserialize<'de>
        for BitArrayKmer<K, AlphabetType, BitArrayType>
    where
        AlphabetType: Alphabet,
        BitArrayType: BitViewSized + BitStore + Deserialize<'de>,
    {
        /// Deserialises the raw storage value and wraps it into a k-mer.
        ///
        /// The deserialised bits are taken as-is; no validation is performed on them.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            BitArrayType::deserialize(deserializer).map(|data| Self {
                phantom_data: Default::default(),
                array: BitArray {
                    _ord: Default::default(),
                    data,
                },
            })
        }

        /// Deserialises directly into the storage of an existing k-mer.
        fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let storage = &mut place.array.data;
            BitArrayType::deserialize_in_place(deserializer, storage)
        }
    }
}
273
#[cfg(test)]
mod tests {
    use traitsequence::interface::Sequence;

    use crate::{
        implementation::alphabets::dna_alphabet::{DnaAlphabet, DnaCharacter},
        interface::{k_mer::OwnedKmer, sequence::OwnedGenomeSequence},
    };

    use super::BitArrayKmer;

    /// Converts ASCII nucleotides into DNA characters, panicking on an invalid byte.
    fn dna(characters: &[u8]) -> Vec<DnaCharacter> {
        characters
            .iter()
            .map(|&character| DnaCharacter::try_from(character).unwrap())
            .collect()
    }

    /// Appending each nucleotide to `ACGT` must drop the leading `A` and add the new character.
    #[test]
    fn successor() {
        let kmer = BitArrayKmer::<4, DnaAlphabet>::from_slice_u8(b"ACGT").unwrap();

        // The original k-mer must read back unchanged.
        assert_eq!(kmer.iter().cloned().collect::<Vec<_>>(), dna(b"ACGT"));

        let cases = [
            (b'A', b"CGTA"),
            (b'C', b"CGTC"),
            (b'G', b"CGTG"),
            (b'T', b"CGTT"),
        ];
        for (appended, expected) in cases {
            let successor = kmer.successor(appended.try_into().unwrap());
            assert_eq!(
                successor.iter().cloned().collect::<Vec<_>>(),
                dna(expected),
            );
        }
    }
}
343}