compact_genome/implementation/
bit_vec_sequence_store.rs

1//! A vector sequence store where each character is encoded as two bits.
2
3use crate::implementation::bit_vec_sequence::{
4    alphabet_character_bit_width, BitVectorGenome, BitVectorSubGenome,
5};
6use crate::interface::alphabet::{Alphabet, AlphabetCharacter, AlphabetError};
7use crate::interface::sequence::GenomeSequence;
8use crate::interface::sequence_store::{
9    HandleWithLength, HandleWithSubsequence, InverseMappingSequenceStore, SequenceStore,
10};
11use bitvec::order::Lsb0;
12use bitvec::view::BitView;
13use std::marker::PhantomData;
14use traitsequence::interface::Sequence;
15
16/// A bitvector based sequence store.
17#[derive(Default, Clone, Eq, PartialEq, Debug)]
18pub struct BitVectorSequenceStore<AlphabetType: Alphabet> {
19    sequence: BitVectorGenome<AlphabetType>,
20}
21
22/// A handle of a sequence in an [BitVectorSequenceStore].
23#[derive(Default, Debug, Clone, Copy, Eq, PartialEq)]
24pub struct BitVectorSequenceStoreHandle<AlphabetType: Alphabet> {
25    offset: usize,
26    len: usize,
27    phantom_data: PhantomData<AlphabetType>,
28}
29
30impl<AlphabetType: Alphabet> BitVectorSequenceStore<AlphabetType> {
31    /// Creates a new instance.
32    pub fn new() -> Self {
33        Self {
34            sequence: Default::default(),
35        }
36    }
37
38    /// Returns the number of bytes consumed by the characters stored in this sequence store.
39    pub fn size_in_memory(&self) -> usize {
40        (self.sequence.len() - 1) / 4 + 1 // Rounding up integer division
41    }
42}
43
44impl<AlphabetType: Alphabet + 'static> SequenceStore<AlphabetType>
45    for BitVectorSequenceStore<AlphabetType>
46{
47    type Handle = BitVectorSequenceStoreHandle<AlphabetType>;
48    type SequenceRef = BitVectorSubGenome<AlphabetType>;
49
50    fn add<
51        Sequence: GenomeSequence<AlphabetType, Subsequence> + ?Sized,
52        Subsequence: GenomeSequence<AlphabetType, Subsequence> + ?Sized,
53    >(
54        &mut self,
55        s: &Sequence,
56    ) -> Self::Handle {
57        let offset = self.sequence.len();
58        let len = s.len();
59        self.sequence.extend(s.iter().cloned());
60        Self::Handle {
61            offset,
62            len,
63            phantom_data: Default::default(),
64        }
65    }
66
67    fn add_from_iter(
68        &mut self,
69        iter: impl IntoIterator<Item = <AlphabetType as Alphabet>::CharacterType>,
70    ) -> Self::Handle {
71        let offset = self.sequence.len();
72        let iter = iter.into_iter();
73        let (size, _) = iter.size_hint();
74        let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
75        self.sequence.bits.reserve(size * bit_width);
76        for character in iter {
77            self.sequence
78                .bits
79                .extend_from_bitslice(&character.index().view_bits::<Lsb0>()[0..bit_width]);
80        }
81
82        let len = self.sequence.len() - offset;
83        Self::Handle {
84            offset,
85            len,
86            phantom_data: Default::default(),
87        }
88    }
89
90    fn add_from_iter_u8<IteratorType: IntoIterator<Item = u8>>(
91        &mut self,
92        iter: IteratorType,
93    ) -> Result<Self::Handle, AlphabetError> {
94        let offset = self.sequence.len();
95        let iter = iter.into_iter();
96        let (size, _) = iter.size_hint();
97        let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
98        self.sequence.bits.reserve(size * bit_width);
99        for item in iter {
100            match AlphabetType::ascii_to_character(item) {
101                Ok(character) => self
102                    .sequence
103                    .bits
104                    .extend_from_bitslice(&character.index().view_bits::<Lsb0>()[0..bit_width]),
105
106                Err(error) => {
107                    self.sequence.bits.resize(offset * bit_width, false);
108                    return Err(error);
109                }
110            }
111        }
112
113        let len = self.sequence.len() - offset;
114        Ok(Self::Handle {
115            offset,
116            len,
117            phantom_data: Default::default(),
118        })
119    }
120
121    fn get<'this: 'result, 'handle: 'result, 'result>(
122        &'this self,
123        handle: &'handle Self::Handle,
124    ) -> &'result Self::SequenceRef {
125        &self.sequence[handle.offset..handle.offset + handle.len]
126    }
127}
128
129impl<AlphabetType: Alphabet + 'static> InverseMappingSequenceStore<AlphabetType>
130    for BitVectorSequenceStore<AlphabetType>
131{
132    fn map_sequence_ref_to_handle(&self, sequence_ref: &Self::SequenceRef) -> Self::Handle {
133        let raw_offset = unsafe {
134            sequence_ref
135                .bits
136                .as_bitptr()
137                .offset_from(self.sequence.bits.as_bitptr())
138        };
139        debug_assert!(raw_offset >= 0);
140        let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
141        let offset = raw_offset as usize / bit_width;
142
143        Self::Handle {
144            offset,
145            len: sequence_ref.len(),
146            phantom_data: Default::default(),
147        }
148    }
149}
150
151impl<AlphabetType: Alphabet> HandleWithLength for BitVectorSequenceStoreHandle<AlphabetType> {
152    fn len(&self) -> usize {
153        self.len
154    }
155}
156
157impl<AlphabetType: Alphabet> HandleWithSubsequence<core::ops::Range<usize>>
158    for BitVectorSequenceStoreHandle<AlphabetType>
159{
160    fn subsequence_handle(&self, range: core::ops::Range<usize>) -> Self {
161        let result = Self {
162            offset: self.offset + range.start,
163            len: range.end - range.start,
164            phantom_data: self.phantom_data,
165        };
166        debug_assert!(self.offset + self.len >= result.offset + result.len);
167        result
168    }
169}
170
#[cfg(test)]
mod tests {
    use crate::implementation::alphabets::dna_alphabet::DnaAlphabet;
    use crate::implementation::bit_vec_sequence_store::BitVectorSequenceStore;
    use crate::implementation::vec_sequence::VectorGenome;
    use crate::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
    use crate::interface::sequence_store::{InverseMappingSequenceStore, SequenceStore};

    /// Checks that stored sequences can be read back unchanged, and that mapping a sequence
    /// reference back to a handle yields the handle that was returned when adding the sequence.
    #[test]
    fn test_inverse_mapping() {
        let mut sequence_store = BitVectorSequenceStore::<DnaAlphabet>::new();
        let handle1 = sequence_store.add_from_slice_u8(b"ACGTTG").unwrap();
        let handle2 = sequence_store.add_from_slice_u8(b"CGACTG").unwrap();
        let reference1 = sequence_store.get(&handle1);
        let reference2 = sequence_store.get(&handle2);

        // Plain `assert_eq!` is used instead of `debug_assert_eq!`, such that the checks are
        // also executed when the tests are built without debug assertions.
        assert_eq!(
            reference1.convert::<VectorGenome<_>, _>(),
            VectorGenome::from_slice_u8(b"ACGTTG").unwrap()
        );
        assert_eq!(
            reference2.convert::<VectorGenome<_>, _>(),
            VectorGenome::from_slice_u8(b"CGACTG").unwrap()
        );
        assert_eq!(
            sequence_store.map_sequence_ref_to_handle(reference1),
            handle1
        );
        assert_eq!(
            sequence_store.map_sequence_ref_to_handle(reference2),
            handle2
        );
    }
}
203}