compact_genome/implementation/
bit_vec_sequence_store.rs1use crate::implementation::bit_vec_sequence::{
4 alphabet_character_bit_width, BitVectorGenome, BitVectorSubGenome,
5};
6use crate::interface::alphabet::{Alphabet, AlphabetCharacter, AlphabetError};
7use crate::interface::sequence::GenomeSequence;
8use crate::interface::sequence_store::{
9 HandleWithLength, HandleWithSubsequence, InverseMappingSequenceStore, SequenceStore,
10};
11use bitvec::order::Lsb0;
12use bitvec::view::BitView;
13use std::marker::PhantomData;
14use traitsequence::interface::Sequence;
15
16#[derive(Default, Clone, Eq, PartialEq, Debug)]
18pub struct BitVectorSequenceStore<AlphabetType: Alphabet> {
19 sequence: BitVectorGenome<AlphabetType>,
20}
21
22#[derive(Default, Debug, Clone, Copy, Eq, PartialEq)]
24pub struct BitVectorSequenceStoreHandle<AlphabetType: Alphabet> {
25 offset: usize,
26 len: usize,
27 phantom_data: PhantomData<AlphabetType>,
28}
29
30impl<AlphabetType: Alphabet> BitVectorSequenceStore<AlphabetType> {
31 pub fn new() -> Self {
33 Self {
34 sequence: Default::default(),
35 }
36 }
37
38 pub fn size_in_memory(&self) -> usize {
40 (self.sequence.len() - 1) / 4 + 1 }
42}
43
44impl<AlphabetType: Alphabet + 'static> SequenceStore<AlphabetType>
45 for BitVectorSequenceStore<AlphabetType>
46{
47 type Handle = BitVectorSequenceStoreHandle<AlphabetType>;
48 type SequenceRef = BitVectorSubGenome<AlphabetType>;
49
50 fn add<
51 Sequence: GenomeSequence<AlphabetType, Subsequence> + ?Sized,
52 Subsequence: GenomeSequence<AlphabetType, Subsequence> + ?Sized,
53 >(
54 &mut self,
55 s: &Sequence,
56 ) -> Self::Handle {
57 let offset = self.sequence.len();
58 let len = s.len();
59 self.sequence.extend(s.iter().cloned());
60 Self::Handle {
61 offset,
62 len,
63 phantom_data: Default::default(),
64 }
65 }
66
67 fn add_from_iter(
68 &mut self,
69 iter: impl IntoIterator<Item = <AlphabetType as Alphabet>::CharacterType>,
70 ) -> Self::Handle {
71 let offset = self.sequence.len();
72 let iter = iter.into_iter();
73 let (size, _) = iter.size_hint();
74 let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
75 self.sequence.bits.reserve(size * bit_width);
76 for character in iter {
77 self.sequence
78 .bits
79 .extend_from_bitslice(&character.index().view_bits::<Lsb0>()[0..bit_width]);
80 }
81
82 let len = self.sequence.len() - offset;
83 Self::Handle {
84 offset,
85 len,
86 phantom_data: Default::default(),
87 }
88 }
89
90 fn add_from_iter_u8<IteratorType: IntoIterator<Item = u8>>(
91 &mut self,
92 iter: IteratorType,
93 ) -> Result<Self::Handle, AlphabetError> {
94 let offset = self.sequence.len();
95 let iter = iter.into_iter();
96 let (size, _) = iter.size_hint();
97 let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
98 self.sequence.bits.reserve(size * bit_width);
99 for item in iter {
100 match AlphabetType::ascii_to_character(item) {
101 Ok(character) => self
102 .sequence
103 .bits
104 .extend_from_bitslice(&character.index().view_bits::<Lsb0>()[0..bit_width]),
105
106 Err(error) => {
107 self.sequence.bits.resize(offset * bit_width, false);
108 return Err(error);
109 }
110 }
111 }
112
113 let len = self.sequence.len() - offset;
114 Ok(Self::Handle {
115 offset,
116 len,
117 phantom_data: Default::default(),
118 })
119 }
120
121 fn get<'this: 'result, 'handle: 'result, 'result>(
122 &'this self,
123 handle: &'handle Self::Handle,
124 ) -> &'result Self::SequenceRef {
125 &self.sequence[handle.offset..handle.offset + handle.len]
126 }
127}
128
129impl<AlphabetType: Alphabet + 'static> InverseMappingSequenceStore<AlphabetType>
130 for BitVectorSequenceStore<AlphabetType>
131{
132 fn map_sequence_ref_to_handle(&self, sequence_ref: &Self::SequenceRef) -> Self::Handle {
133 let raw_offset = unsafe {
134 sequence_ref
135 .bits
136 .as_bitptr()
137 .offset_from(self.sequence.bits.as_bitptr())
138 };
139 debug_assert!(raw_offset >= 0);
140 let bit_width = alphabet_character_bit_width(AlphabetType::SIZE);
141 let offset = raw_offset as usize / bit_width;
142
143 Self::Handle {
144 offset,
145 len: sequence_ref.len(),
146 phantom_data: Default::default(),
147 }
148 }
149}
150
151impl<AlphabetType: Alphabet> HandleWithLength for BitVectorSequenceStoreHandle<AlphabetType> {
152 fn len(&self) -> usize {
153 self.len
154 }
155}
156
157impl<AlphabetType: Alphabet> HandleWithSubsequence<core::ops::Range<usize>>
158 for BitVectorSequenceStoreHandle<AlphabetType>
159{
160 fn subsequence_handle(&self, range: core::ops::Range<usize>) -> Self {
161 let result = Self {
162 offset: self.offset + range.start,
163 len: range.end - range.start,
164 phantom_data: self.phantom_data,
165 };
166 debug_assert!(self.offset + self.len >= result.offset + result.len);
167 result
168 }
169}
170
#[cfg(test)]
mod tests {
    use crate::implementation::alphabets::dna_alphabet::DnaAlphabet;
    use crate::implementation::bit_vec_sequence_store::BitVectorSequenceStore;
    use crate::implementation::vec_sequence::VectorGenome;
    use crate::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
    use crate::interface::sequence_store::{InverseMappingSequenceStore, SequenceStore};

    /// Adds two sequences, checks that they can be read back unchanged, and checks that
    /// mapping the returned references back to handles yields the original handles.
    ///
    /// Uses `assert_eq!` rather than `debug_assert_eq!` so that the checks are also
    /// executed when the tests are built without debug assertions.
    #[test]
    fn test_inverse_mapping() {
        let mut sequence_store = BitVectorSequenceStore::<DnaAlphabet>::new();
        let handle1 = sequence_store.add_from_slice_u8(b"ACGTTG").unwrap();
        let handle2 = sequence_store.add_from_slice_u8(b"CGACTG").unwrap();
        let reference1 = sequence_store.get(&handle1);
        let reference2 = sequence_store.get(&handle2);
        assert_eq!(
            reference1.convert::<VectorGenome<_>, _>(),
            VectorGenome::from_slice_u8(b"ACGTTG").unwrap()
        );
        assert_eq!(
            reference2.convert::<VectorGenome<_>, _>(),
            VectorGenome::from_slice_u8(b"CGACTG").unwrap()
        );
        assert_eq!(
            sequence_store.map_sequence_ref_to_handle(reference1),
            handle1
        );
        assert_eq!(
            sequence_store.map_sequence_ref_to_handle(reference2),
            handle2
        );
    }
}