template_switch_error_free_inners/lib.rs
1//! Compute all error-free template switch inners for a pair of genome strings.
2
3#![warn(missing_docs)]
4
5use std::iter;
6
7use bitvec::vec::BitVec;
8use compact_genome::{
9 implementation::vec_sequence::VectorGenome,
10 interface::{alphabet::Alphabet, sequence::GenomeSequence},
11};
12use log::debug;
13use suffix::SuffixTable;
14
15#[cfg(test)]
16mod tests;
17
18/// A table of all error-free template switch inner entry points for a pair of genome strings.
19pub struct MatchTable {
20 reference_reference: BitVec,
21 reference_query: BitVec,
22 query_reference: BitVec,
23 query_query: BitVec,
24 reference_kmer_count: usize,
25 query_kmer_count: usize,
26}
27
28impl MatchTable {
29 /// Compute all error-free template switch inner entry points for a pair of genome strings.
30 ///
31 /// The inners must have the given minimum length.
32 pub fn new<
33 AlphabetType: Alphabet,
34 GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
35 >(
36 reference: &GenomeSubsequence,
37 query: &GenomeSubsequence,
38 minimum_length: usize,
39 ) -> Self {
40 assert!(minimum_length > 0);
41
42 debug!("Converting genomes to strings");
43 let reference_rc =
44 VectorGenome::<AlphabetType>::from_iter(reference.reverse_complement_iter())
45 .as_string();
46 let query_rc =
47 VectorGenome::<AlphabetType>::from_iter(query.reverse_complement_iter()).as_string();
48 let reference = reference.as_string();
49 let query = query.as_string();
50
51 debug!("Computing indexes");
52 let reference_index = SuffixTable::new(&reference);
53 let query_index = SuffixTable::new(&query);
54
55 debug!("Initialising bitvectors");
56 let reference_kmer_count = reference.len() - minimum_length + 1;
57 let query_kmer_count = query.len() - minimum_length + 1;
58
59 let mut reference_reference =
60 BitVec::repeat(false, reference_kmer_count * reference_kmer_count);
61 let mut reference_query = BitVec::repeat(false, reference_kmer_count * query_kmer_count);
62 let mut query_reference = BitVec::repeat(false, query_kmer_count * reference_kmer_count);
63 let mut query_query = BitVec::repeat(false, query_kmer_count * query_kmer_count);
64
65 debug!("Finding matches");
66 let reference_rc_character_offsets: Vec<_> = reference_rc
67 .char_indices()
68 .map(|(index, _)| index)
69 .chain(iter::once(reference_rc.len()))
70 .collect();
71 let query_rc_character_offsets: Vec<_> = query_rc
72 .char_indices()
73 .map(|(index, _)| index)
74 .chain(iter::once(query_rc.len()))
75 .collect();
76
77 for reference_rc_kmer_index in 0..reference_kmer_count {
78 let reference_rc_kmer = &reference_rc[reference_rc_character_offsets
79 [reference_rc_kmer_index]
80 ..reference_rc_character_offsets[reference_rc_kmer_index + minimum_length]];
81
82 for reference_kmer_index in reference_index.positions(reference_rc_kmer) {
83 let reference_kmer_index = usize::try_from(*reference_kmer_index).unwrap();
84 reference_reference.set(
85 reference_kmer_index * reference_kmer_count + reference_rc_kmer_index,
86 true,
87 );
88 }
89
90 for query_kmer_index in query_index.positions(reference_rc_kmer) {
91 let query_kmer_index = usize::try_from(*query_kmer_index).unwrap();
92 query_reference.set(
93 query_kmer_index * reference_kmer_count + reference_rc_kmer_index,
94 true,
95 );
96 }
97 }
98
99 for query_rc_kmer_index in 0..query_kmer_count {
100 let query_rc_kmer = &query_rc[query_rc_character_offsets[query_rc_kmer_index]
101 ..query_rc_character_offsets[query_rc_kmer_index + minimum_length]];
102
103 for reference_kmer_index in reference_index.positions(query_rc_kmer) {
104 let reference_kmer_index = usize::try_from(*reference_kmer_index).unwrap();
105 reference_query.set(
106 reference_kmer_index * query_kmer_count + query_rc_kmer_index,
107 true,
108 );
109 }
110
111 for query_kmer_index in query_index.positions(query_rc_kmer) {
112 let query_kmer_index = usize::try_from(*query_kmer_index).unwrap();
113 query_query.set(
114 query_kmer_index * query_kmer_count + query_rc_kmer_index,
115 true,
116 );
117 }
118 }
119
120 Self {
121 reference_reference,
122 reference_query,
123 query_reference,
124 query_query,
125 reference_kmer_count,
126 query_kmer_count,
127 }
128 }
129
130 /// Returns `true` if the reference kmer at `primary_index` matches the kmer in the reverse-complemented reference at `secondary_rc_index`.
131 ///
132 /// # Example
133 ///
134 /// ```rust
135 /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
136 /// use compact_genome::implementation::vec_sequence::VectorGenome;
137 /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
138 /// use template_switch_error_free_inners::MatchTable;
139 ///
140 /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AGGGGAACCCCAA").unwrap();
141 /// let query = VectorGenome::from_slice_u8(b"AAAAAAAA").unwrap();
142 /// let matches = MatchTable::new(
143 /// reference.as_genome_subsequence(),
144 /// query.as_genome_subsequence(),
145 /// 4,
146 /// );
147 ///
148 /// assert!(matches.has_reference_reference_match(1, 2));
149 /// assert!(matches.has_reference_reference_match(7, 8));
150 /// ```
151 pub fn has_reference_reference_match(
152 &self,
153 primary_index: usize,
154 secondary_rc_index: usize,
155 ) -> bool {
156 debug_assert!(primary_index < self.reference_kmer_count);
157 debug_assert!(secondary_rc_index < self.reference_kmer_count);
158 self.reference_reference[primary_index * self.reference_kmer_count + secondary_rc_index]
159 }
160
161 /// Returns `true` if the reference kmer at `primary_index` matches the kmer in the reverse-complemented query at `secondary_rc_index`.
162 ///
163 /// # Example
164 ///
165 /// ```rust
166 /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
167 /// use compact_genome::implementation::vec_sequence::VectorGenome;
168 /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
169 /// use template_switch_error_free_inners::MatchTable;
170 ///
171 /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AGGGGAAA").unwrap();
172 /// let query = VectorGenome::from_slice_u8(b"ACCCCAA").unwrap();
173 /// let matches = MatchTable::new(
174 /// reference.as_genome_subsequence(),
175 /// query.as_genome_subsequence(),
176 /// 4,
177 /// );
178 ///
179 /// assert!(matches.has_reference_query_match(1, 2));
180 /// ```
181 pub fn has_reference_query_match(
182 &self,
183 primary_index: usize,
184 secondary_rc_index: usize,
185 ) -> bool {
186 debug_assert!(primary_index < self.reference_kmer_count);
187 debug_assert!(secondary_rc_index < self.query_kmer_count);
188 self.reference_query[primary_index * self.query_kmer_count + secondary_rc_index]
189 }
190
191 /// Returns `true` if the query kmer at `primary_index` matches the kmer in the reverse-complemented reference at `secondary_rc_index`.
192 ///
193 /// # Example
194 ///
195 /// ```rust
196 /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
197 /// use compact_genome::implementation::vec_sequence::VectorGenome;
198 /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
199 /// use template_switch_error_free_inners::MatchTable;
200 ///
201 /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AGGGGAAA").unwrap();
202 /// let query = VectorGenome::from_slice_u8(b"ACCCCAA").unwrap();
203 /// let matches = MatchTable::new(
204 /// reference.as_genome_subsequence(),
205 /// query.as_genome_subsequence(),
206 /// 4,
207 /// );
208 ///
209 /// assert!(matches.has_query_reference_match(1, 3));
210 /// ```
211 pub fn has_query_reference_match(
212 &self,
213 primary_index: usize,
214 secondary_rc_index: usize,
215 ) -> bool {
216 debug_assert!(primary_index < self.query_kmer_count);
217 debug_assert!(secondary_rc_index < self.reference_kmer_count);
218 self.query_reference[primary_index * self.reference_kmer_count + secondary_rc_index]
219 }
220
221 /// Returns `true` if the query kmer at `primary_index` matches the kmer in the reverse-complemented query at `secondary_rc_index`.
222 ///
223 /// # Example
224 ///
225 /// ```rust
226 /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
227 /// use compact_genome::implementation::vec_sequence::VectorGenome;
228 /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
229 /// use template_switch_error_free_inners::MatchTable;
230 ///
231 /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AAAAAA").unwrap();
232 /// let query = VectorGenome::from_slice_u8(b"AAGGGGACCCCA").unwrap();
233 /// let matches = MatchTable::new(
234 /// reference.as_genome_subsequence(),
235 /// query.as_genome_subsequence(),
236 /// 4,
237 /// );
238 ///
239 /// assert!(matches.has_query_query_match(2, 1));
240 /// assert!(matches.has_query_query_match(7, 6));
241 /// ```
242 pub fn has_query_query_match(&self, primary_index: usize, secondary_rc_index: usize) -> bool {
243 debug_assert!(primary_index < self.query_kmer_count);
244 debug_assert!(secondary_rc_index < self.query_kmer_count);
245 self.query_query[primary_index * self.query_kmer_count + secondary_rc_index]
246 }
247}