template_switch_error_free_inners/
lib.rs

1//! Compute all error-free template switch inners for a pair of genome strings.
2
3#![warn(missing_docs)]
4
5use std::iter;
6
7use bitvec::vec::BitVec;
8use compact_genome::{
9    implementation::vec_sequence::VectorGenome,
10    interface::{alphabet::Alphabet, sequence::GenomeSequence},
11};
12use log::debug;
13use suffix::SuffixTable;
14
15#[cfg(test)]
16mod tests;
17
18/// A table of all error-free template switch inner entry points for a pair of genome strings.
19pub struct MatchTable {
20    reference_reference: BitVec,
21    reference_query: BitVec,
22    query_reference: BitVec,
23    query_query: BitVec,
24    reference_kmer_count: usize,
25    query_kmer_count: usize,
26}
27
28impl MatchTable {
29    /// Compute all error-free template switch inner entry points for a pair of genome strings.
30    ///
31    /// The inners must have the given minimum length.
32    pub fn new<
33        AlphabetType: Alphabet,
34        GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
35    >(
36        reference: &GenomeSubsequence,
37        query: &GenomeSubsequence,
38        minimum_length: usize,
39    ) -> Self {
40        assert!(minimum_length > 0);
41
42        debug!("Converting genomes to strings");
43        let reference_rc =
44            VectorGenome::<AlphabetType>::from_iter(reference.reverse_complement_iter())
45                .as_string();
46        let query_rc =
47            VectorGenome::<AlphabetType>::from_iter(query.reverse_complement_iter()).as_string();
48        let reference = reference.as_string();
49        let query = query.as_string();
50
51        debug!("Computing indexes");
52        let reference_index = SuffixTable::new(&reference);
53        let query_index = SuffixTable::new(&query);
54
55        debug!("Initialising bitvectors");
56        let reference_kmer_count = reference.len() - minimum_length + 1;
57        let query_kmer_count = query.len() - minimum_length + 1;
58
59        let mut reference_reference =
60            BitVec::repeat(false, reference_kmer_count * reference_kmer_count);
61        let mut reference_query = BitVec::repeat(false, reference_kmer_count * query_kmer_count);
62        let mut query_reference = BitVec::repeat(false, query_kmer_count * reference_kmer_count);
63        let mut query_query = BitVec::repeat(false, query_kmer_count * query_kmer_count);
64
65        debug!("Finding matches");
66        let reference_rc_character_offsets: Vec<_> = reference_rc
67            .char_indices()
68            .map(|(index, _)| index)
69            .chain(iter::once(reference_rc.len()))
70            .collect();
71        let query_rc_character_offsets: Vec<_> = query_rc
72            .char_indices()
73            .map(|(index, _)| index)
74            .chain(iter::once(query_rc.len()))
75            .collect();
76
77        for reference_rc_kmer_index in 0..reference_kmer_count {
78            let reference_rc_kmer = &reference_rc[reference_rc_character_offsets
79                [reference_rc_kmer_index]
80                ..reference_rc_character_offsets[reference_rc_kmer_index + minimum_length]];
81
82            for reference_kmer_index in reference_index.positions(reference_rc_kmer) {
83                let reference_kmer_index = usize::try_from(*reference_kmer_index).unwrap();
84                reference_reference.set(
85                    reference_kmer_index * reference_kmer_count + reference_rc_kmer_index,
86                    true,
87                );
88            }
89
90            for query_kmer_index in query_index.positions(reference_rc_kmer) {
91                let query_kmer_index = usize::try_from(*query_kmer_index).unwrap();
92                query_reference.set(
93                    query_kmer_index * reference_kmer_count + reference_rc_kmer_index,
94                    true,
95                );
96            }
97        }
98
99        for query_rc_kmer_index in 0..query_kmer_count {
100            let query_rc_kmer = &query_rc[query_rc_character_offsets[query_rc_kmer_index]
101                ..query_rc_character_offsets[query_rc_kmer_index + minimum_length]];
102
103            for reference_kmer_index in reference_index.positions(query_rc_kmer) {
104                let reference_kmer_index = usize::try_from(*reference_kmer_index).unwrap();
105                reference_query.set(
106                    reference_kmer_index * query_kmer_count + query_rc_kmer_index,
107                    true,
108                );
109            }
110
111            for query_kmer_index in query_index.positions(query_rc_kmer) {
112                let query_kmer_index = usize::try_from(*query_kmer_index).unwrap();
113                query_query.set(
114                    query_kmer_index * query_kmer_count + query_rc_kmer_index,
115                    true,
116                );
117            }
118        }
119
120        Self {
121            reference_reference,
122            reference_query,
123            query_reference,
124            query_query,
125            reference_kmer_count,
126            query_kmer_count,
127        }
128    }
129
130    /// Returns `true` if the reference kmer at `primary_index` matches the kmer in the reverse-complemented reference at `secondary_rc_index`.
131    ///
132    /// # Example
133    ///
134    /// ```rust
135    /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
136    /// use compact_genome::implementation::vec_sequence::VectorGenome;
137    /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
138    /// use template_switch_error_free_inners::MatchTable;
139    ///
140    /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AGGGGAACCCCAA").unwrap();
141    /// let query = VectorGenome::from_slice_u8(b"AAAAAAAA").unwrap();
142    /// let matches = MatchTable::new(
143    ///     reference.as_genome_subsequence(),
144    ///     query.as_genome_subsequence(),
145    ///     4,
146    /// );
147    ///
148    /// assert!(matches.has_reference_reference_match(1, 2));
149    /// assert!(matches.has_reference_reference_match(7, 8));
150    /// ```
151    pub fn has_reference_reference_match(
152        &self,
153        primary_index: usize,
154        secondary_rc_index: usize,
155    ) -> bool {
156        debug_assert!(primary_index < self.reference_kmer_count);
157        debug_assert!(secondary_rc_index < self.reference_kmer_count);
158        self.reference_reference[primary_index * self.reference_kmer_count + secondary_rc_index]
159    }
160
161    /// Returns `true` if the reference kmer at `primary_index` matches the kmer in the reverse-complemented query at `secondary_rc_index`.
162    ///
163    /// # Example
164    ///
165    /// ```rust
166    /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
167    /// use compact_genome::implementation::vec_sequence::VectorGenome;
168    /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
169    /// use template_switch_error_free_inners::MatchTable;
170    ///
171    /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AGGGGAAA").unwrap();
172    /// let query = VectorGenome::from_slice_u8(b"ACCCCAA").unwrap();
173    /// let matches = MatchTable::new(
174    ///     reference.as_genome_subsequence(),
175    ///     query.as_genome_subsequence(),
176    ///     4,
177    /// );
178    ///
179    /// assert!(matches.has_reference_query_match(1, 2));
180    /// ```
181    pub fn has_reference_query_match(
182        &self,
183        primary_index: usize,
184        secondary_rc_index: usize,
185    ) -> bool {
186        debug_assert!(primary_index < self.reference_kmer_count);
187        debug_assert!(secondary_rc_index < self.query_kmer_count);
188        self.reference_query[primary_index * self.query_kmer_count + secondary_rc_index]
189    }
190
191    /// Returns `true` if the query kmer at `primary_index` matches the kmer in the reverse-complemented reference at `secondary_rc_index`.
192    ///
193    /// # Example
194    ///
195    /// ```rust
196    /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
197    /// use compact_genome::implementation::vec_sequence::VectorGenome;
198    /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
199    /// use template_switch_error_free_inners::MatchTable;
200    ///
201    /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AGGGGAAA").unwrap();
202    /// let query = VectorGenome::from_slice_u8(b"ACCCCAA").unwrap();
203    /// let matches = MatchTable::new(
204    ///     reference.as_genome_subsequence(),
205    ///     query.as_genome_subsequence(),
206    ///     4,
207    /// );
208    ///
209    /// assert!(matches.has_query_reference_match(1, 3));
210    /// ```
211    pub fn has_query_reference_match(
212        &self,
213        primary_index: usize,
214        secondary_rc_index: usize,
215    ) -> bool {
216        debug_assert!(primary_index < self.query_kmer_count);
217        debug_assert!(secondary_rc_index < self.reference_kmer_count);
218        self.query_reference[primary_index * self.reference_kmer_count + secondary_rc_index]
219    }
220
221    /// Returns `true` if the query kmer at `primary_index` matches the kmer in the reverse-complemented query at `secondary_rc_index`.
222    ///
223    /// # Example
224    ///
225    /// ```rust
226    /// use compact_genome::interface::sequence::{GenomeSequence, OwnedGenomeSequence};
227    /// use compact_genome::implementation::vec_sequence::VectorGenome;
228    /// use compact_genome::implementation::alphabets::dna_alphabet::DnaAlphabet;
229    /// use template_switch_error_free_inners::MatchTable;
230    ///
231    /// let reference = VectorGenome::<DnaAlphabet>::from_slice_u8(b"AAAAAA").unwrap();
232    /// let query = VectorGenome::from_slice_u8(b"AAGGGGACCCCA").unwrap();
233    /// let matches = MatchTable::new(
234    ///     reference.as_genome_subsequence(),
235    ///     query.as_genome_subsequence(),
236    ///     4,
237    /// );
238    ///
239    /// assert!(matches.has_query_query_match(2, 1));
240    /// assert!(matches.has_query_query_match(7, 6));
241    /// ```
242    pub fn has_query_query_match(&self, primary_index: usize, secondary_rc_index: usize) -> bool {
243        debug_assert!(primary_index < self.query_kmer_count);
244        debug_assert!(secondary_rc_index < self.query_kmer_count);
245        self.query_query[primary_index * self.query_kmer_count + secondary_rc_index]
246    }
247}