compact_genome/interface/
sequence.rs1use crate::interface::alphabet::{Alphabet, AlphabetCharacter, AlphabetError};
4use crate::interface::k_mer::OwnedKmer;
5use std::cmp::Ordering;
6use std::iter;
7use std::iter::{FromIterator, Map, Repeat, Rev, Zip};
8use std::ops::Range;
9use traitsequence::interface::{EditableSequence, OwnedSequence, Sequence, SequenceMut};
10
11pub mod neighbor_iterators;
12
13pub type ReverseComplementIterator<I, AlphabetType> = Map<
15 Rev<I>,
16 for<'c> fn(
17 &'c <AlphabetType as Alphabet>::CharacterType,
18 ) -> <AlphabetType as Alphabet>::CharacterType,
19>;
20
/// The iterator type returned when iterating over owned k-mers of a genome.
///
/// Every item of the underlying iterator pairs an offset from a `Range<usize>` with a
/// repeated shared reference to the source sequence; a plain function pointer turns each
/// such pair into a `KmerType`.
pub type OwnedKmerIterator<'a, GenomeSequenceType, KmerType> = Map<
    Zip<Range<usize>, Repeat<&'a GenomeSequenceType>>,
    fn((usize, &'a GenomeSequenceType)) -> KmerType,
>;
26
27pub trait GenomeSequence<
29 AlphabetType: Alphabet,
30 GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
31>: Sequence<AlphabetType::CharacterType, GenomeSubsequence>
32{
33 fn is_valid(&self) -> bool {
35 true
36 }
37
38 fn clone_as_vec(&self) -> Vec<u8> {
40 self.iter()
41 .cloned()
42 .map(AlphabetType::character_to_ascii)
43 .collect()
44 }
45
46 fn as_genome_subsequence(&self) -> &GenomeSubsequence {
48 self.index(0..self.len())
49 }
50
51 fn as_string(&self) -> String {
53 String::from_utf8(self.clone_as_vec())
54 .expect("Genome contains non-utf8 characters (It should be ASCII only).")
55 }
56
57 fn reverse_complement_iter(
60 &self,
61 ) -> ReverseComplementIterator<Self::Iterator<'_>, AlphabetType> {
62 self.iter()
63 .rev()
64 .map(AlphabetType::CharacterType::complement)
65 }
66
67 fn cloned_k_mer_iter<
70 const K: usize,
71 KmerType: OwnedKmer<K, AlphabetType, GenomeSubsequence>,
72 >(
73 &self,
74 ) -> OwnedKmerIterator<'_, Self, KmerType> {
75 (0..self.len() - K + 1)
76 .zip(iter::repeat(self))
77 .map(|(offset, source_genome)| {
78 source_genome.iter().skip(offset).take(K).cloned().collect()
79 })
80 }
81
82 fn convert_with_reverse_complement<
85 ReverseComplementSequence: OwnedGenomeSequence<AlphabetType, ReverseComplementSubsequence>,
86 ReverseComplementSubsequence: GenomeSequence<AlphabetType, ReverseComplementSubsequence> + ?Sized,
87 >(
88 &self,
89 ) -> ReverseComplementSequence {
90 self.reverse_complement_iter().collect()
91 }
92
93 fn convert<
95 ResultSequence: OwnedGenomeSequence<AlphabetType, ResultSubsequence>,
96 ResultSubsequence: GenomeSequence<AlphabetType, ResultSubsequence> + ?Sized,
97 >(
98 &self,
99 ) -> ResultSequence {
100 self.iter().cloned().collect()
101 }
102
103 fn is_canonical(&self) -> bool {
106 for (forward_character, reverse_character) in
107 self.iter().cloned().zip(self.reverse_complement_iter())
108 {
109 match forward_character.cmp(&reverse_character) {
110 Ordering::Less => return true,
111 Ordering::Greater => return false,
112 _ => {}
113 }
114 }
115 true
116 }
117
118 fn is_self_complemental(&self) -> bool {
121 self.iter().cloned().eq(self.reverse_complement_iter())
122 }
123}
124
125pub trait OwnedGenomeSequence<
127 AlphabetType: Alphabet,
128 GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
129>:
130 GenomeSequence<AlphabetType, GenomeSubsequence>
131 + FromIterator<AlphabetType::CharacterType>
132 + OwnedSequence<AlphabetType::CharacterType, GenomeSubsequence>
133{
134 fn clone_as_reverse_complement(&self) -> Self {
137 self.reverse_complement_iter().collect()
138 }
139
140 fn from_iter_u8<T: IntoIterator<Item = u8>>(iter: T) -> Result<Self, AlphabetError> {
143 iter.into_iter()
144 .map(AlphabetType::ascii_to_character)
145 .collect()
146 }
147
148 fn from_slice_u8(slice: &[u8]) -> Result<Self, AlphabetError> {
151 Self::from_iter_u8(slice.iter().copied())
152 }
153}
154
155pub trait GenomeSequenceMut<
157 AlphabetType: Alphabet,
158 GenomeSubsequenceMut: GenomeSequenceMut<AlphabetType, GenomeSubsequenceMut> + ?Sized,
159>:
160 SequenceMut<AlphabetType::CharacterType, GenomeSubsequenceMut>
161 + GenomeSequence<AlphabetType, GenomeSubsequenceMut>
162{
163 fn as_genome_subsequence_mut(&mut self) -> &mut GenomeSubsequenceMut {
165 self.index_mut(0..self.len())
166 }
167}
168
169type IntoIterU8<SourceType, AlphabetType> = Map<
170 <SourceType as IntoIterator>::IntoIter,
171 fn(<AlphabetType as Alphabet>::CharacterType) -> u8,
172>;
173
174pub trait EditableGenomeSequence<
176 AlphabetType: Alphabet,
177 GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
178>:
179 EditableSequence<AlphabetType::CharacterType, GenomeSubsequence>
180 + GenomeSequence<AlphabetType, GenomeSubsequence>
181{
182 fn into_iter_u8(self) -> IntoIterU8<Self, AlphabetType> {
184 self.into_iter().map(AlphabetType::character_to_ascii)
185 }
186
187 fn extend_from_iter_u8<IteratorType: IntoIterator<Item = u8>>(
189 &mut self,
190 iter: IteratorType,
191 ) -> Result<(), AlphabetError> {
192 let original_len = self.len();
193 let iter = iter.into_iter();
194 let (size, _) = iter.size_hint();
195 self.reserve(size);
196 for item in iter {
197 match AlphabetType::ascii_to_character(item) {
198 Ok(character) => self.push(character),
199 Err(error) => {
200 self.resize(
201 original_len,
202 AlphabetType::CharacterType::from_index(0).unwrap(),
203 );
204 return Err(error);
205 }
206 }
207 }
208
209 Ok(())
210 }
211
212 fn extend_from_slice_u8(&mut self, slice: &[u8]) -> Result<(), AlphabetError> {
214 self.extend_from_iter_u8(slice.iter().copied())
215 }
216}