use crate::interface::alphabet::{Alphabet, AlphabetCharacter, AlphabetError};
use crate::interface::k_mer::OwnedKmer;
use std::cmp::Ordering;
use std::iter;
use std::iter::{FromIterator, Map, Repeat, Rev, Zip};
use std::ops::Range;
use traitsequence::interface::{EditableSequence, OwnedSequence, Sequence, SequenceMut};
pub mod neighbor_iterators;
/// Iterator type returned by `GenomeSequence::reverse_complement_iter`.
///
/// It walks the underlying iterator `I` back to front (`Rev`) and maps each borrowed
/// character through a plain function pointer that produces an owned character.
pub type ReverseComplementIterator<I, AlphabetType> = Map<
Rev<I>,
for<'c> fn(
&'c <AlphabetType as Alphabet>::CharacterType,
) -> <AlphabetType as Alphabet>::CharacterType,
>;
/// Iterator type returned by `GenomeSequence::cloned_k_mer_iter`.
///
/// Each offset of a `Range<usize>` is paired with a repeated reference to the source
/// sequence, and a plain function pointer turns every `(offset, sequence)` pair into a
/// `KmerType`.
pub type OwnedKmerIterator<'a, GenomeSequenceType, KmerType> = Map<
Zip<Range<usize>, Repeat<&'a GenomeSequenceType>>,
fn((usize, &'a GenomeSequenceType)) -> KmerType,
>;
/// A read-only sequence of characters over the alphabet `AlphabetType`.
///
/// `GenomeSubsequence` is the (possibly unsized) type obtained when indexing this
/// sequence with a range.
pub trait GenomeSequence<
    AlphabetType: Alphabet,
    GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
>: Sequence<AlphabetType::CharacterType, GenomeSubsequence>
{
    /// Returns `true` if this genome is valid.
    ///
    /// The default implementation accepts every sequence; implementors may override it.
    fn is_valid(&self) -> bool {
        true
    }

    /// Copies this genome into a `Vec<u8>`, converting each character with
    /// `AlphabetType::character_to_ascii`.
    fn clone_as_vec(&self) -> Vec<u8> {
        self.iter()
            .cloned()
            .map(AlphabetType::character_to_ascii)
            .collect()
    }

    /// Returns the whole sequence as a borrowed subsequence, i.e. the range `0..self.len()`.
    fn as_genome_subsequence(&self) -> &GenomeSubsequence {
        self.index(0..self.len())
    }

    /// Returns this genome as a `String` built from the bytes of [`Self::clone_as_vec`].
    ///
    /// # Panics
    ///
    /// Panics if the produced bytes are not valid UTF-8.
    fn as_string(&self) -> String {
        String::from_utf8(self.clone_as_vec())
            .expect("Genome contains non-utf8 characters (It should be ASCII only).")
    }

    /// Returns an iterator over the reverse complement of this genome.
    ///
    /// The characters are visited back to front and each one is replaced by its
    /// `complement`.
    fn reverse_complement_iter(
        &self,
    ) -> ReverseComplementIterator<Self::Iterator<'_>, AlphabetType> {
        self.iter()
            .rev()
            .map(AlphabetType::CharacterType::complement)
    }

    /// Returns an iterator over all k-mers of this genome, each cloned into an owned
    /// `KmerType`.
    ///
    /// The k-mers are produced in order of their starting offset; there are
    /// `self.len() - K + 1` of them. If the genome is shorter than `K`, the iterator is
    /// empty.
    ///
    /// Every k-mer is built by skipping `offset` characters from the start of the genome
    /// and collecting the next `K` characters.
    fn cloned_k_mer_iter<
        const K: usize,
        KmerType: OwnedKmer<K, AlphabetType, GenomeSubsequence>,
    >(
        &self,
    ) -> OwnedKmerIterator<'_, Self, KmerType> {
        // `self.len() - K + 1` would underflow for genomes shorter than `K`; such genomes
        // simply contain no k-mers.
        let k_mer_count = self
            .len()
            .checked_sub(K)
            .map_or(0, |difference| difference + 1);

        (0..k_mer_count)
            .zip(iter::repeat(self))
            .map(|(offset, source_genome)| {
                source_genome.iter().skip(offset).take(K).cloned().collect()
            })
    }

    /// Collects the reverse complement of this genome into a new owned genome of type
    /// `ReverseComplementSequence`.
    fn convert_with_reverse_complement<
        ReverseComplementSequence: OwnedGenomeSequence<AlphabetType, ReverseComplementSubsequence>,
        ReverseComplementSubsequence: GenomeSequence<AlphabetType, ReverseComplementSubsequence> + ?Sized,
    >(
        &self,
    ) -> ReverseComplementSequence {
        self.reverse_complement_iter().collect()
    }

    /// Clones the characters of this genome into a new owned genome of type
    /// `ResultSequence`.
    fn convert<
        ResultSequence: OwnedGenomeSequence<AlphabetType, ResultSubsequence>,
        ResultSubsequence: GenomeSequence<AlphabetType, ResultSubsequence> + ?Sized,
    >(
        &self,
    ) -> ResultSequence {
        self.iter().cloned().collect()
    }

    /// Returns `true` if this genome is lexicographically less than or equal to its
    /// reverse complement, using the ordering of the alphabet characters.
    fn is_canonical(&self) -> bool {
        for (forward_character, reverse_character) in
            self.iter().cloned().zip(self.reverse_complement_iter())
        {
            // The first position at which the two sequences differ decides the result.
            match forward_character.cmp(&reverse_character) {
                Ordering::Less => return true,
                Ordering::Greater => return false,
                Ordering::Equal => {}
            }
        }
        // No position differs: the genome equals its reverse complement.
        true
    }

    /// Returns `true` if this genome is equal to its own reverse complement.
    fn is_self_complemental(&self) -> bool {
        self.iter().cloned().eq(self.reverse_complement_iter())
    }
}
/// A genome sequence that owns its characters and can be built from an iterator over
/// alphabet characters.
pub trait OwnedGenomeSequence<
    AlphabetType: Alphabet,
    GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
>:
    GenomeSequence<AlphabetType, GenomeSubsequence>
    + FromIterator<AlphabetType::CharacterType>
    + OwnedSequence<AlphabetType::CharacterType, GenomeSubsequence>
{
    /// Builds a new genome of the same type holding the reverse complement of `self`.
    fn clone_as_reverse_complement(&self) -> Self {
        let reverse_complement = self.reverse_complement_iter();
        reverse_complement.collect::<Self>()
    }

    /// Builds a genome from an iterator over ASCII bytes.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `AlphabetType::ascii_to_character`.
    fn from_iter_u8<T: IntoIterator<Item = u8>>(iter: T) -> Result<Self, AlphabetError> {
        let characters = iter.into_iter().map(AlphabetType::ascii_to_character);
        characters.collect::<Result<Self, AlphabetError>>()
    }

    /// Builds a genome from a slice of ASCII bytes.
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as [`Self::from_iter_u8`].
    fn from_slice_u8(slice: &[u8]) -> Result<Self, AlphabetError> {
        let bytes = slice.iter().copied();
        Self::from_iter_u8(bytes)
    }
}
/// A genome sequence whose contents can be accessed mutably.
pub trait GenomeSequenceMut<
    AlphabetType: Alphabet,
    GenomeSubsequenceMut: GenomeSequenceMut<AlphabetType, GenomeSubsequenceMut> + ?Sized,
>:
    SequenceMut<AlphabetType::CharacterType, GenomeSubsequenceMut>
    + GenomeSequence<AlphabetType, GenomeSubsequenceMut>
{
    /// Returns the whole sequence as a mutably borrowed subsequence, i.e. the range
    /// `0..self.len()`.
    fn as_genome_subsequence_mut(&mut self) -> &mut GenomeSubsequenceMut {
        let full_range = 0..self.len();
        self.index_mut(full_range)
    }
}
/// Iterator type returned by `EditableGenomeSequence::into_iter_u8`.
///
/// It consumes `SourceType` through its `IntoIterator` implementation and maps every
/// owned character to a `u8` via a plain function pointer.
type IntoIterU8<SourceType, AlphabetType> = Map<
<SourceType as IntoIterator>::IntoIter,
fn(<AlphabetType as Alphabet>::CharacterType) -> u8,
>;
/// A genome sequence that can be grown, shrunk and consumed.
pub trait EditableGenomeSequence<
    AlphabetType: Alphabet,
    GenomeSubsequence: GenomeSequence<AlphabetType, GenomeSubsequence> + ?Sized,
>:
    EditableSequence<AlphabetType::CharacterType, GenomeSubsequence>
    + GenomeSequence<AlphabetType, GenomeSubsequence>
{
    /// Reserves space for `additional` more characters.
    fn reserve(&mut self, additional: usize);

    /// Resizes the genome to `len` characters; `default` is the character supplied for
    /// any newly created positions.
    fn resize(&mut self, len: usize, default: AlphabetType::CharacterType);

    /// Appends `character` to the end of the genome.
    fn push(&mut self, character: AlphabetType::CharacterType);

    /// Consumes the genome and returns an iterator over its characters converted with
    /// `AlphabetType::character_to_ascii`.
    fn into_iter_u8(self) -> IntoIterU8<Self, AlphabetType> {
        let characters = self.into_iter();
        characters.map(AlphabetType::character_to_ascii)
    }

    /// Appends the characters encoded by the ASCII bytes of `iter` to this genome.
    ///
    /// # Errors
    ///
    /// If a byte cannot be converted by `AlphabetType::ascii_to_character`, the genome is
    /// resized back to the length it had before the call and the conversion error is
    /// returned.
    fn extend_from_iter_u8<IteratorType: IntoIterator<Item = u8>>(
        &mut self,
        iter: IteratorType,
    ) -> Result<(), AlphabetError> {
        // Remember where to roll back to should a later byte be rejected.
        let rollback_len = self.len();
        let bytes = iter.into_iter();
        let lower_bound = bytes.size_hint().0;
        self.reserve(lower_bound);

        for byte in bytes {
            let character = match AlphabetType::ascii_to_character(byte) {
                Ok(character) => character,
                Err(error) => {
                    // Drop everything appended so far; the fill character is only a
                    // formality required by `resize`.
                    self.resize(
                        rollback_len,
                        AlphabetType::CharacterType::from_index(0).unwrap(),
                    );
                    return Err(error);
                }
            };
            self.push(character);
        }

        Ok(())
    }

    /// Appends the characters encoded by the ASCII bytes of `slice` to this genome.
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as [`Self::extend_from_iter_u8`].
    fn extend_from_slice_u8(&mut self, slice: &[u8]) -> Result<(), AlphabetError> {
        let bytes = slice.iter().copied();
        self.extend_from_iter_u8(bytes)
    }
}