use itertools::Itertools;
use std::iter::{Copied, FromIterator, Map, Rev};
use traitsequence::interface::{EditableSequence, Sequence, SequenceMut};
/// The iterator type returned by [`GenomeSequence::reverse_complement_iter`].
///
/// It is the underlying iterator `I` wrapped in the following adaptors, from
/// the inside out: `Copied` (yield values instead of references), `Rev`
/// (iterate back to front), a `Map` through a `fn(u8) -> Option<u8>`, and a
/// final `Map` through a `fn(Option<u8>) -> u8`.
///
/// Plain function pointers are used instead of closures so that the complete
/// type can be written down in this alias.
pub type ReverseComplementIterator<I> =
Map<Map<Rev<Copied<I>>, fn(u8) -> Option<u8>>, fn(Option<u8>) -> u8>;
/// A sequence of `u8` characters that is interpreted as a genome string.
///
/// The valid alphabet is whatever [`is_valid_ascii_genome_character`] accepts.
/// All methods have default implementations that are built on top of the
/// [`Sequence`] supertrait.
pub trait GenomeSequence<'a, GenomeSubsequence: GenomeSequence<'a, GenomeSubsequence> + ?Sized>:
    Sequence<'a, u8, GenomeSubsequence>
{
    /// Returns `true` if every character of this sequence passes
    /// [`is_valid_ascii_genome_character`].
    ///
    /// An empty sequence is considered valid.
    fn is_valid(&'a self) -> bool {
        self.iter().copied().all(is_valid_ascii_genome_character)
    }

    /// Collects the characters of this sequence that fail
    /// [`is_valid_ascii_genome_character`].
    ///
    /// Each offending character is reported once, in order of its first
    /// occurrence. The result is empty if the sequence is valid.
    fn get_invalid_characters(&'a self) -> Vec<u8> {
        self.iter()
            .copied()
            .filter(|c| !is_valid_ascii_genome_character(*c))
            .unique()
            .collect()
    }

    /// Copies the characters of this sequence into a freshly allocated `Vec<u8>`.
    fn clone_as_vec(&'a self) -> Vec<u8> {
        self.iter().copied().collect()
    }

    /// Returns the whole sequence as a reference to the subsequence type by
    /// indexing it with the range `0..self.len()`.
    fn as_genome_subsequence(&self) -> &GenomeSubsequence {
        self.index(0..self.len())
    }

    /// Copies this sequence into an owned `String`.
    ///
    /// # Panics
    ///
    /// Panics if the characters of this sequence do not form valid UTF-8.
    fn as_string(&'a self) -> String {
        String::from_utf8(self.clone_as_vec())
            .expect("Genome contains non-utf8 characters (It should be ASCII only).")
    }

    /// Returns an iterator over the reverse complement of this sequence.
    ///
    /// The characters are visited back to front and each one is replaced by
    /// the result of [`ascii_complement`].
    ///
    /// # Panics
    ///
    /// The returned iterator panics when it reaches a character for which
    /// [`ascii_complement`] returns `None`. The check is lazy: nothing panics
    /// until the offending element is actually requested.
    fn reverse_complement_iter(&'a self) -> ReverseComplementIterator<Self::Iterator> {
        self.iter()
            .copied()
            .rev()
            // The casts to function pointers are required to match the
            // nameable `ReverseComplementIterator` type.
            .map(ascii_complement as fn(u8) -> Option<u8>)
            .map(Option::unwrap as fn(Option<u8>) -> u8)
    }
}
/// A genome sequence that owns its characters and can be built from an
/// iterator over `u8`.
pub trait OwnedGenomeSequence<'a, GenomeSubsequence: GenomeSequence<'a, GenomeSubsequence> + ?Sized>:
    for<'s> GenomeSequence<'s, GenomeSubsequence> + FromIterator<u8>
{
    /// Builds a new owned sequence holding the reverse complement of `self`.
    ///
    /// The characters are taken from `reverse_complement_iter`, so this
    /// panics under the same condition as consuming that iterator does.
    fn reverse_complement(&'a self) -> Self {
        let complemented_bases = self.reverse_complement_iter();
        complemented_bases.collect()
    }
}
/// A genome sequence that additionally satisfies `SequenceMut` over `u8`
/// with the same subsequence type.
///
/// This trait declares no methods of its own; it only combines the
/// `SequenceMut` and `GenomeSequence` bounds under a single name.
pub trait GenomeSequenceMut<
'a,
GenomeSubsequenceMut: GenomeSequenceMut<'a, GenomeSubsequenceMut> + ?Sized,
>: SequenceMut<'a, u8, GenomeSubsequenceMut> + GenomeSequence<'a, GenomeSubsequenceMut>
{
}
/// A genome sequence that additionally satisfies `EditableSequence` over `u8`
/// with the same subsequence type.
///
/// This trait declares no methods of its own; it only combines the
/// `EditableSequence` and `GenomeSequence` bounds under a single name.
pub trait EditableGenomeSequence<
'a,
GenomeSubsequence: GenomeSequence<'a, GenomeSubsequence> + ?Sized,
>: EditableSequence<'a, u8, GenomeSubsequence> + GenomeSequence<'a, GenomeSubsequence>
{
}
/// Looks up the complement of an upper-case ASCII nucleotide.
///
/// `A` and `T` are complements of each other, as are `G` and `C`.
/// Every other byte, including lower-case letters, yields `None`.
pub fn ascii_complement(char: u8) -> Option<u8> {
    // Each entry is (nucleotide, complement).
    const COMPLEMENT_TABLE: [(u8, u8); 4] = [
        (b'A', b'T'),
        (b'T', b'A'),
        (b'G', b'C'),
        (b'C', b'G'),
    ];

    COMPLEMENT_TABLE
        .iter()
        .find(|&&(nucleotide, _)| nucleotide == char)
        .map(|&(_, complement)| complement)
}
/// Returns `true` if the given byte is one of the upper-case ASCII
/// nucleotides `A`, `T`, `G` or `C`, and `false` for every other byte.
pub fn is_valid_ascii_genome_character(char: u8) -> bool {
    b"ATGC".contains(&char)
}