#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
mod errors;
mod utils;
use errors::MultiSeqAlignError;
use std::iter::FromIterator;
#[cfg(feature = "serde")]
#[macro_use]
extern crate serde;
/// A multiple sequence alignment in which every sequence has the same length.
///
/// All residues live in one flat buffer, one sequence after the other, so the
/// element at position `p` of sequence `s` sits at index `s * length + p`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Alignment<T> {
    /// Residues of every sequence, concatenated in insertion order.
    sequences: Vec<T>,
    /// Number of sequences currently stored.
    n_sequences: usize,
    /// Number of positions (columns) in each sequence.
    length: usize,
}
/// The default alignment is empty: no sequences and a sequence length of zero.
///
/// No bound is placed on `T`, because creating an empty buffer never needs to
/// clone or otherwise inspect an element.
impl<T> Default for Alignment<T> {
    fn default() -> Self {
        Self {
            sequences: Vec::new(),
            n_sequences: 0_usize,
            length: 0_usize,
        }
    }
}
/// Cursor that walks an [`Alignment`] one position (column) at a time.
struct AlignmentPositionIterator<'a, T> {
    /// Alignment being traversed.
    alignment: &'a Alignment<T>,
    /// Index of the next position to yield.
    index: usize,
    /// Number of positions that have not been yielded yet.
    size_hint: usize,
}
/// Yields each position (column) of the alignment as a vector holding one
/// reference per sequence.
impl<'a, T> Iterator for AlignmentPositionIterator<'a, T>
where
    T: Clone,
{
    type Item = Vec<&'a T>;

    /// Returns the next column, or `None` once every position was visited.
    fn next(&mut self) -> Option<Self::Item> {
        // Stop before calling `nth_position`, which panics on an index past
        // the end of the alignment.
        if self.index >= self.alignment.length {
            return None;
        }
        let column = self.alignment.nth_position(self.index)?;
        self.index = self.index.saturating_add(1);
        self.size_hint = self.size_hint.saturating_sub(1);
        Some(column)
    }

    /// Reports the exact number of columns left.
    ///
    /// The count is always known, so both bounds are returned even when it
    /// equals `usize::MAX`; `ExactSizeIterator::len` asserts that they agree.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.size_hint, Some(self.size_hint))
    }
}
/// The number of remaining columns is always known exactly.
///
/// `len` is deliberately left to the trait's provided implementation, which
/// returns the lower bound of `size_hint` after asserting that it equals the
/// upper bound.
impl<'a, T> ExactSizeIterator for AlignmentPositionIterator<'a, T> where T: Clone {}
/// Cursor that walks an [`Alignment`] one sequence (row) at a time.
struct AlignmentSequenceIterator<'a, T> {
    /// Alignment being traversed.
    alignment: &'a Alignment<T>,
    /// Index of the next sequence to yield.
    index: usize,
    /// Number of sequences that have not been yielded yet.
    size_hint: usize,
}
/// Yields each sequence (row) of the alignment as a vector of references to
/// its residues.
impl<'a, T> Iterator for AlignmentSequenceIterator<'a, T>
where
    T: Clone,
{
    type Item = Vec<&'a T>;

    /// Returns the next sequence, or `None` once every sequence was visited.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.alignment.n_sequences {
            return None;
        }
        let row = self.alignment.nth_sequence(self.index)?;
        self.index = self.index.saturating_add(1);
        self.size_hint = self.size_hint.saturating_sub(1);
        Some(row)
    }

    /// Skips `n` sequences and returns the one after them.
    ///
    /// As `Iterator::nth` requires, `n` is relative to the current cursor and
    /// the skipped sequences are consumed. Skipping past the end exhausts the
    /// iterator and returns `None`. The skipped rows are never materialised,
    /// so the jump costs no allocation.
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        let remaining = self.alignment.n_sequences.saturating_sub(self.index);
        if n >= remaining {
            self.index = self.alignment.n_sequences;
            self.size_hint = 0;
            return None;
        }
        self.index = self.index.saturating_add(n);
        self.size_hint = self.size_hint.saturating_sub(n);
        self.next()
    }

    /// Reports the exact number of sequences left.
    ///
    /// The count is always known, so both bounds are returned even when it
    /// equals `usize::MAX`; `ExactSizeIterator::len` asserts that they agree.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.size_hint, Some(self.size_hint))
    }
}
/// The number of remaining sequences is always known exactly.
///
/// `len` is deliberately left to the trait's provided implementation, which
/// returns the lower bound of `size_hint` after asserting that it equals the
/// upper bound.
impl<'a, T> ExactSizeIterator for AlignmentSequenceIterator<'a, T> where T: Clone {}
impl<T> Alignment<T> {
    /// Returns the number of positions (columns) of every sequence.
    #[must_use]
    pub const fn length(&self) -> &usize {
        &self.length
    }

    /// Returns the number of sequences (rows) stored in the alignment.
    #[must_use]
    pub const fn n_sequences(&self) -> &usize {
        &self.n_sequences
    }

    /// Returns an iterator over the positions (columns) of the alignment.
    ///
    /// Each item holds one reference per sequence, in sequence order. The
    /// iterator knows its exact remaining length.
    pub fn iter_positions(
        &self,
    ) -> impl Iterator<Item = Vec<&T>> + ExactSizeIterator<Item = Vec<&T>>
    where
        T: Clone,
    {
        AlignmentPositionIterator {
            alignment: self,
            index: 0_usize,
            size_hint: self.length,
        }
    }

    /// Returns an iterator over the sequences (rows) of the alignment.
    ///
    /// Each item holds one reference per position, in position order. The
    /// iterator knows its exact remaining length.
    pub fn iter_sequences(
        &self,
    ) -> impl Iterator<Item = Vec<&T>> + ExactSizeIterator<Item = Vec<&T>>
    where
        T: Clone,
    {
        AlignmentSequenceIterator {
            alignment: self,
            index: 0_usize,
            size_hint: self.n_sequences,
        }
    }

    /// Creates an alignment without sequences whose future sequences must all
    /// have exactly `length` elements.
    #[must_use]
    pub const fn new(length: usize) -> Self {
        Self {
            sequences: Vec::new(),
            n_sequences: 0_usize,
            length,
        }
    }

    /// Returns `true` when the alignment holds no sequence.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.n_sequences == 0_usize
    }

    /// Builds an alignment from a slice of sequences, cloning their elements.
    ///
    /// The alignment length is whatever `utils::first_sequence_length`
    /// reports for `sequences`.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `utils::check_unequal_lengths`; the
    /// crate tests show
    /// `MultiSeqAlignError::MultipleSequencesOfDifferentLengths` when a
    /// sequence is longer than the first one.
    pub fn with_sequences(sequences: &[Vec<T>]) -> Result<Self, MultiSeqAlignError>
    where
        T: Clone,
    {
        let length = utils::first_sequence_length(sequences);
        utils::check_unequal_lengths(sequences, length)?;
        Ok(Self {
            // `concat` sizes the flat buffer once instead of growing it row by row.
            sequences: sequences.concat(),
            n_sequences: sequences.len(),
            length,
        })
    }

    /// Appends `sequence` as a new row and returns `self` for chaining.
    ///
    /// # Errors
    ///
    /// Returns `MultiSeqAlignError::NewSequenceOfDifferentLength` and leaves
    /// the alignment untouched when `sequence` does not have exactly
    /// `length` elements.
    pub fn add(&mut self, sequence: Vec<T>) -> Result<&mut Self, MultiSeqAlignError> {
        let found_length = sequence.len();
        if found_length != self.length {
            return Err(MultiSeqAlignError::NewSequenceOfDifferentLength {
                expected_length: self.length,
                found_length,
            });
        }
        self.sequences.extend(sequence);
        self.n_sequences += 1;
        Ok(self)
    }

    /// Returns the elements found at position `n` of every sequence, in
    /// sequence order.
    ///
    /// The result is `None` only if the flat buffer is shorter than
    /// `n_sequences * length` implies.
    ///
    /// # Panics
    ///
    /// Panics if `n` is greater than or equal to the alignment length.
    #[must_use]
    pub fn nth_position(&self, n: usize) -> Option<Vec<&T>> {
        assert!(n < self.length);
        // Collecting into `Option<Vec<_>>` stops at the first missing element.
        (0..self.n_sequences)
            .map(|i| self.sequences.get(i * self.length + n))
            .collect()
    }

    /// Returns the elements of the sequence stored at `index`, in position
    /// order.
    ///
    /// Returns `None` when `index` is not smaller than the number of
    /// sequences. The check runs in every build profile, so an out-of-range
    /// index can never wrap around and alias another row. `None` is also
    /// returned if the flat buffer is shorter than `n_sequences * length`
    /// implies.
    #[must_use]
    pub fn nth_sequence(&self, index: usize) -> Option<Vec<&T>> {
        if index >= self.n_sequences {
            return None;
        }
        let start = index.checked_mul(self.length)?;
        let end = start.checked_add(self.length)?;
        self.sequences
            .get(start..end)
            .map(|row| row.iter().collect())
    }
}
/// Collects an iterator of equally long sequences into an alignment.
///
/// The first sequence fixes the alignment length. An empty iterator yields an
/// empty alignment of length zero. The sequences are moved into the flat
/// buffer, so no element is cloned and no bound on `A` is needed.
///
/// # Panics
///
/// Panics with `"sequences of different lengths"` when a sequence does not
/// have the same length as the first one; `FromIterator` has no way to report
/// the problem as an error.
impl<A> FromIterator<Vec<A>> for Alignment<A> {
    fn from_iter<I: IntoIterator<Item = Vec<A>>>(iter: I) -> Self {
        let mut length: Option<usize> = None;
        let mut n_sequences = 0_usize;
        let mut sequences = Vec::new();
        for sequence in iter {
            match length {
                None => length = Some(sequence.len()),
                Some(expected) => assert!(
                    expected == sequence.len(),
                    "sequences of different lengths"
                ),
            }
            n_sequences += 1;
            sequences.extend(sequence);
        }
        Self {
            sequences,
            n_sequences,
            // No sequence at all means there is no length to record.
            length: length.unwrap_or(0),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds the fixture shared by most tests: two eight-residue sequences
    /// that differ at columns 0 and 3.
    fn sample_pair() -> Alignment<u8> {
        Alignment::with_sequences(&[b"ALKHITAN".to_vec(), b"VLK-ITAN".to_vec()]).unwrap()
    }

    /// `new` records the requested length and stores no sequence.
    #[test]
    fn new_align() {
        let x = Alignment::<char>::new(5_usize);
        assert!(x.sequences.is_empty());
        assert_eq!(x.length, 5_usize);
        assert_eq!(x.n_sequences, 0_usize);
    }

    /// `with_sequences` flattens its input sequence after sequence.
    #[test]
    fn new_alignment_with_desc() {
        let x = Alignment::<u8>::with_sequences(&[b"ELK".to_vec(), b"ILK".to_vec()]).unwrap();
        assert_eq!(x.sequences, vec![b'E', b'L', b'K', b'I', b'L', b'K']);
        assert_eq!(x.length, 3);
        assert_eq!(x.n_sequences, 2);
    }

    /// A sequence added later shows up as the last entry of each column.
    #[test]
    fn add_1_sequence() {
        let mut align = sample_pair();
        align.add(b"ALRYITAT".to_vec()).unwrap();
        assert_eq!(align.n_sequences, 3_usize);
        assert_eq!(align.nth_position(3).unwrap(), vec![&b'H', &b'-', &b'Y']);
    }

    /// Adding a sequence of the wrong length reports both lengths.
    #[test]
    fn add_1_sequence_wrong_length() {
        let mut x = Alignment::new(3_usize);
        let error = x.add(b"ILKAV".to_vec()).err().unwrap();
        let expected = MultiSeqAlignError::NewSequenceOfDifferentLength {
            expected_length: 3_usize,
            found_length: 5_usize,
        };
        assert_eq!(error, expected);
    }

    /// Each successful `add` grows the buffer by exactly one sequence.
    #[test]
    fn add_to_new() {
        let mut x = Alignment::new(3_usize);

        x.add(b"ELK".to_vec()).unwrap();
        assert_eq!(x.n_sequences, 1_usize);
        assert_eq!(x.length, 3_usize);
        assert_eq!(x.sequences.len(), 3_usize);

        x.add(b"ILK".to_vec()).unwrap();
        assert_eq!(x.n_sequences, 2_usize);
        assert_eq!(x.length, 3_usize);
        assert_eq!(x.sequences.len(), 6_usize);
    }

    /// `is_empty` flips as soon as the first sequence is added.
    #[test]
    fn empty_align() {
        let mut x = Alignment::new(3_usize);
        assert!(x.is_empty());
        x.add(b"ILK".to_vec()).unwrap();
        assert!(!x.is_empty());
    }

    #[test]
    fn nth_residues_3() {
        let align = sample_pair();
        assert_eq!(align.nth_position(3).unwrap(), vec![&b'H', &b'-']);
    }

    #[test]
    fn nth_residues_more_seqs() {
        let align = Alignment::with_sequences(&[
            b"ALKHITAN".to_vec(),
            b"VLK-ITAN".to_vec(),
            b"ALKWITAN".to_vec(),
            b"VLKMITAN".to_vec(),
        ])
        .unwrap();
        assert_eq!(
            align.nth_position(3).unwrap(),
            vec![&b'H', &b'-', &b'W', &b'M']
        );
    }

    /// Asking for a column past the end trips the bounds assertion.
    #[test]
    #[should_panic(expected = "assertion failed: n < self.length")]
    fn nth_residues_out() {
        let align = sample_pair();
        let _out_of_bounds = align.nth_position(10);
    }

    /// Unequal input lengths are rejected, listing the offending lengths.
    #[test]
    fn different_seq_lengths() {
        let error = Alignment::with_sequences(&[b"ALKHITAN".to_vec(), b"VLK-ITAN---".to_vec()])
            .err()
            .unwrap();
        let expected = MultiSeqAlignError::MultipleSequencesOfDifferentLengths {
            expected_length: 8,
            found_lengths: vec![11],
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn for_positions() {
        let align = sample_pair();
        let x: Vec<_> = align.iter_positions().collect();
        assert_eq!(x.len(), 8);
        assert_eq!(x.first().unwrap(), &[&b'A', &b'V']);
        assert_eq!(x.get(3).unwrap(), &[&b'H', &b'-']);
    }

    /// The iterator yields only eight columns, so index 22 does not exist.
    #[test]
    #[should_panic]
    fn for_positions_out_of_bonds() {
        let align = sample_pair();
        let x: Vec<_> = align.iter_positions().collect();
        let _ = x.get(22).unwrap();
    }

    #[test]
    fn for_positions_exact() {
        let align = sample_pair();
        assert_eq!(align.iter_positions().len(), 8);
        assert_eq!(align.iter_positions().next().unwrap().len(), 2);
    }

    #[test]
    fn for_sequences() {
        let align = sample_pair();
        let mut seen = 0_usize;
        for row in align.iter_sequences() {
            assert_eq!(row.len(), 8);
            seen += 1;
        }
        assert_eq!(seen, 2);
    }

    #[test]
    fn for_sequences_exact() {
        let align = sample_pair();
        assert_eq!(align.iter_sequences().len(), 2);
        assert_eq!(align.iter_sequences().next().unwrap().len(), 8);
    }

    /// Collecting the sequence iterator gives every row back, in order and
    /// with its original content.
    #[test]
    fn for_sequences_collect() {
        let align = sample_pair();
        let rows: Vec<Vec<&u8>> = align.iter_sequences().collect();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], b"ALKHITAN".iter().collect::<Vec<&u8>>());
        assert_eq!(rows[1], b"VLK-ITAN".iter().collect::<Vec<&u8>>());
    }
}