#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
mod errors;
mod utils;
use errors::MultiSeqAlignError;
#[cfg(feature = "serde")]
#[macro_use]
extern crate serde;
/// A multiple sequence alignment stored as one flat buffer.
///
/// Sequences are laid out one after another in `sequences`, so the item at position `p` of
/// sequence `s` is found at index `s * length + p`.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Alignment<T> {
    /// Name of each sequence, in insertion order.
    names: Vec<String>,
    /// Description of each sequence, in the same order as `names`.
    descriptions: Vec<String>,
    /// Items of all sequences, concatenated sequence after sequence.
    sequences: Vec<T>,
    /// Number of sequences currently stored.
    n_sequences: usize,
    /// Number of positions (columns) of the alignment.
    length: usize,
}

/// The default alignment holds no sequence and has a length of zero.
///
/// No bound is placed on `T`: building empty vectors never creates or copies an item, so
/// element types that are not `Clone` can use `Default` as well.
impl<T> Default for Alignment<T> {
    fn default() -> Self {
        Self {
            names: Vec::new(),
            descriptions: Vec::new(),
            sequences: Vec::new(),
            n_sequences: 0,
            length: 0,
        }
    }
}
/// Iterator over the positions (columns) of an [`Alignment`].
///
/// Built by `Alignment::iter_positions`; every item holds one reference per sequence.
struct AlignmentPositionIterator<'a, T> {
/// Alignment being traversed.
alignment: &'a Alignment<T>,
/// Index of the position that the next call to `next` yields.
index: usize,
/// Number of positions that have not been yielded yet.
size_hint: usize,
}
/// Walks the alignment position by position, from the first column to the last.
impl<'a, T> Iterator for AlignmentPositionIterator<'a, T>
where
    T: Clone,
{
    type Item = Vec<&'a T>;

    /// Returns the next position, or `None` once every position has been visited.
    fn next(&mut self) -> Option<Vec<&'a T>> {
        if self.index >= self.alignment.length {
            return None;
        }
        let position = self.alignment.nth_position(self.index)?;
        // The cursor only moves once a position has actually been produced.
        self.index = self.index.saturating_add(1);
        self.size_hint = self.size_hint.saturating_sub(1);
        Some(position)
    }

    /// Reports the exact number of positions left.
    ///
    /// The remaining count is a `usize`, so it is always a valid upper bound. Both bounds
    /// are therefore equal, which is what `ExactSizeIterator::len` relies on.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.size_hint, Some(self.size_hint))
    }
}
/// Exposes the number of positions left to visit through `len`.
impl<'a, T> ExactSizeIterator for AlignmentPositionIterator<'a, T>
where
    T: Clone,
{
    /// Returns the lower bound reported by `size_hint`.
    ///
    /// # Panics
    ///
    /// Panics when the upper bound reported by `size_hint` differs from the lower bound.
    fn len(&self) -> usize {
        let bounds = self.size_hint();
        assert_eq!(bounds.1, Some(bounds.0));
        bounds.0
    }
}
/// Iterator over the sequences (rows) of an [`Alignment`].
///
/// Built by `Alignment::iter_sequences`; every item holds one reference per position.
struct AlignmentSequenceIterator<'a, T> {
/// Alignment being traversed.
alignment: &'a Alignment<T>,
/// Index of the sequence that the next call to `next` yields.
index: usize,
/// Number of sequences that have not been yielded yet.
size_hint: usize,
}
/// Walks the alignment sequence by sequence, in insertion order.
impl<'a, T> Iterator for AlignmentSequenceIterator<'a, T>
where
    T: Clone,
{
    type Item = Vec<&'a T>;

    /// Returns the next sequence, or `None` once every sequence has been visited.
    fn next(&mut self) -> Option<Vec<&'a T>> {
        if self.index >= self.alignment.n_sequences {
            return None;
        }
        let sequence = self.alignment.nth_sequence(self.index)?;
        // The cursor only moves once a sequence has actually been produced.
        self.index = self.index.saturating_add(1);
        self.size_hint = self.size_hint.saturating_sub(1);
        Some(sequence)
    }

    /// Skips `n` sequences and returns the one after them, without building the skipped rows.
    ///
    /// As required by `Iterator::nth`, `n` counts from the current cursor and the
    /// skipped sequences are consumed. Asking for a sequence past the end exhausts the
    /// iterator and returns `None`.
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        let total = self.alignment.n_sequences;
        match self.index.checked_add(n) {
            Some(target) if target < total => {
                self.index = target;
                self.size_hint = self.size_hint.saturating_sub(n);
                self.next()
            }
            _ => {
                // The target is out of range (or not even representable): nothing is left.
                self.index = total;
                self.size_hint = 0;
                None
            }
        }
    }

    /// Reports the exact number of sequences left.
    ///
    /// The remaining count is a `usize`, so it is always a valid upper bound. Both bounds
    /// are therefore equal, which is what `ExactSizeIterator::len` relies on.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.size_hint, Some(self.size_hint))
    }
}
/// Exposes the number of sequences left to visit through `len`.
impl<'a, T> ExactSizeIterator for AlignmentSequenceIterator<'a, T>
where
    T: Clone,
{
    /// Returns the lower bound reported by `size_hint`.
    ///
    /// # Panics
    ///
    /// Panics when the upper bound reported by `size_hint` differs from the lower bound.
    fn len(&self) -> usize {
        let bounds = self.size_hint();
        assert_eq!(bounds.1, Some(bounds.0));
        bounds.0
    }
}
impl<T> Alignment<T> {
    /// Returns the names of the sequences, in insertion order.
    #[must_use]
    pub const fn names(&self) -> &Vec<String> {
        &self.names
    }

    /// Returns the descriptions of the sequences, in insertion order.
    #[must_use]
    pub const fn descriptions(&self) -> &Vec<String> {
        &self.descriptions
    }

    /// Returns the number of positions (columns) of the alignment.
    #[must_use]
    pub const fn length(&self) -> &usize {
        &self.length
    }

    /// Returns the number of sequences (rows) stored in the alignment.
    #[must_use]
    pub const fn n_sequences(&self) -> &usize {
        &self.n_sequences
    }

    /// Returns an iterator over the positions (columns) of the alignment.
    ///
    /// Each item holds one reference per sequence, in sequence order.
    pub fn iter_positions(
        &self,
    ) -> impl Iterator<Item = Vec<&T>> + ExactSizeIterator<Item = Vec<&T>>
    where
        T: Clone,
    {
        AlignmentPositionIterator {
            alignment: self,
            index: 0_usize,
            size_hint: self.length,
        }
    }

    /// Returns an iterator over the sequences (rows) of the alignment.
    ///
    /// Each item holds one reference per position, in position order.
    pub fn iter_sequences(
        &self,
    ) -> impl Iterator<Item = Vec<&T>> + ExactSizeIterator<Item = Vec<&T>>
    where
        T: Clone,
    {
        AlignmentSequenceIterator {
            alignment: self,
            index: 0_usize,
            size_hint: self.n_sequences,
        }
    }

    /// Creates an alignment without any sequence.
    ///
    /// `length` is the number of items every sequence added later must have.
    #[must_use]
    pub const fn new(length: usize) -> Self {
        Self {
            names: Vec::new(),
            descriptions: Vec::new(),
            sequences: Vec::new(),
            n_sequences: 0_usize,
            length,
        }
    }

    /// Returns `true` when the alignment holds no sequence.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.n_sequences == 0_usize
    }

    /// Builds an alignment from parallel lists of names, descriptions and sequences.
    ///
    /// The alignment length is the value returned by `utils::first_sequence_length`.
    /// In debug builds, the three inputs are asserted to have the same number of entries.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `utils::check_unequal_lengths`; the unit tests of
    /// this file show `MultiSeqAlignError::MultipleSequencesOfDifferentLengths` being
    /// returned when a sequence is longer than the first one.
    pub fn create(
        names: Vec<String>,
        descriptions: Vec<String>,
        sequences: &[Vec<T>],
    ) -> Result<Self, MultiSeqAlignError>
    where
        T: Clone,
    {
        debug_assert!(names.len() == descriptions.len() && names.len() == sequences.len());
        let length = utils::first_sequence_length(sequences);
        utils::check_unequal_lengths(sequences, &names, length)?;
        Ok(Self {
            names,
            descriptions,
            // `concat` allocates the flat buffer once and clones every item exactly once,
            // instead of copying each row into a temporary vector first.
            sequences: sequences.concat(),
            n_sequences: sequences.len(),
            length,
        })
    }

    /// Appends one sequence, with its name and description, to the alignment.
    ///
    /// Returns the alignment itself so that calls can be chained.
    ///
    /// # Errors
    ///
    /// Returns `MultiSeqAlignError::NewSequenceOfDifferentLength` when `sequence` does not
    /// have exactly `length` items; the alignment is left untouched in that case.
    pub fn add_aligned_sequence(
        &mut self,
        name: String,
        description: String,
        sequence: Vec<T>,
    ) -> Result<&mut Self, MultiSeqAlignError> {
        if sequence.len() != self.length {
            return Err(MultiSeqAlignError::NewSequenceOfDifferentLength {
                expected_length: self.length,
                sequences_name: name,
                found_length: sequence.len(),
            });
        }
        self.names.push(name);
        self.descriptions.push(description);
        self.sequences.extend(sequence);
        self.n_sequences += 1;
        Ok(self)
    }

    /// Returns the items found at position `n` of every sequence, in sequence order.
    ///
    /// `None` is returned when the flat buffer is shorter than the stored dimensions imply.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not smaller than the alignment length.
    #[must_use]
    pub fn nth_position(&self, n: usize) -> Option<Vec<&T>> {
        assert!(n < self.length);
        // Collecting straight into `Option<Vec<_>>` stops at the first missing item.
        (0..self.n_sequences)
            .map(|i| self.sequences.get(i * self.length + n))
            .collect::<Option<Vec<&T>>>()
    }

    /// Returns the items of the sequence stored at `index`, in position order.
    ///
    /// `None` is returned when the requested row does not lie entirely inside the flat
    /// buffer. In debug builds, `index` is asserted to be smaller than the number of
    /// sequences.
    #[must_use]
    pub fn nth_sequence(&self, index: usize) -> Option<Vec<&T>> {
        debug_assert!(index < self.n_sequences);
        // Checked arithmetic: a wrapped offset would silently select another row in
        // release builds, where overflow is not trapped.
        let start = index.checked_mul(self.length)?;
        let end = start.checked_add(self.length)?;
        self.sequences
            .get(start..end)
            .map(|row| row.iter().collect::<Vec<&T>>())
    }
}
/// Unit tests for building, extending and iterating over an `Alignment`.
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds the two-sequence, eight-position alignment shared by most tests.
    fn two_sequence_alignment() -> Alignment<u8> {
        Alignment::create(
            vec![String::from("NAME1"), String::from("NAME2")],
            vec![String::from("desc1"), String::from("desc2")],
            &[b"ALKHITAN".to_vec(), b"VLK-ITAN".to_vec()],
        )
        .unwrap()
    }

    #[test]
    fn new_align() {
        let x = Alignment::<char>::new(5_usize);
        assert!(x.names.is_empty());
        assert!(x.sequences.is_empty());
        assert_eq!(x.length, 5_usize);
        assert_eq!(x.n_sequences, 0_usize);
    }

    #[test]
    fn new_alignment_with_desc() {
        let x = Alignment::<u8>::create(
            vec!["testname1".to_string(), "testname2".to_string()],
            vec!["desc1".to_string(), "desc2".to_string()],
            &[b"ELK".to_vec(), b"ILK".to_vec()],
        )
        .unwrap();
        assert_eq!(
            x.names,
            vec!["testname1".to_string(), "testname2".to_string()]
        );
        assert_eq!(
            x.descriptions,
            vec!["desc1".to_string(), "desc2".to_string()]
        );
        assert_eq!(x.sequences, vec![b'E', b'L', b'K', b'I', b'L', b'K']);
        assert_eq!(x.length, 3);
        assert_eq!(x.n_sequences, 2);
    }

    #[test]
    fn add_1_sequence() {
        let mut align = two_sequence_alignment();
        align
            .add_aligned_sequence("added1".to_string(), "".to_string(), b"ALRYITAT".to_vec())
            .unwrap();
        assert_eq!(align.n_sequences, 3_usize);
        assert_eq!(align.nth_position(3).unwrap(), vec![&b'H', &b'-', &b'Y']);
    }

    #[test]
    fn add_1_sequence_wrong_length() {
        let mut x = Alignment::new(3_usize);
        let error = x
            .add_aligned_sequence(
                String::from("too_long"),
                "add sequence of length 5 to an alignment of length 3".to_string(),
                b"ILKAV".to_vec(),
            )
            .err()
            .unwrap();
        let expected = MultiSeqAlignError::NewSequenceOfDifferentLength {
            expected_length: 3_usize,
            sequences_name: String::from("too_long"),
            found_length: 5_usize,
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn add_to_new() {
        let mut x = Alignment::new(3_usize);
        x.add_aligned_sequence("sequence1".to_string(), "".to_string(), b"ELK".to_vec())
            .unwrap();
        assert_eq!(x.n_sequences, 1_usize);
        assert_eq!(x.length, 3_usize);
        assert_eq!(x.names.len(), 1_usize);
        assert_eq!(x.descriptions.len(), 1_usize);
        assert_eq!(x.sequences.len(), 3_usize);
        x.add_aligned_sequence("sequence2".to_string(), "".to_string(), b"ILK".to_vec())
            .unwrap();
        assert_eq!(x.n_sequences, 2_usize);
        assert_eq!(x.length, 3_usize);
        assert_eq!(x.names.len(), 2_usize);
        assert_eq!(x.descriptions.len(), 2_usize);
        assert_eq!(x.sequences.len(), 6_usize);
    }

    #[test]
    fn empty_align() {
        let mut x = Alignment::new(3_usize);
        assert!(x.is_empty());
        x.add_aligned_sequence(String::from("sequence1"), "".to_string(), b"ILK".to_vec())
            .unwrap();
        assert!(!x.is_empty());
    }

    #[test]
    fn nth_residues_3() {
        let align = two_sequence_alignment();
        assert_eq!(align.nth_position(3).unwrap(), vec![&b'H', &b'-']);
    }

    #[test]
    fn nth_residues_more_seqs() {
        let align = Alignment::create(
            vec![
                "seq1".to_string(),
                "seq2".to_string(),
                "seq3".to_string(),
                "seq4".to_string(),
            ],
            vec!["".to_string(); 4],
            &[
                b"ALKHITAN".to_vec(),
                b"VLK-ITAN".to_vec(),
                b"ALKWITAN".to_vec(),
                b"VLKMITAN".to_vec(),
            ],
        )
        .unwrap();
        assert_eq!(
            align.nth_position(3).unwrap(),
            vec![&b'H', &b'-', &b'W', &b'M']
        );
    }

    #[test]
    #[should_panic(expected = "assertion failed: n < self.length")]
    fn nth_residues_out() {
        let align = two_sequence_alignment();
        let _out_of_bounds = align.nth_position(10);
    }

    #[test]
    fn different_seq_lengths() {
        let error = Alignment::create(
            vec![String::from("NAME1"), String::from("NAME2")],
            vec![String::from("desc1"), String::from("desc2")],
            &[b"ALKHITAN".to_vec(), b"VLK-ITAN---".to_vec()],
        )
        .err()
        .unwrap();
        let expected = MultiSeqAlignError::MultipleSequencesOfDifferentLengths {
            expected_length: 8,
            sequences_names: vec![String::from("NAME2")],
            found_lengths: vec![11],
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn for_names() {
        let align = Alignment::<u8>::create(
            vec!["NAME1".to_string(), "NAME2".to_string()],
            vec!["desc1".to_string(), "desc2".to_string()],
            &[b"ELK".to_vec(), b"ILK".to_vec()],
        )
        .unwrap();
        let mut x: Vec<&String> = Vec::new();
        for name in align.names() {
            x.push(name);
        }
        assert_eq!(x, vec!["NAME1", "NAME2"]);
    }

    #[test]
    fn for_positions() {
        let align = two_sequence_alignment();
        let mut x = Vec::new();
        for col in align.iter_positions() {
            x.push(col);
        }
        assert_eq!(x.len(), 8);
        assert_eq!(x.first().unwrap(), &[&b'A', &b'V']);
        assert_eq!(x.get(3).unwrap(), &[&b'H', &b'-']);
    }

    #[test]
    #[should_panic]
    fn for_positions_out_of_bonds() {
        let align = two_sequence_alignment();
        let mut x = Vec::new();
        for col in align.iter_positions() {
            x.push(col);
        }
        // Only eight positions exist, so unwrapping position 22 must panic.
        let _ = x.get(22).unwrap();
    }

    #[test]
    fn for_positions_exact() {
        let align = two_sequence_alignment();
        assert_eq!(align.iter_positions().len(), 8);
        assert_eq!(align.iter_positions().next().unwrap().len(), 2);
    }

    #[test]
    fn for_sequences() {
        let align = two_sequence_alignment();
        let mut x = Vec::new();
        for row in align.iter_sequences() {
            assert_eq!(row.len(), 8);
            x.push(row);
        }
        assert_eq!(x.len(), 2);
    }

    #[test]
    fn for_sequences_exact() {
        let align = two_sequence_alignment();
        assert_eq!(align.iter_sequences().len(), 2);
        assert_eq!(align.iter_sequences().next().unwrap().len(), 8);
    }

    #[test]
    fn for_sequences_collect() {
        let align = two_sequence_alignment();
        // Collect the rows and compare them with the input sequences.
        let rows: Vec<Vec<&u8>> = align.iter_sequences().collect();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], b"ALKHITAN".iter().collect::<Vec<&u8>>());
        assert_eq!(rows[1], b"VLK-ITAN".iter().collect::<Vec<&u8>>());
    }
}