use serde::{Deserialize, Serialize};
use std::fmt;
use crate::error::{Error, Result};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AminoAcid {
Alanine, Arginine, Asparagine, AsparticAcid, Cysteine, GlutamicAcid, Glutamine, Glycine, Histidine, Isoleucine, Leucine, Lysine, Methionine, Phenylalanine, Proline, Serine, Threonine, Tryptophan, Tyrosine, Valine, Ambiguous, Uncertain, Stop, Gap, }
impl AminoAcid {
pub fn from_code(c: char) -> Result<Self> {
match c.to_ascii_uppercase() {
'A' => Ok(AminoAcid::Alanine),
'R' => Ok(AminoAcid::Arginine),
'N' => Ok(AminoAcid::Asparagine),
'D' => Ok(AminoAcid::AsparticAcid),
'C' => Ok(AminoAcid::Cysteine),
'E' => Ok(AminoAcid::GlutamicAcid),
'Q' => Ok(AminoAcid::Glutamine),
'G' => Ok(AminoAcid::Glycine),
'H' => Ok(AminoAcid::Histidine),
'I' => Ok(AminoAcid::Isoleucine),
'L' => Ok(AminoAcid::Leucine),
'K' => Ok(AminoAcid::Lysine),
'M' => Ok(AminoAcid::Methionine),
'F' => Ok(AminoAcid::Phenylalanine),
'P' => Ok(AminoAcid::Proline),
'S' => Ok(AminoAcid::Serine),
'T' => Ok(AminoAcid::Threonine),
'W' => Ok(AminoAcid::Tryptophan),
'Y' => Ok(AminoAcid::Tyrosine),
'V' => Ok(AminoAcid::Valine),
'B' => Ok(AminoAcid::Ambiguous),
'X' => Ok(AminoAcid::Uncertain),
'*' => Ok(AminoAcid::Stop),
'-' => Ok(AminoAcid::Gap),
_ => Err(Error::InvalidAminoAcid(c)),
}
}
pub fn to_code(&self) -> char {
match self {
AminoAcid::Alanine => 'A',
AminoAcid::Arginine => 'R',
AminoAcid::Asparagine => 'N',
AminoAcid::AsparticAcid => 'D',
AminoAcid::Cysteine => 'C',
AminoAcid::GlutamicAcid => 'E',
AminoAcid::Glutamine => 'Q',
AminoAcid::Glycine => 'G',
AminoAcid::Histidine => 'H',
AminoAcid::Isoleucine => 'I',
AminoAcid::Leucine => 'L',
AminoAcid::Lysine => 'K',
AminoAcid::Methionine => 'M',
AminoAcid::Phenylalanine => 'F',
AminoAcid::Proline => 'P',
AminoAcid::Serine => 'S',
AminoAcid::Threonine => 'T',
AminoAcid::Tryptophan => 'W',
AminoAcid::Tyrosine => 'Y',
AminoAcid::Valine => 'V',
AminoAcid::Ambiguous => 'B',
AminoAcid::Uncertain => 'X',
AminoAcid::Stop => '*',
AminoAcid::Gap => '-',
}
}
pub fn index(&self) -> usize {
match self {
AminoAcid::Alanine => 0,
AminoAcid::Arginine => 1,
AminoAcid::Asparagine => 2,
AminoAcid::AsparticAcid => 3,
AminoAcid::Cysteine => 4,
AminoAcid::GlutamicAcid => 5,
AminoAcid::Glutamine => 6,
AminoAcid::Glycine => 7,
AminoAcid::Histidine => 8,
AminoAcid::Isoleucine => 9,
AminoAcid::Leucine => 10,
AminoAcid::Lysine => 11,
AminoAcid::Methionine => 12,
AminoAcid::Phenylalanine => 13,
AminoAcid::Proline => 14,
AminoAcid::Serine => 15,
AminoAcid::Threonine => 16,
AminoAcid::Tryptophan => 17,
AminoAcid::Tyrosine => 18,
AminoAcid::Valine => 19,
AminoAcid::Ambiguous => 20,
AminoAcid::Uncertain => 21,
AminoAcid::Stop => 22,
AminoAcid::Gap => 23,
}
}
}
impl fmt::Display for AminoAcid {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.to_code())
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Protein {
sequence: Vec<AminoAcid>,
id: Option<String>,
description: Option<String>,
}
impl Protein {
pub fn new(sequence: Vec<AminoAcid>) -> Result<Self> {
if sequence.is_empty() {
return Err(Error::EmptySequence);
}
Ok(Protein {
sequence,
id: None,
description: None,
})
}
pub fn from_string(seq_str: &str) -> Result<Self> {
let sequence: Result<Vec<_>> = seq_str
.chars()
.filter(|c| !c.is_whitespace())
.map(AminoAcid::from_code)
.collect();
Self::new(sequence?)
}
pub fn with_id(mut self, id: String) -> Self {
self.id = Some(id);
self
}
pub fn with_description(mut self, description: String) -> Self {
self.description = Some(description);
self
}
pub fn sequence(&self) -> &[AminoAcid] {
&self.sequence
}
pub fn len(&self) -> usize {
self.sequence.len()
}
pub fn is_empty(&self) -> bool {
self.sequence.is_empty()
}
pub fn id(&self) -> Option<&str> {
self.id.as_deref()
}
pub fn description(&self) -> Option<&str> {
self.description.as_deref()
}
pub fn as_string(&self) -> String {
self.sequence.iter().map(|aa| aa.to_code()).collect()
}
}
impl fmt::Display for Protein {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.as_string())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_amino_acid_from_code() {
assert_eq!(AminoAcid::from_code('A').unwrap(), AminoAcid::Alanine);
assert_eq!(AminoAcid::from_code('G').unwrap(), AminoAcid::Glycine);
assert!(AminoAcid::from_code('Z').is_err());
}
#[test]
fn test_amino_acid_to_code() {
assert_eq!(AminoAcid::Alanine.to_code(), 'A');
assert_eq!(AminoAcid::Glycine.to_code(), 'G');
}
#[test]
fn test_protein_creation() {
let seq = Protein::from_string("AGSGD").unwrap();
assert_eq!(seq.len(), 5);
assert_eq!(seq.as_string(), "AGSGD");
}
#[test]
fn test_protein_with_metadata() {
let seq = Protein::from_string("AGS")
.unwrap()
.with_id("P123".to_string())
.with_description("Test protein".to_string());
assert_eq!(seq.id(), Some("P123"));
assert_eq!(seq.description(), Some("Test protein"));
}
}