use serde::{Deserialize, Serialize};
use crate::error::{JivanuError, Result, validate_finite, validate_positive};
/// Computes the mutation rate per base per generation.
///
/// The result is `mutations / (bases * generations)`, evaluated in `f64`.
///
/// # Errors
///
/// Returns [`JivanuError::ComputationError`] when `bases` or `generations`
/// is zero, because the denominator would be zero.
#[inline]
#[must_use = "returns the mutation rate without side effects"]
pub fn mutation_rate(mutations: u64, bases: u64, generations: u64) -> Result<f64> {
    // `bases` is inspected first so the message is stable when both are zero.
    let problem = match (bases, generations) {
        (0, _) => Some("bases must be > 0"),
        (_, 0) => Some("generations must be > 0"),
        _ => None,
    };
    if let Some(message) = problem {
        return Err(JivanuError::ComputationError(message.into()));
    }

    let denominator = bases as f64 * generations as f64;
    Ok(mutations as f64 / denominator)
}
/// Computes Hardy-Weinberg genotype frequencies from allele frequency `p`.
///
/// With `q = 1 - p`, the returned tuple is `(p * p, 2 * p * q, q * q)`.
///
/// # Errors
///
/// Propagates any error from `validate_finite(p, "p")`, and returns
/// [`JivanuError::ComputationError`] when `p` lies outside `[0, 1]`.
#[must_use = "returns genotype frequencies (p2, 2pq, q2) without side effects"]
pub fn hardy_weinberg(p: f64) -> Result<(f64, f64, f64)> {
    validate_finite(p, "p")?;

    if (0.0..=1.0).contains(&p) {
        let q = 1.0 - p;
        let homozygous_p = p * p;
        let heterozygous = 2.0 * p * q;
        let homozygous_q = q * q;
        Ok((homozygous_p, heterozygous, homozygous_q))
    } else {
        Err(JivanuError::ComputationError(
            "allele frequency p must be in [0, 1]".into(),
        ))
    }
}
/// Returns the fraction of bases in `dna` that are `G` or `C`.
///
/// ASCII letters are compared case-insensitively. Only `A`, `T`, `G` and `C`
/// are accepted.
///
/// # Errors
///
/// Returns [`JivanuError::ComputationError`] when `dna` is empty or contains
/// any character other than the four accepted bases.
#[must_use = "returns the GC content fraction without side effects"]
pub fn gc_content(dna: &str) -> Result<f64> {
    if dna.is_empty() {
        return Err(JivanuError::ComputationError(
            "DNA sequence must not be empty".into(),
        ));
    }

    let mut gc_bases = 0usize;
    let mut valid_bases = 0usize;
    for c in dna.chars() {
        // Classify first, then update both counters in one place.
        let is_gc = match c.to_ascii_uppercase() {
            'G' | 'C' => true,
            'A' | 'T' => false,
            _ => {
                return Err(JivanuError::ComputationError(format!(
                    "invalid DNA character: {c}"
                )));
            }
        };
        gc_bases += usize::from(is_gc);
        valid_bases += 1;
    }

    Ok(gc_bases as f64 / valid_bases as f64)
}
/// Computes the selection coefficient of a mutant relative to the wild type.
///
/// The value returned is
/// `(fitness_mutant - fitness_wildtype) / fitness_wildtype`.
///
/// # Errors
///
/// Propagates any error from `validate_finite` (applied to `fitness_mutant`)
/// or `validate_positive` (applied to `fitness_wildtype`).
#[inline]
#[must_use = "returns the selection coefficient without side effects"]
pub fn selection_coefficient(fitness_mutant: f64, fitness_wildtype: f64) -> Result<f64> {
    validate_finite(fitness_mutant, "fitness_mutant")?;
    validate_positive(fitness_wildtype, "fitness_wildtype")?;

    let fitness_gain = fitness_mutant - fitness_wildtype;
    Ok(fitness_gain / fitness_wildtype)
}
/// The twenty standard amino acids plus a `Stop` marker.
///
/// `Stop` is what `translate_codon_to_aa` yields for the codons `TAA`, `TAG`
/// and `TGA`. The three-letter and one-letter codes quoted on each variant
/// are the values returned by `three_letter` and `one_letter`.
///
/// The declaration order is significant: the derived `PartialOrd`/`Ord`
/// compare variants by their position in this list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[non_exhaustive]
pub enum AminoAcid {
/// Alanine (`Ala`, `A`).
Alanine,
/// Arginine (`Arg`, `R`).
Arginine,
/// Asparagine (`Asn`, `N`).
Asparagine,
/// Aspartic acid (`Asp`, `D`).
AsparticAcid,
/// Cysteine (`Cys`, `C`).
Cysteine,
/// Glutamic acid (`Glu`, `E`).
GlutamicAcid,
/// Glutamine (`Gln`, `Q`).
Glutamine,
/// Glycine (`Gly`, `G`).
Glycine,
/// Histidine (`His`, `H`).
Histidine,
/// Isoleucine (`Ile`, `I`).
Isoleucine,
/// Leucine (`Leu`, `L`).
Leucine,
/// Lysine (`Lys`, `K`).
Lysine,
/// Methionine (`Met`, `M`).
Methionine,
/// Phenylalanine (`Phe`, `F`).
Phenylalanine,
/// Proline (`Pro`, `P`).
Proline,
/// Serine (`Ser`, `S`).
Serine,
/// Threonine (`Thr`, `T`).
Threonine,
/// Tryptophan (`Trp`, `W`).
Tryptophan,
/// Tyrosine (`Tyr`, `Y`).
Tyrosine,
/// Valine (`Val`, `V`).
Valine,
/// Translation stop marker (`Ter`, `*`); not an actual residue.
Stop,
}
impl AminoAcid {
    /// Returns the one-letter code for this amino acid (`'*'` for `Stop`).
    #[inline]
    #[must_use]
    pub const fn one_letter(self) -> char {
        // Arms are sorted by the code they produce.
        match self {
            Self::Alanine => 'A',
            Self::Cysteine => 'C',
            Self::AsparticAcid => 'D',
            Self::GlutamicAcid => 'E',
            Self::Phenylalanine => 'F',
            Self::Glycine => 'G',
            Self::Histidine => 'H',
            Self::Isoleucine => 'I',
            Self::Lysine => 'K',
            Self::Leucine => 'L',
            Self::Methionine => 'M',
            Self::Asparagine => 'N',
            Self::Proline => 'P',
            Self::Glutamine => 'Q',
            Self::Arginine => 'R',
            Self::Serine => 'S',
            Self::Threonine => 'T',
            Self::Valine => 'V',
            Self::Tryptophan => 'W',
            Self::Tyrosine => 'Y',
            Self::Stop => '*',
        }
    }

    /// Returns the three-letter code for this amino acid (`"Ter"` for `Stop`).
    #[inline]
    #[must_use]
    pub const fn three_letter(self) -> &'static str {
        // Arms are sorted by the code they produce.
        match self {
            Self::Alanine => "Ala",
            Self::Arginine => "Arg",
            Self::Asparagine => "Asn",
            Self::AsparticAcid => "Asp",
            Self::Cysteine => "Cys",
            Self::Glutamine => "Gln",
            Self::GlutamicAcid => "Glu",
            Self::Glycine => "Gly",
            Self::Histidine => "His",
            Self::Isoleucine => "Ile",
            Self::Leucine => "Leu",
            Self::Lysine => "Lys",
            Self::Methionine => "Met",
            Self::Phenylalanine => "Phe",
            Self::Proline => "Pro",
            Self::Serine => "Ser",
            Self::Stop => "Ter",
            Self::Threonine => "Thr",
            Self::Tryptophan => "Trp",
            Self::Tyrosine => "Tyr",
            Self::Valine => "Val",
        }
    }

    /// Returns the human-readable name of this amino acid.
    #[inline]
    #[must_use]
    pub const fn full_name(self) -> &'static str {
        match self {
            // The only two names that are not spelled like the variant.
            Self::AsparticAcid => "Aspartic acid",
            Self::GlutamicAcid => "Glutamic acid",

            Self::Alanine => "Alanine",
            Self::Arginine => "Arginine",
            Self::Asparagine => "Asparagine",
            Self::Cysteine => "Cysteine",
            Self::Glutamine => "Glutamine",
            Self::Glycine => "Glycine",
            Self::Histidine => "Histidine",
            Self::Isoleucine => "Isoleucine",
            Self::Leucine => "Leucine",
            Self::Lysine => "Lysine",
            Self::Methionine => "Methionine",
            Self::Phenylalanine => "Phenylalanine",
            Self::Proline => "Proline",
            Self::Serine => "Serine",
            Self::Stop => "Stop",
            Self::Threonine => "Threonine",
            Self::Tryptophan => "Tryptophan",
            Self::Tyrosine => "Tyrosine",
            Self::Valine => "Valine",
        }
    }

    /// Returns the molecular weight of the free amino acid; `Stop` yields `0.0`.
    ///
    /// `protein_molecular_weight` sums these values and subtracts `18.015`
    /// per peptide bond, so they describe whole (non-residue) molecules.
    ///
    /// NOTE(review): the `Glycine` entry (75.032) looks like a monoisotopic
    /// mass while the other entries look like average masses (glycine would
    /// be about 75.067) — confirm against the intended data source.
    #[inline]
    #[must_use]
    pub const fn molecular_weight(self) -> f64 {
        // Arms are sorted from lightest to heaviest.
        match self {
            Self::Glycine => 75.032,
            Self::Alanine => 89.094,
            Self::Serine => 105.093,
            Self::Proline => 115.132,
            Self::Valine => 117.148,
            Self::Threonine => 119.119,
            Self::Cysteine => 121.159,
            // Structural isomers share one weight.
            Self::Isoleucine | Self::Leucine => 131.175,
            Self::Asparagine => 132.119,
            Self::AsparticAcid => 133.104,
            Self::Glutamine => 146.146,
            Self::Lysine => 146.189,
            Self::GlutamicAcid => 147.130,
            Self::Methionine => 149.208,
            Self::Histidine => 155.156,
            Self::Phenylalanine => 165.192,
            Self::Arginine => 174.203,
            Self::Tyrosine => 181.191,
            Self::Tryptophan => 204.228,
            Self::Stop => 0.0,
        }
    }

    /// Parses a one-letter code, ignoring ASCII case; `'*'` maps to `Stop`.
    ///
    /// # Errors
    ///
    /// Returns [`JivanuError::ComputationError`] for any other character.
    #[must_use = "returns the amino acid without side effects"]
    pub fn from_one_letter(code: char) -> Result<Self> {
        let residue = match code.to_ascii_uppercase() {
            'A' => Self::Alanine,
            'C' => Self::Cysteine,
            'D' => Self::AsparticAcid,
            'E' => Self::GlutamicAcid,
            'F' => Self::Phenylalanine,
            'G' => Self::Glycine,
            'H' => Self::Histidine,
            'I' => Self::Isoleucine,
            'K' => Self::Lysine,
            'L' => Self::Leucine,
            'M' => Self::Methionine,
            'N' => Self::Asparagine,
            'P' => Self::Proline,
            'Q' => Self::Glutamine,
            'R' => Self::Arginine,
            'S' => Self::Serine,
            'T' => Self::Threonine,
            'V' => Self::Valine,
            'W' => Self::Tryptophan,
            'Y' => Self::Tyrosine,
            '*' => Self::Stop,
            _ => {
                // The message echoes the caller's original character.
                return Err(JivanuError::ComputationError(format!(
                    "unknown amino acid code: {code}"
                )));
            }
        };
        Ok(residue)
    }
}
/// Side-chain classification returned by `AminoAcid::charge_class`.
///
/// The per-variant notes list which amino acids `charge_class` assigns to
/// each class.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ChargeClass {
/// Assigned to arginine, histidine and lysine.
Positive,
/// Assigned to aspartic acid and glutamic acid.
Negative,
/// Assigned to asparagine, cysteine, glutamine, serine, threonine and tyrosine.
Polar,
/// Assigned to the remaining amino acids, and also to `AminoAcid::Stop`.
Nonpolar,
}
impl AminoAcid {
    /// Classifies this amino acid's side chain as a [`ChargeClass`].
    ///
    /// `Stop` is reported as [`ChargeClass::Nonpolar`].
    #[inline]
    #[must_use]
    pub const fn charge_class(self) -> ChargeClass {
        match self {
            Self::AsparticAcid | Self::GlutamicAcid => ChargeClass::Negative,
            Self::Arginine | Self::Histidine | Self::Lysine => ChargeClass::Positive,
            Self::Asparagine
            | Self::Cysteine
            | Self::Glutamine
            | Self::Serine
            | Self::Threonine
            | Self::Tyrosine => ChargeClass::Polar,
            Self::Alanine
            | Self::Glycine
            | Self::Isoleucine
            | Self::Leucine
            | Self::Methionine
            | Self::Phenylalanine
            | Self::Proline
            | Self::Tryptophan
            | Self::Valine
            | Self::Stop => ChargeClass::Nonpolar,
        }
    }

    /// Returns the hydrophobicity index (Kyte-Doolittle scale, `-4.5..=4.5`).
    ///
    /// Larger values are more hydrophobic. `Stop` yields `0.0`.
    #[inline]
    #[must_use]
    pub const fn hydrophobicity(self) -> f64 {
        // Arms run from most hydrophilic to most hydrophobic.
        match self {
            Self::Arginine => -4.5,
            Self::Lysine => -3.9,
            Self::Asparagine
            | Self::AsparticAcid
            | Self::Glutamine
            | Self::GlutamicAcid => -3.5,
            Self::Histidine => -3.2,
            Self::Proline => -1.6,
            Self::Tyrosine => -1.3,
            Self::Tryptophan => -0.9,
            Self::Serine => -0.8,
            Self::Threonine => -0.7,
            Self::Glycine => -0.4,
            Self::Alanine => 1.8,
            Self::Methionine => 1.9,
            Self::Cysteine => 2.5,
            Self::Phenylalanine => 2.8,
            Self::Leucine => 3.8,
            Self::Valine => 4.2,
            Self::Isoleucine => 4.5,
            Self::Stop => 0.0,
        }
    }

    /// Returns the isoelectric point (pI) of this amino acid.
    ///
    /// `Stop` yields `0.0`.
    #[inline]
    #[must_use]
    pub const fn isoelectric_point(self) -> f64 {
        // Arms run from lowest to highest pI.
        match self {
            Self::AsparticAcid => 2.77,
            Self::GlutamicAcid => 3.22,
            Self::Cysteine => 5.07,
            Self::Asparagine => 5.41,
            Self::Phenylalanine => 5.48,
            Self::Threonine => 5.60,
            Self::Glutamine => 5.65,
            Self::Tyrosine => 5.66,
            Self::Serine => 5.68,
            Self::Methionine => 5.74,
            Self::Tryptophan => 5.89,
            Self::Valine => 5.96,
            Self::Glycine => 5.97,
            Self::Leucine => 5.98,
            Self::Alanine => 6.00,
            Self::Isoleucine => 6.02,
            Self::Proline => 6.30,
            Self::Histidine => 7.59,
            Self::Lysine => 9.74,
            Self::Arginine => 10.76,
            Self::Stop => 0.0,
        }
    }
}
/// Identifies a gene transfer mechanism.
///
/// The enum is a plain, copyable tag with serde support. It is marked
/// `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum GeneTransferMechanism {
/// Transfer by conjugation.
Conjugation,
/// Transfer by transduction.
Transduction,
/// Transfer by transformation.
Transformation,
}
/// Translates a single DNA codon into an [`AminoAcid`].
///
/// The codon must be exactly three bytes long and is matched without regard
/// to ASCII case. The stop codons `TAA`, `TAG` and `TGA` yield
/// [`AminoAcid::Stop`].
///
/// # Errors
///
/// Returns [`JivanuError::ComputationError`] when `codon` is not three bytes
/// long, or when it is not one of the 64 `A`/`C`/`G`/`T` triplets.
#[inline]
#[must_use = "returns the amino acid without side effects"]
pub fn translate_codon_to_aa(codon: &str) -> Result<AminoAcid> {
    // A three-element slice pattern checks the length and upper-cases in one step.
    let upper: [u8; 3] = match codon.as_bytes() {
        [first, second, third] => [
            first.to_ascii_uppercase(),
            second.to_ascii_uppercase(),
            third.to_ascii_uppercase(),
        ],
        _ => {
            return Err(JivanuError::ComputationError(
                "codon must be exactly 3 bases".into(),
            ));
        }
    };

    // One arm per amino acid, in `AminoAcid` declaration order.
    let residue = match &upper {
        b"GCT" | b"GCC" | b"GCA" | b"GCG" => AminoAcid::Alanine,
        b"CGT" | b"CGC" | b"CGA" | b"CGG" | b"AGA" | b"AGG" => AminoAcid::Arginine,
        b"AAT" | b"AAC" => AminoAcid::Asparagine,
        b"GAT" | b"GAC" => AminoAcid::AsparticAcid,
        b"TGT" | b"TGC" => AminoAcid::Cysteine,
        b"GAA" | b"GAG" => AminoAcid::GlutamicAcid,
        b"CAA" | b"CAG" => AminoAcid::Glutamine,
        b"GGT" | b"GGC" | b"GGA" | b"GGG" => AminoAcid::Glycine,
        b"CAT" | b"CAC" => AminoAcid::Histidine,
        b"ATT" | b"ATC" | b"ATA" => AminoAcid::Isoleucine,
        b"TTA" | b"TTG" | b"CTT" | b"CTC" | b"CTA" | b"CTG" => AminoAcid::Leucine,
        b"AAA" | b"AAG" => AminoAcid::Lysine,
        b"ATG" => AminoAcid::Methionine,
        b"TTT" | b"TTC" => AminoAcid::Phenylalanine,
        b"CCT" | b"CCC" | b"CCA" | b"CCG" => AminoAcid::Proline,
        b"TCT" | b"TCC" | b"TCA" | b"TCG" | b"AGT" | b"AGC" => AminoAcid::Serine,
        b"ACT" | b"ACC" | b"ACA" | b"ACG" => AminoAcid::Threonine,
        b"TGG" => AminoAcid::Tryptophan,
        b"TAT" | b"TAC" => AminoAcid::Tyrosine,
        b"GTT" | b"GTC" | b"GTA" | b"GTG" => AminoAcid::Valine,
        b"TAA" | b"TAG" | b"TGA" => AminoAcid::Stop,
        _ => {
            // The message shows the upper-cased codon, or `???` if the three
            // bytes are not valid UTF-8 on their own.
            return Err(JivanuError::ComputationError(format!(
                "unknown codon: {}",
                core::str::from_utf8(&upper).unwrap_or("???")
            )));
        }
    };
    Ok(residue)
}
/// Translates a single DNA codon into a one-letter amino acid code.
///
/// This is `translate_codon_to_aa` followed by `AminoAcid::one_letter`, so a
/// stop codon yields `'*'`.
///
/// # Errors
///
/// Propagates any error returned by `translate_codon_to_aa`.
#[inline]
#[must_use = "returns the amino acid without side effects"]
pub fn translate_codon(codon: &str) -> Result<char> {
    let residue = translate_codon_to_aa(codon)?;
    Ok(residue.one_letter())
}
/// Returns how many codons encode `aa` in the table used by
/// `translate_codon_to_aa`.
///
/// `Stop` counts its three stop codons. Across all variants the counts add
/// up to 64.
#[inline]
#[must_use]
pub const fn codon_degeneracy(aa: AminoAcid) -> u8 {
    // Arms run from the most to the least degenerate group.
    match aa {
        AminoAcid::Arginine | AminoAcid::Leucine | AminoAcid::Serine => 6,
        AminoAcid::Alanine
        | AminoAcid::Glycine
        | AminoAcid::Proline
        | AminoAcid::Threonine
        | AminoAcid::Valine => 4,
        AminoAcid::Isoleucine | AminoAcid::Stop => 3,
        AminoAcid::Asparagine
        | AminoAcid::AsparticAcid
        | AminoAcid::Cysteine
        | AminoAcid::GlutamicAcid
        | AminoAcid::Glutamine
        | AminoAcid::Histidine
        | AminoAcid::Lysine
        | AminoAcid::Phenylalanine
        | AminoAcid::Tyrosine => 2,
        AminoAcid::Methionine | AminoAcid::Tryptophan => 1,
    }
}
/// Returns the reverse complement of a DNA sequence, in upper case.
///
/// The input is read back to front and each base is replaced by its pair
/// (`A`<->`T`, `G`<->`C`). ASCII case is ignored on input. An empty input
/// yields an empty string.
///
/// # Errors
///
/// Returns [`JivanuError::ComputationError`] naming the offending character
/// when the input contains anything other than `A`, `T`, `G` or `C`. Because
/// the sequence is scanned from the end, the reported character is the last
/// invalid one in `dna`.
#[must_use = "returns the reverse complement without side effects"]
pub fn reverse_complement(dna: &str) -> Result<String> {
    let mut complement = String::with_capacity(dna.len());

    // Walk characters rather than bytes so that a non-ASCII character is
    // reported as itself instead of as one of its UTF-8 bytes cast to `char`.
    for c in dna.chars().rev() {
        let paired = match c.to_ascii_uppercase() {
            'A' => 'T',
            'T' => 'A',
            'G' => 'C',
            'C' => 'G',
            _ => {
                return Err(JivanuError::ComputationError(format!(
                    "invalid DNA character: {c}"
                )));
            }
        };
        complement.push(paired);
    }

    Ok(complement)
}
#[must_use = "returns the protein sequence without side effects"]
pub fn translate_orf(dna: &str) -> Result<String> {
let bytes = dna.as_bytes();
let mut protein = String::with_capacity(bytes.len() / 3);
let mut i = 0;
while i + 3 <= bytes.len() {
let codon = &dna[i..i + 3];
let aa = translate_codon_to_aa(codon)?;
if aa == AminoAcid::Stop {
break;
}
protein.push(aa.one_letter());
i += 3;
}
Ok(protein)
}
/// Computes the molecular weight of a protein given as one-letter codes.
///
/// `'*'` characters are skipped. The weights of the remaining amino acids
/// (from `AminoAcid::molecular_weight`) are summed, and `18.015` is
/// subtracted for each of the `count - 1` peptide bonds.
///
/// # Errors
///
/// Returns [`JivanuError::ComputationError`] when `sequence` is empty or
/// contains only `'*'`, and propagates the error from
/// `AminoAcid::from_one_letter` for an unknown code.
#[must_use = "returns the molecular weight without side effects"]
pub fn protein_molecular_weight(sequence: &str) -> Result<f64> {
    /// Mass removed per peptide bond (one water molecule).
    const WATER_MASS: f64 = 18.015;

    if sequence.is_empty() {
        return Err(JivanuError::ComputationError(
            "protein sequence must not be empty".into(),
        ));
    }

    let mut mass_sum = 0.0;
    let mut residue_count = 0u64;
    for symbol in sequence.chars().filter(|&symbol| symbol != '*') {
        mass_sum += AminoAcid::from_one_letter(symbol)?.molecular_weight();
        residue_count += 1;
    }

    if residue_count == 0 {
        return Err(JivanuError::ComputationError(
            "protein sequence contains no amino acids".into(),
        ));
    }

    // A chain of n amino acids has n - 1 peptide bonds.
    let peptide_bonds = residue_count - 1;
    if peptide_bonds > 0 {
        mass_sum -= peptide_bonds as f64 * WATER_MASS;
    }
    Ok(mass_sum)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `AminoAcid` variant, listed in declaration order.
    const ALL_AMINO_ACIDS: [AminoAcid; 21] = [
        AminoAcid::Alanine,
        AminoAcid::Arginine,
        AminoAcid::Asparagine,
        AminoAcid::AsparticAcid,
        AminoAcid::Cysteine,
        AminoAcid::GlutamicAcid,
        AminoAcid::Glutamine,
        AminoAcid::Glycine,
        AminoAcid::Histidine,
        AminoAcid::Isoleucine,
        AminoAcid::Leucine,
        AminoAcid::Lysine,
        AminoAcid::Methionine,
        AminoAcid::Phenylalanine,
        AminoAcid::Proline,
        AminoAcid::Serine,
        AminoAcid::Threonine,
        AminoAcid::Tryptophan,
        AminoAcid::Tyrosine,
        AminoAcid::Valine,
        AminoAcid::Stop,
    ];

    /// Asserts that `actual` lies strictly within `tolerance` of `expected`.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64, tolerance: f64) {
        assert!(
            (actual - expected).abs() < tolerance,
            "expected {expected} (tolerance {tolerance}), got {actual}"
        );
    }

    // --- mutation_rate -----------------------------------------------------

    #[test]
    fn test_mutation_rate() {
        assert_close(mutation_rate(10, 1000, 100).unwrap(), 0.0001, 1e-10);
    }

    #[test]
    fn test_mutation_rate_zero_bases() {
        assert!(mutation_rate(1, 0, 1).is_err());
    }

    // --- hardy_weinberg ----------------------------------------------------

    #[test]
    fn test_hardy_weinberg_p06() {
        let (p2, pq2, q2) = hardy_weinberg(0.6).unwrap();
        assert_close(p2, 0.36, 1e-10);
        assert_close(pq2, 0.48, 1e-10);
        assert_close(q2, 0.16, 1e-10);
    }

    #[test]
    fn test_hardy_weinberg_sum_to_one() {
        let (p2, pq2, q2) = hardy_weinberg(0.3).unwrap();
        assert_close(p2 + pq2 + q2, 1.0, 1e-10);
    }

    #[test]
    fn test_hardy_weinberg_invalid_p() {
        for p in [1.5, -0.1] {
            assert!(hardy_weinberg(p).is_err());
        }
    }

    // --- gc_content --------------------------------------------------------

    #[test]
    fn test_gc_content_balanced() {
        assert_close(gc_content("ATGC").unwrap(), 0.5, 1e-10);
    }

    #[test]
    fn test_gc_content_all_gc() {
        assert_close(gc_content("GGCC").unwrap(), 1.0, 1e-10);
    }

    #[test]
    fn test_gc_content_all_at() {
        assert_close(gc_content("AATT").unwrap(), 0.0, 1e-10);
    }

    #[test]
    fn test_gc_content_empty() {
        assert!(gc_content("").is_err());
    }

    #[test]
    fn test_gc_content_invalid() {
        assert!(gc_content("ATXG").is_err());
    }

    // --- selection_coefficient ---------------------------------------------

    #[test]
    fn test_selection_coefficient_neutral() {
        assert_close(selection_coefficient(1.0, 1.0).unwrap(), 0.0, 1e-10);
    }

    #[test]
    fn test_selection_coefficient_beneficial() {
        assert_close(selection_coefficient(1.1, 1.0).unwrap(), 0.1, 1e-10);
    }

    // --- translate_codon ---------------------------------------------------

    #[test]
    fn test_translate_codon_met() {
        assert_eq!(translate_codon("ATG").unwrap(), 'M');
    }

    #[test]
    fn test_translate_codon_stop() {
        assert_eq!(translate_codon("TAA").unwrap(), '*');
    }

    #[test]
    fn test_translate_codon_invalid() {
        assert!(translate_codon("XX").is_err());
    }

    // --- serde round trips -------------------------------------------------

    #[test]
    fn test_gene_transfer_serde_roundtrip() {
        let original = GeneTransferMechanism::Conjugation;
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: GeneTransferMechanism = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    #[test]
    fn test_amino_acid_serde_roundtrip_all() {
        for aa in ALL_AMINO_ACIDS.iter() {
            let encoded = serde_json::to_string(aa).unwrap();
            let decoded: AminoAcid = serde_json::from_str(&encoded).unwrap();
            assert_eq!(*aa, decoded);
        }
    }

    // --- AminoAcid codes, names and weights --------------------------------

    #[test]
    fn test_amino_acid_one_letter_codes() {
        assert_eq!(AminoAcid::Alanine.one_letter(), 'A');
        assert_eq!(AminoAcid::Tryptophan.one_letter(), 'W');
        assert_eq!(AminoAcid::Stop.one_letter(), '*');
    }

    #[test]
    fn test_amino_acid_three_letter_codes() {
        assert_eq!(AminoAcid::Alanine.three_letter(), "Ala");
        assert_eq!(AminoAcid::Tryptophan.three_letter(), "Trp");
        assert_eq!(AminoAcid::Stop.three_letter(), "Ter");
    }

    #[test]
    fn test_amino_acid_full_name() {
        assert_eq!(AminoAcid::AsparticAcid.full_name(), "Aspartic acid");
        assert_eq!(AminoAcid::GlutamicAcid.full_name(), "Glutamic acid");
    }

    #[test]
    fn test_amino_acid_molecular_weight_known_values() {
        assert_close(AminoAcid::Glycine.molecular_weight(), 75.032, 0.001);
        assert_close(AminoAcid::Tryptophan.molecular_weight(), 204.228, 0.001);
        // Leucine and isoleucine are isomers and must weigh the same.
        assert_close(
            AminoAcid::Leucine.molecular_weight(),
            AminoAcid::Isoleucine.molecular_weight(),
            1e-10,
        );
        assert_close(AminoAcid::Stop.molecular_weight(), 0.0, 1e-10);
    }

    #[test]
    fn test_amino_acid_from_one_letter_roundtrip() {
        for code in "ARNDCEQGHILKMFPSTWYV*".chars() {
            let aa = AminoAcid::from_one_letter(code).unwrap();
            assert_eq!(aa.one_letter(), code);
        }
    }

    #[test]
    fn test_amino_acid_from_one_letter_case_insensitive() {
        assert_eq!(AminoAcid::from_one_letter('a').unwrap(), AminoAcid::Alanine);
        assert_eq!(
            AminoAcid::from_one_letter('w').unwrap(),
            AminoAcid::Tryptophan
        );
    }

    #[test]
    fn test_amino_acid_from_one_letter_invalid() {
        for code in ['X', 'Z'] {
            assert!(AminoAcid::from_one_letter(code).is_err());
        }
    }

    // --- translate_codon_to_aa ---------------------------------------------

    #[test]
    fn test_translate_codon_to_aa() {
        let expectations = [
            ("ATG", AminoAcid::Methionine),
            ("TAA", AminoAcid::Stop),
            ("TGG", AminoAcid::Tryptophan),
            ("TTT", AminoAcid::Phenylalanine),
        ];
        for (codon, expected) in expectations {
            assert_eq!(translate_codon_to_aa(codon).unwrap(), expected);
        }
    }

    #[test]
    fn test_translate_codon_to_aa_case_insensitive() {
        for codon in ["atg", "Atg"] {
            assert_eq!(translate_codon_to_aa(codon).unwrap(), AminoAcid::Methionine);
        }
    }

    #[test]
    fn test_translate_codon_to_aa_invalid() {
        for codon in ["XX", "ATGC"] {
            assert!(translate_codon_to_aa(codon).is_err());
        }
    }

    #[test]
    fn test_translate_codon_backwards_compat() {
        let via_char_api = translate_codon("ATG").unwrap();
        assert_eq!(via_char_api, 'M');
        assert_eq!(
            via_char_api,
            translate_codon_to_aa("ATG").unwrap().one_letter()
        );
    }

    #[test]
    fn test_all_64_codons_covered() {
        let bases = ['A', 'T', 'G', 'C'];
        let mut checked = 0;
        for &first in &bases {
            for &second in &bases {
                for &third in &bases {
                    let codon: String = [first, second, third].iter().collect();
                    let outcome = translate_codon_to_aa(&codon);
                    assert!(
                        outcome.is_ok(),
                        "codon {codon} not in table: {:?}",
                        outcome.err()
                    );
                    checked += 1;
                }
            }
        }
        assert_eq!(checked, 64);
    }

    // --- codon_degeneracy --------------------------------------------------

    #[test]
    fn test_codon_degeneracy_known_values() {
        let expectations = [
            (AminoAcid::Methionine, 1),
            (AminoAcid::Tryptophan, 1),
            (AminoAcid::Leucine, 6),
            (AminoAcid::Arginine, 6),
            (AminoAcid::Serine, 6),
            (AminoAcid::Stop, 3),
        ];
        for (aa, expected) in expectations {
            assert_eq!(codon_degeneracy(aa), expected);
        }
    }

    #[test]
    fn test_codon_degeneracy_sums_to_64() {
        let total: u32 = ALL_AMINO_ACIDS
            .iter()
            .map(|aa| codon_degeneracy(*aa) as u32)
            .sum();
        assert_eq!(total, 64, "total codon count must equal 64");
    }

    // --- physico-chemical properties ---------------------------------------

    #[test]
    fn test_amino_acid_charge_classes() {
        let expectations = [
            (AminoAcid::Arginine, ChargeClass::Positive),
            (AminoAcid::Histidine, ChargeClass::Positive),
            (AminoAcid::Lysine, ChargeClass::Positive),
            (AminoAcid::AsparticAcid, ChargeClass::Negative),
            (AminoAcid::GlutamicAcid, ChargeClass::Negative),
            (AminoAcid::Serine, ChargeClass::Polar),
            (AminoAcid::Alanine, ChargeClass::Nonpolar),
            (AminoAcid::Isoleucine, ChargeClass::Nonpolar),
        ];
        for (aa, expected) in expectations {
            assert_eq!(aa.charge_class(), expected);
        }
    }

    #[test]
    fn test_hydrophobicity_extremes() {
        assert_close(AminoAcid::Isoleucine.hydrophobicity(), 4.5, 1e-10);
        assert_close(AminoAcid::Arginine.hydrophobicity(), -4.5, 1e-10);
    }

    #[test]
    fn test_hydrophobicity_scale_range() {
        // Only the twenty real amino acids are checked; `Stop` is skipped.
        let residues = ALL_AMINO_ACIDS
            .iter()
            .filter(|aa| **aa != AminoAcid::Stop);
        for aa in residues {
            let h = aa.hydrophobicity();
            assert!(
                (-4.5..=4.5).contains(&h),
                "{:?} hydrophobicity {h} out of Kyte-Doolittle range",
                aa
            );
        }
    }

    #[test]
    fn test_isoelectric_point_known_values() {
        for acidic in [AminoAcid::AsparticAcid, AminoAcid::GlutamicAcid] {
            assert!(acidic.isoelectric_point() < 4.0);
        }
        for basic in [AminoAcid::Arginine, AminoAcid::Lysine] {
            assert!(basic.isoelectric_point() > 9.0);
        }
        assert_close(AminoAcid::Histidine.isoelectric_point(), 7.59, 0.01);
    }

    #[test]
    fn test_charge_class_serde_roundtrip() {
        let original = ChargeClass::Positive;
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ChargeClass = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    // --- reverse_complement ------------------------------------------------

    #[test]
    fn test_reverse_complement_simple() {
        assert_eq!(reverse_complement("ATGC").unwrap(), "GCAT");
    }

    #[test]
    fn test_reverse_complement_palindrome() {
        assert_eq!(reverse_complement("ATAT").unwrap(), "ATAT");
    }

    #[test]
    fn test_reverse_complement_case_insensitive() {
        assert_eq!(reverse_complement("atgc").unwrap(), "GCAT");
    }

    #[test]
    fn test_reverse_complement_invalid() {
        assert!(reverse_complement("ATXG").is_err());
    }

    // --- translate_orf -----------------------------------------------------

    #[test]
    fn test_translate_orf_met_only() {
        assert_eq!(translate_orf("ATG").unwrap(), "M");
    }

    #[test]
    fn test_translate_orf_with_stop() {
        assert_eq!(translate_orf("ATGGCTTAA").unwrap(), "MA");
    }

    #[test]
    fn test_translate_orf_no_stop() {
        assert_eq!(translate_orf("ATGGCTGCT").unwrap(), "MAA");
    }

    #[test]
    fn test_translate_orf_partial_codon_ignored() {
        assert_eq!(translate_orf("ATGGC").unwrap(), "M");
    }

    #[test]
    fn test_translate_orf_empty() {
        assert_eq!(translate_orf("").unwrap(), "");
    }

    #[test]
    fn test_translate_orf_invalid() {
        assert!(translate_orf("XYZ").is_err());
    }

    // --- protein_molecular_weight ------------------------------------------

    #[test]
    fn test_protein_molecular_weight_single_aa() {
        assert_close(protein_molecular_weight("G").unwrap(), 75.032, 0.001);
    }

    #[test]
    fn test_protein_molecular_weight_dipeptide() {
        assert_close(protein_molecular_weight("GA").unwrap(), 146.111, 0.001);
    }

    #[test]
    fn test_protein_molecular_weight_ignores_stop() {
        let with_stop = protein_molecular_weight("GA*").unwrap();
        let without_stop = protein_molecular_weight("GA").unwrap();
        assert_close(with_stop, without_stop, 1e-10);
    }

    #[test]
    fn test_protein_molecular_weight_empty() {
        assert!(protein_molecular_weight("").is_err());
    }

    #[test]
    fn test_protein_molecular_weight_invalid() {
        assert!(protein_molecular_weight("GXA").is_err());
    }

    #[test]
    fn test_translate_orf_to_protein_mw_pipeline() {
        let protein = translate_orf("ATGGCTTAA").unwrap();
        let weight = protein_molecular_weight(&protein).unwrap();
        assert_close(weight, 220.287, 0.001);
    }

    // --- AminoAcid derives -------------------------------------------------

    #[test]
    fn test_amino_acid_ordering() {
        assert!(AminoAcid::Alanine < AminoAcid::Arginine);
        assert!(AminoAcid::Valine < AminoAcid::Stop);
    }

    #[test]
    fn test_amino_acid_count() {
        assert_eq!(ALL_AMINO_ACIDS.len(), 21);
        // Inserting must succeed every time, i.e. no variant is listed twice.
        let mut seen = std::collections::HashSet::new();
        for aa in ALL_AMINO_ACIDS.iter() {
            assert!(seen.insert(aa));
        }
    }
}