Expand description
nucs is a library for working with nucleotide and amino acid sequences.
The goal is to supply useful tools for working with DNA/peptides while attempting to integrate with the rest of Rust.
// `Nuc` represents concrete nucleotides, and `Dna` holds `Nuc`s
use nucs::{Dna, Nuc};
// `Dna` can be parsed, modified and displayed.
let mut dna: Dna = "CATG".parse()?;
dna.extend([Nuc::A, Nuc::G]);
assert_eq!(dna, "CATGAG");
// For convenience, there's a helper to build const literals:
const CAT: &[Nuc] = &Nuc::arr(b"CAT");
assert!(dna.starts_with(CAT));
// `Seq` is a wrapper to add convenience features to `Vec`-like collections
use nucs::Seq;
// and `Dna` is actually just an alias for `Seq<Vec<Nuc>>`
let dna: Seq<Vec<Nuc>> = dna;
// ...but `Seq` can wrap any sufficiently `Vec`-like collection:
let mut dna = Seq(std::collections::VecDeque::from_iter(dna));
dna[3] = Nuc::T;
dna.push_front(Nuc::A);
// Displayed `Seq`s can be line-wrapped by using alternate formatting:
assert_eq!(format!("{dna:#4}"), "ACAT\nTAG");
// `Seq` also supports slices:
let slice = Seq::wrap_mut(dna.make_contiguous());
assert_eq!(slice, "ACATTAG");
// `DnaSlice` supplies helpers for working with slices:
// (whether or not they're wrapped in `Seq`)
use nucs::DnaSlice;
use Nuc::{A, C, G, T};
assert_eq!(
slice.reading_frames(),
[
&[[A, C, A], [T, T, A]],
&[[C, A, T], [T, A ,G]],
&[[A, T, T]],
] as [&[_]; _]
);
slice.revcomp(); // in-place reverse-complement
assert_eq!(dna, "CTAATGT");
// `DnaIter` supplies helpers for working with DNA iterators non-destructively:
use nucs::DnaIter;
let iter = dna
.iter()
.trimmed_to_codon()
.revcomped();
// (cloneable) DNA iterators can be displayed too:
let wrapped = format!("{:#3}", iter.display());
assert_eq!(wrapped, "CAT\nTAG");
// Ambiguous nucleotides represent non-empty sets of nucleotides.
use nucs::AmbiNuc;
// `Nuc`s can be composed into `AmbiNuc`s...
assert_eq!(C | A | T, AmbiNuc::H);
// ...which can be decomposed back into `Nuc`s
let dna = AmbiNuc::arr(b"STRAYGYMNAST");
assert!(dna[0].iter().eq([C, G]));
assert!(dna[1].iter().eq([T]));
assert!(dna[8].iter().eq(Nuc::ALL));
// Both concrete and ambiguous amino acids are supported as well:
use nucs::{Amino, AmbiAmino};
// `Seq(T::arr(...))` is common so there's a shorthand for it:
let peptide = Amino::seq(b"KITTY*PAWS");
assert_eq!(format!("{peptide:#5}"), "KITTY\n*PAWS");
assert_eq!(Amino::I | Amino::L, AmbiAmino::J);
assert!((Amino::C | Amino::A | Amino::T).iter().eq(Amino::arr(b"ACT")));
// And it's easy to translate DNA into peptides:
use nucs::NCBI1; // see `nucs::translation` for other genetic codes
// Iterators support translation:
let mut infinite_peptide = Nuc::arr(b"CAT")
.into_iter()
.cycle()
.translated_by(NCBI1);
assert_eq!(infinite_peptide.next(), Some(Amino::H));
// Slices support much faster translation, and it's possible to
// perform translations without allocations:
let dna = Nuc::seq(b"TTTGAGCTCATAAACGAGA");
let peptide: Seq<[_; 6]> = dna.translated_to_array_by(NCBI1);
assert_eq!(peptide, "FELINE");
// Even ambiguous DNA can be translated, and reverse-complement
// translation can be performed at very little extra cost:
let dna = AmbiNuc::seq(b"GCGCTCGGGAGACGCAAK");
let peptide = dna.rc_translated_to_vec_by(NCBI1);
assert_eq!(peptide, "JASPER");
§Features
- `proptest`: Enables `proptest` integration and utils, particularly `Arbitrary` generation of `Nuc`, `AmbiNuc`, `Amino` and `AmbiAmino`.
- `rand`: Enables `rand` integration, particularly `StandardUniform` generation of `Nuc`, `AmbiNuc`, `Amino` and `AmbiAmino`.
- `serde`: Enables `serde` integration for `Seq<T>`.
- `unsafe`: (experimental) This enables casting between `&[Nuc]` and `&[AmbiNuc]`.
Re-exports§
Modules§
- error
- Error types
- iter
- Iterator-related types
- slice
- Slice-related types
- translation
- Types related to translation of codons into amino acids.
Structs§
Enums§
Traits§
- Nucleotide
- A nucleotide; either Nuc or AmbiNuc.
- Symbol
- A sequence element; either Nuc, AmbiNuc, Amino or AmbiAmino.
Type Aliases§
- AmbiDna
- Common ambiguous nucleotide sequence type
- AmbiPeptide
- Common ambiguous amino acid sequence type
- Dna
- Common nucleotide sequence type
- Peptide
- Common amino acid sequence type