Trait needletail::sequence::Sequence

source ·

pub trait Sequence<'a> {
    // Required method
    fn sequence(&'a self) -> &'a [u8] ⓘ;

    // Provided methods
    fn strip_returns(&'a self) -> Cow<'a, [u8]> { ... }
    fn reverse_complement(&'a self) -> Vec<u8> { ... }
    fn normalize(&'a self, iupac: bool) -> Cow<'a, [u8]> { ... }
    fn canonical_kmers(
        &'a self,
        k: u8,
        reverse_complement: &'a [u8]
    ) -> CanonicalKmers<'a> ⓘ { ... }
    fn kmers(&'a self, k: u8) -> Kmers<'a> ⓘ { ... }
    fn bit_kmers(&'a self, k: u8, canonical: bool) -> BitNuclKmer<'a> ⓘ { ... }
}

Expand description

A generic FASTX record that also abstracts over several logical operations that can be performed on nucleic acid sequences.

Required Methods§

source

fn sequence(&'a self) -> &'a [u8] ⓘ

Provided Methods§

source

fn strip_returns(&'a self) -> Cow<'a, [u8]>

Removes newlines from the sequence. This handles \r, \n, and \r\n, and removes internal newlines in addition to those at the end. Primarily used for multiline FASTA records, but it can also help process (the much rarer) multiline FASTQs. Always call this before the iteration methods below (e.g. .kmers) to ensure that no newlines are returned as part of the results. If you are using normalize, you do not need to call this function directly.

source

fn reverse_complement(&'a self) -> Vec<u8>

Returns the reverse complement of a sequence. Biologically this is equivalent to the sequence of the strand opposite the one you pass in.

use needletail::Sequence;

assert_eq!(b"AACC".reverse_complement(), b"GGTT");

source

fn normalize(&'a self, iupac: bool) -> Cow<'a, [u8]>

[Nucleic Acids] Normalizes the sequence. See documentation for needletail::sequence::normalize. Do not use on amino acid sequences. Note that this returns a Cow<[u8]>, so you may have to convert it to a Vec<u8> or borrow it as a &[u8] as necessary.

use needletail::Sequence;

// IUPAC bases are coerced to N's if `false`
assert_eq!(b"ADGH".normalize(false).as_ref(), b"ANGN");
// otherwise they're preserved
assert_eq!(b"ADGH".normalize(true).as_ref(), b"ADGH");

// Uridine residues are converted to thymidine
assert_eq!(b"ACGU".normalize(true).as_ref(), b"ACGT");

source

fn canonical_kmers( &'a self, k: u8, reverse_complement: &'a [u8] ) -> CanonicalKmers<'a> ⓘ

[Nucleic Acids] Returns an iterator over the sequence that skips non-ACGT bases and yields a tuple containing (position, the canonicalized k-mer, a flag indicating whether that k-mer was taken from the reverse complement rather than from the original sequence).

source

fn kmers(&'a self, k: u8) -> Kmers<'a> ⓘ

Returns an iterator that yields a sliding window of k-sized sequences (k-mers). It does not skip whitespace or normalize bases in the original sequence, so it may be appropriate to call .normalize or .strip_returns first.

source

fn bit_kmers(&'a self, k: u8, canonical: bool) -> BitNuclKmer<'a> ⓘ

Return an iterator that returns valid kmers in 4-bit form

Implementations on Foreign Types§

source §

fn sequence(&'a self) -> &'a [u8] ⓘ

Implementors§

source §

Trait needletail::sequence::Sequence

Required Methods§

fn sequence(&'a self) -> &'a [u8] ⓘ

Provided Methods§

fn strip_returns(&'a self) -> Cow<'a, [u8]>

fn reverse_complement(&'a self) -> Vec<u8>

fn normalize(&'a self, iupac: bool) -> Cow<'a, [u8]>

fn canonical_kmers( &'a self, k: u8, reverse_complement: &'a [u8] ) -> CanonicalKmers<'a> ⓘ

fn kmers(&'a self, k: u8) -> Kmers<'a> ⓘ

fn bit_kmers(&'a self, k: u8, canonical: bool) -> BitNuclKmer<'a> ⓘ

Implementations on Foreign Types§

impl<'a> Sequence<'a> for [u8]

fn sequence(&'a self) -> &'a [u8] ⓘ

impl<'a> Sequence<'a> for Cow<'a, [u8]>

fn sequence(&'a self) -> &'a [u8] ⓘ

impl<'a> Sequence<'a> for &'a [u8]

fn sequence(&'a self) -> &'a [u8] ⓘ

impl<'a> Sequence<'a> for (&'a [u8], &'a [u8])

fn sequence(&'a self) -> &'a [u8] ⓘ

Implementors§

impl<'a> Sequence<'a> for SequenceRecord<'a>