Trait bio::pattern_matching::pssm::Motif

source ·

pub trait Motif {
    const LK: [u8; 127] = _;
    const MONOS: &'static [u8] = b"";
    const MONO_CT: usize = 0usize;

    fn rev_lk(idx: usize) -> u8;
    fn len(&self) -> usize;
    fn degenerate_consensus(&self) -> Vec<u8> ⓘ;
    fn get_scores(&self) -> &Array2<f32>;
    fn get_min_score(&self) -> f32;
    fn get_max_score(&self) -> f32;
    fn get_bits() -> f32;

    fn seqs_to_weights(
        seqs: &Vec<Vec<u8>>,
        _pseudos: Option<&[f32]>
    ) -> Result<Array2<f32>, PSSMError> { ... }
    fn lookup(mono: u8) -> Result<usize, PSSMError> { ... }
    fn raw_score<C, T>(
        &self,
        seq_it: T
    ) -> Result<(usize, f32, Vec<f32>), PSSMError>
    where
        C: Borrow<u8>,
        T: IntoIterator<Item = C>,
    { ... }
    fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos, PSSMError>
    where
        C: Borrow<u8>,
        T: IntoIterator<Item = C>,
    { ... }
    fn info_content(&self) -> f32 { ... }
}

Expand description

Trait containing code shared between DNA and protein implementations of the position-specific scoring matrix.

Provided Associated Constants

source

const LK: [u8; 127] = _

Lookup table mapping monomer -> index

source

const MONOS: &'static [u8] = b""

All monomers, in order corresponding to lookup table

source

const MONO_CT: usize = 0usize

Monomer count - equal to length of MONOS

Required Methods

source

fn rev_lk(idx: usize) -> u8

Returns the monomer associated with the given index; the reverse of lookup. Returns INVALID_MONO if the index isn’t associated with a monomer.

Arguments

idx - the index in question

source

fn len(&self) -> usize

Returns the length of the motif.

source

fn degenerate_consensus(&self) -> Vec<u8> ⓘ

Returns a representation of the motif using ambiguous codes. Primarily useful for DNA motifs, where ambiguous codes are common (e.g., ‘M’ for ‘A or C’); less so for proteins, where we represent any position without a dominant amino acid as an ‘X’.

source

fn get_scores(&self) -> &Array2<f32>

Accessor - returns scores matrix

source

fn get_min_score(&self) -> f32

Returns the sum of the scores of the “worst” (lowest-scoring) base at each position.

source

fn get_max_score(&self) -> f32

Returns the sum of the scores of the “best” (highest-scoring) base at each position.

source

fn get_bits() -> f32

Returns the information content of a single position. Used by the info_content method. FIXME: this should be replaced with a CTFE (compile-time function evaluation) … or maybe just a constant.

Provided Methods

source

fn seqs_to_weights(
seqs: &Vec<Vec<u8>>,
_pseudos: Option<&[f32]>
) -> Result<Array2<f32>, PSSMError>

Returns a weight matrix representing the sequences provided. This code is shared by implementations of from_seqs

Arguments

seqs - sequences incorporated into motif
pseudos - array slice with a pseudocount for each monomer; defaults to DEF_PSEUDO for all if None is supplied

FIXME: pseudos should be an array of size MONO_CT, but that is currently unsupported

source

fn lookup(mono: u8) -> Result<usize, PSSMError>

Returns the index of given monomer in the scores matrix using the lookup table LK

Arguments

mono - monomer, e.g., b'A' for DNA or b'R' for protein

Errors

PSSMError::InvalidMonomer(mono) - mono wasn’t found in the lookup table

source

fn raw_score<C, T>(&self, seq_it: T) -> Result<(usize, f32, Vec<f32>), PSSMError>where
C: Borrow<u8>,
T: IntoIterator<Item = C>,

Returns the un-normalized sum of matching bases, useful for comparing matches from motifs of different lengths

Arguments

seq_it - iterator representing the query sequence

Errors

PSSMError::InvalidMonomer(mono) - sequence seq_it contained invalid monomer mono

source

fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos, PSSMError>where
C: Borrow<u8>,
T: IntoIterator<Item = C>,

Returns a ScoredPos struct representing the best match within the query sequence. See: “MATCH™: a tool for searching transcription factor binding sites in DNA sequences”, Nucleic Acids Res. 2003 Jul 1; 31(13): 3576–3579, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC169193/

Arguments

seq_it - iterator representing the query sequence

Errors

PSSMError::InvalidMonomer(mono) - sequence seq_it contained invalid monomer mono
PSSMError::QueryTooShort - sequence seq_it was too short

Example

let pssm = DNAMotif::from_seqs(vec![
    b"AAAA".to_vec(),
    b"AATA".to_vec(),
    b"AAGA".to_vec(),
    b"AAAA".to_vec(),
].as_ref(), None).unwrap();
let start_pos = pssm.score(b"CCCCCAATA").unwrap().loc;

source

fn info_content(&self) -> f32

Returns a float representing the information content of a motif; roughly the inverse of Shannon Entropy. Adapted from the information content described here: https://en.wikipedia.org/wiki/Sequence_logo#Logo_creation

Implementors

source

Trait bio::pattern_matching::pssm::Motif

Provided Associated Constants

const LK: [u8; 127] = _

const MONOS: &'static [u8] = b""

const MONO_CT: usize = 0usize

Required Methods

fn rev_lk(idx: usize) -> u8

fn len(&self) -> usize

fn degenerate_consensus(&self) -> Vec<u8> ⓘ

fn get_scores(&self) -> &Array2<f32>

fn get_min_score(&self) -> f32

fn get_max_score(&self) -> f32

fn get_bits() -> f32

Provided Methods

fn seqs_to_weights( seqs: &Vec<Vec<u8>>, _pseudos: Option<&[f32]>) -> Result<Array2<f32>, PSSMError>

fn lookup(mono: u8) -> Result<usize, PSSMError>

fn raw_score<C, T>(&self, seq_it: T) -> Result<(usize, f32, Vec<f32>), PSSMError>where C: Borrow<u8>, T: IntoIterator<Item = C>,

fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos, PSSMError>where C: Borrow<u8>, T: IntoIterator<Item = C>,

fn info_content(&self) -> f32

Implementors

impl Motif for DNAMotif

const LK: [u8; 127] = _

const MONOS: &'static [u8] = b"ATGC"

const MONO_CT: usize = 4usize

impl Motif for ProtMotif

const LK: [u8; 127] = _

const MONOS: &'static [u8] = b"ARNDCEQGHILKMFPSTWYV"

const MONO_CT: usize = 20usize

fn seqs_to_weights(
seqs: &Vec<Vec<u8>>,
_pseudos: Option<&[f32]>
) -> Result<Array2<f32>, PSSMError>

fn raw_score<C, T>(&self, seq_it: T) -> Result<(usize, f32, Vec<f32>), PSSMError>where
C: Borrow<u8>,
T: IntoIterator<Item = C>,

fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos, PSSMError>where
C: Borrow<u8>,
T: IntoIterator<Item = C>,