pub trait Motif {
    const LK: [u8; 127] = _;
    const MONOS: &'static [u8] = b"";
    const MONO_CT: usize = 0usize;
Show 13 methods // Required methods fn rev_lk(idx: usize) -> u8; fn len(&self) -> usize; fn degenerate_consensus(&self) -> Vec<u8> ; fn get_scores(&self) -> &Array2<f32>; fn get_min_score(&self) -> f32; fn get_max_score(&self) -> f32; fn get_bits() -> f32; // Provided methods fn seqs_to_weights( seqs: &[Vec<u8>], _pseudos: Option<&[f32]> ) -> Result<Array2<f32>> { ... } fn lookup(mono: u8) -> Result<usize> { ... } fn is_empty(&self) -> bool { ... } fn raw_score<C, T>(&self, seq_it: T) -> Result<(usize, f32, Vec<f32>)> where C: Borrow<u8>, T: IntoIterator<Item = C> { ... } fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos> where C: Borrow<u8>, T: IntoIterator<Item = C> { ... } fn info_content(&self) -> f32 { ... }
}
Expand description

Trait containing code shared between DNA and protein implementations of the position-specific scoring matrix.

Provided Associated Constants§

source

const LK: [u8; 127] = _

Lookup table mapping monomer -> index

source

const MONOS: &'static [u8] = b""

All monomers, in order corresponding to lookup table

source

const MONO_CT: usize = 0usize

Monomer count - equal to length of MONOS

Required Methods§

source

fn rev_lk(idx: usize) -> u8

Returns the monomer associated with the given index; the reverse of lookup. Returns INVALID_MONO if the index isn’t associated with a monomer.

§Arguments
  • idx - the index in question
source

fn len(&self) -> usize

Returns the length of the motif

source

fn degenerate_consensus(&self) -> Vec<u8>

Returns a representation of the motif using ambiguous codes. Primarily useful for DNA motifs, where ambiguous codes are common (e.g., ‘M’ for ‘A or C’); less so for proteins, where we represent any position without a dominant amino acid as an ‘X’.

source

fn get_scores(&self) -> &Array2<f32>

Accessor - returns scores matrix

source

fn get_min_score(&self) -> f32

Return sum of “worst” base at each position

source

fn get_max_score(&self) -> f32

Return sum of “best” base at each position

source

fn get_bits() -> f32

Returns the information content of a single position. Used by the info_content method. FIXME: this should be replaced with a CTFE … or maybe just a constant

Provided Methods§

source

fn seqs_to_weights( seqs: &[Vec<u8>], _pseudos: Option<&[f32]> ) -> Result<Array2<f32>>

Returns a weight matrix representing the sequences provided. This code is shared by implementations of from_seqs

§Arguments
  • seqs - sequences incorporated into motif
  • pseudos - array slice with a pseudocount for each monomer; defaults to DEF_PSEUDO for all if None is supplied

FIXME: pseudos should be an array of size MONO_CT, but that is currently unsupported

source

fn lookup(mono: u8) -> Result<usize>

Returns the index of given monomer in the scores matrix using the lookup table LK

§Arguments
  • mono - monomer, e.g., b'A' for DNA or b'R' for protein
§Errors
  • Error::InvalidMonomer(mono) - mono wasn’t found in the lookup table
source

fn is_empty(&self) -> bool

source

fn raw_score<C, T>(&self, seq_it: T) -> Result<(usize, f32, Vec<f32>)>
where C: Borrow<u8>, T: IntoIterator<Item = C>,

Returns the un-normalized sum of matching bases, useful for comparing matches from motifs of different lengths

§Arguments
  • seq_it - iterator representing the query sequence
§Errors
  • Error::InvalidMonomer(mono) - sequence seq_it contained invalid monomer mono
source

fn score<C, T>(&self, seq_it: T) -> Result<ScoredPos>
where C: Borrow<u8>, T: IntoIterator<Item = C>,

Returns a ScoredPos struct representing the best match within the query sequence. See: “MATCH™: a tool for searching transcription factor binding sites in DNA sequences”, Nucleic Acids Res. 2003 Jul 1; 31(13): 3576–3579, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC169193/

§Arguments
  • seq_it - iterator representing the query sequence
§Errors
  • Error::InvalidMonomer(mono) - sequence seq_it contained invalid monomer mono
  • Error::QueryTooShort - sequence seq_it was too short
§Example

let pssm = DNAMotif::from_seqs(vec![
    b"AAAA".to_vec(),
    b"AATA".to_vec(),
    b"AAGA".to_vec(),
    b"AAAA".to_vec(),
].as_ref(), None).unwrap();
let start_pos = pssm.score(b"CCCCCAATA").unwrap().loc;

source

fn info_content(&self) -> f32

Returns a float representing the information content of a motif; roughly the inverse of Shannon Entropy. Adapted from the information content described here: https://en.wikipedia.org/wiki/Sequence_logo#Logo_creation

Object Safety§

This trait is not object safe.

Implementors§

source§

impl Motif for DNAMotif

source§

const LK: [u8; 127] = _

source§

const MONOS: &'static [u8] = b"ATGC"

source§

const MONO_CT: usize = 4usize

source§

impl Motif for ProtMotif

source§

const LK: [u8; 127] = _

source§

const MONOS: &'static [u8] = b"ARNDCEQGHILKMFPSTWYV"

source§

const MONO_CT: usize = 20usize