#![doc = include_str!("../README.md")]
extern crate generic_array;
extern crate lightmotif;
#[cfg(feature = "pvalues")]
extern crate lightmotif_tfmpvalue;
extern crate pyo3;
use lightmotif::abc::Alphabet;
use lightmotif::abc::Nucleotide;
use lightmotif::abc::Symbol;
use lightmotif::dense::DenseMatrix;
use lightmotif::num::Unsigned;
use lightmotif::pli::platform::Backend;
use lightmotif::pli::Score;
use generic_array::GenericArray;
use pyo3::exceptions::PyBufferError;
use pyo3::exceptions::PyIndexError;
use pyo3::exceptions::PyTypeError;
use pyo3::exceptions::PyValueError;
use pyo3::ffi::Py_ssize_t;
use pyo3::prelude::*;
use pyo3::types::PyDict;
use pyo3::types::PyList;
use pyo3::types::PyString;
use pyo3::AsPyPointer;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
type C = <lightmotif::pli::platform::Neon as Backend>::LANES;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
type C = <lightmotif::pli::platform::Avx2 as Backend>::LANES;
#[cfg(not(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64",
)))]
type C = typenum::consts::U1;
fn dict_to_alphabet_array<A: lightmotif::Alphabet>(
d: &PyDict,
) -> PyResult<GenericArray<f32, A::K>> {
let mut p = std::iter::repeat(0.0)
.take(A::K::USIZE)
.collect::<GenericArray<f32, A::K>>();
for (k, v) in d.iter() {
let s = k.extract::<&PyString>()?.to_str()?;
if s.len() != 1 {
return Err(PyValueError::new_err((
"Invalid key for pseudocount:",
s.to_string(),
)));
}
let x = s.chars().next().unwrap();
let symbol = <A as lightmotif::Alphabet>::Symbol::from_char(x)
.map_err(|_| PyValueError::new_err(("Invalid key for pseudocount:", x)))?;
let value = v.extract::<f32>()?;
p[symbol.as_index()] = value;
}
Ok(p)
}
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct EncodedSequence {
data: lightmotif::seq::EncodedSequence<lightmotif::Dna>,
}
#[pymethods]
impl EncodedSequence {
#[new]
pub fn __init__(sequence: &PyString) -> PyResult<PyClassInitializer<Self>> {
let seq = sequence.to_str()?;
let py = sequence.py();
let data = py
.allow_threads(|| lightmotif::seq::EncodedSequence::encode(seq))
.map_err(|lightmotif::err::InvalidSymbol(x)| {
PyValueError::new_err(format!("Invalid symbol in input: {}", x))
})?;
Ok(Self::from(data).into())
}
pub fn __str__(&self) -> String {
self.data.to_string()
}
pub fn __len__(&self) -> usize {
self.data.len()
}
pub fn __copy__(&self) -> Self {
self.copy()
}
pub fn __getitem__(&self, mut index: Py_ssize_t) -> PyResult<u8> {
let length = self.data.len();
if index < 0 {
index += length as Py_ssize_t;
}
if index < 0 || index >= length as Py_ssize_t {
Err(PyIndexError::new_err("sequence index out of range"))
} else {
Ok(self.data[index as usize] as u8)
}
}
unsafe fn __getbuffer__(
slf: PyRefMut<'_, Self>,
view: *mut pyo3::ffi::Py_buffer,
flags: std::os::raw::c_int,
) -> PyResult<()> {
if view.is_null() {
return Err(PyBufferError::new_err("View is null"));
}
if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
return Err(PyBufferError::new_err("Object is not writable"));
}
(*view).obj = pyo3::ffi::_Py_NewRef(slf.as_ptr());
let data: &[Nucleotide] = slf.data.as_ref();
(*view).buf = data.as_ptr() as *mut std::os::raw::c_void;
(*view).len = data.len() as isize;
(*view).readonly = 1;
(*view).itemsize = std::mem::size_of::<Nucleotide>() as isize;
let msg = std::ffi::CStr::from_bytes_with_nul(b"B\0").unwrap();
(*view).format = msg.as_ptr() as *mut _;
(*view).ndim = 1;
(*view).shape = std::ptr::null_mut();
(*view).strides = std::ptr::null_mut();
(*view).suboffsets = std::ptr::null_mut();
(*view).internal = std::ptr::null_mut();
Ok(())
}
pub fn copy(&self) -> Self {
self.clone()
}
pub fn stripe(&self) -> PyResult<StripedSequence> {
Ok(self.data.to_striped().into())
}
}
impl From<lightmotif::seq::EncodedSequence<lightmotif::Dna>> for EncodedSequence {
fn from(data: lightmotif::seq::EncodedSequence<lightmotif::Dna>) -> Self {
Self { data }
}
}
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct StripedSequence {
data: lightmotif::seq::StripedSequence<lightmotif::Dna, C>,
shape: [Py_ssize_t; 2],
strides: [Py_ssize_t; 2],
}
impl From<lightmotif::seq::StripedSequence<lightmotif::Dna, C>> for StripedSequence {
fn from(data: lightmotif::seq::StripedSequence<lightmotif::Dna, C>) -> Self {
let cols = data.matrix().columns();
let rows = data.matrix().rows();
let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
let strides = [1, data.matrix().stride() as Py_ssize_t];
Self {
data,
shape,
strides,
}
}
}
#[pymethods]
impl StripedSequence {
pub fn __copy__(&self) -> Self {
self.copy()
}
pub fn copy(&self) -> Self {
self.clone()
}
unsafe fn __getbuffer__(
mut slf: PyRefMut<'_, Self>,
view: *mut pyo3::ffi::Py_buffer,
flags: std::os::raw::c_int,
) -> PyResult<()> {
if view.is_null() {
return Err(PyBufferError::new_err("View is null"));
}
if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
return Err(PyBufferError::new_err("Object is not writable"));
}
(*view).obj = pyo3::ffi::_Py_NewRef(slf.as_ptr());
let matrix = slf.data.matrix();
let data = matrix[0].as_ptr();
(*view).buf = data as *mut std::os::raw::c_void;
(*view).len = (matrix.rows() * matrix.columns()) as isize;
(*view).readonly = 1;
(*view).itemsize = std::mem::size_of::<Nucleotide>() as isize;
let msg = std::ffi::CStr::from_bytes_with_nul(b"B\0").unwrap();
(*view).format = msg.as_ptr() as *mut _;
(*view).ndim = 2;
(*view).shape = slf.shape.as_mut_ptr();
(*view).strides = slf.strides.as_mut_ptr();
(*view).suboffsets = std::ptr::null_mut();
(*view).internal = std::ptr::null_mut();
Ok(())
}
}
#[pyclass(module = "lightmotif.lib", sequence)]
#[derive(Clone, Debug)]
pub struct CountMatrix {
data: lightmotif::CountMatrix<lightmotif::Dna>,
}
#[pymethods]
impl CountMatrix {
#[new]
pub fn __init__(_alphabet: &PyString, values: &PyDict) -> PyResult<PyClassInitializer<Self>> {
let mut data: Option<DenseMatrix<u32, <lightmotif::Dna as Alphabet>::K>> = None;
for s in lightmotif::Dna::symbols() {
let key = String::from(s.as_char());
if let Some(res) = values.get_item(&key) {
let column = res;
if data.is_none() {
data = Some(DenseMatrix::new(column.len()?));
}
let matrix = data.as_mut().unwrap();
if matrix.rows() != column.len()? {
return Err(PyValueError::new_err("Invalid number of rows"));
}
for (i, x) in column.iter()?.enumerate() {
matrix[i][s.as_index()] = x?.extract::<u32>()?;
}
}
}
match data {
None => Err(PyValueError::new_err("Invalid count matrix")),
Some(matrix) => match lightmotif::CountMatrix::new(matrix) {
Ok(counts) => Ok(Self::from(counts).into()),
Err(_) => Err(PyValueError::new_err("Inconsistent rows in count matrix")),
},
}
}
pub fn __eq__(&self, object: &PyAny) -> PyResult<bool> {
let py = object.py();
if let Ok(other) = object.extract::<Py<Self>>() {
Ok(self.data == other.borrow(py).data)
} else {
Ok(false)
}
}
pub fn __len__(&self) -> usize {
self.data.matrix().rows()
}
pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<PyObject> {
let mut index_ = index;
if index_ < 0 {
index_ += slf.data.matrix().rows() as isize;
}
if index_ < 0 || (index_ as usize) >= slf.data.matrix().rows() {
return Err(PyIndexError::new_err(index));
}
let row = &slf.data.matrix()[index_ as usize];
Ok(row.to_object(slf.py()))
}
pub fn normalize(&self, pseudocount: Option<PyObject>) -> PyResult<WeightMatrix> {
let pseudo = Python::with_gil(|py| {
if let Some(obj) = pseudocount {
if let Ok(x) = obj.extract::<f32>(py) {
Ok(lightmotif::abc::Pseudocounts::from(x))
} else if let Ok(d) = obj.extract::<&PyDict>(py) {
let p = dict_to_alphabet_array::<lightmotif::Dna>(d)?;
Ok(lightmotif::abc::Pseudocounts::from(p))
} else {
Err(PyTypeError::new_err("Invalid type for pseudocount"))
}
} else {
Ok(lightmotif::abc::Pseudocounts::default())
}
})?;
let data = self.data.to_freq(pseudo).to_weight(None);
Ok(WeightMatrix { data })
}
}
impl From<lightmotif::CountMatrix<lightmotif::Dna>> for CountMatrix {
fn from(data: lightmotif::CountMatrix<lightmotif::Dna>) -> Self {
Self { data }
}
}
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct WeightMatrix {
data: lightmotif::pwm::WeightMatrix<lightmotif::Dna>,
}
#[pymethods]
impl WeightMatrix {
pub fn __eq__(&self, object: &PyAny) -> PyResult<bool> {
let py = object.py();
if let Ok(other) = object.extract::<Py<Self>>() {
Ok(self.data == other.borrow(py).data)
} else {
Ok(false)
}
}
pub fn __len__(&self) -> usize {
self.data.matrix().rows()
}
pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<PyObject> {
let mut index_ = index;
if index_ < 0 {
index_ += slf.data.matrix().rows() as isize;
}
if index_ < 0 || (index_ as usize) >= slf.data.matrix().rows() {
return Err(PyIndexError::new_err(index));
}
let row = &slf.data.matrix()[index_ as usize];
Ok(row.to_object(slf.py()))
}
#[pyo3(signature=(background=None, base=2.0))]
pub fn log_odds(&self, background: Option<PyObject>, base: f32) -> PyResult<ScoringMatrix> {
let bg = Python::with_gil(|py| {
if let Some(obj) = background {
if let Ok(d) = obj.extract::<&PyDict>(py) {
let p = dict_to_alphabet_array::<lightmotif::Dna>(d)?;
lightmotif::abc::Background::new(p)
.map_err(|_| PyValueError::new_err("Invalid background frequencies"))
} else {
Err(PyTypeError::new_err("Invalid type for pseudocount"))
}
} else {
Ok(lightmotif::abc::Background::uniform())
}
})?;
let pwm = match bg.frequencies() != self.data.background().frequencies() {
false => self.data.rescale(bg),
true => self.data.clone(),
};
Ok(ScoringMatrix {
data: pwm.to_scoring_with_base(base),
})
}
}
impl From<lightmotif::WeightMatrix<lightmotif::Dna>> for WeightMatrix {
fn from(data: lightmotif::WeightMatrix<lightmotif::Dna>) -> Self {
Self { data }
}
}
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct ScoringMatrix {
data: lightmotif::ScoringMatrix<lightmotif::Dna>,
}
#[pymethods]
impl ScoringMatrix {
#[new]
pub fn __init__(
_alphabet: &PyString,
values: &PyDict,
background: Option<PyObject>,
) -> PyResult<PyClassInitializer<Self>> {
let bg = Python::with_gil(|py| {
if let Some(obj) = background {
if let Ok(d) = obj.extract::<&PyDict>(py) {
let p = dict_to_alphabet_array::<lightmotif::Dna>(d)?;
lightmotif::abc::Background::new(p)
.map_err(|_| PyValueError::new_err("Invalid background frequencies"))
} else {
Err(PyTypeError::new_err("Invalid type for pseudocount"))
}
} else {
Ok(lightmotif::abc::Background::uniform())
}
})?;
let mut data: Option<DenseMatrix<f32, <lightmotif::Dna as Alphabet>::K>> = None;
for s in lightmotif::Dna::symbols() {
let key = String::from(s.as_char());
if let Some(res) = values.get_item(&key) {
let column = res.downcast::<PyList>()?;
if data.is_none() {
data = Some(DenseMatrix::new(column.len()));
}
let matrix = data.as_mut().unwrap();
if matrix.rows() != column.len() {
return Err(PyValueError::new_err("Invalid number of rows"));
}
for (i, x) in column.iter().enumerate() {
matrix[i][s.as_index()] = x.extract::<f32>()?;
}
}
}
match data {
None => Err(PyValueError::new_err("Invalid count matrix")),
Some(matrix) => Ok(Self::from(lightmotif::ScoringMatrix::new(bg, matrix)).into()),
}
}
pub fn __eq__(&self, object: &PyAny) -> PyResult<bool> {
let py = object.py();
if let Ok(other) = object.extract::<Py<Self>>() {
Ok(self.data == other.borrow(py).data)
} else {
Ok(false)
}
}
pub fn __len__(&self) -> usize {
self.data.matrix().rows()
}
pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<PyObject> {
let mut index_ = index;
if index_ < 0 {
index_ += slf.data.matrix().rows() as isize;
}
if index_ < 0 || (index_ as usize) >= slf.data.matrix().rows() {
return Err(PyIndexError::new_err(index));
}
let row = &slf.data.matrix()[index_ as usize];
Ok(row.to_object(slf.py()))
}
pub fn calculate(slf: PyRef<'_, Self>, sequence: &mut StripedSequence) -> StripedScores {
let pssm = &slf.data;
let seq = &mut sequence.data;
seq.configure(pssm);
let pli = lightmotif::pli::Pipeline::dispatch();
slf.py().allow_threads(|| pli.score(pssm, seq)).into()
}
pub fn pvalue(slf: PyRef<'_, Self>, score: f64) -> PyResult<f64> {
#[cfg(feature = "pvalues")]
return Ok(lightmotif_tfmpvalue::TfmPvalue::new(&slf.data).pvalue(score));
#[cfg(not(feature = "pvalues"))]
return Err(PyRuntimeError::new_err(
"package compiled without p-value support",
));
}
pub fn score(slf: PyRef<'_, Self>, pvalue: f64) -> PyResult<f64> {
#[cfg(feature = "pvalues")]
return Ok(lightmotif_tfmpvalue::TfmPvalue::new(&slf.data).score(pvalue));
#[cfg(not(feature = "pvalues"))]
return Err(PyRuntimeError::new_err(
"package compiled without p-value support",
));
}
pub fn reverse_complement(slf: PyRef<'_, Self>) -> ScoringMatrix {
slf.data.reverse_complement().into()
}
}
impl From<lightmotif::ScoringMatrix<lightmotif::Dna>> for ScoringMatrix {
fn from(data: lightmotif::ScoringMatrix<lightmotif::Dna>) -> Self {
Self { data }
}
}
#[pyclass(module = "lightmotif.lib", sequence)]
#[derive(Clone, Debug)]
pub struct StripedScores {
scores: lightmotif::scores::StripedScores<C>,
shape: [Py_ssize_t; 2],
strides: [Py_ssize_t; 2],
}
#[pymethods]
impl StripedScores {
fn __len__(&self) -> usize {
self.scores.max_index()
}
fn __getitem__(&self, index: isize) -> PyResult<f32> {
if index < self.scores.max_index() as isize && index >= 0 {
Ok(self.scores[index as usize])
} else {
Err(PyIndexError::new_err("list index out of range"))
}
}
unsafe fn __getbuffer__(
mut slf: PyRefMut<'_, Self>,
view: *mut pyo3::ffi::Py_buffer,
flags: std::os::raw::c_int,
) -> PyResult<()> {
if view.is_null() {
return Err(PyBufferError::new_err("View is null"));
}
if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
return Err(PyBufferError::new_err("Object is not writable"));
}
(*view).obj = pyo3::ffi::_Py_NewRef(slf.as_ptr());
let data = slf.scores.matrix()[0].as_ptr();
(*view).buf = data as *mut std::os::raw::c_void;
(*view).len = (slf.scores.matrix().rows() * slf.scores.matrix().columns()) as isize;
(*view).readonly = 1;
(*view).itemsize = std::mem::size_of::<f32>() as isize;
let msg = std::ffi::CStr::from_bytes_with_nul(b"f\0").unwrap();
(*view).format = msg.as_ptr() as *mut _;
(*view).ndim = 2;
(*view).shape = slf.shape.as_mut_ptr();
(*view).strides = slf.strides.as_mut_ptr();
(*view).suboffsets = std::ptr::null_mut();
(*view).internal = std::ptr::null_mut();
Ok(())
}
pub fn threshold(slf: PyRef<'_, Self>, threshold: f32) -> Vec<usize> {
let scores = &slf.scores;
slf.py().allow_threads(|| scores.threshold(threshold))
}
pub fn max(slf: PyRef<'_, Self>) -> Option<f32> {
let scores = &slf.scores;
slf.py().allow_threads(|| scores.max())
}
pub fn argmax(slf: PyRef<'_, Self>) -> Option<usize> {
let scores = &slf.scores;
slf.py().allow_threads(|| scores.argmax())
}
}
impl From<lightmotif::scores::StripedScores<C>> for StripedScores {
fn from(scores: lightmotif::scores::StripedScores<C>) -> Self {
let cols = scores.matrix().columns();
let rows = scores.matrix().rows();
let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
let strides = [
std::mem::size_of::<f32>() as Py_ssize_t,
(scores.matrix().stride() * std::mem::size_of::<f32>()) as Py_ssize_t,
];
Self {
scores,
shape,
strides,
}
}
}
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct Motif {
#[pyo3(get)]
counts: Py<CountMatrix>,
#[pyo3(get)]
pwm: Py<WeightMatrix>,
#[pyo3(get)]
pssm: Py<ScoringMatrix>,
}
#[pyfunction]
pub fn create(sequences: &PyAny) -> PyResult<Motif> {
let py = sequences.py();
let mut encoded = Vec::new();
for seq in sequences.iter()? {
let s = seq?.extract::<&PyString>()?.to_str()?;
let x = py
.allow_threads(|| lightmotif::EncodedSequence::encode(s))
.map_err(|_| PyValueError::new_err("Invalid symbol in sequence"))?;
encoded.push(x);
}
let data = lightmotif::CountMatrix::from_sequences(encoded)
.map_err(|_| PyValueError::new_err("Inconsistent sequence length"))?;
let weights = data.to_freq(0.0).to_weight(None);
let scoring = weights.to_scoring();
Ok(Motif {
counts: Py::new(py, CountMatrix::from(data))?,
pwm: Py::new(py, WeightMatrix::from(weights))?,
pssm: Py::new(py, ScoringMatrix::from(scoring))?,
})
}
#[pyfunction]
pub fn stripe(sequence: &PyAny) -> PyResult<StripedSequence> {
let py = sequence.py();
let s = sequence.extract::<&PyString>()?;
let encoded = EncodedSequence::__init__(s).and_then(|e| Py::new(py, e))?;
let striped = encoded.borrow(py).stripe();
striped
}
#[pymodule]
#[pyo3(name = "lib")]
pub fn init(_py: Python, m: &PyModule) -> PyResult<()> {
m.add("__package__", "lightmotif")?;
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
m.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
m.add("AVX2_SUPPORTED", std::is_x86_feature_detected!("avx2"))?;
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
m.add("AVX2_SUPPORTED", false)?;
m.add_class::<EncodedSequence>()?;
m.add_class::<StripedSequence>()?;
m.add_class::<CountMatrix>()?;
m.add_class::<WeightMatrix>()?;
m.add_class::<ScoringMatrix>()?;
m.add_class::<StripedScores>()?;
m.add_class::<Motif>()?;
m.add_function(wrap_pyfunction!(create, m)?)?;
m.add_function(wrap_pyfunction!(stripe, m)?)?;
Ok(())
}