1#![doc = include_str!("../README.md")]
2
3extern crate generic_array;
4extern crate lightmotif;
5#[cfg(feature = "tfmpvalue")]
6extern crate lightmotif_tfmpvalue;
7extern crate pyo3;
8
9use std::fmt::Display;
10use std::fmt::Formatter;
11
12use lightmotif::abc::Alphabet;
13use lightmotif::abc::Dna;
14use lightmotif::abc::Nucleotide;
15use lightmotif::abc::Protein;
16use lightmotif::abc::Symbol;
17use lightmotif::dense::DenseMatrix;
18use lightmotif::num::Unsigned;
19use lightmotif::pli::Score;
20#[cfg(feature = "tfmpvalue")]
21use lightmotif_tfmpvalue::TfmPvalue;
22
23use generic_array::GenericArray;
24use pyo3::exceptions::PyBufferError;
25use pyo3::exceptions::PyIndexError;
26use pyo3::exceptions::PyRuntimeError;
27use pyo3::exceptions::PyTypeError;
28use pyo3::exceptions::PyValueError;
29use pyo3::ffi::Py_ssize_t;
30use pyo3::prelude::*;
31use pyo3::types::PyDict;
32use pyo3::types::PyList;
33use pyo3::types::PyString;
34
35mod io;
36mod pyfile;
37
38macro_rules! apply_self {
41 ($self:expr, |$x:ident| $e:expr) => {
42 match &$self {
43 Self::Dna($x) => $e,
44 Self::Protein($x) => $e,
45 }
46 };
47}
48
49macro_rules! impl_matrix_methods {
50 ($datatype:ident) => {
51 impl $datatype {
52 #[allow(unused)]
53 fn rows(&self) -> usize {
54 apply_self!(self, |x| x.matrix().rows())
55 }
56
57 #[allow(unused)]
58 fn columns(&self) -> usize {
59 apply_self!(self, |x| x.matrix().columns())
60 }
61
62 #[allow(unused)]
63 fn stride(&self) -> usize {
64 apply_self!(self, |x| x.matrix().stride())
65 }
66 }
67 };
68}
69
70macro_rules! alphabet_data_enum {
71 (
72 #[derive($($t:ident),*)]
73 enum $name:ident($($p:ident)::*)
74 ) => {
75
76 #[derive($($t),*)]
77 enum $name {
78 Dna($($p)::*<Dna>),
79 Protein($($p)::*<Protein>)
80 }
81
82 impl From<$($p)::*<Dna>> for $name {
83 fn from(value: $($p)::*<Dna>) -> Self {
84 $name::Dna(value)
85 }
86 }
87
88 impl From<$($p)::*<Protein>> for $name {
89 fn from(value: $($p)::*<Protein>) -> Self {
90 $name::Protein(value)
91 }
92 }
93 }
94}
95
96fn dict_to_alphabet_array<'py, A: lightmotif::Alphabet>(
99 d: Bound<'py, PyDict>,
100) -> PyResult<GenericArray<f32, A::K>> {
101 let mut p = std::iter::repeat(0.0)
102 .take(A::K::USIZE)
103 .collect::<GenericArray<f32, A::K>>();
104 for (k, v) in d.iter() {
105 let s = k.extract::<Bound<PyString>>()?;
106 let key = s.to_cow()?;
107 if key.len() != 1 {
108 return Err(PyValueError::new_err((
109 "Invalid key for pseudocount:",
110 s.to_string(),
111 )));
112 }
113 let x = key.chars().next().unwrap();
114 let symbol = <A as lightmotif::Alphabet>::Symbol::from_char(x)
115 .map_err(|_| PyValueError::new_err(("Invalid key for pseudocount:", x)))?;
116 let value = v.extract::<f32>()?;
117 p[symbol.as_index()] = value;
118 }
119 Ok(p)
120}
121
122alphabet_data_enum!(
125 #[derive(Clone, Debug)]
126 enum EncodedSequenceData(lightmotif::seq::EncodedSequence)
127);
128
129impl EncodedSequenceData {
130 unsafe fn as_ptr(&self) -> *const u8 {
131 apply_self!(self, |x| {
132 let data: &[_] = x.as_ref();
133 data.as_ptr() as *const u8
134 })
135 }
136
137 fn len(&self) -> usize {
138 apply_self!(self, |x| x.len())
139 }
140
141 fn get(&self, index: usize) -> u8 {
142 apply_self!(self, |x| x[index] as u8)
143 }
144
145 fn stripe(&self) -> StripedSequenceData {
146 apply_self!(self, |x| x.to_striped().into())
147 }
148}
149
150impl Display for EncodedSequenceData {
151 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
152 apply_self!(self, |x| x.fmt(f))
153 }
154}
155
156#[pyclass(module = "lightmotif.lib")]
158#[derive(Clone, Debug)]
159pub struct EncodedSequence {
160 data: EncodedSequenceData,
161}
162
163#[pymethods]
164impl EncodedSequence {
165 #[new]
167 #[pyo3(signature = (sequence, protein=false))]
168 pub fn __init__<'py>(
169 sequence: Bound<'py, PyString>,
170 protein: bool,
171 ) -> PyResult<PyClassInitializer<Self>> {
172 let seq = sequence.to_cow()?;
173 let py = sequence.py();
174 let data = py
175 .allow_threads(|| {
176 if protein {
177 lightmotif::seq::EncodedSequence::<Protein>::encode(&*seq)
178 .map(EncodedSequenceData::from)
179 } else {
180 lightmotif::seq::EncodedSequence::<Dna>::encode(&*seq)
181 .map(EncodedSequenceData::from)
182 }
183 })
184 .map_err(|lightmotif::err::InvalidSymbol(x)| {
185 PyValueError::new_err(format!("Invalid symbol in input: {}", x))
186 })?;
187 Ok(Self::from(data).into())
188 }
189
190 pub fn __str__(&self) -> String {
192 self.data.to_string()
193 }
194
195 pub fn __len__(&self) -> usize {
197 self.data.len()
198 }
199
200 pub fn __copy__(&self) -> Self {
202 self.copy()
203 }
204
205 pub fn __getitem__(&self, mut index: Py_ssize_t) -> PyResult<u8> {
207 let length = self.data.len();
208 if index < 0 {
209 index += length as Py_ssize_t;
210 }
211 if index < 0 || index >= length as Py_ssize_t {
212 Err(PyIndexError::new_err("sequence index out of range"))
213 } else {
214 Ok(self.data.get(index as usize))
215 }
216 }
217
218 #[cfg(not(feature = "abi3"))]
220 unsafe fn __getbuffer__(
221 slf: PyRef<'_, Self>,
222 view: *mut pyo3::ffi::Py_buffer,
223 flags: std::os::raw::c_int,
224 ) -> PyResult<()> {
225 if view.is_null() {
226 return Err(PyBufferError::new_err("View is null"));
227 }
228 if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
229 return Err(PyBufferError::new_err("Object is not writable"));
230 }
231
232 (*view).buf = slf.data.as_ptr() as *mut std::os::raw::c_void;
233 (*view).len = slf.data.len() as isize;
234 (*view).readonly = 1;
235 (*view).itemsize = std::mem::size_of::<u8>() as isize;
236
237 (*view).obj = slf.as_ptr();
238 pyo3::ffi::Py_INCREF((*view).obj);
239 let msg = std::ffi::CStr::from_bytes_with_nul(b"B\0").unwrap();
240 (*view).format = msg.as_ptr() as *mut _;
241
242 (*view).ndim = 1;
243 (*view).shape = std::ptr::null_mut();
244 (*view).strides = std::ptr::null_mut();
245 (*view).suboffsets = std::ptr::null_mut();
246 (*view).internal = std::ptr::null_mut();
247
248 Ok(())
249 }
250
251 #[getter]
253 pub fn protein(&self) -> bool {
254 matches!(self.data, EncodedSequenceData::Protein(_))
255 }
256
257 pub fn copy(&self) -> Self {
259 self.clone()
260 }
261
262 pub fn stripe(&self) -> StripedSequence {
269 self.data.stripe().into()
270 }
271}
272
273impl From<EncodedSequenceData> for EncodedSequence {
274 fn from(data: EncodedSequenceData) -> Self {
275 Self { data }
276 }
277}
278
279alphabet_data_enum!(
282 #[derive(Clone, Debug)]
283 enum StripedSequenceData(lightmotif::seq::StripedSequence)
284);
285
286impl_matrix_methods!(StripedSequenceData);
287
288impl StripedSequenceData {
289 unsafe fn as_ptr(&self) -> *const u8 {
290 apply_self!(self, |x| x.matrix()[0].as_ptr() as *const u8)
291 }
292}
293
294#[pyclass(module = "lightmotif.lib")]
296#[derive(Clone, Debug)]
297pub struct StripedSequence {
298 data: StripedSequenceData,
299 shape: [Py_ssize_t; 2],
300 strides: [Py_ssize_t; 2],
301}
302
303impl From<StripedSequenceData> for StripedSequence {
304 fn from(data: StripedSequenceData) -> Self {
305 let cols = data.columns();
306 let rows = data.rows();
307 let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
308 let strides = [
309 std::mem::size_of::<u8>() as Py_ssize_t,
310 (std::mem::size_of::<u8>() * data.stride()) as Py_ssize_t,
311 ];
312 Self {
313 data,
314 shape,
315 strides,
316 }
317 }
318}
319
320#[pymethods]
321impl StripedSequence {
322 pub fn __copy__(&self) -> Self {
324 self.copy()
325 }
326
327 #[getter]
329 pub fn protein(&self) -> bool {
330 matches!(self.data, StripedSequenceData::Protein(_))
331 }
332
333 #[cfg(not(feature = "abi3"))]
334 unsafe fn __getbuffer__(
335 mut slf: PyRefMut<'_, Self>,
336 view: *mut pyo3::ffi::Py_buffer,
337 flags: std::os::raw::c_int,
338 ) -> PyResult<()> {
339 if view.is_null() {
340 return Err(PyBufferError::new_err("View is null"));
341 }
342 if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
343 return Err(PyBufferError::new_err("Object is not writable"));
344 }
345
346 let data = slf.data.as_ptr();
347 (*view).buf = data as *mut std::os::raw::c_void;
348 (*view).len = (slf.data.rows() * slf.data.columns()) as isize;
349 (*view).readonly = 1;
350 (*view).itemsize = std::mem::size_of::<Nucleotide>() as isize;
351
352 (*view).obj = slf.as_ptr();
353 pyo3::ffi::Py_INCREF((*view).obj);
354 let msg = std::ffi::CStr::from_bytes_with_nul(b"B\0").unwrap();
355 (*view).format = msg.as_ptr() as *mut _;
356
357 (*view).ndim = 2;
358 (*view).shape = slf.shape.as_mut_ptr();
359 (*view).strides = slf.strides.as_mut_ptr();
360 (*view).suboffsets = std::ptr::null_mut();
361 (*view).internal = std::ptr::null_mut();
362
363 Ok(())
364 }
365
366 pub fn copy(&self) -> Self {
368 self.clone()
369 }
370}
371
372alphabet_data_enum!(
375 #[derive(Clone, Debug, PartialEq)]
376 enum CountMatrixData(lightmotif::pwm::CountMatrix)
377);
378
379impl_matrix_methods!(CountMatrixData);
380
381impl CountMatrixData {
382 fn get(&self, index: usize) -> &[u32] {
383 apply_self!(self, |x| &x.matrix()[index])
384 }
385}
386
387#[pyclass(module = "lightmotif.lib", sequence)]
389#[derive(Clone, Debug)]
390pub struct CountMatrix {
391 data: CountMatrixData,
392}
393
394impl CountMatrix {
395 fn new<D>(data: D) -> Self
396 where
397 D: Into<CountMatrixData>,
398 {
399 Self { data: data.into() }
400 }
401}
402
403#[pymethods]
404impl CountMatrix {
405 #[new]
407 #[allow(unused_variables)]
408 #[pyo3(signature = (values, *, protein = false))]
409 pub fn __init__<'py>(
410 values: Bound<'py, PyDict>,
411 protein: bool,
412 ) -> PyResult<PyClassInitializer<Self>> {
413 macro_rules! run {
414 ($alphabet:ty) => {{
415 let mut data: Option<DenseMatrix<u32, <$alphabet as Alphabet>::K>> = None;
417 for s in <$alphabet as Alphabet>::symbols() {
418 let key = String::from(s.as_char());
419 if let Some(res) = values.get_item(&key)? {
420 let column = res;
421 if data.is_none() {
422 data = Some(DenseMatrix::new(column.len()?));
423 }
424 let matrix = data.as_mut().unwrap();
425 if matrix.rows() != column.len()? {
426 return Err(PyValueError::new_err("Invalid number of rows"));
427 }
428 for (i, x) in column.try_iter()?.enumerate() {
429 matrix[i][s.as_index()] = x?.extract::<u32>()?;
430 }
431 }
432 }
433
434 match data {
435 None => Err(PyValueError::new_err("Invalid count matrix")),
436 Some(matrix) => match lightmotif::CountMatrix::<$alphabet>::new(matrix) {
437 Ok(counts) => Ok(CountMatrix::new(counts).into()),
438 Err(_) => Err(PyValueError::new_err("Inconsistent rows in count matrix")),
439 },
440 }
441 }};
442 }
443 if protein {
444 run!(Protein)
445 } else {
446 run!(Dna)
447 }
448 }
449
450 pub fn __eq__<'py>(&self, object: Bound<'py, PyAny>) -> PyResult<bool> {
451 if let Ok(other) = object.extract::<PyRef<Self>>() {
452 Ok(self.data == other.data)
453 } else {
454 Ok(false)
455 }
456 }
457
458 pub fn __len__(&self) -> usize {
459 self.data.rows()
460 }
461
462 pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<Bound<'py, PyAny>> {
463 let py = slf.py();
464
465 let mut index_ = index;
466 if index_ < 0 {
467 index_ += slf.data.rows() as isize;
468 }
469 if index_ < 0 || (index_ as usize) >= slf.data.rows() {
470 return Err(PyIndexError::new_err(index));
471 }
472
473 let row = slf.data.get(index as usize);
474 row.into_pyobject(py)
475 }
476
477 #[getter]
479 pub fn protein(&self) -> bool {
480 matches!(self.data, CountMatrixData::Protein(_))
481 }
482
483 #[pyo3(signature = (pseudocount=None))]
501 pub fn normalize(&self, pseudocount: Option<PyObject>) -> PyResult<WeightMatrix> {
502 macro_rules! run {
503 ($data:ident, $alphabet:ty) => {{
504 let pseudo = Python::with_gil(|py| {
505 if let Some(obj) = pseudocount {
506 if let Ok(x) = obj.extract::<f32>(py) {
507 Ok(lightmotif::abc::Pseudocounts::from(x))
508 } else if let Ok(d) = obj.extract::<Bound<PyDict>>(py) {
509 let p = dict_to_alphabet_array::<$alphabet>(d)?;
510 Ok(lightmotif::abc::Pseudocounts::from(p))
511 } else {
512 Err(PyTypeError::new_err("Invalid type for pseudocount"))
513 }
514 } else {
515 Ok(lightmotif::abc::Pseudocounts::default())
516 }
517 })?;
518 let data = $data.to_freq(pseudo).to_weight(None);
519 Ok(WeightMatrix::new(data))
520 }};
521 }
522 match &self.data {
523 CountMatrixData::Dna(dna) => run!(dna, Dna),
524 CountMatrixData::Protein(prot) => run!(prot, Protein),
525 }
526 }
527}
528
529impl From<CountMatrixData> for CountMatrix {
530 fn from(data: CountMatrixData) -> Self {
531 Self { data }
532 }
533}
534
535alphabet_data_enum!(
538 #[derive(Clone, Debug, PartialEq)]
539 enum WeightMatrixData(lightmotif::pwm::WeightMatrix)
540);
541
542impl_matrix_methods!(WeightMatrixData);
543
544impl WeightMatrixData {
545 fn get(&self, index: usize) -> &[f32] {
546 apply_self!(self, |x| &x.matrix()[index])
547 }
548}
549
550#[pyclass(module = "lightmotif.lib")]
552#[derive(Clone, Debug)]
553pub struct WeightMatrix {
554 data: WeightMatrixData,
555}
556
557impl WeightMatrix {
558 fn new<D>(data: D) -> Self
559 where
560 D: Into<WeightMatrixData>,
561 {
562 Self { data: data.into() }
563 }
564}
565
566#[pymethods]
567impl WeightMatrix {
568 pub fn __eq__<'py>(&self, object: Bound<'py, PyAny>) -> PyResult<bool> {
569 if let Ok(other) = object.extract::<PyRef<Self>>() {
570 Ok(self.data == other.data)
571 } else {
572 Ok(false)
573 }
574 }
575
576 pub fn __len__(&self) -> usize {
577 self.data.rows()
578 }
579
580 pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<Bound<'py, PyAny>> {
581 let py = slf.py();
582
583 let mut index_ = index;
584 if index_ < 0 {
585 index_ += slf.data.rows() as isize;
586 }
587 if index_ < 0 || (index_ as usize) >= slf.data.rows() {
588 return Err(PyIndexError::new_err(index));
589 }
590
591 let row = slf.data.get(index as usize);
592 row.into_pyobject(py)
593 }
594
595 #[getter]
597 pub fn protein(&self) -> bool {
598 matches!(self.data, WeightMatrixData::Protein(_))
599 }
600
601 #[pyo3(signature=(background=None, base=2.0))]
609 pub fn log_odds(&self, background: Option<PyObject>, base: f32) -> PyResult<ScoringMatrix> {
610 macro_rules! run {
611 ($data:ident, $alphabet:ty) => {{
612 let bg = Python::with_gil(|py| {
614 if let Some(obj) = background {
615 if let Ok(d) = obj.extract::<Bound<PyDict>>(py) {
616 let p = dict_to_alphabet_array::<$alphabet>(d)?;
617 lightmotif::abc::Background::new(p).map_err(|_| {
618 PyValueError::new_err("Invalid background frequencies")
619 })
620 } else {
621 Err(PyTypeError::new_err("Invalid type for pseudocount"))
622 }
623 } else {
624 Ok(lightmotif::abc::Background::uniform())
625 }
626 })?;
627 let pwm = match bg.frequencies() != $data.background().frequencies() {
629 false => $data.rescale(bg),
630 true => $data.clone(),
631 };
632 Ok(ScoringMatrix::new(pwm.to_scoring_with_base(base)))
633 }};
634 }
635 match &self.data {
636 WeightMatrixData::Dna(dna) => run!(dna, Dna),
637 WeightMatrixData::Protein(protein) => run!(protein, Protein),
638 }
639 }
640}
641
642impl From<WeightMatrixData> for WeightMatrix {
643 fn from(data: WeightMatrixData) -> Self {
644 Self::new(data)
645 }
646}
647
648alphabet_data_enum!(
651 #[derive(Clone, Debug, PartialEq)]
652 enum ScoringMatrixData(lightmotif::pwm::ScoringMatrix)
653);
654
655impl_matrix_methods!(ScoringMatrixData);
656
657impl ScoringMatrixData {
658 fn get(&self, index: usize) -> &[f32] {
659 apply_self!(self, |x| &x[index])
660 }
661
662 unsafe fn as_ptr(&self) -> *const f32 {
663 apply_self!(self, |x| x.matrix()[0].as_ptr())
664 }
665}
666
667#[pyclass(module = "lightmotif.lib")]
669#[derive(Debug)]
670pub struct ScoringMatrix {
671 data: ScoringMatrixData,
672 distribution: Option<Py<ScoreDistribution>>,
673 shape: [Py_ssize_t; 2],
674 strides: [Py_ssize_t; 2],
675}
676
677impl ScoringMatrix {
678 fn new<D>(data: D) -> Self
679 where
680 D: Into<ScoringMatrixData>,
681 {
682 let data = data.into();
683 let cols = data.columns();
684 let rows = data.rows();
685 let stride = data.stride();
686 let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
687 let strides = [
688 (stride * std::mem::size_of::<f32>()) as Py_ssize_t,
689 std::mem::size_of::<f32>() as Py_ssize_t,
690 ];
691 Self {
692 data,
693 shape,
694 strides,
695 distribution: None,
696 }
697 }
698}
699
700#[pymethods]
701impl ScoringMatrix {
702 #[new]
704 #[pyo3(signature = (values, background = None, *, protein = false))]
705 #[allow(unused)]
706 pub fn __init__<'py>(
707 values: Bound<'py, PyDict>,
708 background: Option<PyObject>,
709 protein: bool,
710 ) -> PyResult<PyClassInitializer<Self>> {
711 macro_rules! run {
712 ($alphabet:ty) => {{
713 let bg = Python::with_gil(|py| {
715 if let Some(obj) = background {
716 if let Ok(d) = obj.extract::<Bound<PyDict>>(py) {
717 let p = dict_to_alphabet_array::<$alphabet>(d)?;
718 lightmotif::abc::Background::<$alphabet>::new(p).map_err(|_| {
719 PyValueError::new_err("Invalid background frequencies")
720 })
721 } else {
722 Err(PyTypeError::new_err("Invalid type for pseudocount"))
723 }
724 } else {
725 Ok(lightmotif::abc::Background::uniform())
726 }
727 })?;
728 let mut data: Option<DenseMatrix<f32, <$alphabet as Alphabet>::K>> = None;
730 for s in <$alphabet as Alphabet>::symbols() {
731 let key = String::from(s.as_char());
732 if let Some(res) = values.get_item(&key)? {
733 let column = res.downcast::<PyList>()?;
734 if data.is_none() {
735 data = Some(DenseMatrix::new(column.len()));
736 }
737 let matrix = data.as_mut().unwrap();
738 if matrix.rows() != column.len() {
739 return Err(PyValueError::new_err("Invalid number of rows"));
740 }
741 for (i, x) in column.iter().enumerate() {
742 matrix[i][s.as_index()] = x.extract::<f32>()?;
743 }
744 }
745 }
746 match data {
748 None => Err(PyValueError::new_err("Invalid count matrix")),
749 Some(matrix) => Ok(Self::new(lightmotif::ScoringMatrix::<$alphabet>::new(
750 bg, matrix,
751 ))
752 .into()),
753 }
754 }};
755 }
756
757 if protein {
758 run!(Protein)
759 } else {
760 run!(Dna)
761 }
762 }
763
764 pub fn __eq__(&self, object: Bound<PyAny>) -> PyResult<bool> {
765 if let Ok(other) = object.extract::<PyRef<Self>>() {
766 Ok(self.data == other.data)
767 } else {
768 Ok(false)
769 }
770 }
771
772 pub fn __len__(&self) -> usize {
773 self.data.rows()
774 }
775
776 pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<Bound<'py, PyAny>> {
777 let py = slf.py();
778 let mut index_ = index;
779 if index_ < 0 {
780 index_ += slf.data.rows() as isize;
781 }
782 if index_ < 0 || (index_ as usize) >= slf.data.rows() {
783 return Err(PyIndexError::new_err(index));
784 }
785 slf.data.get(index as usize).into_pyobject(py)
786 }
787
788 #[cfg(not(feature = "abi3"))]
789 unsafe fn __getbuffer__(
790 mut slf: PyRefMut<'_, Self>,
791 view: *mut pyo3::ffi::Py_buffer,
792 flags: std::os::raw::c_int,
793 ) -> PyResult<()> {
794 if view.is_null() {
795 return Err(PyBufferError::new_err("View is null"));
796 }
797 if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
798 return Err(PyBufferError::new_err("Object is not writable"));
799 }
800
801 let data = slf.data.as_ptr();
802 (*view).buf = data as *mut std::os::raw::c_void;
803 (*view).len = -1;
804 (*view).readonly = 1;
805 (*view).itemsize = std::mem::size_of::<f32>() as isize;
806
807 (*view).obj = slf.as_ptr();
808 pyo3::ffi::Py_INCREF((*view).obj);
809 let msg = std::ffi::CStr::from_bytes_with_nul(b"f\0").unwrap();
810 (*view).format = msg.as_ptr() as *mut _;
811
812 (*view).ndim = 2;
813 (*view).shape = slf.shape.as_mut_ptr();
814 (*view).strides = slf.strides.as_mut_ptr();
815 (*view).suboffsets = std::ptr::null_mut();
816 (*view).internal = std::ptr::null_mut();
817
818 Ok(())
819 }
820
821 #[getter]
822 pub fn score_distribution<'py>(slf: Bound<'py, Self>) -> PyResult<Py<ScoreDistribution>> {
823 let py = slf.py();
824 if slf.borrow().distribution.is_none() {
825 let data = match &slf.borrow().data {
826 ScoringMatrixData::Dna(dna) => {
827 ScoreDistributionData::from(dna.to_score_distribution())
828 }
829 ScoringMatrixData::Protein(prot) => {
830 ScoreDistributionData::from(prot.to_score_distribution())
831 }
832 };
833 slf.borrow_mut().distribution = Some(Py::new(py, ScoreDistribution::from(data))?);
834 }
835 Ok(slf.borrow().distribution.as_ref().unwrap().clone_ref(py))
836 }
837
838 #[getter]
840 pub fn protein(&self) -> bool {
841 matches!(self.data, ScoringMatrixData::Protein(_))
842 }
843
844 pub fn calculate(
856 slf: Bound<'_, Self>,
857 sequence: &mut StripedSequence,
858 ) -> PyResult<StripedScores> {
859 let pssm = &slf.borrow().data;
860 let seq = &mut sequence.data;
861 match (seq, pssm) {
862 (StripedSequenceData::Dna(dna), ScoringMatrixData::Dna(pssm)) => {
863 let pli = lightmotif::pli::Pipeline::dispatch();
864 dna.configure(pssm);
865 Ok(slf.py().allow_threads(|| pli.score(pssm, dna)).into())
866 }
867 (StripedSequenceData::Protein(prot), ScoringMatrixData::Protein(pssm)) => {
868 let pli = lightmotif::pli::Pipeline::dispatch();
869 prot.configure(pssm);
870 Ok(slf.py().allow_threads(|| pli.score(pssm, prot)).into())
871 }
872 (_, _) => Err(PyValueError::new_err("alphabet mismatch")),
873 }
874 }
875
876 #[pyo3(signature=(score, method="meme"))]
878 pub fn pvalue(slf: Bound<'_, Self>, score: f64, method: &str) -> PyResult<f64> {
879 let py = slf.py();
880 match method {
881 "tfmpvalue" => {
882 #[cfg(feature = "tfmpvalue")]
883 match &slf.borrow().data {
884 ScoringMatrixData::Dna(dna) => Ok(TfmPvalue::new(&dna).pvalue(score)),
885 ScoringMatrixData::Protein(prot) => Ok(TfmPvalue::new(&prot).pvalue(score)),
886 }
887 #[cfg(not(feature = "tfmpvalue"))]
888 Err(PyRuntimeError::new_err(
889 "package compiled without `lightmotif-tfmpvalue`",
890 ))
891 }
892 "meme" => {
893 let dist = Self::score_distribution(slf)?;
894 match &dist.bind(py).borrow().data {
895 ScoreDistributionData::Dna(dna) => Ok(dna.pvalue(score as f32)),
896 ScoreDistributionData::Protein(prot) => Ok(prot.pvalue(score as f32)),
897 }
898 }
899 other => Err(PyValueError::new_err(format!(
900 "invalid pvalue method: {:?}",
901 other
902 ))),
903 }
904 }
905
906 pub fn max_score(slf: PyRef<'_, Self>) -> f32 {
907 match &slf.data {
908 ScoringMatrixData::Dna(dna) => dna.max_score(),
909 ScoringMatrixData::Protein(prot) => prot.max_score(),
910 }
911 }
912
913 #[pyo3(signature=(pvalue, method="meme"))]
915 pub fn score(slf: Bound<'_, Self>, pvalue: f64, method: &str) -> PyResult<f64> {
916 let py = slf.py();
917 match method {
918 "tfmpvalue" => {
919 #[cfg(feature = "tfmpvalue")]
920 match &slf.borrow().data {
921 ScoringMatrixData::Dna(dna) => Ok(TfmPvalue::new(&dna).score(pvalue)),
922 ScoringMatrixData::Protein(prot) => Ok(TfmPvalue::new(&prot).score(pvalue)),
923 }
924 #[cfg(not(feature = "tfmpvalue"))]
925 Err(PyRuntimeError::new_err(
926 "package compiled without `lightmotif-tfmpvalue`",
927 ))
928 }
929 "meme" => {
930 let dist = Self::score_distribution(slf)?;
931 match &dist.bind(py).borrow().data {
932 ScoreDistributionData::Dna(dna) => Ok(dna.score(pvalue) as f64),
933 ScoreDistributionData::Protein(prot) => Ok(prot.score(pvalue) as f64),
934 }
935 }
936 other => Err(PyValueError::new_err(format!(
937 "invalid pvalue method: {:?}",
938 other
939 ))),
940 }
941 }
942
943 pub fn reverse_complement(slf: PyRef<'_, Self>) -> PyResult<ScoringMatrix> {
945 match &slf.data {
946 ScoringMatrixData::Dna(dna) => Ok(Self::from(ScoringMatrixData::from(
947 dna.reverse_complement(),
948 ))),
949 ScoringMatrixData::Protein(_) => Err(PyRuntimeError::new_err(
950 "cannot complement a protein sequence",
951 )),
952 }
953 }
954}
955
956impl From<ScoringMatrixData> for ScoringMatrix {
957 fn from(data: ScoringMatrixData) -> Self {
958 Self::new(data)
959 }
960}
961
962alphabet_data_enum!(
965 #[derive(Clone, Debug)]
966 enum ScoreDistributionData(lightmotif::pwm::dist::ScoreDistribution)
967);
968
969impl ScoreDistributionData {
970 fn sf(&self) -> &[f64] {
971 apply_self!(self, |x| x.sf())
972 }
973}
974
975#[pyclass(module = "lightmotif.lib")]
977#[derive(Clone, Debug)]
978pub struct ScoreDistribution {
979 data: ScoreDistributionData,
980}
981
982impl From<ScoreDistributionData> for ScoreDistribution {
983 fn from(data: ScoreDistributionData) -> Self {
984 Self { data }
985 }
986}
987
988#[pymethods]
989impl ScoreDistribution {
990 #[cfg(not(feature = "abi3"))]
992 unsafe fn __getbuffer__(
993 slf: PyRefMut<'_, Self>,
994 view: *mut pyo3::ffi::Py_buffer,
995 flags: std::os::raw::c_int,
996 ) -> PyResult<()> {
997 if view.is_null() {
998 return Err(PyBufferError::new_err("View is null"));
999 }
1000 if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
1001 return Err(PyBufferError::new_err("Object is not writable"));
1002 }
1003
1004 let array: &[_] = slf.data.sf();
1005 (*view).buf = array.as_ptr() as *mut std::os::raw::c_void;
1006 (*view).len = (array.len() * std::mem::size_of::<f64>()) as isize;
1007 (*view).readonly = 1;
1008 (*view).itemsize = std::mem::size_of::<f64>() as isize;
1009
1010 (*view).obj = slf.as_ptr();
1011 pyo3::ffi::Py_INCREF((*view).obj);
1012 let msg = std::ffi::CStr::from_bytes_with_nul(b"d\0").unwrap();
1013 (*view).format = msg.as_ptr() as *mut _;
1014
1015 (*view).ndim = 1;
1016 (*view).shape = std::ptr::null_mut();
1017 (*view).strides = std::ptr::null_mut();
1018 (*view).suboffsets = std::ptr::null_mut();
1019 (*view).internal = std::ptr::null_mut();
1020
1021 Ok(())
1022 }
1023}
1024
1025#[pyclass(module = "lightmotif.lib", sequence)]
1029#[derive(Clone, Debug)]
1030pub struct StripedScores {
1031 scores: lightmotif::scores::StripedScores<f32>,
1032 shape: [Py_ssize_t; 2],
1033 strides: [Py_ssize_t; 2],
1034}
1035
1036#[pymethods]
1037impl StripedScores {
1038 fn __len__(&self) -> usize {
1039 self.scores.max_index()
1040 }
1041
1042 fn __getitem__(&self, index: isize) -> PyResult<f32> {
1043 if index < self.scores.max_index() as isize && index >= 0 {
1044 Ok(self.scores[index as usize])
1045 } else {
1046 Err(PyIndexError::new_err("list index out of range"))
1047 }
1048 }
1049
1050 #[cfg(not(feature = "abi3"))]
1051 unsafe fn __getbuffer__(
1052 mut slf: PyRefMut<'_, Self>,
1053 view: *mut pyo3::ffi::Py_buffer,
1054 flags: std::os::raw::c_int,
1055 ) -> PyResult<()> {
1056 if view.is_null() {
1057 return Err(PyBufferError::new_err("View is null"));
1058 }
1059 if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
1060 return Err(PyBufferError::new_err("Object is not writable"));
1061 }
1062
1063 let data = slf.scores.matrix()[0].as_ptr();
1064 (*view).buf = data as *mut std::os::raw::c_void;
1065 (*view).len = -1;
1066 (*view).readonly = 1;
1067 (*view).itemsize = std::mem::size_of::<f32>() as isize;
1068
1069 (*view).obj = slf.as_ptr();
1070 pyo3::ffi::Py_INCREF((*view).obj);
1071 let msg = std::ffi::CStr::from_bytes_with_nul(b"f\0").unwrap();
1072 (*view).format = msg.as_ptr() as *mut _;
1073
1074 (*view).ndim = 2;
1075 (*view).shape = slf.shape.as_mut_ptr();
1076 (*view).strides = slf.strides.as_mut_ptr();
1077 (*view).suboffsets = std::ptr::null_mut();
1078 (*view).internal = std::ptr::null_mut();
1079
1080 Ok(())
1081 }
1082
1083 pub fn threshold(slf: PyRef<'_, Self>, threshold: f32) -> Vec<usize> {
1095 let scores = &slf.scores;
1096 slf.py().allow_threads(|| scores.threshold(threshold))
1097 }
1098
1099 pub fn max(slf: PyRef<'_, Self>) -> Option<f32> {
1109 let scores = &slf.scores;
1110 slf.py().allow_threads(|| scores.max())
1111 }
1112
1113 pub fn argmax(slf: PyRef<'_, Self>) -> Option<usize> {
1123 let scores = &slf.scores;
1124 slf.py().allow_threads(|| scores.argmax())
1125 }
1126}
1127
1128impl From<lightmotif::scores::StripedScores<f32>> for StripedScores {
1129 fn from(scores: lightmotif::scores::StripedScores<f32>) -> Self {
1130 let cols = scores.matrix().columns();
1133 let rows = scores.matrix().rows();
1134 let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
1136 let strides = [
1137 std::mem::size_of::<f32>() as Py_ssize_t,
1138 (scores.matrix().stride() * std::mem::size_of::<f32>()) as Py_ssize_t,
1139 ];
1140 Self {
1150 scores,
1151 shape,
1152 strides,
1153 }
1154 }
1155}
1156
1157#[pyclass(module = "lightmotif.lib", subclass)]
1161#[derive(Debug)]
1162pub struct Motif {
1163 #[pyo3(get)]
1164 counts: Option<Py<CountMatrix>>,
1169 #[pyo3(get)]
1170 pwm: Py<WeightMatrix>,
1172 #[pyo3(get)]
1173 pssm: Py<ScoringMatrix>,
1175 #[pyo3(get)]
1176 name: Option<String>,
1178}
1179
1180impl Motif {
1181 fn from_counts<A>(py: Python, counts: lightmotif::pwm::CountMatrix<A>) -> PyResult<Self>
1182 where
1183 A: Alphabet,
1184 CountMatrixData: From<lightmotif::pwm::CountMatrix<A>>,
1185 WeightMatrixData: From<lightmotif::pwm::WeightMatrix<A>>,
1186 ScoringMatrixData: From<lightmotif::pwm::ScoringMatrix<A>>,
1187 {
1188 let weights = counts.to_freq(0.0).to_weight(None);
1189 let scoring = weights.to_scoring();
1190 Ok(Motif {
1191 counts: Some(Py::new(py, CountMatrix::new(counts))?),
1192 pwm: Py::new(py, WeightMatrix::new(weights))?,
1193 pssm: Py::new(py, ScoringMatrix::new(scoring))?,
1194 name: None,
1195 })
1196 }
1197
1198 fn from_weights<A>(py: Python, weights: lightmotif::pwm::WeightMatrix<A>) -> PyResult<Self>
1199 where
1200 A: Alphabet,
1201 WeightMatrixData: From<lightmotif::pwm::WeightMatrix<A>>,
1202 ScoringMatrixData: From<lightmotif::pwm::ScoringMatrix<A>>,
1203 {
1204 let scoring = weights.to_scoring();
1205 Ok(Motif {
1206 counts: None,
1207 pwm: Py::new(py, WeightMatrix::new(weights))?,
1208 pssm: Py::new(py, ScoringMatrix::new(scoring))?,
1209 name: None,
1210 })
1211 }
1212}
1213
1214#[pymethods]
1215impl Motif {
1216 #[getter]
1218 pub fn protein<'py>(slf: PyRef<'py, Self>) -> bool {
1219 let py = slf.py();
1220 matches!(
1221 slf.pssm.bind(py).borrow().data,
1222 ScoringMatrixData::Protein(_)
1223 )
1224 }
1225}
1226
1227#[pyclass(module = "lightmotif.lib")]
1242#[derive(Debug)]
1243pub struct Scanner {
1244 #[allow(unused)]
1245 pssm: Py<ScoringMatrix>,
1246 #[allow(unused)]
1247 sequence: Py<StripedSequence>,
1248 data: lightmotif::scan::Scanner<
1249 'static,
1250 Dna,
1251 &'static lightmotif::pwm::ScoringMatrix<Dna>,
1252 &'static lightmotif::seq::StripedSequence<Dna>,
1253 >,
1254}
1255
1256#[pymethods]
1257impl Scanner {
1258 #[new]
1259 #[pyo3(signature = (pssm, sequence, threshold = 0.0, block_size = 256))]
1260 fn __init__<'py>(
1261 pssm: Bound<'py, ScoringMatrix>,
1262 sequence: Bound<'py, StripedSequence>,
1263 threshold: f32,
1264 block_size: usize,
1265 ) -> PyResult<PyClassInitializer<Self>> {
1266 match (
1267 &pssm.try_borrow()?.data,
1268 &mut sequence.try_borrow_mut()?.data,
1269 ) {
1270 (ScoringMatrixData::Dna(p), StripedSequenceData::Dna(s)) => {
1271 s.configure(&p);
1272 let scanner = unsafe {
1278 let mut scanner = lightmotif::scan::Scanner::<Dna, _, _>::new(p, s);
1279 scanner.threshold(threshold);
1280 scanner.block_size(block_size);
1281 std::mem::transmute(scanner)
1282 };
1283 Ok(PyClassInitializer::from(Scanner {
1284 data: scanner,
1285 pssm: pssm.unbind(),
1286 sequence: sequence.unbind(),
1287 }))
1288 }
1289 (ScoringMatrixData::Protein(_), StripedSequenceData::Protein(_)) => {
1290 Err(PyValueError::new_err("protein scanner is not supported"))
1291 }
1292 (_, _) => Err(PyValueError::new_err("alphabet mismatch")),
1293 }
1294 }
1295
1296 fn __iter__(slf: PyRef<Self>) -> PyRef<Self> {
1297 slf
1298 }
1299
1300 fn __next__(mut slf: PyRefMut<Self>) -> Option<Hit> {
1301 slf.data.next().map(Hit::from)
1302 }
1303}
1304
1305#[pyclass(module = "lightmotif.lib")]
1307#[derive(Debug)]
1308pub struct Hit {
1309 #[pyo3(get)]
1310 position: usize,
1312 #[pyo3(get)]
1313 score: f32,
1315}
1316
1317impl From<lightmotif::scan::Hit> for Hit {
1318 fn from(value: lightmotif::scan::Hit) -> Self {
1319 Hit {
1320 position: value.position(),
1321 score: value.score(),
1322 }
1323 }
1324}
1325
1326#[pyfunction]
1352#[pyo3(signature = (sequences, *, protein = false, name = None))]
1353pub fn create(sequences: Bound<PyAny>, protein: bool, name: Option<String>) -> PyResult<Motif> {
1354 let py = sequences.py();
1355 macro_rules! run {
1356 ($alphabet:ty) => {{
1357 let mut encoded = Vec::new();
1358 for seq in sequences.try_iter()? {
1359 let s = seq?.extract::<Bound<PyString>>()?;
1360 let sequence = s.to_cow()?;
1361 let x = py
1362 .allow_threads(|| lightmotif::EncodedSequence::<$alphabet>::encode(&*sequence))
1363 .map_err(|_| PyValueError::new_err("Invalid symbol in sequence"))?;
1364 encoded.push(x);
1365 }
1366
1367 let data = lightmotif::CountMatrix::from_sequences(encoded)
1368 .map_err(|_| PyValueError::new_err("Inconsistent sequence length"))?;
1369 let weights = data.to_freq(0.0).to_weight(None);
1370 let scoring = weights.to_scoring();
1371
1372 Ok(Motif {
1373 counts: Some(Py::new(py, CountMatrix::new(data))?),
1374 pwm: Py::new(py, WeightMatrix::new(weights))?,
1375 pssm: Py::new(py, ScoringMatrix::new(scoring))?,
1376 name,
1377 })
1378 }};
1379 }
1380
1381 if protein {
1382 run!(Protein)
1383 } else {
1384 run!(Dna)
1385 }
1386}
1387
1388#[pyfunction]
1403#[pyo3(signature = (sequence, *, protein=false))]
1404pub fn stripe(sequence: Bound<PyString>, protein: bool) -> PyResult<StripedSequence> {
1405 let py = sequence.py();
1406 let encoded = EncodedSequence::__init__(sequence, protein).and_then(|e| Py::new(py, e))?;
1407 let striped = encoded.borrow(py).stripe();
1408 Ok(striped)
1409}
1410
1411#[pyfunction]
1439#[pyo3(signature = (pssm, sequence, *, threshold = 0.0, block_size = 256))]
1440pub fn scan<'py>(
1441 pssm: Bound<'py, ScoringMatrix>,
1442 sequence: Bound<'py, StripedSequence>,
1443 threshold: f32,
1444 block_size: usize,
1445) -> PyResult<Bound<'py, Scanner>> {
1446 let py = pssm.py();
1447 Bound::new(
1448 py,
1449 Scanner::__init__(pssm, sequence, threshold, block_size)?,
1450 )
1451}
1452
1453#[pymodule]
1475#[pyo3(name = "lib")]
1476pub fn init<'py>(_py: Python<'py>, m: &Bound<PyModule>) -> PyResult<()> {
1477 m.add("__package__", "lightmotif")?;
1478 m.add("__version__", env!("CARGO_PKG_VERSION"))?;
1479 m.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?;
1480
1481 #[cfg(feature = "abi3")]
1482 m.add("LIMITED_API", true)?;
1483 #[cfg(not(feature = "abi3"))]
1484 m.add("LIMITED_API", false)?;
1485
1486 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1487 m.add("AVX2_SUPPORTED", std::is_x86_feature_detected!("avx2"))?;
1488 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1489 m.add("AVX2_SUPPORTED", false)?;
1490
1491 m.add_class::<EncodedSequence>()?;
1492 m.add_class::<StripedSequence>()?;
1493
1494 m.add_class::<CountMatrix>()?;
1495 m.add_class::<WeightMatrix>()?;
1496 m.add_class::<ScoringMatrix>()?;
1497
1498 m.add_class::<ScoreDistribution>()?;
1499
1500 m.add_class::<StripedScores>()?;
1501
1502 m.add_class::<Motif>()?;
1503 m.add_class::<io::TransfacMotif>()?;
1504 m.add_class::<io::JasparMotif>()?;
1505 m.add_class::<io::UniprobeMotif>()?;
1506 m.add_class::<io::MemeMotif>()?;
1507
1508 m.add_class::<Scanner>()?;
1509 m.add_class::<Hit>()?;
1510
1511 m.add_class::<io::Loader>()?;
1512
1513 m.add_function(wrap_pyfunction!(create, m)?)?;
1514 m.add_function(wrap_pyfunction!(scan, m)?)?;
1515 m.add_function(wrap_pyfunction!(stripe, m)?)?;
1516 m.add_function(wrap_pyfunction!(io::load, m)?)?;
1517
1518 Ok(())
1519}