lightmotif_py/
lib.rs

1#![doc = include_str!("../README.md")]
2
3extern crate generic_array;
4extern crate lightmotif;
5#[cfg(feature = "tfmpvalue")]
6extern crate lightmotif_tfmpvalue;
7extern crate pyo3;
8
9use std::fmt::Display;
10use std::fmt::Formatter;
11
12use lightmotif::abc::Alphabet;
13use lightmotif::abc::Dna;
14use lightmotif::abc::Nucleotide;
15use lightmotif::abc::Protein;
16use lightmotif::abc::Symbol;
17use lightmotif::dense::DenseMatrix;
18use lightmotif::num::Unsigned;
19use lightmotif::pli::Score;
20#[cfg(feature = "tfmpvalue")]
21use lightmotif_tfmpvalue::TfmPvalue;
22
23use generic_array::GenericArray;
24use pyo3::exceptions::PyBufferError;
25use pyo3::exceptions::PyIndexError;
26use pyo3::exceptions::PyRuntimeError;
27use pyo3::exceptions::PyTypeError;
28use pyo3::exceptions::PyValueError;
29use pyo3::ffi::Py_ssize_t;
30use pyo3::prelude::*;
31use pyo3::types::PyDict;
32use pyo3::types::PyList;
33use pyo3::types::PyString;
34
35mod io;
36mod pyfile;
37
38// --- Macros ------------------------------------------------------------------
39
40macro_rules! apply_self {
41    ($self:expr, |$x:ident| $e:expr) => {
42        match &$self {
43            Self::Dna($x) => $e,
44            Self::Protein($x) => $e,
45        }
46    };
47}
48
49macro_rules! impl_matrix_methods {
50    ($datatype:ident) => {
51        impl $datatype {
52            #[allow(unused)]
53            fn rows(&self) -> usize {
54                apply_self!(self, |x| x.matrix().rows())
55            }
56
57            #[allow(unused)]
58            fn columns(&self) -> usize {
59                apply_self!(self, |x| x.matrix().columns())
60            }
61
62            #[allow(unused)]
63            fn stride(&self) -> usize {
64                apply_self!(self, |x| x.matrix().stride())
65            }
66        }
67    };
68}
69
70macro_rules! alphabet_data_enum {
71    (
72        #[derive($($t:ident),*)]
73        enum $name:ident($($p:ident)::*)
74    ) => {
75
76        #[derive($($t),*)]
77        enum $name {
78            Dna($($p)::*<Dna>),
79            Protein($($p)::*<Protein>)
80        }
81
82        impl From<$($p)::*<Dna>> for $name {
83            fn from(value: $($p)::*<Dna>) -> Self {
84                $name::Dna(value)
85            }
86        }
87
88        impl From<$($p)::*<Protein>> for $name {
89            fn from(value: $($p)::*<Protein>) -> Self {
90                $name::Protein(value)
91            }
92        }
93    }
94}
95
96// --- Helpers -----------------------------------------------------------------
97
98fn dict_to_alphabet_array<'py, A: lightmotif::Alphabet>(
99    d: Bound<'py, PyDict>,
100) -> PyResult<GenericArray<f32, A::K>> {
101    let mut p = std::iter::repeat(0.0)
102        .take(A::K::USIZE)
103        .collect::<GenericArray<f32, A::K>>();
104    for (k, v) in d.iter() {
105        let s = k.extract::<Bound<PyString>>()?;
106        let key = s.to_cow()?;
107        if key.len() != 1 {
108            return Err(PyValueError::new_err((
109                "Invalid key for pseudocount:",
110                s.to_string(),
111            )));
112        }
113        let x = key.chars().next().unwrap();
114        let symbol = <A as lightmotif::Alphabet>::Symbol::from_char(x)
115            .map_err(|_| PyValueError::new_err(("Invalid key for pseudocount:", x)))?;
116        let value = v.extract::<f32>()?;
117        p[symbol.as_index()] = value;
118    }
119    Ok(p)
120}
121
122// --- EncodedSequence ---------------------------------------------------------
123
124alphabet_data_enum!(
125    #[derive(Clone, Debug)]
126    enum EncodedSequenceData(lightmotif::seq::EncodedSequence)
127);
128
129impl EncodedSequenceData {
130    unsafe fn as_ptr(&self) -> *const u8 {
131        apply_self!(self, |x| {
132            let data: &[_] = x.as_ref();
133            data.as_ptr() as *const u8
134        })
135    }
136
137    fn len(&self) -> usize {
138        apply_self!(self, |x| x.len())
139    }
140
141    fn get(&self, index: usize) -> u8 {
142        apply_self!(self, |x| x[index] as u8)
143    }
144
145    fn stripe(&self) -> StripedSequenceData {
146        apply_self!(self, |x| x.to_striped().into())
147    }
148}
149
150impl Display for EncodedSequenceData {
151    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
152        apply_self!(self, |x| x.fmt(f))
153    }
154}
155
156/// A biological sequence encoded as digits.
157#[pyclass(module = "lightmotif.lib")]
158#[derive(Clone, Debug)]
159pub struct EncodedSequence {
160    data: EncodedSequenceData,
161}
162
163#[pymethods]
164impl EncodedSequence {
165    /// Encode a sequence.
166    #[new]
167    #[pyo3(signature = (sequence, protein=false))]
168    pub fn __init__<'py>(
169        sequence: Bound<'py, PyString>,
170        protein: bool,
171    ) -> PyResult<PyClassInitializer<Self>> {
172        let seq = sequence.to_cow()?;
173        let py = sequence.py();
174        let data = py
175            .allow_threads(|| {
176                if protein {
177                    lightmotif::seq::EncodedSequence::<Protein>::encode(&*seq)
178                        .map(EncodedSequenceData::from)
179                } else {
180                    lightmotif::seq::EncodedSequence::<Dna>::encode(&*seq)
181                        .map(EncodedSequenceData::from)
182                }
183            })
184            .map_err(|lightmotif::err::InvalidSymbol(x)| {
185                PyValueError::new_err(format!("Invalid symbol in input: {}", x))
186            })?;
187        Ok(Self::from(data).into())
188    }
189
190    /// Convert the encoded sequence to a string.
191    pub fn __str__(&self) -> String {
192        self.data.to_string()
193    }
194
195    /// Get the length of the encoded sequence.
196    pub fn __len__(&self) -> usize {
197        self.data.len()
198    }
199
200    /// Get a copy of the encoded sequence.
201    pub fn __copy__(&self) -> Self {
202        self.copy()
203    }
204
205    /// Get an element of the encoded sequence.
206    pub fn __getitem__(&self, mut index: Py_ssize_t) -> PyResult<u8> {
207        let length = self.data.len();
208        if index < 0 {
209            index += length as Py_ssize_t;
210        }
211        if index < 0 || index >= length as Py_ssize_t {
212            Err(PyIndexError::new_err("sequence index out of range"))
213        } else {
214            Ok(self.data.get(index as usize))
215        }
216    }
217
218    /// Get the underlying memory of the encoded sequence.
219    #[cfg(not(feature = "abi3"))]
220    unsafe fn __getbuffer__(
221        slf: PyRef<'_, Self>,
222        view: *mut pyo3::ffi::Py_buffer,
223        flags: std::os::raw::c_int,
224    ) -> PyResult<()> {
225        if view.is_null() {
226            return Err(PyBufferError::new_err("View is null"));
227        }
228        if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
229            return Err(PyBufferError::new_err("Object is not writable"));
230        }
231
232        (*view).buf = slf.data.as_ptr() as *mut std::os::raw::c_void;
233        (*view).len = slf.data.len() as isize;
234        (*view).readonly = 1;
235        (*view).itemsize = std::mem::size_of::<u8>() as isize;
236
237        (*view).obj = slf.as_ptr();
238        pyo3::ffi::Py_INCREF((*view).obj);
239        let msg = std::ffi::CStr::from_bytes_with_nul(b"B\0").unwrap();
240        (*view).format = msg.as_ptr() as *mut _;
241
242        (*view).ndim = 1;
243        (*view).shape = std::ptr::null_mut();
244        (*view).strides = std::ptr::null_mut();
245        (*view).suboffsets = std::ptr::null_mut();
246        (*view).internal = std::ptr::null_mut();
247
248        Ok(())
249    }
250
251    /// `bool`: `True` if the encoded sequence is a protein sequence.
252    #[getter]
253    pub fn protein(&self) -> bool {
254        matches!(self.data, EncodedSequenceData::Protein(_))
255    }
256
257    /// Create a copy of this sequence.
258    pub fn copy(&self) -> Self {
259        self.clone()
260    }
261
262    /// Convert this sequence into a striped matrix.
263    ///
264    /// Returns:
265    ///     `~lightmotif.StripedSequence`: The input sequence, stored in a
266    ///     column-major matrix.
267    ///
268    pub fn stripe(&self) -> StripedSequence {
269        self.data.stripe().into()
270    }
271}
272
273impl From<EncodedSequenceData> for EncodedSequence {
274    fn from(data: EncodedSequenceData) -> Self {
275        Self { data }
276    }
277}
278
279// --- StripedSequence ---------------------------------------------------------
280
281alphabet_data_enum!(
282    #[derive(Clone, Debug)]
283    enum StripedSequenceData(lightmotif::seq::StripedSequence)
284);
285
286impl_matrix_methods!(StripedSequenceData);
287
288impl StripedSequenceData {
289    unsafe fn as_ptr(&self) -> *const u8 {
290        apply_self!(self, |x| x.matrix()[0].as_ptr() as *const u8)
291    }
292}
293
294/// An encoded biological sequence stored in a column-major matrix.
295#[pyclass(module = "lightmotif.lib")]
296#[derive(Clone, Debug)]
297pub struct StripedSequence {
298    data: StripedSequenceData,
299    shape: [Py_ssize_t; 2],
300    strides: [Py_ssize_t; 2],
301}
302
303impl From<StripedSequenceData> for StripedSequence {
304    fn from(data: StripedSequenceData) -> Self {
305        let cols = data.columns();
306        let rows = data.rows();
307        let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
308        let strides = [
309            std::mem::size_of::<u8>() as Py_ssize_t,
310            (std::mem::size_of::<u8>() * data.stride()) as Py_ssize_t,
311        ];
312        Self {
313            data,
314            shape,
315            strides,
316        }
317    }
318}
319
320#[pymethods]
321impl StripedSequence {
322    /// Get a copy of the striped sequence.
323    pub fn __copy__(&self) -> Self {
324        self.copy()
325    }
326
327    /// `bool`: `True` if the striped sequence is a protein sequence.
328    #[getter]
329    pub fn protein(&self) -> bool {
330        matches!(self.data, StripedSequenceData::Protein(_))
331    }
332
333    #[cfg(not(feature = "abi3"))]
334    unsafe fn __getbuffer__(
335        mut slf: PyRefMut<'_, Self>,
336        view: *mut pyo3::ffi::Py_buffer,
337        flags: std::os::raw::c_int,
338    ) -> PyResult<()> {
339        if view.is_null() {
340            return Err(PyBufferError::new_err("View is null"));
341        }
342        if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
343            return Err(PyBufferError::new_err("Object is not writable"));
344        }
345
346        let data = slf.data.as_ptr();
347        (*view).buf = data as *mut std::os::raw::c_void;
348        (*view).len = (slf.data.rows() * slf.data.columns()) as isize;
349        (*view).readonly = 1;
350        (*view).itemsize = std::mem::size_of::<Nucleotide>() as isize;
351
352        (*view).obj = slf.as_ptr();
353        pyo3::ffi::Py_INCREF((*view).obj);
354        let msg = std::ffi::CStr::from_bytes_with_nul(b"B\0").unwrap();
355        (*view).format = msg.as_ptr() as *mut _;
356
357        (*view).ndim = 2;
358        (*view).shape = slf.shape.as_mut_ptr();
359        (*view).strides = slf.strides.as_mut_ptr();
360        (*view).suboffsets = std::ptr::null_mut();
361        (*view).internal = std::ptr::null_mut();
362
363        Ok(())
364    }
365
366    /// Create a copy of this sequence.
367    pub fn copy(&self) -> Self {
368        self.clone()
369    }
370}
371
372// --- CountMatrix -------------------------------------------------------------
373
374alphabet_data_enum!(
375    #[derive(Clone, Debug, PartialEq)]
376    enum CountMatrixData(lightmotif::pwm::CountMatrix)
377);
378
379impl_matrix_methods!(CountMatrixData);
380
381impl CountMatrixData {
382    fn get(&self, index: usize) -> &[u32] {
383        apply_self!(self, |x| &x.matrix()[index])
384    }
385}
386
387/// A matrix storing the count of a motif letters at each position.
388#[pyclass(module = "lightmotif.lib", sequence)]
389#[derive(Clone, Debug)]
390pub struct CountMatrix {
391    data: CountMatrixData,
392}
393
394impl CountMatrix {
395    fn new<D>(data: D) -> Self
396    where
397        D: Into<CountMatrixData>,
398    {
399        Self { data: data.into() }
400    }
401}
402
403#[pymethods]
404impl CountMatrix {
405    /// Create a new count matrix.
406    #[new]
407    #[allow(unused_variables)]
408    #[pyo3(signature = (values, *, protein = false))]
409    pub fn __init__<'py>(
410        values: Bound<'py, PyDict>,
411        protein: bool,
412    ) -> PyResult<PyClassInitializer<Self>> {
413        macro_rules! run {
414            ($alphabet:ty) => {{
415                // Extract values from dictionary into matrix
416                let mut data: Option<DenseMatrix<u32, <$alphabet as Alphabet>::K>> = None;
417                for s in <$alphabet as Alphabet>::symbols() {
418                    let key = String::from(s.as_char());
419                    if let Some(res) = values.get_item(&key)? {
420                        let column = res;
421                        if data.is_none() {
422                            data = Some(DenseMatrix::new(column.len()?));
423                        }
424                        let matrix = data.as_mut().unwrap();
425                        if matrix.rows() != column.len()? {
426                            return Err(PyValueError::new_err("Invalid number of rows"));
427                        }
428                        for (i, x) in column.try_iter()?.enumerate() {
429                            matrix[i][s.as_index()] = x?.extract::<u32>()?;
430                        }
431                    }
432                }
433
434                match data {
435                    None => Err(PyValueError::new_err("Invalid count matrix")),
436                    Some(matrix) => match lightmotif::CountMatrix::<$alphabet>::new(matrix) {
437                        Ok(counts) => Ok(CountMatrix::new(counts).into()),
438                        Err(_) => Err(PyValueError::new_err("Inconsistent rows in count matrix")),
439                    },
440                }
441            }};
442        }
443        if protein {
444            run!(Protein)
445        } else {
446            run!(Dna)
447        }
448    }
449
450    pub fn __eq__<'py>(&self, object: Bound<'py, PyAny>) -> PyResult<bool> {
451        if let Ok(other) = object.extract::<PyRef<Self>>() {
452            Ok(self.data == other.data)
453        } else {
454            Ok(false)
455        }
456    }
457
458    pub fn __len__(&self) -> usize {
459        self.data.rows()
460    }
461
462    pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<Bound<'py, PyAny>> {
463        let py = slf.py();
464
465        let mut index_ = index;
466        if index_ < 0 {
467            index_ += slf.data.rows() as isize;
468        }
469        if index_ < 0 || (index_ as usize) >= slf.data.rows() {
470            return Err(PyIndexError::new_err(index));
471        }
472
473        let row = slf.data.get(index as usize);
474        row.into_pyobject(py)
475    }
476
477    /// `bool`: `True` if the count matrix stores protein counts.
478    #[getter]
479    pub fn protein(&self) -> bool {
480        matches!(self.data, CountMatrixData::Protein(_))
481    }
482
483    /// Normalize this count matrix to obtain a position weight matrix.
484    ///
485    /// This method converts the count matrix to a weight matrix. Each row
486    /// from the matrix is normalized so that they sum to ``1.0``. Each element
487    /// is then divided by a uniform background probability to obtain
488    /// odds-ratio at every position of the motif. Pseudocounts can be given
489    /// to prevent zero elements, which may translate into -∞ scores in the
490    /// final position-specific scoring matrix.
491    ///
492    /// Arguments:
493    ///     pseudocount (`float`, `dict` or `None`): The pseudocounts to apply
494    ///         before normalizing the count matrix. If a `float` is given,
495    ///         then a similar pseudocount is applied to every column of the
496    ///         matrix (excluding the default symbol). Otherwise, a `dict`
497    ///         may be given to map each symbol of the alphabet to a distinct
498    ///         pseudocount. If `None` given, no pseudocount is used.
499    ///
500    #[pyo3(signature = (pseudocount=None))]
501    pub fn normalize(&self, pseudocount: Option<PyObject>) -> PyResult<WeightMatrix> {
502        macro_rules! run {
503            ($data:ident, $alphabet:ty) => {{
504                let pseudo = Python::with_gil(|py| {
505                    if let Some(obj) = pseudocount {
506                        if let Ok(x) = obj.extract::<f32>(py) {
507                            Ok(lightmotif::abc::Pseudocounts::from(x))
508                        } else if let Ok(d) = obj.extract::<Bound<PyDict>>(py) {
509                            let p = dict_to_alphabet_array::<$alphabet>(d)?;
510                            Ok(lightmotif::abc::Pseudocounts::from(p))
511                        } else {
512                            Err(PyTypeError::new_err("Invalid type for pseudocount"))
513                        }
514                    } else {
515                        Ok(lightmotif::abc::Pseudocounts::default())
516                    }
517                })?;
518                let data = $data.to_freq(pseudo).to_weight(None);
519                Ok(WeightMatrix::new(data))
520            }};
521        }
522        match &self.data {
523            CountMatrixData::Dna(dna) => run!(dna, Dna),
524            CountMatrixData::Protein(prot) => run!(prot, Protein),
525        }
526    }
527}
528
529impl From<CountMatrixData> for CountMatrix {
530    fn from(data: CountMatrixData) -> Self {
531        Self { data }
532    }
533}
534
535// --- WeightMatrix ------------------------------------------------------------
536
537alphabet_data_enum!(
538    #[derive(Clone, Debug, PartialEq)]
539    enum WeightMatrixData(lightmotif::pwm::WeightMatrix)
540);
541
542impl_matrix_methods!(WeightMatrixData);
543
544impl WeightMatrixData {
545    fn get(&self, index: usize) -> &[f32] {
546        apply_self!(self, |x| &x.matrix()[index])
547    }
548}
549
550/// A matrix storing position-specific odds-ratio for a motif.
551#[pyclass(module = "lightmotif.lib")]
552#[derive(Clone, Debug)]
553pub struct WeightMatrix {
554    data: WeightMatrixData,
555}
556
557impl WeightMatrix {
558    fn new<D>(data: D) -> Self
559    where
560        D: Into<WeightMatrixData>,
561    {
562        Self { data: data.into() }
563    }
564}
565
566#[pymethods]
567impl WeightMatrix {
568    pub fn __eq__<'py>(&self, object: Bound<'py, PyAny>) -> PyResult<bool> {
569        if let Ok(other) = object.extract::<PyRef<Self>>() {
570            Ok(self.data == other.data)
571        } else {
572            Ok(false)
573        }
574    }
575
576    pub fn __len__(&self) -> usize {
577        self.data.rows()
578    }
579
580    pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<Bound<'py, PyAny>> {
581        let py = slf.py();
582
583        let mut index_ = index;
584        if index_ < 0 {
585            index_ += slf.data.rows() as isize;
586        }
587        if index_ < 0 || (index_ as usize) >= slf.data.rows() {
588            return Err(PyIndexError::new_err(index));
589        }
590
591        let row = slf.data.get(index as usize);
592        row.into_pyobject(py)
593    }
594
595    /// `bool`: `True` if the weight matrix stores protein weights.
596    #[getter]
597    pub fn protein(&self) -> bool {
598        matches!(self.data, WeightMatrixData::Protein(_))
599    }
600
601    /// Log-scale this weight matrix to obtain a position-specific scoring matrix.
602    ///
603    /// Arguments:
604    ///     background (`dict` or `None`): The background frequencies to use for
605    ///         rescaling the weight matrix before computing log-odds-ratio. If
606    ///         `None` given, uniform background frequencies will be used.
607    ///
608    #[pyo3(signature=(background=None, base=2.0))]
609    pub fn log_odds(&self, background: Option<PyObject>, base: f32) -> PyResult<ScoringMatrix> {
610        macro_rules! run {
611            ($data:ident, $alphabet:ty) => {{
612                // extract the background from the method argument
613                let bg = Python::with_gil(|py| {
614                    if let Some(obj) = background {
615                        if let Ok(d) = obj.extract::<Bound<PyDict>>(py) {
616                            let p = dict_to_alphabet_array::<$alphabet>(d)?;
617                            lightmotif::abc::Background::new(p).map_err(|_| {
618                                PyValueError::new_err("Invalid background frequencies")
619                            })
620                        } else {
621                            Err(PyTypeError::new_err("Invalid type for pseudocount"))
622                        }
623                    } else {
624                        Ok(lightmotif::abc::Background::uniform())
625                    }
626                })?;
627                // rescale if backgrounds do not match
628                let pwm = match bg.frequencies() != $data.background().frequencies() {
629                    false => $data.rescale(bg),
630                    true => $data.clone(),
631                };
632                Ok(ScoringMatrix::new(pwm.to_scoring_with_base(base)))
633            }};
634        }
635        match &self.data {
636            WeightMatrixData::Dna(dna) => run!(dna, Dna),
637            WeightMatrixData::Protein(protein) => run!(protein, Protein),
638        }
639    }
640}
641
642impl From<WeightMatrixData> for WeightMatrix {
643    fn from(data: WeightMatrixData) -> Self {
644        Self::new(data)
645    }
646}
647
648// --- ScoringMatrix -----------------------------------------------------------
649
650alphabet_data_enum!(
651    #[derive(Clone, Debug, PartialEq)]
652    enum ScoringMatrixData(lightmotif::pwm::ScoringMatrix)
653);
654
655impl_matrix_methods!(ScoringMatrixData);
656
657impl ScoringMatrixData {
658    fn get(&self, index: usize) -> &[f32] {
659        apply_self!(self, |x| &x[index])
660    }
661
662    unsafe fn as_ptr(&self) -> *const f32 {
663        apply_self!(self, |x| x.matrix()[0].as_ptr())
664    }
665}
666
667/// A matrix storing position-specific odds-ratio for a motif.
668#[pyclass(module = "lightmotif.lib")]
669#[derive(Debug)]
670pub struct ScoringMatrix {
671    data: ScoringMatrixData,
672    distribution: Option<Py<ScoreDistribution>>,
673    shape: [Py_ssize_t; 2],
674    strides: [Py_ssize_t; 2],
675}
676
677impl ScoringMatrix {
678    fn new<D>(data: D) -> Self
679    where
680        D: Into<ScoringMatrixData>,
681    {
682        let data = data.into();
683        let cols = data.columns();
684        let rows = data.rows();
685        let stride = data.stride();
686        let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
687        let strides = [
688            (stride * std::mem::size_of::<f32>()) as Py_ssize_t,
689            std::mem::size_of::<f32>() as Py_ssize_t,
690        ];
691        Self {
692            data,
693            shape,
694            strides,
695            distribution: None,
696        }
697    }
698}
699
700#[pymethods]
701impl ScoringMatrix {
702    /// Create a new scoring matrix.
703    #[new]
704    #[pyo3(signature = (values, background = None, *, protein = false))]
705    #[allow(unused)]
706    pub fn __init__<'py>(
707        values: Bound<'py, PyDict>,
708        background: Option<PyObject>,
709        protein: bool,
710    ) -> PyResult<PyClassInitializer<Self>> {
711        macro_rules! run {
712            ($alphabet:ty) => {{
713                // extract the background from the method argument
714                let bg = Python::with_gil(|py| {
715                    if let Some(obj) = background {
716                        if let Ok(d) = obj.extract::<Bound<PyDict>>(py) {
717                            let p = dict_to_alphabet_array::<$alphabet>(d)?;
718                            lightmotif::abc::Background::<$alphabet>::new(p).map_err(|_| {
719                                PyValueError::new_err("Invalid background frequencies")
720                            })
721                        } else {
722                            Err(PyTypeError::new_err("Invalid type for pseudocount"))
723                        }
724                    } else {
725                        Ok(lightmotif::abc::Background::uniform())
726                    }
727                })?;
728                // build data
729                let mut data: Option<DenseMatrix<f32, <$alphabet as Alphabet>::K>> = None;
730                for s in <$alphabet as Alphabet>::symbols() {
731                    let key = String::from(s.as_char());
732                    if let Some(res) = values.get_item(&key)? {
733                        let column = res.downcast::<PyList>()?;
734                        if data.is_none() {
735                            data = Some(DenseMatrix::new(column.len()));
736                        }
737                        let matrix = data.as_mut().unwrap();
738                        if matrix.rows() != column.len() {
739                            return Err(PyValueError::new_err("Invalid number of rows"));
740                        }
741                        for (i, x) in column.iter().enumerate() {
742                            matrix[i][s.as_index()] = x.extract::<f32>()?;
743                        }
744                    }
745                }
746                // create matrix
747                match data {
748                    None => Err(PyValueError::new_err("Invalid count matrix")),
749                    Some(matrix) => Ok(Self::new(lightmotif::ScoringMatrix::<$alphabet>::new(
750                        bg, matrix,
751                    ))
752                    .into()),
753                }
754            }};
755        }
756
757        if protein {
758            run!(Protein)
759        } else {
760            run!(Dna)
761        }
762    }
763
764    pub fn __eq__(&self, object: Bound<PyAny>) -> PyResult<bool> {
765        if let Ok(other) = object.extract::<PyRef<Self>>() {
766            Ok(self.data == other.data)
767        } else {
768            Ok(false)
769        }
770    }
771
772    pub fn __len__(&self) -> usize {
773        self.data.rows()
774    }
775
776    pub fn __getitem__<'py>(slf: PyRef<'py, Self>, index: isize) -> PyResult<Bound<'py, PyAny>> {
777        let py = slf.py();
778        let mut index_ = index;
779        if index_ < 0 {
780            index_ += slf.data.rows() as isize;
781        }
782        if index_ < 0 || (index_ as usize) >= slf.data.rows() {
783            return Err(PyIndexError::new_err(index));
784        }
785        slf.data.get(index as usize).into_pyobject(py)
786    }
787
788    #[cfg(not(feature = "abi3"))]
789    unsafe fn __getbuffer__(
790        mut slf: PyRefMut<'_, Self>,
791        view: *mut pyo3::ffi::Py_buffer,
792        flags: std::os::raw::c_int,
793    ) -> PyResult<()> {
794        if view.is_null() {
795            return Err(PyBufferError::new_err("View is null"));
796        }
797        if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
798            return Err(PyBufferError::new_err("Object is not writable"));
799        }
800
801        let data = slf.data.as_ptr();
802        (*view).buf = data as *mut std::os::raw::c_void;
803        (*view).len = -1;
804        (*view).readonly = 1;
805        (*view).itemsize = std::mem::size_of::<f32>() as isize;
806
807        (*view).obj = slf.as_ptr();
808        pyo3::ffi::Py_INCREF((*view).obj);
809        let msg = std::ffi::CStr::from_bytes_with_nul(b"f\0").unwrap();
810        (*view).format = msg.as_ptr() as *mut _;
811
812        (*view).ndim = 2;
813        (*view).shape = slf.shape.as_mut_ptr();
814        (*view).strides = slf.strides.as_mut_ptr();
815        (*view).suboffsets = std::ptr::null_mut();
816        (*view).internal = std::ptr::null_mut();
817
818        Ok(())
819    }
820
821    #[getter]
822    pub fn score_distribution<'py>(slf: Bound<'py, Self>) -> PyResult<Py<ScoreDistribution>> {
823        let py = slf.py();
824        if slf.borrow().distribution.is_none() {
825            let data = match &slf.borrow().data {
826                ScoringMatrixData::Dna(dna) => {
827                    ScoreDistributionData::from(dna.to_score_distribution())
828                }
829                ScoringMatrixData::Protein(prot) => {
830                    ScoreDistributionData::from(prot.to_score_distribution())
831                }
832            };
833            slf.borrow_mut().distribution = Some(Py::new(py, ScoreDistribution::from(data))?);
834        }
835        Ok(slf.borrow().distribution.as_ref().unwrap().clone_ref(py))
836    }
837
838    /// `bool`: `True` if the scoring matrix stores protein scores.
839    #[getter]
840    pub fn protein(&self) -> bool {
841        matches!(self.data, ScoringMatrixData::Protein(_))
842    }
843
844    /// Calculate the PSSM score for all positions of the given sequence.
845    ///
846    /// Returns:
847    ///     `~lightmotif.StripedScores`: The PSSM scores for every position
848    ///     of the input sequence, stored into a striped matrix for fast
849    ///     vectorized operations.
850    ///
851    /// Note:
852    ///     This method uses the best implementation for the local platform,
853    ///     prefering AVX2 if available.
854    ///
855    pub fn calculate(
856        slf: Bound<'_, Self>,
857        sequence: &mut StripedSequence,
858    ) -> PyResult<StripedScores> {
859        let pssm = &slf.borrow().data;
860        let seq = &mut sequence.data;
861        match (seq, pssm) {
862            (StripedSequenceData::Dna(dna), ScoringMatrixData::Dna(pssm)) => {
863                let pli = lightmotif::pli::Pipeline::dispatch();
864                dna.configure(pssm);
865                Ok(slf.py().allow_threads(|| pli.score(pssm, dna)).into())
866            }
867            (StripedSequenceData::Protein(prot), ScoringMatrixData::Protein(pssm)) => {
868                let pli = lightmotif::pli::Pipeline::dispatch();
869                prot.configure(pssm);
870                Ok(slf.py().allow_threads(|| pli.score(pssm, prot)).into())
871            }
872            (_, _) => Err(PyValueError::new_err("alphabet mismatch")),
873        }
874    }
875
876    /// Translate an absolute score to a P-value for this PSSM.
877    #[pyo3(signature=(score, method="meme"))]
878    pub fn pvalue(slf: Bound<'_, Self>, score: f64, method: &str) -> PyResult<f64> {
879        let py = slf.py();
880        match method {
881            "tfmpvalue" => {
882                #[cfg(feature = "tfmpvalue")]
883                match &slf.borrow().data {
884                    ScoringMatrixData::Dna(dna) => Ok(TfmPvalue::new(&dna).pvalue(score)),
885                    ScoringMatrixData::Protein(prot) => Ok(TfmPvalue::new(&prot).pvalue(score)),
886                }
887                #[cfg(not(feature = "tfmpvalue"))]
888                Err(PyRuntimeError::new_err(
889                    "package compiled without `lightmotif-tfmpvalue`",
890                ))
891            }
892            "meme" => {
893                let dist = Self::score_distribution(slf)?;
894                match &dist.bind(py).borrow().data {
895                    ScoreDistributionData::Dna(dna) => Ok(dna.pvalue(score as f32)),
896                    ScoreDistributionData::Protein(prot) => Ok(prot.pvalue(score as f32)),
897                }
898            }
899            other => Err(PyValueError::new_err(format!(
900                "invalid pvalue method: {:?}",
901                other
902            ))),
903        }
904    }
905
906    pub fn max_score(slf: PyRef<'_, Self>) -> f32 {
907        match &slf.data {
908            ScoringMatrixData::Dna(dna) => dna.max_score(),
909            ScoringMatrixData::Protein(prot) => prot.max_score(),
910        }
911    }
912
913    /// Translate a P-value to an absolute score for this PSSM.
914    #[pyo3(signature=(pvalue, method="meme"))]
915    pub fn score(slf: Bound<'_, Self>, pvalue: f64, method: &str) -> PyResult<f64> {
916        let py = slf.py();
917        match method {
918            "tfmpvalue" => {
919                #[cfg(feature = "tfmpvalue")]
920                match &slf.borrow().data {
921                    ScoringMatrixData::Dna(dna) => Ok(TfmPvalue::new(&dna).score(pvalue)),
922                    ScoringMatrixData::Protein(prot) => Ok(TfmPvalue::new(&prot).score(pvalue)),
923                }
924                #[cfg(not(feature = "tfmpvalue"))]
925                Err(PyRuntimeError::new_err(
926                    "package compiled without `lightmotif-tfmpvalue`",
927                ))
928            }
929            "meme" => {
930                let dist = Self::score_distribution(slf)?;
931                match &dist.bind(py).borrow().data {
932                    ScoreDistributionData::Dna(dna) => Ok(dna.score(pvalue) as f64),
933                    ScoreDistributionData::Protein(prot) => Ok(prot.score(pvalue) as f64),
934                }
935            }
936            other => Err(PyValueError::new_err(format!(
937                "invalid pvalue method: {:?}",
938                other
939            ))),
940        }
941    }
942
943    /// Compute the reverse complement of this scoring matrix.
944    pub fn reverse_complement(slf: PyRef<'_, Self>) -> PyResult<ScoringMatrix> {
945        match &slf.data {
946            ScoringMatrixData::Dna(dna) => Ok(Self::from(ScoringMatrixData::from(
947                dna.reverse_complement(),
948            ))),
949            ScoringMatrixData::Protein(_) => Err(PyRuntimeError::new_err(
950                "cannot complement a protein sequence",
951            )),
952        }
953    }
954}
955
956impl From<ScoringMatrixData> for ScoringMatrix {
957    fn from(data: ScoringMatrixData) -> Self {
958        Self::new(data)
959    }
960}
961
962// --- ScoreDistribution -------------------------------------------------------
963
964alphabet_data_enum!(
965    #[derive(Clone, Debug)]
966    enum ScoreDistributionData(lightmotif::pwm::dist::ScoreDistribution)
967);
968
969impl ScoreDistributionData {
970    fn sf(&self) -> &[f64] {
971        apply_self!(self, |x| x.sf())
972    }
973}
974
975/// A matrix storing position-specific odds-ratio for a motif.
976#[pyclass(module = "lightmotif.lib")]
977#[derive(Clone, Debug)]
978pub struct ScoreDistribution {
979    data: ScoreDistributionData,
980}
981
982impl From<ScoreDistributionData> for ScoreDistribution {
983    fn from(data: ScoreDistributionData) -> Self {
984        Self { data }
985    }
986}
987
988#[pymethods]
989impl ScoreDistribution {
990    /// Get the underlying memory of the encoded sequence.
991    #[cfg(not(feature = "abi3"))]
992    unsafe fn __getbuffer__(
993        slf: PyRefMut<'_, Self>,
994        view: *mut pyo3::ffi::Py_buffer,
995        flags: std::os::raw::c_int,
996    ) -> PyResult<()> {
997        if view.is_null() {
998            return Err(PyBufferError::new_err("View is null"));
999        }
1000        if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
1001            return Err(PyBufferError::new_err("Object is not writable"));
1002        }
1003
1004        let array: &[_] = slf.data.sf();
1005        (*view).buf = array.as_ptr() as *mut std::os::raw::c_void;
1006        (*view).len = (array.len() * std::mem::size_of::<f64>()) as isize;
1007        (*view).readonly = 1;
1008        (*view).itemsize = std::mem::size_of::<f64>() as isize;
1009
1010        (*view).obj = slf.as_ptr();
1011        pyo3::ffi::Py_INCREF((*view).obj);
1012        let msg = std::ffi::CStr::from_bytes_with_nul(b"d\0").unwrap();
1013        (*view).format = msg.as_ptr() as *mut _;
1014
1015        (*view).ndim = 1;
1016        (*view).shape = std::ptr::null_mut();
1017        (*view).strides = std::ptr::null_mut();
1018        (*view).suboffsets = std::ptr::null_mut();
1019        (*view).internal = std::ptr::null_mut();
1020
1021        Ok(())
1022    }
1023}
1024
1025// --- Scores ------------------------------------------------------------------
1026
1027/// A striped matrix storing scores obtained with a scoring matrix.
1028#[pyclass(module = "lightmotif.lib", sequence)]
1029#[derive(Clone, Debug)]
1030pub struct StripedScores {
1031    scores: lightmotif::scores::StripedScores<f32>,
1032    shape: [Py_ssize_t; 2],
1033    strides: [Py_ssize_t; 2],
1034}
1035
1036#[pymethods]
1037impl StripedScores {
1038    fn __len__(&self) -> usize {
1039        self.scores.max_index()
1040    }
1041
1042    fn __getitem__(&self, index: isize) -> PyResult<f32> {
1043        if index < self.scores.max_index() as isize && index >= 0 {
1044            Ok(self.scores[index as usize])
1045        } else {
1046            Err(PyIndexError::new_err("list index out of range"))
1047        }
1048    }
1049
1050    #[cfg(not(feature = "abi3"))]
1051    unsafe fn __getbuffer__(
1052        mut slf: PyRefMut<'_, Self>,
1053        view: *mut pyo3::ffi::Py_buffer,
1054        flags: std::os::raw::c_int,
1055    ) -> PyResult<()> {
1056        if view.is_null() {
1057            return Err(PyBufferError::new_err("View is null"));
1058        }
1059        if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
1060            return Err(PyBufferError::new_err("Object is not writable"));
1061        }
1062
1063        let data = slf.scores.matrix()[0].as_ptr();
1064        (*view).buf = data as *mut std::os::raw::c_void;
1065        (*view).len = -1;
1066        (*view).readonly = 1;
1067        (*view).itemsize = std::mem::size_of::<f32>() as isize;
1068
1069        (*view).obj = slf.as_ptr();
1070        pyo3::ffi::Py_INCREF((*view).obj);
1071        let msg = std::ffi::CStr::from_bytes_with_nul(b"f\0").unwrap();
1072        (*view).format = msg.as_ptr() as *mut _;
1073
1074        (*view).ndim = 2;
1075        (*view).shape = slf.shape.as_mut_ptr();
1076        (*view).strides = slf.strides.as_mut_ptr();
1077        (*view).suboffsets = std::ptr::null_mut();
1078        (*view).internal = std::ptr::null_mut();
1079
1080        Ok(())
1081    }
1082
1083    /// Return all positions with a score greater or equal to the threshold.
1084    ///
1085    /// Returns:
1086    ///     `list` of `int`: The indices of the position with a score greater or
1087    ///     equal to the given threshold. Note that the indices may or may not
1088    ///     be sorted, depending on the implementation.
1089    ///
1090    /// Note:
1091    ///     This method uses the best implementation for the local platform,
1092    ///     prefering AVX2 if available.
1093    ///
1094    pub fn threshold(slf: PyRef<'_, Self>, threshold: f32) -> Vec<usize> {
1095        let scores = &slf.scores;
1096        slf.py().allow_threads(|| scores.threshold(threshold))
1097    }
1098
1099    /// Return the maximum score, if the score matrix is not empty.
1100    ///
1101    /// Returns:
1102    ///     `float` or `None`: The maximum score, if any.
1103    ///
1104    /// Note:
1105    ///     This method uses the best implementation for the local platform,
1106    ///     prefering AVX2 if available.
1107    ///
1108    pub fn max(slf: PyRef<'_, Self>) -> Option<f32> {
1109        let scores = &slf.scores;
1110        slf.py().allow_threads(|| scores.max())
1111    }
1112
1113    /// Return the position of the maximum score, if the score matrix is not empty.
1114    ///
1115    /// Returns:
1116    ///     `int` or `None`: The position of the maximum score, if any.
1117    ///
1118    /// Note:
1119    ///     This method uses the best implementation for the local platform,
1120    ///     prefering AVX2 if available.
1121    ///
1122    pub fn argmax(slf: PyRef<'_, Self>) -> Option<usize> {
1123        let scores = &slf.scores;
1124        slf.py().allow_threads(|| scores.argmax())
1125    }
1126}
1127
1128impl From<lightmotif::scores::StripedScores<f32>> for StripedScores {
1129    fn from(scores: lightmotif::scores::StripedScores<f32>) -> Self {
1130        // assert_eq!(scores.range().start, 0);
1131        // extract the matrix shape
1132        let cols = scores.matrix().columns();
1133        let rows = scores.matrix().rows();
1134        // record the matrix shape as a Fortran buffer
1135        let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
1136        let strides = [
1137            std::mem::size_of::<f32>() as Py_ssize_t,
1138            (scores.matrix().stride() * std::mem::size_of::<f32>()) as Py_ssize_t,
1139        ];
1140        // mask the remaining positions that are outside the sequence length
1141        // let length = scores.len();
1142        // let data = scores.matrix_mut();
1143        // for i in length..rows * cols {
1144        //     let row = i % rows;
1145        //     let col = i / rows;
1146        //     data[row][col] = -f32::INFINITY;
1147        // }
1148        // return a Python object implementing the buffer protocol
1149        Self {
1150            scores,
1151            shape,
1152            strides,
1153        }
1154    }
1155}
1156
1157// --- Motif -------------------------------------------------------------------
1158
1159/// A base object storing information about a motif.
1160#[pyclass(module = "lightmotif.lib", subclass)]
1161#[derive(Debug)]
1162pub struct Motif {
1163    #[pyo3(get)]
1164    /// `CountMatrix` or `None`: The count matrix for this motif.
1165    ///
1166    /// This may be `None` if the motif was loaded from a format that does
1167    /// not store counts but frequencies, such as the ``uniprobe`` format.
1168    counts: Option<Py<CountMatrix>>,
1169    #[pyo3(get)]
1170    /// `WeightMatrix`: The weight matrix for this motif.
1171    pwm: Py<WeightMatrix>,
1172    #[pyo3(get)]
1173    /// `ScoringMatrix`: The scoring matrix for this motif.
1174    pssm: Py<ScoringMatrix>,
1175    #[pyo3(get)]
1176    /// `str` or `None`: An optional name for the motif.
1177    name: Option<String>,
1178}
1179
1180impl Motif {
1181    fn from_counts<A>(py: Python, counts: lightmotif::pwm::CountMatrix<A>) -> PyResult<Self>
1182    where
1183        A: Alphabet,
1184        CountMatrixData: From<lightmotif::pwm::CountMatrix<A>>,
1185        WeightMatrixData: From<lightmotif::pwm::WeightMatrix<A>>,
1186        ScoringMatrixData: From<lightmotif::pwm::ScoringMatrix<A>>,
1187    {
1188        let weights = counts.to_freq(0.0).to_weight(None);
1189        let scoring = weights.to_scoring();
1190        Ok(Motif {
1191            counts: Some(Py::new(py, CountMatrix::new(counts))?),
1192            pwm: Py::new(py, WeightMatrix::new(weights))?,
1193            pssm: Py::new(py, ScoringMatrix::new(scoring))?,
1194            name: None,
1195        })
1196    }
1197
1198    fn from_weights<A>(py: Python, weights: lightmotif::pwm::WeightMatrix<A>) -> PyResult<Self>
1199    where
1200        A: Alphabet,
1201        WeightMatrixData: From<lightmotif::pwm::WeightMatrix<A>>,
1202        ScoringMatrixData: From<lightmotif::pwm::ScoringMatrix<A>>,
1203    {
1204        let scoring = weights.to_scoring();
1205        Ok(Motif {
1206            counts: None,
1207            pwm: Py::new(py, WeightMatrix::new(weights))?,
1208            pssm: Py::new(py, ScoringMatrix::new(scoring))?,
1209            name: None,
1210        })
1211    }
1212}
1213
1214#[pymethods]
1215impl Motif {
1216    /// `bool`: `True` if the motif is a protein motif.
1217    #[getter]
1218    pub fn protein<'py>(slf: PyRef<'py, Self>) -> bool {
1219        let py = slf.py();
1220        matches!(
1221            slf.pssm.bind(py).borrow().data,
1222            ScoringMatrixData::Protein(_)
1223        )
1224    }
1225}
1226
1227// --- Scanner -----------------------------------------------------------------
1228
1229/// A fast scanner for identifying high scoring positions in a sequence.
1230///
1231/// This class internally uses a discretized version of the matrix to
1232/// identify candidate positions, and then rescores blocks with the full
1233/// algorithm only if needed. Using a `Scanner` is likely faster than
1234/// using the `~ScoringMatrix.calculate` method for rare sites or high
1235/// thresholds.
1236///
1237/// Note:
1238///     This algorithm is only available for DNA motifs because of
1239///     implementation requirements.
1240///
1241#[pyclass(module = "lightmotif.lib")]
1242#[derive(Debug)]
1243pub struct Scanner {
1244    #[allow(unused)]
1245    pssm: Py<ScoringMatrix>,
1246    #[allow(unused)]
1247    sequence: Py<StripedSequence>,
1248    data: lightmotif::scan::Scanner<
1249        'static,
1250        Dna,
1251        &'static lightmotif::pwm::ScoringMatrix<Dna>,
1252        &'static lightmotif::seq::StripedSequence<Dna>,
1253    >,
1254}
1255
1256#[pymethods]
1257impl Scanner {
1258    #[new]
1259    #[pyo3(signature = (pssm, sequence, threshold = 0.0, block_size = 256))]
1260    fn __init__<'py>(
1261        pssm: Bound<'py, ScoringMatrix>,
1262        sequence: Bound<'py, StripedSequence>,
1263        threshold: f32,
1264        block_size: usize,
1265    ) -> PyResult<PyClassInitializer<Self>> {
1266        match (
1267            &pssm.try_borrow()?.data,
1268            &mut sequence.try_borrow_mut()?.data,
1269        ) {
1270            (ScoringMatrixData::Dna(p), StripedSequenceData::Dna(s)) => {
1271                s.configure(&p);
1272                // transmute (!!!!!) the scanner so that its lifetime is 'static
1273                // (i.e. the reference to the PSSM and sequence never expire),
1274                // which is possible because we are building a self-referential
1275                // struct and self.scanner.next() will only be called with
1276                // the GIL held...
1277                let scanner = unsafe {
1278                    let mut scanner = lightmotif::scan::Scanner::<Dna, _, _>::new(p, s);
1279                    scanner.threshold(threshold);
1280                    scanner.block_size(block_size);
1281                    std::mem::transmute(scanner)
1282                };
1283                Ok(PyClassInitializer::from(Scanner {
1284                    data: scanner,
1285                    pssm: pssm.unbind(),
1286                    sequence: sequence.unbind(),
1287                }))
1288            }
1289            (ScoringMatrixData::Protein(_), StripedSequenceData::Protein(_)) => {
1290                Err(PyValueError::new_err("protein scanner is not supported"))
1291            }
1292            (_, _) => Err(PyValueError::new_err("alphabet mismatch")),
1293        }
1294    }
1295
1296    fn __iter__(slf: PyRef<Self>) -> PyRef<Self> {
1297        slf
1298    }
1299
1300    fn __next__(mut slf: PyRefMut<Self>) -> Option<Hit> {
1301        slf.data.next().map(Hit::from)
1302    }
1303}
1304
1305/// A hit found by a `~lightmotif.Scanner`.
1306#[pyclass(module = "lightmotif.lib")]
1307#[derive(Debug)]
1308pub struct Hit {
1309    #[pyo3(get)]
1310    /// `int`: The index of the hit, zero-based.
1311    position: usize,
1312    #[pyo3(get)]
1313    /// `float`: The score of the scoring matrix at the hit position.
1314    score: f32,
1315}
1316
1317impl From<lightmotif::scan::Hit> for Hit {
1318    fn from(value: lightmotif::scan::Hit) -> Self {
1319        Hit {
1320            position: value.position(),
1321            score: value.score(),
1322        }
1323    }
1324}
1325
1326// --- Functions ---------------------------------------------------------------
1327
1328/// Create a new motif from an iterable of sequences.
1329///
1330/// All sequences must have the same length, and must contain only valid
1331/// alphabet symbols (*ATGCN* for nucleotides, *ACDEFGHIKLMNPQRSTVWYX*
1332/// for proteins).
1333///
1334/// Arguments:
1335///     sequences (iterable of `str`): The sequences to use to build the
1336///         count matrix for the motif.
1337///     protein (`bool`): Pass `True` to build a protein motif. Defaults
1338///         to `False`.
1339///
1340/// Example:
1341///     >>> sequences = ["TATAAT", "TATAAA", "TATATT", "TATAAT"]
1342///     >>> motif = lightmotif.create(sequences)
1343///
1344/// Returns:
1345///     `~lightmotif.Motif`: The motif corresponding to the given sequences.
1346///
1347/// Raises:
1348///     `ValueError`: When any of the sequences contain an invalid character,
1349///         or when the sequence lengths are not consistent.
1350///
1351#[pyfunction]
1352#[pyo3(signature = (sequences, *, protein = false, name = None))]
1353pub fn create(sequences: Bound<PyAny>, protein: bool, name: Option<String>) -> PyResult<Motif> {
1354    let py = sequences.py();
1355    macro_rules! run {
1356        ($alphabet:ty) => {{
1357            let mut encoded = Vec::new();
1358            for seq in sequences.try_iter()? {
1359                let s = seq?.extract::<Bound<PyString>>()?;
1360                let sequence = s.to_cow()?;
1361                let x = py
1362                    .allow_threads(|| lightmotif::EncodedSequence::<$alphabet>::encode(&*sequence))
1363                    .map_err(|_| PyValueError::new_err("Invalid symbol in sequence"))?;
1364                encoded.push(x);
1365            }
1366
1367            let data = lightmotif::CountMatrix::from_sequences(encoded)
1368                .map_err(|_| PyValueError::new_err("Inconsistent sequence length"))?;
1369            let weights = data.to_freq(0.0).to_weight(None);
1370            let scoring = weights.to_scoring();
1371
1372            Ok(Motif {
1373                counts: Some(Py::new(py, CountMatrix::new(data))?),
1374                pwm: Py::new(py, WeightMatrix::new(weights))?,
1375                pssm: Py::new(py, ScoringMatrix::new(scoring))?,
1376                name,
1377            })
1378        }};
1379    }
1380
1381    if protein {
1382        run!(Protein)
1383    } else {
1384        run!(Dna)
1385    }
1386}
1387
1388/// Encode and stripe a text sequence.
1389///
1390/// Arguments:
1391///     sequence (`str`): The sequence to encode and stripe.
1392///     protein (`bool`): Pass `True` to treat the sequence as a protein
1393///         sequence.
1394///
1395/// Returns:
1396///     `~lightmotif.StripedSequence`: A striped sequence containing the
1397///     sequence data.
1398///
1399/// Raises:
1400///     `ValueError`: When the sequences contains an invalid character.
1401///
1402#[pyfunction]
1403#[pyo3(signature = (sequence, *, protein=false))]
1404pub fn stripe(sequence: Bound<PyString>, protein: bool) -> PyResult<StripedSequence> {
1405    let py = sequence.py();
1406    let encoded = EncodedSequence::__init__(sequence, protein).and_then(|e| Py::new(py, e))?;
1407    let striped = encoded.borrow(py).stripe();
1408    Ok(striped)
1409}
1410
1411/// Scan a sequence using a fast scanner implementation to identify hits.
1412///
1413/// See `~lightmotif.Scanner` for more information.
1414///
1415/// Arguments:
1416///     pssm (`~lightmotif.ScoringMatrix`): The scoring matrix to use to
1417///         score the sequence with.
1418///     sequence (`~lightmotif.StripedSequence`): The striped sequence to
1419///         process with the scanner. Longer sequences benifit more from the
1420///         scanning implementation.
1421///     threshold (`float`): The score threshold to use for filtering hits.
1422///         Use `ScoringMatrix.score` to compute a score threshold from a
1423///         target *p-value*. A higher threshold will result in less
1424///         candidate hits and total runtime.
1425///     
1426/// Returns:
1427///     `~lightmotif.Scanner`: The scanner for finding candidate hits in
1428///     the target sequence.
1429///  
1430/// Raises:
1431///     `ValueError`: When either ``pssm`` or ``sequence`` are not using
1432///         the DNA alphabet.
1433///
1434/// Note:
1435///     This algorithm is only available for DNA motifs because of
1436///     implementation requirements.  
1437///
1438#[pyfunction]
1439#[pyo3(signature = (pssm, sequence, *, threshold = 0.0, block_size = 256))]
1440pub fn scan<'py>(
1441    pssm: Bound<'py, ScoringMatrix>,
1442    sequence: Bound<'py, StripedSequence>,
1443    threshold: f32,
1444    block_size: usize,
1445) -> PyResult<Bound<'py, Scanner>> {
1446    let py = pssm.py();
1447    Bound::new(
1448        py,
1449        Scanner::__init__(pssm, sequence, threshold, block_size)?,
1450    )
1451}
1452
1453// --- Module ------------------------------------------------------------------
1454
1455/// PyO3 bindings to ``lightmotif``, a library for fast PWM motif scanning.
1456///
1457/// The API is similar to the `Bio.motifs` module from Biopython on purpose.
1458///
1459/// Note:
1460///     Arbitrary alphabets cannot be configured, as ``lightmotif`` uses
1461///     constant definitions of alphabets that cannot be changed at runtime.
1462///     The different sequences are treated as nucleotide sequences. To use
1463///     a protein sequence instead, most classes and functions
1464///     have a ``protein`` keyword `True`::
1465///
1466///         >>> sequences = ["PILFFRLK", "KDMLKEYL", "PFRLTHKL"]
1467///         >>> lightmotif.create(sequences)
1468///         Traceback (most recent call last):
1469///           ...
1470///         ValueError: Invalid symbol in sequence
1471///         >>> lightmotif.create(sequences, protein=True)
1472///         <lightmotif.lib.Motif object at ...>
1473///
1474#[pymodule]
1475#[pyo3(name = "lib")]
1476pub fn init<'py>(_py: Python<'py>, m: &Bound<PyModule>) -> PyResult<()> {
1477    m.add("__package__", "lightmotif")?;
1478    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
1479    m.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?;
1480
1481    #[cfg(feature = "abi3")]
1482    m.add("LIMITED_API", true)?;
1483    #[cfg(not(feature = "abi3"))]
1484    m.add("LIMITED_API", false)?;
1485
1486    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1487    m.add("AVX2_SUPPORTED", std::is_x86_feature_detected!("avx2"))?;
1488    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1489    m.add("AVX2_SUPPORTED", false)?;
1490
1491    m.add_class::<EncodedSequence>()?;
1492    m.add_class::<StripedSequence>()?;
1493
1494    m.add_class::<CountMatrix>()?;
1495    m.add_class::<WeightMatrix>()?;
1496    m.add_class::<ScoringMatrix>()?;
1497
1498    m.add_class::<ScoreDistribution>()?;
1499
1500    m.add_class::<StripedScores>()?;
1501
1502    m.add_class::<Motif>()?;
1503    m.add_class::<io::TransfacMotif>()?;
1504    m.add_class::<io::JasparMotif>()?;
1505    m.add_class::<io::UniprobeMotif>()?;
1506    m.add_class::<io::MemeMotif>()?;
1507
1508    m.add_class::<Scanner>()?;
1509    m.add_class::<Hit>()?;
1510
1511    m.add_class::<io::Loader>()?;
1512
1513    m.add_function(wrap_pyfunction!(create, m)?)?;
1514    m.add_function(wrap_pyfunction!(scan, m)?)?;
1515    m.add_function(wrap_pyfunction!(stripe, m)?)?;
1516    m.add_function(wrap_pyfunction!(io::load, m)?)?;
1517
1518    Ok(())
1519}