Skip to main content

tsalign/
lib.rs

1use std::io;
2
3use compact_genome::implementation::alphabets::dna_alphabet_or_n::DnaAlphabetOrN;
4use lib_tsalign::{
5    a_star_aligner::{
6        alignment_geometry::{AlignmentCoordinates, AlignmentRange},
7        alignment_result::AlignmentResult,
8        configurable_a_star_align::Aligner,
9        template_switch_distance::AlignmentType,
10    },
11    costs::U64Cost,
12};
13use lib_tsshow::plain_text::show_template_switches;
14use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyDict};
15use pythonize::{depythonize, pythonize};
16
17#[pyclass]
18struct TSPairwiseAlignment {
19    result: AlignmentResult<AlignmentType, U64Cost>,
20}
21
22#[pymethods]
23impl TSPairwiseAlignment {
24    fn viz_template_switches(&self) -> PyResult<()> {
25        show_template_switches(io::stdout(), &self.result, &None)
26            .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
27        Ok(())
28    }
29
30    fn stats<'a>(&'a self, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
31        Ok(pythonize(py, self.result.statistics())?)
32    }
33
34    fn cigar(&self) -> Option<String> {
35        match &self.result {
36            AlignmentResult::WithTarget { alignment, .. } => Some(alignment.cigar()),
37            AlignmentResult::WithoutTarget { .. } => None,
38        }
39    }
40
41    fn alignments<'a>(&'a self, py: Python<'a>) -> PyResult<Option<Bound<'a, PyAny>>> {
42        match &self.result {
43            AlignmentResult::WithTarget { alignment, .. } => {
44                let mut container = Vec::new();
45                alignment.iter_compact().for_each(|e| container.push(e));
46                Ok(Some(pythonize(py, &container)?))
47            }
48            AlignmentResult::WithoutTarget { .. } => Ok(None),
49        }
50    }
51}
52
53fn py_to_str(o: Bound<'_, PyAny>) -> PyResult<Vec<u8>> {
54    let str = o.str()?.to_str()?.as_bytes().to_vec();
55    Ok(str)
56}
57
58#[pyclass(name = "Aligner")]
59struct TSAligner {
60    aligner: Aligner<DnaAlphabetOrN>,
61}
62
63#[pymethods]
64impl TSAligner {
65    #[new]
66    #[pyo3(signature = (**kwargs))]
67    fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<Self> {
68        let Some(kwargs) = kwargs else {
69            return Ok(Self {
70                aligner: Aligner::new(),
71            });
72        };
73
74        let costs_str = if let Some(costs) = kwargs.get_item("costs")? {
75            let py_str: &str = costs.extract()?;
76            kwargs.del_item("costs")?;
77            Some(py_str.to_string())
78        } else {
79            None
80        };
81
82        let mut aligner: Aligner<DnaAlphabetOrN> = depythonize(kwargs)?;
83
84        if let Some(costs_str) = costs_str {
85            aligner
86                .set_costs_parse(&costs_str)
87                .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
88        }
89
90        Ok(Self { aligner })
91    }
92
93    /// Align two sequences, accounting for template switches
94    ///
95    /// The function takes a reference and a query string, and performs a global alignment on both. The output alignment may contain (short-range) template switches.
96    /// Optionally, settings can be specified on this aligner.
97    #[pyo3(signature = (reference, query, reference_name="reference", query_name="query", reference_start=None, reference_limit=None, query_start=None, query_limit=None, cost_limit=None, memory_limit=None))]
98    #[allow(clippy::too_many_arguments)]
99    fn align(
100        &self,
101        reference: Bound<'_, PyAny>, // Accepting PyAny instead of PyString to allow using e.g. `Bio.Seq` types and alike. String representation will be used.
102        query: Bound<'_, PyAny>,
103        reference_name: &str,
104        query_name: &str,
105        reference_start: Option<usize>,
106        reference_limit: Option<usize>,
107        query_start: Option<usize>,
108        query_limit: Option<usize>,
109        cost_limit: Option<u64>,
110        memory_limit: Option<usize>,
111    ) -> PyResult<Option<TSPairwiseAlignment>> {
112        let reference = py_to_str(reference)?;
113        let query = py_to_str(query)?;
114
115        let reference_start = reference_start.unwrap_or(0);
116        let reference_limit = reference_limit.unwrap_or(reference.len());
117        let query_start = query_start.unwrap_or(0);
118        let query_limit = query_limit.unwrap_or(query.len());
119        let ranges = AlignmentRange::new_offset_limit(
120            AlignmentCoordinates::new(reference_start, query_start),
121            AlignmentCoordinates::new(reference_limit, query_limit),
122        );
123
124        let result = self.aligner.align(
125            reference_name,
126            &reference,
127            query_name,
128            &query,
129            Some(ranges),
130            cost_limit,
131            memory_limit,
132        );
133
134        match result {
135            result @ AlignmentResult::WithTarget { .. } => {
136                let ts_alignment = TSPairwiseAlignment { result };
137                Ok(Some(ts_alignment))
138            }
139            AlignmentResult::WithoutTarget { .. } => Ok(None),
140        }
141    }
142}
143
144/// Bindings for the `lib_tsalign` library.
145#[pymodule]
146fn tsalign(m: &Bound<'_, PyModule>) -> PyResult<()> {
147    pyo3_log::init();
148    m.add_class::<TSPairwiseAlignment>()?;
149    m.add_class::<TSAligner>()?;
150    Ok(())
151}