assembly_theory/
python.rs

1//! Expose public `assembly_theory` functionality to a Python package using
2//! [`pyo3`](https://docs.rs/pyo3/latest/pyo3/).
3//!
4//! The package is available [on PyPI](https://pypi.org/project/assembly-theory/);
5//! see that README for installation and usage instructions. To build the
6//! Python package directly from this crate's source code, see the instructions
7//! in the [GitHub README](https://github.com/DaymudeLab/assembly-theory).
8//!
9//! Note that all Rust functions in this module have the form `_fn_name`, which
10//! correspond to the actual Rust function `fn_name` elsewhere in the crate and
11//! are exposed to the Python package as `fn_name`.
12//!
13//! # Python Example
14//!
15//! ```custom,{class=language-python}
16//! import assembly_theory as at
17//!
18//! # Load a mol block from file.
19//! with open('data/checks/anthracene.mol') as f:
20//!     mol_block = f.read()
21//!
22//! # Calculate the molecule's assembly index.
23//! at.index(mol_block)  # 6
24//! ```
25
26use std::str::FromStr;
27
28use pyo3::{
29    exceptions::{PyOSError, PyValueError},
30    prelude::*,
31    PyErr,
32};
33
34use crate::{
35    assembly::{depth, index, index_search, ParallelMode},
36    bounds::Bound as OurBound,
37    canonize::CanonizeMode,
38    enumerate::EnumerateMode,
39    kernels::KernelMode,
40    loader::{parse_molfile_str, ParserError},
41    memoize::MemoizeMode,
42};
43
44/// Implement a Python version of [`crate::loader::ParserError`].
45impl From<ParserError> for PyErr {
46    fn from(err: ParserError) -> PyErr {
47        PyOSError::new_err(err.to_string())
48    }
49}
50
51// TODO: Is there a clean way of avoiding the duplication of all our various
52// algorithm variant enums?
53
/// Python-facing counterpart of the [`EnumerateMode`] enum.
///
/// Values are parsed from Python strings via `FromStr` and then translated
/// one-to-one into [`EnumerateMode`] variants.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyEnumerateMode {
    /// Selected by the option string `"extend"`.
    Extend,
    /// Selected by the option string `"grow-erode"`.
    GrowErode,
}
60
/// Python-facing counterpart of the [`CanonizeMode`] enum.
///
/// Values are parsed from Python strings via `FromStr` and then translated
/// one-to-one into [`CanonizeMode`] variants.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyCanonizeMode {
    /// Selected by the option string `"nauty"`.
    Nauty,
    /// Selected by the option string `"faulon"`.
    Faulon,
    /// Selected by the option string `"tree-nauty"`.
    TreeNauty,
    /// Selected by the option string `"tree-faulon"`.
    TreeFaulon,
}
69
/// Python-facing counterpart of the [`ParallelMode`] enum.
///
/// Values are parsed from Python strings via `FromStr` and then translated
/// one-to-one into [`ParallelMode`] variants.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyParallelMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"depth-one"`.
    DepthOne,
    /// Selected by the option string `"always"`.
    Always,
}
77
/// Python-facing counterpart of the [`MemoizeMode`] enum.
///
/// Values are parsed from Python strings via `FromStr` and then translated
/// one-to-one into [`MemoizeMode`] variants.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyMemoizeMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"frags-index"`.
    FragsIndex,
    /// Selected by the option string `"canon-index"`.
    CanonIndex,
}
85
/// Python-facing counterpart of the [`KernelMode`] enum.
///
/// Values are parsed from Python strings via `FromStr` and then translated
/// one-to-one into [`KernelMode`] variants.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyKernelMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"once"`.
    Once,
    /// Selected by the option string `"depth-one"`.
    DepthOne,
    /// Selected by the option string `"always"`.
    Always,
}
94
/// Python-facing counterpart of the [`crate::bounds::Bound`] enum.
///
/// Values are parsed from Python strings via `FromStr` and then translated
/// into [`crate::bounds::Bound`] variants of the same name.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyBound {
    /// Selected by the option string `"log"`.
    Log,
    /// Selected by the option string `"int"`.
    Int,
    /// Selected by the option string `"vec-simple"`.
    VecSimple,
    /// Selected by the option string `"vec-small-frags"`.
    VecSmallFrags,
    /// Selected by the option string `"cover-sort"`.
    CoverSort,
    /// Selected by the option string `"cover-no-sort"`.
    CoverNoSort,
    /// Selected by the option string `"clique-budget"`.
    CliqueBudget,
}
106
107/// Converts bound options in `&str` format to `PyEnumerateMode`.
108impl FromStr for PyEnumerateMode {
109    type Err = PyErr;
110
111    fn from_str(s: &str) -> Result<Self, Self::Err> {
112        match s.to_lowercase().as_str() {
113            "extend" => Ok(PyEnumerateMode::Extend),
114            "grow-erode" => Ok(PyEnumerateMode::GrowErode),
115            _ => Err(PyValueError::new_err(format!(
116                "Invalid enumeration mode \"{s}\", options are: \
117                [\"extend\", \"grow-erode\"]"
118            ))),
119        }
120    }
121}
122
123/// Converts bound options in `&str` format to `PyCanonizeMode`.
124impl FromStr for PyCanonizeMode {
125    type Err = PyErr;
126
127    fn from_str(s: &str) -> Result<Self, Self::Err> {
128        match s.to_lowercase().as_str() {
129            "nauty" => Ok(PyCanonizeMode::Nauty),
130            "faulon" => Ok(PyCanonizeMode::Faulon),
131            "tree-nauty" => Ok(PyCanonizeMode::TreeNauty),
132            "tree-faulon" => Ok(PyCanonizeMode::TreeFaulon),
133            _ => Err(PyValueError::new_err(format!(
134                "Invalid canonization mode \"{s}\", options are: \
135                [\"nauty\", \"faulon\", \"tree-nauty\", \"tree-faulon\"]"
136            ))),
137        }
138    }
139}
140
141/// Converts bound options in `&str` format to `PyParallelMode`.
142impl FromStr for PyParallelMode {
143    type Err = PyErr;
144
145    fn from_str(s: &str) -> Result<Self, Self::Err> {
146        match s.to_lowercase().as_str() {
147            "none" => Ok(PyParallelMode::None),
148            "depth-one" => Ok(PyParallelMode::DepthOne),
149            "always" => Ok(PyParallelMode::Always),
150            _ => Err(PyValueError::new_err(format!(
151                "Invalid parallelization mode \"{s}\", options are: \
152                [\"none\", \"depth-one\", \"always\"]"
153            ))),
154        }
155    }
156}
157
158/// Converts bound options in `&str` format to `PyMemoizeMode`.
159impl FromStr for PyMemoizeMode {
160    type Err = PyErr;
161
162    fn from_str(s: &str) -> Result<Self, Self::Err> {
163        match s.to_lowercase().as_str() {
164            "none" => Ok(PyMemoizeMode::None),
165            "frags-index" => Ok(PyMemoizeMode::FragsIndex),
166            "canon-index" => Ok(PyMemoizeMode::CanonIndex),
167            _ => Err(PyValueError::new_err(format!(
168                "Invalid memoization mode \"{s}\", options are: \
169                [\"none\", \"frags-index\", \"canon-index\"]"
170            ))),
171        }
172    }
173}
174
175/// Converts bound options in `&str` format to `PyKernelMode`.
176impl FromStr for PyKernelMode {
177    type Err = PyErr;
178
179    fn from_str(s: &str) -> Result<Self, Self::Err> {
180        match s.to_lowercase().as_str() {
181            "none" => Ok(PyKernelMode::None),
182            "once" => Ok(PyKernelMode::Once),
183            "depth-one" => Ok(PyKernelMode::DepthOne),
184            "always" => Ok(PyKernelMode::Always),
185            _ => Err(PyValueError::new_err(format!(
186                "Invalid kernelization mode \"{s}\", options are: \
187                [\"none\", \"once\", \"depth-one\", \"always\"]"
188            ))),
189        }
190    }
191}
192
193/// Converts bound options in `&str` format to `PyBound`.
194impl FromStr for PyBound {
195    type Err = PyErr;
196
197    fn from_str(s: &str) -> Result<Self, Self::Err> {
198        match s.to_lowercase().as_str() {
199            "log" => Ok(PyBound::Log),
200            "int" => Ok(PyBound::Int),
201            "vec-simple" => Ok(PyBound::VecSimple),
202            "vec-small-frags" => Ok(PyBound::VecSmallFrags),
203            "cover-sort" => Ok(PyBound::CoverSort),
204            "cover-no-sort" => Ok(PyBound::CoverNoSort),
205            "clique-budget" => Ok(PyBound::CliqueBudget),
206            _ => Err(PyValueError::new_err(format!(
207                "Invalid bound \"{s}\", options are: \
208                [\"log\", \"int\", \"vec-simple\", \"vec-small-frags\", \
209                \"cover-sort\", \"cover-no-sort\", \"clique-budget\"]"
210            ))),
211        }
212    }
213}
214
215/// Converts a `Vec<String>` of bound Python strings into a `Vec<PyBound>`,
216/// raising an error if any bound string is invalid.
217fn process_bound_strs(bound_strs: Vec<String>) -> PyResult<Vec<PyBound>> {
218    bound_strs
219        .iter()
220        .map(|s| s.parse())
221        .collect::<Result<_, _>>()
222}
223
224/// Converts a slice of `PyBound`s into a vector of `bounds::Bound`s.
225fn make_boundlist(pybounds: &[PyBound]) -> Vec<OurBound> {
226    let mut boundlist = pybounds
227        .iter()
228        .flat_map(|b| match b {
229            PyBound::Log => vec![OurBound::Log],
230            PyBound::Int => vec![OurBound::Int],
231            PyBound::VecSimple => vec![OurBound::VecSimple],
232            PyBound::VecSmallFrags => vec![OurBound::VecSmallFrags],
233            PyBound::CoverSort => vec![OurBound::CoverSort],
234            PyBound::CoverNoSort => vec![OurBound::CoverNoSort],
235            PyBound::CliqueBudget => vec![OurBound::CliqueBudget],
236        })
237        .collect::<Vec<_>>();
238    boundlist.dedup();
239    boundlist
240}
241
242/// Get a pretty-printable string of this molecule's graph representation.
243///
244/// Python version of [`crate::molecule::Molecule::info`].
245///
246/// # Python Parameters
247/// - `mol_block`: The contents of a `.mol` file as a `str`.
248///
249/// # Python Returns
250/// - A pretty-printable `str` detailing the molecule's atoms and bonds.
251///
252/// # Python Example
253///
254/// ```custom,{class=language-python}
255/// import assembly_theory as at
256///
257/// # Load a mol block from file.
258/// with open('data/checks/anthracene.mol') as f:
259///     mol_block = f.read()
260///
261/// # Print the molecule's graph structure.
262/// print(at.mol_info(mol_block))
263///
264/// # graph {
265/// #     0 [ label = "Atom { element: Carbon, capacity: 0 }" ]
266/// #     1 [ label = "Atom { element: Carbon, capacity: 0 }" ]
267/// #     2 [ label = "Atom { element: Carbon, capacity: 0 }" ]
268/// #     ...
269/// #     0 -- 1 [ label = "Double" ]
270/// #     1 -- 2 [ label = "Single" ]
271/// #     2 -- 5 [ label = "Double" ]
272/// #     ...
273/// # }
274/// ```
275#[pyfunction(name = "mol_info")]
276pub fn _mol_info(mol_block: &str) -> PyResult<String> {
277    // Parse the .mol file contents as a molecule::Molecule.
278    let mol_result = parse_molfile_str(mol_block);
279    let mol = match mol_result {
280        Ok(mol) => mol,
281        Err(e) => return Err(e.into()), // Convert the error to PyErr
282    };
283
284    // Return molecule info.
285    Ok(mol.info())
286}
287
288/// Compute assembly depth; see
289/// [Pagel et al. (2024)](https://arxiv.org/abs/2409.05993).
290///
291/// Python version of [`depth`].
292///
293/// # Python Parameters
294/// - `mol_block`: The contents of a `.mol` file as a `str`.
295///
296/// # Python Returns
297/// - The molecule's `int` assembly depth.
298///
299/// # Python Example
300///
301/// ```custom,{class=language-python}
302/// import assembly_theory as at
303///
304/// # Load a mol block from file.
305/// with open('data/checks/benzene.mol') as f:
306///     mol_block = f.read()
307///
308/// # Calculate the molecule's assembly index.
309/// at.depth(mol_block)  # 3
310/// ```
311#[pyfunction(name = "depth")]
312pub fn _depth(mol_block: &str) -> PyResult<u32> {
313    // Parse the .mol file contents as a molecule::Molecule.
314    let mol_result = parse_molfile_str(mol_block);
315    let mol = match mol_result {
316        Ok(mol) => mol,
317        Err(e) => return Err(e.into()), // Convert the error to PyErr
318    };
319
320    // Calculate assembly depth.
321    Ok(depth(&mol))
322}
323
324/// Computes a molecule's assembly index using an efficient default strategy.
325///
326/// Python version of [`index`].
327///
328/// # Python Parameters
329/// - `mol_block`: The contents of a `.mol` file as a `str`.
330///
331/// # Python Returns
332/// - The molecule's `int` assembly index.
333///
334/// # Python Example
335///
336/// ```custom,{class=language-python}
337/// import assembly_theory as at
338///
339/// # Load a mol block from file.
340/// with open('data/checks/anthracene.mol') as f:
341///     mol_block = f.read()
342///
343/// # Calculate the molecule's assembly index.
344/// at.index(mol_block)  # 6
345/// ```
346#[pyfunction(name = "index")]
347pub fn _index(mol_block: &str) -> PyResult<u32> {
348    // Parse the .mol file contents as a molecule::Molecule.
349    let mol_result = parse_molfile_str(mol_block);
350    let mol = match mol_result {
351        Ok(mol) => mol,
352        Err(e) => return Err(e.into()), // Convert the error to PyErr
353    };
354
355    // Calculate the assembly index.
356    Ok(index(&mol))
357}
358
359/// Computes a molecule's assembly index and related information using a
360/// top-down recursive search, parameterized by the specified options.
361///
362/// Python version of [`index_search`].
363///
364/// # Python Parameters
365///
366/// - `mol_block`: The contents of a `.mol` file as a `str`.
367/// - `enumerate_str`: An enumeration mode from [`"extend"`, `"grow-erode"`
368/// (default)]. See [`EnumerateMode`] for details.
369/// - `canonize_str`: A canonization mode from [`"nauty"`, `"faulon"`,
370/// `"tree-nauty"` (default), `"tree-faulon"`]. See [`CanonizeMode`] for
371/// details.
372/// - `parallel_str`: A parallelization mode from [`"none"`, `"depth-one"`
373/// (default), `"always"`]. See [`ParallelMode`] for details.
374/// - `memoize_str`: A memoization mode from [`none`, `frags-index`,
375/// `canon-index` (default)]. See [`MemoizeMode`] for details.
376/// - `kernel_str`: A kernelization mode from [`"none"` (default), `"once"`,
377/// `"depth-one"`, `"always"`]. See [`KernelMode`] for details.
378/// - `bound_strs`: A list of bounds containing zero or more of [`"log"`,
379/// `"int"`, `"vec-simple"`, `"vec-small-frags"`, `"cover-sort"`,
380/// `"cover-no-sort"`, `"clique-budget"`]. The default bounds are [`"int"`,
381/// `"vec-simple"`, `"vec-small-frags"`]. See [`crate::bounds::Bound`] for
382/// details.
383///
384/// # Python Returns
385///
386/// A 3-tuple containing:
387/// - The molecule's `int` assembly index.
388/// - The molecule's `int` number of non-overlapping isomorphic subgraph pairs.
389/// - The `int` number of assembly states searched.
390///
391/// # Python Example
392///
393/// ```custom,{class=language-python}
394/// import assembly_theory as at
395///
396/// # Load a mol block from file.
397/// with open('data/checks/anthracene.mol') as f:
398///     mol_block = f.read()
399///
400/// # Calculate the molecule's assembly index using the specified options.
401/// (index, num_matches, states_searched) = at.index_search(
402///     mol_block,
403///     "grow-erode",
404///     "tree-nauty",
405///     "none",
406///     "none",
407///     "none",
408///     ["int", "vec-simple", "vec-small-frags"])
409///
410/// print(f"Assembly Index: {index}")  # 6
411/// print(f"Non-Overlapping Isomorphic Subgraph Pairs: {num_matches}")  # 466
412/// print(f"Assembly States Searched: {states_searched}")  # 2562
413/// ```
414#[pyfunction(name = "index_search")]
415#[pyo3(signature = (mol_block, enumerate_str="grow-erode", canonize_str="tree-nauty", parallel_str="depth-one", memoize_str="canon-index", kernel_str="none", bound_strs=vec!["int".to_string(), "vec-simple".to_string(), "vec-small-frags".to_string()]), text_signature = "(mol_block, enumerate_str=\"grow-erode\", canonize_str=\"tree-nauty\", parallel_str=\"depth-one\", memoize_str=\"canon-index\", kernel_str=\"none\", bound_strs=[\"int\", \"vec-simple\", \"vec-small-frags\"]))")]
416pub fn _index_search(
417    mol_block: &str,
418    enumerate_str: &str,
419    canonize_str: &str,
420    parallel_str: &str,
421    memoize_str: &str,
422    kernel_str: &str,
423    bound_strs: Vec<String>,
424) -> PyResult<(u32, u32, usize)> {
425    // Parse the .mol file contents as a molecule::Molecule.
426    let mol_result = parse_molfile_str(mol_block);
427    let mol = match mol_result {
428        Ok(mol) => mol,
429        Err(e) => return Err(e.into()), // Convert the error to PyErr
430    };
431
432    // Parse the various modes and bound options.
433    let enumerate_mode = match PyEnumerateMode::from_str(enumerate_str) {
434        Ok(PyEnumerateMode::Extend) => EnumerateMode::Extend,
435        Ok(PyEnumerateMode::GrowErode) => EnumerateMode::GrowErode,
436        Err(e) => return Err(e),
437    };
438    let canonize_mode = match PyCanonizeMode::from_str(canonize_str) {
439        Ok(PyCanonizeMode::Nauty) => CanonizeMode::Nauty,
440        Ok(PyCanonizeMode::Faulon) => CanonizeMode::Faulon,
441        Ok(PyCanonizeMode::TreeNauty) => CanonizeMode::TreeNauty,
442        Ok(PyCanonizeMode::TreeFaulon) => CanonizeMode::TreeFaulon,
443        Err(e) => return Err(e),
444    };
445    let parallel_mode = match PyParallelMode::from_str(parallel_str) {
446        Ok(PyParallelMode::None) => ParallelMode::None,
447        Ok(PyParallelMode::DepthOne) => ParallelMode::DepthOne,
448        Ok(PyParallelMode::Always) => ParallelMode::Always,
449        Err(e) => return Err(e),
450    };
451    let memoize_mode = match PyMemoizeMode::from_str(memoize_str) {
452        Ok(PyMemoizeMode::None) => MemoizeMode::None,
453        Ok(PyMemoizeMode::FragsIndex) => MemoizeMode::FragsIndex,
454        Ok(PyMemoizeMode::CanonIndex) => MemoizeMode::CanonIndex,
455        Err(e) => return Err(e),
456    };
457    let kernel_mode = match PyKernelMode::from_str(kernel_str) {
458        Ok(PyKernelMode::None) => KernelMode::None,
459        Ok(PyKernelMode::Once) => KernelMode::Once,
460        Ok(PyKernelMode::DepthOne) => KernelMode::DepthOne,
461        Ok(PyKernelMode::Always) => KernelMode::Always,
462        Err(e) => return Err(e),
463    };
464    let pybounds = process_bound_strs(bound_strs)?;
465    let boundlist = make_boundlist(&pybounds);
466
467    // Compute assembly index.
468    Ok(index_search(
469        &mol,
470        enumerate_mode,
471        canonize_mode,
472        parallel_mode,
473        memoize_mode,
474        kernel_mode,
475        &boundlist,
476    ))
477}
478
479/// A Python wrapper for the assembly_theory Rust crate.
480// Registers the listed functions as a Python module named 'assembly_theory';
481// the above line is used as a docstring.
482#[pymodule(name = "assembly_theory")]
483fn _assembly_theory(m: &Bound<'_, PyModule>) -> PyResult<()> {
484    m.add_function(wrap_pyfunction!(_mol_info, m)?)?;
485    m.add_function(wrap_pyfunction!(_depth, m)?)?;
486    m.add_function(wrap_pyfunction!(_index, m)?)?;
487    m.add_function(wrap_pyfunction!(_index_search, m)?)?;
488    Ok(())
489}