assembly_theory/
python.rs

1//! Expose public `assembly_theory` functionality to a Python package using
2//! [`pyo3`](https://docs.rs/pyo3/latest/pyo3/).
3//!
4//! The package is available [on PyPI](https://pypi.org/project/assembly-theory/);
5//! see that README for installation and usage instructions. To build the
6//! Python package directly from this crate's source code, see the instructions
7//! in the [GitHub README](https://github.com/DaymudeLab/assembly-theory).
8//!
//! Note that every Rust function in this module that is exposed to Python is
//! named `_fn_name`; it corresponds to the actual Rust function `fn_name`
//! elsewhere in the crate and is exposed to the Python package as `fn_name`.
12//!
13//! # Python Example
14//!
15//! ```custom,{class=language-python}
16//! import assembly_theory as at
17//!
18//! # Load a mol block from file...
19//! with open('data/checks/anthracene.mol') as f:
20//!     mol_block = f.read()
21//!
22//! # ...or define the mol block directly.
23//! mol_block = """
24//!
25//!
26//!  14 16  0  0  0  0  0  0  0  0999 V2000
27//!    25.2202  -16.2366    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
28//!    25.2202  -17.6385    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
29//!    26.4373  -18.3394    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
30//!    26.4373  -15.5356    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
31//!    27.6471  -16.2366    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
32//!    27.6412  -17.6385    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
33//!    28.8523  -18.3446    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
34//!    28.8644  -15.5409    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
35//!    30.0755  -16.2469    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
36//!    30.1327  -17.6453    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
37//!    31.2674  -18.3552    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
38//!    32.4846  -17.6672    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
39//!    32.4973  -16.2688    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
40//!    31.2927  -15.5589    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
41//!   9  8  2  0     0  0
42//!   8  5  1  0     0  0
43//!   5  4  2  0     0  0
44//!   4  1  1  0     0  0
45//!   1  2  2  0     0  0
46//!   2  3  1  0     0  0
47//!   3  6  2  0     0  0
48//!   9 10  1  0     0  0
49//!  10 11  1  0     0  0
50//!  11 12  2  0     0  0
51//!  12 13  1  0     0  0
52//!  13 14  2  0     0  0
53//!  14  9  1  0     0  0
54//!   5  6  1  0     0  0
55//!   6  7  1  0     0  0
56//!   7 10  2  0     0  0
57//! M  END"""
58//!
59//! # Calculate the molecule's assembly index.
60//! at.index(mol_block)  # 6
61//! ```
62
63use std::str::FromStr;
64
65use pyo3::{
66    exceptions::{PyOSError, PyValueError},
67    prelude::*,
68    PyErr,
69};
70
71use crate::{
72    assembly::{depth, index, index_search, ParallelMode},
73    bounds::Bound as OurBound,
74    canonize::CanonizeMode,
75    kernels::KernelMode,
76    loader::{parse_molfile_str, ParserError},
77    memoize::MemoizeMode,
78};
79
80/// Implement a Python version of [`crate::loader::ParserError`].
81impl From<ParserError> for PyErr {
82    fn from(err: ParserError) -> PyErr {
83        PyOSError::new_err(err.to_string())
84    }
85}
86
87// TODO: Is there a clean way of avoiding the duplication of all our various
88// algorithm variant enums?
89
/// Python-facing counterpart of the [`CanonizeMode`] enum.
///
/// Values are obtained by parsing a canonization mode name and are translated
/// variant-for-variant into [`CanonizeMode`] before the search is run.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyCanonizeMode {
    /// Parsed from `"nauty"`.
    Nauty,
    /// Parsed from `"faulon"`.
    Faulon,
    /// Parsed from `"tree-nauty"`.
    TreeNauty,
    /// Parsed from `"tree-faulon"`.
    TreeFaulon,
}
98
/// Python-facing counterpart of the [`ParallelMode`] enum.
///
/// Values are obtained by parsing a parallelization mode name and are
/// translated variant-for-variant into [`ParallelMode`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyParallelMode {
    /// Parsed from `"none"`.
    None,
    /// Parsed from `"depth-one"`.
    DepthOne,
    /// Parsed from `"always"`.
    Always,
}
106
/// Python-facing counterpart of the [`MemoizeMode`] enum.
///
/// Values are obtained by parsing a memoization mode name and are translated
/// variant-for-variant into [`MemoizeMode`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyMemoizeMode {
    /// Parsed from `"none"`.
    None,
    /// Parsed from `"canon-index"`.
    CanonIndex,
}
113
/// Python-facing counterpart of the `kernels::KernelMode` enum.
///
/// Values are obtained by parsing a kernelization mode name and are translated
/// variant-for-variant into `KernelMode`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyKernelMode {
    /// Parsed from `"none"`.
    None,
    /// Parsed from `"once"`.
    Once,
    /// Parsed from `"depth-one"`.
    DepthOne,
    /// Parsed from `"always"`.
    Always,
}
122
/// Python-facing counterpart of the `bounds::Bound` enum.
///
/// Values are obtained by parsing bound names and are translated
/// variant-for-variant into `bounds::Bound`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyBound {
    /// Parsed from `"log"`.
    Log,
    /// Parsed from `"int"`.
    Int,
    /// Parsed from `"vec-simple"`.
    VecSimple,
    /// Parsed from `"vec-small-frags"`.
    VecSmallFrags,
    /// Parsed from `"matchable-edges"`.
    MatchableEdges,
}
132
133/// Converts bound options in `&str` format to `PyCanonizeMode`.
134impl FromStr for PyCanonizeMode {
135    type Err = PyErr;
136
137    fn from_str(s: &str) -> Result<Self, Self::Err> {
138        match s.to_lowercase().as_str() {
139            "nauty" => Ok(PyCanonizeMode::Nauty),
140            "faulon" => Ok(PyCanonizeMode::Faulon),
141            "tree-nauty" => Ok(PyCanonizeMode::TreeNauty),
142            "tree-faulon" => Ok(PyCanonizeMode::TreeFaulon),
143            _ => Err(PyValueError::new_err(format!(
144                "Invalid canonization mode \"{s}\", options are: \
145                [\"nauty\", \"faulon\", \"tree-nauty\", \"tree-faulon\"]"
146            ))),
147        }
148    }
149}
150
151/// Converts bound options in `&str` format to `PyParallelMode`.
152impl FromStr for PyParallelMode {
153    type Err = PyErr;
154
155    fn from_str(s: &str) -> Result<Self, Self::Err> {
156        match s.to_lowercase().as_str() {
157            "none" => Ok(PyParallelMode::None),
158            "depth-one" => Ok(PyParallelMode::DepthOne),
159            "always" => Ok(PyParallelMode::Always),
160            _ => Err(PyValueError::new_err(format!(
161                "Invalid parallelization mode \"{s}\", options are: \
162                [\"none\", \"depth-one\", \"always\"]"
163            ))),
164        }
165    }
166}
167
168/// Converts bound options in `&str` format to `PyMemoizeMode`.
169impl FromStr for PyMemoizeMode {
170    type Err = PyErr;
171
172    fn from_str(s: &str) -> Result<Self, Self::Err> {
173        match s.to_lowercase().as_str() {
174            "none" => Ok(PyMemoizeMode::None),
175            "canon-index" => Ok(PyMemoizeMode::CanonIndex),
176            _ => Err(PyValueError::new_err(format!(
177                "Invalid memoization mode \"{s}\", options are: \
178                [\"none\", \"frags-index\", \"canon-index\"]"
179            ))),
180        }
181    }
182}
183
184/// Converts bound options in `&str` format to `PyKernelMode`.
185impl FromStr for PyKernelMode {
186    type Err = PyErr;
187
188    fn from_str(s: &str) -> Result<Self, Self::Err> {
189        match s.to_lowercase().as_str() {
190            "none" => Ok(PyKernelMode::None),
191            "once" => Ok(PyKernelMode::Once),
192            "depth-one" => Ok(PyKernelMode::DepthOne),
193            "always" => Ok(PyKernelMode::Always),
194            _ => Err(PyValueError::new_err(format!(
195                "Invalid kernelization mode \"{s}\", options are: \
196                [\"none\", \"once\", \"depth-one\", \"always\"]"
197            ))),
198        }
199    }
200}
201
202/// Converts bound options in `&str` format to `PyBound`.
203impl FromStr for PyBound {
204    type Err = PyErr;
205
206    fn from_str(s: &str) -> Result<Self, Self::Err> {
207        match s.to_lowercase().as_str() {
208            "log" => Ok(PyBound::Log),
209            "int" => Ok(PyBound::Int),
210            "vec-simple" => Ok(PyBound::VecSimple),
211            "vec-small-frags" => Ok(PyBound::VecSmallFrags),
212            "matchable-edges" => Ok(PyBound::MatchableEdges),
213            _ => Err(PyValueError::new_err(format!(
214                "Invalid bound \"{s}\", options are: \
215                [\"log\", \"int\", \"vec-simple\", \"vec-small-frags\", \
216                \"matchable-edges\"]"
217            ))),
218        }
219    }
220}
221
222/// Converts a `Vec<String>` of bound Python strings into a `Vec<PyBound>`,
223/// raising an error if any bound string is invalid.
224fn process_bound_strs(bound_strs: Vec<String>) -> PyResult<Vec<PyBound>> {
225    bound_strs
226        .iter()
227        .map(|s| s.parse())
228        .collect::<Result<_, _>>()
229}
230
231/// Converts a slice of `PyBound`s into a vector of `bounds::Bound`s.
232fn make_boundlist(pybounds: &[PyBound]) -> Vec<OurBound> {
233    let mut boundlist = pybounds
234        .iter()
235        .flat_map(|b| match b {
236            PyBound::Log => vec![OurBound::Log],
237            PyBound::Int => vec![OurBound::Int],
238            PyBound::VecSimple => vec![OurBound::VecSimple],
239            PyBound::VecSmallFrags => vec![OurBound::VecSmallFrags],
240            PyBound::MatchableEdges => vec![OurBound::MatchableEdges],
241        })
242        .collect::<Vec<_>>();
243    boundlist.dedup();
244    boundlist
245}
246
247/// Get a pretty-printable string of this molecule's graph representation.
248///
249/// Python version of [`crate::molecule::Molecule::info`].
250///
251/// # Python Parameters
252/// - `mol_block`: The contents of a `.mol` file as a `str`.
253///
254/// # Python Returns
255/// - A pretty-printable `str` detailing the molecule's atoms and bonds.
256///
257/// # Python Example
258///
259/// ```custom,{class=language-python}
260/// import assembly_theory as at
261///
262/// # Load a mol block from file.
263/// with open('data/checks/anthracene.mol') as f:
264///     mol_block = f.read()
265///
266/// # Print the molecule's graph structure.
267/// print(at.mol_info(mol_block))
268///
269/// # graph {
270/// #     0 [ label = "Atom { element: Carbon, capacity: 0 }" ]
271/// #     1 [ label = "Atom { element: Carbon, capacity: 0 }" ]
272/// #     2 [ label = "Atom { element: Carbon, capacity: 0 }" ]
273/// #     ...
274/// #     0 -- 1 [ label = "Double" ]
275/// #     1 -- 2 [ label = "Single" ]
276/// #     2 -- 5 [ label = "Double" ]
277/// #     ...
278/// # }
279/// ```
280#[pyfunction(name = "mol_info")]
281pub fn _mol_info(mol_block: &str) -> PyResult<String> {
282    // Parse the .mol file contents as a molecule::Molecule.
283    let mol_result = parse_molfile_str(mol_block);
284    let mol = match mol_result {
285        Ok(mol) => mol,
286        Err(e) => return Err(e.into()), // Convert the error to PyErr
287    };
288
289    // Return molecule info.
290    Ok(mol.info())
291}
292
293/// Compute assembly depth; see
294/// [Pagel et al. (2024)](https://arxiv.org/abs/2409.05993).
295///
296/// Python version of [`depth`].
297///
298/// # Python Parameters
299/// - `mol_block`: The contents of a `.mol` file as a `str`.
300///
301/// # Python Returns
302/// - The molecule's `int` assembly depth.
303///
304/// # Python Example
305///
306/// ```custom,{class=language-python}
307/// import assembly_theory as at
308///
309/// # Load a mol block from file.
310/// with open('data/checks/benzene.mol') as f:
311///     mol_block = f.read()
312///
313/// # Calculate the molecule's assembly index.
314/// at.depth(mol_block)  # 3
315/// ```
316#[pyfunction(name = "depth")]
317pub fn _depth(mol_block: &str) -> PyResult<u32> {
318    // Parse the .mol file contents as a molecule::Molecule.
319    let mol_result = parse_molfile_str(mol_block);
320    let mol = match mol_result {
321        Ok(mol) => mol,
322        Err(e) => return Err(e.into()), // Convert the error to PyErr
323    };
324
325    // Calculate assembly depth.
326    Ok(depth(&mol))
327}
328
329/// Computes a molecule's assembly index using an efficient default strategy.
330///
331/// Python version of [`index`].
332///
333/// # Python Parameters
334/// - `mol_block`: The contents of a `.mol` file as a `str`.
335///
336/// # Python Returns
337/// - The molecule's `int` assembly index.
338///
339/// # Python Example
340///
341/// ```custom,{class=language-python}
342/// import assembly_theory as at
343///
344/// # Load a mol block from file.
345/// with open('data/checks/anthracene.mol') as f:
346///     mol_block = f.read()
347///
348/// # Calculate the molecule's assembly index.
349/// at.index(mol_block)  # 6
350/// ```
351#[pyfunction(name = "index")]
352pub fn _index(mol_block: &str) -> PyResult<u32> {
353    // Parse the .mol file contents as a molecule::Molecule.
354    let mol_result = parse_molfile_str(mol_block);
355    let mol = match mol_result {
356        Ok(mol) => mol,
357        Err(e) => return Err(e.into()), // Convert the error to PyErr
358    };
359
360    // Calculate the assembly index.
361    Ok(index(&mol))
362}
363
364/// Computes a molecule's assembly index and related information using a
365/// top-down recursive search, parameterized by the specified options.
366///
367/// Python version of [`index_search`].
368///
369/// # Python Parameters
370///
371/// - `mol_block`: The contents of a `.mol` file as a `str`.
372/// - `canonize_str`: A canonization mode from [`"nauty"`, `"faulon"`,
373/// `"tree-nauty"` (default), `"tree-faulon"`]. See [`CanonizeMode`] for
374/// details.
375/// - `parallel_str`: A parallelization mode from [`"none"`, `"depth-one"`
376/// (default), `"always"`]. See [`ParallelMode`] for details.
377/// - `memoize_str`: A memoization mode from [`none`, `frags-index`,
378/// `canon-index` (default)]. See [`MemoizeMode`] for details.
379/// - `kernel_str`: A kernelization mode from [`"none"` (default), `"once"`,
380/// `"depth-one"`, `"always"`]. See [`KernelMode`] for details.
381/// - `bound_strs`: A list of bounds containing zero or more of [`"log"`,
382/// `"int"`, `"vec-simple"`, `"vec-small-frags"`, `"matchable-edges"`].
383/// The default bounds are [`"int"`, `"matchable-edges"`]. See
384/// [`crate::bounds::Bound`] for details.
385///
386/// # Python Returns
387///
388/// A 3-tuple containing:
389/// - The molecule's `int` assembly index.
390/// - The molecule's `int` number of edge-disjoint isomorphic subgraph pairs.
391/// - The `int` number of assembly states searched.
392///
393/// # Python Example
394///
395/// ```custom,{class=language-python}
396/// import assembly_theory as at
397///
398/// # Load a mol block from file.
399/// with open('data/checks/anthracene.mol') as f:
400///     mol_block = f.read()
401///
402/// # Calculate the molecule's assembly index using the specified options.
403/// (index, num_matches, states_searched) = at.index_search(
404///     mol_block,
405///     "tree-nauty",
406///     "none",
407///     "none",
408///     "none",
409///     ["int", "matchable-edges"])
410///
411/// print(f"Assembly Index: {index}")  # 6
412/// print(f"Edge-Disjoint Isomorphic Subgraph Pairs: {num_matches}")  # 466
413/// print(f"Assembly States Searched: {states_searched}")  # 491
414/// ```
415#[pyfunction(name = "index_search")]
416#[pyo3(signature = (mol_block, canonize_str="tree-nauty", parallel_str="depth-one", memoize_str="canon-index", kernel_str="none", bound_strs=vec!["int".to_string(), "matchable-edges".to_string()]), text_signature = "(mol_block, canonize_str=\"tree-nauty\", parallel_str=\"depth-one\", memoize_str=\"canon-index\", kernel_str=\"none\", bound_strs=[\"int\", \"matchable-edges\"]))")]
417pub fn _index_search(
418    mol_block: &str,
419    canonize_str: &str,
420    parallel_str: &str,
421    memoize_str: &str,
422    kernel_str: &str,
423    bound_strs: Vec<String>,
424) -> PyResult<(u32, u32, usize)> {
425    // Parse the .mol file contents as a molecule::Molecule.
426    let mol_result = parse_molfile_str(mol_block);
427    let mol = match mol_result {
428        Ok(mol) => mol,
429        Err(e) => return Err(e.into()), // Convert the error to PyErr
430    };
431
432    // Parse the various modes and bound options.
433    let canonize_mode = match PyCanonizeMode::from_str(canonize_str) {
434        Ok(PyCanonizeMode::Nauty) => CanonizeMode::Nauty,
435        Ok(PyCanonizeMode::Faulon) => CanonizeMode::Faulon,
436        Ok(PyCanonizeMode::TreeNauty) => CanonizeMode::TreeNauty,
437        Ok(PyCanonizeMode::TreeFaulon) => CanonizeMode::TreeFaulon,
438        Err(e) => return Err(e),
439    };
440    let parallel_mode = match PyParallelMode::from_str(parallel_str) {
441        Ok(PyParallelMode::None) => ParallelMode::None,
442        Ok(PyParallelMode::DepthOne) => ParallelMode::DepthOne,
443        Ok(PyParallelMode::Always) => ParallelMode::Always,
444        Err(e) => return Err(e),
445    };
446    let memoize_mode = match PyMemoizeMode::from_str(memoize_str) {
447        Ok(PyMemoizeMode::None) => MemoizeMode::None,
448        Ok(PyMemoizeMode::CanonIndex) => MemoizeMode::CanonIndex,
449        Err(e) => return Err(e),
450    };
451    let kernel_mode = match PyKernelMode::from_str(kernel_str) {
452        Ok(PyKernelMode::None) => KernelMode::None,
453        Ok(PyKernelMode::Once) => KernelMode::Once,
454        Ok(PyKernelMode::DepthOne) => KernelMode::DepthOne,
455        Ok(PyKernelMode::Always) => KernelMode::Always,
456        Err(e) => return Err(e),
457    };
458    let pybounds = process_bound_strs(bound_strs)?;
459    let boundlist = make_boundlist(&pybounds);
460
461    // Compute assembly index.
462    Ok(index_search(
463        &mol,
464        canonize_mode,
465        parallel_mode,
466        memoize_mode,
467        kernel_mode,
468        &boundlist,
469    ))
470}
471
472/// A Python wrapper for the assembly_theory Rust crate.
473// Registers the listed functions as a Python module named 'assembly_theory';
474// the above line is used as a docstring.
475#[pymodule(name = "assembly_theory")]
476fn _assembly_theory(m: &Bound<'_, PyModule>) -> PyResult<()> {
477    m.add_function(wrap_pyfunction!(_mol_info, m)?)?;
478    m.add_function(wrap_pyfunction!(_depth, m)?)?;
479    m.add_function(wrap_pyfunction!(_index, m)?)?;
480    m.add_function(wrap_pyfunction!(_index_search, m)?)?;
481    Ok(())
482}