assembly_theory/python.rs
1//! Expose public `assembly_theory` functionality to a Python package using
2//! [`pyo3`](https://docs.rs/pyo3/latest/pyo3/).
3//!
4//! The package is available [on PyPI](https://pypi.org/project/assembly-theory/);
5//! see that README for installation and usage instructions. To build the
6//! Python package directly from this crate's source code, see the instructions
7//! in the [GitHub README](https://github.com/DaymudeLab/assembly-theory).
8//!
9//! Note that all Rust functions in this module have the form `_fn_name`, which
10//! correspond to the actual Rust function `fn_name` elsewhere in the crate and
11//! are exposed to the Python package as `fn_name`.
12//!
13//! # Python Example
14//!
15//! ```custom,{class=language-python}
16//! import assembly_theory as at
17//!
18//! # Load a mol block from file.
19//! with open('data/checks/anthracene.mol') as f:
20//! mol_block = f.read()
21//!
22//! # Calculate the molecule's assembly index.
23//! at.index(mol_block) # 6
24//! ```
25
26use std::str::FromStr;
27
28use pyo3::{
29 exceptions::{PyOSError, PyValueError},
30 prelude::*,
31 PyErr,
32};
33
34use crate::{
35 assembly::{depth, index, index_search, ParallelMode},
36 bounds::Bound as OurBound,
37 canonize::CanonizeMode,
38 enumerate::EnumerateMode,
39 kernels::KernelMode,
40 loader::{parse_molfile_str, ParserError},
41 memoize::MemoizeMode,
42};
43
44/// Implement a Python version of [`crate::loader::ParserError`].
45impl From<ParserError> for PyErr {
46 fn from(err: ParserError) -> PyErr {
47 PyOSError::new_err(err.to_string())
48 }
49}
50
51// TODO: Is there a clean way of avoiding the duplication of all our various
52// algorithm variant enums?
53
/// Python-facing counterpart of [`EnumerateMode`], produced by parsing the
/// `enumerate_str` argument of `index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyEnumerateMode {
    /// Selected by the option string `"extend"`.
    Extend,
    /// Selected by the option string `"grow-erode"`.
    GrowErode,
}
60
/// Python-facing counterpart of [`CanonizeMode`], produced by parsing the
/// `canonize_str` argument of `index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyCanonizeMode {
    /// Selected by the option string `"nauty"`.
    Nauty,
    /// Selected by the option string `"faulon"`.
    Faulon,
    /// Selected by the option string `"tree-nauty"`.
    TreeNauty,
    /// Selected by the option string `"tree-faulon"`.
    TreeFaulon,
}
69
/// Python-facing counterpart of [`ParallelMode`], produced by parsing the
/// `parallel_str` argument of `index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyParallelMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"depth-one"`.
    DepthOne,
    /// Selected by the option string `"always"`.
    Always,
}
77
/// Python-facing counterpart of [`MemoizeMode`], produced by parsing the
/// `memoize_str` argument of `index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyMemoizeMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"frags-index"`.
    FragsIndex,
    /// Selected by the option string `"canon-index"`.
    CanonIndex,
}
85
/// Python-facing counterpart of [`KernelMode`], produced by parsing the
/// `kernel_str` argument of `index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyKernelMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"once"`.
    Once,
    /// Selected by the option string `"depth-one"`.
    DepthOne,
    /// Selected by the option string `"always"`.
    Always,
}
94
/// Python-facing counterpart of [`crate::bounds::Bound`], produced by parsing
/// each entry of the `bound_strs` argument of `index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyBound {
    /// Selected by the option string `"log"`.
    Log,
    /// Selected by the option string `"int"`.
    Int,
    /// Selected by the option string `"vec-simple"`.
    VecSimple,
    /// Selected by the option string `"vec-small-frags"`.
    VecSmallFrags,
    /// Selected by the option string `"cover-sort"`.
    CoverSort,
    /// Selected by the option string `"cover-no-sort"`.
    CoverNoSort,
    /// Selected by the option string `"clique-budget"`.
    CliqueBudget,
}
106
107/// Converts bound options in `&str` format to `PyEnumerateMode`.
108impl FromStr for PyEnumerateMode {
109 type Err = PyErr;
110
111 fn from_str(s: &str) -> Result<Self, Self::Err> {
112 match s.to_lowercase().as_str() {
113 "extend" => Ok(PyEnumerateMode::Extend),
114 "grow-erode" => Ok(PyEnumerateMode::GrowErode),
115 _ => Err(PyValueError::new_err(format!(
116 "Invalid enumeration mode \"{s}\", options are: \
117 [\"extend\", \"grow-erode\"]"
118 ))),
119 }
120 }
121}
122
123/// Converts bound options in `&str` format to `PyCanonizeMode`.
124impl FromStr for PyCanonizeMode {
125 type Err = PyErr;
126
127 fn from_str(s: &str) -> Result<Self, Self::Err> {
128 match s.to_lowercase().as_str() {
129 "nauty" => Ok(PyCanonizeMode::Nauty),
130 "faulon" => Ok(PyCanonizeMode::Faulon),
131 "tree-nauty" => Ok(PyCanonizeMode::TreeNauty),
132 "tree-faulon" => Ok(PyCanonizeMode::TreeFaulon),
133 _ => Err(PyValueError::new_err(format!(
134 "Invalid canonization mode \"{s}\", options are: \
135 [\"nauty\", \"faulon\", \"tree-nauty\", \"tree-faulon\"]"
136 ))),
137 }
138 }
139}
140
141/// Converts bound options in `&str` format to `PyParallelMode`.
142impl FromStr for PyParallelMode {
143 type Err = PyErr;
144
145 fn from_str(s: &str) -> Result<Self, Self::Err> {
146 match s.to_lowercase().as_str() {
147 "none" => Ok(PyParallelMode::None),
148 "depth-one" => Ok(PyParallelMode::DepthOne),
149 "always" => Ok(PyParallelMode::Always),
150 _ => Err(PyValueError::new_err(format!(
151 "Invalid parallelization mode \"{s}\", options are: \
152 [\"none\", \"depth-one\", \"always\"]"
153 ))),
154 }
155 }
156}
157
158/// Converts bound options in `&str` format to `PyMemoizeMode`.
159impl FromStr for PyMemoizeMode {
160 type Err = PyErr;
161
162 fn from_str(s: &str) -> Result<Self, Self::Err> {
163 match s.to_lowercase().as_str() {
164 "none" => Ok(PyMemoizeMode::None),
165 "frags-index" => Ok(PyMemoizeMode::FragsIndex),
166 "canon-index" => Ok(PyMemoizeMode::CanonIndex),
167 _ => Err(PyValueError::new_err(format!(
168 "Invalid memoization mode \"{s}\", options are: \
169 [\"none\", \"frags-index\", \"canon-index\"]"
170 ))),
171 }
172 }
173}
174
175/// Converts bound options in `&str` format to `PyKernelMode`.
176impl FromStr for PyKernelMode {
177 type Err = PyErr;
178
179 fn from_str(s: &str) -> Result<Self, Self::Err> {
180 match s.to_lowercase().as_str() {
181 "none" => Ok(PyKernelMode::None),
182 "once" => Ok(PyKernelMode::Once),
183 "depth-one" => Ok(PyKernelMode::DepthOne),
184 "always" => Ok(PyKernelMode::Always),
185 _ => Err(PyValueError::new_err(format!(
186 "Invalid kernelization mode \"{s}\", options are: \
187 [\"none\", \"once\", \"depth-one\", \"always\"]"
188 ))),
189 }
190 }
191}
192
193/// Converts bound options in `&str` format to `PyBound`.
194impl FromStr for PyBound {
195 type Err = PyErr;
196
197 fn from_str(s: &str) -> Result<Self, Self::Err> {
198 match s.to_lowercase().as_str() {
199 "log" => Ok(PyBound::Log),
200 "int" => Ok(PyBound::Int),
201 "vec-simple" => Ok(PyBound::VecSimple),
202 "vec-small-frags" => Ok(PyBound::VecSmallFrags),
203 "cover-sort" => Ok(PyBound::CoverSort),
204 "cover-no-sort" => Ok(PyBound::CoverNoSort),
205 "clique-budget" => Ok(PyBound::CliqueBudget),
206 _ => Err(PyValueError::new_err(format!(
207 "Invalid bound \"{s}\", options are: \
208 [\"log\", \"int\", \"vec-simple\", \"vec-small-frags\", \
209 \"cover-sort\", \"cover-no-sort\", \"clique-budget\"]"
210 ))),
211 }
212 }
213}
214
215/// Converts a `Vec<String>` of bound Python strings into a `Vec<PyBound>`,
216/// raising an error if any bound string is invalid.
217fn process_bound_strs(bound_strs: Vec<String>) -> PyResult<Vec<PyBound>> {
218 bound_strs
219 .iter()
220 .map(|s| s.parse())
221 .collect::<Result<_, _>>()
222}
223
224/// Converts a slice of `PyBound`s into a vector of `bounds::Bound`s.
225fn make_boundlist(pybounds: &[PyBound]) -> Vec<OurBound> {
226 let mut boundlist = pybounds
227 .iter()
228 .flat_map(|b| match b {
229 PyBound::Log => vec![OurBound::Log],
230 PyBound::Int => vec![OurBound::Int],
231 PyBound::VecSimple => vec![OurBound::VecSimple],
232 PyBound::VecSmallFrags => vec![OurBound::VecSmallFrags],
233 PyBound::CoverSort => vec![OurBound::CoverSort],
234 PyBound::CoverNoSort => vec![OurBound::CoverNoSort],
235 PyBound::CliqueBudget => vec![OurBound::CliqueBudget],
236 })
237 .collect::<Vec<_>>();
238 boundlist.dedup();
239 boundlist
240}
241
242/// Get a pretty-printable string of this molecule's graph representation.
243///
244/// Python version of [`crate::molecule::Molecule::info`].
245///
246/// # Python Parameters
247/// - `mol_block`: The contents of a `.mol` file as a `str`.
248///
249/// # Python Returns
250/// - A pretty-printable `str` detailing the molecule's atoms and bonds.
251///
252/// # Python Example
253///
254/// ```custom,{class=language-python}
255/// import assembly_theory as at
256///
257/// # Load a mol block from file.
258/// with open('data/checks/anthracene.mol') as f:
259/// mol_block = f.read()
260///
261/// # Print the molecule's graph structure.
262/// print(at.mol_info(mol_block))
263///
264/// # graph {
265/// # 0 [ label = "Atom { element: Carbon, capacity: 0 }" ]
266/// # 1 [ label = "Atom { element: Carbon, capacity: 0 }" ]
267/// # 2 [ label = "Atom { element: Carbon, capacity: 0 }" ]
268/// # ...
269/// # 0 -- 1 [ label = "Double" ]
270/// # 1 -- 2 [ label = "Single" ]
271/// # 2 -- 5 [ label = "Double" ]
272/// # ...
273/// # }
274/// ```
275#[pyfunction(name = "mol_info")]
276pub fn _mol_info(mol_block: &str) -> PyResult<String> {
277 // Parse the .mol file contents as a molecule::Molecule.
278 let mol_result = parse_molfile_str(mol_block);
279 let mol = match mol_result {
280 Ok(mol) => mol,
281 Err(e) => return Err(e.into()), // Convert the error to PyErr
282 };
283
284 // Return molecule info.
285 Ok(mol.info())
286}
287
288/// Compute assembly depth; see
289/// [Pagel et al. (2024)](https://arxiv.org/abs/2409.05993).
290///
291/// Python version of [`depth`].
292///
293/// # Python Parameters
294/// - `mol_block`: The contents of a `.mol` file as a `str`.
295///
296/// # Python Returns
297/// - The molecule's `int` assembly depth.
298///
299/// # Python Example
300///
301/// ```custom,{class=language-python}
302/// import assembly_theory as at
303///
304/// # Load a mol block from file.
305/// with open('data/checks/benzene.mol') as f:
306/// mol_block = f.read()
307///
308/// # Calculate the molecule's assembly index.
309/// at.depth(mol_block) # 3
310/// ```
311#[pyfunction(name = "depth")]
312pub fn _depth(mol_block: &str) -> PyResult<u32> {
313 // Parse the .mol file contents as a molecule::Molecule.
314 let mol_result = parse_molfile_str(mol_block);
315 let mol = match mol_result {
316 Ok(mol) => mol,
317 Err(e) => return Err(e.into()), // Convert the error to PyErr
318 };
319
320 // Calculate assembly depth.
321 Ok(depth(&mol))
322}
323
324/// Computes a molecule's assembly index using an efficient default strategy.
325///
326/// Python version of [`index`].
327///
328/// # Python Parameters
329/// - `mol_block`: The contents of a `.mol` file as a `str`.
330///
331/// # Python Returns
332/// - The molecule's `int` assembly index.
333///
334/// # Python Example
335///
336/// ```custom,{class=language-python}
337/// import assembly_theory as at
338///
339/// # Load a mol block from file.
340/// with open('data/checks/anthracene.mol') as f:
341/// mol_block = f.read()
342///
343/// # Calculate the molecule's assembly index.
344/// at.index(mol_block) # 6
345/// ```
346#[pyfunction(name = "index")]
347pub fn _index(mol_block: &str) -> PyResult<u32> {
348 // Parse the .mol file contents as a molecule::Molecule.
349 let mol_result = parse_molfile_str(mol_block);
350 let mol = match mol_result {
351 Ok(mol) => mol,
352 Err(e) => return Err(e.into()), // Convert the error to PyErr
353 };
354
355 // Calculate the assembly index.
356 Ok(index(&mol))
357}
358
359/// Computes a molecule's assembly index and related information using a
360/// top-down recursive search, parameterized by the specified options.
361///
362/// Python version of [`index_search`].
363///
364/// # Python Parameters
365///
366/// - `mol_block`: The contents of a `.mol` file as a `str`.
367/// - `enumerate_str`: An enumeration mode from [`"extend"`, `"grow-erode"`
368/// (default)]. See [`EnumerateMode`] for details.
369/// - `canonize_str`: A canonization mode from [`"nauty"`, `"faulon"`,
370/// `"tree-nauty"` (default), `"tree-faulon"`]. See [`CanonizeMode`] for
371/// details.
372/// - `parallel_str`: A parallelization mode from [`"none"`, `"depth-one"`
373/// (default), `"always"`]. See [`ParallelMode`] for details.
374/// - `memoize_str`: A memoization mode from [`none`, `frags-index`,
375/// `canon-index` (default)]. See [`MemoizeMode`] for details.
376/// - `kernel_str`: A kernelization mode from [`"none"` (default), `"once"`,
377/// `"depth-one"`, `"always"`]. See [`KernelMode`] for details.
378/// - `bound_strs`: A list of bounds containing zero or more of [`"log"`,
379/// `"int"`, `"vec-simple"`, `"vec-small-frags"`, `"cover-sort"`,
380/// `"cover-no-sort"`, `"clique-budget"`]. The default bounds are [`"int"`,
381/// `"vec-simple"`, `"vec-small-frags"`]. See [`crate::bounds::Bound`] for
382/// details.
383///
384/// # Python Returns
385///
386/// A 3-tuple containing:
387/// - The molecule's `int` assembly index.
388/// - The molecule's `int` number of non-overlapping isomorphic subgraph pairs.
389/// - The `int` number of assembly states searched.
390///
391/// # Python Example
392///
393/// ```custom,{class=language-python}
394/// import assembly_theory as at
395///
396/// # Load a mol block from file.
397/// with open('data/checks/anthracene.mol') as f:
398/// mol_block = f.read()
399///
400/// # Calculate the molecule's assembly index using the specified options.
401/// (index, num_matches, states_searched) = at.index_search(
402/// mol_block,
403/// "grow-erode",
404/// "tree-nauty",
405/// "none",
406/// "none",
407/// "none",
408/// ["int", "vec-simple", "vec-small-frags"])
409///
410/// print(f"Assembly Index: {index}") # 6
411/// print(f"Non-Overlapping Isomorphic Subgraph Pairs: {num_matches}") # 466
412/// print(f"Assembly States Searched: {states_searched}") # 2562
413/// ```
414#[pyfunction(name = "index_search")]
415#[pyo3(signature = (mol_block, enumerate_str="grow-erode", canonize_str="tree-nauty", parallel_str="depth-one", memoize_str="canon-index", kernel_str="none", bound_strs=vec!["int".to_string(), "vec-simple".to_string(), "vec-small-frags".to_string()]), text_signature = "(mol_block, enumerate_str=\"grow-erode\", canonize_str=\"tree-nauty\", parallel_str=\"depth-one\", memoize_str=\"canon-index\", kernel_str=\"none\", bound_strs=[\"int\", \"vec-simple\", \"vec-small-frags\"]))")]
416pub fn _index_search(
417 mol_block: &str,
418 enumerate_str: &str,
419 canonize_str: &str,
420 parallel_str: &str,
421 memoize_str: &str,
422 kernel_str: &str,
423 bound_strs: Vec<String>,
424) -> PyResult<(u32, u32, usize)> {
425 // Parse the .mol file contents as a molecule::Molecule.
426 let mol_result = parse_molfile_str(mol_block);
427 let mol = match mol_result {
428 Ok(mol) => mol,
429 Err(e) => return Err(e.into()), // Convert the error to PyErr
430 };
431
432 // Parse the various modes and bound options.
433 let enumerate_mode = match PyEnumerateMode::from_str(enumerate_str) {
434 Ok(PyEnumerateMode::Extend) => EnumerateMode::Extend,
435 Ok(PyEnumerateMode::GrowErode) => EnumerateMode::GrowErode,
436 Err(e) => return Err(e),
437 };
438 let canonize_mode = match PyCanonizeMode::from_str(canonize_str) {
439 Ok(PyCanonizeMode::Nauty) => CanonizeMode::Nauty,
440 Ok(PyCanonizeMode::Faulon) => CanonizeMode::Faulon,
441 Ok(PyCanonizeMode::TreeNauty) => CanonizeMode::TreeNauty,
442 Ok(PyCanonizeMode::TreeFaulon) => CanonizeMode::TreeFaulon,
443 Err(e) => return Err(e),
444 };
445 let parallel_mode = match PyParallelMode::from_str(parallel_str) {
446 Ok(PyParallelMode::None) => ParallelMode::None,
447 Ok(PyParallelMode::DepthOne) => ParallelMode::DepthOne,
448 Ok(PyParallelMode::Always) => ParallelMode::Always,
449 Err(e) => return Err(e),
450 };
451 let memoize_mode = match PyMemoizeMode::from_str(memoize_str) {
452 Ok(PyMemoizeMode::None) => MemoizeMode::None,
453 Ok(PyMemoizeMode::FragsIndex) => MemoizeMode::FragsIndex,
454 Ok(PyMemoizeMode::CanonIndex) => MemoizeMode::CanonIndex,
455 Err(e) => return Err(e),
456 };
457 let kernel_mode = match PyKernelMode::from_str(kernel_str) {
458 Ok(PyKernelMode::None) => KernelMode::None,
459 Ok(PyKernelMode::Once) => KernelMode::Once,
460 Ok(PyKernelMode::DepthOne) => KernelMode::DepthOne,
461 Ok(PyKernelMode::Always) => KernelMode::Always,
462 Err(e) => return Err(e),
463 };
464 let pybounds = process_bound_strs(bound_strs)?;
465 let boundlist = make_boundlist(&pybounds);
466
467 // Compute assembly index.
468 Ok(index_search(
469 &mol,
470 enumerate_mode,
471 canonize_mode,
472 parallel_mode,
473 memoize_mode,
474 kernel_mode,
475 &boundlist,
476 ))
477}
478
479/// A Python wrapper for the assembly_theory Rust crate.
480// Registers the listed functions as a Python module named 'assembly_theory';
481// the above line is used as a docstring.
482#[pymodule(name = "assembly_theory")]
483fn _assembly_theory(m: &Bound<'_, PyModule>) -> PyResult<()> {
484 m.add_function(wrap_pyfunction!(_mol_info, m)?)?;
485 m.add_function(wrap_pyfunction!(_depth, m)?)?;
486 m.add_function(wrap_pyfunction!(_index, m)?)?;
487 m.add_function(wrap_pyfunction!(_index_search, m)?)?;
488 Ok(())
489}