assembly_theory/python.rs
1//! Expose public `assembly_theory` functionality to a Python package using
2//! [`pyo3`](https://docs.rs/pyo3/latest/pyo3/).
3//!
4//! The package is available [on PyPI](https://pypi.org/project/assembly-theory/);
5//! see that README for installation and usage instructions. To build the
6//! Python package directly from this crate's source code, see the instructions
7//! in the [GitHub README](https://github.com/DaymudeLab/assembly-theory).
8//!
9//! Note that all Rust functions in this module have the form `_fn_name`, which
10//! correspond to the actual Rust function `fn_name` elsewhere in the crate and
11//! are exposed to the Python package as `fn_name`.
12//!
13//! # Python Example
14//!
15//! ```custom,{class=language-python}
16//! import assembly_theory as at
17//!
18//! # Load a mol block from file...
//! with open('data/checks/anthracene.mol') as f:
//!     mol_block = f.read()
21//!
22//! # ...or define the mol block directly.
23//! mol_block = """
24//!
25//!
26//! 14 16 0 0 0 0 0 0 0 0999 V2000
27//! 25.2202 -16.2366 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
28//! 25.2202 -17.6385 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
29//! 26.4373 -18.3394 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
30//! 26.4373 -15.5356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
31//! 27.6471 -16.2366 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
32//! 27.6412 -17.6385 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
33//! 28.8523 -18.3446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
34//! 28.8644 -15.5409 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
35//! 30.0755 -16.2469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
36//! 30.1327 -17.6453 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
37//! 31.2674 -18.3552 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
38//! 32.4846 -17.6672 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
39//! 32.4973 -16.2688 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
40//! 31.2927 -15.5589 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
41//! 9 8 2 0 0 0
42//! 8 5 1 0 0 0
43//! 5 4 2 0 0 0
44//! 4 1 1 0 0 0
45//! 1 2 2 0 0 0
46//! 2 3 1 0 0 0
47//! 3 6 2 0 0 0
48//! 9 10 1 0 0 0
49//! 10 11 1 0 0 0
50//! 11 12 2 0 0 0
51//! 12 13 1 0 0 0
52//! 13 14 2 0 0 0
53//! 14 9 1 0 0 0
54//! 5 6 1 0 0 0
55//! 6 7 1 0 0 0
56//! 7 10 2 0 0 0
57//! M END"""
58//!
59//! # Calculate the molecule's assembly index.
60//! at.index(mol_block) # 6
61//! ```
62
63use std::str::FromStr;
64
65use pyo3::{
66 exceptions::{PyOSError, PyValueError},
67 prelude::*,
68 PyErr,
69};
70
71use crate::{
72 assembly::{depth, index, index_search, ParallelMode},
73 bounds::Bound as OurBound,
74 canonize::CanonizeMode,
75 kernels::KernelMode,
76 loader::{parse_molfile_str, ParserError},
77 memoize::MemoizeMode,
78};
79
80/// Implement a Python version of [`crate::loader::ParserError`].
81impl From<ParserError> for PyErr {
82 fn from(err: ParserError) -> PyErr {
83 PyOSError::new_err(err.to_string())
84 }
85}
86
87// TODO: Is there a clean way of avoiding the duplication of all our various
88// algorithm variant enums?
89
/// Python-facing counterpart of [`CanonizeMode`]; every variant is translated
/// to the crate variant of the same name before the search runs.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyCanonizeMode {
    /// Selected by the option string `"nauty"`.
    Nauty,
    /// Selected by the option string `"faulon"`.
    Faulon,
    /// Selected by the option string `"tree-nauty"`.
    TreeNauty,
    /// Selected by the option string `"tree-faulon"`.
    TreeFaulon,
}
98
/// Python-facing counterpart of [`ParallelMode`]; every variant is translated
/// to the crate variant of the same name before the search runs.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyParallelMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"depth-one"`.
    DepthOne,
    /// Selected by the option string `"always"`.
    Always,
}
106
/// Python-facing counterpart of [`MemoizeMode`]; every variant is translated
/// to the crate variant of the same name before the search runs.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyMemoizeMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"canon-index"`.
    CanonIndex,
}
113
/// Python-facing counterpart of [`KernelMode`]; every variant is translated
/// to the crate variant of the same name before the search runs.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyKernelMode {
    /// Selected by the option string `"none"`.
    None,
    /// Selected by the option string `"once"`.
    Once,
    /// Selected by the option string `"depth-one"`.
    DepthOne,
    /// Selected by the option string `"always"`.
    Always,
}
122
/// Python-facing counterpart of [`crate::bounds::Bound`]; every variant is
/// translated to the crate variant of the same name before the search runs.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyBound {
    /// Selected by the option string `"log"`.
    Log,
    /// Selected by the option string `"int"`.
    Int,
    /// Selected by the option string `"vec-simple"`.
    VecSimple,
    /// Selected by the option string `"vec-small-frags"`.
    VecSmallFrags,
    /// Selected by the option string `"matchable-edges"`.
    MatchableEdges,
}
132
133/// Converts bound options in `&str` format to `PyCanonizeMode`.
134impl FromStr for PyCanonizeMode {
135 type Err = PyErr;
136
137 fn from_str(s: &str) -> Result<Self, Self::Err> {
138 match s.to_lowercase().as_str() {
139 "nauty" => Ok(PyCanonizeMode::Nauty),
140 "faulon" => Ok(PyCanonizeMode::Faulon),
141 "tree-nauty" => Ok(PyCanonizeMode::TreeNauty),
142 "tree-faulon" => Ok(PyCanonizeMode::TreeFaulon),
143 _ => Err(PyValueError::new_err(format!(
144 "Invalid canonization mode \"{s}\", options are: \
145 [\"nauty\", \"faulon\", \"tree-nauty\", \"tree-faulon\"]"
146 ))),
147 }
148 }
149}
150
151/// Converts bound options in `&str` format to `PyParallelMode`.
152impl FromStr for PyParallelMode {
153 type Err = PyErr;
154
155 fn from_str(s: &str) -> Result<Self, Self::Err> {
156 match s.to_lowercase().as_str() {
157 "none" => Ok(PyParallelMode::None),
158 "depth-one" => Ok(PyParallelMode::DepthOne),
159 "always" => Ok(PyParallelMode::Always),
160 _ => Err(PyValueError::new_err(format!(
161 "Invalid parallelization mode \"{s}\", options are: \
162 [\"none\", \"depth-one\", \"always\"]"
163 ))),
164 }
165 }
166}
167
168/// Converts bound options in `&str` format to `PyMemoizeMode`.
169impl FromStr for PyMemoizeMode {
170 type Err = PyErr;
171
172 fn from_str(s: &str) -> Result<Self, Self::Err> {
173 match s.to_lowercase().as_str() {
174 "none" => Ok(PyMemoizeMode::None),
175 "canon-index" => Ok(PyMemoizeMode::CanonIndex),
176 _ => Err(PyValueError::new_err(format!(
177 "Invalid memoization mode \"{s}\", options are: \
178 [\"none\", \"frags-index\", \"canon-index\"]"
179 ))),
180 }
181 }
182}
183
184/// Converts bound options in `&str` format to `PyKernelMode`.
185impl FromStr for PyKernelMode {
186 type Err = PyErr;
187
188 fn from_str(s: &str) -> Result<Self, Self::Err> {
189 match s.to_lowercase().as_str() {
190 "none" => Ok(PyKernelMode::None),
191 "once" => Ok(PyKernelMode::Once),
192 "depth-one" => Ok(PyKernelMode::DepthOne),
193 "always" => Ok(PyKernelMode::Always),
194 _ => Err(PyValueError::new_err(format!(
195 "Invalid kernelization mode \"{s}\", options are: \
196 [\"none\", \"once\", \"depth-one\", \"always\"]"
197 ))),
198 }
199 }
200}
201
202/// Converts bound options in `&str` format to `PyBound`.
203impl FromStr for PyBound {
204 type Err = PyErr;
205
206 fn from_str(s: &str) -> Result<Self, Self::Err> {
207 match s.to_lowercase().as_str() {
208 "log" => Ok(PyBound::Log),
209 "int" => Ok(PyBound::Int),
210 "vec-simple" => Ok(PyBound::VecSimple),
211 "vec-small-frags" => Ok(PyBound::VecSmallFrags),
212 "matchable-edges" => Ok(PyBound::MatchableEdges),
213 _ => Err(PyValueError::new_err(format!(
214 "Invalid bound \"{s}\", options are: \
215 [\"log\", \"int\", \"vec-simple\", \"vec-small-frags\", \
216 \"matchable-edges\"]"
217 ))),
218 }
219 }
220}
221
222/// Converts a `Vec<String>` of bound Python strings into a `Vec<PyBound>`,
223/// raising an error if any bound string is invalid.
224fn process_bound_strs(bound_strs: Vec<String>) -> PyResult<Vec<PyBound>> {
225 bound_strs
226 .iter()
227 .map(|s| s.parse())
228 .collect::<Result<_, _>>()
229}
230
231/// Converts a slice of `PyBound`s into a vector of `bounds::Bound`s.
232fn make_boundlist(pybounds: &[PyBound]) -> Vec<OurBound> {
233 let mut boundlist = pybounds
234 .iter()
235 .flat_map(|b| match b {
236 PyBound::Log => vec![OurBound::Log],
237 PyBound::Int => vec![OurBound::Int],
238 PyBound::VecSimple => vec![OurBound::VecSimple],
239 PyBound::VecSmallFrags => vec![OurBound::VecSmallFrags],
240 PyBound::MatchableEdges => vec![OurBound::MatchableEdges],
241 })
242 .collect::<Vec<_>>();
243 boundlist.dedup();
244 boundlist
245}
246
247/// Get a pretty-printable string of this molecule's graph representation.
248///
249/// Python version of [`crate::molecule::Molecule::info`].
250///
251/// # Python Parameters
252/// - `mol_block`: The contents of a `.mol` file as a `str`.
253///
254/// # Python Returns
255/// - A pretty-printable `str` detailing the molecule's atoms and bonds.
256///
257/// # Python Example
258///
259/// ```custom,{class=language-python}
260/// import assembly_theory as at
261///
262/// # Load a mol block from file.
/// with open('data/checks/anthracene.mol') as f:
///     mol_block = f.read()
265///
266/// # Print the molecule's graph structure.
267/// print(at.mol_info(mol_block))
268///
269/// # graph {
270/// # 0 [ label = "Atom { element: Carbon, capacity: 0 }" ]
271/// # 1 [ label = "Atom { element: Carbon, capacity: 0 }" ]
272/// # 2 [ label = "Atom { element: Carbon, capacity: 0 }" ]
273/// # ...
274/// # 0 -- 1 [ label = "Double" ]
275/// # 1 -- 2 [ label = "Single" ]
276/// # 2 -- 5 [ label = "Double" ]
277/// # ...
278/// # }
279/// ```
280#[pyfunction(name = "mol_info")]
281pub fn _mol_info(mol_block: &str) -> PyResult<String> {
282 // Parse the .mol file contents as a molecule::Molecule.
283 let mol_result = parse_molfile_str(mol_block);
284 let mol = match mol_result {
285 Ok(mol) => mol,
286 Err(e) => return Err(e.into()), // Convert the error to PyErr
287 };
288
289 // Return molecule info.
290 Ok(mol.info())
291}
292
293/// Compute assembly depth; see
294/// [Pagel et al. (2024)](https://arxiv.org/abs/2409.05993).
295///
296/// Python version of [`depth`].
297///
298/// # Python Parameters
299/// - `mol_block`: The contents of a `.mol` file as a `str`.
300///
301/// # Python Returns
302/// - The molecule's `int` assembly depth.
303///
304/// # Python Example
305///
306/// ```custom,{class=language-python}
307/// import assembly_theory as at
308///
309/// # Load a mol block from file.
310/// with open('data/checks/benzene.mol') as f:
311/// mol_block = f.read()
312///
313/// # Calculate the molecule's assembly index.
314/// at.depth(mol_block) # 3
315/// ```
316#[pyfunction(name = "depth")]
317pub fn _depth(mol_block: &str) -> PyResult<u32> {
318 // Parse the .mol file contents as a molecule::Molecule.
319 let mol_result = parse_molfile_str(mol_block);
320 let mol = match mol_result {
321 Ok(mol) => mol,
322 Err(e) => return Err(e.into()), // Convert the error to PyErr
323 };
324
325 // Calculate assembly depth.
326 Ok(depth(&mol))
327}
328
329/// Computes a molecule's assembly index using an efficient default strategy.
330///
331/// Python version of [`index`].
332///
333/// # Python Parameters
334/// - `mol_block`: The contents of a `.mol` file as a `str`.
335///
336/// # Python Returns
337/// - The molecule's `int` assembly index.
338///
339/// # Python Example
340///
341/// ```custom,{class=language-python}
342/// import assembly_theory as at
343///
344/// # Load a mol block from file.
/// with open('data/checks/anthracene.mol') as f:
///     mol_block = f.read()
347///
348/// # Calculate the molecule's assembly index.
349/// at.index(mol_block) # 6
350/// ```
351#[pyfunction(name = "index")]
352pub fn _index(mol_block: &str) -> PyResult<u32> {
353 // Parse the .mol file contents as a molecule::Molecule.
354 let mol_result = parse_molfile_str(mol_block);
355 let mol = match mol_result {
356 Ok(mol) => mol,
357 Err(e) => return Err(e.into()), // Convert the error to PyErr
358 };
359
360 // Calculate the assembly index.
361 Ok(index(&mol))
362}
363
364/// Computes a molecule's assembly index and related information using a
365/// top-down recursive search, parameterized by the specified options.
366///
367/// Python version of [`index_search`].
368///
369/// # Python Parameters
370///
371/// - `mol_block`: The contents of a `.mol` file as a `str`.
372/// - `canonize_str`: A canonization mode from [`"nauty"`, `"faulon"`,
373/// `"tree-nauty"` (default), `"tree-faulon"`]. See [`CanonizeMode`] for
374/// details.
375/// - `parallel_str`: A parallelization mode from [`"none"`, `"depth-one"`
376/// (default), `"always"`]. See [`ParallelMode`] for details.
377/// - `memoize_str`: A memoization mode from [`none`, `frags-index`,
378/// `canon-index` (default)]. See [`MemoizeMode`] for details.
379/// - `kernel_str`: A kernelization mode from [`"none"` (default), `"once"`,
380/// `"depth-one"`, `"always"`]. See [`KernelMode`] for details.
381/// - `bound_strs`: A list of bounds containing zero or more of [`"log"`,
382/// `"int"`, `"vec-simple"`, `"vec-small-frags"`, `"matchable-edges"`].
383/// The default bounds are [`"int"`, `"matchable-edges"`]. See
384/// [`crate::bounds::Bound`] for details.
385///
386/// # Python Returns
387///
388/// A 3-tuple containing:
389/// - The molecule's `int` assembly index.
390/// - The molecule's `int` number of edge-disjoint isomorphic subgraph pairs.
391/// - The `int` number of assembly states searched.
392///
393/// # Python Example
394///
395/// ```custom,{class=language-python}
396/// import assembly_theory as at
397///
398/// # Load a mol block from file.
399/// with open('data/checks/anthracene.mol') as f:
400/// mol_block = f.read()
401///
402/// # Calculate the molecule's assembly index using the specified options.
403/// (index, num_matches, states_searched) = at.index_search(
404/// mol_block,
405/// "tree-nauty",
406/// "none",
407/// "none",
408/// "none",
409/// ["int", "matchable-edges"])
410///
411/// print(f"Assembly Index: {index}") # 6
412/// print(f"Edge-Disjoint Isomorphic Subgraph Pairs: {num_matches}") # 466
413/// print(f"Assembly States Searched: {states_searched}") # 491
414/// ```
415#[pyfunction(name = "index_search")]
416#[pyo3(signature = (mol_block, canonize_str="tree-nauty", parallel_str="depth-one", memoize_str="canon-index", kernel_str="none", bound_strs=vec!["int".to_string(), "matchable-edges".to_string()]), text_signature = "(mol_block, canonize_str=\"tree-nauty\", parallel_str=\"depth-one\", memoize_str=\"canon-index\", kernel_str=\"none\", bound_strs=[\"int\", \"matchable-edges\"]))")]
417pub fn _index_search(
418 mol_block: &str,
419 canonize_str: &str,
420 parallel_str: &str,
421 memoize_str: &str,
422 kernel_str: &str,
423 bound_strs: Vec<String>,
424) -> PyResult<(u32, u32, usize)> {
425 // Parse the .mol file contents as a molecule::Molecule.
426 let mol_result = parse_molfile_str(mol_block);
427 let mol = match mol_result {
428 Ok(mol) => mol,
429 Err(e) => return Err(e.into()), // Convert the error to PyErr
430 };
431
432 // Parse the various modes and bound options.
433 let canonize_mode = match PyCanonizeMode::from_str(canonize_str) {
434 Ok(PyCanonizeMode::Nauty) => CanonizeMode::Nauty,
435 Ok(PyCanonizeMode::Faulon) => CanonizeMode::Faulon,
436 Ok(PyCanonizeMode::TreeNauty) => CanonizeMode::TreeNauty,
437 Ok(PyCanonizeMode::TreeFaulon) => CanonizeMode::TreeFaulon,
438 Err(e) => return Err(e),
439 };
440 let parallel_mode = match PyParallelMode::from_str(parallel_str) {
441 Ok(PyParallelMode::None) => ParallelMode::None,
442 Ok(PyParallelMode::DepthOne) => ParallelMode::DepthOne,
443 Ok(PyParallelMode::Always) => ParallelMode::Always,
444 Err(e) => return Err(e),
445 };
446 let memoize_mode = match PyMemoizeMode::from_str(memoize_str) {
447 Ok(PyMemoizeMode::None) => MemoizeMode::None,
448 Ok(PyMemoizeMode::CanonIndex) => MemoizeMode::CanonIndex,
449 Err(e) => return Err(e),
450 };
451 let kernel_mode = match PyKernelMode::from_str(kernel_str) {
452 Ok(PyKernelMode::None) => KernelMode::None,
453 Ok(PyKernelMode::Once) => KernelMode::Once,
454 Ok(PyKernelMode::DepthOne) => KernelMode::DepthOne,
455 Ok(PyKernelMode::Always) => KernelMode::Always,
456 Err(e) => return Err(e),
457 };
458 let pybounds = process_bound_strs(bound_strs)?;
459 let boundlist = make_boundlist(&pybounds);
460
461 // Compute assembly index.
462 Ok(index_search(
463 &mol,
464 canonize_mode,
465 parallel_mode,
466 memoize_mode,
467 kernel_mode,
468 &boundlist,
469 ))
470}
471
472/// A Python wrapper for the assembly_theory Rust crate.
473// Registers the listed functions as a Python module named 'assembly_theory';
474// the above line is used as a docstring.
475#[pymodule(name = "assembly_theory")]
476fn _assembly_theory(m: &Bound<'_, PyModule>) -> PyResult<()> {
477 m.add_function(wrap_pyfunction!(_mol_info, m)?)?;
478 m.add_function(wrap_pyfunction!(_depth, m)?)?;
479 m.add_function(wrap_pyfunction!(_index, m)?)?;
480 m.add_function(wrap_pyfunction!(_index_search, m)?)?;
481 Ok(())
482}