assembly_theory/python.rs
1//! Expose public `assembly_theory` functionality to a Python package using
2//! [`pyo3`](https://docs.rs/pyo3/latest/pyo3/).
3//!
4//! The package is available [on PyPI](https://pypi.org/project/assembly-theory/);
5//! see that README for installation and usage instructions. To build the
6//! Python package directly from this crate's source code, see the instructions
7//! in the [GitHub README](https://github.com/DaymudeLab/assembly-theory).
8//!
9//! Note that all Rust functions in this module have the form `_fn_name`, which
10//! correspond to the actual Rust function `fn_name` elsewhere in the crate and
11//! are exposed to the Python package as `fn_name`.
12//!
13//! # Python Example
14//!
15//! ```custom,{class=language-python}
16//! import assembly_theory as at
17//!
18//! # Load a mol block from file...
19//! with open('data/checks/anthracene.mol') as f:
20//! mol_block = f.read()
21//!
22//! # ...or define the mol block directly.
23//! mol_block = """
24//!
25//!
26//! 14 16 0 0 0 0 0 0 0 0999 V2000
27//! 25.2202 -16.2366 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
28//! 25.2202 -17.6385 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
29//! 26.4373 -18.3394 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
30//! 26.4373 -15.5356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
31//! 27.6471 -16.2366 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
32//! 27.6412 -17.6385 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
33//! 28.8523 -18.3446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
34//! 28.8644 -15.5409 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
35//! 30.0755 -16.2469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
36//! 30.1327 -17.6453 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
37//! 31.2674 -18.3552 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
38//! 32.4846 -17.6672 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
39//! 32.4973 -16.2688 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
40//! 31.2927 -15.5589 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
41//! 9 8 2 0 0 0
42//! 8 5 1 0 0 0
43//! 5 4 2 0 0 0
44//! 4 1 1 0 0 0
45//! 1 2 2 0 0 0
46//! 2 3 1 0 0 0
47//! 3 6 2 0 0 0
48//! 9 10 1 0 0 0
49//! 10 11 1 0 0 0
50//! 11 12 2 0 0 0
51//! 12 13 1 0 0 0
52//! 13 14 2 0 0 0
53//! 14 9 1 0 0 0
54//! 5 6 1 0 0 0
55//! 6 7 1 0 0 0
56//! 7 10 2 0 0 0
57//! M END"""
58//!
59//! # Calculate the molecule's assembly index.
60//! at.index(mol_block) # 6
61//! ```
62
63use std::str::FromStr;
64
65use pyo3::{
66 exceptions::{PyOSError, PyValueError},
67 prelude::*,
68 PyErr,
69};
70
71use crate::{
72 assembly::{depth, index, index_search, ParallelMode},
73 bounds::Bound as OurBound,
74 canonize::CanonizeMode,
75 kernels::KernelMode,
76 loader::{parse_molfile_str, ParserError},
77 memoize::MemoizeMode,
78};
79
80/// Implement a Python version of [`crate::loader::ParserError`].
81impl From<ParserError> for PyErr {
82 fn from(err: ParserError) -> PyErr {
83 PyOSError::new_err(err.to_string())
84 }
85}
86
87// TODO: Is there a clean way of avoiding the duplication of all our various
88// algorithm variant enums?
89
/// Python-facing counterpart of the [`CanonizeMode`] enum.
///
/// Values are obtained by parsing the `canonize_str` option string and are
/// translated variant-for-variant into [`CanonizeMode`] by `_index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyCanonizeMode {
    /// Parsed from `"nauty"`.
    Nauty,
    /// Parsed from `"faulon"`.
    Faulon,
    /// Parsed from `"tree-nauty"`.
    TreeNauty,
    /// Parsed from `"tree-faulon"`.
    TreeFaulon,
}
98
/// Python-facing counterpart of the [`ParallelMode`] enum.
///
/// Values are obtained by parsing the `parallel_str` option string and are
/// translated variant-for-variant into [`ParallelMode`] by `_index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyParallelMode {
    /// Parsed from `"none"`.
    None,
    /// Parsed from `"depth-one"`.
    DepthOne,
    /// Parsed from `"always"`.
    Always,
}
106
/// Python-facing counterpart of the [`MemoizeMode`] enum.
///
/// Values are obtained by parsing the `memoize_str` option string and are
/// translated variant-for-variant into [`MemoizeMode`] by `_index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyMemoizeMode {
    /// Parsed from `"none"`.
    None,
    /// Parsed from `"canon-index"`.
    CanonIndex,
}
113
/// Python-facing counterpart of the [`KernelMode`] enum.
///
/// Values are obtained by parsing the `kernel_str` option string and are
/// translated variant-for-variant into [`KernelMode`] by `_index_search`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyKernelMode {
    /// Parsed from `"none"`.
    None,
    /// Parsed from `"once"`.
    Once,
    /// Parsed from `"depth-one"`.
    DepthOne,
    /// Parsed from `"always"`.
    Always,
}
122
/// Python-facing counterpart of the [`crate::bounds::Bound`] enum (imported
/// in this module as `OurBound`).
///
/// Values are obtained by parsing the entries of the `bound_strs` option and
/// are translated variant-for-variant into crate bounds by `make_boundlist`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyBound {
    /// Parsed from `"log"`.
    Log,
    /// Parsed from `"int"`.
    Int,
    /// Parsed from `"vec-simple"`.
    VecSimple,
    /// Parsed from `"vec-small-frags"`.
    VecSmallFrags,
    /// Parsed from `"matchable-edges"`.
    MatchableEdges,
}
132
133/// Converts bound options in `&str` format to `PyCanonizeMode`.
134impl FromStr for PyCanonizeMode {
135 type Err = PyErr;
136
137 fn from_str(s: &str) -> Result<Self, Self::Err> {
138 match s.to_lowercase().as_str() {
139 "nauty" => Ok(PyCanonizeMode::Nauty),
140 "faulon" => Ok(PyCanonizeMode::Faulon),
141 "tree-nauty" => Ok(PyCanonizeMode::TreeNauty),
142 "tree-faulon" => Ok(PyCanonizeMode::TreeFaulon),
143 _ => Err(PyValueError::new_err(format!(
144 "Invalid canonization mode \"{s}\", options are: \
145 [\"nauty\", \"faulon\", \"tree-nauty\", \"tree-faulon\"]"
146 ))),
147 }
148 }
149}
150
151/// Converts bound options in `&str` format to `PyParallelMode`.
152impl FromStr for PyParallelMode {
153 type Err = PyErr;
154
155 fn from_str(s: &str) -> Result<Self, Self::Err> {
156 match s.to_lowercase().as_str() {
157 "none" => Ok(PyParallelMode::None),
158 "depth-one" => Ok(PyParallelMode::DepthOne),
159 "always" => Ok(PyParallelMode::Always),
160 _ => Err(PyValueError::new_err(format!(
161 "Invalid parallelization mode \"{s}\", options are: \
162 [\"none\", \"depth-one\", \"always\"]"
163 ))),
164 }
165 }
166}
167
168/// Converts bound options in `&str` format to `PyMemoizeMode`.
169impl FromStr for PyMemoizeMode {
170 type Err = PyErr;
171
172 fn from_str(s: &str) -> Result<Self, Self::Err> {
173 match s.to_lowercase().as_str() {
174 "none" => Ok(PyMemoizeMode::None),
175 "canon-index" => Ok(PyMemoizeMode::CanonIndex),
176 _ => Err(PyValueError::new_err(format!(
177 "Invalid memoization mode \"{s}\", options are: \
178 [\"none\", \"frags-index\", \"canon-index\"]"
179 ))),
180 }
181 }
182}
183
184/// Converts bound options in `&str` format to `PyKernelMode`.
185impl FromStr for PyKernelMode {
186 type Err = PyErr;
187
188 fn from_str(s: &str) -> Result<Self, Self::Err> {
189 match s.to_lowercase().as_str() {
190 "none" => Ok(PyKernelMode::None),
191 "once" => Ok(PyKernelMode::Once),
192 "depth-one" => Ok(PyKernelMode::DepthOne),
193 "always" => Ok(PyKernelMode::Always),
194 _ => Err(PyValueError::new_err(format!(
195 "Invalid kernelization mode \"{s}\", options are: \
196 [\"none\", \"once\", \"depth-one\", \"always\"]"
197 ))),
198 }
199 }
200}
201
202/// Converts bound options in `&str` format to `PyBound`.
203impl FromStr for PyBound {
204 type Err = PyErr;
205
206 fn from_str(s: &str) -> Result<Self, Self::Err> {
207 match s.to_lowercase().as_str() {
208 "log" => Ok(PyBound::Log),
209 "int" => Ok(PyBound::Int),
210 "vec-simple" => Ok(PyBound::VecSimple),
211 "vec-small-frags" => Ok(PyBound::VecSmallFrags),
212 "matchable-edges" => Ok(PyBound::MatchableEdges),
213 _ => Err(PyValueError::new_err(format!(
214 "Invalid bound \"{s}\", options are: \
215 [\"log\", \"int\", \"vec-simple\", \"vec-small-frags\", \
216 \"matchable-edges\"]"
217 ))),
218 }
219 }
220}
221
222/// Converts a `Vec<String>` of bound Python strings into a `Vec<PyBound>`,
223/// raising an error if any bound string is invalid.
224fn process_bound_strs(bound_strs: Vec<String>) -> PyResult<Vec<PyBound>> {
225 bound_strs
226 .iter()
227 .map(|s| s.parse())
228 .collect::<Result<_, _>>()
229}
230
231/// Converts a slice of `PyBound`s into a vector of `bounds::Bound`s.
232fn make_boundlist(pybounds: &[PyBound]) -> Vec<OurBound> {
233 let mut boundlist = pybounds
234 .iter()
235 .flat_map(|b| match b {
236 PyBound::Log => vec![OurBound::Log],
237 PyBound::Int => vec![OurBound::Int],
238 PyBound::VecSimple => vec![OurBound::VecSimple],
239 PyBound::VecSmallFrags => vec![OurBound::VecSmallFrags],
240 PyBound::MatchableEdges => vec![OurBound::MatchableEdges],
241 })
242 .collect::<Vec<_>>();
243 boundlist.dedup();
244 boundlist
245}
246
247/// Get a pretty-printable string of this molecule's graph representation.
248///
249/// Python version of [`crate::molecule::Molecule::info`].
250///
251/// # Python Parameters
252/// - `mol_block`: The contents of a `.mol` file as a `str`.
253///
254/// # Python Returns
255/// - A pretty-printable `str` detailing the molecule's atoms and bonds.
256///
257/// # Python Example
258///
259/// ```custom,{class=language-python}
260/// import assembly_theory as at
261///
262/// # Load a mol block from file.
263/// with open('data/checks/anthracene.mol') as f:
264/// mol_block = f.read()
265///
266/// # Print the molecule's graph structure.
267/// print(at.mol_info(mol_block))
268///
269/// # graph {
270/// # 0 [ label = "Atom { element: Carbon, capacity: 0 }" ]
271/// # 1 [ label = "Atom { element: Carbon, capacity: 0 }" ]
272/// # 2 [ label = "Atom { element: Carbon, capacity: 0 }" ]
273/// # ...
274/// # 0 -- 1 [ label = "Double" ]
275/// # 1 -- 2 [ label = "Single" ]
276/// # 2 -- 5 [ label = "Double" ]
277/// # ...
278/// # }
279/// ```
280#[pyfunction(name = "mol_info")]
281pub fn _mol_info(mol_block: &str) -> PyResult<String> {
282 // Parse the .mol file contents as a molecule::Molecule.
283 let mol = parse_molfile_str(mol_block)?;
284
285 // Return molecule info.
286 Ok(mol.info())
287}
288
289/// Compute assembly depth; see
290/// [Pagel et al. (2024)](https://arxiv.org/abs/2409.05993).
291///
292/// Python version of [`depth`].
293///
294/// # Python Parameters
295/// - `mol_block`: The contents of a `.mol` file as a `str`.
296///
297/// # Python Returns
298/// - The molecule's `int` assembly depth.
299///
300/// # Python Example
301///
302/// ```custom,{class=language-python}
303/// import assembly_theory as at
304///
305/// # Load a mol block from file.
306/// with open('data/checks/benzene.mol') as f:
307/// mol_block = f.read()
308///
309/// # Calculate the molecule's assembly index.
310/// at.depth(mol_block) # 3
311/// ```
312#[pyfunction(name = "depth")]
313pub fn _depth(mol_block: &str) -> PyResult<u32> {
314 // Parse the .mol file contents as a molecule::Molecule.
315 let mol = parse_molfile_str(mol_block)?;
316
317 // Calculate assembly depth.
318 Ok(depth(&mol))
319}
320
321/// Computes a molecule's assembly index using an efficient default strategy.
322///
323/// Python version of [`index`].
324///
325/// # Python Parameters
326/// - `mol_block`: The contents of a `.mol` file as a `str`.
327///
328/// # Python Returns
329/// - The molecule's `int` assembly index.
330///
331/// # Python Example
332///
333/// ```custom,{class=language-python}
334/// import assembly_theory as at
335///
336/// # Load a mol block from file.
337/// with open('data/checks/anthracene.mol') as f:
338/// mol_block = f.read()
339///
340/// # Calculate the molecule's assembly index.
341/// at.index(mol_block) # 6
342/// ```
343#[pyfunction(name = "index")]
344pub fn _index(mol_block: &str) -> PyResult<u32> {
345 // Parse the .mol file contents as a molecule::Molecule.
346 let mol = parse_molfile_str(mol_block)?;
347
348 // Calculate the assembly index.
349 Ok(index(&mol))
350}
351
352/// Computes a molecule's assembly index and related information using a
353/// top-down recursive search, parameterized by the specified options.
354///
355/// Python version of [`index_search`].
356///
357/// # Python Parameters
358///
359/// - `mol_block`: The contents of a `.mol` file as a `str`.
360/// - `timeout`: An `int` duration in milliseconds after which search is
361/// stopped and the best assembly index found so far is returned, or `None` if
362/// search is run until the true assembly index is found.
363/// - `canonize_str`: A canonization mode from [`"nauty"`, `"faulon"`,
364/// `"tree-nauty"` (default), `"tree-faulon"`]. See [`CanonizeMode`] for
365/// details.
366/// - `parallel_str`: A parallelization mode from [`"none"`, `"depth-one"`
367/// (default), `"always"`]. See [`ParallelMode`] for details.
368/// - `memoize_str`: A memoization mode from [`none`, `frags-index`,
369/// `canon-index` (default)]. See [`MemoizeMode`] for details.
370/// - `kernel_str`: A kernelization mode from [`"none"` (default), `"once"`,
371/// `"depth-one"`, `"always"`]. See [`KernelMode`] for details.
372/// - `bound_strs`: A list of bounds containing zero or more of [`"log"`,
373/// `"int"`, `"vec-simple"`, `"vec-small-frags"`, `"matchable-edges"`].
374/// The default bounds are [`"int"`, `"matchable-edges"`]. See
375/// [`crate::bounds::Bound`] for details.
376///
377/// # Python Returns
378///
379/// A 3-tuple containing:
380/// - The molecule's `int` assembly index (or an upper bound if timed out).
381/// - The molecule's `int` number of edge-disjoint isomorphic subgraph pairs.
382/// - The `int` number of assembly states searched, or `None` if timed out.
383///
384/// # Python Example
385///
386/// ```custom,{class=language-python}
387/// import assembly_theory as at
388///
389/// # Load a mol block from file.
390/// with open('data/checks/anthracene.mol') as f:
391/// mol_block = f.read()
392///
393/// # Calculate the molecule's assembly index using the specified options.
394/// (index, num_matches, states_searched) = at.index_search(
395/// mol_block,
396/// timeout=None,
397/// canonize_str="tree-nauty",
398/// parallel_str="none",
399/// memoize_str="none",
400/// kernel_str="none",
401/// bound_strs=["int", "matchable-edges"])
402///
403/// print(f"Assembly Index: {index}") # 6
404/// print(f"Matches: {num_matches}") # 466
405/// print(f"States Searched: {states_searched}") # 491
406/// ```
407#[pyfunction(name = "index_search")]
408#[pyo3(signature = (mol_block, timeout=None, canonize_str="tree-nauty", parallel_str="depth-one", memoize_str="canon-index", kernel_str="none", bound_strs=vec!["int".to_string(), "matchable-edges".to_string()]), text_signature = "(mol_block, canonize_str=\"tree-nauty\", parallel_str=\"depth-one\", memoize_str=\"canon-index\", kernel_str=\"none\", bound_strs=[\"int\", \"matchable-edges\"]))")]
409pub fn _index_search(
410 mol_block: &str,
411 timeout: Option<u64>,
412 canonize_str: &str,
413 parallel_str: &str,
414 memoize_str: &str,
415 kernel_str: &str,
416 bound_strs: Vec<String>,
417) -> PyResult<(u32, u32, Option<usize>)> {
418 // Parse the .mol file contents as a molecule::Molecule.
419 let mol = parse_molfile_str(mol_block)?;
420
421 // Parse the various modes and bound options.
422 let canonize_mode = match PyCanonizeMode::from_str(canonize_str) {
423 Ok(PyCanonizeMode::Nauty) => CanonizeMode::Nauty,
424 Ok(PyCanonizeMode::Faulon) => CanonizeMode::Faulon,
425 Ok(PyCanonizeMode::TreeNauty) => CanonizeMode::TreeNauty,
426 Ok(PyCanonizeMode::TreeFaulon) => CanonizeMode::TreeFaulon,
427 Err(e) => return Err(e),
428 };
429 let parallel_mode = match PyParallelMode::from_str(parallel_str) {
430 Ok(PyParallelMode::None) => ParallelMode::None,
431 Ok(PyParallelMode::DepthOne) => ParallelMode::DepthOne,
432 Ok(PyParallelMode::Always) => ParallelMode::Always,
433 Err(e) => return Err(e),
434 };
435 let memoize_mode = match PyMemoizeMode::from_str(memoize_str) {
436 Ok(PyMemoizeMode::None) => MemoizeMode::None,
437 Ok(PyMemoizeMode::CanonIndex) => MemoizeMode::CanonIndex,
438 Err(e) => return Err(e),
439 };
440 let kernel_mode = match PyKernelMode::from_str(kernel_str) {
441 Ok(PyKernelMode::None) => KernelMode::None,
442 Ok(PyKernelMode::Once) => KernelMode::Once,
443 Ok(PyKernelMode::DepthOne) => KernelMode::DepthOne,
444 Ok(PyKernelMode::Always) => KernelMode::Always,
445 Err(e) => return Err(e),
446 };
447 let pybounds = process_bound_strs(bound_strs)?;
448 let boundlist = make_boundlist(&pybounds);
449
450 // Compute assembly index.
451 Ok(index_search(
452 &mol,
453 timeout,
454 canonize_mode,
455 parallel_mode,
456 memoize_mode,
457 kernel_mode,
458 &boundlist,
459 ))
460}
461
462/// A Python wrapper for the assembly_theory Rust crate.
463// Registers the listed functions as a Python module named 'assembly_theory';
464// the above line is used as a docstring.
465#[pymodule(name = "assembly_theory")]
466fn _assembly_theory(m: &Bound<'_, PyModule>) -> PyResult<()> {
467 m.add_function(wrap_pyfunction!(_mol_info, m)?)?;
468 m.add_function(wrap_pyfunction!(_depth, m)?)?;
469 m.add_function(wrap_pyfunction!(_index, m)?)?;
470 m.add_function(wrap_pyfunction!(_index_search, m)?)?;
471 Ok(())
472}