Skip to main content

assembly_theory/
python.rs

1//! Expose public `assembly_theory` functionality to a Python package using
2//! [`pyo3`](https://docs.rs/pyo3/latest/pyo3/).
3//!
4//! The package is available [on PyPI](https://pypi.org/project/assembly-theory/);
5//! see that README for installation and usage instructions. To build the
6//! Python package directly from this crate's source code, see the instructions
7//! in the [GitHub README](https://github.com/DaymudeLab/assembly-theory).
8//!
9//! Note that all Rust functions in this module have the form `_fn_name`, which
10//! correspond to the actual Rust function `fn_name` elsewhere in the crate and
11//! are exposed to the Python package as `fn_name`.
12//!
13//! # Python Example
14//!
15//! ```custom,{class=language-python}
16//! import assembly_theory as at
17//!
18//! # Load a mol block from file...
19//! with open('data/checks/anthracene.mol') as f:
20//!     mol_block = f.read()
21//!
22//! # ...or define the mol block directly.
23//! mol_block = """
24//!
25//!
26//!  14 16  0  0  0  0  0  0  0  0999 V2000
27//!    25.2202  -16.2366    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
28//!    25.2202  -17.6385    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
29//!    26.4373  -18.3394    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
30//!    26.4373  -15.5356    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
31//!    27.6471  -16.2366    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
32//!    27.6412  -17.6385    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
33//!    28.8523  -18.3446    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
34//!    28.8644  -15.5409    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
35//!    30.0755  -16.2469    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
36//!    30.1327  -17.6453    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
37//!    31.2674  -18.3552    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
38//!    32.4846  -17.6672    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
39//!    32.4973  -16.2688    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
40//!    31.2927  -15.5589    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
41//!   9  8  2  0     0  0
42//!   8  5  1  0     0  0
43//!   5  4  2  0     0  0
44//!   4  1  1  0     0  0
45//!   1  2  2  0     0  0
46//!   2  3  1  0     0  0
47//!   3  6  2  0     0  0
48//!   9 10  1  0     0  0
49//!  10 11  1  0     0  0
50//!  11 12  2  0     0  0
51//!  12 13  1  0     0  0
52//!  13 14  2  0     0  0
53//!  14  9  1  0     0  0
54//!   5  6  1  0     0  0
55//!   6  7  1  0     0  0
56//!   7 10  2  0     0  0
57//! M  END"""
58//!
59//! # Calculate the molecule's assembly index.
60//! at.index(mol_block)  # 6
61//! ```
62
63use std::str::FromStr;
64
65use pyo3::{
66    exceptions::{PyOSError, PyValueError},
67    prelude::*,
68    PyErr,
69};
70
71use crate::{
72    assembly::{depth, index, index_search, ParallelMode},
73    bounds::Bound as OurBound,
74    canonize::CanonizeMode,
75    kernels::KernelMode,
76    loader::{parse_molfile_str, ParserError},
77    memoize::MemoizeMode,
78};
79
80/// Implement a Python version of [`crate::loader::ParserError`].
81impl From<ParserError> for PyErr {
82    fn from(err: ParserError) -> PyErr {
83        PyOSError::new_err(err.to_string())
84    }
85}
86
87// TODO: Is there a clean way of avoiding the duplication of all our various
88// algorithm variant enums?
89
/// Python-facing counterpart of the [`CanonizeMode`] enum.
///
/// Each variant is selected by a (case-insensitive) option string and is
/// translated to the `CanonizeMode` variant of the same name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum PyCanonizeMode {
    /// Option string `"nauty"`.
    Nauty,
    /// Option string `"faulon"`.
    Faulon,
    /// Option string `"tree-nauty"`.
    TreeNauty,
    /// Option string `"tree-faulon"`.
    TreeFaulon,
}
98
/// Python-facing counterpart of the [`ParallelMode`] enum.
///
/// Each variant is selected by a (case-insensitive) option string and is
/// translated to the `ParallelMode` variant of the same name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum PyParallelMode {
    /// Option string `"none"`.
    None,
    /// Option string `"depth-one"`.
    DepthOne,
    /// Option string `"always"`.
    Always,
}
106
/// Python-facing counterpart of the [`MemoizeMode`] enum.
///
/// Each variant is selected by a (case-insensitive) option string and is
/// translated to the `MemoizeMode` variant of the same name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum PyMemoizeMode {
    /// Option string `"none"`.
    None,
    /// Option string `"canon-index"`.
    CanonIndex,
}
113
/// Python-facing counterpart of the [`KernelMode`] enum.
///
/// Each variant is selected by a (case-insensitive) option string and is
/// translated to the `KernelMode` variant of the same name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum PyKernelMode {
    /// Option string `"none"`.
    None,
    /// Option string `"once"`.
    Once,
    /// Option string `"depth-one"`.
    DepthOne,
    /// Option string `"always"`.
    Always,
}
122
/// Python-facing counterpart of the [`crate::bounds::Bound`] enum.
///
/// Each variant is selected by a (case-insensitive) option string and is
/// translated to the crate-side bound variant of the same name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum PyBound {
    /// Option string `"log"`.
    Log,
    /// Option string `"int"`.
    Int,
    /// Option string `"vec-simple"`.
    VecSimple,
    /// Option string `"vec-small-frags"`.
    VecSmallFrags,
    /// Option string `"matchable-edges"`.
    MatchableEdges,
}
132
133/// Converts bound options in `&str` format to `PyCanonizeMode`.
134impl FromStr for PyCanonizeMode {
135    type Err = PyErr;
136
137    fn from_str(s: &str) -> Result<Self, Self::Err> {
138        match s.to_lowercase().as_str() {
139            "nauty" => Ok(PyCanonizeMode::Nauty),
140            "faulon" => Ok(PyCanonizeMode::Faulon),
141            "tree-nauty" => Ok(PyCanonizeMode::TreeNauty),
142            "tree-faulon" => Ok(PyCanonizeMode::TreeFaulon),
143            _ => Err(PyValueError::new_err(format!(
144                "Invalid canonization mode \"{s}\", options are: \
145                [\"nauty\", \"faulon\", \"tree-nauty\", \"tree-faulon\"]"
146            ))),
147        }
148    }
149}
150
151/// Converts bound options in `&str` format to `PyParallelMode`.
152impl FromStr for PyParallelMode {
153    type Err = PyErr;
154
155    fn from_str(s: &str) -> Result<Self, Self::Err> {
156        match s.to_lowercase().as_str() {
157            "none" => Ok(PyParallelMode::None),
158            "depth-one" => Ok(PyParallelMode::DepthOne),
159            "always" => Ok(PyParallelMode::Always),
160            _ => Err(PyValueError::new_err(format!(
161                "Invalid parallelization mode \"{s}\", options are: \
162                [\"none\", \"depth-one\", \"always\"]"
163            ))),
164        }
165    }
166}
167
168/// Converts bound options in `&str` format to `PyMemoizeMode`.
169impl FromStr for PyMemoizeMode {
170    type Err = PyErr;
171
172    fn from_str(s: &str) -> Result<Self, Self::Err> {
173        match s.to_lowercase().as_str() {
174            "none" => Ok(PyMemoizeMode::None),
175            "canon-index" => Ok(PyMemoizeMode::CanonIndex),
176            _ => Err(PyValueError::new_err(format!(
177                "Invalid memoization mode \"{s}\", options are: \
178                [\"none\", \"frags-index\", \"canon-index\"]"
179            ))),
180        }
181    }
182}
183
184/// Converts bound options in `&str` format to `PyKernelMode`.
185impl FromStr for PyKernelMode {
186    type Err = PyErr;
187
188    fn from_str(s: &str) -> Result<Self, Self::Err> {
189        match s.to_lowercase().as_str() {
190            "none" => Ok(PyKernelMode::None),
191            "once" => Ok(PyKernelMode::Once),
192            "depth-one" => Ok(PyKernelMode::DepthOne),
193            "always" => Ok(PyKernelMode::Always),
194            _ => Err(PyValueError::new_err(format!(
195                "Invalid kernelization mode \"{s}\", options are: \
196                [\"none\", \"once\", \"depth-one\", \"always\"]"
197            ))),
198        }
199    }
200}
201
202/// Converts bound options in `&str` format to `PyBound`.
203impl FromStr for PyBound {
204    type Err = PyErr;
205
206    fn from_str(s: &str) -> Result<Self, Self::Err> {
207        match s.to_lowercase().as_str() {
208            "log" => Ok(PyBound::Log),
209            "int" => Ok(PyBound::Int),
210            "vec-simple" => Ok(PyBound::VecSimple),
211            "vec-small-frags" => Ok(PyBound::VecSmallFrags),
212            "matchable-edges" => Ok(PyBound::MatchableEdges),
213            _ => Err(PyValueError::new_err(format!(
214                "Invalid bound \"{s}\", options are: \
215                [\"log\", \"int\", \"vec-simple\", \"vec-small-frags\", \
216                \"matchable-edges\"]"
217            ))),
218        }
219    }
220}
221
222/// Converts a `Vec<String>` of bound Python strings into a `Vec<PyBound>`,
223/// raising an error if any bound string is invalid.
224fn process_bound_strs(bound_strs: Vec<String>) -> PyResult<Vec<PyBound>> {
225    bound_strs
226        .iter()
227        .map(|s| s.parse())
228        .collect::<Result<_, _>>()
229}
230
231/// Converts a slice of `PyBound`s into a vector of `bounds::Bound`s.
232fn make_boundlist(pybounds: &[PyBound]) -> Vec<OurBound> {
233    let mut boundlist = pybounds
234        .iter()
235        .flat_map(|b| match b {
236            PyBound::Log => vec![OurBound::Log],
237            PyBound::Int => vec![OurBound::Int],
238            PyBound::VecSimple => vec![OurBound::VecSimple],
239            PyBound::VecSmallFrags => vec![OurBound::VecSmallFrags],
240            PyBound::MatchableEdges => vec![OurBound::MatchableEdges],
241        })
242        .collect::<Vec<_>>();
243    boundlist.dedup();
244    boundlist
245}
246
247/// Get a pretty-printable string of this molecule's graph representation.
248///
249/// Python version of [`crate::molecule::Molecule::info`].
250///
251/// # Python Parameters
252/// - `mol_block`: The contents of a `.mol` file as a `str`.
253///
254/// # Python Returns
255/// - A pretty-printable `str` detailing the molecule's atoms and bonds.
256///
257/// # Python Example
258///
259/// ```custom,{class=language-python}
260/// import assembly_theory as at
261///
262/// # Load a mol block from file.
263/// with open('data/checks/anthracene.mol') as f:
264///     mol_block = f.read()
265///
266/// # Print the molecule's graph structure.
267/// print(at.mol_info(mol_block))
268///
269/// # graph {
270/// #     0 [ label = "Atom { element: Carbon, capacity: 0 }" ]
271/// #     1 [ label = "Atom { element: Carbon, capacity: 0 }" ]
272/// #     2 [ label = "Atom { element: Carbon, capacity: 0 }" ]
273/// #     ...
274/// #     0 -- 1 [ label = "Double" ]
275/// #     1 -- 2 [ label = "Single" ]
276/// #     2 -- 5 [ label = "Double" ]
277/// #     ...
278/// # }
279/// ```
280#[pyfunction(name = "mol_info")]
281pub fn _mol_info(mol_block: &str) -> PyResult<String> {
282    // Parse the .mol file contents as a molecule::Molecule.
283    let mol = parse_molfile_str(mol_block)?;
284
285    // Return molecule info.
286    Ok(mol.info())
287}
288
289/// Compute assembly depth; see
290/// [Pagel et al. (2024)](https://arxiv.org/abs/2409.05993).
291///
292/// Python version of [`depth`].
293///
294/// # Python Parameters
295/// - `mol_block`: The contents of a `.mol` file as a `str`.
296///
297/// # Python Returns
298/// - The molecule's `int` assembly depth.
299///
300/// # Python Example
301///
302/// ```custom,{class=language-python}
303/// import assembly_theory as at
304///
305/// # Load a mol block from file.
306/// with open('data/checks/benzene.mol') as f:
307///     mol_block = f.read()
308///
309/// # Calculate the molecule's assembly index.
310/// at.depth(mol_block)  # 3
311/// ```
312#[pyfunction(name = "depth")]
313pub fn _depth(mol_block: &str) -> PyResult<u32> {
314    // Parse the .mol file contents as a molecule::Molecule.
315    let mol = parse_molfile_str(mol_block)?;
316
317    // Calculate assembly depth.
318    Ok(depth(&mol))
319}
320
321/// Computes a molecule's assembly index using an efficient default strategy.
322///
323/// Python version of [`index`].
324///
325/// # Python Parameters
326/// - `mol_block`: The contents of a `.mol` file as a `str`.
327///
328/// # Python Returns
329/// - The molecule's `int` assembly index.
330///
331/// # Python Example
332///
333/// ```custom,{class=language-python}
334/// import assembly_theory as at
335///
336/// # Load a mol block from file.
337/// with open('data/checks/anthracene.mol') as f:
338///     mol_block = f.read()
339///
340/// # Calculate the molecule's assembly index.
341/// at.index(mol_block)  # 6
342/// ```
343#[pyfunction(name = "index")]
344pub fn _index(mol_block: &str) -> PyResult<u32> {
345    // Parse the .mol file contents as a molecule::Molecule.
346    let mol = parse_molfile_str(mol_block)?;
347
348    // Calculate the assembly index.
349    Ok(index(&mol))
350}
351
352/// Computes a molecule's assembly index and related information using a
353/// top-down recursive search, parameterized by the specified options.
354///
355/// Python version of [`index_search`].
356///
357/// # Python Parameters
358///
359/// - `mol_block`: The contents of a `.mol` file as a `str`.
360/// - `timeout`: An `int` duration in milliseconds after which search is
361/// stopped and the best assembly index found so far is returned, or `None` if
362/// search is run until the true assembly index is found.
363/// - `canonize_str`: A canonization mode from [`"nauty"`, `"faulon"`,
364/// `"tree-nauty"` (default), `"tree-faulon"`]. See [`CanonizeMode`] for
365/// details.
366/// - `parallel_str`: A parallelization mode from [`"none"`, `"depth-one"`
367/// (default), `"always"`]. See [`ParallelMode`] for details.
368/// - `memoize_str`: A memoization mode from [`none`, `frags-index`,
369/// `canon-index` (default)]. See [`MemoizeMode`] for details.
370/// - `kernel_str`: A kernelization mode from [`"none"` (default), `"once"`,
371/// `"depth-one"`, `"always"`]. See [`KernelMode`] for details.
372/// - `bound_strs`: A list of bounds containing zero or more of [`"log"`,
373/// `"int"`, `"vec-simple"`, `"vec-small-frags"`, `"matchable-edges"`].
374/// The default bounds are [`"int"`, `"matchable-edges"`]. See
375/// [`crate::bounds::Bound`] for details.
376///
377/// # Python Returns
378///
379/// A 3-tuple containing:
380/// - The molecule's `int` assembly index (or an upper bound if timed out).
381/// - The molecule's `int` number of edge-disjoint isomorphic subgraph pairs.
382/// - The `int` number of assembly states searched, or `None` if timed out.
383///
384/// # Python Example
385///
386/// ```custom,{class=language-python}
387/// import assembly_theory as at
388///
389/// # Load a mol block from file.
390/// with open('data/checks/anthracene.mol') as f:
391///     mol_block = f.read()
392///
393/// # Calculate the molecule's assembly index using the specified options.
394/// (index, num_matches, states_searched) = at.index_search(
395///     mol_block,
396///     timeout=None,
397///     canonize_str="tree-nauty",
398///     parallel_str="none",
399///     memoize_str="none",
400///     kernel_str="none",
401///     bound_strs=["int", "matchable-edges"])
402///
403/// print(f"Assembly Index:  {index}")            # 6
404/// print(f"Matches:         {num_matches}")      # 466
405/// print(f"States Searched: {states_searched}")  # 491
406/// ```
407#[pyfunction(name = "index_search")]
408#[pyo3(signature = (mol_block, timeout=None, canonize_str="tree-nauty", parallel_str="depth-one", memoize_str="canon-index", kernel_str="none", bound_strs=vec!["int".to_string(), "matchable-edges".to_string()]), text_signature = "(mol_block, canonize_str=\"tree-nauty\", parallel_str=\"depth-one\", memoize_str=\"canon-index\", kernel_str=\"none\", bound_strs=[\"int\", \"matchable-edges\"]))")]
409pub fn _index_search(
410    mol_block: &str,
411    timeout: Option<u64>,
412    canonize_str: &str,
413    parallel_str: &str,
414    memoize_str: &str,
415    kernel_str: &str,
416    bound_strs: Vec<String>,
417) -> PyResult<(u32, u32, Option<usize>)> {
418    // Parse the .mol file contents as a molecule::Molecule.
419    let mol = parse_molfile_str(mol_block)?;
420
421    // Parse the various modes and bound options.
422    let canonize_mode = match PyCanonizeMode::from_str(canonize_str) {
423        Ok(PyCanonizeMode::Nauty) => CanonizeMode::Nauty,
424        Ok(PyCanonizeMode::Faulon) => CanonizeMode::Faulon,
425        Ok(PyCanonizeMode::TreeNauty) => CanonizeMode::TreeNauty,
426        Ok(PyCanonizeMode::TreeFaulon) => CanonizeMode::TreeFaulon,
427        Err(e) => return Err(e),
428    };
429    let parallel_mode = match PyParallelMode::from_str(parallel_str) {
430        Ok(PyParallelMode::None) => ParallelMode::None,
431        Ok(PyParallelMode::DepthOne) => ParallelMode::DepthOne,
432        Ok(PyParallelMode::Always) => ParallelMode::Always,
433        Err(e) => return Err(e),
434    };
435    let memoize_mode = match PyMemoizeMode::from_str(memoize_str) {
436        Ok(PyMemoizeMode::None) => MemoizeMode::None,
437        Ok(PyMemoizeMode::CanonIndex) => MemoizeMode::CanonIndex,
438        Err(e) => return Err(e),
439    };
440    let kernel_mode = match PyKernelMode::from_str(kernel_str) {
441        Ok(PyKernelMode::None) => KernelMode::None,
442        Ok(PyKernelMode::Once) => KernelMode::Once,
443        Ok(PyKernelMode::DepthOne) => KernelMode::DepthOne,
444        Ok(PyKernelMode::Always) => KernelMode::Always,
445        Err(e) => return Err(e),
446    };
447    let pybounds = process_bound_strs(bound_strs)?;
448    let boundlist = make_boundlist(&pybounds);
449
450    // Compute assembly index.
451    Ok(index_search(
452        &mol,
453        timeout,
454        canonize_mode,
455        parallel_mode,
456        memoize_mode,
457        kernel_mode,
458        &boundlist,
459    ))
460}
461
462/// A Python wrapper for the assembly_theory Rust crate.
463// Registers the listed functions as a Python module named 'assembly_theory';
464// the above line is used as a docstring.
465#[pymodule(name = "assembly_theory")]
466fn _assembly_theory(m: &Bound<'_, PyModule>) -> PyResult<()> {
467    m.add_function(wrap_pyfunction!(_mol_info, m)?)?;
468    m.add_function(wrap_pyfunction!(_depth, m)?)?;
469    m.add_function(wrap_pyfunction!(_index, m)?)?;
470    m.add_function(wrap_pyfunction!(_index_search, m)?)?;
471    Ok(())
472}