assembly_theory/
python.rs

//! Expose functionality to the Python library using pyo3.
2use pyo3::exceptions::PyValueError;
3use pyo3::prelude::*;
4use std::collections::{HashMap, HashSet};
5use std::str::FromStr;
6
7use crate::assembly::Bound as AssemblyBound;
8use crate::assembly::{index_search, serial_index_search};
9use crate::loader::parse_molfile_str;
10
// TODO: This should be merged with the Bounds enum in main, but the best way
// to do that is not yet clear. One option is to move it into utils.
/// Bound families that can be selected by name from Python.
///
/// Names are parsed through the `FromStr` implementation, and each variant is
/// expanded into one or more `AssemblyBound` values by `make_boundlist`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum PyBounds {
    /// Parsed from `"log"`; expands to `AssemblyBound::Log`.
    Log,
    /// Parsed from `"intchain"`; expands to `AssemblyBound::IntChain`.
    IntChain,
    /// Parsed from `"vecchain"`; expands to both `AssemblyBound::VecChainSimple`
    /// and `AssemblyBound::VecChainSmallFrags`.
    VecChain,
}
19
20/// Implements conversion from `&str` to `PyBounds`
21impl FromStr for PyBounds {
22    type Err = PyErr;
23
24    fn from_str(s: &str) -> Result<Self, Self::Err> {
25        match s.to_lowercase().as_str() {
26            "log" => Ok(PyBounds::Log),
27            "intchain" => Ok(PyBounds::IntChain),
28            "vecchain" => Ok(PyBounds::VecChain),
29            _ => Err(PyValueError::new_err(format!("Invalid bound: {}", s))),
30        }
31    }
32}
33
34/// Converts a slice of `PyBounds` to a vector of `AssemblyBound`
35fn make_boundlist(u: &[PyBounds]) -> Vec<AssemblyBound> {
36    let mut boundlist = u
37        .iter()
38        .flat_map(|b| match b {
39            PyBounds::Log => vec![AssemblyBound::Log],
40            PyBounds::IntChain => vec![AssemblyBound::IntChain],
41            PyBounds::VecChain => vec![
42                AssemblyBound::VecChainSimple,
43                AssemblyBound::VecChainSmallFrags,
44            ],
45        })
46        .collect::<Vec<_>>();
47
48    boundlist.dedup(); // Ensure no duplicate bounds
49    boundlist
50}
51
52/// Processes a `HashSet<String>` from Python and converts it into a `Vec<PyBounds>`
53/// Raises an error if any string is invalid.
54fn process_bound_set(bound_set: HashSet<String>) -> PyResult<Vec<PyBounds>> {
55    bound_set
56        .iter()
57        .map(|s| s.parse())
58        .collect::<Result<_, _>>() // Try parsing each string
59}
60
61/// Computes the molecular assembly index using specified bounds.
62///
63/// # Parameters
64/// - `mol_block`: The contents of a .mol file as a string.
65/// - `bound_set`: A set of bounds as strings (from Python).
66///
67/// # Returns
68/// - The computed molecular index as a `u32`.
69#[pyfunction]
70pub fn _molecular_assembly(
71    mol_block: String,
72    bound_set: HashSet<String>,
73    serial: bool,
74) -> PyResult<u32> {
75    let mol_result = parse_molfile_str(&mol_block);
76    let py_bounds = process_bound_set(bound_set)?;
77
78    let mol = match mol_result {
79        Ok(mol) => mol,
80        Err(e) => return Err(e.into()), // Convert the error to PyErr
81    };
82
83    let (index, _, _) = if serial {
84        serial_index_search(&mol, &make_boundlist(&py_bounds))
85    } else {
86        index_search(&mol, &make_boundlist(&py_bounds))
87    };
88
89    Ok(index)
90}
91
92/// Computes the molecular assembly index with additional details.
93///
94/// # Parameters
95/// - `mol_block`: The contents of a .mol file as a string.
96/// - `bound_set`: A set of bounds as strings (from Python).
97///
98/// # Returns
99/// - A `HashMap<String, u32>` containing:
100///   - `"index"`: The computed molecular index.
101///   - `"duplicates"`: Duplicate count.
102///   - `"space"`: Space calculation.
103#[pyfunction]
104pub fn _molecular_assembly_verbose(
105    mol_block: String,
106    bound_set: HashSet<String>,
107    serial: bool,
108) -> PyResult<HashMap<String, usize>> {
109    let mol_result = parse_molfile_str(&mol_block);
110    let py_bounds = process_bound_set(bound_set)?;
111
112    let mol = match mol_result {
113        Ok(mol) => mol,
114        Err(e) => return Err(e.into()), // Convert error to PyErr
115    };
116
117    let (ix, duplicates, space) = if serial {
118        serial_index_search(&mol, &make_boundlist(&py_bounds))
119    } else {
120        index_search(&mol, &make_boundlist(&py_bounds))
121    };
122
123    let mut data = HashMap::new();
124    data.insert("index".to_string(), ix as usize);
125    data.insert("duplicates".to_string(), duplicates as usize);
126    data.insert("space".to_string(), space);
127
128    Ok(data)
129}
130
131/// Retrieves molecular information from a given mol block.
132///
133/// # Parameters
134/// - `mol_block`: The contents of a .mol file as a string.
135///
136/// # Returns
137/// - A `String` containing molecular information.
138#[pyfunction]
139pub fn _molecule_info(mol_block: String) -> PyResult<String> {
140    let mol_result = parse_molfile_str(&mol_block);
141
142    let mol = match mol_result {
143        Ok(mol) => mol,
144        Err(e) => return Err(e.into()), // Convert error to PyErr
145    };
146
147    Ok(mol.info()) // Retrieve molecular info
148}
149
150// Registers the Rust functions as a Python module.
151//
152// This function must match the `lib.name` setting in `Cargo.toml`,
153// otherwise, Python will not be able to import the module.
154#[pymodule]
155fn _pyat(m: &Bound<'_, PyModule>) -> PyResult<()> {
156    m.add_function(wrap_pyfunction!(_molecular_assembly, m)?)?;
157    m.add_function(wrap_pyfunction!(_molecular_assembly_verbose, m)?)?;
158    m.add_function(wrap_pyfunction!(_molecule_info, m)?)?;
159    Ok(())
160}