Skip to main content

chematic_wasm/
lib.rs

1//! `chematic-wasm` — WebAssembly bindings for the chematic cheminformatics library.
2//!
3//! Exposes a small, ergonomic API for parsing SMILES and computing molecular
4//! descriptors from JavaScript/TypeScript via `wasm-bindgen`.
5
6use wasm_bindgen::prelude::*;
7
8// ---------------------------------------------------------------------------
9// MolHandle
10// ---------------------------------------------------------------------------
11
12/// A handle to a parsed molecule.  Owns the molecule behind an `Rc` so that
13/// it can be cheaply cloned on the JS side without copying atom/bond data.
14#[wasm_bindgen]
15pub struct MolHandle {
16    inner: std::rc::Rc<chematic_core::Molecule>,
17}
18
19#[wasm_bindgen]
20impl MolHandle {
21    /// Number of heavy atoms (explicit atoms in the graph; does not count implicit H).
22    pub fn atom_count(&self) -> usize {
23        self.inner.atom_count()
24    }
25
26    /// Number of bonds.
27    pub fn bond_count(&self) -> usize {
28        self.inner.bond_count()
29    }
30
31    /// Molecular formula string (Hill notation: C first, H second, then alphabetical).
32    pub fn formula(&self) -> String {
33        molecular_formula(&self.inner)
34    }
35
36    /// Canonical SMILES string.
37    pub fn canonical_smiles(&self) -> String {
38        chematic_smiles::canonical_smiles(&self.inner)
39    }
40
41    /// Average molecular weight (Da).
42    pub fn molecular_weight(&self) -> f64 {
43        chematic_chem::molecular_weight(&self.inner)
44    }
45
46    /// Topological polar surface area (Ų).
47    pub fn tpsa(&self) -> f64 {
48        chematic_chem::tpsa(&self.inner)
49    }
50
51    /// Returns `true` if the molecule satisfies Lipinski's Rule of Five.
52    pub fn lipinski_passes(&self) -> bool {
53        chematic_chem::lipinski_passes(&self.inner)
54    }
55
56    /// Number of non-hydrogen heavy atoms.
57    pub fn heavy_atom_count(&self) -> usize {
58        chematic_chem::heavy_atom_count(&self.inner)
59    }
60
61    /// Number of hydrogen bond donors (N-H or O-H groups).
62    pub fn hbd_count(&self) -> usize {
63        chematic_chem::hbd_count(&self.inner)
64    }
65
66    /// Number of hydrogen bond acceptors (Lipinski: all N and O atoms).
67    pub fn hba_count(&self) -> usize {
68        chematic_chem::hba_count(&self.inner)
69    }
70}
71
72// ---------------------------------------------------------------------------
73// Free functions exported to JS
74// ---------------------------------------------------------------------------
75
76/// Parse a SMILES string into a `MolHandle`.
77///
78/// Returns a JS error string on parse failure.
79#[wasm_bindgen]
80pub fn parse_smiles(s: &str) -> Result<MolHandle, JsValue> {
81    chematic_smiles::parse(s)
82        .map(|mol| MolHandle { inner: std::rc::Rc::new(mol) })
83        .map_err(|e| JsValue::from_str(&e.to_string()))
84}
85
86/// Tanimoto similarity between two molecules using ECFP4 fingerprints.
87#[wasm_bindgen]
88pub fn tanimoto_ecfp4(a: &MolHandle, b: &MolHandle) -> f64 {
89    chematic_fp::tanimoto_ecfp4(&a.inner, &b.inner)
90}
91
92/// Compute the ECFP4 fingerprint as a bit-packed byte vector (256 bytes = 2048 bits).
93#[wasm_bindgen]
94pub fn ecfp4_bitvec(mol: &MolHandle) -> Vec<u8> {
95    let fp = chematic_fp::ecfp4(&mol.inner);
96    // BitVec2048 is 2048 bits; extract them byte-by-byte via the public `get` method.
97    let mut bytes = vec![0u8; 256];
98    for byte_idx in 0..256usize {
99        let mut byte = 0u8;
100        for bit in 0..8usize {
101            if fp.get(byte_idx * 8 + bit) {
102                byte |= 1 << bit;
103            }
104        }
105        bytes[byte_idx] = byte;
106    }
107    bytes
108}
109
110// ---------------------------------------------------------------------------
111// Private helper: molecular formula (Hill notation)
112// ---------------------------------------------------------------------------
113
114/// Build a molecular formula string in Hill notation.
115///
116/// Hill convention: carbon first, hydrogen second, remaining elements
117/// in alphabetical order.  Implicit hydrogens (from valence model) are
118/// included in the count.
119fn molecular_formula(mol: &chematic_core::Molecule) -> String {
120    use chematic_core::{Element, implicit_hcount};
121    use std::collections::BTreeMap;
122
123    let mut counts: BTreeMap<u8, u32> = BTreeMap::new();
124
125    for (idx, atom) in mol.atoms() {
126        let an = atom.element.atomic_number();
127        if an != 1 {
128            // Count the heavy atom.
129            *counts.entry(an).or_insert(0) += 1;
130            // Add its implicit hydrogens.
131            let h = implicit_hcount(mol, idx) as u32;
132            if h > 0 {
133                *counts.entry(1).or_insert(0) += h;
134            }
135        } else {
136            // Explicit hydrogen atom.
137            *counts.entry(1).or_insert(0) += 1;
138        }
139    }
140
141    // Collect into Hill order: C (6), H (1), then remaining by atomic number.
142    let mut result = String::new();
143
144    // Carbon first.
145    if let Some(&c_count) = counts.get(&6) {
146        result.push_str("C");
147        if c_count > 1 {
148            result.push_str(&c_count.to_string());
149        }
150    }
151
152    // Hydrogen second.
153    if let Some(&h_count) = counts.get(&1) {
154        result.push_str("H");
155        if h_count > 1 {
156            result.push_str(&h_count.to_string());
157        }
158    }
159
160    // Remaining elements in atomic-number order (BTreeMap is sorted by key).
161    for (&an, &count) in &counts {
162        if an == 1 || an == 6 {
163            continue; // already handled
164        }
165        let elem = Element::from_atomic_number(an).unwrap();
166        result.push_str(elem.symbol());
167        if count > 1 {
168            result.push_str(&count.to_string());
169        }
170    }
171
172    result
173}
174
175// ---------------------------------------------------------------------------
176// Tests
177// ---------------------------------------------------------------------------
178
#[cfg(test)]
mod tests {
    use super::*;

    // SMILES fixtures shared by the tests below.
    const BENZENE: &str = "c1ccccc1";
    const ASPIRIN: &str = "CC(=O)Oc1ccccc1C(=O)O";
    const ETHANOL: &str = "CCO";

    /// Build a handle directly from SMILES, panicking if it does not parse.
    fn parse(s: &str) -> MolHandle {
        let mol = chematic_smiles::parse(s).unwrap();
        MolHandle { inner: std::rc::Rc::new(mol) }
    }

    /// Benzene has six explicit ring atoms.
    #[test]
    fn parse_benzene_atom_count() {
        let benzene = parse(BENZENE);
        assert_eq!(benzene.atom_count(), 6);
    }

    /// Smoke test: canonicalisation yields some non-empty output.
    #[test]
    fn canonical_smiles_benzene() {
        let canonical = parse(BENZENE).canonical_smiles();
        assert!(!canonical.is_empty());
    }

    /// Aspirin's molecular weight is about 180.16 Da (checked to within 1 Da).
    #[test]
    fn molecular_weight_aspirin() {
        let aspirin = parse(ASPIRIN);
        let deviation = (aspirin.molecular_weight() - 180.16).abs();
        assert!(deviation < 1.0);
    }

    /// Aspirin is expected to pass Lipinski's Rule of Five.
    #[test]
    fn lipinski_aspirin() {
        let aspirin = parse(ASPIRIN);
        assert!(aspirin.lipinski_passes());
    }

    /// A molecule compared with an identical copy has similarity 1.
    #[test]
    fn tanimoto_same_mol() {
        let a = parse(BENZENE);
        let b = parse(BENZENE);
        let deviation = (tanimoto_ecfp4(&a, &b) - 1.0).abs();
        assert!(deviation < 1e-6);
    }

    /// Different molecules have similarity strictly below 1.
    #[test]
    fn tanimoto_different() {
        let benzene = parse(BENZENE);
        let aspirin = parse(ASPIRIN);
        let sim = tanimoto_ecfp4(&benzene, &aspirin);
        assert!(sim < 1.0);
    }

    /// Ethanol has three heavy atoms (C, C, O).
    #[test]
    fn heavy_atom_count_ethanol() {
        let ethanol = parse(ETHANOL);
        assert_eq!(ethanol.heavy_atom_count(), 3);
    }
}