gchemol_core/
molecule.rs

1// [[file:../gchemol-core.note::6eec5694][6eec5694]]
2use serde::*;
3
4use gchemol_graph::*;
5use gut::prelude::*;
6
7use bimap::BiHashMap;
8use gchemol_lattice::Lattice;
9
10use crate::atom::*;
11use crate::bond::*;
12use crate::element::*;
13use crate::property::PropertyStore;
14// 6eec5694 ends here
15
16// [[file:../gchemol-core.note::9fd07ad9][9fd07ad9]]
17type MolGraph = NxGraph<Atom, Bond>;
18
19/// Molecule is the most important data structure in gchemol, which repsents
20/// "any singular entity, irrespective of its nature, used to concisely express
21/// any type of chemical particle that can exemplify some process: for example,
22/// atoms, molecules, ions, etc. can all undergo a chemical reaction". Molecule
23/// may have chemical bonds between atoms.
24///
25/// Reference
26/// ---------
27/// 1. http://goldbook.iupac.org/M03986.html
28/// 2. https://en.wikipedia.org/wiki/Molecular_entity
29///
30#[derive(Debug, Clone, Deserialize, Serialize, Default)]
31pub struct Molecule {
32    /// Arbitrary property stored in key-value pair. Key is a string type, but
33    /// it is the responsibility of the user to correctly interpret the value.
34    pub properties: PropertyStore,
35
36    /// Crystalline lattice for structure using periodic boundary conditions
37    pub lattice: Option<Lattice>,
38
39    /// Molecular name.
40    pub(crate) name: String,
41
42    /// core data in graph
43    pub(crate) graph: MolGraph,
44
45    /// mapping: Atom serial number <=> graph NodeIndex
46    pub(crate) mapping: BiHashMap<usize, NodeIndex>,
47}
48
49/// Methods for internal uses
50impl Molecule {
51    /// get internal node index by atom sn.
52    pub(crate) fn node_index(&self, sn: usize) -> NodeIndex {
53        *self.get_node_index(sn).unwrap_or_else(|| panic!("invalid atom sn: {}", sn))
54    }
55
56    /// get internal node index by atom sn.
57    pub(crate) fn get_node_index(&self, sn: usize) -> Option<&NodeIndex> {
58        self.mapping.get_by_left(&sn)
59    }
60
61    /// get atom sn  by internal node index.
62    pub(crate) fn atom_sn(&self, n: NodeIndex) -> usize {
63        *self.mapping.get_by_right(&n).unwrap_or_else(|| panic!("invalid NodeIndex: {:?}", n))
64    }
65
66    /// Removes atom sn from mapping and returns the associated NodeIndex.
67    pub(crate) fn remove_atom_sn(&mut self, sn: usize) -> Option<NodeIndex> {
68        self.mapping.remove_by_left(&sn).map(|(_, n)| n)
69    }
70}
71// 9fd07ad9 ends here
72
73// [[file:../gchemol-core.note::29c55361][29c55361]]
74/// `Molecule` constructors
75impl Molecule {
76    /// Create a new empty molecule with specific name
77    pub fn new(name: &str) -> Self {
78        Molecule {
79            name: name.to_string(),
80            ..Default::default()
81        }
82    }
83
84    /// Build a molecule from iterator of atoms associated with serial numbers
85    /// from 1.
86    pub fn from_atoms<T>(atoms: T) -> Self
87    where
88        T: IntoIterator,
89        T::Item: Into<Atom>,
90    {
91        let mut mol = Self::default();
92        for (i, a) in atoms.into_iter().enumerate() {
93            mol.add_atom(i + 1, a.into());
94        }
95
96        mol
97    }
98
99    /// Build `Molecule` from raw graph struct.
100    pub fn from_graph(graph: MolGraph) -> Self {
101        Self::from_graph_raw(graph, 1..)
102    }
103
104    /// Build `Molecule` from raw graph struct, with atom serial numbers.
105    pub fn from_graph_raw(graph: MolGraph, atoms: impl IntoIterator<Item = usize>) -> Self {
106        let n = graph.number_of_nodes();
107        let mut mol = Self { graph, ..Default::default() };
108
109        // create serial number mapping
110        let nodes = mol.graph.node_indices();
111        for (sn, n) in atoms.into_iter().zip(nodes) {
112            mol.mapping.insert_no_overwrite(sn, n).expect("from graph failure");
113        }
114
115        mol
116    }
117}
118// 29c55361 ends here
119
120// [[file:../gchemol-core.note::8017401a][8017401a]]
121/// Core methods
122impl Molecule {
123    /// Add atom `a` into molecule. If Atom numbered as `a` already exists in
124    /// molecule, then the associated Atom will be updated with `atom`.
125    pub fn add_atom(&mut self, a: usize, atom: Atom) {
126        if let Some(&n) = self.mapping.get_by_left(&a) {
127            self.graph[n] = atom;
128        } else {
129            let n = self.graph.add_node(atom);
130            self.mapping.insert_no_overwrite(a, n).expect("add atom failure");
131        }
132    }
133
134    /// Remove Atom `a` from `Molecule`.
135    ///
136    /// Return the removed Atom on success, and return None if Atom `a` does not
137    /// exist.
138    pub fn remove_atom(&mut self, a: usize) -> Option<Atom> {
139        if let Some(n) = self.remove_atom_sn(a) {
140            self.graph.remove_node(n)
141        } else {
142            None
143        }
144    }
145
146    /// Return the number of atoms in the molecule.
147    pub fn natoms(&self) -> usize {
148        self.graph.number_of_nodes()
149    }
150
151    /// Return the number of bonds in the molecule.
152    pub fn nbonds(&self) -> usize {
153        self.graph.number_of_edges()
154    }
155
156    /// Add `bond` between Atom `a` and Atom `b` into molecule. The existing
157    /// Bond will be replaced if Atom `a` already bonded with Atom `b`.
158    ///
159    /// Panic if the specified atom `a` or `b` does not exist
160    ///
161    pub fn add_bond(&mut self, a: usize, b: usize, bond: Bond) {
162        let na = self.node_index(a);
163        let nb = self.node_index(b);
164        self.graph.add_edge(na, nb, bond);
165    }
166
167    /// Remove the bond between atom `a` and atom `b`.
168    ///
169    /// Returns the removed `Bond` on success
170    ///
171    /// Panic if the specified atom `a` or `b` does not exist
172    pub fn remove_bond(&mut self, a: usize, b: usize) -> Option<Bond> {
173        let na = self.node_index(a);
174        let nb = self.node_index(b);
175        self.graph.remove_edge(na, nb)
176    }
177
178    /// Remove all atoms and bonds. To remove bonds only, see [unbound](#method.unbound) method.
179    pub fn clear(&mut self) {
180        self.mapping.clear();
181        self.graph.clear();
182    }
183
184    /// Iterate over atoms ordered by serial numbers.
185    pub fn atoms(&self) -> impl Iterator<Item = (usize, &Atom)> {
186        // sort by atom serial numbers
187        self.serial_numbers().map(move |sn| {
188            let n = self.node_index(sn);
189            let atom = &self.graph[n];
190            (sn, atom)
191        })
192    }
193
194    /// Iterate over bonds in arbitrary order.
195    pub fn bonds(&self) -> impl Iterator<Item = (usize, usize, &Bond)> {
196        self.graph.edges().map(move |(u, v, bond)| {
197            let sn1 = self.atom_sn(u);
198            let sn2 = self.atom_sn(v);
199            (sn1, sn2, bond)
200        })
201    }
202
203    /// Iterate over atom serial numbers in ascending order. Serial number is an
204    /// unsigned integer (1-based, traditionally) for accessing `Atom` in
205    /// `Molecule`
206    pub fn serial_numbers(&self) -> impl Iterator<Item = usize> {
207        self.mapping.left_values().copied().sorted()
208    }
209
210    /// A shorter alias to [serial_numbers](#method.serial_numbers) method.
211    pub fn numbers(&self) -> impl Iterator<Item = usize> + '_ {
212        self.serial_numbers()
213    }
214
215    /// Iterate over atom symbols ordered by serial numbers.
216    pub fn symbols(&self) -> impl Iterator<Item = &str> {
217        self.atoms().map(move |(_, atom)| atom.symbol())
218    }
219
220    /// Iterate over atom's mass ordered by atom's serial numbers. Dummy atom
221    /// has mass of zero.
222    pub fn masses(&self) -> impl Iterator<Item = f64> + '_ {
223        self.atoms().map(|(_, a)| a.get_mass().unwrap_or_default())
224    }
225
226    /// Iterate over atomic numbers.
227    pub fn atomic_numbers(&self) -> impl Iterator<Item = usize> + '_ {
228        self.atoms().map(move |(_, atom)| atom.number())
229    }
230
231    /// Iterate over atom positions ordered by serial numbers.
232    pub fn positions(&self) -> impl Iterator<Item = Point3> + '_ {
233        self.atoms().map(move |(_, atom)| atom.position())
234    }
235
236    /// A short description of the molecule.
237    ///
238    /// NOTE: for long title, only the first line will be return for safely
239    /// storing in various chemical file formats such as xyz.
240    pub fn title(&self) -> String {
241        let tlines: Vec<_> = self.name.lines().collect();
242        if tlines.is_empty() {
243            "untitled".to_owned()
244        } else {
245            tlines[0].trim().to_owned()
246        }
247    }
248
249    #[cfg(feature = "adhoc")]
250    /// Return a reference to internal Molecule Graph struct.
251    pub fn graph(&self) -> &NxGraph<Atom, Bond> {
252        &self.graph
253    }
254
255    #[cfg(feature = "adhoc")]
256    /// Return mut access to internal Molecule Graph struct.
257    pub fn graph_mut(&mut self) -> &mut NxGraph<Atom, Bond> {
258        &mut self.graph
259    }
260}
261// 8017401a ends here
262
263// [[file:../gchemol-core.note::61192a00][61192a00]]
264/// Edit `Molecule`
265impl Molecule {
266    /// Read access to atom by atom serial number.
267    pub fn get_atom(&self, sn: usize) -> Option<&Atom> {
268        self.get_node_index(sn).map(|&n| &self.graph[n])
269    }
270
271    /// Read access to atom by atom serial number. Panic if no this atom.
272    #[track_caller]
273    pub fn get_atom_unchecked(&self, sn: usize) -> &Atom {
274        assert!(self.has_atom(sn), "invalid atom `sn` {sn}, mol: {:?}", &self);
275        self.get_atom(sn).unwrap()
276    }
277
278    /// Returns true if the molecule contains atom with the given `sn`
279    pub fn has_atom(&self, sn: usize) -> bool {
280        self.mapping.contains_left(&sn)
281    }
282
283    /// Mutable access to atom by atom serial number.
284    pub fn get_atom_mut(&mut self, sn: usize) -> Option<&mut Atom> {
285        // self.get_node_index(sn).map(move |&n| &mut self.graph[n])
286        if let Some(&n) = self.get_node_index(sn) {
287            Some(&mut self.graph[n])
288        } else {
289            None
290        }
291    }
292
293    /// Mutable access to atom by atom serial number. Panic if no this atom.
294    #[track_caller]
295    pub fn get_atom_unchecked_mut(&mut self, sn: usize) -> &mut Atom {
296        assert!(self.has_atom(sn), "invalid atom i: {}, mol: {:?}", sn, &self);
297        self.get_atom_mut(sn).unwrap()
298    }
299
300    /// Read access to bond by a pair of atoms. Return None if there is no bond
301    /// between Atom `sn1` and Atom `sn2`.
302    pub fn get_bond(&self, sn1: usize, sn2: usize) -> Option<&Bond> {
303        if let Some(&n1) = self.get_node_index(sn1) {
304            if let Some(&n2) = self.get_node_index(sn2) {
305                if self.graph.has_edge(n1, n2) {
306                    return Some(&self.graph[(n1, n2)]);
307                }
308            }
309        }
310        None
311    }
312
313    /// Returns true if the molcule contains bond between atom `sn1` and `sn2`
314    pub fn has_bond(&self, sn1: usize, sn2: usize) -> bool {
315        self.get_bond(sn1, sn2).is_some()
316    }
317
318    /// Mutable access to bond by a pair of atoms.
319    pub fn get_bond_mut(&mut self, sn1: usize, sn2: usize) -> Option<&mut Bond> {
320        if let Some(&n1) = self.get_node_index(sn1) {
321            if let Some(&n2) = self.get_node_index(sn2) {
322                if self.graph.has_edge(n1, n2) {
323                    return Some(&mut self.graph[(n1, n2)]);
324                }
325            }
326        }
327        None
328    }
329
330    /// Set atom position.
331    ///
332    /// Panic if atom `sn` does not exist.
333    pub fn set_position<P: Into<Vector3f>>(&mut self, sn: usize, position: P) {
334        let atom = self.get_atom_mut(sn).expect("invalid atom serial number");
335        atom.set_position(position);
336    }
337
338    /// Set atom symbol.
339    ///
340    /// Panic if atom `sn` does not exist.
341    pub fn set_symbol<S: Into<AtomKind>>(&mut self, sn: usize, sym: S) {
342        let atom = self.get_atom_mut(sn).expect("invalid atom serial number");
343        atom.set_symbol(sym);
344    }
345
346    /// Add a list of atoms into molecule.
347    pub fn add_atoms_from<T, P>(&mut self, atoms: T)
348    where
349        T: IntoIterator<Item = (usize, P)>,
350        P: Into<Atom>,
351    {
352        for (n, a) in atoms {
353            self.add_atom(n, a.into());
354        }
355    }
356
357    /// Set molecular title.
358    pub fn set_title<S: AsRef<str>>(&mut self, title: S) {
359        self.name = title.as_ref().to_owned();
360    }
361
362    /// Add a list of bonds into molecule.
363    pub fn add_bonds_from<T>(&mut self, bonds: T)
364    where
365        T: IntoIterator<Item = (usize, usize, Bond)>,
366    {
367        for (u, v, b) in bonds {
368            self.add_bond(u, v, b);
369        }
370    }
371
372    /// Set positions of atoms in sequential order.
373    pub fn set_positions<T, P>(&mut self, positions: T)
374    where
375        T: IntoIterator<Item = P>,
376        P: Into<Vector3f>,
377    {
378        let mut n = 0;
379        for (sn, p) in self.serial_numbers().zip(positions.into_iter()) {
380            let atom = self.get_atom_mut(sn).unwrap();
381            n += 1;
382            atom.set_position(p);
383        }
384        assert_eq!(n, self.natoms(), "invalid number of input positions");
385    }
386
387    /// Update positions of atoms in sequential order, with freezing coordinates
388    /// ignored.
389    pub fn update_positions<T, P>(&mut self, positions: T)
390    where
391        T: IntoIterator<Item = P>,
392        P: Into<Vector3f>,
393    {
394        let mut n = 0;
395        for (sn, p) in self.serial_numbers().zip(positions.into_iter()) {
396            let atom = self.get_atom_mut(sn).unwrap();
397            n += 1;
398            atom.update_position(p);
399        }
400        assert_eq!(n, self.natoms(), "invalid number of input positions");
401    }
402
403    /// Set positions of specified atoms
404    pub fn set_positions_from<T, P>(&mut self, selected_positions: T)
405    where
406        T: IntoIterator<Item = (usize, P)>,
407        P: Into<Vector3f>,
408    {
409        for (i, p) in selected_positions {
410            self.set_position(i, p);
411        }
412    }
413
414    /// Set element symbols
415    pub fn set_symbols<T, S>(&mut self, symbols: T)
416    where
417        T: IntoIterator<Item = S>,
418        S: Into<AtomKind>,
419    {
420        for (sn, sy) in self.serial_numbers().zip(symbols.into_iter()) {
421            let atom = self.get_atom_mut(sn).unwrap();
422            atom.set_symbol(sy);
423        }
424    }
425
426    /// Remove atoms from .. (unimplemented)
427    pub fn remove_atoms_from(&mut self) {
428        unimplemented!()
429    }
430
431    /// Remove bonds from .. (unimplemented)
432    pub fn remove_bonds_from(&mut self) {
433        unimplemented!()
434    }
435}
436// 61192a00 ends here
437
438// [[file:../gchemol-core.note::b07deb3d][b07deb3d]]
439#[test]
440fn test() {
441    let mut mol = Molecule::new("test");
442
443    for i in 0..5 {
444        mol.add_atom(i, Atom::default());
445    }
446    assert_eq!(mol.natoms(), 5);
447
448    mol.add_bond(1, 2, Bond::single());
449    mol.add_bond(2, 3, Bond::double());
450    assert_eq!(mol.nbonds(), 2);
451    mol.add_bond(2, 1, Bond::single());
452    assert_eq!(mol.nbonds(), 2);
453
454    for (i, a) in mol.atoms() {
455        // dbg!((i, a.symbol()));
456    }
457
458    // set title
459    mol.set_title("new mol");
460    mol.set_title(format!("Molecule: {}", 4));
461}
462// b07deb3d ends here