Skip to main content

cosmolkit_core/model/
molecule.rs

1use std::{collections::BTreeMap, sync::Arc};
2
3use crate::{
4    Atom, AtomId, Bond, MoleculeBuilder, error::InvariantError,
5    invariants::enforce_molecule_invariants, sgroup::SubstanceGroup, stereo::StereoGroup,
6};
7
8#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
9pub enum SmilesParseError {
10    #[error("SMILES parser is not implemented")]
11    NotImplemented,
12    #[error(transparent)]
13    UnsupportedFeature(#[from] crate::UnsupportedFeatureError),
14    #[error("{0}")]
15    ParseError(String),
16}
17
18pub use crate::smiles_write::SmilesWriteError;
19
20#[derive(Debug, Clone, Copy, PartialEq, Eq)]
21pub enum CoordinateDimension {
22    TwoD,
23    ThreeD,
24}
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27pub enum TopologyTrust {
28    Unknown,
29    TrustedGraph,
30    CoordinateOnly,
31}
32
33impl Default for TopologyTrust {
34    fn default() -> Self {
35        Self::Unknown
36    }
37}
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
40pub struct MoleculeCapabilities {
41    topology_trust: TopologyTrust,
42}
43
44impl MoleculeCapabilities {
45    #[must_use]
46    pub const fn new(topology_trust: TopologyTrust) -> Self {
47        Self { topology_trust }
48    }
49
50    #[must_use]
51    pub const fn topology_trust(self) -> TopologyTrust {
52        self.topology_trust
53    }
54
55    #[must_use]
56    pub const fn with_topology_trust(self, topology_trust: TopologyTrust) -> Self {
57        Self { topology_trust }
58    }
59}
60
61#[derive(Debug, Clone, PartialEq)]
62pub struct Conformer2D {
63    id: usize,
64    coords: Vec<[f64; 2]>,
65    props: BTreeMap<String, String>,
66}
67
68impl Conformer2D {
69    #[must_use]
70    pub fn new(id: usize, coords: Vec<[f64; 2]>) -> Self {
71        Self {
72            id,
73            coords,
74            props: BTreeMap::new(),
75        }
76    }
77
78    #[must_use]
79    pub const fn id(&self) -> usize {
80        self.id
81    }
82
83    #[must_use]
84    pub fn coordinates(&self) -> &[[f64; 2]] {
85        &self.coords
86    }
87
88    pub fn coordinates_mut(&mut self) -> &mut [[f64; 2]] {
89        &mut self.coords
90    }
91
92    #[must_use]
93    pub fn props(&self) -> &BTreeMap<String, String> {
94        &self.props
95    }
96
97    #[must_use]
98    pub fn with_prop(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
99        self.props.insert(key.into(), value.into());
100        self
101    }
102
103    #[must_use]
104    pub(crate) fn with_id(mut self, id: usize) -> Self {
105        self.id = id;
106        self
107    }
108
109    #[allow(dead_code)]
110    pub(crate) fn remapped_to_kept_atoms(&self, kept_old_indices: &[usize], id: usize) -> Self {
111        let coords = kept_old_indices
112            .iter()
113            .filter_map(|old_idx| self.coords.get(*old_idx).copied())
114            .collect();
115        Self {
116            id,
117            coords,
118            props: self.props.clone(),
119        }
120    }
121
122    #[allow(dead_code)]
123    pub(crate) fn push_coord(&mut self, coord: [f64; 2]) {
124        self.coords.push(coord);
125    }
126}
127
128#[derive(Debug, Clone, PartialEq, Default)]
129pub struct ConformerStore {
130    pub conformers_2d: Vec<Conformer2D>,
131    pub conformers_3d: Vec<Conformer3D>,
132    pub source_coordinate_dim: Option<CoordinateDimension>,
133}
134
135#[derive(Debug, Clone, PartialEq, Default)]
136pub struct PropertyStore {
137    pub name: Option<String>,
138    pub sdf_data_fields: Vec<(String, String)>,
139    pub sdf_property_lists: Vec<SdfPropertyList>,
140    pub props: std::collections::BTreeMap<String, String>,
141}
142
143#[derive(Debug, Clone, PartialEq)]
144pub struct Conformer3D {
145    id: usize,
146    coords: Vec<[f64; 3]>,
147    is_3d: bool,
148    props: BTreeMap<String, String>,
149}
150
151impl Conformer3D {
152    #[must_use]
153    pub fn new(id: usize, coords: Vec<[f64; 3]>, is_3d: bool) -> Self {
154        Self {
155            id,
156            coords,
157            is_3d,
158            props: BTreeMap::new(),
159        }
160    }
161
162    #[must_use]
163    pub const fn id(&self) -> usize {
164        self.id
165    }
166
167    #[must_use]
168    pub fn coordinates(&self) -> &[[f64; 3]] {
169        &self.coords
170    }
171
172    /// Mutable access to coordinates (pub(crate) for conformer transforms).
173    pub fn coordinates_mut(&mut self) -> &mut [[f64; 3]] {
174        &mut self.coords
175    }
176
177    #[must_use]
178    pub const fn is_3d(&self) -> bool {
179        self.is_3d
180    }
181
182    #[must_use]
183    pub fn props(&self) -> &BTreeMap<String, String> {
184        &self.props
185    }
186
187    #[must_use]
188    pub fn with_prop(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
189        self.props.insert(key.into(), value.into());
190        self
191    }
192
193    #[must_use]
194    pub(crate) fn with_id(mut self, id: usize) -> Self {
195        self.id = id;
196        self
197    }
198
199    #[allow(dead_code)]
200    pub(crate) fn remapped_to_kept_atoms(&self, kept_old_indices: &[usize], id: usize) -> Self {
201        let coords = kept_old_indices
202            .iter()
203            .filter_map(|old_idx| self.coords.get(*old_idx).copied())
204            .collect();
205        Self {
206            id,
207            coords,
208            is_3d: self.is_3d,
209            props: self.props.clone(),
210        }
211    }
212
213    #[allow(dead_code)]
214    pub(crate) fn push_coord(&mut self, coord: [f64; 3]) {
215        self.coords.push(coord);
216    }
217}
218
219#[derive(Debug, Clone, PartialEq, Default)]
220pub struct MoleculeProperties {
221    name: Option<String>,
222    sdf_data_fields: Vec<(String, String)>,
223    sdf_property_lists: Vec<SdfPropertyList>,
224    props: BTreeMap<String, String>,
225}
226
227#[derive(Debug, Clone, Copy, PartialEq, Eq)]
228pub enum SdfPropertyListTarget {
229    Atom,
230    Bond,
231}
232
233#[derive(Debug, Clone, PartialEq, Eq)]
234pub struct SdfPropertyList {
235    target: SdfPropertyListTarget,
236    name: String,
237    values: Vec<Option<String>>,
238}
239
240impl SdfPropertyList {
241    #[must_use]
242    pub fn new(
243        target: SdfPropertyListTarget,
244        name: impl Into<String>,
245        values: Vec<Option<String>>,
246    ) -> Self {
247        Self {
248            target,
249            name: name.into(),
250            values,
251        }
252    }
253
254    #[must_use]
255    pub const fn target(&self) -> SdfPropertyListTarget {
256        self.target
257    }
258
259    #[must_use]
260    pub fn name(&self) -> &str {
261        &self.name
262    }
263
264    #[must_use]
265    pub fn values(&self) -> &[Option<String>] {
266        &self.values
267    }
268}
269
270impl MoleculeProperties {
271    #[must_use]
272    pub fn name(&self) -> Option<&str> {
273        self.name.as_deref()
274    }
275
276    #[must_use]
277    pub fn with_name(mut self, name: impl Into<String>) -> Self {
278        self.name = Some(name.into());
279        self
280    }
281
282    #[must_use]
283    pub fn sdf_data_fields(&self) -> &[(String, String)] {
284        &self.sdf_data_fields
285    }
286
287    #[must_use]
288    pub fn sdf_property_lists(&self) -> &[SdfPropertyList] {
289        &self.sdf_property_lists
290    }
291
292    #[must_use]
293    pub fn props(&self) -> &BTreeMap<String, String> {
294        &self.props
295    }
296
297    #[must_use]
298    pub fn prop(&self, key: &str) -> Option<&str> {
299        self.props.get(key).map(String::as_str)
300    }
301
302    #[must_use]
303    pub fn with_prop(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
304        self.props.insert(key.into(), value.into());
305        self
306    }
307
308    #[must_use]
309    pub fn with_sdf_data_field(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
310        self.sdf_data_fields.push((key.into(), value.into()));
311        self
312    }
313
314    #[must_use]
315    pub fn with_sdf_property_list(mut self, property_list: SdfPropertyList) -> Self {
316        self.sdf_property_lists.push(property_list);
317        self
318    }
319
320    #[allow(dead_code)]
321    pub(crate) fn set_prop(&mut self, key: impl Into<String>, value: impl Into<String>) {
322        self.props.insert(key.into(), value.into());
323    }
324
325    #[allow(dead_code)]
326    pub(crate) fn clear_prop(&mut self, key: &str) {
327        self.props.remove(key);
328    }
329
330    pub(crate) fn remap_topology(&mut self, mapping: &TopologyMapping) {
331        self.sdf_property_lists = self
332            .sdf_property_lists
333            .iter()
334            .map(|property_list| property_list.remapped_topology(mapping))
335            .collect();
336    }
337}
338
339impl SdfPropertyList {
340    fn remapped_topology(&self, mapping: &TopologyMapping) -> Self {
341        let values = match self.target {
342            SdfPropertyListTarget::Atom => mapping
343                .atoms
344                .new_to_old
345                .iter()
346                .map(|old_row| {
347                    old_row.and_then(|row| self.values.get(row.index()).cloned().flatten())
348                })
349                .collect(),
350            SdfPropertyListTarget::Bond => mapping
351                .bonds
352                .new_to_old
353                .iter()
354                .map(|old_row| {
355                    old_row.and_then(|row| self.values.get(row.index()).cloned().flatten())
356                })
357                .collect(),
358        };
359        Self {
360            target: self.target,
361            name: self.name.clone(),
362            values,
363        }
364    }
365}
366
367#[derive(Debug, Clone, PartialEq)]
368pub(crate) struct TopologyBlock {
369    pub(crate) atoms: Vec<Atom>,
370    pub(crate) bonds: Vec<Bond>,
371    pub(crate) adjacency: crate::AdjacencyList,
372    pub(crate) substance_groups: Vec<SubstanceGroup>,
373    pub(crate) stereo_groups: Vec<StereoGroup>,
374}
375
376impl Default for TopologyBlock {
377    fn default() -> Self {
378        Self {
379            atoms: Vec::new(),
380            bonds: Vec::new(),
381            adjacency: crate::AdjacencyList::from_topology(0, &[]),
382            substance_groups: Vec::new(),
383            stereo_groups: Vec::new(),
384        }
385    }
386}
387
388#[derive(Debug, Clone, PartialEq, Default)]
389pub(crate) struct CoordinateBlock {
390    /// Zero or more 2D conformers.
391    ///
392    /// Coordinates are stored in the same atom-index order as `TopologyBlock`.
393    /// Any operation changing atom indices must remap or drop this block through
394    /// a topology report. Do not mutate this block directly from operation code.
395    pub(crate) conformers_2d: Vec<Conformer2D>,
396    pub(crate) conformers_3d: Vec<Conformer3D>,
397    pub(crate) source_coordinate_dim: Option<CoordinateDimension>,
398}
399
400#[derive(Debug, Clone, PartialEq, Default)]
401pub(crate) struct DerivedCacheBlock {
402    pub(crate) rings: Option<crate::RingInfo>,
403    pub(crate) ring_families: Option<crate::RingInfo>,
404    pub(crate) valence: Option<crate::ValenceAssignment>,
405    pub(crate) aromaticity_valid: bool,
406    pub(crate) stereo_valid: bool,
407}
408
409impl DerivedCacheBlock {
410    pub(crate) fn invalidate(&mut self, states: crate::DerivedState) {
411        if states.contains(crate::DerivedState::VALENCE) {
412            self.valence = None;
413        }
414        if states.contains(crate::DerivedState::RINGS) {
415            self.rings = None;
416        }
417        if states.contains(crate::DerivedState::RING_FAMILIES) {
418            self.ring_families = None;
419        }
420        if states.contains(crate::DerivedState::AROMATICITY) {
421            self.aromaticity_valid = false;
422        }
423        if states.contains(crate::DerivedState::STEREO) {
424            self.stereo_valid = false;
425        }
426    }
427}
428
429#[derive(Debug, Clone, PartialEq, Eq)]
430pub struct AtomMapping {
431    old_to_new: Vec<Option<AtomId>>,
432    new_to_old: Vec<Option<AtomId>>,
433}
434
435impl AtomMapping {
436    #[must_use]
437    pub fn old_to_new(&self) -> &[Option<AtomId>] {
438        &self.old_to_new
439    }
440
441    #[must_use]
442    pub fn new_to_old(&self) -> &[Option<AtomId>] {
443        &self.new_to_old
444    }
445}
446
447#[derive(Debug, Clone, PartialEq, Eq)]
448pub struct BondMapping {
449    old_to_new: Vec<Option<crate::BondId>>,
450    new_to_old: Vec<Option<crate::BondId>>,
451}
452
453impl BondMapping {
454    #[must_use]
455    pub fn old_to_new(&self) -> &[Option<crate::BondId>] {
456        &self.old_to_new
457    }
458
459    #[must_use]
460    pub fn new_to_old(&self) -> &[Option<crate::BondId>] {
461        &self.new_to_old
462    }
463}
464
465#[derive(Debug, Clone, PartialEq, Eq)]
466pub struct TopologyMapping {
467    atoms: AtomMapping,
468    bonds: BondMapping,
469}
470
471impl TopologyMapping {
472    pub(crate) fn identity(atom_count: usize, bond_count: usize) -> Self {
473        Self {
474            atoms: AtomMapping {
475                old_to_new: (0..atom_count).map(|idx| Some(AtomId::new(idx))).collect(),
476                new_to_old: (0..atom_count).map(|idx| Some(AtomId::new(idx))).collect(),
477            },
478            bonds: BondMapping {
479                old_to_new: (0..bond_count)
480                    .map(|idx| Some(crate::BondId::new(idx)))
481                    .collect(),
482                new_to_old: (0..bond_count)
483                    .map(|idx| Some(crate::BondId::new(idx)))
484                    .collect(),
485            },
486        }
487    }
488
489    pub(crate) fn with_appended(
490        old_atom_count: usize,
491        old_bond_count: usize,
492        added_atom_count: usize,
493        added_bond_count: usize,
494    ) -> Self {
495        let mut atom_new_to_old = (0..old_atom_count)
496            .map(|idx| Some(AtomId::new(idx)))
497            .collect::<Vec<_>>();
498        atom_new_to_old.extend((0..added_atom_count).map(|_| None));
499
500        let mut bond_new_to_old = (0..old_bond_count)
501            .map(|idx| Some(crate::BondId::new(idx)))
502            .collect::<Vec<_>>();
503        bond_new_to_old.extend((0..added_bond_count).map(|_| None));
504
505        Self {
506            atoms: AtomMapping {
507                old_to_new: (0..old_atom_count)
508                    .map(|idx| Some(AtomId::new(idx)))
509                    .collect(),
510                new_to_old: atom_new_to_old,
511            },
512            bonds: BondMapping {
513                old_to_new: (0..old_bond_count)
514                    .map(|idx| Some(crate::BondId::new(idx)))
515                    .collect(),
516                new_to_old: bond_new_to_old,
517            },
518        }
519    }
520
521    #[must_use]
522    pub fn atoms(&self) -> &AtomMapping {
523        &self.atoms
524    }
525
526    #[must_use]
527    pub fn bonds(&self) -> &BondMapping {
528        &self.bonds
529    }
530}
531
532impl TopologyBlock {
533    #[allow(dead_code)]
534    pub(crate) fn remove_atoms_with_mapping(
535        &mut self,
536        atoms_to_remove: &[AtomId],
537    ) -> TopologyMapping {
538        let mut remove_atom = vec![false; self.atoms.len()];
539        for atom in atoms_to_remove {
540            if let Some(slot) = remove_atom.get_mut(atom.index()) {
541                *slot = true;
542            }
543        }
544
545        let mut atom_old_to_new = vec![None; self.atoms.len()];
546        let mut atom_new_to_old = Vec::new();
547        let mut atoms = Vec::with_capacity(self.atoms.len().saturating_sub(atoms_to_remove.len()));
548        for atom in &self.atoms {
549            if remove_atom[atom.id().index()] {
550                continue;
551            }
552            let new_id = AtomId::new(atoms.len());
553            atom_old_to_new[atom.id().index()] = Some(new_id);
554            atom_new_to_old.push(Some(atom.id()));
555            atoms.push(atom.clone().with_id(new_id));
556        }
557
558        let mut bond_old_to_new = vec![None; self.bonds.len()];
559        let mut bond_new_to_old = Vec::new();
560        let mut bonds = Vec::new();
561        for bond in &self.bonds {
562            let Some(begin) = atom_old_to_new
563                .get(bond.begin().index())
564                .and_then(|mapped| *mapped)
565            else {
566                continue;
567            };
568            let Some(end) = atom_old_to_new
569                .get(bond.end().index())
570                .and_then(|mapped| *mapped)
571            else {
572                continue;
573            };
574
575            let stereo_atoms = bond.stereo_atoms().and_then(|[left, right]| {
576                Some([
577                    atom_old_to_new
578                        .get(left.index())
579                        .and_then(|mapped| *mapped)?,
580                    atom_old_to_new
581                        .get(right.index())
582                        .and_then(|mapped| *mapped)?,
583                ])
584            });
585
586            let new_id = crate::BondId::new(bonds.len());
587            bond_old_to_new[bond.id().index()] = Some(new_id);
588            bond_new_to_old.push(Some(bond.id()));
589            bonds.push(bond.clone().remapped(new_id, begin, end, stereo_atoms));
590        }
591
592        let mut sgroup_survives = self
593            .substance_groups
594            .iter()
595            .map(|sgroup| sgroup.can_remap_without_parent(&atom_old_to_new, &bond_old_to_new))
596            .collect::<Vec<_>>();
597        loop {
598            let mut changed = false;
599            for (idx, sgroup) in self.substance_groups.iter().enumerate() {
600                if !sgroup_survives[idx] {
601                    continue;
602                }
603                if let Some(parent) = sgroup.parent()
604                    && !sgroup_survives
605                        .get(parent.index())
606                        .copied()
607                        .unwrap_or(false)
608                {
609                    sgroup_survives[idx] = false;
610                    changed = true;
611                }
612            }
613            if !changed {
614                break;
615            }
616        }
617
618        let mut sgroup_map = vec![None; self.substance_groups.len()];
619        let mut next_sgroup_index = 0usize;
620        for (old_index, survives) in sgroup_survives.iter().copied().enumerate() {
621            if survives {
622                sgroup_map[old_index] = Some(crate::SubstanceGroupId::new(next_sgroup_index));
623                next_sgroup_index += 1;
624            }
625        }
626        self.substance_groups = self
627            .substance_groups
628            .iter()
629            .enumerate()
630            .filter_map(|(old_index, sgroup)| {
631                sgroup_map[old_index].and_then(|new_id| {
632                    sgroup.remapped(new_id, &atom_old_to_new, &bond_old_to_new, &sgroup_map)
633                })
634            })
635            .collect();
636
637        self.stereo_groups = self
638            .stereo_groups
639            .iter()
640            .filter_map(|group| group.remapped(&atom_old_to_new, &bond_old_to_new))
641            .collect();
642
643        self.atoms = atoms;
644        self.bonds = bonds;
645        self.adjacency = crate::AdjacencyList::from_topology(self.atoms.len(), &self.bonds);
646
647        TopologyMapping {
648            atoms: AtomMapping {
649                old_to_new: atom_old_to_new,
650                new_to_old: atom_new_to_old,
651            },
652            bonds: BondMapping {
653                old_to_new: bond_old_to_new,
654                new_to_old: bond_new_to_old,
655            },
656        }
657    }
658}
659
660impl CoordinateBlock {
661    #[allow(dead_code)]
662    pub(crate) fn remap_topology(&mut self, mapping: &TopologyMapping) {
663        let kept_old_indices: Vec<_> = mapping
664            .atoms
665            .new_to_old
666            .iter()
667            .filter_map(|old_atom| old_atom.map(AtomId::index))
668            .collect();
669
670        self.conformers_2d = self
671            .conformers_2d
672            .iter()
673            .enumerate()
674            .map(|(id, conformer)| conformer.remapped_to_kept_atoms(&kept_old_indices, id))
675            .collect();
676
677        self.conformers_3d = self
678            .conformers_3d
679            .iter()
680            .enumerate()
681            .map(|(id, conformer)| conformer.remapped_to_kept_atoms(&kept_old_indices, id))
682            .collect();
683    }
684}
685
686/// Immutable molecule value.
687///
688/// The only public way to create a molecule is `MoleculeBuilder`.
689///
690/// Existing molecules are transformed through registered operations. This type
691/// intentionally does not expose mutable storage accessors.
692///
693/// # Examples
694///
695/// Build a molecule from SMILES and create a transformed value:
696///
697/// ```
698/// use cosmolkit_core::Molecule;
699///
700/// let mol = Molecule::from_smiles("CCO").unwrap();
701/// let named = mol.with_name("ethanol");
702///
703/// assert_eq!(mol.properties().name(), None);
704/// assert_eq!(named.properties().name(), Some("ethanol"));
705/// ```
706///
707/// Use explicit in-place operations when mutation is intended:
708///
709/// ```
710/// use cosmolkit_core::Molecule;
711///
712/// let mut mol = Molecule::from_smiles("c1ccccc1").unwrap();
713/// mol.kekulize_(true).unwrap();
714/// mol.sanitize_().unwrap();
715///
716/// assert_eq!(mol.num_atoms(), 6);
717/// ```
718#[derive(Debug, Clone, PartialEq, Default)]
719pub struct Molecule {
720    topology: Arc<TopologyBlock>,
721    coordinates: Arc<CoordinateBlock>,
722    properties: Arc<MoleculeProperties>,
723    derived_cache: Arc<DerivedCacheBlock>,
724    capabilities: Arc<MoleculeCapabilities>,
725}
726
727impl Molecule {
728    #[must_use]
729    pub fn new() -> Self {
730        Self::default()
731    }
732
733    #[must_use]
734    pub fn builder() -> MoleculeBuilder {
735        MoleculeBuilder::new()
736    }
737
738    pub(crate) fn from_blocks(
739        mut topology: TopologyBlock,
740        coordinates: CoordinateBlock,
741        properties: MoleculeProperties,
742    ) -> Result<Self, InvariantError> {
743        topology.adjacency =
744            crate::AdjacencyList::from_topology(topology.atoms.len(), &topology.bonds);
745        let molecule = Self {
746            topology: Arc::new(topology),
747            coordinates: Arc::new(coordinates),
748            properties: Arc::new(properties),
749            derived_cache: Arc::new(DerivedCacheBlock::default()),
750            capabilities: Arc::new(MoleculeCapabilities::default()),
751        };
752        enforce_molecule_invariants(&molecule)?;
753        Ok(molecule)
754    }
755
756    pub(crate) fn from_blocks_with_capabilities(
757        mut topology: TopologyBlock,
758        coordinates: CoordinateBlock,
759        properties: MoleculeProperties,
760        capabilities: MoleculeCapabilities,
761    ) -> Result<Self, InvariantError> {
762        topology.adjacency =
763            crate::AdjacencyList::from_topology(topology.atoms.len(), &topology.bonds);
764        let molecule = Self {
765            topology: Arc::new(topology),
766            coordinates: Arc::new(coordinates),
767            properties: Arc::new(properties),
768            derived_cache: Arc::new(DerivedCacheBlock::default()),
769            capabilities: Arc::new(capabilities),
770        };
771        enforce_molecule_invariants(&molecule)?;
772        Ok(molecule)
773    }
774
775    pub(crate) fn from_operation_blocks(
776        mut topology: TopologyBlock,
777        coordinates: CoordinateBlock,
778        properties: MoleculeProperties,
779        derived_cache: DerivedCacheBlock,
780        capabilities: MoleculeCapabilities,
781    ) -> Result<Self, InvariantError> {
782        topology.adjacency =
783            crate::AdjacencyList::from_topology(topology.atoms.len(), &topology.bonds);
784        let molecule = Self {
785            topology: Arc::new(topology),
786            coordinates: Arc::new(coordinates),
787            properties: Arc::new(properties),
788            derived_cache: Arc::new(derived_cache),
789            capabilities: Arc::new(capabilities),
790        };
791        enforce_molecule_invariants(&molecule)?;
792        Ok(molecule)
793    }
794
795    #[must_use]
796    pub fn atoms(&self) -> &[Atom] {
797        &self.topology.atoms
798    }
799
800    #[must_use]
801    pub fn bonds(&self) -> &[Bond] {
802        &self.topology.bonds
803    }
804
805    #[must_use]
806    pub fn atom(&self, id: AtomId) -> Option<&Atom> {
807        self.atoms().get(id.index())
808    }
809
810    #[must_use]
811    pub fn atomic_numbers(&self) -> Vec<u8> {
812        self.atoms().iter().map(Atom::atomic_number).collect()
813    }
814
815    #[must_use]
816    pub fn num_atoms(&self) -> usize {
817        self.atoms().len()
818    }
819
820    #[must_use]
821    pub fn num_bonds(&self) -> usize {
822        self.bonds().len()
823    }
824
825    #[must_use]
826    pub fn coordinates_2d(&self) -> Option<&[[f64; 2]]> {
827        self.coordinates
828            .conformers_2d
829            .first()
830            .map(Conformer2D::coordinates)
831    }
832
833    #[must_use]
834    pub fn conformers_2d(&self) -> &[Conformer2D] {
835        &self.coordinates.conformers_2d
836    }
837
838    #[must_use]
839    pub fn conformers_3d(&self) -> &[Conformer3D] {
840        &self.coordinates.conformers_3d
841    }
842
843    #[must_use]
844    pub fn source_coordinate_dim(&self) -> Option<CoordinateDimension> {
845        self.coordinates.source_coordinate_dim
846    }
847
848    #[must_use]
849    pub fn capabilities(&self) -> MoleculeCapabilities {
850        *self.capabilities
851    }
852
853    #[must_use]
854    pub fn topology_trust(&self) -> TopologyTrust {
855        self.capabilities.topology_trust()
856    }
857
858    #[must_use]
859    pub fn substance_groups(&self) -> &[SubstanceGroup] {
860        &self.topology.substance_groups
861    }
862
863    #[must_use]
864    pub fn stereo_groups(&self) -> &[StereoGroup] {
865        &self.topology.stereo_groups
866    }
867
868    #[must_use]
869    pub fn properties(&self) -> &MoleculeProperties {
870        &self.properties
871    }
872
873    #[must_use]
874    pub fn prop(&self, key: &str) -> Option<&str> {
875        self.properties.prop(key)
876    }
877
878    #[must_use]
879    pub fn with_name(&self, name: impl Into<String>) -> Self {
880        let mut properties = (*self.properties).clone();
881        properties = properties.with_name(name);
882        Self {
883            topology: Arc::clone(&self.topology),
884            coordinates: Arc::clone(&self.coordinates),
885            properties: Arc::new(properties),
886            derived_cache: Arc::clone(&self.derived_cache),
887            capabilities: Arc::clone(&self.capabilities),
888        }
889    }
890
891    #[must_use]
892    pub fn with_prop(&self, key: impl Into<String>, value: impl Into<String>) -> Self {
893        let mut properties = (*self.properties).clone();
894        properties = properties.with_prop(key, value);
895        Self {
896            topology: Arc::clone(&self.topology),
897            coordinates: Arc::clone(&self.coordinates),
898            properties: Arc::new(properties),
899            derived_cache: Arc::clone(&self.derived_cache),
900            capabilities: Arc::clone(&self.capabilities),
901        }
902    }
903
904    #[must_use]
905    pub fn with_sdf_data_field(&self, key: impl Into<String>, value: impl Into<String>) -> Self {
906        let mut properties = (*self.properties).clone();
907        properties = properties.with_sdf_data_field(key, value);
908        Self {
909            topology: Arc::clone(&self.topology),
910            coordinates: Arc::clone(&self.coordinates),
911            properties: Arc::new(properties),
912            derived_cache: Arc::clone(&self.derived_cache),
913            capabilities: Arc::clone(&self.capabilities),
914        }
915    }
916
917    pub fn from_smiles(smiles: &str) -> Result<Self, SmilesParseError> {
918        crate::smiles::mol_from_smiles(smiles, &crate::smiles::SmilesParseParams::default())
919    }
920
921    pub fn from_smiles_with_sanitize(
922        smiles: &str,
923        sanitize: bool,
924    ) -> Result<Self, SmilesParseError> {
925        let params = crate::smiles::SmilesParseParams::with_sanitize(sanitize);
926        crate::smiles::mol_from_smiles(smiles, &params)
927    }
928
929    pub fn from_mol_block(_block: &str) -> Result<Self, crate::io::sdf::SdfReadError> {
930        Ok(crate::io::sdf::read_sdf_from_str_with_params(
931            _block,
932            crate::io::sdf::SdfReadParams::default(),
933        )?
934        .molecule)
935    }
936
937    pub fn from_mol_block_with_params(
938        _block: &str,
939        params: crate::io::sdf::SdfReadParams,
940    ) -> Result<Self, crate::io::sdf::SdfReadError> {
941        Ok(crate::io::sdf::read_sdf_from_str_with_params(_block, params)?.molecule)
942    }
943
944    pub fn from_mol_file(
945        _path: impl AsRef<std::path::Path>,
946    ) -> Result<Self, crate::io::sdf::SdfReadError> {
947        Ok(crate::io::molfile::read_mol_file(_path)?.molecule)
948    }
949
950    pub fn from_mol_file_with_params(
951        _path: impl AsRef<std::path::Path>,
952        params: crate::io::sdf::SdfReadParams,
953    ) -> Result<Self, crate::io::sdf::SdfReadError> {
954        Ok(crate::io::molfile::read_mol_file_with_params(_path, params)?.molecule)
955    }
956
957    pub fn from_xyz_block(block: &str) -> Result<Self, crate::io::xyz::XyzReadError> {
958        crate::io::xyz::read_xyz_from_str(block)
959    }
960
961    pub fn to_smiles(&self, isomeric_smiles: bool) -> Result<String, SmilesWriteError> {
962        let params = crate::SmilesWriteParams {
963            do_isomeric_smiles: isomeric_smiles,
964            ..Default::default()
965        };
966        self.to_smiles_with_params(&params)
967    }
968
969    pub fn to_smiles_with_params(
970        &self,
971        params: &crate::SmilesWriteParams,
972    ) -> Result<String, SmilesWriteError> {
973        crate::smiles_write::mol_to_smiles(self, params)
974    }
975
976    pub fn dg_bounds_matrix(&self) -> Result<Vec<Vec<f64>>, crate::DgBoundsError> {
977        crate::distgeom::dg_bounds_matrix(self)
978    }
979
980    pub fn avalon_fingerprint(
981        &self,
982        params: &crate::avalon_fingerprint::AvalonFingerprintParams,
983    ) -> Result<crate::Fingerprint, crate::FingerprintError> {
984        crate::avalon_fingerprint::avalon_fingerprint(self, params)
985    }
986
987    pub fn morgan_fingerprint(
988        &self,
989        params: &crate::MorganFingerprintParams,
990    ) -> Result<crate::Fingerprint, crate::FingerprintError> {
991        crate::fingerprint::morgan_fingerprint(self, params)
992    }
993
994    pub fn morgan_fingerprint_with_output(
995        &self,
996        params: &crate::MorganFingerprintParams,
997    ) -> Result<crate::MorganFingerprintOutput, crate::FingerprintError> {
998        crate::fingerprint::morgan_fingerprint_with_output(self, params)
999    }
1000
1001    pub fn topological_fingerprint(
1002        &self,
1003        params: &crate::fingerprint::TopologicalFingerprintParams,
1004    ) -> crate::Fingerprint {
1005        crate::fingerprint::topological_fingerprint(self, params)
1006    }
1007
1008    pub fn maccs_fingerprint(
1009        &self,
1010        params: &crate::fingerprint::MaccsFingerprintParams,
1011    ) -> crate::Fingerprint {
1012        crate::fingerprint::maccs_fingerprint(self, params)
1013    }
1014
1015    pub fn hash(&self) -> Result<u64, crate::mol_hash::HashError> {
1016        crate::mol_hash::mol_hash(self)
1017    }
1018
1019    pub fn hash_with_ranks(&self, ranks: &[u32]) -> Result<u64, crate::mol_hash::HashError> {
1020        crate::mol_hash::mol_hash_with_ranks(self, ranks)
1021    }
1022
1023    pub fn fragments(&self) -> Result<Vec<Molecule>, crate::fragment::FragmentError> {
1024        crate::fragment::get_mol_frags(self)
1025    }
1026
1027    pub fn largest_fragment(&self) -> Result<Molecule, crate::fragment::FragmentError> {
1028        crate::fragment::get_largest_fragment(self)
1029    }
1030
1031    pub fn murcko_scaffold(&self) -> Result<Molecule, crate::mol_hash::HashError> {
1032        crate::mol_hash::mol_murcko_scaffold(self)
1033    }
1034
1035    pub fn net_scaffold(&self) -> Result<Molecule, crate::mol_hash::HashError> {
1036        crate::mol_hash::mol_net_scaffold(self)
1037    }
1038
1039    pub fn to_svg(&self, width: u32, height: u32) -> Result<String, crate::SvgDrawError> {
1040        crate::draw::mol_to_svg(self, width, height)
1041    }
1042
1043    pub fn to_png(&self, width: u32, height: u32) -> Result<Vec<u8>, crate::SvgDrawError> {
1044        crate::draw::mol_to_png(self, width, height)
1045    }
1046
1047    pub fn to_pdb_block(&self, conf_id: i32, flavor: u32) -> String {
1048        crate::pdb_writer::mol_to_pdb_block(self, conf_id, flavor)
1049    }
1050
1051    pub(crate) fn prepared_for_drawing_parity(
1052        &self,
1053    ) -> Result<crate::draw::PreparedDrawMolecule, crate::SvgDrawError> {
1054        crate::draw::prepare_mol_for_drawing_parity(self)
1055    }
1056
1057    pub fn tetrahedral_stereo(&self) -> Result<Vec<crate::TetrahedralStereo>, crate::StereoError> {
1058        crate::stereo::tetrahedral_stereo(self)
1059    }
1060
1061    pub fn perceive_stereochemistry(&self) -> Result<(), crate::StereoError> {
1062        crate::stereo::perceive_stereochemistry(self)
1063    }
1064
1065    #[allow(dead_code)]
1066    pub(crate) fn topology_block(&self) -> &TopologyBlock {
1067        &self.topology
1068    }
1069
1070    // Operation-body COW accessors are reached through OpParts. Some accessors
1071    // are intentionally ahead of the first real operation body that uses them.
1072    #[allow(dead_code)]
1073    pub(crate) fn topology_block_mut(&mut self) -> &mut TopologyBlock {
1074        Arc::make_mut(&mut self.topology)
1075    }
1076
1077    pub(crate) fn replace_topology_block(&mut self, topology: TopologyBlock) {
1078        self.topology = Arc::new(topology);
1079    }
1080
1081    pub(crate) fn take_topology_block_or_clone(&mut self) -> TopologyBlock {
1082        let block = std::mem::replace(&mut self.topology, Arc::new(TopologyBlock::default()));
1083        match Arc::try_unwrap(block) {
1084            Ok(block) => block,
1085            Err(block) => (*block).clone(),
1086        }
1087    }
1088
1089    #[allow(dead_code)]
1090    pub(crate) fn coordinate_block(&self) -> &CoordinateBlock {
1091        &self.coordinates
1092    }
1093
1094    #[allow(dead_code)]
1095    pub(crate) fn coordinate_block_mut(&mut self) -> &mut CoordinateBlock {
1096        Arc::make_mut(&mut self.coordinates)
1097    }
1098
1099    pub(crate) fn replace_coordinate_block(&mut self, coordinates: CoordinateBlock) {
1100        self.coordinates = Arc::new(coordinates);
1101    }
1102
1103    pub(crate) fn take_coordinate_block_or_clone(&mut self) -> CoordinateBlock {
1104        let block = std::mem::replace(&mut self.coordinates, Arc::new(CoordinateBlock::default()));
1105        match Arc::try_unwrap(block) {
1106            Ok(block) => block,
1107            Err(block) => (*block).clone(),
1108        }
1109    }
1110
1111    #[allow(dead_code)]
1112    pub(crate) fn derived_cache(&self) -> &DerivedCacheBlock {
1113        &self.derived_cache
1114    }
1115
1116    #[allow(dead_code)]
1117    pub(crate) fn derived_cache_mut(&mut self) -> &mut DerivedCacheBlock {
1118        Arc::make_mut(&mut self.derived_cache)
1119    }
1120
1121    #[allow(dead_code)]
1122    pub(crate) fn properties_mut(&mut self) -> &mut MoleculeProperties {
1123        Arc::make_mut(&mut self.properties)
1124    }
1125
1126    pub(crate) fn replace_properties(&mut self, properties: MoleculeProperties) {
1127        self.properties = Arc::new(properties);
1128    }
1129
1130    pub(crate) fn take_properties_or_clone(&mut self) -> MoleculeProperties {
1131        let block = std::mem::replace(
1132            &mut self.properties,
1133            Arc::new(MoleculeProperties::default()),
1134        );
1135        match Arc::try_unwrap(block) {
1136            Ok(block) => block,
1137            Err(block) => (*block).clone(),
1138        }
1139    }
1140
1141    pub(crate) fn capabilities_block(&self) -> MoleculeCapabilities {
1142        *self.capabilities
1143    }
1144}
1145
1146#[cfg(test)]
1147mod tests {
1148    use super::Molecule;
1149    use crate::avalon_fingerprint::{AvalonFingerprintParams, avalon_fingerprint};
1150    use crate::fingerprint::{MaccsFingerprintParams, TopologicalFingerprintParams};
1151    use crate::{
1152        AtomSpec, BondOrder, BondSpec, Conformer3D, Element, MoleculeBuilder,
1153        assign_stereochemistry, fragment, mol_hash, pdb_writer, perceive_stereochemistry,
1154    };
1155
1156    #[test]
1157    fn molecule_fragment_helpers_match_module_functions() {
1158        let mol = Molecule::from_smiles("CC.C").expect("failed to parse fragments test molecule");
1159
1160        let via_method = mol.fragments().expect("method fragments");
1161        let via_module = fragment::get_mol_frags(&mol).expect("module fragments");
1162
1163        assert_eq!(via_method, via_module);
1164    }
1165
1166    #[test]
1167    fn molecule_fragments_do_not_materialize_absent_isotope_or_atom_map_as_zero() {
1168        let mol = Molecule::from_smiles("CC.O").expect("failed to parse fragments test molecule");
1169        let smiles: Vec<_> = mol
1170            .fragments()
1171            .expect("method fragments")
1172            .into_iter()
1173            .map(|fragment| fragment.to_smiles(true).expect("fragment smiles"))
1174            .collect();
1175
1176        assert_eq!(smiles, vec!["CC", "O"]);
1177    }
1178
1179    #[test]
1180    fn molecule_tetrahedral_stereo_reports_smiles_chiral_center() {
1181        let mol = Molecule::from_smiles("F[C@H](Cl)Br").expect("failed to parse chiral molecule");
1182        let stereo = mol.tetrahedral_stereo().expect("tetrahedral stereo");
1183
1184        assert_eq!(stereo.len(), 1);
1185        assert_eq!(stereo[0].center.index(), 1);
1186    }
1187
1188    #[test]
1189    fn molecule_largest_fragment_helper_matches_module_function() {
1190        let mol = Molecule::from_smiles("CC.C").expect("failed to parse fragments test molecule");
1191
1192        let via_method = mol.largest_fragment().expect("method largest fragment");
1193        let via_module = fragment::get_largest_fragment(&mol).expect("module largest fragment");
1194
1195        assert_eq!(via_method, via_module);
1196    }
1197
1198    #[test]
1199    fn molecule_scaffold_helpers_match_module_functions() {
1200        let mol = Molecule::from_smiles("Cc1ccccc1").expect("failed to parse scaffold molecule");
1201
1202        let murcko_method = mol.murcko_scaffold().expect("method murcko scaffold");
1203        let murcko_module =
1204            crate::mol_hash::mol_murcko_scaffold(&mol).expect("module murcko scaffold");
1205        let net_method = mol.net_scaffold().expect("method net scaffold");
1206        let net_module = crate::mol_hash::mol_net_scaffold(&mol).expect("module net scaffold");
1207
1208        assert_eq!(murcko_method, murcko_module);
1209        assert_eq!(net_method, net_module);
1210    }
1211
1212    #[test]
1213    fn molecule_fingerprint_helpers_match_module_functions() {
1214        let mol = Molecule::from_smiles("CCO").expect("failed to parse fingerprint molecule");
1215
1216        let avalon_params = AvalonFingerprintParams::default();
1217        let topological_params = TopologicalFingerprintParams::default();
1218        let maccs_params = MaccsFingerprintParams::default();
1219
1220        assert_eq!(
1221            mol.avalon_fingerprint(&avalon_params)
1222                .expect("method avalon fingerprint"),
1223            avalon_fingerprint(&mol, &avalon_params).expect("module avalon fingerprint")
1224        );
1225        assert_eq!(
1226            mol.topological_fingerprint(&topological_params),
1227            crate::fingerprint::topological_fingerprint(&mol, &topological_params)
1228        );
1229        assert_eq!(
1230            mol.maccs_fingerprint(&maccs_params),
1231            crate::fingerprint::maccs_fingerprint(&mol, &maccs_params)
1232        );
1233    }
1234
1235    #[test]
1236    fn molecule_hash_helpers_match_module_functions() {
1237        let mol = Molecule::from_smiles("Cl[C@H](Br)I").expect("failed to parse hash molecule");
1238        let ranks = crate::stereo::assign_atom_cip_ranks(&mol).expect("cip ranks");
1239
1240        assert_eq!(
1241            mol.hash().expect("method hash"),
1242            mol_hash::mol_hash(&mol).expect("module hash")
1243        );
1244        assert_eq!(
1245            mol.hash_with_ranks(&ranks).expect("method hash with ranks"),
1246            mol_hash::mol_hash_with_ranks(&mol, &ranks).expect("module hash with ranks")
1247        );
1248    }
1249
1250    #[test]
1251    fn molecule_pdb_helper_matches_module_function() {
1252        let mol = Molecule::from_smiles("CO").expect("failed to parse pdb molecule");
1253
1254        assert_eq!(
1255            mol.to_pdb_block(-1, 0),
1256            pdb_writer::mol_to_pdb_block(&mol, -1, 0)
1257        );
1258    }
1259
1260    #[test]
1261    fn molecule_perceive_stereochemistry_matches_module_function() {
1262        let mol = Molecule::from_smiles("Cl[C@H](Br)I").expect("failed to parse stereo molecule");
1263
1264        assert_eq!(
1265            mol.perceive_stereochemistry(),
1266            perceive_stereochemistry(&mol)
1267        );
1268        #[allow(deprecated)]
1269        {
1270            assert_eq!(assign_stereochemistry(&mol), perceive_stereochemistry(&mol));
1271        }
1272    }
1273
1274    #[test]
1275    fn molecule_pdb_helper_uses_conformer_index_selection() {
1276        let mut builder = MoleculeBuilder::new();
1277        builder.add_atom(AtomSpec::new(Element::O));
1278        builder
1279            .add_conformer(Conformer3D::new(7, vec![[1.0, 2.0, 3.0]], true))
1280            .expect("failed to add conformer");
1281        let mol = builder.build().expect("failed to build conformer molecule");
1282
1283        let block = mol.to_pdb_block(0, 0);
1284        assert_eq!(
1285            block,
1286            "HETATM    1  O1  UNL     1      +1.000  +2.000  +3.000  1.00  0.00           O  \nEND\n"
1287        );
1288    }
1289
1290    #[test]
1291    fn molecule_perceive_stereochemistry_is_read_only() {
1292        let mol = Molecule::from_smiles("F/C=C/F").expect("failed to parse stereo molecule");
1293        let before = mol.to_smiles(true).expect("smiles before");
1294        let _ = mol
1295            .perceive_stereochemistry()
1296            .expect("perceive stereochemistry");
1297        let after = mol.to_smiles(true).expect("smiles after");
1298        assert_eq!(before, after);
1299    }
1300
1301    #[test]
1302    fn molecule_topological_fingerprint_helper_matches_module_function_with_custom_params() {
1303        let mut builder = MoleculeBuilder::new();
1304        let c1 = builder.add_atom(AtomSpec::new(Element::C));
1305        let c2 = builder.add_atom(AtomSpec::new(Element::C));
1306        let o = builder.add_atom(AtomSpec::new(Element::O));
1307        builder
1308            .add_bond(BondSpec::new(c1, c2, BondOrder::Single))
1309            .expect("failed to add c-c bond");
1310        builder
1311            .add_bond(BondSpec::new(c2, o, BondOrder::Single))
1312            .expect("failed to add c-o bond");
1313        let mol = builder
1314            .build()
1315            .expect("failed to build fingerprint molecule");
1316        let params = TopologicalFingerprintParams {
1317            min_path: 1,
1318            max_path: 2,
1319            n_bits: 512,
1320            n_bits_per_hash: 3,
1321            use_bond_types: false,
1322            from_atoms: Some(vec![0]),
1323            ignore_atoms: Some(vec![2]),
1324        };
1325        assert_eq!(
1326            mol.topological_fingerprint(&params),
1327            crate::fingerprint::topological_fingerprint(&mol, &params)
1328        );
1329    }
1330}