1use std::fmt;
17
18use cosmolkit_macros::{mol_op_body, molecule_ops};
19
20use crate::{
21 Atom, AtomId, Bond, DerivedState, InvariantError, Molecule, MoleculeProperties, SupportStatus,
22 TopologyTrust,
23 invariants::enforce_molecule_invariants,
24 molecule::{CoordinateBlock, DerivedCacheBlock, TopologyBlock, TopologyMapping},
25};
26
27pub(crate) use crate::read_parts::MoleculeReadParts;
28
29trait MoleculeReadAccess<'a>: Copy {
30 fn atoms(self) -> &'a [Atom];
31 fn bonds(self) -> &'a [Bond];
32 fn atom(self, atom: AtomId) -> Option<&'a Atom>;
33 fn num_atoms(self) -> usize;
34 fn derived_cache(self) -> &'a DerivedCacheBlock;
35 fn assign_valence_with_options(
36 self,
37 model: crate::ValenceModel,
38 strict: bool,
39 ) -> Result<crate::ValenceAssignment, crate::ValenceError>;
40 fn rank_mol_atoms(self) -> Result<Vec<usize>, crate::KekulizeError>;
41}
42
43impl<'a> MoleculeReadAccess<'a> for MoleculeReadParts<'a> {
44 fn atoms(self) -> &'a [Atom] {
45 MoleculeReadParts::atoms(self)
46 }
47
48 fn bonds(self) -> &'a [Bond] {
49 MoleculeReadParts::bonds(self)
50 }
51
52 fn atom(self, atom: AtomId) -> Option<&'a Atom> {
53 MoleculeReadParts::atom(self, atom)
54 }
55
56 fn num_atoms(self) -> usize {
57 MoleculeReadParts::num_atoms(self)
58 }
59
60 fn derived_cache(self) -> &'a DerivedCacheBlock {
61 MoleculeReadParts::derived_cache(self)
62 }
63
64 fn assign_valence_with_options(
65 self,
66 model: crate::ValenceModel,
67 strict: bool,
68 ) -> Result<crate::ValenceAssignment, crate::ValenceError> {
69 MoleculeReadParts::assign_valence_with_options(self, model, strict)
70 }
71
72 fn rank_mol_atoms(self) -> Result<Vec<usize>, crate::KekulizeError> {
73 MoleculeReadParts::rank_mol_atoms(self)
74 }
75}
76
77impl<'a> MoleculeReadAccess<'a> for &'a Molecule {
78 fn atoms(self) -> &'a [Atom] {
79 self.atoms()
80 }
81
82 fn bonds(self) -> &'a [Bond] {
83 self.bonds()
84 }
85
86 fn atom(self, atom: AtomId) -> Option<&'a Atom> {
87 self.atom(atom)
88 }
89
90 fn num_atoms(self) -> usize {
91 self.num_atoms()
92 }
93
94 fn derived_cache(self) -> &'a DerivedCacheBlock {
95 self.derived_cache()
96 }
97
98 fn assign_valence_with_options(
99 self,
100 model: crate::ValenceModel,
101 strict: bool,
102 ) -> Result<crate::ValenceAssignment, crate::ValenceError> {
103 crate::assign_valence_with_options(self, model, strict)
104 }
105
106 fn rank_mol_atoms(self) -> Result<Vec<usize>, crate::KekulizeError> {
107 crate::canon_rank::rank_mol_atoms(self)
108 }
109}
110
111#[derive(Debug, Clone, Copy, PartialEq, Eq)]
112pub enum MoleculeOpKind {
113 Strong,
114 Weak,
115}
116
117#[derive(Debug, Clone, Copy, PartialEq, Eq)]
118pub enum TopologyEditKind {
119 None,
120 Local,
121 Compacting,
122 Appending,
123 Renumbering,
124 Merge,
125}
126
127#[derive(Debug, Clone, Copy, PartialEq, Eq)]
128pub enum OperationDomain {
129 Topology,
130 Coordinate,
131}
132
133#[derive(Debug, Clone, Copy, PartialEq, Eq)]
134pub enum ParityPolicy {
135 NotApplicable,
136 RequiredWhenSupported,
137 RequiredNow,
138}
139
140#[derive(Debug, Clone, Copy, PartialEq, Eq)]
141pub struct BlockSet(u8);
142
143impl BlockSet {
144 pub const NONE: Self = Self(0);
145 pub const TOPOLOGY: Self = Self(1 << 0);
146 pub const COORDINATES: Self = Self(1 << 1);
147 pub const PROPERTIES: Self = Self(1 << 2);
148 pub const DERIVED_CACHE: Self = Self(1 << 3);
149
150 #[must_use]
151 pub const fn union(self, other: Self) -> Self {
152 Self(self.0 | other.0)
153 }
154
155 #[must_use]
156 pub const fn contains(self, other: Self) -> bool {
157 (self.0 & other.0) == other.0
158 }
159
160 #[must_use]
161 pub const fn intersects(self, other: Self) -> bool {
162 (self.0 & other.0) != 0
163 }
164}
165
166#[derive(Debug, Clone, Copy, PartialEq, Eq)]
167pub struct BlockAccess {
168 read: BlockSet,
169 write: BlockSet,
170}
171
172impl BlockAccess {
173 pub const NONE: Self = Self {
174 read: BlockSet::NONE,
175 write: BlockSet::NONE,
176 };
177
178 #[must_use]
179 pub const fn new(read: BlockSet, write: BlockSet) -> Self {
180 Self { read, write }
181 }
182
183 #[must_use]
184 pub const fn read(self) -> BlockSet {
185 self.read
186 }
187
188 #[must_use]
189 pub const fn write(self) -> BlockSet {
190 self.write
191 }
192
193 #[must_use]
194 pub const fn can_read(self, block: BlockSet) -> bool {
195 self.read.contains(block) || self.write.contains(block)
196 }
197
198 #[must_use]
199 pub const fn can_write(self, block: BlockSet) -> bool {
200 self.write.contains(block)
201 }
202
203 #[must_use]
204 pub const fn has_overlapping_read_write(self) -> bool {
205 self.read.intersects(self.write)
206 }
207}
208
209#[derive(Debug, Clone, Copy, PartialEq, Eq)]
210pub enum PreservationProof {
211 LeafAtomAppend,
212}
213
214#[derive(Debug, Clone, Copy, PartialEq, Eq)]
215pub struct DerivedEffects {
216 recompute: DerivedState,
217 preserve: DerivedState,
218 invalidate: DerivedState,
219}
220
221impl DerivedEffects {
222 pub const NONE: Self = Self {
223 recompute: DerivedState::NONE,
224 preserve: DerivedState::NONE,
225 invalidate: DerivedState::NONE,
226 };
227
228 #[must_use]
229 pub const fn new(
230 recompute: DerivedState,
231 preserve: DerivedState,
232 invalidate: DerivedState,
233 ) -> Self {
234 Self {
235 recompute,
236 preserve,
237 invalidate,
238 }
239 }
240
241 #[must_use]
242 pub const fn recompute(self) -> DerivedState {
243 self.recompute
244 }
245
246 #[must_use]
247 pub const fn preserve(self) -> DerivedState {
248 self.preserve
249 }
250
251 #[must_use]
252 pub const fn invalidate(self) -> DerivedState {
253 self.invalidate
254 }
255
256 #[must_use]
257 pub const fn needs_update(self) -> DerivedState {
258 self.invalidate.union(self.recompute)
259 }
260}
261
262#[derive(Debug, Clone, Copy, PartialEq, Eq)]
263pub enum MappingRequirement {
264 None,
265 Identity,
266 Required,
267}
268
269#[derive(Debug, Clone, Copy, PartialEq, Eq)]
270pub enum SemanticPrecondition {
271 TrustedBondTopology,
272 HydrogenOwnershipRepresented,
273 ValenceComputable,
274}
275
276impl fmt::Display for SemanticPrecondition {
277 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
278 match self {
279 Self::TrustedBondTopology => f.write_str("trusted_bond_topology"),
280 Self::HydrogenOwnershipRepresented => f.write_str("hydrogen_ownership_represented"),
281 Self::ValenceComputable => f.write_str("valence_computable"),
282 }
283 }
284}
285
286#[derive(Debug, Clone, Copy, PartialEq, Eq)]
287pub struct SemanticPreconditionSet(u8);
288
289impl SemanticPreconditionSet {
290 pub const NONE: Self = Self(0);
291 pub const TRUSTED_BOND_TOPOLOGY: Self = Self(1 << 0);
292 pub const HYDROGEN_OWNERSHIP_REPRESENTED: Self = Self(1 << 1);
293 pub const VALENCE_COMPUTABLE: Self = Self(1 << 2);
294
295 #[must_use]
296 pub const fn union(self, other: Self) -> Self {
297 Self(self.0 | other.0)
298 }
299
300 #[must_use]
301 pub const fn contains(self, other: Self) -> bool {
302 (self.0 & other.0) == other.0
303 }
304}
305
306#[derive(Debug, Clone, Copy, PartialEq, Eq)]
307pub struct MoleculeOpSpec {
308 pub method: &'static str,
309 pub impl_fn: &'static str,
310 pub domain: OperationDomain,
311 pub kind: MoleculeOpKind,
312 pub topology_edit: TopologyEditKind,
313 pub access: BlockAccess,
314 pub may_mutate: BlockSet,
315 pub auto_remap: BlockSet,
316 pub derived_effects: DerivedEffects,
317 pub semantic_preconditions: SemanticPreconditionSet,
318 pub requires_mapping: MappingRequirement,
319 pub allows_noop: bool,
320 pub support: SupportStatus,
321 pub parity: ParityPolicy,
322 pub io_roundtrip: bool,
323}
324
325impl fmt::Display for MoleculeOpSpec {
326 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
327 f.write_str(self.method)
328 }
329}
330
331impl MoleculeOpSpec {
332 #[must_use]
333 pub const fn needs_update(self: &Self) -> DerivedState {
334 self.derived_effects.needs_update()
335 }
336}
337
338#[derive(Debug, Clone, PartialEq, Eq)]
339pub enum OpOutcome {
340 Changed,
341 NoOp { reason: &'static str },
342}
343
344#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
345pub enum OperationError {
346 #[error("{operation}: unsupported operation: {reason}")]
347 Unsupported {
348 operation: &'static MoleculeOpSpec,
349 reason: &'static str,
350 },
351 #[error("{operation}: invalid input: {message}")]
352 InvalidInput {
353 operation: &'static MoleculeOpSpec,
354 message: &'static str,
355 },
356 #[error("{operation}: semantic precondition `{requirement}` failed: {message}")]
357 Precondition {
358 operation: &'static MoleculeOpSpec,
359 requirement: SemanticPrecondition,
360 message: &'static str,
361 },
362 #[error("{operation}: chemistry error: {message}")]
363 Chemistry {
364 operation: &'static MoleculeOpSpec,
365 message: &'static str,
366 },
367 #[error("{operation}: distance-geometry error: {source}")]
368 DistanceGeometry {
369 operation: &'static MoleculeOpSpec,
370 #[source]
371 source: crate::DgBoundsError,
372 },
373 #[error("{operation}: {source}")]
374 UnsupportedFeature {
375 operation: &'static MoleculeOpSpec,
376 #[source]
377 source: crate::UnsupportedFeatureError,
378 },
379 #[error("{operation}: valence error: {source}")]
380 Valence {
381 operation: &'static MoleculeOpSpec,
382 #[source]
383 source: crate::ValenceError,
384 },
385 #[error("{operation}: ring finding error: {source}")]
386 RingFinding {
387 operation: &'static MoleculeOpSpec,
388 #[source]
389 source: crate::RingFindingError,
390 },
391 #[error("{operation}: aromaticity error: {source}")]
392 Aromaticity {
393 operation: &'static MoleculeOpSpec,
394 #[source]
395 source: crate::AromaticityError,
396 },
397 #[error("{operation}: {source}")]
398 Sanitize {
399 operation: &'static MoleculeOpSpec,
400 #[source]
401 source: crate::SanitizeError,
402 },
403 #[error("{operation}: kekulize error: {source}")]
404 Kekulize {
405 operation: &'static MoleculeOpSpec,
406 #[source]
407 source: crate::KekulizeError,
408 },
409 #[error("{operation}: hydrogen removal error: {source}")]
410 RemoveHydrogens {
411 operation: &'static MoleculeOpSpec,
412 #[source]
413 source: crate::RemoveHydrogensError,
414 },
415 #[error("{operation}: hydrogen addition error: {source}")]
416 AddHydrogens {
417 operation: &'static MoleculeOpSpec,
418 #[source]
419 source: crate::AddHydrogensError,
420 },
421 #[error("{operation}: invariant violation: {failure}")]
422 InvariantViolation {
423 operation: &'static MoleculeOpSpec,
424 failure: InvariantError,
425 },
426}
427
428#[derive(Debug, Clone, Copy, PartialEq, Eq)]
429pub struct InvariantCheckSet(u32);
430
431impl InvariantCheckSet {
432 pub const NONE: Self = Self(0);
433 pub const GRAPH_INDEX_VALIDITY: Self = Self(1 << 0);
434 pub const COORDINATE_ROW_ALIGNMENT: Self = Self(1 << 1);
435 pub const MAPPING_RECORDED_OR_EXPLICITLY_UNSUPPORTED: Self = Self(1 << 2);
436 pub const STEREO_VALIDITY: Self = Self(1 << 3);
437 pub const CACHE_INVALIDATION: Self = Self(1 << 4);
438 pub const PROPERTY_POLICY: Self = Self(1 << 5);
439 pub const SOURCE_UNCHANGED: Self = Self(1 << 6);
440 pub const UNSUPPORTED_IS_EXPLICIT: Self = Self(1 << 7);
441
442 #[must_use]
443 pub const fn union(self, other: Self) -> Self {
444 Self(self.0 | other.0)
445 }
446}
447
448#[derive(Debug, Clone, Copy, PartialEq, Eq)]
449pub struct SupportMatrixEntry {
450 pub feature: &'static crate::FeatureSpec,
451 pub operation: Option<&'static MoleculeOpSpec>,
452}
453
454#[derive(Debug, Clone, Copy, PartialEq, Eq)]
455pub struct OperationInvariantEntry {
456 pub operation: &'static MoleculeOpSpec,
457 pub profile: &'static str,
458 pub required_checks: InvariantCheckSet,
459}
460
461impl OperationInvariantEntry {
462 #[must_use]
463 pub const fn for_operation(operation: &'static MoleculeOpSpec, profile: &'static str) -> Self {
464 Self {
465 operation,
466 profile,
467 required_checks: InvariantCheckSet::GRAPH_INDEX_VALIDITY
468 .union(InvariantCheckSet::COORDINATE_ROW_ALIGNMENT)
469 .union(InvariantCheckSet::MAPPING_RECORDED_OR_EXPLICITLY_UNSUPPORTED)
470 .union(InvariantCheckSet::STEREO_VALIDITY)
471 .union(InvariantCheckSet::CACHE_INVALIDATION)
472 .union(InvariantCheckSet::PROPERTY_POLICY)
473 .union(InvariantCheckSet::SOURCE_UNCHANGED)
474 .union(InvariantCheckSet::UNSUPPORTED_IS_EXPLICIT),
475 }
476 }
477}
478
479#[derive(Debug, Clone, Copy, PartialEq, Eq)]
480pub struct ParityMatrixEntry {
481 pub operation: &'static MoleculeOpSpec,
482 pub feature: &'static crate::FeatureSpec,
483 pub profile: &'static str,
484 pub rdkit_version: Option<&'static str>,
485}
486
487#[derive(Debug, Clone, PartialEq, Eq)]
488pub struct OperationTrace {
489 touched_blocks: BlockSet,
490 claimed_write_blocks: BlockSet,
491 recorded_topology_edit: TopologyEditKind,
492 remapped_blocks: BlockSet,
493 preserved_cache: DerivedState,
494 read_cache: DerivedState,
495 cleared_cache: DerivedState,
496 updated_cache: DerivedState,
497 outcome: Option<OpOutcome>,
498}
499
500impl OperationTrace {
501 #[must_use]
502 pub const fn touched_blocks(&self) -> BlockSet {
503 self.touched_blocks
504 }
505
506 #[must_use]
507 pub const fn read_cache(&self) -> DerivedState {
508 self.read_cache
509 }
510
511 #[must_use]
512 pub const fn remapped_blocks(&self) -> BlockSet {
513 self.remapped_blocks
514 }
515
516 #[must_use]
517 pub const fn preserved_cache(&self) -> DerivedState {
518 self.preserved_cache
519 }
520
521 #[must_use]
522 pub const fn cleared_cache(&self) -> DerivedState {
523 self.cleared_cache
524 }
525
526 #[must_use]
527 pub const fn updated_cache(&self) -> DerivedState {
528 self.updated_cache
529 }
530
531 #[must_use]
532 pub fn outcome(&self) -> Option<&OpOutcome> {
533 self.outcome.as_ref()
534 }
535}
536
537#[cfg(feature = "op-contracts")]
538#[derive(Debug, Clone, Copy, PartialEq, Eq)]
539enum BlockLifecycle {
540 Available,
541 Begun,
542 Committed,
543}
544
545#[cfg(feature = "op-contracts")]
546#[derive(Debug, Clone, PartialEq, Eq)]
547struct ContractSourceSnapshot {
548 atom_count: usize,
549 bond_endpoints: Vec<(AtomId, AtomId)>,
550}
551
552#[cfg(feature = "op-contracts")]
553impl ContractSourceSnapshot {
554 fn from_molecule(molecule: &Molecule) -> Self {
555 Self {
556 atom_count: molecule.num_atoms(),
557 bond_endpoints: molecule
558 .bonds()
559 .iter()
560 .map(|bond| (bond.begin(), bond.end()))
561 .collect(),
562 }
563 }
564
565 const fn num_atoms(&self) -> usize {
566 self.atom_count
567 }
568
569 fn num_bonds(&self) -> usize {
570 self.bond_endpoints.len()
571 }
572
573 fn bond_endpoint(&self, index: usize) -> Option<(AtomId, AtomId)> {
574 self.bond_endpoints.get(index).copied()
575 }
576}
577
578#[cfg(feature = "op-contracts")]
579enum ContractSource<'a> {
580 Borrowed(&'a Molecule),
581 Snapshot(ContractSourceSnapshot),
582}
583
584#[cfg(feature = "op-contracts")]
585impl ContractSource<'_> {
586 fn num_atoms(&self) -> usize {
587 match self {
588 Self::Borrowed(molecule) => molecule.num_atoms(),
589 Self::Snapshot(snapshot) => snapshot.num_atoms(),
590 }
591 }
592
593 fn num_bonds(&self) -> usize {
594 match self {
595 Self::Borrowed(molecule) => molecule.num_bonds(),
596 Self::Snapshot(snapshot) => snapshot.num_bonds(),
597 }
598 }
599
600 fn bond_endpoint(&self, index: usize) -> Option<(AtomId, AtomId)> {
601 match self {
602 Self::Borrowed(molecule) => molecule
603 .bonds()
604 .get(index)
605 .map(|bond| (bond.begin(), bond.end())),
606 Self::Snapshot(snapshot) => snapshot.bond_endpoint(index),
607 }
608 }
609}
610
611pub struct OpParts<'a> {
612 spec: &'static MoleculeOpSpec,
613 #[cfg(feature = "op-contracts")]
614 contract_source: ContractSource<'a>,
615 working: Molecule,
616 in_place_target: Option<&'a mut Molecule>,
617 topology_mapping: Option<TopologyMapping>,
618 #[cfg(feature = "op-contracts")]
619 topology_lifecycle: BlockLifecycle,
620 #[cfg(feature = "op-contracts")]
621 coordinates_lifecycle: BlockLifecycle,
622 #[cfg(feature = "op-contracts")]
623 properties_lifecycle: BlockLifecycle,
624
625 #[cfg(feature = "op-contracts")]
626 trace: OperationTrace,
627}
628
629impl<'a> OpParts<'a> {
630 pub(crate) fn new(
631 source: &'a Molecule,
632 spec: &'static MoleculeOpSpec,
633 ) -> Result<Self, OperationError> {
634 validate_semantic_preconditions(source, spec)?;
635 Ok(Self {
636 spec,
637 #[cfg(feature = "op-contracts")]
638 contract_source: ContractSource::Borrowed(source),
639 working: source.clone(),
640 in_place_target: None,
641 topology_mapping: None,
642 #[cfg(feature = "op-contracts")]
643 topology_lifecycle: BlockLifecycle::Available,
644 #[cfg(feature = "op-contracts")]
645 coordinates_lifecycle: BlockLifecycle::Available,
646 #[cfg(feature = "op-contracts")]
647 properties_lifecycle: BlockLifecycle::Available,
648 #[cfg(feature = "op-contracts")]
649 trace: OperationTrace {
650 touched_blocks: BlockSet::NONE,
651 claimed_write_blocks: BlockSet::NONE,
652 recorded_topology_edit: TopologyEditKind::None,
653 remapped_blocks: BlockSet::NONE,
654 preserved_cache: DerivedState::NONE,
655 read_cache: DerivedState::NONE,
656 cleared_cache: DerivedState::NONE,
657 updated_cache: DerivedState::NONE,
658 outcome: None,
659 },
660 })
661 }
662
663 pub(crate) fn new_in_place(
664 target: &'a mut Molecule,
665 spec: &'static MoleculeOpSpec,
666 ) -> Result<Self, OperationError> {
667 validate_semantic_preconditions(target, spec)?;
668 #[cfg(feature = "op-contracts")]
669 let contract_source =
670 ContractSource::Snapshot(ContractSourceSnapshot::from_molecule(target));
671 let working = std::mem::take(target);
672 Ok(Self {
673 spec,
674 #[cfg(feature = "op-contracts")]
675 contract_source,
676 working,
677 in_place_target: Some(target),
678 topology_mapping: None,
679 #[cfg(feature = "op-contracts")]
680 topology_lifecycle: BlockLifecycle::Available,
681 #[cfg(feature = "op-contracts")]
682 coordinates_lifecycle: BlockLifecycle::Available,
683 #[cfg(feature = "op-contracts")]
684 properties_lifecycle: BlockLifecycle::Available,
685 #[cfg(feature = "op-contracts")]
686 trace: OperationTrace {
687 touched_blocks: BlockSet::NONE,
688 claimed_write_blocks: BlockSet::NONE,
689 recorded_topology_edit: TopologyEditKind::None,
690 remapped_blocks: BlockSet::NONE,
691 preserved_cache: DerivedState::NONE,
692 read_cache: DerivedState::NONE,
693 cleared_cache: DerivedState::NONE,
694 updated_cache: DerivedState::NONE,
695 outcome: None,
696 },
697 })
698 }
699
700 pub(crate) fn begin_topology_read(&self) -> Result<MoleculeReadParts<'_>, OperationError> {
707 self.validate_access_spec()?;
708 #[cfg(feature = "op-contracts")]
709 {
710 if !self.spec.access.read().contains(BlockSet::TOPOLOGY)
711 || self.spec.access.write().contains(BlockSet::TOPOLOGY)
712 {
713 return Err(OperationError::InvalidInput {
714 operation: self.spec,
715 message: "operation attempted to read topology outside its registry read access",
716 });
717 }
718 }
719 Ok(MoleculeReadParts::from_molecule(&self.working))
720 }
721
722 fn read_parts_for_topology(&self, topology: TopologyBlock) -> Result<Molecule, OperationError> {
723 self.read_parts_for_blocks(
724 topology,
725 self.working.coordinate_block().clone(),
726 self.working.properties().clone(),
727 )
728 }
729
730 fn read_parts_for_blocks(
731 &self,
732 topology: TopologyBlock,
733 coordinates: CoordinateBlock,
734 properties: MoleculeProperties,
735 ) -> Result<Molecule, OperationError> {
736 Molecule::from_operation_blocks(
737 topology,
738 coordinates,
739 properties,
740 self.working.derived_cache().clone(),
741 self.working.capabilities_block(),
742 )
743 .map_err(|failure| OperationError::InvariantViolation {
744 operation: self.spec,
745 failure,
746 })
747 }
748
749 fn read_parts_for_optional_blocks(
750 &self,
751 topology: TopologyBlock,
752 coordinates: Option<&CoordinateBlock>,
753 properties: Option<&MoleculeProperties>,
754 ) -> Result<Molecule, OperationError> {
755 self.read_parts_for_blocks(
756 topology,
757 coordinates
758 .cloned()
759 .unwrap_or_else(|| self.working.coordinate_block().clone()),
760 properties
761 .cloned()
762 .unwrap_or_else(|| self.working.properties().clone()),
763 )
764 }
765
766 pub(crate) fn begin_topology_mut(&mut self) -> Result<TopologyBlock, OperationError> {
767 self.begin_block_mut(BlockSet::TOPOLOGY)?;
768 #[cfg(feature = "op-contracts")]
769 {
770 self.topology_lifecycle = BlockLifecycle::Begun;
771 }
772 Ok(if self.in_place_target.is_some() {
773 self.working.take_topology_block_or_clone()
774 } else {
775 self.working.topology_block().clone()
776 })
777 }
778
779 pub(crate) fn commit_topology(
780 &mut self,
781 mut topology: TopologyBlock,
782 ) -> Result<(), OperationError> {
783 #[cfg(feature = "op-contracts")]
784 {
785 if self.topology_lifecycle != BlockLifecycle::Begun {
786 return Err(OperationError::InvalidInput {
787 operation: self.spec,
788 message: "topology block was not begun before commit",
789 });
790 }
791 }
792 topology.adjacency =
793 crate::AdjacencyList::from_topology(topology.atoms.len(), &topology.bonds);
794 self.record_mutation(BlockSet::TOPOLOGY);
795 self.working.replace_topology_block(topology);
796 #[cfg(feature = "op-contracts")]
797 {
798 self.topology_lifecycle = BlockLifecycle::Committed;
799 }
800 Ok(())
801 }
802
803 pub(crate) fn begin_coordinates_mut(&mut self) -> Result<CoordinateBlock, OperationError> {
804 self.begin_block_mut(BlockSet::COORDINATES)?;
805 #[cfg(feature = "op-contracts")]
806 {
807 self.coordinates_lifecycle = BlockLifecycle::Begun;
808 }
809 Ok(if self.in_place_target.is_some() {
810 self.working.take_coordinate_block_or_clone()
811 } else {
812 self.working.coordinate_block().clone()
813 })
814 }
815
816 pub(crate) fn commit_coordinates(
817 &mut self,
818 coordinates: CoordinateBlock,
819 ) -> Result<(), OperationError> {
820 #[cfg(feature = "op-contracts")]
821 {
822 if self.coordinates_lifecycle != BlockLifecycle::Begun {
823 return Err(OperationError::InvalidInput {
824 operation: self.spec,
825 message: "coordinate block was not begun before commit",
826 });
827 }
828 }
829 self.record_mutation(BlockSet::COORDINATES);
830 self.working.replace_coordinate_block(coordinates);
831 #[cfg(feature = "op-contracts")]
832 {
833 self.coordinates_lifecycle = BlockLifecycle::Committed;
834 }
835 Ok(())
836 }
837
838 pub(crate) fn begin_properties_mut(&mut self) -> Result<MoleculeProperties, OperationError> {
839 self.begin_block_mut(BlockSet::PROPERTIES)?;
840 #[cfg(feature = "op-contracts")]
841 {
842 self.properties_lifecycle = BlockLifecycle::Begun;
843 }
844 Ok(if self.in_place_target.is_some() {
845 self.working.take_properties_or_clone()
846 } else {
847 self.working.properties().clone()
848 })
849 }
850
851 pub(crate) fn commit_properties(
852 &mut self,
853 properties: MoleculeProperties,
854 ) -> Result<(), OperationError> {
855 #[cfg(feature = "op-contracts")]
856 {
857 if self.properties_lifecycle != BlockLifecycle::Begun {
858 return Err(OperationError::InvalidInput {
859 operation: self.spec,
860 message: "properties block was not begun before commit",
861 });
862 }
863 }
864 self.record_mutation(BlockSet::PROPERTIES);
865 self.working.replace_properties(properties);
866 #[cfg(feature = "op-contracts")]
867 {
868 self.properties_lifecycle = BlockLifecycle::Committed;
869 }
870 Ok(())
871 }
872
873 pub(crate) fn record_topology_edit(
874 &mut self,
875 kind: TopologyEditKind,
876 ) -> Result<(), OperationError> {
877 #[cfg(feature = "op-contracts")]
878 {
879 if kind == TopologyEditKind::Local
880 && matches!(
881 self.spec.topology_edit,
882 TopologyEditKind::Appending
883 | TopologyEditKind::Compacting
884 | TopologyEditKind::Renumbering
885 | TopologyEditKind::Merge
886 )
887 {
888 return Ok(());
889 }
890 if self.spec.topology_edit != kind {
891 return Err(OperationError::InvalidInput {
892 operation: self.spec,
893 message: "recorded topology edit does not match registry declaration",
894 });
895 }
896 self.trace.recorded_topology_edit = kind;
897 }
898 #[cfg(not(feature = "op-contracts"))]
899 {
900 let _ = kind;
901 }
902 Ok(())
903 }
904
905 pub(crate) fn record_topology_mapping(&mut self, mapping: TopologyMapping) {
906 self.topology_mapping = Some(mapping);
907 self.record_remapped(self.spec.auto_remap);
908 }
909
910 #[cfg(feature = "op-contracts")]
914 fn check_cache_write_permission(&self, state: DerivedState) {
915 let effects = self.spec.derived_effects;
916 let allowed = effects.recompute();
917 if !allowed.contains(state) {
918 panic!(
919 "cache write permission violation: operation `{}` attempted to write \
920 derived state `{:?}` but only has `recompute({:?})` \
921 permissions",
922 self.spec.method,
923 state,
924 effects.recompute(),
925 );
926 }
927 }
928
929 #[cfg(feature = "op-contracts")]
932 fn check_cache_clear_permission(&self, states: DerivedState) {
933 let effects = self.spec.derived_effects;
934 let allowed = effects.invalidate().union(effects.recompute());
935 let forbidden = states.bits() & !allowed.bits();
936 if forbidden != 0 {
937 panic!(
938 "cache clear permission violation: operation `{}` attempted to clear \
939 derived state bits `{:#010b}` but only has `invalidate({:?})` and `recompute({:?})` \
940 permissions",
941 self.spec.method,
942 forbidden,
943 effects.invalidate(),
944 effects.recompute(),
945 );
946 }
947 }
948
949 #[cfg(feature = "op-contracts")]
952 fn check_cache_read_permission(&self, state: DerivedState) {
953 let effects = self.spec.derived_effects;
954 let allowed = effects.preserve();
955 if !allowed.contains(state) {
956 panic!(
957 "cache read permission violation: operation `{}` attempted to read \
958 derived state `{:?}` but only has `preserve({:?})` \
959 permissions",
960 self.spec.method,
961 state,
962 effects.preserve(),
963 );
964 }
965 }
966
967 pub(crate) fn set_rings_cache(&mut self, rings: crate::RingInfo) {
968 #[cfg(feature = "op-contracts")]
969 self.check_cache_write_permission(DerivedState::RINGS);
970 self.record_mutation(BlockSet::DERIVED_CACHE);
971 self.working.derived_cache_mut().rings = Some(rings);
972 self.record_updated_cache(DerivedState::RINGS);
973 }
974
975 pub(crate) fn set_ring_families_cache(&mut self, ring_families: crate::RingInfo) {
976 #[cfg(feature = "op-contracts")]
977 self.check_cache_write_permission(DerivedState::RING_FAMILIES);
978 self.record_mutation(BlockSet::DERIVED_CACHE);
979 self.working.derived_cache_mut().ring_families = Some(ring_families);
980 self.record_updated_cache(DerivedState::RING_FAMILIES);
981 }
982
983 pub(crate) fn set_valence_cache(&mut self, valence: crate::ValenceAssignment) {
984 #[cfg(feature = "op-contracts")]
985 self.check_cache_write_permission(DerivedState::VALENCE);
986 self.record_mutation(BlockSet::DERIVED_CACHE);
987 self.working.derived_cache_mut().valence = Some(valence);
988 self.record_updated_cache(DerivedState::VALENCE);
989 }
990
991 pub(crate) fn mark_aromaticity_valid(&mut self) {
992 #[cfg(feature = "op-contracts")]
993 self.check_cache_write_permission(DerivedState::AROMATICITY);
994 self.record_mutation(BlockSet::DERIVED_CACHE);
995 self.working.derived_cache_mut().aromaticity_valid = true;
996 self.record_updated_cache(DerivedState::AROMATICITY);
997 }
998
999 #[allow(dead_code)]
1000 pub(crate) fn mark_stereo_handled(&mut self) {
1001 #[cfg(feature = "op-contracts")]
1002 self.check_cache_write_permission(DerivedState::STEREO);
1003 self.record_mutation(BlockSet::DERIVED_CACHE);
1004 self.working.derived_cache_mut().stereo_valid = true;
1005 self.record_updated_cache(DerivedState::STEREO);
1006 }
1007
1008 pub(crate) fn clear_cache(&mut self, states: DerivedState) {
1009 #[cfg(feature = "op-contracts")]
1010 {
1011 self.check_cache_clear_permission(states);
1012 self.trace.cleared_cache |= states;
1013 }
1014 if states.touches_cache() {
1015 self.record_mutation(BlockSet::DERIVED_CACHE);
1016 self.working.derived_cache_mut().invalidate(states);
1017 }
1018 #[cfg(not(feature = "op-contracts"))]
1019 {
1020 let _ = states;
1021 }
1022 }
1023
1024 pub(crate) fn prove_preserved(
1025 &mut self,
1026 states: DerivedState,
1027 proof: PreservationProof,
1028 ) -> Result<(), OperationError> {
1029 #[cfg(feature = "op-contracts")]
1030 {
1031 if !self.spec.derived_effects.preserve().contains(states) {
1032 return Err(OperationError::InvalidInput {
1033 operation: self.spec,
1034 message: "operation attempted to prove preservation for undeclared derived states",
1035 });
1036 }
1037 match proof {
1038 PreservationProof::LeafAtomAppend => {
1039 self.validate_leaf_atom_append_preservation()?
1040 }
1041 }
1042 self.trace.preserved_cache |= states;
1043 }
1044 #[cfg(not(feature = "op-contracts"))]
1045 {
1046 let _ = states;
1047 let _ = proof;
1048 }
1049 Ok(())
1050 }
1051
1052 pub(crate) fn finish(self, outcome: OpOutcome) -> Result<Molecule, OperationError> {
1053 debug_assert!(self.in_place_target.is_none());
1054 #[cfg(feature = "op-contracts")]
1055 {
1056 let mut this = self;
1057 this.trace.outcome = Some(outcome);
1058 this.validate_contract()?;
1059 enforce_molecule_invariants(&this.working).map_err(|failure| {
1060 OperationError::InvariantViolation {
1061 operation: this.spec,
1062 failure,
1063 }
1064 })?;
1065 Ok(this.working)
1066 }
1067 #[cfg(not(feature = "op-contracts"))]
1068 {
1069 let _ = outcome;
1070 enforce_molecule_invariants(&self.working).map_err(|failure| {
1071 OperationError::InvariantViolation {
1072 operation: self.spec,
1073 failure,
1074 }
1075 })?;
1076 Ok(self.working)
1077 }
1078 }
1079
1080 pub(crate) fn abort_in_place(mut self) {
1081 let Some(target) = self.in_place_target.take() else {
1082 return;
1083 };
1084 *target = self.working;
1085 }
1086
1087 pub(crate) fn finish_in_place(self, outcome: OpOutcome) -> Result<(), OperationError> {
1088 #[cfg(feature = "op-contracts")]
1089 {
1090 let mut this = self;
1091 this.trace.outcome = Some(outcome);
1092 let validation = this.validate_contract().and_then(|()| {
1093 enforce_molecule_invariants(&this.working).map_err(|failure| {
1094 OperationError::InvariantViolation {
1095 operation: this.spec,
1096 failure,
1097 }
1098 })
1099 });
1100 let target = this
1101 .in_place_target
1102 .take()
1103 .ok_or(OperationError::InvalidInput {
1104 operation: this.spec,
1105 message: "in-place operation was finished without an in-place target",
1106 })?;
1107 *target = this.working;
1108 validation
1109 }
1110 #[cfg(not(feature = "op-contracts"))]
1111 {
1112 let mut this = self;
1113 let _ = outcome;
1114 let validation = enforce_molecule_invariants(&this.working).map_err(|failure| {
1115 OperationError::InvariantViolation {
1116 operation: this.spec,
1117 failure,
1118 }
1119 });
1120 let target = this
1121 .in_place_target
1122 .take()
1123 .ok_or(OperationError::InvalidInput {
1124 operation: this.spec,
1125 message: "in-place operation was finished without an in-place target",
1126 })?;
1127 *target = this.working;
1128 validation
1129 }
1130 }
1131
1132 fn record_mutation(&mut self, block: BlockSet) {
1133 #[cfg(feature = "op-contracts")]
1134 {
1135 assert!(
1136 self.spec.access.can_write(block) && self.spec.may_mutate.contains(block),
1137 "operation `{}` attempted to mutate a block outside its registry permissions",
1138 self.spec.method
1139 );
1140 self.trace.touched_blocks = self.trace.touched_blocks.union(block);
1141 }
1142 #[cfg(not(feature = "op-contracts"))]
1143 {
1144 let _ = block;
1145 }
1146 }
1147
1148 #[cfg(feature = "op-contracts")]
1149 fn validate_access_spec(&self) -> Result<(), OperationError> {
1150 if self.spec.access.has_overlapping_read_write() {
1151 return Err(OperationError::InvalidInput {
1152 operation: self.spec,
1153 message: "operation access declares the same block as both read and write",
1154 });
1155 }
1156 if self.spec.access.write() != self.spec.may_mutate {
1157 return Err(OperationError::InvalidInput {
1158 operation: self.spec,
1159 message: "operation access write set must match may_mutate",
1160 });
1161 }
1162 Ok(())
1163 }
1164
1165 #[cfg(not(feature = "op-contracts"))]
1166 fn validate_access_spec(&self) -> Result<(), OperationError> {
1167 Ok(())
1168 }
1169
1170 fn begin_block_mut(&mut self, block: BlockSet) -> Result<(), OperationError> {
1171 self.validate_access_spec()?;
1172 #[cfg(feature = "op-contracts")]
1173 {
1174 if !self.spec.access.can_write(block) {
1175 return Err(OperationError::InvalidInput {
1176 operation: self.spec,
1177 message: "operation attempted to write a block outside its registry access",
1178 });
1179 }
1180 let lifecycle = if block == BlockSet::TOPOLOGY {
1181 self.topology_lifecycle
1182 } else if block == BlockSet::COORDINATES {
1183 self.coordinates_lifecycle
1184 } else if block == BlockSet::PROPERTIES {
1185 self.properties_lifecycle
1186 } else {
1187 return Err(OperationError::InvalidInput {
1188 operation: self.spec,
1189 message: "operation attempted to begin an unknown block",
1190 });
1191 };
1192 if lifecycle != BlockLifecycle::Available {
1193 return Err(OperationError::InvalidInput {
1194 operation: self.spec,
1195 message: "operation attempted to begin the same writable block twice",
1196 });
1197 }
1198 self.trace.claimed_write_blocks = self.trace.claimed_write_blocks.union(block);
1199 }
1200 #[cfg(not(feature = "op-contracts"))]
1201 {
1202 let _ = block;
1203 }
1204 Ok(())
1205 }
1206
1207 fn record_updated_cache(&mut self, state: DerivedState) {
1208 #[cfg(feature = "op-contracts")]
1209 {
1210 self.trace.updated_cache = self.trace.updated_cache.union(state);
1211 }
1212 #[cfg(not(feature = "op-contracts"))]
1213 {
1214 let _ = state;
1215 }
1216 }
1217
1218 #[cfg(feature = "op-contracts")]
1219 fn validate_leaf_atom_append_preservation(&self) -> Result<(), OperationError> {
1220 if self.spec.topology_edit != TopologyEditKind::Appending {
1221 return Err(OperationError::InvalidInput {
1222 operation: self.spec,
1223 message: "leaf-append preservation proof requires an appending topology operation",
1224 });
1225 }
1226 let Some(mapping) = &self.topology_mapping else {
1227 return Err(OperationError::InvalidInput {
1228 operation: self.spec,
1229 message: "leaf-append preservation proof requires a topology mapping",
1230 });
1231 };
1232 let old_atom_count = self.contract_source.num_atoms();
1233 let old_bond_count = self.contract_source.num_bonds();
1234 if mapping.atoms().old_to_new().len() != old_atom_count
1235 || mapping.bonds().old_to_new().len() != old_bond_count
1236 || mapping.atoms().new_to_old().len() != self.working.num_atoms()
1237 || mapping.bonds().new_to_old().len() != self.working.num_bonds()
1238 {
1239 return Err(OperationError::InvalidInput {
1240 operation: self.spec,
1241 message: "leaf-append preservation proof has inconsistent mapping dimensions",
1242 });
1243 }
1244 for (old_idx, mapped) in mapping.atoms().old_to_new().iter().enumerate() {
1245 if *mapped != Some(AtomId::new(old_idx)) {
1246 return Err(OperationError::InvalidInput {
1247 operation: self.spec,
1248 message: "leaf-append preservation proof requires identity mapping for old atoms",
1249 });
1250 }
1251 }
1252 for (old_idx, mapped) in mapping.bonds().old_to_new().iter().enumerate() {
1253 if *mapped != Some(crate::BondId::new(old_idx)) {
1254 return Err(OperationError::InvalidInput {
1255 operation: self.spec,
1256 message: "leaf-append preservation proof requires identity mapping for old bonds",
1257 });
1258 }
1259 }
1260 for old_idx in 0..old_bond_count {
1261 let Some((before_begin, before_end)) = self.contract_source.bond_endpoint(old_idx)
1262 else {
1263 return Err(OperationError::InvalidInput {
1264 operation: self.spec,
1265 message: "leaf-append preservation proof found missing source bond endpoint",
1266 });
1267 };
1268 let after = &self.working.bonds()[old_idx];
1269 if before_begin != after.begin() || before_end != after.end() {
1270 return Err(OperationError::InvalidInput {
1271 operation: self.spec,
1272 message: "leaf-append preservation proof detected changed old bond endpoints",
1273 });
1274 }
1275 }
1276
1277 let mut appended_degrees =
1278 vec![0usize; self.working.num_atoms().saturating_sub(old_atom_count)];
1279 for bond in &self.working.bonds()[old_bond_count..] {
1280 let begin_old = bond.begin().index() < old_atom_count;
1281 let end_old = bond.end().index() < old_atom_count;
1282 if begin_old == end_old {
1283 return Err(OperationError::InvalidInput {
1284 operation: self.spec,
1285 message: "leaf-append preservation proof requires every appended bond to connect one old atom and one appended atom",
1286 });
1287 }
1288 let appended_idx = if begin_old {
1289 bond.end().index() - old_atom_count
1290 } else {
1291 bond.begin().index() - old_atom_count
1292 };
1293 if appended_idx >= appended_degrees.len() {
1294 return Err(OperationError::InvalidInput {
1295 operation: self.spec,
1296 message: "leaf-append preservation proof found appended bond referencing an out-of-range atom",
1297 });
1298 }
1299 appended_degrees[appended_idx] += 1;
1300 }
1301 if appended_degrees.iter().any(|degree| *degree != 1) {
1302 return Err(OperationError::InvalidInput {
1303 operation: self.spec,
1304 message: "leaf-append preservation proof requires every appended atom to be a degree-one leaf",
1305 });
1306 }
1307 Ok(())
1308 }
1309
1310 #[cfg(feature = "op-contracts")]
1311 fn validate_contract(&self) -> Result<(), OperationError> {
1312 self.validate_access_spec()?;
1313 let effects = self.spec.derived_effects;
1314 let recompute_ds = effects.recompute();
1315 if recompute_ds.intersects(effects.preserve())
1316 || recompute_ds.intersects(effects.invalidate())
1317 || effects.preserve().intersects(effects.invalidate())
1318 {
1319 return Err(OperationError::InvalidInput {
1320 operation: self.spec,
1321 message: "operation derived_effects contains overlapping effect categories",
1322 });
1323 }
1324
1325 let updated_or_cleared = self.trace.cleared_cache | self.trace.updated_cache;
1326 if !updated_or_cleared.contains(self.spec.needs_update()) {
1327 return Err(OperationError::InvalidInput {
1328 operation: self.spec,
1329 message: "operation body did not clear or update every required cache state",
1330 });
1331 }
1332
1333 if !self
1334 .trace
1335 .preserved_cache
1336 .contains(self.spec.derived_effects.preserve())
1337 {
1338 return Err(OperationError::InvalidInput {
1339 operation: self.spec,
1340 message: "operation body did not prove every declared preserved derived state",
1341 });
1342 }
1343
1344 if self.spec.requires_mapping == MappingRequirement::Required
1345 && self.topology_mapping.is_none()
1346 {
1347 return Err(OperationError::InvalidInput {
1348 operation: self.spec,
1349 message: "strong topology operation did not record a topology mapping",
1350 });
1351 }
1352
1353 if !self.trace.remapped_blocks.contains(self.spec.auto_remap) {
1354 return Err(OperationError::InvalidInput {
1355 operation: self.spec,
1356 message: "operation did not remap every registry-required block",
1357 });
1358 }
1359
1360 if self.trace.touched_blocks.contains(BlockSet::TOPOLOGY)
1361 && self.spec.topology_edit != TopologyEditKind::None
1362 && self.trace.recorded_topology_edit == TopologyEditKind::None
1363 {
1364 return Err(OperationError::InvalidInput {
1365 operation: self.spec,
1366 message: "operation touched topology without recording the registry topology edit",
1367 });
1368 }
1369
1370 Ok(())
1371 }
1372
1373 fn record_remapped(&mut self, block: BlockSet) {
1374 #[cfg(feature = "op-contracts")]
1375 {
1376 self.trace.remapped_blocks = self.trace.remapped_blocks.union(block);
1377 }
1378 #[cfg(not(feature = "op-contracts"))]
1379 {
1380 let _ = block;
1381 }
1382 }
1383}
1384
1385fn validate_semantic_preconditions(
1386 molecule: &Molecule,
1387 spec: &'static MoleculeOpSpec,
1388) -> Result<(), OperationError> {
1389 let preconditions = spec.semantic_preconditions;
1390 if preconditions.contains(SemanticPreconditionSet::TRUSTED_BOND_TOPOLOGY)
1391 && molecule.num_atoms() != 0
1392 && molecule.topology_trust() != TopologyTrust::TrustedGraph
1393 {
1394 return Err(OperationError::Precondition {
1395 operation: spec,
1396 requirement: SemanticPrecondition::TrustedBondTopology,
1397 message: "operation requires a molecule with trusted bond topology",
1398 });
1399 }
1400 if preconditions.contains(SemanticPreconditionSet::HYDROGEN_OWNERSHIP_REPRESENTED)
1401 && !hydrogen_ownership_is_represented(molecule)
1402 {
1403 return Err(OperationError::Precondition {
1404 operation: spec,
1405 requirement: SemanticPrecondition::HydrogenOwnershipRepresented,
1406 message: "explicit hydrogen atoms must be connected to their owning heavy atoms",
1407 });
1408 }
1409 Ok(())
1410}
1411
1412fn hydrogen_ownership_is_represented(molecule: &Molecule) -> bool {
1413 if !molecule
1414 .atoms()
1415 .iter()
1416 .any(|atom| atom.atomic_number() != 1)
1417 {
1418 return true;
1419 }
1420 molecule
1421 .atoms()
1422 .iter()
1423 .filter(|atom| atom.atomic_number() == 1)
1424 .all(|atom| {
1425 molecule
1426 .topology_block()
1427 .adjacency
1428 .neighbors_of(atom.id().index())
1429 .iter()
1430 .any(|neighbor| {
1431 molecule
1432 .atom(AtomId::new(neighbor.atom_index))
1433 .is_some_and(|neighbor_atom| neighbor_atom.atomic_number() != 1)
1434 })
1435 })
1436}
1437
1438molecule_ops! {
1439 op with_hydrogens(params: crate::hydrogens::AddHsParams) {
1440 method: with_hydrogens_with_params,
1441 impl_fn: with_hydrogens_impl,
1442 default_method: with_hydrogens,
1443 default_args: [crate::hydrogens::AddHsParams::default()],
1444 inplace: true,
1445 inplace_method: add_hydrogens_with_params_,
1446 default_inplace_method: add_hydrogens_,
1447 domain: topology,
1448 kind: strong,
1449 topology_edit: appending,
1450 access: { read: [], write: [topology, coordinates, properties, derived_cache] },
1451 may_mutate: [topology, coordinates, properties, derived_cache],
1452 auto_remap: [coordinates, properties],
1453 derived_effects: {
1454 recompute: [],
1455 preserve: [rings, ring_families],
1456 invalidate: [valence, aromaticity, stereo, drawing, fingerprint],
1457 },
1458 semantic_preconditions: [
1459 trusted_bond_topology,
1460 hydrogen_ownership_represented,
1461 valence_computable,
1462 ],
1463 requires_mapping: required,
1464 allows_noop: false,
1465 feature: HYDROGENS_FEATURE,
1466 parity: required_when_supported,
1467 io_roundtrip: true,
1468 invariant_profile: "strong_topology_with_coordinates",
1469 parity_profile: "add_hs_default_rdkit",
1470 }
1471
1472 op without_hydrogens(sanitize: bool) {
1473 method: without_hydrogens_with_sanitize,
1474 impl_fn: without_hydrogens_impl,
1475 default_method: without_hydrogens,
1476 default_args: [true],
1477 inplace: true,
1478 inplace_method: remove_hydrogens_with_sanitize_,
1479 default_inplace_method: remove_hydrogens_,
1480 domain: topology,
1481 kind: strong,
1482 topology_edit: compacting,
1483 access: { read: [], write: [topology, coordinates, properties, derived_cache] },
1484 may_mutate: [topology, coordinates, properties, derived_cache],
1485 auto_remap: [coordinates, properties],
1486 derived_effects: {
1487 recompute: [valence, aromaticity, rings],
1488 preserve: [],
1489 invalidate: [ring_families, stereo, drawing, fingerprint],
1490 },
1491 requires_mapping: required,
1492 allows_noop: true,
1493 feature: HYDROGENS_FEATURE,
1494 parity: required_when_supported,
1495 io_roundtrip: true,
1496 invariant_profile: "strong_topology_with_coordinates",
1497 parity_profile: "remove_hs_default_rdkit",
1498 }
1499
1500 op without_hydrogens_with_params(params: crate::hydrogens::RemoveHsParams, sanitize: bool) {
1501 method: without_hydrogens_with_params,
1502 impl_fn: without_hydrogens_with_params_impl,
1503 inplace: true,
1504 inplace_method: remove_hydrogens_with_params_,
1505 domain: topology,
1506 kind: strong,
1507 topology_edit: compacting,
1508 access: { read: [], write: [topology, coordinates, properties, derived_cache] },
1509 may_mutate: [topology, coordinates, properties, derived_cache],
1510 auto_remap: [coordinates, properties],
1511 derived_effects: {
1512 recompute: [valence, aromaticity, rings],
1513 preserve: [],
1514 invalidate: [ring_families, stereo, drawing, fingerprint],
1515 },
1516 requires_mapping: required,
1517 allows_noop: true,
1518 feature: HYDROGENS_FEATURE,
1519 parity: required_when_supported,
1520 io_roundtrip: true,
1521 invariant_profile: "strong_topology_with_coordinates",
1522 parity_profile: "remove_hs_parameterized_rdkit",
1523 }
1524
1525 op with_kekulized_bonds(clear_aromatic_flags: bool) {
1526 method: with_kekulized_bonds,
1527 impl_fn: with_kekulized_bonds_impl,
1528 inplace: true,
1529 inplace_method: kekulize_,
1530 domain: topology,
1531 kind: weak,
1532 topology_edit: local,
1533 access: { read: [], write: [topology, derived_cache] },
1534 may_mutate: [topology, derived_cache],
1535 auto_remap: [],
1536 derived_effects: {
1537 recompute: [rings, valence],
1538 preserve: [],
1539 invalidate: [aromaticity, drawing, fingerprint],
1540 },
1541 requires_mapping: none,
1542 allows_noop: true,
1543 feature: KEKULIZE_FEATURE,
1544 parity: required_when_supported,
1545 io_roundtrip: true,
1546 invariant_profile: "weak_topology_state",
1547 parity_profile: "kekulize_clear_aromatic_flags",
1548 }
1549
1550 op sanitize(ops: crate::SanitizeOps) {
1551 method: sanitize_with_ops,
1552 impl_fn: sanitize_impl,
1553 default_method: sanitize,
1554 default_args: [crate::SanitizeOps::ALL],
1555 inplace: true,
1556 inplace_method: sanitize_with_ops_,
1557 default_inplace_method: sanitize_,
1558 domain: topology,
1559 kind: weak,
1560 topology_edit: local,
1561 access: { read: [], write: [topology, derived_cache] },
1562 may_mutate: [topology, derived_cache],
1563 auto_remap: [],
1564 derived_effects: {
1565 recompute: [rings, valence, aromaticity],
1566 preserve: [],
1567 invalidate: [ring_families, stereo, drawing, fingerprint],
1568 },
1569 requires_mapping: none,
1570 allows_noop: true,
1571 feature: SANITIZE_FEATURE,
1572 parity: required_when_supported,
1573 io_roundtrip: true,
1574 invariant_profile: "weak_topology_state",
1575 parity_profile: "sanitize_default_rdkit",
1576 }
1577
1578 op assigned_valence() {
1579 method: with_assigned_valence,
1580 impl_fn: assigned_valence_impl,
1581 inplace: true,
1582 inplace_method: assign_valence_,
1583 domain: topology,
1584 kind: weak,
1585 topology_edit: none,
1586 access: { read: [topology], write: [derived_cache] },
1587 may_mutate: [derived_cache],
1588 auto_remap: [],
1589 derived_effects: {
1590 recompute: [valence],
1591 preserve: [],
1592 invalidate: [],
1593 },
1594 requires_mapping: none,
1595 allows_noop: true,
1596 feature: VALENCE_FEATURE,
1597 parity: required_when_supported,
1598 io_roundtrip: true,
1599 invariant_profile: "weak_derived_cache_update",
1600 parity_profile: "update_property_cache_rdkit",
1601 }
1602
1603 op assigned_rings() {
1604 method: with_assigned_rings,
1605 impl_fn: assigned_rings_impl,
1606 inplace: true,
1607 inplace_method: assign_rings_,
1608 domain: topology,
1609 kind: weak,
1610 topology_edit: none,
1611 access: { read: [topology], write: [derived_cache] },
1612 may_mutate: [derived_cache],
1613 auto_remap: [],
1614 derived_effects: {
1615 recompute: [rings],
1616 preserve: [],
1617 invalidate: [],
1618 },
1619 requires_mapping: none,
1620 allows_noop: true,
1621 feature: RINGS_FEATURE,
1622 parity: required_when_supported,
1623 io_roundtrip: true,
1624 invariant_profile: "weak_derived_cache_update",
1625 parity_profile: "symmetrize_sssr_rdkit",
1626 }
1627
1628 op assigned_ring_families() {
1629 method: with_assigned_ring_families,
1630 impl_fn: assigned_ring_families_impl,
1631 inplace: true,
1632 inplace_method: assign_ring_families_,
1633 domain: topology,
1634 kind: weak,
1635 topology_edit: none,
1636 access: { read: [topology], write: [derived_cache] },
1637 may_mutate: [derived_cache],
1638 auto_remap: [],
1639 derived_effects: {
1640 recompute: [ring_families],
1641 preserve: [],
1642 invalidate: [],
1643 },
1644 requires_mapping: none,
1645 allows_noop: true,
1646 feature: RINGS_FEATURE,
1647 parity: required_when_supported,
1648 io_roundtrip: true,
1649 invariant_profile: "weak_derived_cache_update",
1650 parity_profile: "ring_families_rdkit_urf",
1651 }
1652
1653 op assigned_aromaticity() {
1654 method: with_assigned_aromaticity,
1655 impl_fn: assigned_aromaticity_impl,
1656 inplace: true,
1657 inplace_method: assign_aromaticity_,
1658 domain: topology,
1659 kind: weak,
1660 topology_edit: local,
1661 access: { read: [], write: [topology, derived_cache] },
1662 may_mutate: [topology, derived_cache],
1663 auto_remap: [],
1664 derived_effects: {
1665 recompute: [rings, valence, aromaticity],
1666 preserve: [],
1667 invalidate: [drawing, fingerprint],
1668 },
1669 requires_mapping: none,
1670 allows_noop: true,
1671 feature: AROMATICITY_FEATURE,
1672 parity: required_when_supported,
1673 io_roundtrip: true,
1674 invariant_profile: "weak_topology_state",
1675 parity_profile: "set_aromaticity_default_rdkit",
1676 }
1677
1678 op assigned_radicals() {
1679 method: with_assigned_radicals,
1680 impl_fn: assigned_radicals_impl,
1681 inplace: true,
1682 inplace_method: assign_radicals_,
1683 domain: topology,
1684 kind: weak,
1685 topology_edit: local,
1686 access: { read: [], write: [topology, derived_cache] },
1687 may_mutate: [topology, derived_cache],
1688 auto_remap: [],
1689 derived_effects: {
1690 recompute: [valence],
1691 preserve: [],
1692 invalidate: [],
1693 },
1694 requires_mapping: none,
1695 allows_noop: true,
1696 feature: VALENCE_FEATURE,
1697 parity: required_when_supported,
1698 io_roundtrip: true,
1699 invariant_profile: "weak_topology_state",
1700 parity_profile: "assign_radicals_rdkit",
1701 }
1702
1703 op with_2d_coordinates(params: crate::With2DCoordinatesParams) {
1704 method: with_2d_coordinates_with_params,
1705 impl_fn: with_2d_coordinates_impl,
1706 default_method: with_2d_coordinates,
1707 default_args: [crate::With2DCoordinatesParams::default()],
1708 inplace: true,
1709 inplace_method: compute_2d_coordinates_with_params_,
1710 default_inplace_method: compute_2d_coordinates_,
1711 domain: coordinate,
1712 kind: weak,
1713 topology_edit: none,
1714 access: { read: [topology], write: [coordinates] },
1715 may_mutate: [coordinates],
1716 auto_remap: [],
1717 semantic_preconditions: [trusted_bond_topology],
1718 derived_effects: {
1719 recompute: [],
1720 preserve: [],
1721 invalidate: [drawing],
1722 },
1723 requires_mapping: none,
1724 allows_noop: true,
1725 feature: COORDINATE_2D_FEATURE,
1726 parity: required_when_supported,
1727 io_roundtrip: true,
1728 invariant_profile: "coordinate_generation",
1729 parity_profile: "compute_2d_coords_default_rdkit",
1730 }
1731
1732 op with_3d_conformer(params: crate::EmbedParameters) {
1733 method: with_3d_conformer_with_params,
1734 impl_fn: with_3d_conformer_impl,
1735 default_method: with_3d_conformer,
1736 default_args: [crate::EmbedParameters::etkdg_v3()],
1737 inplace: true,
1738 inplace_method: embed_3d_conformer_with_params_,
1739 default_inplace_method: embed_3d_conformer_,
1740 domain: coordinate,
1741 kind: weak,
1742 topology_edit: none,
1743 access: { read: [topology], write: [coordinates] },
1744 may_mutate: [coordinates],
1745 auto_remap: [],
1746 semantic_preconditions: [trusted_bond_topology],
1747 derived_effects: {
1748 recompute: [],
1749 preserve: [],
1750 invalidate: [drawing],
1751 },
1752 requires_mapping: none,
1753 allows_noop: true,
1754 feature: CONFORMER_GENERATION_FEATURE,
1755 parity: required_when_supported,
1756 io_roundtrip: true,
1757 invariant_profile: "coordinate_generation_3d",
1758 parity_profile: "embed_molecule_etkdgv3_rdkit",
1759 }
1760
1761 op with_3d_conformers(num_confs: usize, params: crate::EmbedParameters) {
1762 method: with_3d_conformers_with_params,
1763 impl_fn: with_3d_conformers_impl,
1764 inplace: true,
1765 inplace_method: embed_3d_conformers_with_params_,
1766 domain: coordinate,
1767 kind: weak,
1768 topology_edit: none,
1769 access: { read: [topology], write: [coordinates] },
1770 may_mutate: [coordinates],
1771 auto_remap: [],
1772 semantic_preconditions: [trusted_bond_topology],
1773 derived_effects: {
1774 recompute: [],
1775 preserve: [],
1776 invalidate: [drawing],
1777 },
1778 requires_mapping: none,
1779 allows_noop: true,
1780 feature: CONFORMER_GENERATION_FEATURE,
1781 parity: required_when_supported,
1782 io_roundtrip: true,
1783 invariant_profile: "coordinate_generation_3d_multi",
1784 parity_profile: "embed_multiple_confs_rdkit",
1785 }
1786}
1787
1788mod hydrogens;
1789mod sanitize_pipeline;
1790
1791use self::{hydrogens::*, sanitize_pipeline::*};
1792
1793#[mol_op_body(with_kekulized_bonds, parts)]
1794fn with_kekulized_bonds_impl(clear_aromatic_flags: bool) -> Result<OpOutcome, OperationError> {
1795 let mut topology = parts.begin_topology_mut()?;
1801 let view = parts.read_parts_for_topology(topology.clone())?;
1802 let rings = MoleculeReadParts::from_molecule(&view)
1803 .symmetrize_sssr()
1804 .map_err(|source| OperationError::RingFinding {
1805 operation: &WITH_KEKULIZED_BONDS_SPEC,
1806 source,
1807 })?;
1808 parts.set_rings_cache(rings);
1809 let view = parts.read_parts_for_topology(topology.clone())?;
1810 let ring_info = MoleculeReadParts::from_molecule(&view)
1811 .derived_cache()
1812 .rings
1813 .as_ref()
1814 .expect("rings were recomputed immediately above")
1815 .clone();
1816 let assignment = MoleculeReadParts::from_molecule(&view)
1817 .kekulize_assignment(Some(&ring_info), clear_aromatic_flags, true, 100)
1818 .map_err(|source| OperationError::Kekulize {
1819 operation: &WITH_KEKULIZED_BONDS_SPEC,
1820 source,
1821 })?;
1822
1823 let changed = crate::kekulize::apply_kekulize_assignment(&mut topology, &assignment);
1824 let view = parts.read_parts_for_topology(topology.clone())?;
1825 let valence = MoleculeReadParts::from_molecule(&view)
1826 .assign_valence_with_options(crate::ValenceModel::RdkitLike, true)
1827 .map_err(|source| OperationError::Valence {
1828 operation: &WITH_KEKULIZED_BONDS_SPEC,
1829 source,
1830 })?;
1831 parts.commit_topology(topology)?;
1832 parts.record_topology_edit(TopologyEditKind::Local)?;
1833 parts.clear_cache(DerivedState::AROMATICITY);
1834 parts.set_valence_cache(valence);
1835 parts.clear_cache(DerivedState::DRAWING | DerivedState::FINGERPRINT);
1836 Ok(if changed {
1837 OpOutcome::Changed
1838 } else {
1839 OpOutcome::NoOp {
1840 reason: "kekulization assignment produced no effective topology-state change",
1841 }
1842 })
1843}
1844
1845#[mol_op_body(assigned_valence, parts)]
1846fn assigned_valence_impl() -> Result<OpOutcome, OperationError> {
1847 let read = parts.begin_topology_read()?;
1848 let valence = read
1849 .assign_valence_with_options(crate::ValenceModel::RdkitLike, true)
1850 .map_err(|source| OperationError::Valence {
1851 operation: &ASSIGNED_VALENCE_SPEC,
1852 source,
1853 })?;
1854 parts.set_valence_cache(valence);
1855 Ok(OpOutcome::Changed)
1856}
1857
1858#[mol_op_body(assigned_rings, parts)]
1859fn assigned_rings_impl() -> Result<OpOutcome, OperationError> {
1860 let read = parts.begin_topology_read()?;
1861 let rings = read
1862 .symmetrize_sssr()
1863 .map_err(|source| OperationError::RingFinding {
1864 operation: &ASSIGNED_RINGS_SPEC,
1865 source,
1866 })?;
1867 parts.set_rings_cache(rings);
1868 Ok(OpOutcome::Changed)
1869}
1870
1871#[mol_op_body(assigned_ring_families, parts)]
1872fn assigned_ring_families_impl() -> Result<OpOutcome, OperationError> {
1873 let read = parts.begin_topology_read()?;
1874 let ring_families =
1875 read.find_ring_families(false, false)
1876 .map_err(|source| OperationError::RingFinding {
1877 operation: &ASSIGNED_RING_FAMILIES_SPEC,
1878 source,
1879 })?;
1880 parts.set_ring_families_cache(ring_families);
1881 Ok(OpOutcome::Changed)
1882}
1883
1884#[mol_op_body(assigned_aromaticity, parts)]
1885fn assigned_aromaticity_impl() -> Result<OpOutcome, OperationError> {
1886 let mut topology = parts.begin_topology_mut()?;
1887 let view = parts.read_parts_for_topology(topology.clone())?;
1888 let read = MoleculeReadParts::from_molecule(&view);
1889 let rings = read
1890 .symmetrize_sssr()
1891 .map_err(|source| OperationError::RingFinding {
1892 operation: &ASSIGNED_AROMATICITY_SPEC,
1893 source,
1894 })?;
1895 parts.set_rings_cache(rings);
1896 let view = parts.read_parts_for_topology(topology.clone())?;
1897 let assignment = MoleculeReadParts::from_molecule(&view)
1898 .set_aromaticity(crate::AromaticityModel::Default)
1899 .map_err(|source| OperationError::Aromaticity {
1900 operation: &ASSIGNED_AROMATICITY_SPEC,
1901 source,
1902 })?;
1903 for (atom, is_aromatic) in topology
1904 .atoms
1905 .iter_mut()
1906 .zip(assignment.atom_aromatic.iter().copied())
1907 {
1908 atom.set_aromatic(is_aromatic);
1909 }
1910 for (bond, is_aromatic) in topology
1911 .bonds
1912 .iter_mut()
1913 .zip(assignment.bond_aromatic.iter().copied())
1914 {
1915 bond.set_aromatic(is_aromatic);
1916 if is_aromatic
1917 && matches!(
1918 bond.order(),
1919 crate::BondOrder::Single | crate::BondOrder::Double
1920 )
1921 {
1922 bond.set_order(crate::BondOrder::Aromatic);
1923 }
1924 }
1925 let view = parts.read_parts_for_topology(topology.clone())?;
1926 let valence = MoleculeReadParts::from_molecule(&view)
1927 .assign_valence_with_options(crate::ValenceModel::RdkitLike, true)
1928 .map_err(|source| OperationError::Valence {
1929 operation: &ASSIGNED_AROMATICITY_SPEC,
1930 source,
1931 })?;
1932 parts.commit_topology(topology)?;
1933 parts.record_topology_edit(TopologyEditKind::Local)?;
1934 parts.set_valence_cache(valence);
1935 parts.mark_aromaticity_valid();
1936 parts.clear_cache(DerivedState::DRAWING | DerivedState::FINGERPRINT);
1937 Ok(OpOutcome::Changed)
1938}
1939
1940#[mol_op_body(assigned_radicals, parts)]
1941fn assigned_radicals_impl() -> Result<OpOutcome, OperationError> {
1942 let mut topology = parts.begin_topology_mut()?;
1943 let view = parts.read_parts_for_topology(topology.clone())?;
1944 let read = MoleculeReadParts::from_molecule(&view);
1945
1946 let radicals = read
1947 .assign_radicals()
1948 .map_err(|source| OperationError::Valence {
1949 operation: &ASSIGNED_RADICALS_SPEC,
1950 source,
1951 })?;
1952
1953 let changed = read
1954 .atoms()
1955 .iter()
1956 .zip(radicals.iter().copied())
1957 .any(|(atom, radical)| atom.radical_electrons() != radical);
1958
1959 if changed {
1960 for (atom, radical) in topology.atoms.iter_mut().zip(radicals) {
1961 atom.set_radical_electrons(radical);
1962 }
1963 }
1964
1965 let view = parts.read_parts_for_topology(topology.clone())?;
1966 let valence = MoleculeReadParts::from_molecule(&view)
1967 .assign_valence_with_options(crate::ValenceModel::RdkitLike, true)
1968 .map_err(|source| OperationError::Valence {
1969 operation: &ASSIGNED_RADICALS_SPEC,
1970 source,
1971 })?;
1972 parts.commit_topology(topology)?;
1973 parts.record_topology_edit(TopologyEditKind::Local)?;
1974 parts.set_valence_cache(valence);
1975 Ok(OpOutcome::Changed)
1976}
1977
1978#[mol_op_body(with_2d_coordinates, parts)]
1979fn with_2d_coordinates_impl(
1980 params: crate::With2DCoordinatesParams,
1981) -> Result<OpOutcome, OperationError> {
1982 let (atoms, bonds) = {
1983 let read = parts.begin_topology_read()?;
1984 (read.atoms(), read.bonds())
1985 };
1986 let coords = crate::coordinates::compute_2d_coords_with_params(
1987 atoms,
1988 bonds,
1989 ¶ms.as_compute_params(),
1990 )
1991 .map_err(|source| match source {
1992 crate::coordinates::Coordinate2DError::InvalidInput(message) => {
1993 OperationError::InvalidInput {
1994 operation: &WITH_2D_COORDINATES_SPEC,
1995 message,
1996 }
1997 }
1998 crate::coordinates::Coordinate2DError::UnsupportedFeature(_) => {
1999 OperationError::UnsupportedFeature {
2000 operation: &WITH_2D_COORDINATES_SPEC,
2001 source: crate::UnsupportedFeatureError::from_spec(&crate::COORDINATE_2D_FEATURE),
2002 }
2003 }
2004 })?;
2005 let mut coord_block = parts.begin_coordinates_mut()?;
2006 if params.clear_confs {
2007 coord_block.conformers_2d.clear();
2008 }
2009 coord_block.conformers_2d.push(crate::Conformer2D::new(
2010 coord_block.conformers_2d.len(),
2011 coords,
2012 ));
2013 coord_block.source_coordinate_dim = Some(crate::CoordinateDimension::TwoD);
2014 parts.commit_coordinates(coord_block)?;
2015 parts.clear_cache(DerivedState::DRAWING);
2016 Ok(OpOutcome::Changed)
2017}
2018
2019#[mol_op_body(with_3d_conformer, parts)]
2020fn with_3d_conformer_impl(mut params: crate::EmbedParameters) -> Result<OpOutcome, OperationError> {
2021 let source = parts.working.clone();
2022 let (embedded, id) =
2023 crate::distgeom::embed_molecule(&source, &mut params).map_err(|source| {
2024 OperationError::DistanceGeometry {
2025 operation: &WITH_3D_CONFORMER_SPEC,
2026 source,
2027 }
2028 })?;
2029 let _coord_block = parts.begin_coordinates_mut()?;
2030 let coord_block = embedded.coordinate_block().clone();
2031 parts.commit_coordinates(coord_block)?;
2032 parts.clear_cache(DerivedState::DRAWING);
2033 if id < 0 {
2034 Ok(OpOutcome::NoOp {
2035 reason: "RDKit EmbedMolecule returned -1",
2036 })
2037 } else {
2038 Ok(OpOutcome::Changed)
2039 }
2040}
2041
2042#[mol_op_body(with_3d_conformers, parts)]
2043fn with_3d_conformers_impl(
2044 num_confs: usize,
2045 mut params: crate::EmbedParameters,
2046) -> Result<OpOutcome, OperationError> {
2047 let source = parts.working.clone();
2048 let num_confs = u32::try_from(num_confs).map_err(|_| OperationError::InvalidInput {
2049 operation: &WITH_3D_CONFORMERS_SPEC,
2050 message: "num_confs does not fit in RDKit unsigned int parameter",
2051 })?;
2052 let (embedded, ids) = crate::distgeom::embed_multiple_confs(&source, num_confs, &mut params)
2053 .map_err(|source| OperationError::DistanceGeometry {
2054 operation: &WITH_3D_CONFORMERS_SPEC,
2055 source,
2056 })?;
2057 let _coord_block = parts.begin_coordinates_mut()?;
2058 let coord_block = embedded.coordinate_block().clone();
2059 parts.commit_coordinates(coord_block)?;
2060 parts.clear_cache(DerivedState::DRAWING);
2061 if ids.is_empty() {
2062 Ok(OpOutcome::NoOp {
2063 reason: "RDKit EmbedMultipleConfs returned no conformer IDs",
2064 })
2065 } else {
2066 Ok(OpOutcome::Changed)
2067 }
2068}
2069
2070#[cfg(test)]
2071mod tests {
2072 use std::collections::HashSet;
2073
2074 use super::*;
2075 use crate::{BondOrder, BondQueryPredicate, QueryNode};
2076
2077 const TEST_NEEDS_VALENCE_UPDATE_SPEC: MoleculeOpSpec = MoleculeOpSpec {
2078 method: "test_needs_valence_update",
2079 impl_fn: "test_needs_valence_update_impl",
2080 domain: OperationDomain::Topology,
2081 kind: MoleculeOpKind::Weak,
2082 topology_edit: TopologyEditKind::None,
2083 access: BlockAccess::new(BlockSet::NONE, BlockSet::DERIVED_CACHE),
2084 may_mutate: BlockSet::DERIVED_CACHE,
2085 auto_remap: BlockSet::NONE,
2086 semantic_preconditions: SemanticPreconditionSet::NONE,
2087 derived_effects: DerivedEffects::new(
2088 DerivedState::NONE, DerivedState::NONE, DerivedState::VALENCE.union(DerivedState::RINGS), ),
2092 requires_mapping: MappingRequirement::None,
2093 allows_noop: true,
2094 support: SupportStatus::Experimental,
2095 parity: ParityPolicy::NotApplicable,
2096 io_roundtrip: false,
2097 };
2098
2099 const TEST_RECOMPUTE_VALENCE_SPEC: MoleculeOpSpec = MoleculeOpSpec {
2100 method: "test_recompute_valence",
2101 impl_fn: "test_recompute_valence_impl",
2102 domain: OperationDomain::Topology,
2103 kind: MoleculeOpKind::Weak,
2104 topology_edit: TopologyEditKind::None,
2105 access: BlockAccess::new(BlockSet::NONE, BlockSet::DERIVED_CACHE),
2106 may_mutate: BlockSet::DERIVED_CACHE,
2107 auto_remap: BlockSet::NONE,
2108 semantic_preconditions: SemanticPreconditionSet::NONE,
2109 derived_effects: DerivedEffects::new(
2110 DerivedState::VALENCE, DerivedState::NONE, DerivedState::NONE, ),
2114 requires_mapping: MappingRequirement::None,
2115 allows_noop: true,
2116 support: SupportStatus::Experimental,
2117 parity: ParityPolicy::NotApplicable,
2118 io_roundtrip: false,
2119 };
2120
2121 const TEST_OVERLAPPING_ACCESS_SPEC: MoleculeOpSpec = MoleculeOpSpec {
2122 method: "test_overlapping_access",
2123 impl_fn: "test_overlapping_access_impl",
2124 domain: OperationDomain::Topology,
2125 kind: MoleculeOpKind::Weak,
2126 topology_edit: TopologyEditKind::Local,
2127 access: BlockAccess::new(BlockSet::TOPOLOGY, BlockSet::TOPOLOGY),
2128 may_mutate: BlockSet::TOPOLOGY,
2129 auto_remap: BlockSet::NONE,
2130 semantic_preconditions: SemanticPreconditionSet::NONE,
2131 derived_effects: DerivedEffects::NONE,
2132 requires_mapping: MappingRequirement::None,
2133 allows_noop: true,
2134 support: SupportStatus::Experimental,
2135 parity: ParityPolicy::NotApplicable,
2136 io_roundtrip: false,
2137 };
2138
2139 #[test]
2140 fn molecule_read_parts_does_not_expose_raw_molecule_escape() {
2141 let ops_source = include_str!("ops.rs");
2142 let read_parts_source = include_str!("../model/read_parts.rs");
2143 assert!(!ops_source.contains(concat!("read_parts", ".", "molecule")));
2144 assert!(!ops_source.contains(concat!(".", "molecule", "()")));
2145 assert!(!read_parts_source.contains(concat!("pub(crate) fn ", "molecule")));
2146 }
2147
2148 #[cfg(feature = "op-contracts")]
2149 #[test]
2150 fn begin_topology_mut_rejects_second_begin() {
2151 let molecule = crate::Molecule::new();
2152 let mut parts = OpParts::new(&molecule, &WITH_KEKULIZED_BONDS_SPEC).unwrap();
2153 let _topology = parts
2154 .begin_topology_mut()
2155 .expect("first topology begin should succeed");
2156 let err = match parts.begin_topology_mut() {
2157 Ok(_) => panic!("second topology begin must be rejected"),
2158 Err(err) => err,
2159 };
2160 assert!(
2161 matches!(err, OperationError::InvalidInput { message, .. } if message.contains("same writable block twice"))
2162 );
2163 }
2164
2165 #[cfg(feature = "op-contracts")]
2166 #[test]
2167 fn begin_topology_mut_rejects_second_begin_before_commit() {
2168 let molecule = crate::Molecule::new();
2169 let mut parts = OpParts::new(&molecule, &WITH_KEKULIZED_BONDS_SPEC).unwrap();
2170 let _topology = parts
2171 .begin_topology_mut()
2172 .expect("first topology begin should succeed");
2173 let err = match parts.begin_topology_mut() {
2174 Ok(_) => panic!("second topology begin must be rejected"),
2175 Err(err) => err,
2176 };
2177 assert!(
2178 matches!(err, OperationError::InvalidInput { message, .. } if message.contains("same writable block twice"))
2179 );
2180 }
2181
2182 #[cfg(feature = "op-contracts")]
2183 #[test]
2184 fn begin_access_rejects_overlapping_read_and_write_blocks() {
2185 let molecule = crate::Molecule::new();
2186 let mut parts = OpParts::new(&molecule, &TEST_OVERLAPPING_ACCESS_SPEC).unwrap();
2187 let err = match parts.begin_topology_mut() {
2188 Ok(_) => panic!("overlapping read/write access must be rejected"),
2189 Err(err) => err,
2190 };
2191 assert!(
2192 matches!(err, OperationError::InvalidInput { message, .. } if message.contains("both read and write"))
2193 );
2194 }
2195
2196 #[test]
2197 fn sanitize_is_bond_order_query_matches_rdkit_simple_vs_complex_split() {
2198 assert!(sanitize_is_bond_order_query(Some(&QueryNode::predicate(
2199 BondQueryPredicate::Order(BondOrder::Single),
2200 ))));
2201 assert!(sanitize_is_bond_order_query(Some(&QueryNode::and(vec![
2202 QueryNode::predicate(BondQueryPredicate::IsInRing(true)),
2203 QueryNode::predicate(BondQueryPredicate::Order(BondOrder::Double)),
2204 ]))));
2205 assert!(!sanitize_is_bond_order_query(Some(&QueryNode::predicate(
2206 BondQueryPredicate::OrderIn(vec![BondOrder::Single, BondOrder::Double]),
2207 ))));
2208 assert!(!sanitize_is_bond_order_query(Some(&QueryNode::not(
2209 QueryNode::predicate(BondQueryPredicate::Order(BondOrder::Single)),
2210 ))));
2211 assert!(!sanitize_is_bond_order_query(Some(&QueryNode::predicate(
2212 BondQueryPredicate::Any,
2213 ))));
2214 }
2215
2216 fn same_operation(left: &'static MoleculeOpSpec, right: &'static MoleculeOpSpec) -> bool {
2217 left.method == right.method && left.impl_fn == right.impl_fn
2218 }
2219
2220 fn support_matrix_contains(operation: &'static MoleculeOpSpec) -> bool {
2221 SUPPORT_MATRIX.iter().any(|entry| {
2222 entry
2223 .operation
2224 .is_some_and(|candidate| same_operation(candidate, operation))
2225 })
2226 }
2227
2228 fn support_feature_for(
2229 operation: &'static MoleculeOpSpec,
2230 ) -> Option<&'static crate::FeatureSpec> {
2231 SUPPORT_MATRIX.iter().find_map(|entry| {
2232 entry
2233 .operation
2234 .is_some_and(|candidate| same_operation(candidate, operation))
2235 .then_some(entry.feature)
2236 })
2237 }
2238
2239 fn invariant_matrix_contains(operation: &'static MoleculeOpSpec) -> bool {
2240 OPERATION_INVARIANT_MATRIX
2241 .iter()
2242 .any(|entry| same_operation(entry.operation, operation))
2243 }
2244
2245 fn parity_matrix_contains(operation: &'static MoleculeOpSpec) -> bool {
2246 PARITY_MATRIX
2247 .iter()
2248 .any(|entry| same_operation(entry.operation, operation))
2249 }
2250
2251 #[allow(dead_code)]
2252 fn assert_unsupported_feature(
2253 result: Result<crate::Molecule, OperationError>,
2254 operation: &'static MoleculeOpSpec,
2255 feature: &'static crate::FeatureSpec,
2256 ) {
2257 match result {
2258 Err(OperationError::UnsupportedFeature {
2259 operation: actual_operation,
2260 source,
2261 }) => {
2262 assert!(same_operation(actual_operation, operation));
2263 assert_eq!(source.feature, feature.name);
2264 }
2265 other => panic!(
2266 "expected UnsupportedFeature for {}, got {other:?}",
2267 operation.method
2268 ),
2269 }
2270 }
2271
2272 #[test]
2273 fn registered_ops_have_unique_methods() {
2274 let mut methods = HashSet::new();
2275 for operation in MOLECULE_OPS.iter().copied() {
2276 assert!(
2277 methods.insert(operation.method),
2278 "duplicate registered operation method `{}`",
2279 operation.method
2280 );
2281 }
2282 }
2283
2284 #[test]
2285 fn registered_ops_have_support_and_invariant_entries() {
2286 assert_eq!(SUPPORT_MATRIX.len(), MOLECULE_OPS.len());
2287 assert_eq!(OPERATION_INVARIANT_MATRIX.len(), MOLECULE_OPS.len());
2288 for operation in MOLECULE_OPS.iter().copied() {
2289 assert!(
2290 support_matrix_contains(operation),
2291 "missing support matrix entry for `{}`",
2292 operation.method
2293 );
2294 assert!(
2295 invariant_matrix_contains(operation),
2296 "missing invariant matrix entry for `{}`",
2297 operation.method
2298 );
2299 }
2300 }
2301
2302 #[test]
2303 fn parity_registered_ops_have_parity_entries() {
2304 for operation in MOLECULE_OPS.iter().copied() {
2305 if operation.parity != ParityPolicy::NotApplicable {
2306 assert!(
2307 parity_matrix_contains(operation),
2308 "missing parity matrix entry for `{}`",
2309 operation.method
2310 );
2311 }
2312 }
2313 }
2314
2315 #[test]
2316 fn conformer_generation_operation_registry_exposes_3d_coordinate_generation() {
2317 assert!(
2318 MOLECULE_OPS
2319 .iter()
2320 .any(|operation| same_operation(operation, &WITH_3D_CONFORMER_SPEC))
2321 );
2322 assert!(
2323 MOLECULE_OPS
2324 .iter()
2325 .any(|operation| same_operation(operation, &WITH_3D_CONFORMERS_SPEC))
2326 );
2327 assert_eq!(
2328 support_feature_for(&WITH_3D_CONFORMER_SPEC).map(|feature| feature.name),
2329 Some(crate::CONFORMER_GENERATION_FEATURE.name)
2330 );
2331 assert_eq!(
2332 support_feature_for(&WITH_3D_CONFORMERS_SPEC).map(|feature| feature.name),
2333 Some(crate::CONFORMER_GENERATION_FEATURE.name)
2334 );
2335 assert_eq!(WITH_3D_CONFORMER_SPEC.domain, OperationDomain::Coordinate);
2336 assert_eq!(WITH_3D_CONFORMERS_SPEC.domain, OperationDomain::Coordinate);
2337 assert!(support_matrix_contains(&WITH_3D_CONFORMER_SPEC));
2338 assert!(support_matrix_contains(&WITH_3D_CONFORMERS_SPEC));
2339 assert!(invariant_matrix_contains(&WITH_3D_CONFORMER_SPEC));
2340 assert!(invariant_matrix_contains(&WITH_3D_CONFORMERS_SPEC));
2341 assert!(parity_matrix_contains(&WITH_3D_CONFORMER_SPEC));
2342 assert!(parity_matrix_contains(&WITH_3D_CONFORMERS_SPEC));
2343
2344 let molecule = crate::Molecule::from_smiles("CC").expect("ethane");
2345 let generated = molecule
2346 .with_3d_conformer()
2347 .expect("default ETKDGv3 conformer");
2348 assert!(molecule.conformers_3d().is_empty());
2349 assert_eq!(generated.conformers_3d().len(), 1);
2350
2351 let mut params = crate::EmbedParameters::etkdg();
2352 params.random_seed = 0xf00d;
2353 params.num_threads = 1;
2354 let generated_multi = molecule
2355 .with_3d_conformers_with_params(2, params)
2356 .expect("multi conformer generation");
2357 assert_eq!(generated_multi.conformers_3d().len(), 2);
2358 }
2359
2360 #[test]
2361 fn sanitized_all_runs_through_operation_pipeline_without_changing_source() {
2362 let molecule = crate::Molecule::new();
2363 let original = molecule.clone();
2364
2365 let sanitized = molecule.sanitize().unwrap();
2366 let sanitized_with_all = molecule.sanitize_with_ops(crate::SanitizeOps::ALL).unwrap();
2367
2368 assert_eq!(sanitized.num_atoms(), 0);
2369 assert_eq!(sanitized_with_all.num_atoms(), 0);
2370 assert!(matches!(
2371 molecule.with_2d_coordinates(),
2372 Err(OperationError::InvalidInput {
2373 operation: &WITH_2D_COORDINATES_SPEC,
2374 ..
2375 })
2376 ));
2377
2378 assert_eq!(molecule, original);
2379 }
2380
2381 #[test]
2382 fn with_2d_coordinates_with_params_preserves_source_and_uses_parameterized_surface() {
2383 let mut builder = crate::MoleculeBuilder::new();
2384 let a0 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2385 let a1 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2386 builder
2387 .add_bond(crate::BondSpec::new(a0, a1, crate::BondOrder::Single))
2388 .unwrap();
2389 let molecule = builder.build().unwrap();
2390 let original = molecule.clone();
2391
2392 let result = molecule
2393 .with_2d_coordinates_with_params(crate::With2DCoordinatesParams {
2394 force_rdkit: true,
2395 use_ring_templates: true,
2396 ..crate::With2DCoordinatesParams::default()
2397 })
2398 .unwrap();
2399
2400 assert_eq!(molecule, original);
2401 assert_eq!(result.conformers_2d().len(), 1);
2402 assert_eq!(
2403 result.source_coordinate_dim(),
2404 Some(crate::CoordinateDimension::TwoD)
2405 );
2406 }
2407
2408 #[test]
2409 fn with_hydrogens_adds_implicit_hydrogens_through_operation_pipeline() {
2410 let mut builder = crate::MoleculeBuilder::new();
2411 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2412 builder.set_2d_coordinates(vec![[0.5, 1.0]]).unwrap();
2413 let molecule = builder.build().unwrap();
2414 let original = molecule.clone();
2415
2416 let result = molecule.with_hydrogens().unwrap();
2417
2418 assert_eq!(molecule, original);
2419 assert_eq!(result.num_atoms(), 5);
2420 assert_eq!(result.num_bonds(), 4);
2421 assert_eq!(result.atoms()[carbon.index()].explicit_hydrogens(), 0);
2422 assert!(
2423 result.atoms()[1..]
2424 .iter()
2425 .all(|atom| atom.atomic_number() == 1 && atom.implicit_hydrogen())
2426 );
2427 assert_eq!(
2428 result.coordinates_2d(),
2429 Some(&[[0.5, 1.0], [0.5, 1.0], [0.5, 1.0], [0.5, 1.0], [0.5, 1.0]][..])
2430 );
2431 }
2432
2433 #[test]
2434 fn add_hydrogens_in_place_matches_value_operation() {
2435 let molecule = crate::Molecule::from_smiles("CCO").unwrap();
2436 let expected = molecule.with_hydrogens().unwrap();
2437
2438 let mut in_place = molecule.clone();
2439 in_place.add_hydrogens_().unwrap();
2440
2441 assert_eq!(in_place, expected);
2442 }
2443
2444 #[test]
2445 fn add_hydrogens_in_place_preserves_shared_source_value() {
2446 let mut molecule = crate::Molecule::from_smiles("CCO").unwrap();
2447 let shared = molecule.clone();
2448
2449 molecule.add_hydrogens_().unwrap();
2450
2451 assert_eq!(shared.num_atoms(), 3);
2452 assert_eq!(shared.num_bonds(), 2);
2453 assert_eq!(molecule.num_atoms(), 9);
2454 assert_eq!(molecule.num_bonds(), 8);
2455 }
2456
2457 #[test]
2458 fn add_hs_terminal_coords_follow_rdkit_sequential_append() {
2459 let mut builder = crate::MoleculeBuilder::new();
2460 builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2461 builder.set_2d_coordinates(vec![[0.0, 0.0]]).unwrap();
2462 let molecule = builder.build().unwrap();
2463 let params = crate::hydrogens::AddHsParams {
2464 add_coords: true,
2465 ..crate::hydrogens::AddHsParams::default()
2466 };
2467 let read_parts = MoleculeReadParts::from_molecule(&molecule);
2468 let assignment = crate::hydrogens::add_hs_assignment(read_parts, ¶ms).unwrap();
2469
2470 let coords = add_hs_terminal_coords_2d(
2471 MoleculeReadParts::from_molecule(&molecule),
2472 &assignment,
2473 molecule.coordinates_2d().unwrap(),
2474 )
2475 .unwrap();
2476
2477 assert_eq!(coords.len(), 4);
2478 assert_eq!(coords[0], [1.0, 0.0]);
2479 assert_ne!(coords[1], [0.0, 0.0]);
2480 }
2481
2482 #[test]
2483 fn add_hs_terminal_coords_3d_uses_rdkit_rb0_bond_length() {
2484 let mut builder = crate::MoleculeBuilder::new();
2485 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2486 builder.add_3d_conformer(vec![[0.0, 0.0, 0.0]]).unwrap();
2487 let molecule = builder.build().unwrap();
2488 let assignment = crate::hydrogens::AddHsAssignment {
2489 hydrogens_to_add: vec![crate::hydrogens::AddHydrogen {
2490 heavy_atom: carbon,
2491 isotope: None,
2492 is_implicit: true,
2493 props: Default::default(),
2494 pdb_residue_info: None,
2495 }],
2496 add_terminal_coordinates: true,
2497 ..crate::hydrogens::AddHsAssignment::default()
2498 };
2499
2500 let coords = add_hs_terminal_coords_3d(
2501 MoleculeReadParts::from_molecule(&molecule),
2502 &assignment,
2503 molecule.conformers_3d()[0].coordinates(),
2504 )
2505 .unwrap();
2506
2507 assert_eq!(coords.len(), 1);
2508 assert!((coords[0][0] - 0.0).abs() < 1.0e-12);
2509 assert!((coords[0][1] - 0.0).abs() < 1.0e-12);
2510 assert!((coords[0][2] - 1.10).abs() < 1.0e-12);
2511 }
2512
2513 #[test]
2514 fn add_hs_terminal_coords_default_degree_branch_matches_rdkit_zero_vector() {
2515 let mut builder = crate::MoleculeBuilder::new();
2516 let center = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2517 let mut coords = vec![[5.0, 5.0, 5.0]];
2518 for index in 0..5 {
2519 let neighbor = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2520 builder
2521 .add_bond(crate::BondSpec::new(
2522 center,
2523 neighbor,
2524 crate::BondOrder::Single,
2525 ))
2526 .unwrap();
2527 coords.push([index as f64, 0.0, 0.0]);
2528 }
2529 builder.add_3d_conformer(coords).unwrap();
2530 let molecule = builder.build().unwrap();
2531 let assignment = crate::hydrogens::AddHsAssignment {
2532 hydrogens_to_add: vec![crate::hydrogens::AddHydrogen {
2533 heavy_atom: center,
2534 isotope: None,
2535 is_implicit: true,
2536 props: Default::default(),
2537 pdb_residue_info: None,
2538 }],
2539 add_terminal_coordinates: true,
2540 ..crate::hydrogens::AddHsAssignment::default()
2541 };
2542
2543 let coords = add_hs_terminal_coords_3d(
2544 MoleculeReadParts::from_molecule(&molecule),
2545 &assignment,
2546 molecule.conformers_3d()[0].coordinates(),
2547 )
2548 .unwrap();
2549
2550 assert_eq!(coords, vec![[0.0, 0.0, 0.0]]);
2551 }
2552
2553 #[test]
2554 fn add_hs_terminal_coords_rejects_degenerate_or_nonterminal_virtual_atom_like_rdkit() {
2555 let mut builder = crate::MoleculeBuilder::new();
2556 let a0 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2557 let a1 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2558 builder
2559 .add_bond(crate::BondSpec::new(a0, a1, crate::BondOrder::Single))
2560 .unwrap();
2561 builder
2562 .add_3d_conformer(vec![[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
2563 .unwrap();
2564 let molecule = builder.build().unwrap();
2565 let adjacency = vec![vec![(1, None), (2, None)], vec![(0, None)], vec![(0, None)]];
2566
2567 let degenerate = add_hs_set_terminal_atom_coord(
2568 MoleculeReadParts::from_molecule(&molecule),
2569 &adjacency,
2570 molecule.conformers_3d()[0].coordinates(),
2571 0,
2572 0,
2573 true,
2574 )
2575 .unwrap_err();
2576 assert!(degenerate.to_string().contains("degenerate atoms"));
2577
2578 let nonterminal = add_hs_set_terminal_atom_coord(
2579 MoleculeReadParts::from_molecule(&molecule),
2580 &adjacency,
2581 molecule.conformers_3d()[0].coordinates(),
2582 0,
2583 1,
2584 true,
2585 )
2586 .unwrap_err();
2587 assert!(nonterminal.to_string().contains("degree one"));
2588 }
2589
2590 #[test]
2591 fn with_hydrogens_with_params_materializes_add_coords_branch() {
2592 let mut builder = crate::MoleculeBuilder::new();
2593 builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2594 builder.set_2d_coordinates(vec![[0.0, 0.0]]).unwrap();
2595 let molecule = builder.build().unwrap();
2596 let params = crate::AddHsParams {
2597 add_coords: true,
2598 ..crate::AddHsParams::default()
2599 };
2600
2601 let result = molecule.with_hydrogens_with_params(params).unwrap();
2602
2603 assert_eq!(result.num_atoms(), 5);
2604 let coords = result.coordinates_2d().unwrap();
2605 assert_eq!(coords.len(), 5);
2606 assert_eq!(coords[0], [0.0, 0.0]);
2607 assert_eq!(coords[1], [1.0, 0.0]);
2608 assert_eq!(coords[2], [-1.0, 0.0]);
2609 }
2610
2611 #[test]
2612 fn with_hydrogens_materializes_explicit_h_count_and_clears_heavy_atom_count() {
2613 let mut builder = crate::MoleculeBuilder::new();
2614 let nitrogen =
2615 builder.add_atom(crate::AtomSpec::new(crate::Element::N).with_explicit_hydrogens(2));
2616 let molecule = builder.build().unwrap();
2617
2618 let result = molecule.with_hydrogens().unwrap();
2619
2620 assert_eq!(result.num_atoms(), 4);
2621 assert_eq!(result.atoms()[nitrogen.index()].explicit_hydrogens(), 0);
2622 assert_eq!(
2623 result.atoms()[1..]
2624 .iter()
2625 .filter(|atom| !atom.implicit_hydrogen())
2626 .count(),
2627 2
2628 );
2629 assert_eq!(
2630 result.atoms()[1..]
2631 .iter()
2632 .filter(|atom| atom.implicit_hydrogen())
2633 .count(),
2634 1
2635 );
2636 }
2637
2638 #[test]
2639 fn with_hydrogens_commits_topology_with_rebuilt_adjacency_for_valence_followups() {
2640 let molecule = crate::Molecule::from_smiles("C=C").unwrap();
2641
2642 let result = molecule.with_hydrogens().unwrap();
2643 let assignment = crate::assign_valence(&result, crate::ValenceModel::RdkitLike).unwrap();
2644
2645 assert_eq!(assignment.explicit_valence, vec![4, 4, 1, 1, 1, 1]);
2646 assert_eq!(assignment.implicit_hydrogens, vec![0, 0, 0, 0, 0, 0]);
2647 }
2648
2649 #[test]
2650 fn with_hydrogens_replays_tracked_isotopes_and_clears_tracking_property() {
2651 let mut builder = crate::MoleculeBuilder::new();
2652 let nitrogen = builder.add_atom(
2653 crate::AtomSpec::new(crate::Element::N)
2654 .with_explicit_hydrogens(2)
2655 .with_tracked_isotopic_hydrogens(vec![2, 3]),
2656 );
2657 let molecule = builder.build().unwrap();
2658
2659 let result = molecule.with_hydrogens().unwrap();
2660
2661 assert_eq!(result.atoms()[nitrogen.index()].prop("_isotopicHs"), None);
2662 assert_eq!(
2663 result.atoms()[nitrogen.index()].tracked_isotopic_hydrogens(),
2664 &[] as &[u16]
2665 );
2666 assert_eq!(result.atoms()[nitrogen.index()].explicit_hydrogens(), 0);
2667 assert_eq!(
2668 result.atoms()[1..]
2669 .iter()
2670 .map(crate::Atom::isotope)
2671 .collect::<Vec<_>>(),
2672 vec![Some(2), Some(3), None]
2673 );
2674 }
2675
2676 #[test]
2677 fn with_hydrogens_clears_atom_cip_ranks_like_rdkit_addhs() {
2678 let smiles =
2679 "O=C(NC[C@]12C[C@H]3C[C@H](C[C@H](C3)C1)C2)[C@@H]1C[C@H]2c3ccccc3[C@@H]1c1ccccc12";
2680 let molecule = crate::Molecule::from_smiles(smiles).unwrap();
2681 assert!(
2682 molecule
2683 .atoms()
2684 .iter()
2685 .any(|atom| atom.prop("_CIPRank").is_some()),
2686 "SMILES sanitize path should assign legacy _CIPRank before AddHs"
2687 );
2688
2689 let result = molecule
2690 .with_hydrogens_with_params(crate::AddHsParams {
2691 only_on_atoms: Some(
2692 [4usize, 6, 8, 10, 14, 16, 23]
2693 .into_iter()
2694 .map(crate::AtomId::new)
2695 .collect(),
2696 ),
2697 ..crate::AddHsParams::default()
2698 })
2699 .unwrap();
2700
2701 assert!(
2702 result
2703 .atoms()
2704 .iter()
2705 .all(|atom| atom.prop("_CIPRank").is_none()),
2706 "RDKit AddHs clears atom _CIPRank computed props before depiction"
2707 );
2708 }
2709
2710 #[test]
2711 fn add_hs_operation_materializes_typed_pdb_residue_info() {
2712 let mut builder = crate::MoleculeBuilder::new();
2713 let nitrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
2714 let molecule = builder.build().unwrap();
2715 let assignment = crate::hydrogens::AddHsAssignment {
2716 hydrogens_to_add: vec![crate::hydrogens::AddHydrogen {
2717 heavy_atom: nitrogen,
2718 isotope: None,
2719 is_implicit: false,
2720 props: Default::default(),
2721 pdb_residue_info: Some(crate::AtomPdbResidueInfo::new(
2722 " H1 ", 12, "GLY", 3, "A", false,
2723 )),
2724 }],
2725 ..crate::hydrogens::AddHsAssignment::default()
2726 };
2727 let mut parts = OpParts::new(&molecule, &WITH_HYDROGENS_SPEC).unwrap();
2728 let mut topology = parts.begin_topology_mut().unwrap();
2729 let mut coordinates = parts.begin_coordinates_mut().unwrap();
2730 let mut properties = parts.begin_properties_mut().unwrap();
2731
2732 let changed = apply_add_hs_assignment(
2733 &mut parts,
2734 &mut topology,
2735 &mut coordinates,
2736 &mut properties,
2737 &assignment,
2738 )
2739 .unwrap();
2740 parts.commit_topology(topology).unwrap();
2741 parts.commit_coordinates(coordinates).unwrap();
2742 parts.commit_properties(properties).unwrap();
2743 parts
2744 .prove_preserved(
2745 DerivedState::RINGS | DerivedState::RING_FAMILIES,
2746 PreservationProof::LeafAtomAppend,
2747 )
2748 .unwrap();
2749 let result = parts.finish(OpOutcome::Changed).unwrap();
2750
2751 assert!(changed);
2752 let info = result.atoms()[1].pdb_residue_info().unwrap();
2753 assert_eq!(info.atom_name(), " H1 ");
2754 assert_eq!(info.serial_number(), 12);
2755 assert_eq!(info.residue_name(), "GLY");
2756 assert_eq!(info.residue_number(), 3);
2757 assert_eq!(info.chain_id(), "A");
2758 }
2759
2760 #[test]
2761 fn with_hydrogens_with_params_materializes_add_residue_info_branch() {
2762 let mut builder = crate::MoleculeBuilder::new();
2763 builder.add_atom(
2764 crate::AtomSpec::new(crate::Element::N).with_pdb_residue_info(
2765 crate::AtomPdbResidueInfo::new(" N ", 10, "GLY", 3, "A", false),
2766 ),
2767 );
2768 let molecule = builder.build().unwrap();
2769 let params = crate::AddHsParams {
2770 add_residue_info: true,
2771 ..crate::AddHsParams::default()
2772 };
2773
2774 let result = molecule.with_hydrogens_with_params(params).unwrap();
2775
2776 assert_eq!(result.num_atoms(), 4);
2777 let first_h_info = result.atoms()[1].pdb_residue_info().unwrap();
2778 assert_eq!(first_h_info.atom_name(), " H1 ");
2779 assert_eq!(first_h_info.residue_name(), "GLY");
2780 assert_eq!(first_h_info.residue_number(), 3);
2781 assert_eq!(first_h_info.chain_id(), "A");
2782 }
2783
2784 #[test]
2785 fn add_hs_operation_materializes_existing_atom_pdb_residue_info_updates() {
2786 let mut builder = crate::MoleculeBuilder::new();
2787 let hydrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::H));
2788 let molecule = builder.build().unwrap();
2789 let assignment = crate::hydrogens::AddHsAssignment {
2790 atom_pdb_residue_info_updates: vec![crate::hydrogens::AtomPdbResidueInfoUpdate {
2791 atom: hydrogen,
2792 pdb_residue_info: crate::AtomPdbResidueInfo::new(" H1 ", 12, "GLY", 3, "A", false),
2793 }],
2794 ..crate::hydrogens::AddHsAssignment::default()
2795 };
2796 let mut parts = OpParts::new(&molecule, &WITH_HYDROGENS_SPEC).unwrap();
2797 let mut topology = parts.begin_topology_mut().unwrap();
2798 let mut coordinates = parts.begin_coordinates_mut().unwrap();
2799 let mut properties = parts.begin_properties_mut().unwrap();
2800
2801 let changed = apply_add_hs_assignment(
2802 &mut parts,
2803 &mut topology,
2804 &mut coordinates,
2805 &mut properties,
2806 &assignment,
2807 )
2808 .unwrap();
2809 parts.commit_topology(topology).unwrap();
2810 parts.commit_coordinates(coordinates).unwrap();
2811 parts.commit_properties(properties).unwrap();
2812 parts
2813 .prove_preserved(
2814 DerivedState::RINGS | DerivedState::RING_FAMILIES,
2815 PreservationProof::LeafAtomAppend,
2816 )
2817 .unwrap();
2818 let result = parts.finish(OpOutcome::Changed).unwrap();
2819
2820 assert!(changed);
2821 let info = result.atoms()[hydrogen.index()].pdb_residue_info().unwrap();
2822 assert_eq!(info.atom_name(), " H1 ");
2823 assert_eq!(info.serial_number(), 12);
2824 assert_eq!(info.residue_name(), "GLY");
2825 assert_eq!(info.residue_number(), 3);
2826 assert_eq!(info.chain_id(), "A");
2827 }
2828
2829 #[test]
2830 fn without_hydrogens_removes_basic_explicit_hydrogen_through_operation_pipeline() {
2831 let mut builder = crate::MoleculeBuilder::new();
2832 let carbon = builder.add_atom(
2833 crate::AtomSpec::new(crate::Element::C).with_pdb_residue_info(
2834 crate::AtomPdbResidueInfo::new(" C ", 7, "GLY", 3, "A", false),
2835 ),
2836 );
2837 let hydrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::H));
2838 builder
2839 .add_bond(crate::BondSpec::new(
2840 carbon,
2841 hydrogen,
2842 crate::BondOrder::Single,
2843 ))
2844 .unwrap();
2845 builder
2846 .set_2d_coordinates(vec![[0.0, 0.0], [1.0, 0.0]])
2847 .unwrap();
2848 let molecule = builder.build().unwrap();
2849 let original = molecule.clone();
2850
2851 let result = molecule.without_hydrogens_with_sanitize(false).unwrap();
2852
2853 assert_eq!(molecule, original);
2854 assert_eq!(result.num_atoms(), 1);
2855 assert_eq!(result.num_bonds(), 0);
2856 assert_eq!(result.coordinates_2d(), Some(&[[0.0, 0.0]][..]));
2857 assert_eq!(
2858 result.atoms()[0]
2859 .pdb_residue_info()
2860 .unwrap()
2861 .serial_number(),
2862 7
2863 );
2864 }
2865
2866 #[test]
2867 fn without_hydrogens_materializes_unknown_stereo_as_typed_atom_state() {
2868 let mut builder = crate::MoleculeBuilder::new();
2869 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2870 let hydrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::H));
2871 builder
2872 .add_bond(
2873 crate::BondSpec::new(carbon, hydrogen, crate::BondOrder::Single)
2874 .with_direction(crate::BondDirection::Unknown),
2875 )
2876 .unwrap();
2877 let molecule = builder.build().unwrap();
2878
2879 let result = molecule.without_hydrogens_with_sanitize(false).unwrap();
2880
2881 assert_eq!(result.num_atoms(), 1);
2882 assert!(result.atoms()[0].unknown_stereo());
2883 assert_eq!(result.atoms()[0].prop("_UnknownStereo"), None);
2884 }
2885
2886 #[test]
2887 fn without_hydrogens_with_params_materializes_remove_and_track_isotopes_branch() {
2888 let mut builder = crate::MoleculeBuilder::new();
2889 let carbon =
2890 builder.add_atom(crate::AtomSpec::new(crate::Element::C).with_no_implicit(true));
2891 let protium = builder.add_atom(crate::AtomSpec::new(crate::Element::H));
2892 let deuterium = builder.add_atom(crate::AtomSpec::new(crate::Element::H).with_isotope(2));
2893 builder
2894 .add_bond(crate::BondSpec::new(
2895 carbon,
2896 protium,
2897 crate::BondOrder::Single,
2898 ))
2899 .unwrap();
2900 builder
2901 .add_bond(crate::BondSpec::new(
2902 carbon,
2903 deuterium,
2904 crate::BondOrder::Single,
2905 ))
2906 .unwrap();
2907 let molecule = builder.build().unwrap();
2908 let params = crate::RemoveHsParams {
2909 remove_and_track_isotopes: true,
2910 ..crate::RemoveHsParams::default()
2911 };
2912
2913 let result = molecule
2914 .without_hydrogens_with_params(params, false)
2915 .unwrap();
2916
2917 assert_eq!(result.num_atoms(), 1);
2918 assert_eq!(result.atoms()[0].tracked_isotopic_hydrogens(), &[2]);
2919 assert_eq!(result.atoms()[0].prop("_isotopicHs"), None);
2920 }
2921
2922 #[test]
2923 fn without_hydrogens_updates_sgroup_membership_before_topology_compaction() {
2924 let mut builder = crate::MoleculeBuilder::new();
2925 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
2926 let hydrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::H));
2927 builder
2928 .add_bond(crate::BondSpec::new(
2929 carbon,
2930 hydrogen,
2931 crate::BondOrder::Single,
2932 ))
2933 .unwrap();
2934 builder
2935 .add_substance_group(
2936 crate::SubstanceGroup::new(
2937 crate::SubstanceGroupId::new(0),
2938 crate::SubstanceGroupKind::Superatom,
2939 )
2940 .with_atoms(vec![carbon, hydrogen]),
2941 )
2942 .unwrap();
2943 let molecule = builder.build().unwrap();
2944
2945 let result = molecule.without_hydrogens_with_sanitize(false).unwrap();
2946
2947 assert_eq!(result.num_atoms(), 1);
2948 assert_eq!(result.substance_groups().len(), 1);
2949 assert_eq!(
2950 result.substance_groups()[0].atoms(),
2951 &[crate::AtomId::new(0)]
2952 );
2953 }
2954
2955 #[test]
2956 fn without_hydrogens_with_sanitize_runs_full_sanitize_on_aromatic_result() {
2957 let molecule = crate::Molecule::from_smiles_with_sanitize("c1ccccc1", false)
2958 .unwrap()
2959 .with_hydrogens()
2960 .unwrap();
2961 let original = molecule.clone();
2962
2963 let result = molecule.without_hydrogens_with_sanitize(true).unwrap();
2964
2965 assert_eq!(molecule, original);
2966 assert_eq!(result.num_atoms(), 6);
2967 assert!(result.atoms().iter().all(crate::Atom::is_aromatic));
2968 assert!(
2969 result
2970 .bonds()
2971 .iter()
2972 .all(|bond| bond.is_aromatic() && bond.order() == crate::BondOrder::Aromatic)
2973 );
2974 assert!(result.derived_cache().valence.is_some());
2975 assert!(result.derived_cache().rings.is_some());
2976 }
2977
2978 #[test]
2979 fn without_hydrogens_without_sanitize_skips_full_sanitize_pipeline() {
2980 let molecule = crate::Molecule::from_smiles_with_sanitize("c1ccccc1", false)
2981 .unwrap()
2982 .with_hydrogens()
2983 .unwrap();
2984
2985 let result = molecule.without_hydrogens_with_sanitize(false).unwrap();
2986
2987 assert_eq!(result.num_atoms(), 6);
2988 assert!(result.derived_cache().valence.is_none());
2989 assert!(result.derived_cache().rings.is_none());
2990 }
2991
2992 #[test]
2993 fn with_hydrogens_preserves_existing_ring_caches_by_leaf_append_proof() {
2994 let molecule = crate::Molecule::from_smiles_with_sanitize("C1CCCCC1", false)
2995 .unwrap()
2996 .with_assigned_rings()
2997 .unwrap()
2998 .with_assigned_ring_families()
2999 .unwrap();
3000 let rings_before = molecule.derived_cache().rings.clone();
3001 let ring_families_before = molecule.derived_cache().ring_families.clone();
3002
3003 let result = molecule.with_hydrogens().unwrap();
3004
3005 assert_eq!(result.derived_cache().rings, rings_before);
3006 assert_eq!(result.derived_cache().ring_families, ring_families_before);
3007 assert!(result.derived_cache().valence.is_none());
3008 }
3009
3010 #[test]
3011 fn sanitized_supported_subset_updates_cache_through_operation_pipeline() {
3012 let molecule = crate::Molecule::from_smiles_with_sanitize("CCO", false).unwrap();
3013 let original = molecule.clone();
3014 let ops = crate::SanitizeOps::CLEANUP
3015 | crate::SanitizeOps::PROPERTIES
3016 | crate::SanitizeOps::SYMMRINGS
3017 | crate::SanitizeOps::FIND_RADICALS
3018 | crate::SanitizeOps::SET_AROMATICITY
3019 | crate::SanitizeOps::SET_CONJUGATION;
3020
3021 let result = molecule.sanitize_with_ops(ops).unwrap();
3022
3023 assert_eq!(molecule, original);
3024 let cache = result.derived_cache();
3025 assert!(cache.valence.is_some());
3026 assert!(cache.rings.is_some());
3027 assert!(cache.ring_families.is_none());
3028 assert!(cache.aromaticity_valid);
3029 }
3030
3031 #[test]
3032 fn sanitized_set_aromaticity_recomputes_valence_after_aromatic_bond_updates() {
3033 let molecule = crate::Molecule::from_smiles_with_sanitize("C1=CC=CC=C1", false).unwrap();
3034 let result = molecule
3035 .sanitize_with_ops(
3036 crate::SanitizeOps::PROPERTIES
3037 | crate::SanitizeOps::SYMMRINGS
3038 | crate::SanitizeOps::SET_AROMATICITY,
3039 )
3040 .unwrap();
3041
3042 assert!(
3043 result
3044 .bonds()
3045 .iter()
3046 .all(|bond| bond.order() == crate::BondOrder::Aromatic)
3047 );
3048 let expected_valence =
3049 crate::assign_valence_with_options(&result, crate::ValenceModel::RdkitLike, true)
3050 .unwrap();
3051 assert_eq!(result.derived_cache().valence, Some(expected_valence));
3052 }
3053
3054 #[test]
3055 fn sanitized_kekulize_runs_kekulize_assignment_like_rdkit() {
3056 let molecule = crate::Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3057
3058 let result = molecule
3059 .sanitize_with_ops(crate::SanitizeOps::SYMMRINGS | crate::SanitizeOps::KEKULIZE)
3060 .unwrap();
3061
3062 assert!(result.bonds().iter().all(|bond| !bond.is_aromatic()));
3063 assert!(result.bonds().iter().all(|bond| matches!(
3064 bond.order(),
3065 crate::BondOrder::Single | crate::BondOrder::Double
3066 )));
3067 assert_eq!(
3068 result
3069 .bonds()
3070 .iter()
3071 .filter(|bond| bond.order() == crate::BondOrder::Double)
3072 .count(),
3073 3
3074 );
3075 assert!(result.derived_cache().rings.is_some());
3076 }
3077
3078 #[test]
3079 fn sanitized_kekulize_materializes_ring_cache_without_explicit_symmrings_step_like_rdkit() {
3080 let molecule = crate::Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3081
3082 let result = molecule
3083 .sanitize_with_ops(crate::SanitizeOps::KEKULIZE)
3084 .unwrap();
3085
3086 assert!(result.derived_cache().rings.is_some());
3087 assert!(result.bonds().iter().all(|bond| matches!(
3088 bond.order(),
3089 crate::BondOrder::Single | crate::BondOrder::Double
3090 )));
3091 }
3092
3093 #[test]
3094 fn sanitized_reports_kekulize_failure_step_like_rdkit_operation_that_failed() {
3095 let molecule = crate::Molecule::from_smiles_with_sanitize("c", false).unwrap();
3096
3097 let err = molecule
3098 .sanitize_with_ops(crate::SanitizeOps::KEKULIZE)
3099 .unwrap_err();
3100
3101 match err {
3102 OperationError::Sanitize { source, .. } => {
3103 assert_eq!(source.step, crate::SanitizeStep::Kekulize);
3104 assert!(source.message.contains("aromatic"));
3105 }
3106 other => panic!("expected sanitize error, got {other:?}"),
3107 }
3108 }
3109
3110 #[test]
3111 fn sanitized_reports_properties_before_later_requested_steps() {
3112 let molecule = crate::Molecule::from_smiles_with_sanitize("C(=O)(=O)(=O)", false).unwrap();
3113
3114 let err = molecule
3115 .sanitize_with_ops(crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::KEKULIZE)
3116 .unwrap_err();
3117
3118 match err {
3119 OperationError::Sanitize { source, .. } => {
3120 assert_eq!(source.step, crate::SanitizeStep::Properties);
3121 assert!(source.message.contains("greater than permitted"));
3122 }
3123 other => panic!("expected sanitize error, got {other:?}"),
3124 }
3125 }
3126
3127 #[test]
3128 fn sanitized_reports_properties_before_multiple_later_requested_steps() {
3129 let molecule = crate::Molecule::from_smiles_with_sanitize("C(=O)(=O)(=O)", false).unwrap();
3130
3131 let err = molecule
3132 .sanitize_with_ops(
3133 crate::SanitizeOps::PROPERTIES
3134 | crate::SanitizeOps::KEKULIZE
3135 | crate::SanitizeOps::SET_AROMATICITY
3136 | crate::SanitizeOps::SET_CONJUGATION,
3137 )
3138 .unwrap_err();
3139
3140 match err {
3141 OperationError::Sanitize { source, .. } => {
3142 assert_eq!(source.step, crate::SanitizeStep::Properties);
3143 assert!(source.message.contains("greater than permitted"));
3144 }
3145 other => panic!("expected sanitize error, got {other:?}"),
3146 }
3147 }
3148
3149 #[test]
3150 fn sanitize_stage_maps_requested_step_through_shared_helper() {
3151 let err = sanitize_stage(
3152 crate::SanitizeStep::Cleanup,
3153 || -> Result<(), crate::ValenceError> {
3154 Err(crate::ValenceError::UnsupportedBranch {
3155 reason: "helper step mapping regression",
3156 })
3157 },
3158 |step, source| sanitize_valence_error(&SANITIZE_SPEC, step, source),
3159 )
3160 .unwrap_err();
3161
3162 match err {
3163 OperationError::Sanitize { source, .. } => {
3164 assert_eq!(source.step, crate::SanitizeStep::Cleanup);
3165 assert!(source.message.contains("helper step mapping regression"));
3166 }
3167 other => panic!("expected sanitize error, got {other:?}"),
3168 }
3169 }
3170
3171 #[test]
3172 fn sanitized_without_properties_uses_non_strict_property_cache_like_rdkit() {
3173 let molecule = crate::Molecule::from_smiles_with_sanitize("C(=O)(=O)(=O)", false).unwrap();
3174
3175 let result = molecule
3176 .sanitize_with_ops(crate::SanitizeOps::NONE)
3177 .unwrap();
3178 let expected =
3179 crate::assign_valence_with_options(&result, crate::ValenceModel::RdkitLike, false)
3180 .unwrap();
3181
3182 assert_eq!(result.derived_cache().valence, Some(expected));
3183 }
3184
3185 #[test]
3186 fn sanitized_cleanup_converts_neutral_nitro_like_rdkit() {
3187 let mut builder = crate::MoleculeBuilder::new();
3188 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3189 let nitrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3190 let oxygen_single = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3191 let oxygen_double = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3192 builder
3193 .add_bond(crate::BondSpec::new(
3194 carbon,
3195 nitrogen,
3196 crate::BondOrder::Single,
3197 ))
3198 .unwrap();
3199 builder
3200 .add_bond(crate::BondSpec::new(
3201 nitrogen,
3202 oxygen_single,
3203 crate::BondOrder::Double,
3204 ))
3205 .unwrap();
3206 builder
3207 .add_bond(crate::BondSpec::new(
3208 nitrogen,
3209 oxygen_double,
3210 crate::BondOrder::Double,
3211 ))
3212 .unwrap();
3213 let molecule = builder.build().unwrap();
3214
3215 let result = molecule
3216 .sanitize_with_ops(crate::SanitizeOps::CLEANUP)
3217 .unwrap();
3218
3219 assert_eq!(result.atoms()[nitrogen.index()].formal_charge(), 1);
3220 assert_eq!(
3221 result
3222 .atoms()
3223 .iter()
3224 .filter(|atom| atom.atomic_number() == 8 && atom.formal_charge() == -1)
3225 .count(),
3226 1
3227 );
3228 assert_eq!(
3229 result
3230 .bonds()
3231 .iter()
3232 .filter(|bond| bond.order() == crate::BondOrder::Double)
3233 .count(),
3234 1
3235 );
3236 }
3237
3238 #[test]
3239 fn sanitized_nitrogens_cleanup_rewrites_neutral_nitrogen_triple_bond_branch_like_rdkit() {
3240 let mut builder = crate::MoleculeBuilder::new();
3241 let carbon_one = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3242 let nitrogen_center = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3243 let carbon_two = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3244 let nitrogen_terminal = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3245 builder
3246 .add_bond(crate::BondSpec::new(
3247 carbon_one,
3248 nitrogen_center,
3249 crate::BondOrder::Single,
3250 ))
3251 .unwrap();
3252 builder
3253 .add_bond(crate::BondSpec::new(
3254 nitrogen_center,
3255 carbon_two,
3256 crate::BondOrder::Single,
3257 ))
3258 .unwrap();
3259 let triple_bond = builder
3260 .add_bond(crate::BondSpec::new(
3261 nitrogen_center,
3262 nitrogen_terminal,
3263 crate::BondOrder::Triple,
3264 ))
3265 .unwrap();
3266 let molecule = builder.build().unwrap();
3267
3268 let result = molecule
3269 .sanitize_with_ops(crate::SanitizeOps::CLEANUP)
3270 .unwrap();
3271
3272 assert_eq!(result.atoms()[nitrogen_center.index()].formal_charge(), 1);
3273 assert_eq!(
3274 result.atoms()[nitrogen_terminal.index()].formal_charge(),
3275 -1
3276 );
3277 assert_eq!(
3278 result.bonds()[triple_bond.index()].order(),
3279 crate::BondOrder::Double
3280 );
3281 }
3282
3283 #[test]
3284 fn sanitized_cleanup_converts_phosphorus_oxo_like_rdkit() {
3285 let phosphorus_element =
3286 crate::Element::from_atomic_number(15).expect("phosphorus atomic number is valid");
3287 let mut builder = crate::MoleculeBuilder::new();
3288 let carbon_single = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3289 let phosphorus = builder.add_atom(crate::AtomSpec::new(phosphorus_element));
3290 let oxygen = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3291 let nitrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3292 let carbon_double = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3293 builder
3294 .add_bond(crate::BondSpec::new(
3295 carbon_single,
3296 phosphorus,
3297 crate::BondOrder::Single,
3298 ))
3299 .unwrap();
3300 builder
3301 .add_bond(crate::BondSpec::new(
3302 phosphorus,
3303 oxygen,
3304 crate::BondOrder::Double,
3305 ))
3306 .unwrap();
3307 builder
3308 .add_bond(crate::BondSpec::new(
3309 phosphorus,
3310 nitrogen,
3311 crate::BondOrder::Double,
3312 ))
3313 .unwrap();
3314 builder
3315 .add_bond(crate::BondSpec::new(
3316 nitrogen,
3317 carbon_double,
3318 crate::BondOrder::Single,
3319 ))
3320 .unwrap();
3321 let molecule = builder.build().unwrap();
3322
3323 let result = molecule
3324 .sanitize_with_ops(crate::SanitizeOps::CLEANUP)
3325 .unwrap();
3326
3327 assert_eq!(result.atoms()[phosphorus.index()].formal_charge(), 1);
3328 assert_eq!(result.atoms()[oxygen.index()].formal_charge(), -1);
3329 assert_eq!(result.atoms()[nitrogen.index()].formal_charge(), 0);
3330 assert_eq!(result.bonds()[1].order(), crate::BondOrder::Single);
3331 assert_eq!(result.bonds()[2].order(), crate::BondOrder::Double);
3332 }
3333
3334 #[test]
3335 fn sanitized_phosphorus_cleanup_leaves_double_oxo_without_double_cn_branch_unchanged_like_rdkit()
3336 {
3337 let phosphorus_element =
3338 crate::Element::from_atomic_number(15).expect("phosphorus atomic number is valid");
3339 let mut builder = crate::MoleculeBuilder::new();
3340 let phosphorus = builder.add_atom(crate::AtomSpec::new(phosphorus_element));
3341 let oxygen_one = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3342 let oxygen_two = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3343 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3344 let bond_one = builder
3345 .add_bond(crate::BondSpec::new(
3346 phosphorus,
3347 oxygen_one,
3348 crate::BondOrder::Double,
3349 ))
3350 .unwrap();
3351 let bond_two = builder
3352 .add_bond(crate::BondSpec::new(
3353 phosphorus,
3354 oxygen_two,
3355 crate::BondOrder::Double,
3356 ))
3357 .unwrap();
3358 builder
3359 .add_bond(crate::BondSpec::new(
3360 phosphorus,
3361 carbon,
3362 crate::BondOrder::Single,
3363 ))
3364 .unwrap();
3365 let molecule = builder.build().unwrap();
3366
3367 let result = molecule
3368 .sanitize_with_ops(crate::SanitizeOps::CLEANUP)
3369 .unwrap();
3370
3371 assert_eq!(result.atoms()[phosphorus.index()].formal_charge(), 0);
3372 assert_eq!(result.atoms()[oxygen_one.index()].formal_charge(), 0);
3373 assert_eq!(result.atoms()[oxygen_two.index()].formal_charge(), 0);
3374 assert_eq!(
3375 result.bonds()[bond_one.index()].order(),
3376 crate::BondOrder::Double
3377 );
3378 assert_eq!(
3379 result.bonds()[bond_two.index()].order(),
3380 crate::BondOrder::Double
3381 );
3382 }
3383
3384 #[test]
3385 fn sanitized_cleanup_converts_hypervalent_halogen_oxo_like_rdkit() {
3386 let chlorine =
3387 crate::Element::from_atomic_number(17).expect("chlorine atomic number is valid");
3388 let mut builder = crate::MoleculeBuilder::new();
3389 let center = builder.add_atom(crate::AtomSpec::new(chlorine));
3390 let oxygen_one = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3391 let oxygen_two = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3392 builder
3393 .add_bond(crate::BondSpec::new(
3394 center,
3395 oxygen_one,
3396 crate::BondOrder::Double,
3397 ))
3398 .unwrap();
3399 builder
3400 .add_bond(crate::BondSpec::new(
3401 center,
3402 oxygen_two,
3403 crate::BondOrder::Single,
3404 ))
3405 .unwrap();
3406 let molecule = builder.build().unwrap();
3407
3408 let result = molecule
3409 .sanitize_with_ops(crate::SanitizeOps::CLEANUP)
3410 .unwrap();
3411
3412 assert_eq!(result.atoms()[center.index()].formal_charge(), 1);
3413 assert_eq!(result.atoms()[oxygen_one.index()].formal_charge(), -1);
3414 assert_eq!(result.atoms()[oxygen_two.index()].formal_charge(), 0);
3415 assert!(
3416 result
3417 .bonds()
3418 .iter()
3419 .all(|bond| bond.order() == crate::BondOrder::Single)
3420 );
3421 }
3422
3423 #[test]
3424 fn sanitized_halogen_cleanup_skips_non_oxo_neighbor_branch_like_rdkit() {
3425 let chlorine =
3426 crate::Element::from_atomic_number(17).expect("chlorine atomic number is valid");
3427 let mut builder = crate::MoleculeBuilder::new();
3428 let center = builder.add_atom(crate::AtomSpec::new(chlorine));
3429 let oxygen = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3430 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3431 let double_bond = builder
3432 .add_bond(crate::BondSpec::new(
3433 center,
3434 oxygen,
3435 crate::BondOrder::Double,
3436 ))
3437 .unwrap();
3438 builder
3439 .add_bond(crate::BondSpec::new(
3440 center,
3441 carbon,
3442 crate::BondOrder::Single,
3443 ))
3444 .unwrap();
3445 let molecule = builder.build().unwrap();
3446
3447 let result = molecule
3448 .sanitize_with_ops(crate::SanitizeOps::CLEANUP)
3449 .unwrap();
3450
3451 assert_eq!(result.atoms()[center.index()].formal_charge(), 0);
3452 assert_eq!(result.atoms()[oxygen.index()].formal_charge(), 0);
3453 assert_eq!(
3454 result.bonds()[double_bond.index()].order(),
3455 crate::BondOrder::Double
3456 );
3457 }
3458
3459 #[test]
3460 fn cleanup_incident_bonds_returns_only_local_bond_indices() {
3461 let mut builder = crate::MoleculeBuilder::new();
3462 let a0 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3463 let a1 = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3464 let a2 = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3465 let a3 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3466 builder
3467 .add_bond(crate::BondSpec::new(a0, a1, crate::BondOrder::Single))
3468 .unwrap();
3469 let center_left = builder
3470 .add_bond(crate::BondSpec::new(a1, a2, crate::BondOrder::Double))
3471 .unwrap();
3472 let center_right = builder
3473 .add_bond(crate::BondSpec::new(a1, a3, crate::BondOrder::Single))
3474 .unwrap();
3475 let molecule = builder.build().unwrap();
3476 let adjacency = sanitize_adjacency(&molecule).unwrap();
3477
3478 let incident = sanitize_cleanup_incident_bonds(&adjacency, a1);
3479
3480 assert_eq!(incident, vec![0, center_left.index(), center_right.index()]);
3481 }
3482
3483 #[test]
3484 fn cleanup_incident_bonds_explicit_valence_uses_assignment_bond_orders() {
3485 let mut builder = crate::MoleculeBuilder::new();
3486 let nitrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3487 let oxygen = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
3488 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3489 let oxygen_bond = builder
3490 .add_bond(crate::BondSpec::new(
3491 nitrogen,
3492 oxygen,
3493 crate::BondOrder::Double,
3494 ))
3495 .unwrap();
3496 builder
3497 .add_bond(crate::BondSpec::new(
3498 nitrogen,
3499 carbon,
3500 crate::BondOrder::Single,
3501 ))
3502 .unwrap();
3503 let molecule = builder.build().unwrap();
3504 let adjacency = sanitize_adjacency(&molecule).unwrap();
3505 let mut assignment = SanitizeCleanupAssignment {
3506 atom_formal_charges: molecule
3507 .atoms()
3508 .iter()
3509 .map(crate::Atom::formal_charge)
3510 .collect(),
3511 bond_orders: molecule.bonds().iter().map(crate::Bond::order).collect(),
3512 };
3513 assignment.bond_orders[oxygen_bond.index()] = crate::BondOrder::Single;
3514
3515 let valence =
3516 sanitize_cleanup_explicit_valence(&molecule, &adjacency, &assignment, nitrogen)
3517 .unwrap();
3518
3519 assert_eq!(valence, 2);
3520 }
3521
3522 #[test]
3523 fn sanitized_organometallic_cleanup_converts_single_metal_bond_to_dative_like_rdkit() {
3524 let iron = crate::Element::from_atomic_number(26).expect("iron atomic number is valid");
3525 let mut builder = crate::MoleculeBuilder::new();
3526 let nitrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3527 let carbon_one = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3528 let carbon_two = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3529 let carbon_three = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3530 let metal = builder.add_atom(crate::AtomSpec::new(iron));
3531 for neighbor in [carbon_one, carbon_two, carbon_three, metal] {
3532 builder
3533 .add_bond(crate::BondSpec::new(
3534 nitrogen,
3535 neighbor,
3536 crate::BondOrder::Single,
3537 ))
3538 .unwrap();
3539 }
3540 let molecule = builder.build().unwrap();
3541
3542 let result = molecule
3543 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_ORGANOMETALLICS)
3544 .unwrap();
3545 let metal_bond = result
3546 .bonds()
3547 .iter()
3548 .find(|bond| {
3549 (bond.begin() == nitrogen && bond.end() == metal)
3550 || (bond.begin() == metal && bond.end() == nitrogen)
3551 })
3552 .unwrap();
3553
3554 assert_eq!(metal_bond.order(), crate::BondOrder::Dative);
3555 assert_eq!(metal_bond.begin(), nitrogen);
3556 assert_eq!(metal_bond.end(), metal);
3557 }
3558
3559 #[test]
3560 fn sanitized_organometallic_cleanup_prefers_metal_with_fewer_existing_dative_bonds() {
3561 let iron = crate::Element::from_atomic_number(26).expect("iron atomic number is valid");
3562 let mut builder = crate::MoleculeBuilder::new();
3563
3564 let donor = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3565 let donor_c1 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3566 let donor_c2 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3567 let donor_c3 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3568 let metal_busy = builder.add_atom(crate::AtomSpec::new(iron));
3569 let metal_open = builder.add_atom(crate::AtomSpec::new(iron));
3570
3571 for neighbor in [donor_c1, donor_c2, donor_c3, metal_busy, metal_open] {
3572 builder
3573 .add_bond(crate::BondSpec::new(
3574 donor,
3575 neighbor,
3576 crate::BondOrder::Single,
3577 ))
3578 .unwrap();
3579 }
3580
3581 let donor_busy = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3582 let busy_c1 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3583 let busy_c2 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3584 let busy_c3 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3585 for neighbor in [busy_c1, busy_c2, busy_c3] {
3586 builder
3587 .add_bond(crate::BondSpec::new(
3588 donor_busy,
3589 neighbor,
3590 crate::BondOrder::Single,
3591 ))
3592 .unwrap();
3593 }
3594 builder
3595 .add_bond(crate::BondSpec::new(
3596 donor_busy,
3597 metal_busy,
3598 crate::BondOrder::Dative,
3599 ))
3600 .unwrap();
3601
3602 let molecule = builder.build().unwrap();
3603
3604 let result = molecule
3605 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_ORGANOMETALLICS)
3606 .unwrap();
3607
3608 let donor_to_busy = result
3609 .bonds()
3610 .iter()
3611 .find(|bond| {
3612 (bond.begin() == donor && bond.end() == metal_busy)
3613 || (bond.begin() == metal_busy && bond.end() == donor)
3614 })
3615 .unwrap();
3616 let donor_to_open = result
3617 .bonds()
3618 .iter()
3619 .find(|bond| {
3620 (bond.begin() == donor && bond.end() == metal_open)
3621 || (bond.begin() == metal_open && bond.end() == donor)
3622 })
3623 .unwrap();
3624
3625 assert_eq!(donor_to_busy.order(), crate::BondOrder::Single);
3626 assert_eq!(donor_to_open.order(), crate::BondOrder::Dative);
3627 assert_eq!(donor_to_open.begin(), donor);
3628 assert_eq!(donor_to_open.end(), metal_open);
3629 }
3630
3631 #[test]
3632 fn sanitized_organometallic_cleanup_skips_non_hypervalent_donor_like_rdkit() {
3633 let iron = crate::Element::from_atomic_number(26).expect("iron atomic number is valid");
3634 let mut builder = crate::MoleculeBuilder::new();
3635 let oxygen =
3636 builder.add_atom(crate::AtomSpec::new(crate::Element::O).with_no_implicit(true));
3637 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3638 let metal = builder.add_atom(crate::AtomSpec::new(iron));
3639 builder
3640 .add_bond(crate::BondSpec::new(
3641 oxygen,
3642 carbon,
3643 crate::BondOrder::Single,
3644 ))
3645 .unwrap();
3646 let metal_bond = builder
3647 .add_bond(crate::BondSpec::new(
3648 oxygen,
3649 metal,
3650 crate::BondOrder::Single,
3651 ))
3652 .unwrap();
3653 let molecule = builder.build().unwrap();
3654
3655 let result = molecule
3656 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_ORGANOMETALLICS)
3657 .unwrap();
3658
3659 assert_eq!(
3660 result.bonds()[metal_bond.index()].order(),
3661 crate::BondOrder::Single
3662 );
3663 }
3664
3665 #[test]
3666 fn metal_bond_cleanup_prefers_higher_rank_when_dative_counts_tie() {
3667 let iron = crate::Element::from_atomic_number(26).expect("iron atomic number is valid");
3668 let mut builder = crate::MoleculeBuilder::new();
3669 let donor = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3670 let c1 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3671 let c2 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3672 let c3 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3673 let metal_plain = builder.add_atom(crate::AtomSpec::new(iron));
3674 let metal_substituted = builder.add_atom(crate::AtomSpec::new(iron));
3675 let hydrogen = builder.add_atom(crate::AtomSpec::new(crate::Element::H));
3676 for neighbor in [c1, c2, c3, metal_plain, metal_substituted] {
3677 builder
3678 .add_bond(crate::BondSpec::new(
3679 donor,
3680 neighbor,
3681 crate::BondOrder::Single,
3682 ))
3683 .unwrap();
3684 }
3685 builder
3686 .add_bond(crate::BondSpec::new(
3687 metal_substituted,
3688 hydrogen,
3689 crate::BondOrder::Single,
3690 ))
3691 .unwrap();
3692 let molecule = builder.build().unwrap();
3693 let adjacency = sanitize_adjacency(&molecule).unwrap();
3694 let valence = molecule
3695 .assign_valence_with_options(crate::ValenceModel::RdkitLike, false)
3696 .unwrap();
3697 let ranks = molecule.rank_mol_atoms().unwrap();
3698 let mut assignment = SanitizeOrganometallicCleanupAssignment {
3699 bond_orders: molecule.bonds().iter().map(crate::Bond::order).collect(),
3700 bond_endpoints: molecule
3701 .bonds()
3702 .iter()
3703 .map(|bond| (bond.begin(), bond.end()))
3704 .collect(),
3705 };
3706
3707 sanitize_metal_bond_cleanup_assignment(
3708 &molecule,
3709 &adjacency,
3710 &valence,
3711 &ranks,
3712 donor,
3713 &mut assignment,
3714 )
3715 .unwrap();
3716
3717 let chosen_metal = assignment
3718 .bond_endpoints
3719 .iter()
3720 .zip(assignment.bond_orders.iter())
3721 .find_map(|(&(begin, end), &order)| {
3722 (order == crate::BondOrder::Dative && begin == donor).then_some(end)
3723 })
3724 .unwrap();
3725 let expected = [metal_plain, metal_substituted]
3726 .into_iter()
3727 .max_by_key(|atom| ranks[atom.index()])
3728 .unwrap();
3729 assert_eq!(chosen_metal, expected);
3730 }
3731
3732 #[test]
3733 fn hypervalent_nonmetal_predicate_matches_metal_and_aromatic_degree_four_branches() {
3734 let carbon = crate::Element::C;
3735 let iron = crate::Element::from_atomic_number(26).unwrap();
3736
3737 let mut aromatic_builder = crate::MoleculeBuilder::new();
3738 let sulfur_atom = aromatic_builder.add_atom(
3739 crate::AtomSpec::new(carbon)
3740 .with_aromatic(true)
3741 .with_no_implicit(true),
3742 );
3743 let mut aromatic_neighbors = Vec::new();
3744 for _ in 0..4 {
3745 let carbon = aromatic_builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3746 aromatic_builder
3747 .add_bond(crate::BondSpec::new(
3748 sulfur_atom,
3749 carbon,
3750 crate::BondOrder::Single,
3751 ))
3752 .unwrap();
3753 aromatic_neighbors.push(carbon);
3754 }
3755 let aromatic = aromatic_builder.build().unwrap();
3756 let aromatic_adj = sanitize_adjacency(&aromatic).unwrap();
3757 let aromatic_valence = aromatic
3758 .assign_valence_with_options(crate::ValenceModel::RdkitLike, false)
3759 .unwrap();
3760
3761 assert!(
3762 sanitize_is_hypervalent_nonmetal(
3763 &aromatic,
3764 &aromatic_adj,
3765 &aromatic_valence,
3766 sulfur_atom
3767 )
3768 .unwrap()
3769 );
3770
3771 let mut metal_builder = crate::MoleculeBuilder::new();
3772 let metal_atom = metal_builder.add_atom(crate::AtomSpec::new(iron));
3773 let ligand = metal_builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3774 metal_builder
3775 .add_bond(crate::BondSpec::new(
3776 metal_atom,
3777 ligand,
3778 crate::BondOrder::Single,
3779 ))
3780 .unwrap();
3781 let metal_molecule = metal_builder.build().unwrap();
3782 let metal_adj = sanitize_adjacency(&metal_molecule).unwrap();
3783 let metal_valence = metal_molecule
3784 .assign_valence_with_options(crate::ValenceModel::RdkitLike, false)
3785 .unwrap();
3786
3787 assert!(
3788 !sanitize_is_hypervalent_nonmetal(
3789 &metal_molecule,
3790 &metal_adj,
3791 &metal_valence,
3792 metal_atom
3793 )
3794 .unwrap()
3795 );
3796 }
3797
3798 #[test]
3799 fn single_bonded_metals_filters_non_single_and_non_metal_neighbors() {
3800 let iron = crate::Element::from_atomic_number(26).unwrap();
3801 let mut builder = crate::MoleculeBuilder::new();
3802 let donor = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
3803 let metal_single = builder.add_atom(crate::AtomSpec::new(iron));
3804 let metal_rewritten = builder.add_atom(crate::AtomSpec::new(iron));
3805 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3806 let keep_bond = builder
3807 .add_bond(crate::BondSpec::new(
3808 donor,
3809 metal_single,
3810 crate::BondOrder::Single,
3811 ))
3812 .unwrap();
3813 let rewritten_bond = builder
3814 .add_bond(crate::BondSpec::new(
3815 donor,
3816 metal_rewritten,
3817 crate::BondOrder::Single,
3818 ))
3819 .unwrap();
3820 builder
3821 .add_bond(crate::BondSpec::new(
3822 donor,
3823 carbon,
3824 crate::BondOrder::Single,
3825 ))
3826 .unwrap();
3827 let molecule = builder.build().unwrap();
3828 let adjacency = sanitize_adjacency(&molecule).unwrap();
3829 let mut assignment = SanitizeOrganometallicCleanupAssignment {
3830 bond_orders: molecule.bonds().iter().map(crate::Bond::order).collect(),
3831 bond_endpoints: molecule
3832 .bonds()
3833 .iter()
3834 .map(|bond| (bond.begin(), bond.end()))
3835 .collect(),
3836 };
3837 assignment.bond_orders[rewritten_bond.index()] = crate::BondOrder::Dative;
3838
3839 let metals =
3840 sanitize_organometallic_single_bonded_metals(&molecule, &adjacency, &assignment, donor);
3841
3842 assert_eq!(metals, vec![metal_single]);
3843 assert_eq!(keep_bond.index(), 0);
3844 }
3845
3846 #[test]
3847 fn sanitized_cleanup_atropisomers_clears_non_sp2_atrop_bond_like_rdkit() {
3848 let mut builder = crate::MoleculeBuilder::new();
3849 let left = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3850 let right = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3851 builder
3852 .add_bond(
3853 crate::BondSpec::new(left, right, crate::BondOrder::Single)
3854 .with_stereo(crate::BondStereo::AtropCw),
3855 )
3856 .unwrap();
3857 let molecule = builder.build().unwrap();
3858
3859 let result = molecule
3860 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_ATROPISOMERS)
3861 .unwrap();
3862
3863 assert_eq!(result.bonds()[0].stereo(), crate::BondStereo::None);
3864 assert_eq!(result.bonds()[0].stereo_atoms(), None);
3865 }
3866
3867 #[test]
3868 fn sanitized_cleanup_atropisomers_clears_small_ring_atrop_stereo_and_group_like_rdkit() {
3869 let mut builder = crate::MoleculeBuilder::new();
3870 let atoms = (0..6)
3871 .map(|_| {
3872 builder.add_atom(
3873 crate::AtomSpec::new(crate::Element::C)
3874 .with_hybridization(crate::Hybridization::Sp2),
3875 )
3876 })
3877 .collect::<Vec<_>>();
3878 let atrop_bond = builder
3879 .add_bond(
3880 crate::BondSpec::new(atoms[0], atoms[1], crate::BondOrder::Single)
3881 .with_stereo(crate::BondStereo::AtropCcw),
3882 )
3883 .unwrap();
3884 for idx in 1..6 {
3885 builder
3886 .add_bond(crate::BondSpec::new(
3887 atoms[idx],
3888 atoms[(idx + 1) % 6],
3889 crate::BondOrder::Single,
3890 ))
3891 .unwrap();
3892 }
3893 builder
3894 .add_stereo_group(crate::StereoGroup::new(
3895 crate::StereoGroupKind::Or,
3896 Vec::new(),
3897 vec![atrop_bond],
3898 ))
3899 .unwrap();
3900 let molecule = builder.build().unwrap();
3901
3902 let result = molecule
3903 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_ATROPISOMERS)
3904 .unwrap();
3905
3906 assert_eq!(
3907 result.bonds()[atrop_bond.index()].stereo(),
3908 crate::BondStereo::None
3909 );
3910 assert_eq!(result.bonds()[atrop_bond.index()].stereo_atoms(), None);
3911 assert!(result.stereo_groups().is_empty());
3912 }
3913
3914 #[test]
3915 fn sanitized_cleanup_chirality_clears_non_sp3_tetrahedral_tag_like_rdkit() {
3916 let mut builder = crate::MoleculeBuilder::new();
3917 builder.add_atom(
3918 crate::AtomSpec::new(crate::Element::C)
3919 .with_chiral_tag(crate::ChiralTag::TetrahedralCw),
3920 );
3921 let molecule = builder.build().unwrap();
3922
3923 let result = molecule
3924 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_CHIRALITY)
3925 .unwrap();
3926
3927 assert_eq!(
3928 result.atoms()[0].chiral_tag(),
3929 crate::ChiralTag::Unspecified
3930 );
3931 }
3932
3933 #[test]
3934 fn sanitized_cleanup_chirality_cleans_stereo_groups_for_non_sp3_tetrahedral_tags_like_rdkit() {
3935 let mut builder = crate::MoleculeBuilder::new();
3936 let atom = builder.add_atom(
3937 crate::AtomSpec::new(crate::Element::C)
3938 .with_chiral_tag(crate::ChiralTag::TetrahedralCw),
3939 );
3940 builder
3941 .add_stereo_group(crate::StereoGroup::new(
3942 crate::StereoGroupKind::Absolute,
3943 vec![atom],
3944 Vec::new(),
3945 ))
3946 .unwrap();
3947 let molecule = builder.build().unwrap();
3948
3949 let result = molecule
3950 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_CHIRALITY)
3951 .unwrap();
3952
3953 assert_eq!(
3954 result.atoms()[atom.index()].chiral_tag(),
3955 crate::ChiralTag::Unspecified
3956 );
3957 assert!(result.stereo_groups().is_empty());
3958 }
3959
3960 #[test]
3961 fn sanitized_cleanup_chirality_resets_tetrahedral_permutation_above_limit_like_rdkit() {
3962 let mut builder = crate::MoleculeBuilder::new();
3963 builder.add_atom(
3964 crate::AtomSpec::new(crate::Element::C)
3965 .with_chiral_tag(crate::ChiralTag::Tetrahedral)
3966 .with_hybridization(crate::Hybridization::Sp3)
3967 .with_chiral_permutation(7),
3968 );
3969 let molecule = builder.build().unwrap();
3970
3971 let result = molecule
3972 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_CHIRALITY)
3973 .unwrap();
3974
3975 assert_eq!(
3976 result.atoms()[0].chiral_tag(),
3977 crate::ChiralTag::Tetrahedral
3978 );
3979 assert_eq!(result.atoms()[0].chiral_permutation(), Some(0));
3980 }
3981
3982 #[test]
3983 fn sanitized_cleanup_chirality_resets_square_planar_permutation_above_limit_like_rdkit() {
3984 let mut builder = crate::MoleculeBuilder::new();
3985 let center = builder.add_atom(
3986 crate::AtomSpec::new(crate::Element::C)
3987 .with_no_implicit(true)
3988 .with_chiral_tag(crate::ChiralTag::SquarePlanar)
3989 .with_chiral_permutation(7),
3990 );
3991 let left = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3992 let right = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
3993 builder
3994 .add_bond(crate::BondSpec::new(center, left, crate::BondOrder::Single))
3995 .unwrap();
3996 builder
3997 .add_bond(crate::BondSpec::new(
3998 center,
3999 right,
4000 crate::BondOrder::Single,
4001 ))
4002 .unwrap();
4003 let molecule = builder.build().unwrap();
4004
4005 let result = molecule
4006 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_CHIRALITY)
4007 .unwrap();
4008
4009 assert_eq!(
4010 result.atoms()[center.index()].chiral_tag(),
4011 crate::ChiralTag::SquarePlanar
4012 );
4013 assert_eq!(result.atoms()[center.index()].chiral_permutation(), Some(0));
4014 }
4015
4016 #[test]
4017 fn sanitized_cleanup_chirality_leaves_invalid_square_planar_stereo_group_untouched_without_tetrahedral_cleanup_flag_like_rdkit()
4018 {
4019 let mut builder = crate::MoleculeBuilder::new();
4020 let center = builder.add_atom(
4021 crate::AtomSpec::new(crate::Element::C)
4022 .with_no_implicit(true)
4023 .with_chiral_tag(crate::ChiralTag::SquarePlanar)
4024 .with_chiral_permutation(3),
4025 );
4026 let neighbor = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4027 builder
4028 .add_bond(crate::BondSpec::new(
4029 center,
4030 neighbor,
4031 crate::BondOrder::Single,
4032 ))
4033 .unwrap();
4034 builder
4035 .add_stereo_group(crate::StereoGroup::new(
4036 crate::StereoGroupKind::Absolute,
4037 vec![center],
4038 Vec::new(),
4039 ))
4040 .unwrap();
4041 let molecule = builder.build().unwrap();
4042
4043 let result = molecule
4044 .sanitize_with_ops(crate::SanitizeOps::CLEANUP_CHIRALITY)
4045 .unwrap();
4046
4047 assert_eq!(
4048 result.atoms()[center.index()].chiral_tag(),
4049 crate::ChiralTag::Unspecified
4050 );
4051 assert_eq!(result.stereo_groups().len(), 1);
4052 assert_eq!(result.stereo_groups()[0].atoms(), &[center]);
4053 }
4054
4055 #[test]
4056 fn sanitized_sets_conjugation_for_butadiene_like_rdkit() {
4057 let molecule = crate::Molecule::from_smiles_with_sanitize("C=CC=C", false).unwrap();
4058
4059 let result = molecule
4060 .sanitize_with_ops(crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::SET_CONJUGATION)
4061 .unwrap();
4062
4063 assert!(result.bonds().iter().all(crate::Bond::is_conjugated));
4064 }
4065
4066 #[test]
4067 fn sanitized_set_conjugation_keeps_aromatic_bonds_conjugated_like_rdkit() {
4068 let molecule = crate::Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
4069
4070 let result = molecule
4071 .sanitize_with_ops(crate::SanitizeOps::SET_CONJUGATION)
4072 .unwrap();
4073
4074 assert!(result.bonds().iter().all(crate::Bond::is_conjugated));
4075 }
4076
4077 #[test]
4078 fn sanitized_set_conjugation_uses_heteroatom_lone_pair_candidate_like_rdkit() {
4079 let molecule = crate::Molecule::from_smiles_with_sanitize("NC=O", false).unwrap();
4080
4081 let result = molecule
4082 .sanitize_with_ops(crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::SET_CONJUGATION)
4083 .unwrap();
4084
4085 assert_eq!(result.num_bonds(), 2);
4086 assert!(result.bonds().iter().all(crate::Bond::is_conjugated));
4087 }
4088
4089 #[test]
4090 fn sanitized_sets_hybridization_after_conjugation_like_rdkit() {
4091 let molecule = crate::Molecule::from_smiles_with_sanitize("CCO", false).unwrap();
4092
4093 let result = molecule
4094 .sanitize_with_ops(
4095 crate::SanitizeOps::PROPERTIES
4096 | crate::SanitizeOps::SET_CONJUGATION
4097 | crate::SanitizeOps::SET_HYBRIDIZATION,
4098 )
4099 .unwrap();
4100
4101 assert_eq!(result.atoms()[0].hybridization(), crate::Hybridization::Sp3);
4102 assert_eq!(result.atoms()[1].hybridization(), crate::Hybridization::Sp3);
4103 assert_eq!(result.atoms()[2].hybridization(), crate::Hybridization::Sp3);
4104 }
4105
4106 #[test]
4107 fn sanitized_set_hybridization_uses_chiral_tag_coordination_override() {
4108 let mut builder = crate::MoleculeBuilder::new();
4109 let center = builder.add_atom(
4110 crate::AtomSpec::new(crate::Element::C)
4111 .with_chiral_tag(crate::ChiralTag::TetrahedralCw),
4112 );
4113 for _ in 0..4 {
4114 let neighbor = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4115 builder
4116 .add_bond(crate::BondSpec::new(
4117 center,
4118 neighbor,
4119 crate::BondOrder::Single,
4120 ))
4121 .unwrap();
4122 }
4123 let molecule = builder.build().unwrap();
4124
4125 let result = molecule
4126 .sanitize_with_ops(
4127 crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::SET_HYBRIDIZATION,
4128 )
4129 .unwrap();
4130
4131 assert_eq!(
4132 result.atoms()[center.index()].hybridization(),
4133 crate::Hybridization::Sp3
4134 );
4135 }
4136
4137 #[test]
4138 fn sanitized_set_hybridization_excludes_dative_bonds_from_num_bonds_plus_lone_pairs() {
4139 let iron = crate::Element::from_atomic_number(26).unwrap();
4140 let mut builder = crate::MoleculeBuilder::new();
4141 let nitrogen =
4142 builder.add_atom(crate::AtomSpec::new(crate::Element::N).with_no_implicit(true));
4143 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4144 let metal = builder.add_atom(crate::AtomSpec::new(iron));
4145 builder
4146 .add_bond(crate::BondSpec::new(
4147 nitrogen,
4148 carbon,
4149 crate::BondOrder::Single,
4150 ))
4151 .unwrap();
4152 builder
4153 .add_bond(crate::BondSpec::new(
4154 nitrogen,
4155 metal,
4156 crate::BondOrder::Dative,
4157 ))
4158 .unwrap();
4159 let molecule = builder.build().unwrap();
4160
4161 let result = molecule
4162 .sanitize_with_ops(
4163 crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::SET_HYBRIDIZATION,
4164 )
4165 .unwrap();
4166
4167 assert_eq!(
4168 result.atoms()[nitrogen.index()].hybridization(),
4169 crate::Hybridization::Sp2
4170 );
4171 }
4172
4173 #[test]
4174 fn sanitized_set_hybridization_excludes_zero_bonds_from_num_bonds_plus_lone_pairs() {
4175 let mut builder = crate::MoleculeBuilder::new();
4176 let oxygen =
4177 builder.add_atom(crate::AtomSpec::new(crate::Element::O).with_no_implicit(true));
4178 let carbon = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4179 let dummy = builder.add_atom(crate::AtomSpec::new(crate::Element::DUMMY));
4180 builder
4181 .add_bond(crate::BondSpec::new(
4182 oxygen,
4183 carbon,
4184 crate::BondOrder::Single,
4185 ))
4186 .unwrap();
4187 builder
4188 .add_bond(crate::BondSpec::new(oxygen, dummy, crate::BondOrder::Zero))
4189 .unwrap();
4190 let molecule = builder.build().unwrap();
4191
4192 let result = molecule
4193 .sanitize_with_ops(
4194 crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::SET_HYBRIDIZATION,
4195 )
4196 .unwrap();
4197
4198 assert_eq!(
4199 result.atoms()[oxygen.index()].hybridization(),
4200 crate::Hybridization::Sp2
4201 );
4202 }
4203
4204 #[test]
4205 fn sanitized_set_hybridization_uses_conjugated_bond_sp2_branch_like_rdkit() {
4206 let molecule = crate::Molecule::from_smiles_with_sanitize("NC=O", false).unwrap();
4207
4208 let result = molecule
4209 .sanitize_with_ops(
4210 crate::SanitizeOps::PROPERTIES
4211 | crate::SanitizeOps::SET_CONJUGATION
4212 | crate::SanitizeOps::SET_HYBRIDIZATION,
4213 )
4214 .unwrap();
4215
4216 assert_eq!(result.atoms()[0].hybridization(), crate::Hybridization::Sp2);
4217 }
4218
4219 #[test]
4220 fn sanitized_set_hybridization_uses_atomic_number_cutoff_for_actinides() {
4221 let actinium = crate::Element::from_atomic_number(89).unwrap();
4222 let mut builder = crate::MoleculeBuilder::new();
4223 let center = builder.add_atom(crate::AtomSpec::new(actinium).with_no_implicit(true));
4224 for _ in 0..2 {
4225 let neighbor = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4226 builder
4227 .add_bond(crate::BondSpec::new(
4228 center,
4229 neighbor,
4230 crate::BondOrder::Single,
4231 ))
4232 .unwrap();
4233 }
4234 let molecule = builder.build().unwrap();
4235
4236 let result = molecule
4237 .sanitize_with_ops(
4238 crate::SanitizeOps::PROPERTIES | crate::SanitizeOps::SET_HYBRIDIZATION,
4239 )
4240 .unwrap();
4241
4242 assert_eq!(
4243 result.atoms()[center.index()].hybridization(),
4244 crate::Hybridization::Sp
4245 );
4246 }
4247
4248 #[test]
4249 fn sanitized_find_radicals_recomputes_property_cache_after_topology_state_update() {
4250 let mut builder = crate::MoleculeBuilder::new();
4251 builder.add_atom(
4252 crate::AtomSpec::new(crate::Element::C)
4253 .with_no_implicit(true)
4254 .with_explicit_hydrogens(3),
4255 );
4256 let molecule = builder.build().unwrap();
4257
4258 let result = molecule
4259 .sanitize_with_ops(crate::SanitizeOps::FIND_RADICALS)
4260 .unwrap();
4261
4262 assert_eq!(result.atoms()[0].radical_electrons(), 1);
4263 let expected =
4264 crate::assign_valence_with_options(&result, crate::ValenceModel::RdkitLike, false)
4265 .unwrap();
4266 assert_eq!(result.derived_cache().valence, Some(expected));
4267 }
4268
4269 #[test]
4270 fn sanitize_adjust_hydrogens_assignment_requested_step_does_not_mutate_source() {
4271 let molecule = crate::Molecule::from_smiles_with_sanitize("CCO", false).unwrap();
4272 let original = molecule.clone();
4273
4274 let result = molecule
4275 .sanitize_with_ops(crate::SanitizeOps::ADJUST_HYDROGENS)
4276 .unwrap();
4277
4278 assert_eq!(molecule, original);
4279 assert_eq!(result.num_atoms(), molecule.num_atoms());
4280 }
4281
4282 #[test]
4283 fn sanitize_adjust_hydrogens_assignment_materializes_disappearing_pyrrolic_hydrogen() {
4284 let molecule = crate::Molecule::from_smiles_with_sanitize("N1C=CC=C1", false).unwrap();
4285
4286 let result = molecule
4287 .sanitize_with_ops(
4288 crate::SanitizeOps::PROPERTIES
4289 | crate::SanitizeOps::SYMMRINGS
4290 | crate::SanitizeOps::KEKULIZE
4291 | crate::SanitizeOps::SET_AROMATICITY
4292 | crate::SanitizeOps::ADJUST_HYDROGENS,
4293 )
4294 .unwrap();
4295
4296 assert!(result.atoms()[0].is_aromatic());
4297 assert_eq!(result.atoms()[0].explicit_hydrogens(), 1);
4298 assert_eq!(
4299 result
4300 .derived_cache()
4301 .valence
4302 .as_ref()
4303 .unwrap()
4304 .implicit_hydrogens[0],
4305 0
4306 );
4307 }
4308
4309 #[test]
4310 fn sanitize_adjust_hydrogens_assignment_preserves_existing_explicit_hydrogen_when_delta_is_zero()
4311 {
4312 let mut builder = crate::MoleculeBuilder::new();
4313 builder.add_atom(
4314 crate::AtomSpec::new(crate::Element::N)
4315 .with_no_implicit(true)
4316 .with_explicit_hydrogens(1),
4317 );
4318 let molecule = builder.build().unwrap();
4319
4320 let result = molecule
4321 .sanitize_with_ops(crate::SanitizeOps::ADJUST_HYDROGENS)
4322 .unwrap();
4323
4324 assert_eq!(result.atoms()[0].explicit_hydrogens(), 1);
4325 }
4326
4327 #[test]
4328 fn sanitize_adjust_hydrogens_assignment_leaves_stable_explicit_hydrogens_unchanged_like_rdkit()
4329 {
4330 let molecule = crate::Molecule::from_smiles_with_sanitize("CCO", false).unwrap();
4331
4332 let result = molecule
4333 .sanitize_with_ops(crate::SanitizeOps::ADJUST_HYDROGENS)
4334 .unwrap();
4335
4336 let explicit_hs = result
4337 .atoms()
4338 .iter()
4339 .map(crate::Atom::explicit_hydrogens)
4340 .collect::<Vec<_>>();
4341 assert_eq!(explicit_hs, vec![0, 0, 0]);
4342 assert_eq!(
4343 molecule
4344 .atoms()
4345 .iter()
4346 .map(crate::Atom::explicit_hydrogens)
4347 .collect::<Vec<_>>(),
4348 explicit_hs
4349 );
4350 }
4351
4352 #[test]
4353 fn experimental_kekulize_runs_through_operation_pipeline_without_changing_source() {
4354 assert_eq!(
4355 WITH_KEKULIZED_BONDS_SPEC.support,
4356 SupportStatus::Experimental
4357 );
4358
4359 let molecule = crate::Molecule::new();
4360 let original = molecule.clone();
4361 let result = molecule
4362 .with_kekulized_bonds(true)
4363 .expect("experimental kekulize skeleton should satisfy op contract");
4364
4365 assert_eq!(molecule, original);
4366 assert_eq!(result.atoms(), original.atoms());
4367 assert_eq!(result.bonds(), original.bonds());
4368 assert_eq!(result.coordinates_2d(), original.coordinates_2d());
4369 assert_eq!(result.conformers_3d(), original.conformers_3d());
4370 assert_eq!(
4371 result.source_coordinate_dim(),
4372 original.source_coordinate_dim()
4373 );
4374 assert_eq!(result.properties(), original.properties());
4375 assert_eq!(
4376 result.derived_cache().valence,
4377 Some(crate::ValenceAssignment {
4378 explicit_valence: Vec::new(),
4379 implicit_hydrogens: Vec::new(),
4380 })
4381 );
4382 }
4383
4384 #[cfg(feature = "op-contracts")]
4385 #[test]
4386 fn op_parts_rejects_permission_violation_under_strict_checks() {
4387 let molecule = crate::Molecule::new();
4388 let mut parts = OpParts::new(&molecule, &WITH_KEKULIZED_BONDS_SPEC).unwrap();
4389 let err = parts
4390 .begin_coordinates_mut()
4391 .expect_err("coordinate begin should be rejected");
4392 assert!(
4393 matches!(err, OperationError::InvalidInput { message, .. } if message.contains("outside its registry access"))
4394 );
4395 }
4396
4397 #[test]
4398 fn needs_update_clears_matching_derived_cache_entries() {
4399 let molecule = crate::Molecule::new();
4400 let mut parts = OpParts::new(&molecule, &SANITIZE_SPEC).unwrap();
4401 parts.set_valence_cache(crate::ValenceAssignment {
4402 explicit_valence: Vec::new(),
4403 implicit_hydrogens: Vec::new(),
4404 });
4405 parts.mark_aromaticity_valid();
4406
4407 parts.clear_cache(SANITIZE_SPEC.needs_update());
4408 let result = parts
4409 .finish(OpOutcome::Changed)
4410 .expect("cache invalidation should satisfy operation contract");
4411
4412 let cache = result.derived_cache();
4413 assert!(cache.valence.is_none());
4414 assert!(cache.rings.is_none());
4415 assert!(cache.ring_families.is_none());
4416 assert!(!cache.aromaticity_valid);
4417 assert!(!cache.stereo_valid);
4418 }
4419
4420 #[test]
4421 fn needs_update_accepts_cache_updates_without_prior_clear() {
4422 let molecule = crate::Molecule::new();
4423 let mut parts = OpParts::new(&molecule, &SANITIZE_SPEC).unwrap();
4424
4425 parts.set_rings_cache(crate::RingInfo::new(crate::RingFindType::SymmSssr, 0, 0));
4426 parts.set_valence_cache(crate::ValenceAssignment {
4427 explicit_valence: Vec::new(),
4428 implicit_hydrogens: Vec::new(),
4429 });
4430 parts.mark_aromaticity_valid();
4431 parts.clear_cache(
4432 DerivedState::RING_FAMILIES
4433 | DerivedState::STEREO
4434 | DerivedState::DRAWING
4435 | DerivedState::FINGERPRINT,
4436 );
4437 let result = parts
4438 .finish(OpOutcome::Changed)
4439 .expect("updated cache entries should satisfy needs_update without clear first");
4440
4441 let cache = result.derived_cache();
4442 assert!(cache.valence.is_some());
4443 assert!(cache.rings.is_some());
4444 assert!(cache.ring_families.is_none());
4445 assert!(cache.aromaticity_valid);
4446 assert!(!cache.stereo_valid);
4447 }
4448
4449 #[cfg(feature = "op-contracts")]
4450 #[test]
4451 fn finish_rejects_missing_needs_update_handling() {
4452 let molecule = crate::Molecule::new();
4453 let parts = OpParts::new(&molecule, &TEST_NEEDS_VALENCE_UPDATE_SPEC).unwrap();
4454
4455 let err = parts
4456 .finish(OpOutcome::NoOp {
4457 reason: "intentionally missed needs_update",
4458 })
4459 .expect_err("needs_update must be cleared or updated before finish");
4460
4461 assert!(matches!(
4462 err,
4463 OperationError::InvalidInput {
4464 message: "operation body did not clear or update every required cache state",
4465 ..
4466 }
4467 ));
4468 }
4469
4470 #[cfg(feature = "op-contracts")]
4471 #[test]
4472 fn finish_rejects_unrelated_cache_clear_for_needs_update() {
4473 let molecule = crate::Molecule::new();
4474 let mut parts = OpParts::new(&molecule, &TEST_NEEDS_VALENCE_UPDATE_SPEC).unwrap();
4475
4476 parts.clear_cache(DerivedState::RINGS);
4477 let err = parts
4478 .finish(OpOutcome::Changed)
4479 .expect_err("clearing rings must not satisfy a valence needs_update contract");
4480
4481 assert!(matches!(
4482 err,
4483 OperationError::InvalidInput {
4484 message: "operation body did not clear or update every required cache state",
4485 ..
4486 }
4487 ));
4488 }
4489
4490 #[cfg(feature = "op-contracts")]
4491 #[test]
4492 fn finish_rejects_declared_preservation_without_proof() {
4493 let molecule = crate::Molecule::new();
4494 let mut parts = OpParts::new(&molecule, &WITH_HYDROGENS_SPEC).unwrap();
4495 let topology = parts.begin_topology_mut().unwrap();
4496 let coordinates = parts.begin_coordinates_mut().unwrap();
4497 let properties = parts.begin_properties_mut().unwrap();
4498
4499 parts.commit_topology(topology).unwrap();
4500 parts.commit_coordinates(coordinates).unwrap();
4501 parts.commit_properties(properties).unwrap();
4502 parts
4503 .record_topology_edit(TopologyEditKind::Appending)
4504 .unwrap();
4505 parts.record_topology_mapping(TopologyMapping::with_appended(0, 0, 0, 0));
4506 parts.clear_cache(WITH_HYDROGENS_SPEC.needs_update());
4507
4508 let err = parts
4509 .finish(OpOutcome::Changed)
4510 .expect_err("declared preserve states require an explicit preservation proof");
4511
4512 assert!(matches!(
4513 err,
4514 OperationError::InvalidInput {
4515 message: "operation body did not prove every declared preserved derived state",
4516 ..
4517 }
4518 ));
4519 }
4520
4521 #[cfg(feature = "op-contracts")]
4522 #[test]
4523 fn leaf_append_preservation_proof_rejects_non_leaf_appended_atom() {
4524 let mut builder = crate::MoleculeBuilder::new();
4525 builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4526 let molecule = builder.build().unwrap();
4527 let mut parts = OpParts::new(&molecule, &WITH_HYDROGENS_SPEC).unwrap();
4528 let mut topology = parts.begin_topology_mut().unwrap();
4529 let coordinates = parts.begin_coordinates_mut().unwrap();
4530 let properties = parts.begin_properties_mut().unwrap();
4531
4532 let appended = AtomId::new(topology.atoms.len());
4533 topology.atoms.push(crate::Atom::from_spec(
4534 appended,
4535 crate::AtomSpec::new(crate::Element::H),
4536 ));
4537 parts.commit_topology(topology).unwrap();
4538 parts.commit_coordinates(coordinates).unwrap();
4539 parts.commit_properties(properties).unwrap();
4540 parts
4541 .record_topology_edit(TopologyEditKind::Appending)
4542 .unwrap();
4543 parts.record_topology_mapping(TopologyMapping::with_appended(1, 0, 1, 0));
4544
4545 let err = parts
4546 .prove_preserved(
4547 DerivedState::RINGS | DerivedState::RING_FAMILIES,
4548 PreservationProof::LeafAtomAppend,
4549 )
4550 .expect_err("an appended atom with no appended leaf bond must not preserve rings");
4551
4552 assert!(matches!(
4553 err,
4554 OperationError::InvalidInput {
4555 message: "leaf-append preservation proof requires every appended atom to be a degree-one leaf",
4556 ..
4557 }
4558 ));
4559 }
4560
4561 #[test]
4562 fn finish_accepts_update_path_for_recompute() {
4563 let molecule = crate::Molecule::new();
4564 let mut parts = OpParts::new(&molecule, &TEST_RECOMPUTE_VALENCE_SPEC).unwrap();
4565
4566 parts.set_valence_cache(crate::ValenceAssignment {
4567 explicit_valence: Vec::new(),
4568 implicit_hydrogens: Vec::new(),
4569 });
4570 let result = parts
4571 .finish(OpOutcome::Changed)
4572 .expect("setting valence cache should satisfy recompute requirement");
4573
4574 assert_eq!(
4575 result.derived_cache().valence,
4576 Some(crate::ValenceAssignment {
4577 explicit_valence: Vec::new(),
4578 implicit_hydrogens: Vec::new(),
4579 })
4580 );
4581 assert_eq!(molecule.derived_cache().valence, None);
4582 }
4583
4584 #[cfg(feature = "op-contracts")]
4585 #[test]
4586 #[should_panic(expected = "cache clear permission violation")]
4587 fn clear_cache_panics_without_derived_cache_permission_when_cache_is_touched() {
4588 let molecule = crate::Molecule::new();
4589 let mut parts = OpParts::new(&molecule, &WITH_2D_COORDINATES_SPEC).unwrap();
4590 parts.clear_cache(DerivedState::VALENCE);
4591 }
4592
4593 #[cfg(not(feature = "op-contracts"))]
4594 #[test]
4595 fn op_contract_checks_are_disabled_without_feature() {
4596 let molecule = crate::Molecule::new();
4597
4598 let mut unauthorized = OpParts::new(&molecule, &WITH_2D_COORDINATES_SPEC).unwrap();
4599 unauthorized.clear_cache(DerivedState::VALENCE);
4600 unauthorized
4601 .finish(OpOutcome::Changed)
4602 .expect("without op-contracts, cache permission checks are disabled");
4603
4604 let missing_update = OpParts::new(&molecule, &TEST_NEEDS_VALENCE_UPDATE_SPEC).unwrap();
4605 missing_update
4606 .finish(OpOutcome::Changed)
4607 .expect("without op-contracts, needs_update checks are disabled");
4608 }
4609
4610 #[cfg(feature = "op-contracts")]
4611 #[test]
4612 #[should_panic(expected = "cache write permission violation")]
4613 fn set_valence_cache_panics_without_requires_or_recompute() {
4614 let molecule = crate::Molecule::new();
4616 let mut parts = OpParts::new(&molecule, &WITH_2D_COORDINATES_SPEC).unwrap();
4617 parts.set_valence_cache(crate::ValenceAssignment {
4618 explicit_valence: Vec::new(),
4619 implicit_hydrogens: Vec::new(),
4620 });
4621 }
4622
4623 #[cfg(feature = "op-contracts")]
4624 #[test]
4625 #[should_panic(expected = "cache clear permission violation")]
4626 fn clear_cache_panics_without_invalidate_or_recompute() {
4627 let molecule = crate::Molecule::new();
4630 let mut parts = OpParts::new(&molecule, &TEST_RECOMPUTE_VALENCE_SPEC).unwrap();
4631 parts.clear_cache(DerivedState::RINGS);
4632 }
4633
4634 #[test]
4635 fn op_parts_cow_mutation_changes_result_without_changing_source() {
4636 let molecule = crate::Molecule::new();
4637 let mut parts = OpParts::new(&molecule, &WITH_KEKULIZED_BONDS_SPEC).unwrap();
4638
4639 let mut topology = parts.begin_topology_mut().unwrap();
4640 topology.atoms.push(crate::Atom::from_spec(
4641 crate::AtomId::new(0),
4642 crate::AtomSpec::new(crate::Element::C),
4643 ));
4644 let view = parts.read_parts_for_topology(topology.clone()).unwrap();
4645 let valence = MoleculeReadParts::from_molecule(&view)
4646 .assign_valence_with_options(crate::ValenceModel::RdkitLike, true)
4647 .unwrap();
4648 parts.commit_topology(topology).unwrap();
4649 parts.record_topology_edit(TopologyEditKind::Local).unwrap();
4650 parts.set_rings_cache(crate::RingInfo::new(crate::RingFindType::SymmSssr, 1, 0));
4651 parts.set_valence_cache(valence);
4652 parts.clear_cache(DerivedState::AROMATICITY);
4653 parts.clear_cache(DerivedState::DRAWING | DerivedState::FINGERPRINT);
4654
4655 let result = parts
4656 .finish(OpOutcome::Changed)
4657 .expect("COW topology edit should produce a valid molecule");
4658
4659 assert_eq!(molecule.num_atoms(), 0);
4660 assert_eq!(result.num_atoms(), 1);
4661 assert_eq!(result.atomic_numbers(), vec![6]);
4662 }
4663
4664 #[test]
4665 fn compacting_edit_uses_begin_commit_blocks_and_records_mapping() {
4666 let mut builder = crate::Molecule::builder();
4667 let c0 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4668 let o1 = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
4669 let n2 = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
4670 builder
4671 .add_bond(crate::BondSpec::new(c0, o1, crate::BondOrder::Single))
4672 .unwrap();
4673 builder
4674 .add_bond(crate::BondSpec::new(o1, n2, crate::BondOrder::Double))
4675 .unwrap();
4676 builder
4677 .set_2d_coordinates(vec![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])
4678 .unwrap();
4679 let properties = crate::MoleculeProperties::default()
4680 .with_sdf_property_list(crate::SdfPropertyList::new(
4681 crate::SdfPropertyListTarget::Atom,
4682 "atom_tag",
4683 vec![
4684 Some("c0".to_string()),
4685 Some("o1".to_string()),
4686 Some("n2".to_string()),
4687 ],
4688 ))
4689 .with_sdf_property_list(crate::SdfPropertyList::new(
4690 crate::SdfPropertyListTarget::Bond,
4691 "bond_tag",
4692 vec![Some("c-o".to_string()), Some("o-n".to_string())],
4693 ));
4694 builder = builder.with_properties(properties);
4695 let molecule = builder.build().unwrap();
4696 let original = molecule.clone();
4697
4698 let mut parts = OpParts::new(&molecule, &WITHOUT_HYDROGENS_SPEC).unwrap();
4699 let mut topology = parts.begin_topology_mut().unwrap();
4700 let mut coordinates = parts.begin_coordinates_mut().unwrap();
4701 let mut properties = parts.begin_properties_mut().unwrap();
4702 let mapping = topology.remove_atoms_with_mapping(&[o1]);
4703 coordinates.remap_topology(&mapping);
4704 properties.remap_topology(&mapping);
4705 parts
4706 .record_topology_edit(TopologyEditKind::Compacting)
4707 .unwrap();
4708 parts.record_topology_mapping(mapping.clone());
4709 assert_eq!(
4710 mapping.atoms().old_to_new(),
4711 &[
4712 Some(crate::AtomId::new(0)),
4713 None,
4714 Some(crate::AtomId::new(1))
4715 ]
4716 );
4717 assert_eq!(mapping.bonds().old_to_new(), &[None, None]);
4718 parts.clear_cache(WITHOUT_HYDROGENS_SPEC.needs_update());
4719 parts.commit_topology(topology).unwrap();
4720 parts.commit_coordinates(coordinates).unwrap();
4721 parts.commit_properties(properties).unwrap();
4722
4723 let result = parts
4724 .finish(OpOutcome::Changed)
4725 .expect("strong compacting edit should satisfy registry contract");
4726
4727 assert_eq!(molecule, original);
4728 assert_eq!(result.atomic_numbers(), vec![6, 7]);
4729 assert_eq!(result.num_bonds(), 0);
4730 assert_eq!(result.coordinates_2d().unwrap(), &[[0.0, 0.0], [2.0, 0.0]]);
4731 assert_eq!(
4732 result.properties().sdf_property_lists()[0].values(),
4733 &[Some("c0".to_string()), Some("n2".to_string())]
4734 );
4735 assert_eq!(result.properties().sdf_property_lists()[1].values(), &[]);
4736 }
4737
4738 #[test]
4739 fn strong_remove_atoms_remaps_surviving_sgroup_parent_relationships() {
4740 let mut builder = crate::Molecule::builder();
4741 let c0 = builder.add_atom(crate::AtomSpec::new(crate::Element::C));
4742 let o1 = builder.add_atom(crate::AtomSpec::new(crate::Element::O));
4743 let n2 = builder.add_atom(crate::AtomSpec::new(crate::Element::N));
4744 builder
4745 .add_substance_group(
4746 crate::SubstanceGroup::new(
4747 crate::SubstanceGroupId::new(0),
4748 crate::SubstanceGroupKind::Superatom,
4749 )
4750 .with_atoms(vec![c0]),
4751 )
4752 .unwrap();
4753 builder
4754 .add_substance_group(
4755 crate::SubstanceGroup::new(
4756 crate::SubstanceGroupId::new(1),
4757 crate::SubstanceGroupKind::Data,
4758 )
4759 .with_atoms(vec![o1])
4760 .with_parent(crate::SubstanceGroupId::new(0)),
4761 )
4762 .unwrap();
4763 let molecule = builder.build().unwrap();
4764
4765 let mut parts = OpParts::new(&molecule, &WITHOUT_HYDROGENS_SPEC).unwrap();
4766 let mut topology = parts.begin_topology_mut().unwrap();
4767 let mut coordinates = parts.begin_coordinates_mut().unwrap();
4768 let mut properties = parts.begin_properties_mut().unwrap();
4769 let mapping = topology.remove_atoms_with_mapping(&[n2]);
4770 coordinates.remap_topology(&mapping);
4771 properties.remap_topology(&mapping);
4772 parts
4773 .record_topology_edit(TopologyEditKind::Compacting)
4774 .unwrap();
4775 parts.record_topology_mapping(mapping);
4776 parts.clear_cache(WITHOUT_HYDROGENS_SPEC.needs_update());
4777 parts.commit_topology(topology).unwrap();
4778 parts.commit_coordinates(coordinates).unwrap();
4779 parts.commit_properties(properties).unwrap();
4780
4781 let result = parts
4782 .finish(OpOutcome::Changed)
4783 .expect("strong compacting edit should preserve surviving SGroup parent links");
4784
4785 assert_eq!(result.substance_groups().len(), 2);
4786 assert_eq!(
4787 result.substance_groups()[0].atoms(),
4788 &[crate::AtomId::new(0)]
4789 );
4790 assert_eq!(
4791 result.substance_groups()[1].atoms(),
4792 &[crate::AtomId::new(1)]
4793 );
4794 assert_eq!(
4795 result.substance_groups()[1].parent(),
4796 Some(crate::SubstanceGroupId::new(0))
4797 );
4798 }
4799
4800 #[test]
4801 fn with_hydrogens_extends_sdf_property_lists_for_appended_atoms_and_bonds() {
4802 let mut builder = crate::Molecule::builder();
4803 let carbon = builder.add_atom(
4804 crate::AtomSpec::new(crate::Element::C)
4805 .with_explicit_hydrogens(1)
4806 .with_no_implicit(true),
4807 );
4808 let properties = crate::MoleculeProperties::default()
4809 .with_sdf_property_list(crate::SdfPropertyList::new(
4810 crate::SdfPropertyListTarget::Atom,
4811 "atom_tag",
4812 vec![Some("c0".to_string())],
4813 ))
4814 .with_sdf_property_list(crate::SdfPropertyList::new(
4815 crate::SdfPropertyListTarget::Bond,
4816 "bond_tag",
4817 Vec::new(),
4818 ));
4819 builder = builder.with_properties(properties);
4820 let molecule = builder.build().unwrap();
4821
4822 let result = molecule.with_hydrogens().unwrap();
4823
4824 assert_eq!(result.num_atoms(), 2);
4825 assert_eq!(
4826 result.properties().sdf_property_lists()[0].values(),
4827 &[Some("c0".to_string()), None]
4828 );
4829 assert_eq!(
4830 result.properties().sdf_property_lists()[1].values(),
4831 &[None]
4832 );
4833 assert_eq!(result.atoms()[carbon.index()].explicit_hydrogens(), 0);
4834 }
4835
4836 #[cfg(feature = "op-contracts")]
4837 #[test]
4838 fn compacting_topology_edit_record_is_rejected_for_weak_operations() {
4839 let molecule = crate::Molecule::new();
4840 let mut parts = OpParts::new(&molecule, &WITH_KEKULIZED_BONDS_SPEC).unwrap();
4841 let err = parts
4842 .record_topology_edit(TopologyEditKind::Compacting)
4843 .expect_err("weak operations must not record compacting topology edits");
4844 assert!(matches!(err, OperationError::InvalidInput { .. }));
4845 }
4846
4847 #[test]
4848 fn with_kekulized_bonds_uses_rdkit_wrapper_canonical_default_for_benzene() {
4849 let molecule = crate::Molecule::from_smiles("C1=CC=CC=C1").unwrap();
4850
4851 let kekulized = molecule.with_kekulized_bonds(false).unwrap();
4852 let bond_orders = kekulized
4853 .bonds()
4854 .iter()
4855 .map(|bond| bond.order())
4856 .collect::<Vec<_>>();
4857
4858 assert_eq!(
4859 bond_orders,
4860 vec![
4861 crate::BondOrder::Single,
4862 crate::BondOrder::Double,
4863 crate::BondOrder::Single,
4864 crate::BondOrder::Double,
4865 crate::BondOrder::Single,
4866 crate::BondOrder::Double
4867 ]
4868 );
4869 assert!(kekulized.bonds().iter().all(|bond| bond.is_aromatic()));
4870 assert!(kekulized.atoms().iter().all(|atom| atom.is_aromatic()));
4871 }
4872}