// cosmolkit_core/notation/smiles_write.rs

1// RDKit marker convention defined in dev/source_reproduction_protocol.md.
2
3mod cx;
4mod direction;
5mod stereo;
6
7pub(crate) use self::stereo::serialize_ring_stereo_atoms;
8use self::{cx::*, direction::*, stereo::*};
9
10use crate::{
11    AtomId, AtomQueryPredicate, Bond, BondDirection, BondId, BondOrder, BondStereo, ChiralTag,
12    Molecule, QueryNode, ValenceError,
13};
14use std::collections::{BTreeMap, BTreeSet};
15use std::sync::atomic::{AtomicU64, Ordering};
16
thread_local! {
    // Per-thread random-SMILES seed. `with_random_smiles_seed` installs a seed
    // for the duration of one call and `next_random_smiles_u64` advances it
    // through `splitmix64`. Every thread starts at 0.
    static RANDOM_SMILES_SEED: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };
}

// Process-wide counter consumed by `next_unseeded_random_smiles_seed` so that
// unseeded requests receive different seeds. Its initial value is the same
// constant that `splitmix64` uses as its additive increment.
static RANDOM_SMILES_COUNTER: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
// NOTE(review): neither limit is referenced in this part of the file;
// presumably they mirror RDKit's canonicalization limits — confirm.
const CANON_MAX_NATOMS: i64 = 5000;
const CANON_MAX_BONDTYPE: i64 = 32;
24
/// Options controlling SMILES / CXSMILES output.
///
/// Field names follow RDKit's `SmilesWriteParams`; the writer works on a clone
/// of these parameters, so the caller's value is never modified.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmilesWriteParams {
    /// Gates stereochemistry assignment during preparation. When `false`, the
    /// plain-SMILES path clears all bond-direction flags.
    pub do_isomeric_smiles: bool,
    /// Kekulize the working molecule before fragments are written.
    pub do_kekule: bool,
    /// Request canonical output. Also controls when saved atom-map numbers are
    /// restored onto the working molecule.
    pub canonical: bool,
    /// Forwarded to stereochemistry assignment; in CX mode it additionally
    /// enables stereo-group cleanup.
    pub clean_stereo: bool,
    /// NOTE(review): not read in this part of the file; presumably forces an
    /// explicit symbol for every bond — confirm against the bond writer.
    pub all_bonds_explicit: bool,
    /// NOTE(review): not read in this part of the file; presumably forces
    /// explicit hydrogen counts on every atom — confirm against the atom writer.
    pub all_hydrogens_explicit: bool,
    /// Random (non-canonical) output; `mol_to_random_smiles_vect` sets this
    /// together with `canonical: false`.
    pub do_random: bool,
    /// Atom index to root the traversal at. Only the fragment containing this
    /// atom receives the root.
    pub rooted_at_atom: Option<usize>,
    /// When `false`, dative bonds are normalized away for plain SMILES.
    pub include_dative_bonds: bool,
    /// When `true`, atom-map numbers are stashed and cleared before
    /// preparation and restored later.
    pub ignore_atom_map_numbers: bool,
}

impl Default for SmilesWriteParams {
    /// Isomeric, canonical, non-kekulized output with stereo cleanup, implicit
    /// bonds/hydrogens, no rooting, dative bonds kept, atom maps respected.
    fn default() -> Self {
        Self {
            do_isomeric_smiles: true,
            do_kekule: false,
            canonical: true,
            clean_stereo: true,
            all_bonds_explicit: false,
            all_hydrogens_explicit: false,
            do_random: false,
            rooted_at_atom: None,
            include_dative_bonds: true,
            ignore_atom_map_numbers: false,
        }
    }
}
55
/// Bit set selecting which CXSMILES extension fields are written.
///
/// Each associated constant occupies one bit of the wrapped `u32`; sets are
/// combined with [`CxSmilesFields::combine`] or the `|` operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CxSmilesFields(u32);

impl CxSmilesFields {
    /// The empty set.
    pub const NONE: Self = Self(0);
    pub const ATOM_LABELS: Self = Self(1 << 0);
    pub const MOLFILE_VALUES: Self = Self(1 << 1);
    pub const COORDS: Self = Self(1 << 2);
    pub const RADICALS: Self = Self(1 << 3);
    pub const ATOM_PROPS: Self = Self(1 << 4);
    pub const LINKNODES: Self = Self(1 << 5);
    pub const ENHANCED_STEREO: Self = Self(1 << 6);
    pub const SGROUPS: Self = Self(1 << 7);
    pub const POLYMER: Self = Self(1 << 8);
    pub const BOND_CFG: Self = Self(1 << 9);
    pub const BOND_ATROPISOMER: Self = Self(1 << 10);
    pub const COORDINATE_BONDS: Self = Self(1 << 11);
    pub const HYDROGEN_BONDS: Self = Self(1 << 12);
    pub const ZERO_BONDS: Self = Self(1 << 13);
    /// Every bit of the low 31 bits, including bits with no named field yet.
    pub const ALL: Self = Self(0x7fff_ffff);
    /// [`Self::ALL`] with the [`Self::COORDS`] bit cleared.
    pub const ALL_BUT_COORDS: Self = Self(Self::ALL.0 ^ Self::COORDS.0);

    /// Raw bit pattern of this set.
    #[must_use]
    pub const fn bits(self) -> u32 {
        let Self(raw) = self;
        raw
    }

    /// `true` when every bit of `other` is also set in `self`
    /// (so every set contains [`Self::NONE`]).
    #[must_use]
    pub const fn contains(self, other: Self) -> bool {
        let shared = self.0 & other.0;
        shared == other.0
    }

    /// Union of the two sets; usable in `const` contexts, unlike `|`.
    #[must_use]
    pub const fn combine(self, other: Self) -> Self {
        Self(self.0 | other.0)
    }
}

impl std::ops::BitOr for CxSmilesFields {
    type Output = Self;

    /// Operator form of [`CxSmilesFields::combine`].
    fn bitor(self, rhs: Self) -> Self {
        self.combine(rhs)
    }
}
101
/// Bond-direction policy for CXSMILES output; `prepare_cx_smiles_molecule`
/// forwards it to `apply_cx_bond_direction_policy`.
///
/// NOTE(review): variant semantics are not visible in this part of the file;
/// presumably they mirror RDKit's `RestoreBondDirOption` — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RestoreBondDirOption {
    None,
    True,
    Clear,
}
108
/// Internal selector between plain SMILES and CXSMILES output.
///
/// `mol_to_smiles` passes `PlainSmiles`; `mol_to_cx_smiles` passes `CxSmiles`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SmilesOutputMode {
    /// Plain SMILES with no CX extension block.
    PlainSmiles,
    /// SMILES followed by a CX extension block when that block is non-empty.
    CxSmiles {
        /// Extension fields requested by the caller.
        fields: CxSmilesFields,
        /// Bond-direction policy applied during CX preparation.
        restore_bond_dirs: RestoreBondDirOption,
        /// Set by `mol_to_cx_smiles` to whether `fields` contains
        /// `CxSmilesFields::ENHANCED_STEREO`.
        include_stereo_groups: bool,
    },
}
118
/// Writer phases named in internal invariant-violation diagnostics.
///
/// A variant identifies the part of the SMILES writing pipeline in which an
/// internal contract was found to be broken.
///
/// ## Status of each stage
///
/// - `ShortTermAtomWriter`: atom-level guards / deferred operations.
///   Covers the empty-rank edge case, chiral/query/radical atoms in the
///   minimal-fast-path guard, and non-whitelisted atom properties.
/// - `ShortTermBondWriter`: bond-level guards / deferred operations.
///   Covers dative-bond stripping, Unknown/EitherDouble/Any bond
///   direction/stereo for plain SMILES, ring-closure digit exhaustion,
///   and non-standard bond orders in the fast-path guard.
/// - `LongTermCanonicalRanking`: canonical rank calculation. Defined but
///   unused in the deferred-error path — canonical ranking errors use
///   `SmilesWriteError::CanonicalRank` directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SmilesPlanStage {
    ShortTermAtomWriter,
    ShortTermBondWriter,
    LongTermCanonicalRanking,
}

impl SmilesPlanStage {
    /// Static name of the stage; each string equals the variant identifier.
    const fn as_str(self) -> &'static str {
        match self {
            SmilesPlanStage::ShortTermAtomWriter => "ShortTermAtomWriter",
            SmilesPlanStage::ShortTermBondWriter => "ShortTermBondWriter",
            SmilesPlanStage::LongTermCanonicalRanking => "LongTermCanonicalRanking",
        }
    }
}
152
/// Errors returned by the SMILES / CXSMILES writer.
///
/// Variants carrying `#[from]` are produced by `?` on the corresponding
/// lower-level error type.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SmilesWriteError {
    /// A requested feature is not supported; displays as the wrapped error.
    #[error(transparent)]
    UnsupportedFeature(#[from] crate::UnsupportedFeatureError),
    /// Canonical ranking failed. Wraps the same error type as `Kekulize` but
    /// has no `#[from]`, so it must be constructed explicitly.
    #[error("canonical ranking failed: {source}")]
    CanonicalRank { source: crate::KekulizeError },
    /// Kekulization of the working molecule failed.
    #[error("kekulization failed: {source}")]
    Kekulize {
        #[from]
        source: crate::KekulizeError,
    },
    /// A molecule operation failed while preparing the output.
    #[error("operation failed while preparing SMILES output: {source}")]
    Operation {
        #[from]
        source: crate::OperationError,
    },
    /// Valence calculation failed.
    #[error("valence calculation failed: {source}")]
    Valence {
        #[from]
        source: ValenceError,
    },
    /// Stereochemistry preparation failed.
    #[error("stereochemistry preparation failed: {source}")]
    Stereo {
        #[from]
        source: crate::StereoError,
    },
    /// Ring finding failed while preparing the output.
    #[error("ring finding failed while preparing SMILES output: {source}")]
    RingFinding {
        #[from]
        source: crate::RingFindingError,
    },
    /// An atom index does not refer to an atom of the molecule.
    #[error("atom index {atom} is out of range")]
    AtomOutOfRange { atom: usize },
    /// A bond index does not refer to a bond of the molecule.
    #[error("bond index {bond} is out of range")]
    BondOutOfRange { bond: usize },
    /// The requested root atom index does not refer to an atom of the molecule.
    #[error("rooted atom index {atom} is out of range")]
    RootedAtomOutOfRange { atom: usize },
    /// The requested root atom is not part of the atom subset being written.
    #[error("rooted atom index {atom} is not present in atoms_to_use")]
    RootedAtomNotInFragment { atom: usize },
    /// Rooting was requested without `bonds_to_use` on a multi-fragment molecule.
    #[error(
        "rooted atom index {atom} requires a single-fragment molecule when bonds_to_use is omitted"
    )]
    RootedAtomRequiresSingleFragment { atom: usize },
    /// The atom symbol override slice is shorter than required.
    #[error("atom symbol override vector has length {len}, expected at least {expected}")]
    AtomSymbolsTooShort { len: usize, expected: usize },
    /// The bond symbol override slice is shorter than required.
    #[error("bond symbol override vector has length {len}, expected at least {expected}")]
    BondSymbolsTooShort { len: usize, expected: usize },
    /// A non-tetrahedral chiral permutation exceeds the limit for its chiral tag.
    #[error(
        "invalid non-tetrahedral chiral permutation {permutation} for {chiral_tag:?}; max allowed is {limit}"
    )]
    InvalidChiralPermutation {
        chiral_tag: ChiralTag,
        permutation: u32,
        limit: u32,
    },
    /// Ring stereochemistry bookkeeping on `atom` violates `requirement`.
    #[error("invalid ring stereochemistry state on atom {atom}: {requirement}")]
    InvalidRingStereoState {
        atom: usize,
        requirement: &'static str,
    },
    /// An internal writer contract was broken.
    /// NOTE(review): `stage` is presumably `SmilesPlanStage::as_str()` — confirm at construction sites.
    #[error("internal SMILES writer invariant violated in {stage}: {message}")]
    InvariantViolation {
        stage: &'static str,
        message: &'static str,
    },
}
219
/// Mutable state threaded through the writing of all fragments of one molecule.
///
/// `fragment_smiles_construct` merges the chirality bookkeeping of each
/// fragment traversal into this context, and `mol_to_smiles_with_mode` reads
/// the output orders to scope the CX extension block.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct SmilesWriteContext {
    /// Atom output order; becomes the CX scope's atom order.
    atom_output_order: Vec<AtomId>,
    /// Bond output order; becomes the CX scope's bond order.
    bond_output_order: Vec<BondId>,
    // NOTE(review): the two ring-closure fields are not used in this part of
    // the file; presumably ring-closure digit bookkeeping of the stack writer
    // — confirm against `write_mol_stack`.
    ring_closure_digits: BTreeMap<usize, usize>,
    ring_closures_to_erase: Vec<usize>,
    /// Per-atom chiral tag overrides accumulated from fragment traversals.
    chiral_tag_overrides: BTreeMap<AtomId, ChiralTag>,
    /// Atoms flagged for chirality inversion by fragment traversals.
    chiral_inversions: BTreeSet<AtomId>,
    /// Per-atom chiral permutation numbers accumulated from fragment traversals.
    chiral_permutations: BTreeMap<AtomId, u32>,
    /// Atoms whose chirality a traversal reported as broken.
    broken_chiral_atoms: BTreeSet<AtomId>,
}
231
/// One connected fragment scheduled for writing, as built by
/// `collect_fragment_write_plans`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FragmentWritePlan {
    /// Atoms of the fragment, in ascending atom-index order.
    atoms: Vec<AtomId>,
    /// Bonds whose begin and end atoms both belong to the fragment, in
    /// molecule bond order.
    bonds: Vec<BondId>,
    /// The requested root atom, set only on the fragment that contains it.
    rooted_at_atom: Option<AtomId>,
}
238
/// Output of writing a single fragment; the per-fragment results are combined
/// by `assemble_fragment_smiles`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct FragmentWriteResult {
    /// SMILES text of the fragment.
    smiles: String,
    // NOTE(review): presumably the order in which atoms/bonds were emitted for
    // this fragment — confirm against `write_mol_stack`.
    atom_ordering: Vec<AtomId>,
    bond_ordering: Vec<BondId>,
}
245
/// Optional caller-supplied symbol overrides. The default (both `None`) is
/// what `mol_to_smiles_with_mode` passes.
#[derive(Debug, Clone, Copy, Default)]
struct SmilesWriteOverrides<'a> {
    // NOTE(review): presumably indexed by atom index / bond index; the
    // `AtomSymbolsTooShort` / `BondSymbolsTooShort` errors suggest a minimum
    // length is enforced elsewhere — confirm.
    atom_symbols: Option<&'a [String]>,
    bond_symbols: Option<&'a [String]>,
}
251
252#[derive(Debug, Clone)]
253struct CxWriteScope {
254    atom_order: Vec<AtomId>,
255    bond_order: Vec<BondId>,
256}
257
258impl CxWriteScope {
259    fn full_molecule(molecule: &Molecule) -> Self {
260        Self {
261            atom_order: molecule.atoms().iter().map(|atom| atom.id()).collect(),
262            bond_order: molecule.bonds().iter().map(|bond| bond.id()).collect(),
263        }
264    }
265}
266
/// One element of the linearized traversal stack from which the SMILES string
/// is written (see `CanonicalTraversalResult::stack`).
///
/// NOTE(review): the reason for `#[allow(dead_code)]` is not visible here;
/// presumably some variants or fields are only pattern-matched, never
/// constructed — confirm and document, or remove the allow.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
enum MolStackElem {
    /// An atom to emit.
    Atom(AtomId),
    /// A bond to emit. NOTE(review): the `AtomId` is presumably the atom the
    /// bond leads to in traversal order — confirm.
    Bond(BondId, AtomId),
    /// A ring-closure marker for `bond`, identified by `ring_idx`.
    Ring { bond: BondId, ring_idx: usize },
    /// Start of a branch.
    BranchOpen,
    /// End of a branch.
    BranchClose,
}
276
/// Result of traversing one fragment; consumed by `fragment_smiles_construct`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct CanonicalTraversalResult {
    /// Linearized traversal that is handed to `write_mol_stack`.
    stack: Vec<MolStackElem>,
    /// Passed with `stack` to double-bond direction canonicalization.
    /// NOTE(review): presumably indexed by bond index, `true` for bonds the
    /// traversal closed as ring closures — confirm.
    traversal_ring_closure_bonds: Vec<bool>,
    // The four collections below are merged into the same-named fields of
    // `SmilesWriteContext` after direction canonicalization succeeds.
    chiral_tag_overrides: BTreeMap<AtomId, ChiralTag>,
    chiral_inversions: BTreeSet<AtomId>,
    chiral_permutations: BTreeMap<AtomId, u32>,
    broken_chiral_atoms: BTreeSet<AtomId>,
}
286
287pub fn mol_to_smiles(
288    molecule: &Molecule,
289    params: &SmilesWriteParams,
290) -> Result<String, SmilesWriteError> {
291    mol_to_smiles_with_mode(molecule, params, SmilesOutputMode::PlainSmiles)
292}
293
294pub fn mol_to_cx_smiles(
295    molecule: &Molecule,
296    params: &SmilesWriteParams,
297    fields: CxSmilesFields,
298    restore_bond_dirs: RestoreBondDirOption,
299) -> Result<String, SmilesWriteError> {
300    mol_to_smiles_with_mode(
301        molecule,
302        params,
303        SmilesOutputMode::CxSmiles {
304            fields,
305            restore_bond_dirs,
306            include_stereo_groups: fields.contains(CxSmilesFields::ENHANCED_STEREO),
307        },
308    )
309}
310
311// BEGIN RDKIT CPP FUNCTION MolToRandomSmilesVect
312// RDKit✔️✔️: std::vector<std::string> MolToRandomSmilesVect(
313// RDKit✔️✔️:     const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed,
314// RDKit✔️✔️:     bool doIsomericSmiles, bool doKekule, bool allBondsExplicit,
315// RDKit✔️✔️:     bool allHsExplicit) {
316// RDKit✔️✔️:   if (randomSeed > 0) {
317// RDKit✔️✔️:     getRandomGenerator(rdcast<int>(randomSeed));
318// RDKit✔️✔️:   }
319// RDKit✔️✔️:   std::vector<std::string> res;
320// RDKit✔️✔️:   res.reserve(numSmiles);
321// RDKit✔️✔️:   for (unsigned int i = 0; i < numSmiles; ++i) {
322// RDKit✔️✔️:     bool canonical = false;
323// RDKit✔️✔️:     int rootedAtAtom = -1;
324// RDKit✔️✔️:     bool doRandom = true;
325// RDKit✔️✔️:     res.push_back(MolToSmiles(mol, doIsomericSmiles, doKekule, rootedAtAtom,
326// RDKit✔️✔️:                               canonical, allBondsExplicit, allHsExplicit,
327// RDKit✔️✔️:                               doRandom));
328// RDKit✔️✔️:   }
329// RDKit✔️✔️:   return res;
330// RDKit✔️✔️: };
331// END RDKIT CPP FUNCTION MolToRandomSmilesVect
332pub fn mol_to_random_smiles_vect(
333    molecule: &Molecule,
334    num_smiles: usize,
335    random_seed: u64,
336    do_isomeric_smiles: bool,
337    do_kekule: bool,
338    all_bonds_explicit: bool,
339    all_hydrogens_explicit: bool,
340) -> Result<Vec<String>, SmilesWriteError> {
341    let mut result = Vec::with_capacity(num_smiles);
342    let mut stream_seed = if random_seed == 0 {
343        next_unseeded_random_smiles_seed(0)
344    } else {
345        random_seed
346    };
347    for _ in 0..num_smiles {
348        stream_seed = splitmix64(stream_seed);
349        let params = SmilesWriteParams {
350            do_isomeric_smiles,
351            do_kekule,
352            canonical: false,
353            clean_stereo: true,
354            all_bonds_explicit,
355            all_hydrogens_explicit,
356            do_random: true,
357            rooted_at_atom: None,
358            include_dative_bonds: true,
359            ignore_atom_map_numbers: false,
360        };
361        result.push(with_random_smiles_seed(stream_seed, || {
362            mol_to_smiles(molecule, &params)
363        })?);
364    }
365    Ok(result)
366}
367
368fn with_random_smiles_seed<T>(
369    seed: u64,
370    f: impl FnOnce() -> Result<T, SmilesWriteError>,
371) -> Result<T, SmilesWriteError> {
372    RANDOM_SMILES_SEED.with(|cell| {
373        let previous = cell.replace(seed);
374        let result = f();
375        cell.set(previous);
376        result
377    })
378}
379
380fn next_random_smiles_u64() -> u64 {
381    RANDOM_SMILES_SEED.with(|cell| {
382        let current = cell.get();
383        let next = splitmix64(current);
384        cell.set(next);
385        current
386    })
387}
388
389fn next_unseeded_random_smiles_seed(offset: u64) -> u64 {
390    splitmix64(
391        RANDOM_SMILES_COUNTER
392            .fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed)
393            .wrapping_add(offset),
394    )
395}
396
/// One step of the SplitMix64 generator: adds the increment constant to
/// `value` and returns the mixed result. All arithmetic wraps.
fn splitmix64(value: u64) -> u64 {
    let z = value.wrapping_add(0x9e37_79b9_7f4a_7c15);
    let z = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
    let z = (z ^ (z >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
    z ^ (z >> 31)
}
403
404fn mol_to_smiles_with_mode(
405    molecule: &Molecule,
406    params: &SmilesWriteParams,
407    mode: SmilesOutputMode,
408) -> Result<String, SmilesWriteError> {
409    // BEGIN RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles
410    // RDKit✔️✔️: std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
411    // RDKit✔️✔️:                         bool doingCXSmiles, bool includeStereoGroups) {
412    // RDKit✔️✔️:   if (!mol.getNumAtoms()) {
413    // RDKit✔️✔️:     return "";
414    // RDKit✔️✔️:   }
415    // RDKit✔️✔️:   PRECONDITION(
416    // RDKit✔️✔️:       params.rootedAtAtom < 0 ||
417    // RDKit✔️✔️:           static_cast<unsigned int>(params.rootedAtAtom) < mol.getNumAtoms(),
418    // RDKit✔️✔️:       "rootedAtAtom must be less than the number of atoms");
419    // RDKit✔️✔️:
420    // RDKit✔️✔️:   int rootedAtAtom;
421    // RDKit✔️✔️:   std::vector<int> fragsRootedAtAtom;
422    // RDKit✔️✔️:   std::vector<std::vector<int>> fragsMolAtomMapping;
423    // RDKit✔️✔️:   auto mols =
424    // RDKit✔️✔️:       MolOps::getMolFrags(mol, false, nullptr, &fragsMolAtomMapping, false);
425    // RDKit✔️✔️:   std::vector<std::vector<int>> fragsMolBondMapping;
426    // RDKit✔️✔️:   std::vector<std::string> vfragsmi(mols.size());
427    // RDKit✔️✔️:   std::vector<std::vector<RDKit::UINT>> allAtomOrdering;
428    // RDKit✔️✔️:   std::vector<std::vector<RDKit::UINT>> allBondOrdering;
429    // RDKit✔️✔️:   for (unsigned fragIdx = 0; fragIdx < mols.size(); fragIdx++) {
430    // RDKit✔️✔️:     ROMol *tmol = mols[fragIdx].get();
431    // RDKit✔️✔️:     std::vector<int> atomMapNums(tmol->getNumAtoms(), 0);
432    // RDKit✔️✔️:     for (auto atom : tmol->atoms()) {
433    // RDKit✔️✔️:       atom->updatePropertyCache(false);
434    // RDKit✔️✔️:     }
435    // RDKit✔️✔️:     if (params.doIsomericSmiles) {
436    // RDKit✔️✔️:       tmol->setProp(common_properties::_doIsoSmiles, 1);
437    // RDKit✔️✔️:       if (!tmol->hasProp(common_properties::_StereochemDone)) {
438    // RDKit✔️✔️:         MolOps::assignStereochemistry(*tmol, params.cleanStereo);
439    // RDKit✔️✔️:       }
440    // RDKit✔️✔️:     }
441    // RDKit✔️✔️:     if (params.canonical) {
442    // RDKit✔️✔️:       Canon::rankMolAtoms(*tmol, ranks, breakTies, includeChirality,
443    // RDKit✔️✔️:                           includeIsotopes, includeAtomMaps,
444    // RDKit✔️✔️:                           includeChiralPresence, includeStereoGroups,
445    // RDKit✔️✔️:                           useNonStereoRanks);
446    // RDKit✔️✔️:     } else {
447    // RDKit✔️✔️:       std::iota(ranks.begin(), ranks.end(), 0);
448    // RDKit✔️✔️:     }
449    // RDKit✔️✔️:     subSmi = SmilesWrite::FragmentSmilesConstruct(
450    // RDKit✔️✔️:         *tmol, nextAtomIdx, colors, ranks, params, atomOrdering, bondOrdering);
451    // RDKit✔️✔️:   }
452    // RDKit✔️✔️:   if (params.canonical) {
453    // RDKit✔️✔️:     std::sort(tmp.begin(), tmp.end());
454    // RDKit✔️✔️:   } else {
455    // RDKit✔️✔️:     for (unsigned i = 0; i < vfragsmi.size(); ++i) {
456    // RDKit✔️✔️:       result += vfragsmi[i];
457    // RDKit✔️✔️:       if (i < vfragsmi.size() - 1) {
458    // RDKit✔️✔️:         result += ".";
459    // RDKit✔️✔️:       }
460    // RDKit✔️✔️:     }
461    // RDKit✔️✔️:   }
462    // RDKit✔️✔️:   mol.setProp(common_properties::_smilesAtomOutputOrder, flattenedAtomOrdering,
463    // RDKit✔️✔️:               true);
464    // RDKit✔️✔️:   mol.setProp(common_properties::_smilesBondOutputOrder, flattenedBondOrdering,
465    // RDKit✔️✔️:               true);
466    // RDKit✔️✔️:   return result;
467    // RDKit✔️✔️: }
468    // END RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles
469    validate_rooted_atom(molecule, params)?;
470    if molecule.num_atoms() == 0 {
471        return Ok(String::new());
472    }
473
474    let mut molecule = molecule.clone();
475
476    let mut context = SmilesWriteContext::default();
477    let mut fragment_results = Vec::new();
478    let mut working_params = params.clone();
479
480    let saved_atom_maps = match mode {
481        SmilesOutputMode::PlainSmiles => {
482            prepare_plain_smiles_molecule(&mut molecule, &working_params)?
483        }
484        SmilesOutputMode::CxSmiles {
485            fields,
486            restore_bond_dirs,
487            include_stereo_groups,
488        } => prepare_cx_smiles_molecule(
489            &mut molecule,
490            &mut working_params,
491            fields,
492            restore_bond_dirs,
493            include_stereo_groups,
494        )?,
495    };
496
497    let fragment_plans = collect_fragment_write_plans(&molecule, &working_params)?;
498    let fragment_ranks = fragment_plans
499        .iter()
500        .map(|plan| rank_fragment_atoms_for_smiles(&molecule, plan, &working_params, mode))
501        .collect::<Result<Vec<_>, _>>()?;
502    // RDKit✔️✔️:       if (params.ignoreAtomMapNumbers) {
503    // RDKit✔️✔️:         for (auto atom : tmol->atoms()) {
504    // RDKit✔️✔️:           atom->setAtomMapNum(atomMapNums[atom->getIdx()]);
505    // RDKit✔️✔️:         }
506    // RDKit✔️✔️:       }
507    //
508    // RDKit restores atom maps immediately after canonical traversal ranking
509    // and before FragmentSmilesConstruct(), whose doKekule branch calls
510    // KekulizeFragment(). That means canonical kekulization still ranks with
511    // the original atom maps even when ignoreAtomMapNumbers=true.
512    if working_params.canonical {
513        restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
514    }
515    if params.do_kekule {
516        molecule = kekulize_for_smiles(&molecule)?;
517    }
518    // do_kekule already handled on the working molecule; keep the rest of the
519    // writer on the post-kekulization topology without re-running that stage.
520    working_params.do_kekule = false;
521    for (plan, ranks) in fragment_plans.iter().zip(fragment_ranks.iter()) {
522        if working_params.canonical {
523            restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
524        }
525        fragment_results.push(write_fragment_smiles_with_ranks(
526            &mut molecule,
527            plan,
528            &ranks,
529            &working_params,
530            SmilesWriteOverrides::default(),
531            &mut context,
532        )?);
533        if working_params.canonical && saved_atom_maps.is_some() {
534            let _ = stash_and_clear_atom_maps_for_smiles(&mut molecule, &working_params);
535        }
536    }
537    if working_params.canonical {
538        restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
539    }
540
541    let mut result = assemble_fragment_smiles(fragment_results, &working_params, &mut context)?;
542    if let SmilesOutputMode::CxSmiles { fields, .. } = mode {
543        let scope = CxWriteScope {
544            atom_order: context.atom_output_order.clone(),
545            bond_order: context.bond_output_order.clone(),
546        };
547        let cx_extension = get_cx_extensions_scoped(&molecule, fields, &scope)?;
548        if !cx_extension.is_empty() {
549            result.push(' ');
550            result.push_str(&cx_extension);
551        }
552    }
553    Ok(result)
554}
555
556fn prepare_plain_smiles_molecule(
557    molecule: &mut Molecule,
558    params: &SmilesWriteParams,
559) -> Result<Option<Vec<Option<u32>>>, SmilesWriteError> {
560    // BEGIN RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles fragment preparation section
561    // RDKit✔️✔️:     // update property cache
562    // RDKit✔️✔️:     std::vector<int> atomMapNums(tmol->getNumAtoms(), 0);
563    // RDKit✔️✔️:     for (auto atom : tmol->atoms()) {
564    // RDKit✔️✔️:       if (params.ignoreAtomMapNumbers) {
565    // RDKit✔️✔️:         atomMapNums[atom->getIdx()] = atom->getAtomMapNum();
566    // RDKit✔️✔️:         atom->setAtomMapNum(0);
567    // RDKit✔️✔️:       }
568    // RDKit✔️✔️:       atom->updatePropertyCache(false);
569    // RDKit✔️✔️:     }
570    // RDKit✔️✔️:
571    // RDKit✔️✔️:     // clean up the chirality on any atom that is marked as chiral,
572    // RDKit✔️✔️:     // but that should not be:
573    // RDKit✔️✔️:     if (params.doIsomericSmiles) {
574    // RDKit✔️✔️:       tmol->setProp(common_properties::_doIsoSmiles, 1);
575    // RDKit✔️✔️:
576    // RDKit✔️✔️:       if (!tmol->hasProp(common_properties::_StereochemDone)) {
577    // RDKit✔️✔️:         MolOps::assignStereochemistry(*tmol, params.cleanStereo);
578    // RDKit✔️✔️:       }
579    // RDKit✔️✔️:     }
580    // RDKit✔️✔️:     if (!doingCXSmiles || !includeStereoGroups) {
581    // RDKit✔️✔️:       std::vector<StereoGroup> noStereoGroups;
582    // RDKit✔️✔️:       tmol->setStereoGroups(noStereoGroups);
583    // RDKit✔️✔️:     }
584    // RDKit✔️✔️:     if (!doingCXSmiles) {
585    // RDKit✔️✔️:       for (auto bond : tmol->bonds()) {
586    // RDKit✔️✔️:         if (bond->getBondDir() == Bond::BondDir::UNKNOWN ||
587    // RDKit✔️✔️:             bond->getBondDir() == Bond::BondDir::EITHERDOUBLE) {
588    // RDKit✔️✔️:           bond->setBondDir(Bond::BondDir::NONE);
589    // RDKit✔️✔️:         }
590    // RDKit✔️✔️:         if (bond->getStereo() == Bond::BondStereo::STEREOANY) {
591    // RDKit✔️✔️:           bond->setStereo(Bond::BondStereo::STEREONONE);
592    // RDKit✔️✔️:         }
593    // RDKit✔️✔️:       }
594    // RDKit✔️✔️:     }
595    // RDKit✔️✔️:     if (doingCXSmiles || !params.includeDativeBonds) {
596    // RDKit✔️✔️:       for (auto bond : tmol->bonds()) {
597    // RDKit✔️✔️:         if (bond->getBondType() == Bond::DATIVE) {
598    // RDKit✔️✔️:           bond->setBondType(Bond::SINGLE);
599    // RDKit✔️✔️:           bond->getBeginAtom()->calcExplicitValence(false);
600    // RDKit✔️✔️:         }
601    // RDKit✔️✔️:       }
602    // RDKit✔️✔️:     }
603    // END RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles fragment preparation section
604    let saved_atom_maps = stash_and_clear_atom_maps_for_smiles(molecule, params);
605    if is_minimal_plain_smiles_path(params) && validate_minimal_plain_smiles_molecule(molecule) {
606        return Ok(saved_atom_maps);
607    }
608    clear_fragment_temp_molecule_computed_stereo_props_for_writer(molecule);
609    update_property_cache_for_smiles(molecule)?;
610    if params.do_isomeric_smiles {
611        if molecule.prop("_StereochemDone").is_none() {
612            assign_stereochemistry_for_smiles(molecule, params.clean_stereo)?;
613        }
614    }
615    // Kekulization is handled upstream in mol_to_smiles_with_mode before
616    // this function is called.
617    if params.do_random {
618        // Random SMILES uses non-canonical traversal with randomized
619        // bond ordering at each atom. Continue through the standard
620        // preparation path; randomization happens in the fragment
621        // traversal step.
622    }
623    if !params.include_dative_bonds {
624        normalize_dative_bonds_for_plain_smiles(molecule)?;
625    }
626    if !params.do_isomeric_smiles {
627        // RDKit plain non-isomeric SMILES suppresses bond-direction output.
628        crate::notation::smiles::clear_all_bond_dir_flags(molecule);
629    }
630    remove_plain_smiles_only_cx_state(molecule)?;
631    Ok(saved_atom_maps)
632}
633
634fn prepare_cx_smiles_molecule(
635    molecule: &mut Molecule,
636    params: &mut SmilesWriteParams,
637    fields: CxSmilesFields,
638    restore_bond_dirs: RestoreBondDirOption,
639    include_stereo_groups: bool,
640) -> Result<Option<Vec<Option<u32>>>, SmilesWriteError> {
641    let saved_atom_maps = stash_and_clear_atom_maps_for_smiles(molecule, params);
642    // Kekulization is handled upstream in mol_to_smiles_with_mode.
643    if is_minimal_plain_smiles_path(params) && validate_minimal_plain_smiles_molecule(molecule) {
644        // CX still needs CX-specific cleanup below; the fast path only skips
645        // property/stereo preparation for the simplest typed molecule state.
646    } else {
647        clear_fragment_temp_molecule_computed_stereo_props_for_writer(molecule);
648        update_property_cache_for_smiles(molecule)?;
649        if params.do_isomeric_smiles {
650            if molecule.prop("_StereochemDone").is_none() {
651                assign_stereochemistry_for_smiles(molecule, params.clean_stereo)?;
652            }
653        }
654    }
655    normalize_dative_bonds_for_cx_smiles(molecule)?;
656    normalize_hydrogen_bonds_for_cx_smiles(molecule)?;
657    apply_cx_bond_direction_policy(molecule, restore_bond_dirs)?;
658    if params.clean_stereo {
659        if molecule.prop("_StereochemDone").is_none() {
660            assign_stereochemistry_for_smiles(molecule, true)?;
661        }
662        cleanup_stereo_groups_for_cx_smiles(molecule)?;
663    }
664    if include_stereo_groups {
665        canonicalize_enhanced_stereo_for_smiles(molecule)?;
666    }
667    validate_cx_extension_plan(fields)?;
668    Ok(saved_atom_maps)
669}
670
671fn stash_and_clear_atom_maps_for_smiles(
672    molecule: &mut Molecule,
673    params: &SmilesWriteParams,
674) -> Option<Vec<Option<u32>>> {
675    if !params.ignore_atom_map_numbers {
676        return None;
677    }
678    let topology = molecule.topology_block_mut();
679    let saved = topology
680        .atoms
681        .iter()
682        .map(|atom| atom.atom_map())
683        .collect::<Vec<_>>();
684    for atom in &mut topology.atoms {
685        atom.set_atom_map(None);
686    }
687    Some(saved)
688}
689
690fn restore_atom_maps_after_canonical_smiles(
691    molecule: &mut Molecule,
692    saved_atom_maps: Option<&[Option<u32>]>,
693) {
694    let Some(saved_atom_maps) = saved_atom_maps else {
695        return;
696    };
697    let topology = molecule.topology_block_mut();
698    for (atom, atom_map) in topology
699        .atoms
700        .iter_mut()
701        .zip(saved_atom_maps.iter().copied())
702    {
703        atom.set_atom_map(atom_map);
704    }
705}
706
707fn collect_fragment_write_plans(
708    molecule: &Molecule,
709    params: &SmilesWriteParams,
710) -> Result<Vec<FragmentWritePlan>, SmilesWriteError> {
711    // BEGIN RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles fragment collection section
712    // RDKit✔️✔️:   int rootedAtAtom;
713    // RDKit✔️✔️:   std::vector<int> fragsRootedAtAtom;
714    // RDKit✔️✔️:   std::vector<std::vector<int>> fragsMolAtomMapping;
715    // RDKit✔️✔️:   auto mols =
716    // RDKit✔️✔️:       MolOps::getMolFrags(mol, false, nullptr, &fragsMolAtomMapping, false);
717    // RDKit✔️✔️:   // we got the mapping between fragments and atoms; repeat that for bonds
718    // RDKit✔️✔️:   std::vector<std::vector<int>> fragsMolBondMapping;
719    // RDKit✔️✔️:   boost::dynamic_bitset<> atsPresent(mol.getNumAtoms());
720    // RDKit✔️✔️:   std::vector<int> bondsInFrag;
721    // RDKit✔️✔️:   bondsInFrag.reserve(mol.getNumBonds());
722    // RDKit✔️✔️:   for (const auto &atsInFrag : fragsMolAtomMapping) {
723    // RDKit✔️✔️:     atsPresent.reset();
724    // RDKit✔️✔️:     bondsInFrag.clear();
725    // RDKit✔️✔️:     for (auto aidx : atsInFrag) {
726    // RDKit✔️✔️:       atsPresent.set(aidx);
727    // RDKit✔️✔️:     }
728    // RDKit✔️✔️:
729    // RDKit✔️✔️:     rootedAtAtom = -1;
730    // RDKit✔️✔️:     if (params.rootedAtAtom >= 0 && atsPresent[params.rootedAtAtom]) {
731    // RDKit✔️✔️:       rootedAtAtom = params.rootedAtAtom - atsPresent.find_first();
732    // RDKit✔️✔️:     }
733    // RDKit✔️✔️:     fragsRootedAtAtom.push_back(rootedAtAtom);
734    // RDKit✔️✔️:
735    // RDKit✔️✔️:     for (const auto bnd : mol.bonds()) {
736    // RDKit✔️✔️:       if (atsPresent[bnd->getBeginAtomIdx()] &&
737    // RDKit✔️✔️:           atsPresent[bnd->getEndAtomIdx()]) {
738    // RDKit✔️✔️:         bondsInFrag.push_back(bnd->getIdx());
739    // RDKit✔️✔️:       }
740    // RDKit✔️✔️:     }
741    // RDKit✔️✔️:     fragsMolBondMapping.push_back(bondsInFrag);
742    // RDKit✔️✔️:   }
743    // END RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles fragment collection section
744    let atom_to_fragment = crate::notation::fragment::get_fragment_atom_mapping(molecule);
745    if atom_to_fragment.is_empty() {
746        return Ok(Vec::new());
747    }
748    let fragment_count = atom_to_fragment.iter().copied().max().unwrap_or(0) + 1;
749    let mut fragment_atoms = vec![Vec::new(); fragment_count];
750    for (atom_idx, fragment_idx) in atom_to_fragment.iter().copied().enumerate() {
751        fragment_atoms[fragment_idx].push(AtomId::new(atom_idx));
752    }
753    let mut fragment_bonds = vec![Vec::new(); fragment_count];
754    for bond in molecule.bonds() {
755        let begin_fragment = atom_to_fragment[bond.begin().index()];
756        let end_fragment = atom_to_fragment[bond.end().index()];
757        if begin_fragment == end_fragment {
758            fragment_bonds[begin_fragment].push(bond.id());
759        }
760    }
761    let mut plans = Vec::with_capacity(fragment_count);
762    for fragment_idx in 0..fragment_count {
763        let atoms = std::mem::take(&mut fragment_atoms[fragment_idx]);
764        let rooted_at_atom = params
765            .rooted_at_atom
766            .map(AtomId::new)
767            .filter(|root| atom_to_fragment[root.index()] == fragment_idx);
768        plans.push(FragmentWritePlan {
769            bonds: std::mem::take(&mut fragment_bonds[fragment_idx]),
770            atoms,
771            rooted_at_atom,
772        });
773    }
774    Ok(plans)
775}
776
777fn write_fragment_smiles(
778    molecule: &mut Molecule,
779    plan: &FragmentWritePlan,
780    params: &SmilesWriteParams,
781    mode: SmilesOutputMode,
782    overrides: SmilesWriteOverrides<'_>,
783    context: &mut SmilesWriteContext,
784) -> Result<FragmentWriteResult, SmilesWriteError> {
785    let ranks = rank_fragment_atoms_for_smiles(molecule, plan, params, mode)?;
786    write_fragment_smiles_with_ranks(molecule, plan, &ranks, params, overrides, context)
787}
788
789fn write_fragment_smiles_with_ranks(
790    molecule: &mut Molecule,
791    plan: &FragmentWritePlan,
792    ranks: &[usize],
793    params: &SmilesWriteParams,
794    overrides: SmilesWriteOverrides<'_>,
795    context: &mut SmilesWriteContext,
796) -> Result<FragmentWriteResult, SmilesWriteError> {
797    let start_atom = choose_fragment_start_atom(plan, &ranks, params)?;
798    fragment_smiles_construct(
799        molecule, plan, start_atom, &ranks, params, overrides, context,
800    )
801}
802
803fn fragment_smiles_construct(
804    molecule: &mut Molecule,
805    plan: &FragmentWritePlan,
806    start_atom: AtomId,
807    ranks: &[usize],
808    params: &SmilesWriteParams,
809    overrides: SmilesWriteOverrides<'_>,
810    context: &mut SmilesWriteContext,
811) -> Result<FragmentWriteResult, SmilesWriteError> {
812    // Full-molecule kekulization is handled before fragment planning through
813    // the registered operation pipeline.
814    if params.canonical && params.do_isomeric_smiles {
815        canonicalize_enhanced_stereo_for_smiles(molecule)?;
816    }
817    let traversal =
818        canonicalize_fragment_stack(molecule, plan, start_atom, ranks, params, overrides)?;
819    canonicalize_double_bond_directions_for_writer(
820        molecule,
821        &traversal.stack,
822        &traversal.traversal_ring_closure_bonds,
823    )?;
824    context.chiral_tag_overrides.extend(
825        traversal
826            .chiral_tag_overrides
827            .iter()
828            .map(|(atom, tag)| (*atom, *tag)),
829    );
830    context
831        .chiral_inversions
832        .extend(traversal.chiral_inversions.iter().copied());
833    context.chiral_permutations.extend(
834        traversal
835            .chiral_permutations
836            .iter()
837            .map(|(atom, permutation)| (*atom, *permutation)),
838    );
839    context
840        .broken_chiral_atoms
841        .extend(traversal.broken_chiral_atoms.iter().copied());
842    write_mol_stack(molecule, &traversal.stack, params, overrides, context)
843}
844
845fn rank_fragment_atoms_for_smiles(
846    molecule: &Molecule,
847    plan: &FragmentWritePlan,
848    params: &SmilesWriteParams,
849    mode: SmilesOutputMode,
850) -> Result<Vec<usize>, SmilesWriteError> {
851    // BEGIN RDKIT CPP FUNCTION SmilesWrite non-canonical rank initialization
852    // RDKit✔️✔️:     } else {
853    // RDKit✔️✔️:       std::iota(ranks.begin(), ranks.end(), 0);
854    // RDKit✔️✔️:     }
855    // RDKit✔️✔️:     for (unsigned int i = 0; i < tmol.getNumAtoms(); ++i) {
856    // RDKit✔️✔️:       ranks[i] = i;
857    // RDKit✔️✔️:     }
858    // END RDKIT CPP FUNCTION SmilesWrite non-canonical rank initialization
859    // RDKit canonical mode still computes canonical ranks when rootedAtAtom
860    // is provided; rootedAtAtom only overrides traversal start.
861    if params.canonical && !params.do_random {
862        return rank_mol_atoms_for_smiles(molecule, plan, params, mode);
863    }
864    let _ = molecule;
865    Ok(plan.atoms.iter().map(|atom| atom.index()).collect())
866}
867
868fn rank_mol_atoms_for_smiles(
869    molecule: &Molecule,
870    plan: &FragmentWritePlan,
871    params: &SmilesWriteParams,
872    mode: SmilesOutputMode,
873) -> Result<Vec<usize>, SmilesWriteError> {
874    let _stage = SmilesPlanStage::LongTermCanonicalRanking;
875    let _ = mode;
876    // BEGIN RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles canonical rank options
877    // RDKit✔️✔️:       const bool includeChiralPresence = false;
878    // RDKit✔️✔️:       const bool includeIsotopes = params.doIsomericSmiles;
879    // RDKit✔️✔️:       ;
880    // RDKit✔️✔️:       const bool includeChirality = params.doIsomericSmiles;
881    // RDKit✔️✔️:       ;
882    // RDKit✔️✔️:       const bool includeStereoGroups = params.doIsomericSmiles;
883    // RDKit✔️✔️:       ;
884    // RDKit✔️✔️:       const bool useNonStereoRanks = false;
885    // RDKit✔️✔️:       const bool includeAtomMaps = true;
886    // RDKit✔️✔️:
887    // RDKit✔️✔️:       Canon::rankMolAtoms(*tmol, ranks, breakTies, includeChirality,
888    // RDKit✔️✔️:                           includeIsotopes, includeAtomMaps,
889    // RDKit✔️✔️:                           includeChiralPresence, includeStereoGroups,
890    // RDKit✔️✔️:                           useNonStereoRanks);
891    // END RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles canonical rank options
892    let ranks = crate::canon_rank::rank_mol_atoms_with_options(
893        molecule,
894        crate::canon_rank::CanonicalRankOptions {
895            break_ties: true,
896            include_chirality: params.do_isomeric_smiles,
897            include_isotopes: params.do_isomeric_smiles,
898            include_atom_maps: true,
899            include_chiral_presence: false,
900            include_stereo_groups: params.do_isomeric_smiles,
901            use_non_stereo_ranks: false,
902            include_ring_stereo: params.do_isomeric_smiles,
903            chirality_rings_use_ring_stereo: true,
904        },
905    )?;
906    Ok(plan.atoms.iter().map(|atom| ranks[atom.index()]).collect())
907}
908
909fn choose_fragment_start_atom(
910    plan: &FragmentWritePlan,
911    ranks: &[usize],
912    params: &SmilesWriteParams,
913) -> Result<AtomId, SmilesWriteError> {
914    // BEGIN RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles start atom selection section
915    // RDKit✔️✔️:     // find the next atom for a traverse
916    // RDKit✔️✔️:     if (params.doRandom && rootedAtAtom == -1) {
917    // RDKit✔️✔️:       rootedAtAtom = getRandomGenerator()() % tmol->getNumAtoms();
918    // RDKit✔️✔️:     }
919    // RDKit✔️✔️:     if (rootedAtAtom >= 0) {
920    // RDKit✔️✔️:       nextAtomIdx = rootedAtAtom;
921    // RDKit✔️✔️:       rootedAtAtom = -1;
922    // RDKit✔️✔️:     } else {
923    // RDKit✔️✔️:       unsigned int nextRank = nAtoms + 1;
924    // RDKit✔️✔️:       for (unsigned int i = 0; i < nAtoms; i++) {
925    // RDKit✔️✔️:         if (colors[i] == Canon::WHITE_NODE && ranks[i] < nextRank) {
926    // RDKit✔️✔️:           nextRank = ranks[i];
927    // RDKit✔️✔️:           nextAtomIdx = i;
928    // RDKit✔️✔️:         }
929    // RDKit✔️✔️:       }
930    // RDKit✔️✔️:     }
931    // RDKit✔️✔️:     CHECK_INVARIANT(nextAtomIdx >= 0, "no start atom found");
932    // END RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles start atom selection section
933    if let Some(root) = plan.rooted_at_atom {
934        return Ok(root);
935    }
936    if params.do_random {
937        let idx = (next_random_smiles_u64() as usize) % plan.atoms.len();
938        return Ok(plan.atoms[idx]);
939    }
940    let (idx, _) = match ranks.iter().enumerate().min_by_key(|(_, rank)| **rank) {
941        Some(pair) => pair,
942        // [deferred] Empty ranks: this is a defensive guard for impossible
943        // state (fragment with no atoms). Should never fire in practice
944        // since choose_fragment_start_atom is only called on non-empty plans.
945        None => {
946            return invariant_stage_error(
947                SmilesPlanStage::ShortTermAtomWriter,
948                "choose_fragment_start_atom() called with empty canonical rank scope",
949            );
950        }
951    };
952    Ok(plan.atoms[idx])
953}
954
955fn canonicalize_fragment_stack(
956    molecule: &Molecule,
957    plan: &FragmentWritePlan,
958    start_atom: AtomId,
959    ranks: &[usize],
960    params: &SmilesWriteParams,
961    overrides: SmilesWriteOverrides<'_>,
962) -> Result<CanonicalTraversalResult, SmilesWriteError> {
963    // BEGIN RDKIT CPP FUNCTION Canon::canonicalizeFragment call site
964    // RDKit✔️✔️:     subSmi = SmilesWrite::FragmentSmilesConstruct(
965    // RDKit✔️✔️:         *tmol, nextAtomIdx, colors, ranks, params, atomOrdering, bondOrdering);
966    // RDKit✔️✔️: Canon::canonicalizeFragment(mol, atomIdx, colors, ranks, molStack,
967    // RDKit✔️✔️:                           atomsInPlay, bondsInPlay, bondSymbols,
968    // RDKit✔️✔️:                           params.doIsomericSmiles, params.doRandom);
969    // END RDKIT CPP FUNCTION Canon::canonicalizeFragment call site
970    canonical_dfs_traversal(
971        molecule,
972        plan,
973        start_atom,
974        ranks,
975        params.do_isomeric_smiles,
976        params.clean_stereo,
977        params.do_random,
978        overrides.bond_symbols,
979    )
980}
981
982fn write_mol_stack(
983    molecule: &Molecule,
984    stack: &[MolStackElem],
985    params: &SmilesWriteParams,
986    overrides: SmilesWriteOverrides<'_>,
987    context: &mut SmilesWriteContext,
988) -> Result<FragmentWriteResult, SmilesWriteError> {
989    // BEGIN RDKIT CPP FUNCTION FragmentSmilesConstruct MolStack emission section
990    // RDKit✔️✔️:   Bond *bond = nullptr;
991    // RDKit✔️✔️:   for (auto &mSE : molStack) {
992    // RDKit✔️✔️:     switch (mSE.type) {
993    // RDKit✔️✔️:       case Canon::MOL_STACK_ATOM:
994    // RDKit✔️✔️:         for (auto rclosure : ringClosuresToErase) {
995    // RDKit✔️✔️:           ringClosureMap.erase(rclosure);
996    // RDKit✔️✔️:         }
997    // RDKit✔️✔️:         ringClosuresToErase.clear();
998    // RDKit✔️✔️:         if (!atomSymbols) {
999    // RDKit✔️✔️:           res << GetAtomSmiles(mSE.obj.atom, params);
1000    // RDKit✔️✔️:         } else {
1001    // RDKit✔️✔️:           res << (*atomSymbols)[mSE.obj.atom->getIdx()];
1002    // RDKit✔️✔️:         }
1003    // RDKit✔️✔️:         atomOrdering.push_back(mSE.obj.atom->getIdx());
1004    // RDKit✔️✔️:         break;
1005    // RDKit✔️✔️:       case Canon::MOL_STACK_BOND:
1006    // RDKit✔️✔️:         bond = mSE.obj.bond;
1007    // RDKit✔️✔️:         if (!bondSymbols) {
1008    // RDKit✔️✔️:           res << GetBondSmiles(bond, params, mSE.number);
1009    // RDKit✔️✔️:         } else {
1010    // RDKit✔️✔️:           res << (*bondSymbols)[bond->getIdx()];
1011    // RDKit✔️✔️:         }
1012    // RDKit✔️✔️:         bondOrdering.push_back(bond->getIdx());
1013    // RDKit✔️✔️:         break;
1014    // RDKit✔️✔️:       case Canon::MOL_STACK_RING:
1015    // RDKit✔️✔️:       case Canon::MOL_STACK_BRANCH_OPEN:
1016    // RDKit✔️✔️:       case Canon::MOL_STACK_BRANCH_CLOSE:
1017    // RDKit✔️✔️:       default:
1018    // RDKit✔️✔️:         break;
1019    // RDKit✔️✔️:     }
1020    // RDKit✔️✔️:   }
1021    // RDKit✔️✔️:   return res.str();
1022    // END RDKIT CPP FUNCTION FragmentSmilesConstruct MolStack emission section
1023    let mut result = FragmentWriteResult::default();
1024    for item in stack {
1025        match *item {
1026            MolStackElem::Atom(atom) => {
1027                for ring_closure in context.ring_closures_to_erase.drain(..) {
1028                    context.ring_closure_digits.remove(&ring_closure);
1029                }
1030                if let Some(atom_symbols) = overrides.atom_symbols {
1031                    result.smiles.push_str(&atom_symbols[atom.index()]);
1032                } else {
1033                    result
1034                        .smiles
1035                        .push_str(&build_atom_smiles(molecule, atom, params, context)?);
1036                }
1037                result.atom_ordering.push(atom);
1038            }
1039            MolStackElem::Bond(bond, atom_to_left) => {
1040                if let Some(bond_symbols) = overrides.bond_symbols {
1041                    result.smiles.push_str(&bond_symbols[bond.index()]);
1042                } else {
1043                    result.smiles.push_str(&build_bond_smiles(
1044                        molecule,
1045                        bond,
1046                        atom_to_left,
1047                        params,
1048                    )?);
1049                }
1050                result.bond_ordering.push(bond);
1051            }
1052            MolStackElem::Ring { ring_idx, .. } => {
1053                write_ring_closure(&mut result.smiles, ring_idx, context)?;
1054            }
1055            MolStackElem::BranchOpen => {
1056                result.smiles.push('(');
1057            }
1058            MolStackElem::BranchClose => {
1059                result.smiles.push(')');
1060            }
1061        }
1062    }
1063    Ok(result)
1064}
1065
1066// BEGIN RDKIT CPP FUNCTION MolFragmentToSmiles
1067// RDKit✔️✔️: std::string MolFragmentToSmiles(const ROMol &mol,
1068// RDKit✔️✔️:                                 const SmilesWriteParams &params,
1069// RDKit✔️✔️:                                 const std::vector<int> &atomsToUse,
1070// RDKit✔️✔️:                                 const std::vector<int> *bondsToUse,
1071// RDKit✔️✔️:                                 const std::vector<std::string> *atomSymbols,
1072// RDKit✔️✔️:                                 const std::vector<std::string> *bondSymbols) {
1073// RDKit✔️✔️:   PRECONDITION(atomsToUse.size(), "no atoms provided");
1074// RDKit✔️✔️:   if (!mol.getNumAtoms()) { return ""; }
1075// RDKit✔️✔️:   int rootedAtAtom = params.rootedAtAtom;
1076// RDKit✔️✔️:   ROMol tmol(mol, true);  // copy molecule
1077// RDKit✔️✔️:   std::string res;
1078// RDKit✔️✔️:   // compute bondsInPlay from atomsToUse
1079// RDKit✔️✔️:   // then FragmentSmilesConstruct with atomSymbols/bondSymbols
1080// RDKit✔️✔️:   return res;
1081// RDKit✔️✔️: }
1082// END RDKIT CPP FUNCTION MolFragmentToSmiles
1083pub fn mol_fragment_to_smiles(
1084    molecule: &Molecule,
1085    params: &SmilesWriteParams,
1086    atoms_to_use: &[usize],
1087    bonds_to_use: Option<&[usize]>,
1088    atom_symbols: Option<&[String]>,
1089    bond_symbols: Option<&[String]>,
1090) -> Result<String, SmilesWriteError> {
1091    validate_fragment_api_inputs(
1092        molecule,
1093        params,
1094        atoms_to_use,
1095        bonds_to_use,
1096        atom_symbols,
1097        bond_symbols,
1098    )?;
1099    if molecule.num_atoms() == 0 || atoms_to_use.is_empty() {
1100        return Ok(String::new());
1101    }
1102
1103    // BEGIN RDKIT CPP FUNCTION MolFragmentToSmiles fragment bitset/output section
1104    // RDKit✔️✔️:   boost::dynamic_bitset<> atomsInPlay(mol.getNumAtoms(), 0);
1105    // RDKit✔️✔️:   for (auto aidx : atomsToUse) { atomsInPlay.set(aidx); }
1106    // RDKit✔️✔️:   boost::dynamic_bitset<> bondsInPlay(mol.getNumBonds(), 0);
1107    // RDKit✔️✔️:   if (bondsToUse) { for (auto bidx : *bondsToUse) { bondsInPlay.set(bidx); } }
1108    // RDKit✔️✔️:   else {
1109    // RDKit✔️✔️:     PRECONDITION(
1110    // RDKit✔️✔️:         params.rootedAtAtom < 0 || MolOps::getMolFrags(mol).size() == 1,
1111    // RDKit✔️✔️:         "rootedAtAtom can only be used with molecules that have a single fragment");
1112    // RDKit✔️✔️:     for (auto aidx : atomsToUse) { ... if (atomsInPlay[other]) bondsInPlay.set(...); }
1113    // RDKit✔️✔️:   }
1114    // RDKit✔️✔️:   while (colorIt != colors.end()) {
1115    // RDKit✔️✔️:     ... FragmentSmilesConstruct(..., &atomsInPlay, &bondsInPlay, atomSymbols, bondSymbols);
1116    // RDKit✔️✔️:     if (colorIt != colors.end()) { res += "."; }
1117    // RDKit✔️✔️:   }
1118    // END RDKIT CPP FUNCTION MolFragmentToSmiles fragment bitset/output section
1119    let mut molecule = if params.do_kekule {
1120        kekulize_for_smiles(molecule)?
1121    } else {
1122        molecule.clone()
1123    };
1124    let mut working_params = params.clone();
1125    working_params.do_kekule = false;
1126    let saved_atom_maps = prepare_plain_smiles_molecule(&mut molecule, &working_params)?;
1127
1128    let mut plans =
1129        collect_fragment_api_write_plans(&molecule, &working_params, atoms_to_use, bonds_to_use)?;
1130    if working_params.canonical {
1131        restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
1132        plans.sort_by_key(|plan| {
1133            plan.atoms
1134                .iter()
1135                .map(|atom| atom.index())
1136                .min()
1137                .unwrap_or(usize::MAX)
1138        });
1139    }
1140
1141    let overrides = SmilesWriteOverrides {
1142        atom_symbols,
1143        bond_symbols,
1144    };
1145    let mut context = SmilesWriteContext::default();
1146    let mut results = Vec::new();
1147    for plan in &plans {
1148        results.push(write_fragment_smiles(
1149            &mut molecule,
1150            plan,
1151            &working_params,
1152            SmilesOutputMode::PlainSmiles,
1153            overrides,
1154            &mut context,
1155        )?);
1156    }
1157    assemble_fragment_smiles(results, &working_params, &mut context)
1158}
1159
1160pub fn mol_fragment_to_cx_smiles(
1161    molecule: &Molecule,
1162    params: &SmilesWriteParams,
1163    atoms_to_use: &[usize],
1164    bonds_to_use: Option<&[usize]>,
1165    atom_symbols: Option<&[String]>,
1166    bond_symbols: Option<&[String]>,
1167    fields: CxSmilesFields,
1168) -> Result<String, SmilesWriteError> {
1169    validate_fragment_api_inputs(
1170        molecule,
1171        params,
1172        atoms_to_use,
1173        bonds_to_use,
1174        atom_symbols,
1175        bond_symbols,
1176    )?;
1177    let mut context = SmilesWriteContext::default();
1178    let smiles = mol_fragment_to_smiles_with_context(
1179        molecule,
1180        params,
1181        atoms_to_use,
1182        bonds_to_use,
1183        atom_symbols,
1184        bond_symbols,
1185        &mut context,
1186    )?;
1187    let scope = CxWriteScope {
1188        atom_order: context.atom_output_order,
1189        bond_order: context.bond_output_order,
1190    };
1191    let cx_extension = get_cx_extensions_scoped(molecule, fields, &scope)?;
1192    if cx_extension.is_empty() {
1193        Ok(smiles)
1194    } else {
1195        Ok(format!("{smiles} {cx_extension}"))
1196    }
1197}
1198
1199fn mol_fragment_to_smiles_with_context(
1200    molecule: &Molecule,
1201    params: &SmilesWriteParams,
1202    atoms_to_use: &[usize],
1203    bonds_to_use: Option<&[usize]>,
1204    atom_symbols: Option<&[String]>,
1205    bond_symbols: Option<&[String]>,
1206    context: &mut SmilesWriteContext,
1207) -> Result<String, SmilesWriteError> {
1208    if molecule.num_atoms() == 0 || atoms_to_use.is_empty() {
1209        return Ok(String::new());
1210    }
1211
1212    let mut molecule = if params.do_kekule {
1213        kekulize_for_smiles(molecule)?
1214    } else {
1215        molecule.clone()
1216    };
1217    let mut working_params = params.clone();
1218    working_params.do_kekule = false;
1219    let saved_atom_maps = prepare_plain_smiles_molecule(&mut molecule, &working_params)?;
1220
1221    let mut plans =
1222        collect_fragment_api_write_plans(&molecule, &working_params, atoms_to_use, bonds_to_use)?;
1223    if working_params.canonical {
1224        restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
1225        plans.sort_by_key(|plan| {
1226            plan.atoms
1227                .iter()
1228                .map(|atom| atom.index())
1229                .min()
1230                .unwrap_or(usize::MAX)
1231        });
1232    }
1233
1234    let overrides = SmilesWriteOverrides {
1235        atom_symbols,
1236        bond_symbols,
1237    };
1238    let mut results = Vec::new();
1239    for plan in &plans {
1240        results.push(write_fragment_smiles(
1241            &mut molecule,
1242            plan,
1243            &working_params,
1244            SmilesOutputMode::PlainSmiles,
1245            overrides,
1246            context,
1247        )?);
1248    }
1249    assemble_fragment_smiles(results, &working_params, context)
1250}
1251
1252fn collect_fragment_api_write_plans(
1253    molecule: &Molecule,
1254    params: &SmilesWriteParams,
1255    atoms_to_use: &[usize],
1256    bonds_to_use: Option<&[usize]>,
1257) -> Result<Vec<FragmentWritePlan>, SmilesWriteError> {
1258    let atom_set = atoms_to_use.iter().copied().collect::<BTreeSet<_>>();
1259    let bond_set = if let Some(bonds_to_use) = bonds_to_use {
1260        bonds_to_use.iter().copied().collect::<BTreeSet<_>>()
1261    } else {
1262        molecule
1263            .bonds()
1264            .iter()
1265            .filter(|bond| {
1266                atom_set.contains(&bond.begin().index()) && atom_set.contains(&bond.end().index())
1267            })
1268            .map(|bond| bond.id().index())
1269            .collect::<BTreeSet<_>>()
1270    };
1271
1272    let mut seen = BTreeSet::new();
1273    let mut plans = Vec::new();
1274    for &start in atoms_to_use {
1275        if seen.contains(&start) {
1276            continue;
1277        }
1278        let mut stack = vec![AtomId::new(start)];
1279        let mut atoms = Vec::new();
1280        let mut bonds = BTreeSet::new();
1281        while let Some(atom) = stack.pop() {
1282            if !seen.insert(atom.index()) {
1283                continue;
1284            }
1285            atoms.push(atom);
1286            for bond in molecule.bonds() {
1287                if !bond_set.contains(&bond.id().index()) {
1288                    continue;
1289                }
1290                let Some(other) = bond_other_atom(bond, atom) else {
1291                    continue;
1292                };
1293                if !atom_set.contains(&other.index()) {
1294                    continue;
1295                }
1296                bonds.insert(bond.id());
1297                if !seen.contains(&other.index()) {
1298                    stack.push(other);
1299                }
1300            }
1301        }
1302        atoms.sort_by_key(|atom| atom.index());
1303        let bonds = bonds.into_iter().collect::<Vec<_>>();
1304        let rooted_at_atom = params
1305            .rooted_at_atom
1306            .map(AtomId::new)
1307            .filter(|root| atoms.contains(root));
1308        plans.push(FragmentWritePlan {
1309            atoms,
1310            bonds,
1311            rooted_at_atom,
1312        });
1313    }
1314    Ok(plans)
1315}
1316
1317pub fn get_atom_smiles(
1318    molecule: &Molecule,
1319    atom: usize,
1320    params: &SmilesWriteParams,
1321) -> Result<String, SmilesWriteError> {
1322    validate_atom_index(molecule, atom)?;
1323    get_atom_smiles_impl(
1324        molecule,
1325        AtomId::new(atom),
1326        params,
1327        None,
1328        false,
1329        None,
1330        false,
1331    )
1332}
1333
1334fn get_atom_smiles_with_context(
1335    molecule: &Molecule,
1336    atom: AtomId,
1337    params: &SmilesWriteParams,
1338    context: &SmilesWriteContext,
1339) -> Result<String, SmilesWriteError> {
1340    get_atom_smiles_impl(
1341        molecule,
1342        atom,
1343        params,
1344        context.chiral_tag_overrides.get(&atom).copied(),
1345        context.chiral_inversions.contains(&atom),
1346        context.chiral_permutations.get(&atom).copied(),
1347        context.broken_chiral_atoms.contains(&atom),
1348    )
1349}
1350
1351fn get_atom_smiles_impl(
1352    molecule: &Molecule,
1353    atom_id: AtomId,
1354    params: &SmilesWriteParams,
1355    chiral_tag_override: Option<ChiralTag>,
1356    invert_chirality: bool,
1357    chiral_permutation_override: Option<u32>,
1358    broken_chirality: bool,
1359) -> Result<String, SmilesWriteError> {
1360    // BEGIN RDKIT CPP FUNCTION GetAtomSmiles
1361    // RDKit✔️✔️: std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams &params) {
1362    // RDKit✔️✔️:   PRECONDITION(atom, "bad atom");
1363    // RDKit✔️✔️:   std::string res;
1364    // RDKit✔️✔️:   int fc = atom->getFormalCharge();
1365    // RDKit✔️✔️:   int num = atom->getAtomicNum();
1366    // RDKit✔️✔️:   int isotope = atom->getIsotope();
1367    // RDKit✔️✔️:
1368    // RDKit✔️✔️:   std::string symb;
1369    // RDKit✔️✔️:   bool hasCustomSymbol =
1370    // RDKit✔️✔️:       atom->getPropIfPresent(common_properties::smilesSymbol, symb);
1371    // RDKit✔️✔️:   if (!hasCustomSymbol) {
1372    // RDKit✔️✔️:     symb = PeriodicTable::getTable()->getElementSymbol(num);
1373    // RDKit✔️✔️:   }
1374    // RDKit✔️✔️:
1375    // RDKit✔️✔️:   // check for atomic stereochemistry
1376    // RDKit✔️✔️:   std::string atString;
1377    // RDKit✔️✔️:   if (params.doIsomericSmiles) {
1378    // RDKit✔️✔️:     if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1379    // RDKit✔️✔️:         !atom->hasProp(common_properties::_brokenChirality)) {
1380    // RDKit✔️✔️:       atString = getAtomChiralityInfo(atom);
1381    // RDKit✔️✔️:     }
1382    // RDKit✔️✔️:   }
1383    // RDKit✔️✔️:   bool needsBracket = true;
1384    // RDKit✔️✔️:   if (!hasCustomSymbol && !params.allHsExplicit) {
1385    // RDKit✔️✔️:     needsBracket = atomNeedsBracket(atom, atString, params);
1386    // RDKit✔️✔️:   }
1387    // RDKit✔️✔️:   if (needsBracket) {
1388    // RDKit✔️✔️:     res += "[";
1389    // RDKit✔️✔️:   }
1390    // RDKit✔️✔️:
1391    // RDKit✔️✔️:   if (isotope && params.doIsomericSmiles) {
1392    // RDKit✔️✔️:     res += std::to_string(isotope);
1393    // RDKit✔️✔️:   }
1394    // RDKit✔️✔️:   if (!params.doKekule && atom->getIsAromatic() && symb[0] >= 'A' &&
1395    // RDKit✔️✔️:       symb[0] <= 'Z') {
1396    // RDKit✔️✔️:     symb[0] = tolower(symb[0]);
1397    // RDKit✔️✔️:   }
1398    // RDKit✔️✔️:   res += symb;
1399    // RDKit✔️✔️:   res += atString;
1400    // RDKit✔️✔️:   if (needsBracket) {
1401    // RDKit✔️✔️:     unsigned int totNumHs = atom->getTotalNumHs();
1402    // RDKit✔️✔️:     if (totNumHs > 0) {
1403    // RDKit✔️✔️:       res += "H";
1404    // RDKit✔️✔️:       if (totNumHs > 1) {
1405    // RDKit✔️✔️:         res += std::to_string(totNumHs);
1406    // RDKit✔️✔️:       }
1407    // RDKit✔️✔️:     }
1408    // RDKit✔️✔️:     if (fc > 0) {
1409    // RDKit✔️✔️:       res += "+";
1410    // RDKit✔️✔️:       if (fc > 1) {
1411    // RDKit✔️✔️:         res += std::to_string(fc);
1412    // RDKit✔️✔️:       }
1413    // RDKit✔️✔️:     } else if (fc < 0) {
1414    // RDKit✔️✔️:       if (fc < -1) {
1415    // RDKit✔️✔️:         res += std::to_string(fc);
1416    // RDKit✔️✔️:       } else {
1417    // RDKit✔️✔️:         res += "-";
1418    // RDKit✔️✔️:       }
1419    // RDKit✔️✔️:     }
1420    // RDKit✔️✔️:     int mapNum;
1421    // RDKit✔️✔️:     if (atom->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) {
1422    // RDKit✔️✔️:       res += ":";
1423    // RDKit✔️✔️:       res += std::to_string(mapNum);
1424    // RDKit✔️✔️:     }
1425    // RDKit✔️✔️:     res += "]";
1426    // RDKit✔️✔️:   }
1427    // RDKit✔️✔️:   std::string label;
1428    // RDKit✔️✔️:   if (atom->getPropIfPresent(common_properties::_supplementalSmilesLabel,
1429    // RDKit✔️✔️:                              label)) {
1430    // RDKit✔️✔️:     res += label;
1431    // RDKit✔️✔️:   }
1432    // RDKit✔️✔️:   return res;
1433    // RDKit✔️✔️: }
1434    // END RDKIT CPP FUNCTION GetAtomSmiles
1435    let chirality = if params.do_isomeric_smiles && !broken_chirality {
1436        get_atom_chirality_info_with_inversion(
1437            molecule,
1438            atom_id,
1439            chiral_tag_override,
1440            invert_chirality,
1441            chiral_permutation_override,
1442        )?
1443    } else {
1444        String::new()
1445    };
1446    let atom = &molecule.atoms()[atom_id.index()];
1447    let custom_symbol = atom.prop("smilesSymbol");
1448    let has_custom_symbol = custom_symbol.is_some();
1449    let needs_bracket = if has_custom_symbol || params.all_hydrogens_explicit {
1450        true
1451    } else {
1452        atom_needs_bracket(molecule, atom_id, &chirality, params)?
1453    };
1454    let raw_symbol = custom_symbol.unwrap_or(element_symbol(atom.atomic_number())?);
1455    let lowered_symbol;
1456    let symbol: &str = if !params.do_kekule
1457        && atom.is_aromatic()
1458        && raw_symbol
1459            .as_bytes()
1460            .first()
1461            .is_some_and(u8::is_ascii_uppercase)
1462    {
1463        let should_lower = matches!(
1464            atom.atomic_number(),
1465            5 | 6 | 7 | 8 | 14 | 15 | 16 | 33 | 34 | 52
1466        );
1467        if should_lower {
1468            let mut owned = String::with_capacity(raw_symbol.len());
1469            let mut chars = raw_symbol.chars();
1470            if let Some(first) = chars.next() {
1471                owned.extend(first.to_lowercase());
1472            }
1473            owned.push_str(chars.as_str());
1474            lowered_symbol = owned;
1475            &lowered_symbol
1476        } else {
1477            raw_symbol
1478        }
1479    } else {
1480        raw_symbol
1481    };
1482    let mut result = String::new();
1483    if needs_bracket {
1484        result.push('[');
1485    }
1486    if let Some(isotope) = atom.isotope()
1487        && params.do_isomeric_smiles
1488    {
1489        result.push_str(&isotope.to_string());
1490    }
1491    result.push_str(symbol);
1492    result.push_str(&chirality);
1493    if needs_bracket {
1494        let total_num_hs = total_num_hydrogens_for_writer(molecule, atom_id);
1495        if total_num_hs > 0 {
1496            result.push('H');
1497            if total_num_hs > 1 {
1498                result.push_str(&total_num_hs.to_string());
1499            }
1500        }
1501        if atom.formal_charge() > 0 {
1502            result.push('+');
1503            if atom.formal_charge() > 1 {
1504                result.push_str(&atom.formal_charge().to_string());
1505            }
1506        } else if atom.formal_charge() < 0 {
1507            if atom.formal_charge() < -1 {
1508                result.push_str(&atom.formal_charge().to_string());
1509            } else {
1510                result.push('-');
1511            }
1512        }
1513        if let Some(atom_map) = atom.atom_map() {
1514            result.push(':');
1515            result.push_str(&atom_map.to_string());
1516        }
1517        result.push(']');
1518    }
1519    if let Some(label) = atom.prop("_supplementalSmilesLabel") {
1520        result.push_str(label);
1521    }
1522    Ok(result)
1523}
1524
/// Atom writer used while emitting a traversal stack.
///
/// Pure delegation to `get_atom_smiles_with_context`, which applies the
/// chirality bookkeeping stored in `context` for `atom_id`.
fn build_atom_smiles(
    molecule: &Molecule,
    atom_id: AtomId,
    params: &SmilesWriteParams,
    context: &SmilesWriteContext,
) -> Result<String, SmilesWriteError> {
    get_atom_smiles_with_context(molecule, atom_id, params, context)
}
1533
1534pub fn get_bond_smiles(_bond_order: BondOrder) -> Result<&'static str, SmilesWriteError> {
1535    // RDKit✔️✔️: default: res = "~";
1536    match _bond_order {
1537        BondOrder::Single => Ok(""),
1538        BondOrder::Double => Ok("="),
1539        BondOrder::Triple => Ok("#"),
1540        BondOrder::Quadruple => Ok("$"),
1541        BondOrder::Dative => Ok("->"),
1542        _ => Ok("~"),
1543    }
1544}
1545
1546pub fn get_molecule_bond_smiles(
1547    molecule: &Molecule,
1548    bond: usize,
1549    atom_to_left: Option<usize>,
1550    params: &SmilesWriteParams,
1551) -> Result<String, SmilesWriteError> {
1552    // BEGIN RDKIT CPP FUNCTION GetBondSmiles
1553    // RDKit✔️✔️: std::string GetBondSmiles(const Bond *bond, const SmilesWriteParams &params,
1554    // RDKit✔️✔️:                           int atomToLeftIdx) {
1555    // RDKit✔️✔️:   PRECONDITION(bond, "bad bond");
1556    // RDKit✔️✔️:   if (atomToLeftIdx < 0) {
1557    // RDKit✔️✔️:     atomToLeftIdx = bond->getBeginAtomIdx();
1558    // RDKit✔️✔️:   }
1559    // RDKit✔️✔️:   std::string res = "";
1560    // RDKit✔️✔️:   bool aromatic = false;
1561    // RDKit✔️✔️:   if (!params.doKekule && (bond->getBondType() == Bond::SINGLE ||
1562    // RDKit✔️✔️:                            bond->getBondType() == Bond::DOUBLE ||
1563    // RDKit✔️✔️:                            bond->getBondType() == Bond::AROMATIC)) {
1564    // RDKit✔️✔️:     aromatic = true;
1565    // RDKit✔️✔️:   }
1566    // RDKit✔️✔️:   Bond::BondDir dir = bond->getBondDir();
1567    // RDKit✔️✔️:   bond->clearProp(common_properties::_TraversalRingClosureBond);
1568    // RDKit✔️✔️:   switch (bond->getBondType()) {
1569    // RDKit✔️✔️:     case Bond::SINGLE:
1570    // RDKit✔️✔️:       if (dir != Bond::NONE && dir != Bond::UNKNOWN) {
1571    // RDKit✔️✔️:       } else {
1572    // RDKit✔️✔️:         if (params.allBondsExplicit) {
1573    // RDKit✔️✔️:           res = "-";
1574    // RDKit✔️✔️:         } else if (aromatic && !bond->getIsAromatic()) {
1575    // RDKit✔️✔️:           res = "-";
1576    // RDKit✔️✔️:         }
1577    // RDKit✔️✔️:       }
1578    // RDKit✔️✔️:       break;
1579    // RDKit✔️✔️:     case Bond::DOUBLE:
1580    // RDKit✔️✔️:       if (!aromatic || !bond->getIsAromatic() || params.allBondsExplicit) {
1581    // RDKit✔️✔️:         res = "=";
1582    // RDKit✔️✔️:       }
1583    // RDKit✔️✔️:       break;
1584    // RDKit✔️✔️:     case Bond::TRIPLE:
1585    // RDKit✔️✔️:       res = "#";
1586    // RDKit✔️✔️:       break;
1587    // RDKit✔️✔️:     case Bond::QUADRUPLE:
1588    // RDKit✔️✔️:       res = "$";
1589    // RDKit✔️✔️:       break;
1590    // RDKit✔️✔️:     case Bond::AROMATIC:
1591    // RDKit✔️✔️:       if (params.allBondsExplicit) {
1592    // RDKit✔️✔️:         res = ":";
1593    // RDKit✔️✔️:       }
1594    // RDKit✔️✔️:       break;
1595    // RDKit✔️✔️:     case Bond::DATIVE:
1596    // RDKit✔️✔️:       if (atomToLeftIdx >= 0 &&
1597    // RDKit✔️✔️:           bond->getBeginAtomIdx() == static_cast<unsigned int>(atomToLeftIdx)) {
1598    // RDKit✔️✔️:         res = "->";
1599    // RDKit✔️✔️:       } else {
1600    // RDKit✔️✔️:         res = "<-";
1601    // RDKit✔️✔️:       }
1602    // RDKit✔️✔️:       break;
1603    // RDKit✔️✔️:     default:
1604    // RDKit✔️✔️:       res = "~";
1605    // RDKit✔️✔️:   }
1606    // RDKit✔️✔️:   return res;
1607    // RDKit✔️✔️: }
1608    // END RDKIT CPP FUNCTION GetBondSmiles
1609    validate_bond_index(molecule, bond)?;
1610    if let Some(atom) = atom_to_left {
1611        validate_atom_index(molecule, atom)?;
1612    }
1613    let bond = &molecule.bonds()[bond];
1614    let atom_to_left = atom_to_left.unwrap_or_else(|| bond.begin().index());
1615    let aromatic_context = if !params.do_kekule
1616        && matches!(
1617            bond.order(),
1618            BondOrder::Single | BondOrder::Double | BondOrder::Aromatic
1619        ) {
1620        let left = &molecule.atoms()[atom_to_left];
1621        let other_id = bond_other_atom(bond, AtomId::new(atom_to_left)).ok_or(
1622            SmilesWriteError::BondOutOfRange {
1623                bond: bond.id().index(),
1624            },
1625        )?;
1626        let other = &molecule.atoms()[other_id.index()];
1627        left.is_aromatic()
1628            && other.is_aromatic()
1629            && (left.atomic_number() != 0 || other.atomic_number() != 0)
1630    } else {
1631        false
1632    };
1633    match bond.order() {
1634        // RDKit✔️✔️: case Bond::SINGLE:
1635        // RDKit✔️✔️:   if (dir != Bond::NONE && dir != Bond::UNKNOWN) {
1636        // RDKit✔️✔️:     if (dir == Bond::BEGINWEDGE || dir == Bond::BEGINDASH ||
1637        // RDKit✔️✔️:         dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
1638        // RDKit✔️✔️:       res = dirSymbol(dir, atomToLeftIdx);
1639        // RDKit✔️✔️:     }
1640        // RDKit✔️✔️:   } else if (params.allBondsExplicit) { res = "-"; }
1641        BondOrder::Single => {
1642            if !matches!(
1643                bond.direction(),
1644                BondDirection::None | BondDirection::Unknown
1645            ) {
1646                match bond.direction() {
1647                    BondDirection::EndDownRight => {
1648                        if params.all_bonds_explicit || params.do_isomeric_smiles {
1649                            Ok("\\".to_string())
1650                        } else {
1651                            Ok(String::new())
1652                        }
1653                    }
1654                    BondDirection::EndUpRight => {
1655                        if params.all_bonds_explicit || params.do_isomeric_smiles {
1656                            Ok("/".to_string())
1657                        } else {
1658                            Ok(String::new())
1659                        }
1660                    }
1661                    _ => {
1662                        if params.all_bonds_explicit {
1663                            Ok("-".to_string())
1664                        } else {
1665                            Ok(String::new())
1666                        }
1667                    }
1668                }
1669            } else if params.all_bonds_explicit || (aromatic_context && !bond.is_aromatic()) {
1670                Ok("-".to_string())
1671            } else {
1672                Ok(String::new())
1673            }
1674        }
1675        // RDKit✔️✔️: case Bond::DOUBLE:
1676        // RDKit✔️✔️:   if (!aromatic || !bond->getIsAromatic() || params.allBondsExplicit) {
1677        // RDKit✔️✔️:     res = "=";
1678        // RDKit✔️✔️:   }
1679        BondOrder::Double => {
1680            if !aromatic_context || !bond.is_aromatic() || params.all_bonds_explicit {
1681                Ok("=".to_string())
1682            } else {
1683                Ok(String::new())
1684            }
1685        }
1686        // RDKit✔️✔️: case Bond::TRIPLE: res = "#"; break;
1687        BondOrder::Triple => Ok("#".to_string()),
1688        // RDKit✔️✔️: case Bond::QUADRUPLE: res = "$"; break;
1689        BondOrder::Quadruple => Ok("$".to_string()),
1690        // RDKit✔️✔️: case Bond::AROMATIC:
1691        // RDKit✔️✔️:   if (params.allBondsExplicit) { res = ":"; }
1692        // RDKit✔️✔️:   break;
1693        BondOrder::Aromatic => {
1694            if !matches!(
1695                bond.direction(),
1696                BondDirection::None | BondDirection::Unknown
1697            ) {
1698                match bond.direction() {
1699                    BondDirection::EndDownRight => {
1700                        if params.all_bonds_explicit || params.do_isomeric_smiles {
1701                            Ok("\\".to_string())
1702                        } else {
1703                            Ok(String::new())
1704                        }
1705                    }
1706                    BondDirection::EndUpRight => {
1707                        if params.all_bonds_explicit || params.do_isomeric_smiles {
1708                            Ok("/".to_string())
1709                        } else {
1710                            Ok(String::new())
1711                        }
1712                    }
1713                    _ => {
1714                        if params.all_bonds_explicit || !aromatic_context {
1715                            Ok(":".to_string())
1716                        } else {
1717                            Ok(String::new())
1718                        }
1719                    }
1720                }
1721            } else if params.all_bonds_explicit || !aromatic_context {
1722                Ok(":".to_string())
1723            } else {
1724                Ok(String::new())
1725            }
1726        }
1727        // RDKit✔️✔️: case Bond::DATIVE:
1728        BondOrder::Dative => {
1729            if bond.begin().index() == atom_to_left {
1730                Ok("->".to_string())
1731            } else {
1732                Ok("<-".to_string())
1733            }
1734        }
1735        // RDKit✔️✔️: default: res = "~";
1736        _ => Ok("~".to_string()),
1737    }
1738}
1739
1740fn build_bond_smiles(
1741    molecule: &Molecule,
1742    bond: BondId,
1743    atom_to_left: AtomId,
1744    params: &SmilesWriteParams,
1745) -> Result<String, SmilesWriteError> {
1746    get_molecule_bond_smiles(molecule, bond.index(), Some(atom_to_left.index()), params)
1747}
1748
1749fn total_num_hydrogens_for_writer(molecule: &Molecule, atom_id: AtomId) -> u32 {
1750    let explicit = u32::from(molecule.atoms()[atom_id.index()].explicit_hydrogens());
1751    let implicit = molecule
1752        .derived_cache()
1753        .valence
1754        .as_ref()
1755        .and_then(|valence| valence.implicit_hydrogens.get(atom_id.index()))
1756        .copied()
1757        .unwrap_or(0)
1758        .max(0) as u32;
1759    explicit + implicit
1760}
1761
1762fn total_valence_for_writer(molecule: &Molecule, atom_id: AtomId) -> Option<i32> {
1763    molecule.derived_cache().valence.as_ref().map(|valence| {
1764        valence.explicit_valence[atom_id.index()] + valence.implicit_hydrogens[atom_id.index()]
1765    })
1766}
1767
1768#[must_use]
1769pub fn in_organic_subset(_atomic_number: u8) -> Result<bool, SmilesWriteError> {
1770    // BEGIN RDKIT CPP FUNCTION inOrganicSubset
1771    // RDKit✔️✔️: const int atomicSmiles[] = {0, 5, 6, 7, 8, 9, 15, 16, 17, 35, 53, -1};
1772    // RDKit✔️✔️: bool inOrganicSubset(int atomicNumber) {
1773    // RDKit✔️✔️:   unsigned int idx = 0;
1774    // RDKit✔️✔️:   while (atomicSmiles[idx] < atomicNumber && atomicSmiles[idx] != -1) {
1775    // RDKit✔️✔️:     ++idx;
1776    // RDKit✔️✔️:   }
1777    // RDKit✔️✔️:   return atomicSmiles[idx] == atomicNumber;
1778    // RDKit✔️✔️: }
1779    // END RDKIT CPP FUNCTION inOrganicSubset
1780    Ok(matches!(
1781        _atomic_number,
1782        0 | 5 | 6 | 7 | 8 | 9 | 15 | 16 | 17 | 35 | 53
1783    ))
1784}
1785
1786fn write_ring_closure(
1787    smiles: &mut String,
1788    ring_idx: usize,
1789    context: &mut SmilesWriteContext,
1790) -> Result<(), SmilesWriteError> {
1791    if let Some(digit) = context.ring_closure_digits.get(&ring_idx).copied() {
1792        write_ring_index(smiles, digit);
1793        context.ring_closures_to_erase.push(ring_idx);
1794        return Ok(());
1795    }
1796
1797    let digit = match (1..).find(|candidate| {
1798        !context
1799            .ring_closure_digits
1800            .values()
1801            .any(|digit| digit == candidate)
1802    }) {
1803        Some(d) => d,
1804        // [deferred] Ring closure digit exhaustion. The candidate search
1805        // (1..) finds the first unused digit; None only occurs if every
1806        // positive integer is in use, which requires >2^63 concurrent ring
1807        // closures — an impossible real-world molecule. This is a defensive
1808        // guard for extreme edge cases.
1809        None => {
1810            return invariant_stage_error(
1811                SmilesPlanStage::ShortTermBondWriter,
1812                "write_ring_closure() could not allocate a free ring index",
1813            );
1814        }
1815    };
1816    context.ring_closure_digits.insert(ring_idx, digit);
1817    write_ring_index(smiles, digit);
1818    Ok(())
1819}
1820
/// Appends a ring-closure number to `smiles`.
///
/// * `0..=9`   - a single ASCII digit, e.g. `7`
/// * `10..=99` - percent form, e.g. `%42`
/// * `100..`   - parenthesised percent form, e.g. `%(123)`
fn write_ring_index(smiles: &mut String, digit: usize) {
    match digit {
        // The range guarantees the value fits in one ASCII digit.
        0..=9 => smiles.push(char::from(b'0' + digit as u8)),
        10..=99 => smiles.push_str(&format!("%{digit}")),
        _ => smiles.push_str(&format!("%({digit})")),
    }
}
1833
1834fn bond_other_atom(bond: &Bond, atom: AtomId) -> Option<AtomId> {
1835    if bond.begin() == atom {
1836        Some(bond.end())
1837    } else if bond.end() == atom {
1838        Some(bond.begin())
1839    } else {
1840        None
1841    }
1842}
1843
1844fn element_symbol(atomic_number: u8) -> Result<&'static str, SmilesWriteError> {
1845    match atomic_number {
1846        0 => Ok("*"),
1847        1 => Ok("H"),
1848        2 => Ok("He"),
1849        3 => Ok("Li"),
1850        4 => Ok("Be"),
1851        5 => Ok("B"),
1852        6 => Ok("C"),
1853        7 => Ok("N"),
1854        8 => Ok("O"),
1855        9 => Ok("F"),
1856        10 => Ok("Ne"),
1857        11 => Ok("Na"),
1858        12 => Ok("Mg"),
1859        13 => Ok("Al"),
1860        14 => Ok("Si"),
1861        15 => Ok("P"),
1862        16 => Ok("S"),
1863        17 => Ok("Cl"),
1864        18 => Ok("Ar"),
1865        19 => Ok("K"),
1866        20 => Ok("Ca"),
1867        21 => Ok("Sc"),
1868        22 => Ok("Ti"),
1869        23 => Ok("V"),
1870        24 => Ok("Cr"),
1871        25 => Ok("Mn"),
1872        26 => Ok("Fe"),
1873        27 => Ok("Co"),
1874        28 => Ok("Ni"),
1875        29 => Ok("Cu"),
1876        30 => Ok("Zn"),
1877        31 => Ok("Ga"),
1878        32 => Ok("Ge"),
1879        33 => Ok("As"),
1880        34 => Ok("Se"),
1881        35 => Ok("Br"),
1882        36 => Ok("Kr"),
1883        37 => Ok("Rb"),
1884        38 => Ok("Sr"),
1885        39 => Ok("Y"),
1886        40 => Ok("Zr"),
1887        41 => Ok("Nb"),
1888        42 => Ok("Mo"),
1889        43 => Ok("Tc"),
1890        44 => Ok("Ru"),
1891        45 => Ok("Rh"),
1892        46 => Ok("Pd"),
1893        47 => Ok("Ag"),
1894        48 => Ok("Cd"),
1895        49 => Ok("In"),
1896        50 => Ok("Sn"),
1897        51 => Ok("Sb"),
1898        52 => Ok("Te"),
1899        53 => Ok("I"),
1900        54 => Ok("Xe"),
1901        55 => Ok("Cs"),
1902        56 => Ok("Ba"),
1903        57 => Ok("La"),
1904        58 => Ok("Ce"),
1905        59 => Ok("Pr"),
1906        60 => Ok("Nd"),
1907        61 => Ok("Pm"),
1908        62 => Ok("Sm"),
1909        63 => Ok("Eu"),
1910        64 => Ok("Gd"),
1911        65 => Ok("Tb"),
1912        66 => Ok("Dy"),
1913        67 => Ok("Ho"),
1914        68 => Ok("Er"),
1915        69 => Ok("Tm"),
1916        70 => Ok("Yb"),
1917        71 => Ok("Lu"),
1918        72 => Ok("Hf"),
1919        73 => Ok("Ta"),
1920        74 => Ok("W"),
1921        75 => Ok("Re"),
1922        76 => Ok("Os"),
1923        77 => Ok("Ir"),
1924        78 => Ok("Pt"),
1925        79 => Ok("Au"),
1926        80 => Ok("Hg"),
1927        81 => Ok("Tl"),
1928        82 => Ok("Pb"),
1929        83 => Ok("Bi"),
1930        84 => Ok("Po"),
1931        85 => Ok("At"),
1932        86 => Ok("Rn"),
1933        87 => Ok("Fr"),
1934        88 => Ok("Ra"),
1935        89 => Ok("Ac"),
1936        90 => Ok("Th"),
1937        91 => Ok("Pa"),
1938        92 => Ok("U"),
1939        93 => Ok("Np"),
1940        94 => Ok("Pu"),
1941        95 => Ok("Am"),
1942        96 => Ok("Cm"),
1943        97 => Ok("Bk"),
1944        98 => Ok("Cf"),
1945        99 => Ok("Es"),
1946        100 => Ok("Fm"),
1947        101 => Ok("Md"),
1948        102 => Ok("No"),
1949        103 => Ok("Lr"),
1950        104 => Ok("Rf"),
1951        105 => Ok("Db"),
1952        106 => Ok("Sg"),
1953        107 => Ok("Bh"),
1954        108 => Ok("Hs"),
1955        109 => Ok("Mt"),
1956        110 => Ok("Ds"),
1957        111 => Ok("Rg"),
1958        112 => Ok("Cn"),
1959        113 => Ok("Nh"),
1960        114 => Ok("Fl"),
1961        115 => Ok("Mc"),
1962        116 => Ok("Lv"),
1963        117 => Ok("Ts"),
1964        118 => Ok("Og"),
1965        // RDKit✔️✔️: PeriodicTable returns "?" for unknown atomic numbers
1966        _ => Ok("?"),
1967    }
1968}
1969
1970/// Assert that an atom can be written in SMILES without unsupported features.
1971/// Query atoms, radical-bearing atoms, and out-of-range elements produce
1972/// bracket-wrapped output through the standard path (get_atom_smiles handles
1973/// them like RDKit does by using element symbol + bracket notation).
1974/// This function is kept for the minimal-plain path which bypasses the
1975/// standard writer for performance.
1976
1977fn assemble_fragment_smiles(
1978    fragment_results: Vec<FragmentWriteResult>,
1979    params: &SmilesWriteParams,
1980    context: &mut SmilesWriteContext,
1981) -> Result<String, SmilesWriteError> {
1982    // BEGIN RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles fragment assembly section
1983    // RDKit✔️✔️:   if (params.canonical) {
1984    // RDKit✔️✔️:     std::sort(tmp.begin(), tmp.end());
1985    // RDKit✔️✔️:   } else {  // Not canonical
1986    // RDKit✔️✔️:     for (auto &i : allAtomOrdering) {
1987    // RDKit✔️✔️:       flattenedAtomOrdering.insert(flattenedAtomOrdering.end(), i.begin(),
1988    // RDKit✔️✔️:                                    i.end());
1989    // RDKit✔️✔️:     }
1990    // RDKit✔️✔️:     for (auto &i : allBondOrdering) {
1991    // RDKit✔️✔️:       flattenedBondOrdering.insert(flattenedBondOrdering.end(), i.begin(),
1992    // RDKit✔️✔️:                                    i.end());
1993    // RDKit✔️✔️:     }
1994    // RDKit✔️✔️:     for (unsigned i = 0; i < vfragsmi.size(); ++i) {
1995    // RDKit✔️✔️:       result += vfragsmi[i];
1996    // RDKit✔️✔️:       if (i < vfragsmi.size() - 1) {
1997    // RDKit✔️✔️:         result += ".";
1998    // RDKit✔️✔️:       }
1999    // RDKit✔️✔️:     }
2000    // RDKit✔️✔️:   }
2001    // RDKit✔️✔️:   mol.setProp(common_properties::_smilesAtomOutputOrder, flattenedAtomOrdering,
2002    // RDKit✔️✔️:               true);
2003    // RDKit✔️✔️:   mol.setProp(common_properties::_smilesBondOutputOrder, flattenedBondOrdering,
2004    // RDKit✔️✔️:               true);
2005    // RDKit✔️✔️:   return result;
2006    // END RDKIT CPP FUNCTION SmilesWrite::detail::MolToSmiles fragment assembly section
2007    if params.canonical {
2008        let mut sorted = fragment_results;
2009        sorted.sort_by(|left, right| left.smiles.cmp(&right.smiles));
2010        context.atom_output_order.clear();
2011        context.bond_output_order.clear();
2012        for fragment in &sorted {
2013            context
2014                .atom_output_order
2015                .extend(fragment.atom_ordering.iter().copied());
2016            context
2017                .bond_output_order
2018                .extend(fragment.bond_ordering.iter().copied());
2019        }
2020        return Ok(sorted
2021            .into_iter()
2022            .map(|fragment| fragment.smiles)
2023            .collect::<Vec<_>>()
2024            .join("."));
2025    }
2026    context.atom_output_order.clear();
2027    context.bond_output_order.clear();
2028    for fragment in &fragment_results {
2029        context
2030            .atom_output_order
2031            .extend(fragment.atom_ordering.iter().copied());
2032        context
2033            .bond_output_order
2034            .extend(fragment.bond_ordering.iter().copied());
2035    }
2036    Ok(fragment_results
2037        .into_iter()
2038        .map(|fragment| fragment.smiles)
2039        .collect::<Vec<_>>()
2040        .join("."))
2041}
2042
2043fn validate_rooted_atom(
2044    molecule: &Molecule,
2045    params: &SmilesWriteParams,
2046) -> Result<(), SmilesWriteError> {
2047    if let Some(atom) = params.rooted_at_atom
2048        && atom >= molecule.num_atoms()
2049    {
2050        return Err(SmilesWriteError::RootedAtomOutOfRange { atom });
2051    }
2052    Ok(())
2053}
2054
2055fn validate_fragment_api_inputs(
2056    molecule: &Molecule,
2057    params: &SmilesWriteParams,
2058    atoms_to_use: &[usize],
2059    bonds_to_use: Option<&[usize]>,
2060    atom_symbols: Option<&[String]>,
2061    bond_symbols: Option<&[String]>,
2062) -> Result<(), SmilesWriteError> {
2063    for atom in atoms_to_use {
2064        validate_atom_index(molecule, *atom)?;
2065    }
2066    if let Some(bonds_to_use) = bonds_to_use {
2067        for bond in bonds_to_use {
2068            validate_bond_index(molecule, *bond)?;
2069        }
2070    }
2071    if let Some(root) = params.rooted_at_atom
2072        && !atoms_to_use.contains(&root)
2073    {
2074        return Err(SmilesWriteError::RootedAtomNotInFragment { atom: root });
2075    }
2076    if bonds_to_use.is_none()
2077        && let Some(root) = params.rooted_at_atom
2078    {
2079        let fragment_count = crate::notation::fragment::get_fragment_atom_mapping(molecule)
2080            .into_iter()
2081            .max()
2082            .map_or(0, |max_fragment| max_fragment + 1);
2083        if fragment_count > 1 {
2084            return Err(SmilesWriteError::RootedAtomRequiresSingleFragment { atom: root });
2085        }
2086    }
2087    if let Some(atom_symbols) = atom_symbols
2088        && atom_symbols.len() < molecule.num_atoms()
2089    {
2090        return Err(SmilesWriteError::AtomSymbolsTooShort {
2091            len: atom_symbols.len(),
2092            expected: molecule.num_atoms(),
2093        });
2094    }
2095    if let Some(bond_symbols) = bond_symbols
2096        && bond_symbols.len() < molecule.num_bonds()
2097    {
2098        return Err(SmilesWriteError::BondSymbolsTooShort {
2099            len: bond_symbols.len(),
2100            expected: molecule.num_bonds(),
2101        });
2102    }
2103    Ok(())
2104}
2105
2106fn validate_atom_index(molecule: &Molecule, atom: usize) -> Result<(), SmilesWriteError> {
2107    if atom >= molecule.num_atoms() {
2108        Err(SmilesWriteError::AtomOutOfRange { atom })
2109    } else {
2110        Ok(())
2111    }
2112}
2113
2114fn validate_bond_index(molecule: &Molecule, bond: usize) -> Result<(), SmilesWriteError> {
2115    if bond >= molecule.num_bonds() {
2116        Err(SmilesWriteError::BondOutOfRange { bond })
2117    } else {
2118        Ok(())
2119    }
2120}
2121
2122fn invariant_stage_error<T>(
2123    stage: SmilesPlanStage,
2124    message: &'static str,
2125) -> Result<T, SmilesWriteError> {
2126    Err(SmilesWriteError::InvariantViolation {
2127        stage: stage.as_str(),
2128        message,
2129    })
2130}
2131
2132#[cfg(test)]
2133mod tests;