mod cx;
mod direction;
mod stereo;
pub(crate) use self::stereo::serialize_ring_stereo_atoms;
use self::{cx::*, direction::*, stereo::*};
use crate::{
AtomId, AtomQueryPredicate, Bond, BondDirection, BondId, BondOrder, BondStereo, ChiralTag,
Molecule, QueryNode, ValenceError,
};
use std::collections::{BTreeMap, BTreeSet};
use std::sync::atomic::{AtomicU64, Ordering};
// Per-thread state of the random-SMILES generator. It starts at 0, is swapped
// in by `with_random_smiles_seed` and advanced by `next_random_smiles_u64`.
thread_local! {
static RANDOM_SMILES_SEED: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };
}
// Process-wide counter used by `next_unseeded_random_smiles_seed` to derive a
// fresh seed each time a caller does not supply one.
static RANDOM_SMILES_COUNTER: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
// NOTE(review): the two limits below are not referenced in the visible part of
// this file; presumably they bound canonical-ranking inputs — confirm at use site.
const CANON_MAX_NATOMS: i64 = 5000;
const CANON_MAX_BONDTYPE: i64 = 32;
/// Options controlling how a molecule is written as SMILES.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmilesWriteParams {
    /// Emit isomeric information (atom chirality, isotopes, `/` and `\` bond
    /// directions) and take it into account when ranking atoms.
    pub do_isomeric_smiles: bool,
    /// Write from a kekulized working copy instead of aromatic symbols.
    pub do_kekule: bool,
    /// Rank atoms canonically (only effective when `do_random` is off).
    pub canonical: bool,
    /// Forwarded to stereochemistry assignment as its clean-up flag.
    pub clean_stereo: bool,
    /// Write a symbol for every bond, including plain single/aromatic ones.
    pub all_bonds_explicit: bool,
    /// Put every atom in brackets so its hydrogen count is spelled out.
    pub all_hydrogens_explicit: bool,
    /// Pick each fragment's start atom from the thread-local random stream.
    pub do_random: bool,
    /// Atom index at which traversal of its fragment must start.
    pub rooted_at_atom: Option<usize>,
    /// When `false`, dative bonds are normalized before plain SMILES output.
    pub include_dative_bonds: bool,
    /// Clear atom-map numbers before ranking (they are stashed and restored
    /// around canonical output).
    pub ignore_atom_map_numbers: bool,
}

impl Default for SmilesWriteParams {
    /// Canonical, isomeric, aromatic output with no rooting or randomization.
    fn default() -> Self {
        Self {
            // Features that are switched on by default.
            do_isomeric_smiles: true,
            canonical: true,
            clean_stereo: true,
            include_dative_bonds: true,
            // Features that are opt-in.
            do_kekule: false,
            all_bonds_explicit: false,
            all_hydrogens_explicit: false,
            do_random: false,
            ignore_atom_map_numbers: false,
            rooted_at_atom: None,
        }
    }
}
/// Bit set selecting which CXSMILES extension sections are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CxSmilesFields(u32);

impl CxSmilesFields {
    pub const NONE: Self = Self(0);
    pub const ATOM_LABELS: Self = Self(1 << 0);
    pub const MOLFILE_VALUES: Self = Self(1 << 1);
    pub const COORDS: Self = Self(1 << 2);
    pub const RADICALS: Self = Self(1 << 3);
    pub const ATOM_PROPS: Self = Self(1 << 4);
    pub const LINKNODES: Self = Self(1 << 5);
    pub const ENHANCED_STEREO: Self = Self(1 << 6);
    pub const SGROUPS: Self = Self(1 << 7);
    pub const POLYMER: Self = Self(1 << 8);
    pub const BOND_CFG: Self = Self(1 << 9);
    pub const BOND_ATROPISOMER: Self = Self(1 << 10);
    pub const COORDINATE_BONDS: Self = Self(1 << 11);
    pub const HYDROGEN_BONDS: Self = Self(1 << 12);
    pub const ZERO_BONDS: Self = Self(1 << 13);
    /// Every bit of the low 31 set.
    pub const ALL: Self = Self(0x7fff_ffff);
    /// `ALL` with the `COORDS` bit toggled off.
    pub const ALL_BUT_COORDS: Self = Self(Self::ALL.0 ^ Self::COORDS.0);

    /// Returns the raw bit pattern.
    #[must_use]
    pub const fn bits(self) -> u32 {
        let Self(raw) = self;
        raw
    }

    /// Returns `true` when every bit of `other` is also set in `self`.
    #[must_use]
    pub const fn contains(self, other: Self) -> bool {
        let shared = self.0 & other.0;
        shared == other.0
    }

    /// Returns the union of both bit sets.
    #[must_use]
    pub const fn combine(self, other: Self) -> Self {
        let merged = self.0 | other.0;
        Self(merged)
    }
}

impl std::ops::BitOr for CxSmilesFields {
    type Output = Self;

    /// Operator form of [`CxSmilesFields::combine`].
    fn bitor(self, rhs: Self) -> Self {
        let Self(left) = self;
        let Self(right) = rhs;
        Self(left | right)
    }
}
/// Bond-direction policy for CXSMILES output; the value is handed to
/// `apply_cx_bond_direction_policy` while the molecule is being prepared.
// NOTE(review): the exact effect of each variant is defined by that helper,
// which is outside this view — confirm before relying on the names alone.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RestoreBondDirOption {
None,
True,
Clear,
}
/// Selects between plain SMILES output and CXSMILES output (SMILES followed by
/// an extension block) in `mol_to_smiles_with_mode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SmilesOutputMode {
/// Write the SMILES string only.
PlainSmiles,
/// Write the SMILES string followed by the CX extension block.
CxSmiles {
// Extension sections requested by the caller.
fields: CxSmilesFields,
// Bond-direction policy applied during molecule preparation.
restore_bond_dirs: RestoreBondDirOption,
// Set by `mol_to_cx_smiles` when `fields` contains `ENHANCED_STEREO`.
include_stereo_groups: bool,
},
}
/// Writer stage used to label internal invariant errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SmilesPlanStage {
    ShortTermAtomWriter,
    ShortTermBondWriter,
    LongTermCanonicalRanking,
}

impl SmilesPlanStage {
    /// Returns the variant name as a static string.
    const fn as_str(self) -> &'static str {
        let name = match self {
            Self::LongTermCanonicalRanking => "LongTermCanonicalRanking",
            Self::ShortTermBondWriter => "ShortTermBondWriter",
            Self::ShortTermAtomWriter => "ShortTermAtomWriter",
        };
        name
    }
}
/// Errors returned by the SMILES / CXSMILES writer entry points in this module.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SmilesWriteError {
/// A feature the writer does not support; displayed transparently.
#[error(transparent)]
UnsupportedFeature(#[from] crate::UnsupportedFeatureError),
/// Canonical ranking failed.
// NOTE(review): this variant has no `#[from]`, so `?` on a
// `crate::KekulizeError` converts to `Kekulize` below, never to this variant;
// it must be constructed explicitly.
#[error("canonical ranking failed: {source}")]
CanonicalRank { source: crate::KekulizeError },
/// Kekulization failed.
#[error("kekulization failed: {source}")]
Kekulize {
#[from]
source: crate::KekulizeError,
},
/// A molecule operation failed while preparing the output.
#[error("operation failed while preparing SMILES output: {source}")]
Operation {
#[from]
source: crate::OperationError,
},
/// Valence calculation failed.
#[error("valence calculation failed: {source}")]
Valence {
#[from]
source: ValenceError,
},
/// Stereochemistry preparation failed.
#[error("stereochemistry preparation failed: {source}")]
Stereo {
#[from]
source: crate::StereoError,
},
/// Ring finding failed while preparing the output.
#[error("ring finding failed while preparing SMILES output: {source}")]
RingFinding {
#[from]
source: crate::RingFindingError,
},
/// An atom index passed by the caller is out of range.
#[error("atom index {atom} is out of range")]
AtomOutOfRange { atom: usize },
/// A bond index passed by the caller is out of range.
#[error("bond index {bond} is out of range")]
BondOutOfRange { bond: usize },
/// `rooted_at_atom` is out of range.
#[error("rooted atom index {atom} is out of range")]
RootedAtomOutOfRange { atom: usize },
/// `rooted_at_atom` is not part of the `atoms_to_use` selection.
#[error("rooted atom index {atom} is not present in atoms_to_use")]
RootedAtomNotInFragment { atom: usize },
/// `rooted_at_atom` was given without `bonds_to_use` for a multi-fragment selection.
#[error(
"rooted atom index {atom} requires a single-fragment molecule when bonds_to_use is omitted"
)]
RootedAtomRequiresSingleFragment { atom: usize },
/// The atom symbol override slice is shorter than required.
#[error("atom symbol override vector has length {len}, expected at least {expected}")]
AtomSymbolsTooShort { len: usize, expected: usize },
/// The bond symbol override slice is shorter than required.
#[error("bond symbol override vector has length {len}, expected at least {expected}")]
BondSymbolsTooShort { len: usize, expected: usize },
/// A non-tetrahedral chiral permutation exceeds the limit for its chiral tag.
#[error(
"invalid non-tetrahedral chiral permutation {permutation} for {chiral_tag:?}; max allowed is {limit}"
)]
InvalidChiralPermutation {
chiral_tag: ChiralTag,
permutation: u32,
limit: u32,
},
/// Ring stereochemistry bookkeeping on an atom is inconsistent.
#[error("invalid ring stereochemistry state on atom {atom}: {requirement}")]
InvalidRingStereoState {
atom: usize,
requirement: &'static str,
},
/// An internal invariant of the writer was violated; `stage` names the writer stage.
#[error("internal SMILES writer invariant violated in {stage}: {message}")]
InvariantViolation {
stage: &'static str,
message: &'static str,
},
}
/// Mutable state shared across all fragments written during one SMILES call.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct SmilesWriteContext {
// Atom/bond output order; read after writing to build the `CxWriteScope`.
// NOTE(review): populated outside the visible code (presumably in
// `assemble_fragment_smiles`) — confirm.
atom_output_order: Vec<AtomId>,
bond_output_order: Vec<BondId>,
// Ring index -> ring-closure digit currently assigned to it.
ring_closure_digits: BTreeMap<usize, usize>,
// Ring indices whose digit is released before the next atom is written.
ring_closures_to_erase: Vec<usize>,
// Per-atom chirality adjustments accumulated from each fragment traversal.
chiral_tag_overrides: BTreeMap<AtomId, ChiralTag>,
chiral_inversions: BTreeSet<AtomId>,
chiral_permutations: BTreeMap<AtomId, u32>,
// Atoms whose chirality is suppressed when the atom is written.
broken_chiral_atoms: BTreeSet<AtomId>,
}
/// The atoms and bonds of one connected fragment to be written, plus an
/// optional forced start atom.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FragmentWritePlan {
// Atoms belonging to the fragment.
atoms: Vec<AtomId>,
// Bonds whose two endpoints both lie in the fragment.
bonds: Vec<BondId>,
// Set only when the caller's `rooted_at_atom` falls inside this fragment.
rooted_at_atom: Option<AtomId>,
}
/// Output of writing a single fragment.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct FragmentWriteResult {
// SMILES text of the fragment.
smiles: String,
// Atoms in the order they were written.
atom_ordering: Vec<AtomId>,
// Bonds in the order they were written (ring-closure entries are not recorded).
bond_ordering: Vec<BondId>,
}
/// Optional caller-supplied replacement text for atoms and bonds.
/// When present, the slices are indexed by atom index / bond index.
#[derive(Debug, Clone, Copy, Default)]
struct SmilesWriteOverrides<'a> {
// Replacement symbol per atom, indexed by atom index.
atom_symbols: Option<&'a [String]>,
// Replacement symbol per bond, indexed by bond index.
bond_symbols: Option<&'a [String]>,
}
/// Atom and bond ordering against which CX extension data is written.
#[derive(Debug, Clone)]
struct CxWriteScope {
    atom_order: Vec<AtomId>,
    bond_order: Vec<BondId>,
}

impl CxWriteScope {
    /// Builds a scope covering every atom and bond of `molecule`, in the
    /// molecule's own storage order.
    fn full_molecule(molecule: &Molecule) -> Self {
        let mut atom_order = Vec::new();
        atom_order.extend(molecule.atoms().iter().map(|atom| atom.id()));
        let mut bond_order = Vec::new();
        bond_order.extend(molecule.bonds().iter().map(|bond| bond.id()));
        Self {
            atom_order,
            bond_order,
        }
    }
}
/// One element of the linear traversal stack that `write_mol_stack` turns into text.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
enum MolStackElem {
/// Write an atom.
Atom(AtomId),
/// Write a bond; the second field is the atom written to the left of the bond.
Bond(BondId, AtomId),
/// Write a ring-closure digit for ring `ring_idx`.
Ring { bond: BondId, ring_idx: usize },
/// Write `(`.
BranchOpen,
/// Write `)`.
BranchClose,
}
/// Result of the DFS traversal of one fragment: the output stack plus the
/// chirality adjustments that are merged into `SmilesWriteContext`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct CanonicalTraversalResult {
// Elements to write, in output order.
stack: Vec<MolStackElem>,
// Passed to `canonicalize_double_bond_directions_for_writer`.
// NOTE(review): presumably indexed by bond index, true for ring-closure bonds — confirm.
traversal_ring_closure_bonds: Vec<bool>,
chiral_tag_overrides: BTreeMap<AtomId, ChiralTag>,
chiral_inversions: BTreeSet<AtomId>,
chiral_permutations: BTreeMap<AtomId, u32>,
broken_chiral_atoms: BTreeSet<AtomId>,
}
/// Writes `molecule` as a plain SMILES string using `params`.
///
/// # Errors
/// Propagates any [`SmilesWriteError`] raised while preparing or writing.
pub fn mol_to_smiles(
    molecule: &Molecule,
    params: &SmilesWriteParams,
) -> Result<String, SmilesWriteError> {
    let mode = SmilesOutputMode::PlainSmiles;
    mol_to_smiles_with_mode(molecule, params, mode)
}
/// Writes `molecule` as CXSMILES: the SMILES string followed, when non-empty,
/// by the extension block selected through `fields`.
///
/// Stereo groups are included exactly when `fields` contains
/// [`CxSmilesFields::ENHANCED_STEREO`].
///
/// # Errors
/// Propagates any [`SmilesWriteError`] raised while preparing or writing.
pub fn mol_to_cx_smiles(
    molecule: &Molecule,
    params: &SmilesWriteParams,
    fields: CxSmilesFields,
    restore_bond_dirs: RestoreBondDirOption,
) -> Result<String, SmilesWriteError> {
    let include_stereo_groups = fields.contains(CxSmilesFields::ENHANCED_STEREO);
    let mode = SmilesOutputMode::CxSmiles {
        fields,
        restore_bond_dirs,
        include_stereo_groups,
    };
    mol_to_smiles_with_mode(molecule, params, mode)
}
/// Generates `num_smiles` randomized (non-canonical) SMILES strings for `molecule`.
///
/// A `random_seed` of `0` means "unseeded": a fresh seed is drawn from the
/// process-wide counter. Any other value makes the sequence reproducible.
/// Each output string is written under its own seed, obtained by advancing the
/// stream seed with `splitmix64`.
///
/// # Errors
/// Returns the first [`SmilesWriteError`] produced by `mol_to_smiles`.
pub fn mol_to_random_smiles_vect(
    molecule: &Molecule,
    num_smiles: usize,
    random_seed: u64,
    do_isomeric_smiles: bool,
    do_kekule: bool,
    all_bonds_explicit: bool,
    all_hydrogens_explicit: bool,
) -> Result<Vec<String>, SmilesWriteError> {
    // The parameters do not depend on the iteration, so build them once.
    let params = SmilesWriteParams {
        do_isomeric_smiles,
        do_kekule,
        canonical: false,
        clean_stereo: true,
        all_bonds_explicit,
        all_hydrogens_explicit,
        do_random: true,
        rooted_at_atom: None,
        include_dative_bonds: true,
        ignore_atom_map_numbers: false,
    };
    let mut result = Vec::with_capacity(num_smiles);
    let mut stream_seed = if random_seed == 0 {
        next_unseeded_random_smiles_seed(0)
    } else {
        random_seed
    };
    for _ in 0..num_smiles {
        stream_seed = splitmix64(stream_seed);
        // The seed is installed only for the duration of this one write.
        let smiles = with_random_smiles_seed(stream_seed, || mol_to_smiles(molecule, &params))?;
        result.push(smiles);
    }
    Ok(result)
}
/// Runs `f` with the thread-local random-SMILES state set to `seed`, then puts
/// the previous state back and returns `f`'s result.
fn with_random_smiles_seed<T>(
    seed: u64,
    f: impl FnOnce() -> Result<T, SmilesWriteError>,
) -> Result<T, SmilesWriteError> {
    let previous = RANDOM_SMILES_SEED.with(|cell| cell.replace(seed));
    let outcome = f();
    RANDOM_SMILES_SEED.with(|cell| cell.set(previous));
    outcome
}
/// Returns the current thread-local random state and advances it by one
/// `splitmix64` step.
fn next_random_smiles_u64() -> u64 {
    // `replace` stores the advanced state and hands back the old one.
    RANDOM_SMILES_SEED.with(|cell| cell.replace(splitmix64(cell.get())))
}
/// Draws a seed for callers that supplied none: bumps the process-wide counter,
/// adds `offset` (wrapping) to the value it held before the bump, and mixes the
/// sum with `splitmix64`.
fn next_unseeded_random_smiles_seed(offset: u64) -> u64 {
    let ticket = RANDOM_SMILES_COUNTER.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
    let unmixed = ticket.wrapping_add(offset);
    splitmix64(unmixed)
}
/// One step of a SplitMix64-style mixer: adds the increment constant to
/// `value`, then applies two xor-shift/multiply rounds and a final xor-shift.
/// All arithmetic wraps.
fn splitmix64(mut value: u64) -> u64 {
    const INCREMENT: u64 = 0x9e37_79b9_7f4a_7c15;
    const MULTIPLIER_A: u64 = 0xbf58_476d_1ce4_e5b9;
    const MULTIPLIER_B: u64 = 0x94d0_49bb_1331_11eb;

    value = value.wrapping_add(INCREMENT);
    value ^= value >> 30;
    value = value.wrapping_mul(MULTIPLIER_A);
    value ^= value >> 27;
    value = value.wrapping_mul(MULTIPLIER_B);
    value ^ (value >> 31)
}
/// Shared implementation behind `mol_to_smiles` and `mol_to_cx_smiles`.
///
/// Works on a clone of `molecule`: prepares it for the requested `mode`, splits
/// it into fragments, ranks every fragment, optionally kekulizes, writes each
/// fragment, assembles the fragment strings and, in CXSMILES mode, appends the
/// extension block separated by a single space.
///
/// Returns an empty string for a molecule without atoms.
///
/// # Errors
/// Propagates errors from root validation, preparation, ranking, kekulization,
/// fragment writing, assembly and CX extension generation.
fn mol_to_smiles_with_mode(
molecule: &Molecule,
params: &SmilesWriteParams,
mode: SmilesOutputMode,
) -> Result<String, SmilesWriteError> {
// The root index is validated before the empty-molecule shortcut.
validate_rooted_atom(molecule, params)?;
if molecule.num_atoms() == 0 {
return Ok(String::new());
}
// Everything below mutates a private copy; the caller's molecule is untouched.
let mut molecule = molecule.clone();
let mut context = SmilesWriteContext::default();
let mut fragment_results = Vec::new();
let mut working_params = params.clone();
// Preparation returns the stashed atom maps when `ignore_atom_map_numbers`
// is set (the maps are cleared on the working copy), otherwise `None`.
let saved_atom_maps = match mode {
SmilesOutputMode::PlainSmiles => {
prepare_plain_smiles_molecule(&mut molecule, &working_params)?
}
SmilesOutputMode::CxSmiles {
fields,
restore_bond_dirs,
include_stereo_groups,
} => prepare_cx_smiles_molecule(
&mut molecule,
&mut working_params,
fields,
restore_bond_dirs,
include_stereo_groups,
)?,
};
let fragment_plans = collect_fragment_write_plans(&molecule, &working_params)?;
// All fragments are ranked before any of them is written, i.e. while the
// atom maps are still in their stashed (cleared) state.
let fragment_ranks = fragment_plans
.iter()
.map(|plan| rank_fragment_atoms_for_smiles(&molecule, plan, &working_params, mode))
.collect::<Result<Vec<_>, _>>()?;
if working_params.canonical {
restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
}
// Kekulization is decided by the caller's `params`; the flag is then switched
// off on `working_params` for the rest of the write.
if params.do_kekule {
molecule = kekulize_for_smiles(&molecule)?;
}
working_params.do_kekule = false;
for (plan, ranks) in fragment_plans.iter().zip(fragment_ranks.iter()) {
// Atom maps are put back for the write of each fragment ...
if working_params.canonical {
restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
}
fragment_results.push(write_fragment_smiles_with_ranks(
&mut molecule,
plan,
&ranks,
&working_params,
SmilesWriteOverrides::default(),
&mut context,
)?);
// ... and cleared again afterwards; the freshly stashed copy is discarded
// because `saved_atom_maps` already holds the original values.
if working_params.canonical && saved_atom_maps.is_some() {
let _ = stash_and_clear_atom_maps_for_smiles(&mut molecule, &working_params);
}
}
// Final restore so the CX extension below sees the original atom maps.
if working_params.canonical {
restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
}
let mut result = assemble_fragment_smiles(fragment_results, &working_params, &mut context)?;
if let SmilesOutputMode::CxSmiles { fields, .. } = mode {
// The extension is written against the output order recorded in the context.
let scope = CxWriteScope {
atom_order: context.atom_output_order.clone(),
bond_order: context.bond_output_order.clone(),
};
let cx_extension = get_cx_extensions_scoped(&molecule, fields, &scope)?;
if !cx_extension.is_empty() {
result.push(' ');
result.push_str(&cx_extension);
}
}
Ok(result)
}
/// Prepares the working copy of a molecule for plain SMILES output.
///
/// Returns the atom-map numbers that were stashed (and cleared on `molecule`)
/// when `params.ignore_atom_map_numbers` is set, otherwise `None`.
///
/// When the minimal plain-SMILES path applies and the molecule validates for
/// it, no further preparation is performed.
fn prepare_plain_smiles_molecule(
    molecule: &mut Molecule,
    params: &SmilesWriteParams,
) -> Result<Option<Vec<Option<u32>>>, SmilesWriteError> {
    let stashed_maps = stash_and_clear_atom_maps_for_smiles(molecule, params);

    // Fast path: nothing else to prepare.
    let minimal =
        is_minimal_plain_smiles_path(params) && validate_minimal_plain_smiles_molecule(molecule);
    if minimal {
        return Ok(stashed_maps);
    }

    clear_fragment_temp_molecule_computed_stereo_props_for_writer(molecule);
    update_property_cache_for_smiles(molecule)?;

    let isomeric = params.do_isomeric_smiles;
    // Stereochemistry is only assigned when it has not been marked as done.
    if isomeric && molecule.prop("_StereochemDone").is_none() {
        assign_stereochemistry_for_smiles(molecule, params.clean_stereo)?;
    }
    if !params.include_dative_bonds {
        normalize_dative_bonds_for_plain_smiles(molecule)?;
    }
    if !isomeric {
        crate::notation::smiles::clear_all_bond_dir_flags(molecule);
    }
    remove_plain_smiles_only_cx_state(molecule)?;
    Ok(stashed_maps)
}
/// Prepares the working copy of a molecule for CXSMILES output.
///
/// Performs the same initial preparation as the plain path (unless the minimal
/// path applies), then normalizes dative and hydrogen bonds, applies the bond
/// direction policy, cleans stereo groups when `params.clean_stereo` is set,
/// canonicalizes enhanced stereo when `include_stereo_groups` is set, and
/// finally validates the requested extension `fields`.
///
/// Returns the stashed atom-map numbers, or `None` when they were not cleared.
fn prepare_cx_smiles_molecule(
    molecule: &mut Molecule,
    params: &mut SmilesWriteParams,
    fields: CxSmilesFields,
    restore_bond_dirs: RestoreBondDirOption,
    include_stereo_groups: bool,
) -> Result<Option<Vec<Option<u32>>>, SmilesWriteError> {
    let stashed_maps = stash_and_clear_atom_maps_for_smiles(molecule, params);

    let minimal =
        is_minimal_plain_smiles_path(params) && validate_minimal_plain_smiles_molecule(molecule);
    if !minimal {
        clear_fragment_temp_molecule_computed_stereo_props_for_writer(molecule);
        update_property_cache_for_smiles(molecule)?;
        if params.do_isomeric_smiles && molecule.prop("_StereochemDone").is_none() {
            assign_stereochemistry_for_smiles(molecule, params.clean_stereo)?;
        }
    }

    normalize_dative_bonds_for_cx_smiles(molecule)?;
    normalize_hydrogen_bonds_for_cx_smiles(molecule)?;
    apply_cx_bond_direction_policy(molecule, restore_bond_dirs)?;

    if params.clean_stereo {
        // Stereo-group clean-up needs assigned stereochemistry.
        if molecule.prop("_StereochemDone").is_none() {
            assign_stereochemistry_for_smiles(molecule, true)?;
        }
        cleanup_stereo_groups_for_cx_smiles(molecule)?;
    }
    if include_stereo_groups {
        canonicalize_enhanced_stereo_for_smiles(molecule)?;
    }
    validate_cx_extension_plan(fields)?;
    Ok(stashed_maps)
}
/// When `params.ignore_atom_map_numbers` is set, removes the atom-map number
/// from every atom and returns the removed values in atom order; otherwise
/// leaves the molecule untouched and returns `None`.
fn stash_and_clear_atom_maps_for_smiles(
    molecule: &mut Molecule,
    params: &SmilesWriteParams,
) -> Option<Vec<Option<u32>>> {
    if !params.ignore_atom_map_numbers {
        return None;
    }
    let topology = molecule.topology_block_mut();
    let mut saved = Vec::with_capacity(topology.atoms.len());
    for atom in &mut topology.atoms {
        // Remember the value first, then clear it.
        saved.push(atom.atom_map());
        atom.set_atom_map(None);
    }
    Some(saved)
}
/// Writes previously stashed atom-map numbers back onto the atoms, pairing
/// atoms and saved values positionally. Does nothing when nothing was stashed.
fn restore_atom_maps_after_canonical_smiles(
    molecule: &mut Molecule,
    saved_atom_maps: Option<&[Option<u32>]>,
) {
    if let Some(saved) = saved_atom_maps {
        let atoms = molecule.topology_block_mut().atoms.iter_mut();
        // `zip` stops at the shorter side, so surplus atoms keep their state.
        for (atom, &stored) in atoms.zip(saved) {
            atom.set_atom_map(stored);
        }
    }
}
/// Splits the whole molecule into one `FragmentWritePlan` per connected
/// fragment, in fragment-index order.
///
/// A bond is assigned to a fragment when both of its endpoints map to that
/// fragment. `rooted_at_atom` is set only on the plan containing the root.
fn collect_fragment_write_plans(
    molecule: &Molecule,
    params: &SmilesWriteParams,
) -> Result<Vec<FragmentWritePlan>, SmilesWriteError> {
    let fragment_of = crate::notation::fragment::get_fragment_atom_mapping(molecule);
    if fragment_of.is_empty() {
        return Ok(Vec::new());
    }
    let fragment_count = fragment_of.iter().copied().max().unwrap_or(0) + 1;

    // Bucket atoms by fragment.
    let mut atoms_by_fragment = vec![Vec::new(); fragment_count];
    for (atom_idx, &fragment_idx) in fragment_of.iter().enumerate() {
        atoms_by_fragment[fragment_idx].push(AtomId::new(atom_idx));
    }

    // Bucket bonds by fragment; bonds spanning two fragments are dropped.
    let mut bonds_by_fragment = vec![Vec::new(); fragment_count];
    for bond in molecule.bonds() {
        let owner = fragment_of[bond.begin().index()];
        if owner == fragment_of[bond.end().index()] {
            bonds_by_fragment[owner].push(bond.id());
        }
    }

    let requested_root = params.rooted_at_atom.map(AtomId::new);
    let plans = atoms_by_fragment
        .into_iter()
        .zip(bonds_by_fragment)
        .enumerate()
        .map(|(fragment_idx, (atoms, bonds))| FragmentWritePlan {
            atoms,
            bonds,
            rooted_at_atom: requested_root
                .filter(|root| fragment_of[root.index()] == fragment_idx),
        })
        .collect();
    Ok(plans)
}
/// Ranks the atoms of `plan` and then writes the fragment with those ranks.
fn write_fragment_smiles(
    molecule: &mut Molecule,
    plan: &FragmentWritePlan,
    params: &SmilesWriteParams,
    mode: SmilesOutputMode,
    overrides: SmilesWriteOverrides<'_>,
    context: &mut SmilesWriteContext,
) -> Result<FragmentWriteResult, SmilesWriteError> {
    let atom_ranks: Vec<usize> = rank_fragment_atoms_for_smiles(molecule, plan, params, mode)?;
    write_fragment_smiles_with_ranks(
        molecule,
        plan,
        atom_ranks.as_slice(),
        params,
        overrides,
        context,
    )
}
/// Chooses the start atom for `plan` from the supplied `ranks` and writes the
/// fragment beginning there.
fn write_fragment_smiles_with_ranks(
    molecule: &mut Molecule,
    plan: &FragmentWritePlan,
    ranks: &[usize],
    params: &SmilesWriteParams,
    overrides: SmilesWriteOverrides<'_>,
    context: &mut SmilesWriteContext,
) -> Result<FragmentWriteResult, SmilesWriteError> {
    let first_atom = choose_fragment_start_atom(plan, ranks, params)?;
    fragment_smiles_construct(molecule, plan, first_atom, ranks, params, overrides, context)
}
fn fragment_smiles_construct(
molecule: &mut Molecule,
plan: &FragmentWritePlan,
start_atom: AtomId,
ranks: &[usize],
params: &SmilesWriteParams,
overrides: SmilesWriteOverrides<'_>,
context: &mut SmilesWriteContext,
) -> Result<FragmentWriteResult, SmilesWriteError> {
if params.canonical && params.do_isomeric_smiles {
canonicalize_enhanced_stereo_for_smiles(molecule)?;
}
let traversal =
canonicalize_fragment_stack(molecule, plan, start_atom, ranks, params, overrides)?;
canonicalize_double_bond_directions_for_writer(
molecule,
&traversal.stack,
&traversal.traversal_ring_closure_bonds,
)?;
context.chiral_tag_overrides.extend(
traversal
.chiral_tag_overrides
.iter()
.map(|(atom, tag)| (*atom, *tag)),
);
context
.chiral_inversions
.extend(traversal.chiral_inversions.iter().copied());
context.chiral_permutations.extend(
traversal
.chiral_permutations
.iter()
.map(|(atom, permutation)| (*atom, *permutation)),
);
context
.broken_chiral_atoms
.extend(traversal.broken_chiral_atoms.iter().copied());
write_mol_stack(molecule, &traversal.stack, params, overrides, context)
}
/// Returns one rank per atom of `plan`, in `plan.atoms` order.
///
/// Canonical ranks are used only for canonical, non-random output; otherwise
/// each atom's own index serves as its rank.
fn rank_fragment_atoms_for_smiles(
    molecule: &Molecule,
    plan: &FragmentWritePlan,
    params: &SmilesWriteParams,
    mode: SmilesOutputMode,
) -> Result<Vec<usize>, SmilesWriteError> {
    let use_canonical_ranks = params.canonical && !params.do_random;
    if !use_canonical_ranks {
        let index_ranks = plan.atoms.iter().map(|atom| atom.index()).collect();
        return Ok(index_ranks);
    }
    rank_mol_atoms_for_smiles(molecule, plan, params, mode)
}
/// Computes canonical ranks for the whole molecule and returns the ranks of
/// the atoms in `plan`, in `plan.atoms` order.
///
/// Chirality, isotopes, stereo groups and ring stereo take part in the ranking
/// only when `params.do_isomeric_smiles` is set; atom maps always do.
fn rank_mol_atoms_for_smiles(
molecule: &Molecule,
plan: &FragmentWritePlan,
params: &SmilesWriteParams,
mode: SmilesOutputMode,
) -> Result<Vec<usize>, SmilesWriteError> {
// `_stage` and `mode` are currently unused in this function.
let _stage = SmilesPlanStage::LongTermCanonicalRanking;
let _ = mode;
// NOTE(review): the error of the call below is converted through `?`. If it
// is a `crate::KekulizeError` it becomes `SmilesWriteError::Kekulize`, not
// `CanonicalRank` — confirm that is the intended variant.
let ranks = crate::canon_rank::rank_mol_atoms_with_options(
molecule,
crate::canon_rank::CanonicalRankOptions {
break_ties: true,
include_chirality: params.do_isomeric_smiles,
include_isotopes: params.do_isomeric_smiles,
include_atom_maps: true,
include_chiral_presence: false,
include_stereo_groups: params.do_isomeric_smiles,
use_non_stereo_ranks: false,
include_ring_stereo: params.do_isomeric_smiles,
chirality_rings_use_ring_stereo: true,
},
)?;
// Ranks come back indexed by atom index; pick out this fragment's atoms.
Ok(plan.atoms.iter().map(|atom| ranks[atom.index()]).collect())
}
/// Picks the atom at which traversal of `plan` starts.
///
/// Priority: the plan's forced root, then (for random output) an atom chosen
/// from the thread-local random stream, then the atom with the lowest rank
/// (the first one on ties).
///
/// `ranks` is positional: `ranks[i]` belongs to `plan.atoms[i]`.
///
/// # Errors
/// Returns an invariant violation when there is no atom to choose from, or
/// when `ranks` is longer than `plan.atoms` and the winning position has no atom.
fn choose_fragment_start_atom(
    plan: &FragmentWritePlan,
    ranks: &[usize],
    params: &SmilesWriteParams,
) -> Result<AtomId, SmilesWriteError> {
    if let Some(root) = plan.rooted_at_atom {
        return Ok(root);
    }
    if params.do_random {
        // Guard first: `% 0` would panic on an empty plan.
        if plan.atoms.is_empty() {
            return invariant_stage_error(
                SmilesPlanStage::ShortTermAtomWriter,
                "choose_fragment_start_atom() called with empty random atom scope",
            );
        }
        let idx = (next_random_smiles_u64() as usize) % plan.atoms.len();
        return Ok(plan.atoms[idx]);
    }
    let Some((idx, _)) = ranks.iter().enumerate().min_by_key(|(_, rank)| **rank) else {
        return invariant_stage_error(
            SmilesPlanStage::ShortTermAtomWriter,
            "choose_fragment_start_atom() called with empty canonical rank scope",
        );
    };
    match plan.atoms.get(idx) {
        Some(atom) => Ok(*atom),
        // Ranks and atoms are expected to have equal length.
        None => invariant_stage_error(
            SmilesPlanStage::ShortTermAtomWriter,
            "choose_fragment_start_atom() rank scope is larger than the fragment atom scope",
        ),
    }
}
fn canonicalize_fragment_stack(
molecule: &Molecule,
plan: &FragmentWritePlan,
start_atom: AtomId,
ranks: &[usize],
params: &SmilesWriteParams,
overrides: SmilesWriteOverrides<'_>,
) -> Result<CanonicalTraversalResult, SmilesWriteError> {
canonical_dfs_traversal(
molecule,
plan,
start_atom,
ranks,
params.do_isomeric_smiles,
params.clean_stereo,
params.do_random,
overrides.bond_symbols,
)
}
/// Turns a traversal stack into SMILES text.
///
/// Atoms and bonds are written through the overrides when supplied (indexed by
/// atom / bond index), otherwise through the regular atom and bond writers.
/// Written atoms and bonds are recorded in output order; ring closures and
/// branch parentheses only contribute text.
fn write_mol_stack(
    molecule: &Molecule,
    stack: &[MolStackElem],
    params: &SmilesWriteParams,
    overrides: SmilesWriteOverrides<'_>,
    context: &mut SmilesWriteContext,
) -> Result<FragmentWriteResult, SmilesWriteError> {
    let mut written = FragmentWriteResult::default();
    for element in stack {
        match element {
            MolStackElem::BranchOpen => written.smiles.push('('),
            MolStackElem::BranchClose => written.smiles.push(')'),
            MolStackElem::Ring { ring_idx, .. } => {
                write_ring_closure(&mut written.smiles, *ring_idx, context)?;
            }
            MolStackElem::Bond(bond_id, left_atom) => {
                match overrides.bond_symbols {
                    Some(symbols) => written.smiles.push_str(&symbols[bond_id.index()]),
                    None => {
                        let text = build_bond_smiles(molecule, *bond_id, *left_atom, params)?;
                        written.smiles.push_str(&text);
                    }
                }
                written.bond_ordering.push(*bond_id);
            }
            MolStackElem::Atom(atom_id) => {
                // Digits of rings closed since the previous atom become free again.
                for finished in context.ring_closures_to_erase.drain(..) {
                    context.ring_closure_digits.remove(&finished);
                }
                match overrides.atom_symbols {
                    Some(symbols) => written.smiles.push_str(&symbols[atom_id.index()]),
                    None => {
                        let text = build_atom_smiles(molecule, *atom_id, params, context)?;
                        written.smiles.push_str(&text);
                    }
                }
                written.atom_ordering.push(*atom_id);
            }
        }
    }
    Ok(written)
}
pub fn mol_fragment_to_smiles(
molecule: &Molecule,
params: &SmilesWriteParams,
atoms_to_use: &[usize],
bonds_to_use: Option<&[usize]>,
atom_symbols: Option<&[String]>,
bond_symbols: Option<&[String]>,
) -> Result<String, SmilesWriteError> {
validate_fragment_api_inputs(
molecule,
params,
atoms_to_use,
bonds_to_use,
atom_symbols,
bond_symbols,
)?;
if molecule.num_atoms() == 0 || atoms_to_use.is_empty() {
return Ok(String::new());
}
let mut molecule = if params.do_kekule {
kekulize_for_smiles(molecule)?
} else {
molecule.clone()
};
let mut working_params = params.clone();
working_params.do_kekule = false;
let saved_atom_maps = prepare_plain_smiles_molecule(&mut molecule, &working_params)?;
let mut plans =
collect_fragment_api_write_plans(&molecule, &working_params, atoms_to_use, bonds_to_use)?;
if working_params.canonical {
restore_atom_maps_after_canonical_smiles(&mut molecule, saved_atom_maps.as_deref());
plans.sort_by_key(|plan| {
plan.atoms
.iter()
.map(|atom| atom.index())
.min()
.unwrap_or(usize::MAX)
});
}
let overrides = SmilesWriteOverrides {
atom_symbols,
bond_symbols,
};
let mut context = SmilesWriteContext::default();
let mut results = Vec::new();
for plan in &plans {
results.push(write_fragment_smiles(
&mut molecule,
plan,
&working_params,
SmilesOutputMode::PlainSmiles,
overrides,
&mut context,
)?);
}
assemble_fragment_smiles(results, &working_params, &mut context)
}
pub fn mol_fragment_to_cx_smiles(
molecule: &Molecule,
params: &SmilesWriteParams,
atoms_to_use: &[usize],
bonds_to_use: Option<&[usize]>,
atom_symbols: Option<&[String]>,
bond_symbols: Option<&[String]>,
fields: CxSmilesFields,
) -> Result<String, SmilesWriteError> {
validate_fragment_api_inputs(
molecule,
params,
atoms_to_use,
bonds_to_use,
atom_symbols,
bond_symbols,
)?;
let mut context = SmilesWriteContext::default();
let smiles = mol_fragment_to_smiles_with_context(
molecule,
params,
atoms_to_use,
bonds_to_use,
atom_symbols,
bond_symbols,
&mut context,
)?;
let scope = CxWriteScope {
atom_order: context.atom_output_order,
bond_order: context.bond_output_order,
};
let cx_extension = get_cx_extensions_scoped(molecule, fields, &scope)?;
if cx_extension.is_empty() {
Ok(smiles)
} else {
Ok(format!("{smiles} {cx_extension}"))
}
}
/// Writes the selected sub-structure as SMILES while recording writer state in
/// the caller's `context`. Inputs are expected to be validated by the caller.
///
/// Works on a copy of `molecule` (kekulized first when `params.do_kekule` is
/// set). For canonical output the stashed atom maps are restored before writing
/// and the fragments are ordered by their smallest atom index.
fn mol_fragment_to_smiles_with_context(
    molecule: &Molecule,
    params: &SmilesWriteParams,
    atoms_to_use: &[usize],
    bonds_to_use: Option<&[usize]>,
    atom_symbols: Option<&[String]>,
    bond_symbols: Option<&[String]>,
    context: &mut SmilesWriteContext,
) -> Result<String, SmilesWriteError> {
    let nothing_to_write = molecule.num_atoms() == 0 || atoms_to_use.is_empty();
    if nothing_to_write {
        return Ok(String::new());
    }

    let mut working_molecule = if params.do_kekule {
        kekulize_for_smiles(molecule)?
    } else {
        molecule.clone()
    };
    // Kekulization already happened above, so the writer must not repeat it.
    let mut working_params = params.clone();
    working_params.do_kekule = false;

    let saved_atom_maps = prepare_plain_smiles_molecule(&mut working_molecule, &working_params)?;
    let mut plans = collect_fragment_api_write_plans(
        &working_molecule,
        &working_params,
        atoms_to_use,
        bonds_to_use,
    )?;
    if working_params.canonical {
        restore_atom_maps_after_canonical_smiles(
            &mut working_molecule,
            saved_atom_maps.as_deref(),
        );
        // Stable sort: fragments with equal keys keep their discovery order.
        plans.sort_by_key(|plan| {
            let lowest_index = plan.atoms.iter().map(|atom| atom.index()).min();
            lowest_index.unwrap_or(usize::MAX)
        });
    }

    let overrides = SmilesWriteOverrides {
        atom_symbols,
        bond_symbols,
    };
    let mut fragment_results = Vec::new();
    for plan in plans.iter() {
        let written = write_fragment_smiles(
            &mut working_molecule,
            plan,
            &working_params,
            SmilesOutputMode::PlainSmiles,
            overrides,
            context,
        )?;
        fragment_results.push(written);
    }
    assemble_fragment_smiles(fragment_results, &working_params, context)
}
fn collect_fragment_api_write_plans(
molecule: &Molecule,
params: &SmilesWriteParams,
atoms_to_use: &[usize],
bonds_to_use: Option<&[usize]>,
) -> Result<Vec<FragmentWritePlan>, SmilesWriteError> {
let atom_set = atoms_to_use.iter().copied().collect::<BTreeSet<_>>();
let bond_set = if let Some(bonds_to_use) = bonds_to_use {
bonds_to_use.iter().copied().collect::<BTreeSet<_>>()
} else {
molecule
.bonds()
.iter()
.filter(|bond| {
atom_set.contains(&bond.begin().index()) && atom_set.contains(&bond.end().index())
})
.map(|bond| bond.id().index())
.collect::<BTreeSet<_>>()
};
let mut seen = BTreeSet::new();
let mut plans = Vec::new();
for &start in atoms_to_use {
if seen.contains(&start) {
continue;
}
let mut stack = vec![AtomId::new(start)];
let mut atoms = Vec::new();
let mut bonds = BTreeSet::new();
while let Some(atom) = stack.pop() {
if !seen.insert(atom.index()) {
continue;
}
atoms.push(atom);
for bond in molecule.bonds() {
if !bond_set.contains(&bond.id().index()) {
continue;
}
let Some(other) = bond_other_atom(bond, atom) else {
continue;
};
if !atom_set.contains(&other.index()) {
continue;
}
bonds.insert(bond.id());
if !seen.contains(&other.index()) {
stack.push(other);
}
}
}
atoms.sort_by_key(|atom| atom.index());
let bonds = bonds.into_iter().collect::<Vec<_>>();
let rooted_at_atom = params
.rooted_at_atom
.map(AtomId::new)
.filter(|root| atoms.contains(root));
plans.push(FragmentWritePlan {
atoms,
bonds,
rooted_at_atom,
});
}
Ok(plans)
}
/// Returns the SMILES text for the atom at index `atom`, without any
/// traversal-dependent chirality adjustment.
///
/// # Errors
/// Returns an error when `atom` fails index validation or the atom cannot be
/// written.
pub fn get_atom_smiles(
    molecule: &Molecule,
    atom: usize,
    params: &SmilesWriteParams,
) -> Result<String, SmilesWriteError> {
    validate_atom_index(molecule, atom)?;
    let atom_id = AtomId::new(atom);
    // No tag override, no inversion, no permutation override, chirality intact.
    get_atom_smiles_impl(molecule, atom_id, params, None, false, None, false)
}
/// Returns the SMILES text for `atom`, applying the chirality adjustments that
/// `context` holds for it.
fn get_atom_smiles_with_context(
    molecule: &Molecule,
    atom: AtomId,
    params: &SmilesWriteParams,
    context: &SmilesWriteContext,
) -> Result<String, SmilesWriteError> {
    let tag_override = context.chiral_tag_overrides.get(&atom).copied();
    let inverted = context.chiral_inversions.contains(&atom);
    let permutation_override = context.chiral_permutations.get(&atom).copied();
    let broken = context.broken_chiral_atoms.contains(&atom);
    get_atom_smiles_impl(
        molecule,
        atom,
        params,
        tag_override,
        inverted,
        permutation_override,
        broken,
    )
}
/// Builds the SMILES text for a single atom.
///
/// Output layout: optional `[`, isotope (isomeric output only), symbol,
/// chirality mark, and — inside brackets only — hydrogen count, charge and
/// atom-map number, then `]`. A `_supplementalSmilesLabel` property, when
/// present, is appended after everything else.
///
/// * `chiral_tag_override`, `invert_chirality`, `chiral_permutation_override` —
///   traversal-dependent adjustments forwarded to the chirality helper.
/// * `broken_chirality` — suppresses the chirality mark entirely.
///
/// The atom is bracketed when it carries a `smilesSymbol` property, when
/// `params.all_hydrogens_explicit` is set, or when `atom_needs_bracket` says so.
///
/// # Errors
/// Propagates errors from the chirality helper, from `atom_needs_bracket` and,
/// only when no `smilesSymbol` override exists, from the element symbol lookup.
fn get_atom_smiles_impl(
    molecule: &Molecule,
    atom_id: AtomId,
    params: &SmilesWriteParams,
    chiral_tag_override: Option<ChiralTag>,
    invert_chirality: bool,
    chiral_permutation_override: Option<u32>,
    broken_chirality: bool,
) -> Result<String, SmilesWriteError> {
    let chirality = if params.do_isomeric_smiles && !broken_chirality {
        get_atom_chirality_info_with_inversion(
            molecule,
            atom_id,
            chiral_tag_override,
            invert_chirality,
            chiral_permutation_override,
        )?
    } else {
        String::new()
    };
    let atom = &molecule.atoms()[atom_id.index()];
    let custom_symbol = atom.prop("smilesSymbol");
    let needs_bracket = if custom_symbol.is_some() || params.all_hydrogens_explicit {
        true
    } else {
        atom_needs_bracket(molecule, atom_id, &chirality, params)?
    };
    // Look the element symbol up lazily: with an override in place a failing
    // lookup must not abort the write.
    let raw_symbol = match custom_symbol {
        Some(symbol) => symbol,
        None => element_symbol(atom.atomic_number())?,
    };
    let lowered_symbol;
    let symbol: &str = if !params.do_kekule
        && atom.is_aromatic()
        && raw_symbol
            .as_bytes()
            .first()
            .is_some_and(u8::is_ascii_uppercase)
    {
        // Only these elements are written in lower case when aromatic.
        let should_lower = matches!(
            atom.atomic_number(),
            5 | 6 | 7 | 8 | 14 | 15 | 16 | 33 | 34 | 52
        );
        if should_lower {
            // Lower-case the first character only; the rest is kept verbatim.
            let mut owned = String::with_capacity(raw_symbol.len());
            let mut chars = raw_symbol.chars();
            if let Some(first) = chars.next() {
                owned.extend(first.to_lowercase());
            }
            owned.push_str(chars.as_str());
            lowered_symbol = owned;
            &lowered_symbol
        } else {
            raw_symbol
        }
    } else {
        raw_symbol
    };
    let mut result = String::new();
    if needs_bracket {
        result.push('[');
    }
    if let Some(isotope) = atom.isotope()
        && params.do_isomeric_smiles
    {
        result.push_str(&isotope.to_string());
    }
    result.push_str(symbol);
    result.push_str(&chirality);
    if needs_bracket {
        // Hydrogen count: `H` for one, `H<n>` for more.
        let total_num_hs = total_num_hydrogens_for_writer(molecule, atom_id);
        if total_num_hs > 0 {
            result.push('H');
            if total_num_hs > 1 {
                result.push_str(&total_num_hs.to_string());
            }
        }
        // Charge: `+` / `-` for unit charges, `+<n>` / `-<n>` otherwise.
        let charge = atom.formal_charge();
        if charge > 0 {
            result.push('+');
            if charge > 1 {
                result.push_str(&charge.to_string());
            }
        } else if charge < 0 {
            if charge < -1 {
                // The sign is part of the formatted number.
                result.push_str(&charge.to_string());
            } else {
                result.push('-');
            }
        }
        if let Some(atom_map) = atom.atom_map() {
            result.push(':');
            result.push_str(&atom_map.to_string());
        }
        result.push(']');
    }
    if let Some(label) = atom.prop("_supplementalSmilesLabel") {
        result.push_str(label);
    }
    Ok(result)
}
/// Writer-internal alias for `get_atom_smiles_with_context`.
fn build_atom_smiles(
    molecule: &Molecule,
    atom_id: AtomId,
    params: &SmilesWriteParams,
    context: &SmilesWriteContext,
) -> Result<String, SmilesWriteError> {
    let atom_text = get_atom_smiles_with_context(molecule, atom_id, params, context)?;
    Ok(atom_text)
}
/// Returns the context-free SMILES symbol for a bond order.
///
/// Single bonds map to the empty string; every order without a dedicated arm
/// maps to `~`.
// NOTE(review): `BondOrder::Aromatic` has no arm here and therefore yields `~`,
// whereas `get_molecule_bond_smiles` writes `:` for aromatic bonds — confirm
// this difference is intended.
///
/// # Errors
/// Currently never fails; the `Result` is kept for interface stability.
pub fn get_bond_smiles(bond_order: BondOrder) -> Result<&'static str, SmilesWriteError> {
    let symbol = match bond_order {
        BondOrder::Single => "",
        BondOrder::Double => "=",
        BondOrder::Triple => "#",
        BondOrder::Quadruple => "$",
        BondOrder::Dative => "->",
        _ => "~",
    };
    Ok(symbol)
}
/// Returns the SMILES symbol for the bond at index `bond`, as seen when the
/// atom `atom_to_left` (default: the bond's begin atom) is written first.
///
/// The result depends on the bond order, its direction flag, whether both
/// endpoints are aromatic, and the `all_bonds_explicit`, `do_isomeric_smiles`
/// and `do_kekule` settings.
///
/// # Errors
/// Returns an index-validation error for `bond` or `atom_to_left`, and
/// `BondOutOfRange` when `atom_to_left` is not an endpoint of the bond (this is
/// only checked for single, double and aromatic bonds in non-kekulized output).
pub fn get_molecule_bond_smiles(
    molecule: &Molecule,
    bond: usize,
    atom_to_left: Option<usize>,
    params: &SmilesWriteParams,
) -> Result<String, SmilesWriteError> {
    validate_bond_index(molecule, bond)?;
    if let Some(atom) = atom_to_left {
        validate_atom_index(molecule, atom)?;
    }
    let bond = &molecule.bonds()[bond];
    let atom_to_left = atom_to_left.unwrap_or_else(|| bond.begin().index());

    let explicit = params.all_bonds_explicit;
    // `/` and `\` are written for explicit or isomeric output only.
    let show_direction = explicit || params.do_isomeric_smiles;

    // Both endpoints aromatic and not both dummy atoms.
    let order_can_be_aromatic = matches!(
        bond.order(),
        BondOrder::Single | BondOrder::Double | BondOrder::Aromatic
    );
    let aromatic_context = if !params.do_kekule && order_can_be_aromatic {
        let left = &molecule.atoms()[atom_to_left];
        let other_id = bond_other_atom(bond, AtomId::new(atom_to_left)).ok_or_else(|| {
            SmilesWriteError::BondOutOfRange {
                bond: bond.id().index(),
            }
        })?;
        let other = &molecule.atoms()[other_id.index()];
        left.is_aromatic()
            && other.is_aromatic()
            && (left.atomic_number() != 0 || other.atomic_number() != 0)
    } else {
        false
    };

    let symbol: &str = match bond.order() {
        BondOrder::Single => match bond.direction() {
            BondDirection::EndDownRight if show_direction => "\\",
            BondDirection::EndUpRight if show_direction => "/",
            BondDirection::EndDownRight | BondDirection::EndUpRight => "",
            BondDirection::None | BondDirection::Unknown => {
                // A non-aromatic single bond between aromatic atoms needs `-`.
                if explicit || (aromatic_context && !bond.is_aromatic()) {
                    "-"
                } else {
                    ""
                }
            }
            _ => {
                if explicit {
                    "-"
                } else {
                    ""
                }
            }
        },
        BondOrder::Double => {
            if !aromatic_context || !bond.is_aromatic() || explicit {
                "="
            } else {
                ""
            }
        }
        BondOrder::Triple => "#",
        BondOrder::Quadruple => "$",
        BondOrder::Aromatic => match bond.direction() {
            BondDirection::EndDownRight if show_direction => "\\",
            BondDirection::EndUpRight if show_direction => "/",
            BondDirection::EndDownRight | BondDirection::EndUpRight => "",
            _ => {
                if explicit || !aromatic_context {
                    ":"
                } else {
                    ""
                }
            }
        },
        BondOrder::Dative => {
            // The arrow points away from the bond's begin atom.
            if bond.begin().index() == atom_to_left {
                "->"
            } else {
                "<-"
            }
        }
        _ => "~",
    };
    Ok(symbol.to_string())
}
/// Writer-internal adapter: converts typed ids to raw indices and delegates to
/// `get_molecule_bond_smiles`.
fn build_bond_smiles(
    molecule: &Molecule,
    bond: BondId,
    atom_to_left: AtomId,
    params: &SmilesWriteParams,
) -> Result<String, SmilesWriteError> {
    let bond_index = bond.index();
    let left_index = atom_to_left.index();
    get_molecule_bond_smiles(molecule, bond_index, Some(left_index), params)
}
/// Returns the atom's explicit hydrogens plus its cached implicit hydrogens.
///
/// The implicit count is taken as 0 when the valence cache is absent, has no
/// entry for the atom, or holds a negative value.
fn total_num_hydrogens_for_writer(molecule: &Molecule, atom_id: AtomId) -> u32 {
    let idx = atom_id.index();
    let cached_implicit = match molecule.derived_cache().valence.as_ref() {
        Some(valence) => valence.implicit_hydrogens.get(idx).copied(),
        None => None,
    };
    let implicit = cached_implicit.unwrap_or(0).max(0) as u32;
    let explicit = u32::from(molecule.atoms()[idx].explicit_hydrogens());
    explicit + implicit
}
/// Returns the cached explicit valence plus implicit hydrogen count for `atom_id`.
///
/// Yields `None` when the molecule has no cached valence data. Both cached
/// tables are indexed directly, so an `atom_id` outside their range is
/// expected to panic rather than produce `None`.
fn total_valence_for_writer(molecule: &Molecule, atom_id: AtomId) -> Option<i32> {
    let valence = molecule.derived_cache().valence.as_ref()?;
    let explicit_part = valence.explicit_valence[atom_id.index()];
    let implicit_part = valence.implicit_hydrogens[atom_id.index()];
    Some(explicit_part + implicit_part)
}
/// Reports whether `_atomic_number` belongs to the writer's organic subset.
///
/// The subset is atomic number 0 (written as `*`) together with B, C, N, O,
/// F, P, S, Cl, Br and I.
///
/// # Errors
///
/// This implementation never returns `Err`; the `Result` wrapper is part of
/// the existing signature.
#[must_use]
pub fn in_organic_subset(_atomic_number: u8) -> Result<bool, SmilesWriteError> {
    // Atomic numbers in ascending order: *, B, C, N, O, F, P, S, Cl, Br, I.
    const ORGANIC_SUBSET: [u8; 11] = [0, 5, 6, 7, 8, 9, 15, 16, 17, 35, 53];
    Ok(ORGANIC_SUBSET.contains(&_atomic_number))
}
/// Appends the ring-closure index for `ring_idx` to `smiles`.
///
/// If `ring_idx` already has an index recorded in
/// `context.ring_closure_digits`, that index is written again and `ring_idx`
/// is queued in `context.ring_closures_to_erase`. Otherwise the smallest
/// positive integer not currently used by any recorded closure is allocated,
/// recorded for `ring_idx`, and written.
///
/// # Errors
///
/// Returns an invariant violation if no free index can be found.
fn write_ring_closure(
    smiles: &mut String,
    ring_idx: usize,
    context: &mut SmilesWriteContext,
) -> Result<(), SmilesWriteError> {
    let already_open = context.ring_closure_digits.get(&ring_idx).copied();
    if let Some(open_digit) = already_open {
        write_ring_index(smiles, open_digit);
        context.ring_closures_to_erase.push(ring_idx);
        return Ok(());
    }

    // Scan upwards from 1 for the first index no recorded closure is using.
    let Some(free_digit) = (1..).find(|candidate| {
        context
            .ring_closure_digits
            .values()
            .all(|taken| taken != candidate)
    }) else {
        return invariant_stage_error(
            SmilesPlanStage::ShortTermBondWriter,
            "write_ring_closure() could not allocate a free ring index",
        );
    };

    context.ring_closure_digits.insert(ring_idx, free_digit);
    write_ring_index(smiles, free_digit);
    Ok(())
}
/// Appends a ring-closure index to `smiles` in the form its magnitude requires.
///
/// * `0..=9` is written as a single digit.
/// * `10..=99` is written as `%` followed by the two digits.
/// * anything larger is written as `%(` + decimal digits + `)`.
fn write_ring_index(smiles: &mut String, digit: usize) {
    match digit {
        // The range guarantees the value fits in one ASCII digit.
        0..=9 => smiles.push(char::from(b'0' + digit as u8)),
        10..=99 => {
            smiles.push('%');
            smiles.push_str(&digit.to_string());
        }
        _ => {
            smiles.push_str("%(");
            smiles.push_str(&digit.to_string());
            smiles.push(')');
        }
    }
}
/// Returns the endpoint of `bond` that is not `atom`.
///
/// Yields `None` when `atom` is neither the begin nor the end atom of the
/// bond. When both endpoints equal `atom`, the begin side is matched first
/// and the end atom is returned.
fn bond_other_atom(bond: &Bond, atom: AtomId) -> Option<AtomId> {
    if bond.begin() == atom {
        return Some(bond.end());
    }
    if bond.end() == atom {
        return Some(bond.begin());
    }
    None
}
/// Maps an atomic number to the symbol used when writing it.
///
/// Atomic number 0 maps to `*`, 1 through 118 map to their element symbols,
/// and every other value maps to `?`.
///
/// # Errors
///
/// This implementation never returns `Err`; the `Result` wrapper is part of
/// the existing signature.
fn element_symbol(atomic_number: u8) -> Result<&'static str, SmilesWriteError> {
    // Indexed by atomic number; the fixed length makes a miscounted table a
    // compile error.
    const SYMBOLS: [&str; 119] = [
        // 0
        "*",
        // 1-10
        "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne",
        // 11-20
        "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca",
        // 21-30
        "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
        // 31-40
        "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr",
        // 41-50
        "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn",
        // 51-60
        "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
        // 61-70
        "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb",
        // 71-80
        "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
        // 81-90
        "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
        // 91-100
        "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm",
        // 101-110
        "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds",
        // 111-118
        "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og",
    ];
    let symbol = SYMBOLS
        .get(usize::from(atomic_number))
        .copied()
        .unwrap_or("?");
    Ok(symbol)
}
/// Joins per-fragment SMILES strings with `.` and records the output order.
///
/// When `params.canonical` is set, the fragments are first sorted by their
/// SMILES text (a stable sort, so equal strings keep their incoming order);
/// otherwise the incoming order is used as is. In both cases
/// `context.atom_output_order` and `context.bond_output_order` are cleared and
/// refilled with the fragments' orderings in the final output order.
///
/// # Errors
///
/// This implementation always returns `Ok`.
fn assemble_fragment_smiles(
    fragment_results: Vec<FragmentWriteResult>,
    params: &SmilesWriteParams,
    context: &mut SmilesWriteContext,
) -> Result<String, SmilesWriteError> {
    let mut ordered = fragment_results;
    if params.canonical {
        ordered.sort_by(|lhs, rhs| lhs.smiles.cmp(&rhs.smiles));
    }

    context.atom_output_order.clear();
    context.bond_output_order.clear();

    let mut pieces = Vec::with_capacity(ordered.len());
    for fragment in ordered {
        context
            .atom_output_order
            .extend(fragment.atom_ordering.iter().copied());
        context
            .bond_output_order
            .extend(fragment.bond_ordering.iter().copied());
        pieces.push(fragment.smiles);
    }
    Ok(pieces.join("."))
}
/// Checks that `params.rooted_at_atom`, when set, names an atom of `molecule`.
///
/// # Errors
///
/// Returns `SmilesWriteError::RootedAtomOutOfRange` when the requested root
/// index is not below `molecule.num_atoms()`.
fn validate_rooted_atom(
    molecule: &Molecule,
    params: &SmilesWriteParams,
) -> Result<(), SmilesWriteError> {
    match params.rooted_at_atom {
        Some(atom) if atom >= molecule.num_atoms() => {
            Err(SmilesWriteError::RootedAtomOutOfRange { atom })
        }
        _ => Ok(()),
    }
}
/// Validates the arguments of the fragment-writing API before any output is built.
///
/// Checks run in this order and stop at the first failure:
/// 1. every index in `atoms_to_use` is a valid atom index;
/// 2. every index in `bonds_to_use` (when given) is a valid bond index;
/// 3. a requested root atom is listed in `atoms_to_use`;
/// 4. with a root atom and no explicit bond list, the molecule has at most
///    one fragment;
/// 5. `atom_symbols` (when given) has at least one entry per atom;
/// 6. `bond_symbols` (when given) has at least one entry per bond.
///
/// # Errors
///
/// Returns the `SmilesWriteError` variant matching the first failed check.
fn validate_fragment_api_inputs(
    molecule: &Molecule,
    params: &SmilesWriteParams,
    atoms_to_use: &[usize],
    bonds_to_use: Option<&[usize]>,
    atom_symbols: Option<&[String]>,
    bond_symbols: Option<&[String]>,
) -> Result<(), SmilesWriteError> {
    atoms_to_use
        .iter()
        .try_for_each(|&atom| validate_atom_index(molecule, atom))?;
    if let Some(selected_bonds) = bonds_to_use {
        selected_bonds
            .iter()
            .try_for_each(|&bond| validate_bond_index(molecule, bond))?;
    }

    if let Some(root) = params.rooted_at_atom {
        if !atoms_to_use.contains(&root) {
            return Err(SmilesWriteError::RootedAtomNotInFragment { atom: root });
        }
        // The fragment count is only computed when the caller gave no bond list.
        if bonds_to_use.is_none() {
            let fragment_count =
                match crate::notation::fragment::get_fragment_atom_mapping(molecule)
                    .into_iter()
                    .max()
                {
                    Some(max_fragment) => max_fragment + 1,
                    None => 0,
                };
            if fragment_count > 1 {
                return Err(SmilesWriteError::RootedAtomRequiresSingleFragment { atom: root });
            }
        }
    }

    if let Some(symbols) = atom_symbols {
        let expected = molecule.num_atoms();
        if symbols.len() < expected {
            return Err(SmilesWriteError::AtomSymbolsTooShort {
                len: symbols.len(),
                expected,
            });
        }
    }
    if let Some(symbols) = bond_symbols {
        let expected = molecule.num_bonds();
        if symbols.len() < expected {
            return Err(SmilesWriteError::BondSymbolsTooShort {
                len: symbols.len(),
                expected,
            });
        }
    }
    Ok(())
}
/// Checks that `atom` is a valid atom index for `molecule`.
///
/// # Errors
///
/// Returns `SmilesWriteError::AtomOutOfRange` when `atom` is not below
/// `molecule.num_atoms()`.
fn validate_atom_index(molecule: &Molecule, atom: usize) -> Result<(), SmilesWriteError> {
    if atom < molecule.num_atoms() {
        return Ok(());
    }
    Err(SmilesWriteError::AtomOutOfRange { atom })
}
/// Checks that `bond` is a valid bond index for `molecule`.
///
/// # Errors
///
/// Returns `SmilesWriteError::BondOutOfRange` when `bond` is not below
/// `molecule.num_bonds()`.
fn validate_bond_index(molecule: &Molecule, bond: usize) -> Result<(), SmilesWriteError> {
    if bond < molecule.num_bonds() {
        return Ok(());
    }
    Err(SmilesWriteError::BondOutOfRange { bond })
}
/// Builds an `Err` carrying `SmilesWriteError::InvariantViolation`.
///
/// The error records the string form of `stage` and the given static
/// `message`. The function is generic over the success type so it can be
/// returned directly from any fallible writer function.
fn invariant_stage_error<T>(
    stage: SmilesPlanStage,
    message: &'static str,
) -> Result<T, SmilesWriteError> {
    let stage = stage.as_str();
    Err(SmilesWriteError::InvariantViolation { stage, message })
}
#[cfg(test)]
mod tests;