use std::collections::BTreeMap;
use crate::{AdjacencyList, AtomId, BondOrder, ChiralTag, Molecule, NeighborRef};
/// Options consumed by [`morgan_fingerprint`] and [`morgan_fingerprint_with_output`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MorganFingerprintParams {
/// Number of invariant-update rounds performed after the initial invariants.
pub radius: u32,
/// Length of the resulting bit vector; zero is rejected with
/// [`FingerprintError::EmptyFingerprint`].
pub n_bits: usize,
/// When true, a code derived from each atom's chiral tag is hashed into its
/// invariant during every update round.
pub use_chirality: bool,
/// Whether bond orders contribute to bond invariants. Only consulted when
/// `bond_invariants_generator` is `None`.
pub use_bond_types: bool,
/// Selects the count-simulation branch when the final bits are assembled.
pub count_simulation: bool,
/// Count thresholds consulted when `count_simulation` is enabled.
pub count_bounds: Vec<u32>,
/// When true, invariants equal to zero never contribute a bit.
pub only_nonzero_invariants: bool,
/// When true, the connectivity invariants add a ring-membership component
/// for atoms that belong to at least one ring.
pub include_ring_membership: bool,
/// When false, an invariant value that has already been emitted (by any atom
/// in any round) is skipped.
pub include_redundant_environments: bool,
/// When set, atoms whose index is not listed are excluded.
pub from_atoms: Option<Vec<usize>>,
/// When set, atoms whose index is listed are excluded.
pub ignore_atoms: Option<Vec<usize>>,
/// Per-atom replacement values for the initial invariants, matched by index.
pub custom_atom_invariants: Option<Vec<u32>>,
/// Per-bond replacement values for the bond invariants, matched by bond index.
pub custom_bond_invariants: Option<Vec<u32>>,
/// Chooses how the initial atom invariants are computed.
pub atom_invariants_generator: MorganAtomInvariantsGenerator,
/// When set, its `use_bond_types` flag replaces the top-level `use_bond_types`.
pub bond_invariants_generator: Option<MorganBondInvariantsGenerator>,
/// Number of bits derived from each emitted invariant outside count simulation.
pub num_bits_per_feature: u32,
/// When true, a [`MorganAdditionalOutput`] is returned next to the fingerprint.
pub collect_additional_output: bool,
}
/// Default Morgan settings: radius 2, 2048 bits, connectivity invariants with
/// ring membership, bond types on, chirality and count simulation off.
impl Default for MorganFingerprintParams {
fn default() -> Self {
Self {
radius: 2,
n_bits: 2048,
use_chirality: false,
use_bond_types: true,
count_simulation: false,
// Thresholds only take effect once `count_simulation` is switched on.
count_bounds: vec![1, 2, 4, 8],
only_nonzero_invariants: false,
include_ring_membership: true,
include_redundant_environments: false,
from_atoms: None,
ignore_atoms: None,
custom_atom_invariants: None,
custom_bond_invariants: None,
atom_invariants_generator: MorganAtomInvariantsGenerator::Connectivity {
include_ring_membership: true,
},
// `None` means the top-level `use_bond_types` flag decides.
bond_invariants_generator: None,
num_bits_per_feature: 1,
collect_additional_output: false,
}
}
}
/// Selects the routine that produces the initial (round 0) atom invariants.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MorganAtomInvariantsGenerator {
/// Invariants hashed from element, degree, hydrogen count, charge and ring flag.
///
/// NOTE(review): the `include_ring_membership` field is not read in the visible
/// part of this file; the connectivity routine consults
/// `MorganFingerprintParams::include_ring_membership` instead — confirm intent.
Connectivity { include_ring_membership: bool },
/// Invariants built as a bit mask of per-atom feature flags.
Feature,
}
/// Optional override for how bond invariants are derived.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MorganBondInvariantsGenerator {
/// Replaces `MorganFingerprintParams::use_bond_types` when this generator is set.
pub use_bond_types: bool,
/// NOTE(review): not read anywhere in the visible part of this file.
pub use_chirality: bool,
}
/// Bookkeeping produced next to a Morgan fingerprint when
/// `collect_additional_output` is enabled.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct MorganAdditionalOutput {
/// Per-atom tally, incremented each time an environment centred on that atom
/// contributes to the fingerprint.
pub atom_counts: Vec<u32>,
/// For every atom, the distinct bit indices recorded for it in `bit_info_map`.
pub atom_to_bits: Vec<Vec<usize>>,
/// Bit index -> list of `(atom index, round)` pairs recorded for that bit.
pub bit_info_map: BTreeMap<usize, Vec<(usize, u32)>>,
/// Bit index -> list of atom lists; each recorded list holds only the
/// centre atom index.
pub atoms_per_bit: BTreeMap<usize, Vec<Vec<usize>>>,
}
/// Result of [`morgan_fingerprint_with_output`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MorganFingerprintOutput {
/// The folded bit-vector fingerprint.
pub fingerprint: Fingerprint,
/// Present only when `collect_additional_output` was requested.
pub additional_output: Option<MorganAdditionalOutput>,
}
/// Fixed-length bit vector packed into 64-bit words.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Fingerprint {
// Bit `i` lives in word `i / 64` at position `i % 64`.
bits: Vec<u64>,
// Logical number of bits; trailing bits of the last word are unused.
n_bits: usize,
}
impl Fingerprint {
    /// Builds a fingerprint of `n_bits` bits with every index yielded by
    /// `on_bits` switched on. Repeated indices are harmless.
    ///
    /// # Panics
    ///
    /// Panics when a yielded index is not smaller than `n_bits`.
    #[must_use]
    pub fn from_on_bits(n_bits: usize, on_bits: impl IntoIterator<Item = usize>) -> Self {
        let mut words = vec![0u64; n_bits.div_ceil(64)];
        on_bits.into_iter().for_each(|bit| {
            assert!(
                bit < n_bits,
                "fingerprint bit {bit} is outside n_bits={n_bits}"
            );
            let (word, offset) = (bit / 64, bit % 64);
            words[word] |= 1u64 << offset;
        });
        Self {
            bits: words,
            n_bits,
        }
    }

    /// Logical length of the fingerprint in bits.
    #[must_use]
    pub fn n_bits(&self) -> usize {
        self.n_bits
    }

    /// Indices of all set bits, in ascending order.
    #[must_use]
    pub fn on_bits(&self) -> Vec<usize> {
        self.bits
            .iter()
            .enumerate()
            .flat_map(|(word_idx, &word)| {
                (0..64usize)
                    .filter(move |&offset| ((word >> offset) & 1) == 1)
                    .map(move |offset| word_idx * 64 + offset)
            })
            // Ignore any stray bits beyond the logical length.
            .filter(|&bit| bit < self.n_bits)
            .collect()
    }

    /// Tanimoto similarity: shared set bits divided by bits set in either
    /// fingerprint. Two all-zero fingerprints yield `0.0`.
    ///
    /// # Errors
    ///
    /// Returns [`FingerprintError::BitLengthMismatch`] when the two
    /// fingerprints have different lengths.
    pub fn tanimoto(&self, other: &Self) -> Result<f64, FingerprintError> {
        if self.n_bits != other.n_bits {
            return Err(FingerprintError::BitLengthMismatch {
                left: self.n_bits,
                right: other.n_bits,
            });
        }
        let (shared, combined) = self.bits.iter().zip(&other.bits).fold(
            (0u32, 0u32),
            |(shared, combined), (left, right)| {
                (
                    shared + (left & right).count_ones(),
                    combined + (left | right).count_ones(),
                )
            },
        );
        if combined == 0 {
            Ok(0.0)
        } else {
            Ok(f64::from(shared) / f64::from(combined))
        }
    }
}
/// Errors reported by the fingerprint routines in this module.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum FingerprintError {
/// A Morgan fingerprint was requested with `n_bits == 0`.
#[error("Morgan fingerprint requires n_bits > 0")]
EmptyFingerprint,
/// Wraps a crate-level unsupported-feature error and forwards its message.
#[error(transparent)]
UnsupportedFeature(#[from] crate::UnsupportedFeatureError),
/// A Morgan option (named by `option`) cannot be honoured for the stated `reason`.
#[error("unsupported Morgan fingerprint option {option}: {reason}")]
UnsupportedOption {
option: &'static str,
reason: &'static str,
},
/// Two fingerprints of different lengths were compared.
#[error("fingerprint bit length mismatch: {left} != {right}")]
BitLengthMismatch { left: usize, right: usize },
}
/// Computes the Morgan fingerprint of `molecule` and returns only the bit vector.
///
/// Thin wrapper over [`morgan_fingerprint_with_output`] that discards any
/// additional output.
///
/// # Errors
///
/// Propagates every error of [`morgan_fingerprint_with_output`].
pub fn morgan_fingerprint(
    molecule: &Molecule,
    params: &MorganFingerprintParams,
) -> Result<Fingerprint, FingerprintError> {
    morgan_fingerprint_with_output(molecule, params).map(|output| output.fingerprint)
}
/// Computes the Morgan fingerprint of `molecule` together with the optional
/// bookkeeping requested through `params.collect_additional_output`.
///
/// The initial atom invariants are refined for `params.radius` rounds; the
/// invariants of every round (including round 0) are then handed to
/// `build_fingerprint`.
///
/// # Errors
///
/// Returns [`FingerprintError::EmptyFingerprint`] when `params.n_bits` is zero,
/// and propagates errors from parameter validation, the initial-invariant
/// computation and `build_fingerprint`.
pub fn morgan_fingerprint_with_output(
molecule: &Molecule,
params: &MorganFingerprintParams,
) -> Result<MorganFingerprintOutput, FingerprintError> {
validate_morgan_params(params)?;
if params.n_bits == 0 {
return Err(FingerprintError::EmptyFingerprint);
}
// An empty molecule yields an all-zero fingerprint (and empty bookkeeping).
if molecule.num_atoms() == 0 {
return Ok(MorganFingerprintOutput {
fingerprint: Fingerprint::from_on_bits(params.n_bits, []),
additional_output: if params.collect_additional_output {
Some(MorganAdditionalOutput::default())
} else {
None
},
});
}
let adjacency = molecule.topology_block().adjacency.clone();
let mut invariants = compute_initial_invariants(molecule, &adjacency, params)?;
// all_rounds[r] holds the invariants after r update rounds; index 0 is the
// initial state.
let mut all_rounds: Vec<Vec<u32>> = Vec::with_capacity(params.radius as usize + 1);
all_rounds.push(invariants.clone());
for round in 0..params.radius {
// Every atom reads the previous round's snapshot, so updates made within
// a round never influence each other.
let prev = invariants.clone();
for i in 0..molecule.num_atoms() {
// Excluded atoms keep the invariant they already have.
if atom_is_excluded(i, params) {
continue;
}
// Seed with the round number, then mix in the atom's own previous invariant.
let mut invar = round as u32;
hash_combine(&mut invar, prev[i]);
let mut neighbor_pairs: Vec<(u32, u32)> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| !atom_is_excluded(n.atom_index, params))
.map(|n| {
let bond_idx = n.bond.index();
let bt = morgan_bond_invariant(bond_idx, &molecule.bonds()[bond_idx], params);
(bt, prev[n.atom_index])
})
.collect();
// Sorting makes the hash independent of the order in which the adjacency
// list enumerates neighbours.
neighbor_pairs.sort_unstable();
for &(bt, n_inv) in &neighbor_pairs {
let mut pair_hash = 0u32;
hash_combine(&mut pair_hash, bt);
hash_combine(&mut pair_hash, n_inv);
hash_combine(&mut invar, pair_hash);
}
if params.use_chirality {
let atom = &molecule.atoms()[i];
match atom.chiral_tag() {
ChiralTag::TetrahedralCw | ChiralTag::TetrahedralCcw => {
// Code 3 or 2 is chosen from the tag combined with the parity of the
// chiral permutation (a missing permutation counts as 0).
// NOTE(review): `is_r` presumably denotes the R descriptor — confirm.
let perm = atom.chiral_permutation().unwrap_or(0);
let is_r = matches!(
(atom.chiral_tag(), perm % 2),
(ChiralTag::TetrahedralCw, 0) | (ChiralTag::TetrahedralCcw, 1)
);
if is_r {
hash_combine(&mut invar, 3u32); } else {
hash_combine(&mut invar, 2u32); }
}
// Tetrahedral centre without a CW/CCW assignment gets code 1.
ChiralTag::Tetrahedral => {
hash_combine(&mut invar, 1u32); }
_ => {}
}
}
invariants[i] = invar;
}
all_rounds.push(invariants.clone());
}
build_fingerprint(molecule, &all_rounds, params)
}
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
/// Computes the connectivity-based initial invariant of every atom.
///
/// Each invariant is the running hash of, in order: atomic number, degree plus
/// implicit hydrogens, explicit plus implicit hydrogens, formal charge, a
/// constant zero mass delta, and — only for ring atoms when
/// `params.include_ring_membership` is set and ring data is cached — the value 1.
/// Missing valence data counts as zero implicit hydrogens.
fn compute_connectivity_invariants(
    molecule: &Molecule,
    adjacency: &AdjacencyList,
    params: &MorganFingerprintParams,
) -> Vec<u32> {
    let atom_total = molecule.num_atoms();
    let rings = if params.include_ring_membership {
        molecule.derived_cache().rings.as_ref()
    } else {
        None
    };
    let valence_info = molecule.derived_cache().valence.as_ref();

    (0..atom_total)
        .map(|idx| {
            let atom = &molecule.atoms()[idx];
            let heavy_degree = adjacency.neighbors_of(idx).len() as u32;
            let implicit = valence_info
                .and_then(|v| v.implicit_hydrogens.get(idx).copied())
                .unwrap_or(0) as u32;
            let connections = heavy_degree + implicit;
            let hydrogens = atom.explicit_hydrogens() as u32 + implicit;
            let in_ring = rings.map_or(false, |r| r.num_atom_rings(AtomId::new(idx)) > 0);

            let mut hash = 0u32;
            hash_combine(&mut hash, atom.atomic_number() as u32);
            hash_combine(&mut hash, connections);
            hash_combine(&mut hash, hydrogens);
            hash_combine(&mut hash, atom.formal_charge() as u32);
            // Mass delta is always zero here.
            hash_combine(&mut hash, 0);
            if in_ring {
                hash_combine(&mut hash, 1);
            }
            hash
        })
        .collect()
}
/// Computes the feature-based initial invariant of every atom as a bit mask.
///
/// Bits, from least significant: donor (0), acceptor (1), aromatic (2),
/// halogen F/Cl/Br/I (3), basic (4), acidic (5). Missing valence data counts
/// as zero implicit hydrogens; missing ring data counts as zero rings.
fn compute_feature_invariants(molecule: &Molecule, adjacency: &AdjacencyList) -> Vec<u32> {
    const DONOR: u32 = 1 << 0;
    const ACCEPTOR: u32 = 1 << 1;
    const AROMATIC: u32 = 1 << 2;
    const HALOGEN: u32 = 1 << 3;
    const BASIC: u32 = 1 << 4;
    const ACIDIC: u32 = 1 << 5;

    let atom_total = molecule.num_atoms();
    let valence_info = molecule.derived_cache().valence.as_ref();
    let rings = molecule.derived_cache().rings.as_ref();

    (0..atom_total)
        .map(|idx| {
            let atom = &molecule.atoms()[idx];
            let element = atom.atomic_number();
            let charge = atom.formal_charge();
            let aromatic = atom.is_aromatic();
            let implicit = valence_info
                .and_then(|v| v.implicit_hydrogens.get(idx).copied())
                .unwrap_or(0) as u32;
            let hydrogens = atom.explicit_hydrogens() as u32 + implicit;
            let degree = adjacency.neighbors_of(idx).len();
            let ring_count = rings.map_or(0, |r| r.num_atom_rings(AtomId::new(idx)));

            let donor = match element {
                7 => hydrogens > 0 && matches!(degree, 3 | 4) && matches!(charge, 0 | 1),
                8 | 16 => hydrogens == 1 && charge == 0,
                _ => false,
            };
            let acceptor = match element {
                7 => hydrogens == 0 && degree <= 3 && ring_count == 0,
                8 | 16 => (hydrogens == 0 || charge < 0) && degree == 2,
                _ => false,
            };
            let halogen = matches!(element, 9 | 17 | 35 | 53);
            let basic = element == 7 && (charge > 0 || (hydrogens == 2 && degree == 3));
            // Carbon or sulfur double-bonded to O/S, with no hydrogens or a
            // negative charge on the centre itself.
            let acidic = matches!(element, 6 | 16)
                && adjacency.neighbors_of(idx).iter().any(|n| {
                    let partner = molecule.atoms()[n.atom_index].atomic_number();
                    (partner == 8 || partner == 16)
                        && molecule.bonds()[n.bond.index()].order() == crate::BondOrder::Double
                        && (hydrogens == 0 || charge < 0)
                });

            [
                (donor, DONOR),
                (acceptor, ACCEPTOR),
                (aromatic, AROMATIC),
                (halogen, HALOGEN),
                (basic, BASIC),
                (acidic, ACIDIC),
            ]
            .into_iter()
            .filter(|&(present, _)| present)
            .fold(0u32, |mask, (_, bit)| mask | bit)
        })
        .collect()
}
/// Produces the round-0 atom invariants for `molecule`.
///
/// The generator selected by `params.atom_invariants_generator` supplies the
/// baseline. When `params.custom_atom_invariants` is set, its values replace
/// the baseline index by index; if the custom list is shorter than the atom
/// count, the remaining atoms keep their generated invariant, and surplus
/// custom entries are ignored.
///
/// # Errors
///
/// Currently never fails; the `Result` is kept for the callers' `?` chain.
fn compute_initial_invariants(
    molecule: &Molecule,
    adjacency: &AdjacencyList,
    params: &MorganFingerprintParams,
) -> Result<Vec<u32>, FingerprintError> {
    let mut invariants = match &params.atom_invariants_generator {
        MorganAtomInvariantsGenerator::Connectivity { .. } => {
            compute_connectivity_invariants(molecule, adjacency, params)
        }
        MorganAtomInvariantsGenerator::Feature => compute_feature_invariants(molecule, adjacency),
    };
    if let Some(custom) = &params.custom_atom_invariants {
        // `zip` stops at the shorter side, giving the partial-override behaviour.
        for (slot, &value) in invariants.iter_mut().zip(custom) {
            *slot = value;
        }
    }
    Ok(invariants)
}
fn validate_morgan_params(params: &MorganFingerprintParams) -> Result<(), FingerprintError> {
Ok(())
}
/// Returns the invariant contributed by the bond at `bond_idx`.
///
/// Resolution order:
/// 1. an entry at `bond_idx` in `params.custom_bond_invariants`, if present;
/// 2. `0` when bond types are disabled (the generator's `use_bond_types` if a
///    bond generator is configured, otherwise `params.use_bond_types`);
/// 3. a code derived from the bond order (single 1, double 2, triple 3,
///    quadruple 4, aromatic 12, dative 9, anything else 0).
fn morgan_bond_invariant(
    bond_idx: usize,
    bond: &crate::Bond,
    params: &MorganFingerprintParams,
) -> u32 {
    let custom = params
        .custom_bond_invariants
        .as_ref()
        .and_then(|table| table.get(bond_idx));
    if let Some(&value) = custom {
        return value;
    }

    let use_bond_types = params
        .bond_invariants_generator
        .as_ref()
        .map_or(params.use_bond_types, |generator| generator.use_bond_types);
    if !use_bond_types {
        return 0;
    }

    match bond.order() {
        crate::BondOrder::Single => 1,
        crate::BondOrder::Double => 2,
        crate::BondOrder::Triple => 3,
        crate::BondOrder::Quadruple => 4,
        crate::BondOrder::Aromatic => 12,
        crate::BondOrder::Dative => 9,
        crate::BondOrder::Zero | crate::BondOrder::Unspecified | crate::BondOrder::Null => 0,
        _ => 0,
    }
}
/// Mixes `value` into `seed` in place.
///
/// The new seed is the wrapping sum of the old seed, `value`, the constant
/// `0x9e3779b9`, the old seed shifted left by 6 and the old seed shifted
/// right by 2.
pub(crate) fn hash_combine(seed: &mut u32, value: u32) {
    const MIX_CONSTANT: u32 = 0x9e37_79b9;
    let previous = *seed;
    // Shifts by a fixed amount below 32 simply drop the bits that fall off.
    let terms = [value, MIX_CONSTANT, previous << 6, previous >> 2];
    *seed = terms.into_iter().fold(previous, u32::wrapping_add);
}
/// Folds a 32-bit invariant onto a bit index in `0..n_bits`.
///
/// # Panics
///
/// Panics when `n_bits` is zero.
fn fold_invariant(invariant: u32, n_bits: usize) -> usize {
    let widened = invariant as usize;
    widened % n_bits
}
fn build_fingerprint(
molecule: &Molecule,
all_rounds: &[Vec<u32>],
params: &MorganFingerprintParams,
) -> Result<MorganFingerprintOutput, FingerprintError> {
let n_bits = params.n_bits;
let collect = params.collect_additional_output;
let mut atom_counts = if collect {
vec![0u32; molecule.num_atoms()]
} else {
vec![]
};
let mut bit_info_map: BTreeMap<usize, Vec<(usize, u32)>> = BTreeMap::new();
let mut atoms_per_bit: BTreeMap<usize, Vec<Vec<usize>>> = BTreeMap::new();
let mut on_bits = Vec::new();
let mut seen_invariants: std::collections::HashSet<u32> = std::collections::HashSet::new();
for (round_idx, round_invs) in all_rounds.iter().enumerate() {
let round = round_idx as u32;
for atom_idx in 0..molecule.num_atoms() {
if atom_is_excluded(atom_idx, params) {
continue;
}
let inv = round_invs[atom_idx];
if !params.include_redundant_environments && !seen_invariants.insert(inv) {
continue;
}
if params.only_nonzero_invariants && inv == 0 {
continue;
}
if params.count_simulation {
let bit = fold_invariant(inv, n_bits);
on_bits.push(bit);
if collect {
atom_counts[atom_idx] += 1;
bit_info_map.entry(bit).or_default().push((atom_idx, round));
atoms_per_bit.entry(bit).or_default().push(vec![atom_idx]);
}
} else {
for chunk in 0..params.num_bits_per_feature {
let bit =
fold_invariant(inv.wrapping_add(chunk.wrapping_mul(0x517cc1b7)), n_bits);
on_bits.push(bit);
if collect {
atom_counts[atom_idx] += 1;
bit_info_map.entry(bit).or_default().push((atom_idx, round));
atoms_per_bit.entry(bit).or_default().push(vec![atom_idx]);
}
}
}
}
}
if params.count_simulation && !params.count_bounds.is_empty() {
let mut counts_per_bit: BTreeMap<usize, u32> = BTreeMap::new();
for &bit in &on_bits {
*counts_per_bit.entry(bit).or_insert(0) += 1;
}
on_bits.clear();
for (&bit, &count) in &counts_per_bit {
on_bits.push(bit);
for (bound_idx, &bound) in params.count_bounds.iter().enumerate().skip(1) {
if count >= bound {
let offset_bit = (bit + bound_idx * n_bits) % n_bits;
on_bits.push(offset_bit);
}
}
}
}
let fingerprint = Fingerprint::from_on_bits(n_bits, on_bits.iter().copied());
let additional_output = if collect {
let mut atom_to_bits: Vec<Vec<usize>> = vec![vec![]; molecule.num_atoms()];
for (&bit, entries) in &bit_info_map {
for &(atom_idx, _) in entries {
if !atom_to_bits[atom_idx].contains(&bit) {
atom_to_bits[atom_idx].push(bit);
}
}
}
Some(MorganAdditionalOutput {
atom_counts,
atom_to_bits,
bit_info_map,
atoms_per_bit,
})
} else {
None
};
Ok(MorganFingerprintOutput {
fingerprint,
additional_output,
})
}
fn atom_is_excluded(index: usize, params: &MorganFingerprintParams) -> bool {
if let Some(from) = ¶ms.from_atoms {
return !from.contains(&index);
}
if let Some(ignore) = ¶ms.ignore_atoms {
return ignore.contains(&index);
}
false
}
/// Options consumed by [`topological_fingerprint`].
#[derive(Debug, Clone)]
pub struct TopologicalFingerprintParams {
/// Smallest path length, in bonds, that contributes bits (inclusive).
pub min_path: u32,
/// Largest path length, in bonds, that is enumerated and contributes bits (inclusive).
pub max_path: u32,
/// Length of the resulting bit vector; zero yields an empty fingerprint.
pub n_bits: usize,
/// Number of bits set for each path hash.
pub n_bits_per_hash: u32,
/// When false, every bond contributes the same value (1) to the path hash.
pub use_bond_types: bool,
/// When set, atoms whose index is not listed are excluded from all paths.
pub from_atoms: Option<Vec<usize>>,
/// When set, atoms whose index is listed are excluded from all paths.
pub ignore_atoms: Option<Vec<usize>>,
}
/// Default topological settings: paths of 1 to 7 bonds, 2048 bits, two bits
/// per path hash, bond types on, no atom filtering.
impl Default for TopologicalFingerprintParams {
fn default() -> Self {
Self {
min_path: 1,
max_path: 7,
n_bits: 2048,
n_bits_per_hash: 2,
use_bond_types: true,
from_atoms: None,
ignore_atoms: None,
}
}
}
/// Computes a path-based (topological) fingerprint of `molecule`.
///
/// Starting from every non-excluded atom, all simple paths (no atom visited
/// twice) of up to `params.max_path` bonds are enumerated. Each path whose
/// bond count lies in `min_path..=max_path` is hashed from the start atom's
/// atomic number and, per step, the bond value and the next atom's atomic
/// number; the hash then sets `params.n_bits_per_hash` bits.
///
/// An atom is excluded when `params.from_atoms` is set and does not list it,
/// or when `params.ignore_atoms` is set and lists it; excluded atoms neither
/// start nor appear in any path.
///
/// Paths are hashed in traversal direction, so a path is visited once from
/// each end and the two visits generally hash differently.
///
/// An empty molecule or `n_bits == 0` yields a fingerprint with no bits set.
#[must_use]
pub fn topological_fingerprint(
    molecule: &Molecule,
    params: &TopologicalFingerprintParams,
) -> Fingerprint {
    let n_bits = params.n_bits;
    let num_atoms = molecule.num_atoms();
    if num_atoms == 0 || n_bits == 0 {
        return Fingerprint::from_on_bits(n_bits, []);
    }
    let adjacency = molecule.topology_block().adjacency.clone();

    // Resolve the atom filters once instead of scanning the lists per visit.
    let excluded: Vec<bool> = (0..num_atoms)
        .map(|idx| {
            let outside_selection = params
                .from_atoms
                .as_ref()
                .is_some_and(|from| !from.contains(&idx));
            let ignored = params
                .ignore_atoms
                .as_ref()
                .is_some_and(|ignore| ignore.contains(&idx));
            outside_selection || ignored
        })
        .collect();

    let atomic_numbers: Vec<u8> = molecule
        .atoms()
        .iter()
        .map(|a| a.atomic_number())
        .collect();
    let bond_values: Vec<u32> = molecule
        .bonds()
        .iter()
        .map(|b| {
            if !params.use_bond_types {
                return 1;
            }
            match b.order() {
                crate::BondOrder::Single => 1,
                crate::BondOrder::Double => 2,
                crate::BondOrder::Triple => 3,
                crate::BondOrder::Quadruple => 4,
                crate::BondOrder::Aromatic => 12,
                crate::BondOrder::Dative => 9,
                _ => 0,
            }
        })
        .collect();

    let mut on_bits: std::collections::HashSet<usize> = std::collections::HashSet::new();
    // Depth-first stack of (current atom, atoms on the path, running path hash).
    let mut stack: Vec<(usize, Vec<usize>, u32)> = Vec::new();
    for start in 0..num_atoms {
        if excluded[start] {
            continue;
        }
        stack.push((start, vec![start], u32::from(atomic_numbers[start])));
        while let Some((current, path, path_hash)) = stack.pop() {
            let bonds_in_path = path.len() as u32 - 1;
            if (params.min_path..=params.max_path).contains(&bonds_in_path) {
                for chunk in 0..params.n_bits_per_hash {
                    // Chunk 0 adds nothing, so the first bit is the plain hash.
                    let salted = path_hash.wrapping_add(chunk.wrapping_mul(0x517cc1b7));
                    on_bits.insert(salted as usize % n_bits);
                }
            }
            if bonds_in_path >= params.max_path {
                continue;
            }
            for neighbor in adjacency.neighbors_of(current) {
                let next = neighbor.atom_index;
                if path.contains(&next) || excluded[next] {
                    continue;
                }
                // Extending the path mixes one (bond, atom) step into the hash.
                let mut step_hash = 0u32;
                hash_combine(&mut step_hash, bond_values[neighbor.bond.index()]);
                hash_combine(&mut step_hash, u32::from(atomic_numbers[next]));
                let mut extended_hash = path_hash;
                hash_combine(&mut extended_hash, step_hash);

                let mut extended_path = Vec::with_capacity(path.len() + 1);
                extended_path.extend_from_slice(&path);
                extended_path.push(next);
                stack.push((next, extended_path, extended_hash));
            }
        }
    }
    Fingerprint::from_on_bits(n_bits, on_bits)
}
/// Options consumed by [`maccs_fingerprint`].
#[derive(Debug, Clone)]
pub struct MaccsFingerprintParams {
/// Length of the resulting bit vector; zero yields an empty fingerprint.
pub n_bits: usize,
}
/// Default MACCS settings: a 166-bit vector.
impl Default for MaccsFingerprintParams {
fn default() -> Self {
Self { n_bits: 166 }
}
}
#[must_use]
pub fn maccs_fingerprint(molecule: &Molecule, params: &MaccsFingerprintParams) -> Fingerprint {
let n_bits = params.n_bits;
if molecule.num_atoms() == 0 || n_bits == 0 {
return Fingerprint::from_on_bits(n_bits, []);
}
let atoms = molecule.atoms();
let bonds = molecule.bonds();
let num_atoms = molecule.num_atoms();
let num_bonds = molecule.num_bonds();
let adjacency = molecule.topology_block().adjacency.clone();
let ring_info = molecule.derived_cache().rings.as_ref();
let atomic_numbers: Vec<u8> = atoms.iter().map(|a| a.atomic_number()).collect();
let charges: Vec<i8> = atoms.iter().map(|a| a.formal_charge()).collect();
let aromatic_atoms: Vec<bool> = atoms.iter().map(|a| a.is_aromatic()).collect();
let has_element = |target_z: u8| -> bool { atomic_numbers.iter().any(|&z| z == target_z) };
let count_element =
|target_z: u8| -> usize { atomic_numbers.iter().filter(|&&z| z == target_z).count() };
let bond_orders: Vec<BondOrder> = bonds.iter().map(|b| b.order()).collect();
let bond_aromatic: Vec<bool> = bonds.iter().map(|b| b.is_aromatic()).collect();
let has_bond_order = |order: BondOrder| -> bool { bond_orders.iter().any(|&o| o == order) };
let has_double_bond = || -> bool {
bond_orders
.iter()
.any(|o| *o == BondOrder::Double || *o == BondOrder::Aromatic)
};
let degree = |atom_idx: usize| -> usize { adjacency.neighbors_of(atom_idx).len() };
let atom_in_ring = |atom_idx: usize| -> bool {
ring_info.map_or(false, |r| r.num_atom_rings(AtomId::new(atom_idx)) > 0)
};
let has_aromatic_bond = || -> bool { bond_aromatic.iter().any(|&a| a) };
let has_neighbor_z = |atom_idx: usize, target_z: u8| -> bool {
adjacency
.neighbors_of(atom_idx)
.iter()
.any(|n| atomic_numbers[n.atom_index] == target_z)
};
let find_atom = |pred: fn(&[u8], &[i8], usize) -> bool| -> Option<usize> {
(0..num_atoms).find(|&i| pred(&atomic_numbers, &charges, i))
};
let mut on_bits: Vec<usize> = Vec::new();
if has_element(104) {
on_bits.push(1);
}
for &z in &[32, 33, 34, 50, 51, 52, 82, 83, 84] {
if has_element(z) {
on_bits.push(2);
break;
}
}
for z in 89u8..=103 {
if has_element(z) {
on_bits.push(3);
break;
}
}
for &z in &[21, 22, 39, 40, 72] {
if has_element(z) {
on_bits.push(4);
break;
}
}
for z in 57u8..=71 {
if has_element(z) {
on_bits.push(5);
break;
}
}
for &z in &[23, 24, 25, 41, 42, 43, 73, 74, 75] {
if has_element(z) {
on_bits.push(6);
break;
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 4 {
for a in ri.atom_rings()[ring_idx].iter() {
let z = atomic_numbers[a.index()];
if z != 6 && z != 1 {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
on_bits.push(7);
}
}
for &z in &[26, 27, 28, 44, 45, 46, 76, 77, 78] {
if has_element(z) {
on_bits.push(8);
break;
}
}
for &z in &[4, 12, 20, 38, 56, 88] {
if has_element(z) {
on_bits.push(9);
break;
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 4 {
found = true;
break;
}
}
}
if found {
on_bits.push(10);
}
}
for &z in &[29, 30, 47, 48, 79, 80] {
if has_element(z) {
on_bits.push(11);
break;
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
let c_neighbors_of_n: Vec<usize> = adjacency
.neighbors_of(n.atom_index)
.iter()
.filter(|nn| atomic_numbers[nn.atom_index] == 6)
.map(|nn| nn.atom_index)
.collect();
if c_neighbors_of_n.len() >= 2 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(12);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 16 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Single {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(13);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let o_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.map(|n| n.atom_index)
.collect();
if o_neighbors.len() >= 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(14);
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 3 {
for a in ri.atom_rings()[ring_idx].iter() {
let z = atomic_numbers[a.index()];
if z != 6 && z != 1 {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
on_bits.push(15);
}
}
if has_bond_order(BondOrder::Triple) {
on_bits.push(16);
}
for &z in &[5, 13, 31, 49, 81] {
if has_element(z) {
on_bits.push(17);
break;
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 7 {
found = true;
break;
}
}
}
if found {
on_bits.push(18);
}
}
if has_element(14) {
on_bits.push(19);
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
let i_het: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|nn| {
let z = atomic_numbers[nn.atom_index];
nn.atom_index != n.atom_index && z != 6 && z != 1
})
.map(|nn| nn.atom_index)
.collect();
let n_het: Vec<usize> = adjacency
.neighbors_of(n.atom_index)
.iter()
.filter(|nn| {
let z = atomic_numbers[nn.atom_index];
nn.atom_index != i && z != 6 && z != 1
})
.map(|nn| nn.atom_index)
.collect();
if !i_het.is_empty() && !n_het.is_empty() {
found = true;
break;
}
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(20);
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(21);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
let o_neighbors_of_c: Vec<usize> = adjacency
.neighbors_of(n.atom_index)
.iter()
.filter(|nn| atomic_numbers[nn.atom_index] == 8)
.map(|nn| nn.atom_index)
.collect();
if o_neighbors_of_c.len() >= 2 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(22);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(23);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
let n_neighbors_of_c: Vec<usize> = adjacency
.neighbors_of(n.atom_index)
.iter()
.filter(|nn| atomic_numbers[nn.atom_index] == 7)
.map(|nn| nn.atom_index)
.collect();
if n_neighbors_of_c.len() >= 3 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(24);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 && atom_in_ring(n.atom_index) {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(25);
}
}
if has_element(53) {
on_bits.push(26);
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let het_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| {
let z = atomic_numbers[n.atom_index];
z != 6 && z != 1
})
.map(|n| n.atom_index)
.collect();
if het_neighbors.len() >= 2 {
found = true;
break;
}
}
}
if found {
on_bits.push(27);
}
}
if has_element(15) {
on_bits.push(28);
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 {
let c_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6)
.count();
let total_neighbors = adjacency.neighbors_of(i).len();
if c_neighbors >= 2 && total_neighbors >= 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(29);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 {
for n in adjacency.neighbors_of(i) {
let nz = atomic_numbers[n.atom_index];
if nz == 9 || nz == 17 || nz == 35 || nz == 53 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(30);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 16 {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 7 {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(31);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 16 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(32);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let neighbors = adjacency.neighbors_of(i);
if neighbors.len() == 1 {
if let Some(order) = bond_orders.get(neighbors[0].bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
on_bits.push(33);
}
}
for &z in &[3, 11, 19, 37, 55, 87] {
if has_element(z) {
on_bits.push(34);
break;
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 && atom_in_ring(i) {
found = true;
break;
}
}
if found {
on_bits.push(35);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let n_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 7)
.map(|n| n.atom_index)
.collect();
let o_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.map(|n| n.atom_index)
.collect();
if n_neighbors.len() >= 2 && !o_neighbors.is_empty() {
found = true;
break;
}
}
}
if found {
on_bits.push(36);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let n_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 7)
.map(|n| n.atom_index)
.collect();
let c_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6)
.map(|n| n.atom_index)
.collect();
if n_neighbors.len() >= 2 && !c_neighbors.is_empty() {
found = true;
break;
}
}
}
if found {
on_bits.push(37);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
let o_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.map(|n| n.atom_index)
.collect();
if o_neighbors.len() >= 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(38);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Single {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(39);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Triple {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(40);
}
}
if has_element(9) {
on_bits.push(41);
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h {
continue;
}
for n in adjacency.neighbors_of(i) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index == i {
continue;
}
let z_nn = atomic_numbers[nn.atom_index];
if z_nn == 1 || z_nn == 6 {
continue;
}
let has_h_nn = has_hydrogens(nn.atom_index, &adjacency, &atomic_numbers);
if has_h_nn && n.atom_index != nn.atom_index {
found = true;
break;
}
}
if found {
break;
}
}
if found {
break;
}
}
if found {
on_bits.push(42);
}
}
{
let mut found = false;
for &z in &atomic_numbers {
if z != 1
&& z != 6
&& z != 7
&& z != 8
&& z != 9
&& z != 14
&& z != 15
&& z != 16
&& z != 17
&& z != 35
&& z != 53
{
found = true;
break;
}
}
if found {
on_bits.push(43);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 7 {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(44);
}
}
if has_element(35) {
on_bits.push(45);
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
for n in adjacency.neighbors_of(i) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 7 {
found = true;
break;
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(46);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 {
let o_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.map(|n| n.atom_index)
.collect();
if o_neighbors.len() >= 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(47);
}
}
if charges.iter().any(|&c| c != 0) {
on_bits.push(48);
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
let i_c_neighbors = adjacency
.neighbors_of(i)
.iter()
.filter(|nn| {
nn.atom_index != n.atom_index
&& atomic_numbers[nn.atom_index] == 6
})
.count();
let n_c_neighbors = adjacency
.neighbors_of(n.atom_index)
.iter()
.filter(|nn| {
nn.atom_index != i && atomic_numbers[nn.atom_index] == 6
})
.count();
if i_c_neighbors >= 1 && n_c_neighbors >= 1 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(49);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 16 {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 8 {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(50);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(51);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h_i = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h_i {
continue;
}
if has_heteroatom_at_distance(i, 3, &adjacency, &atomic_numbers, &charges, true, i) {
found = true;
break;
}
}
if found {
on_bits.push(52);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h_i = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h_i {
continue;
}
if has_heteroatom_at_distance(i, 2, &adjacency, &atomic_numbers, &charges, true, i) {
found = true;
break;
}
}
if found {
on_bits.push(53);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
let o_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.map(|n| n.atom_index)
.collect();
if o_neighbors.len() >= 2 {
found = true;
break;
}
}
}
if found {
on_bits.push(54);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
let o_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.map(|n| n.atom_index)
.collect();
let c_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6)
.map(|n| n.atom_index)
.collect();
if o_neighbors.len() >= 2 && !c_neighbors.is_empty() {
found = true;
break;
}
}
}
if found {
on_bits.push(55);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 && atom_in_ring(i) {
found = true;
break;
}
}
if found {
on_bits.push(56);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
let het_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| {
let z = atomic_numbers[n.atom_index];
z != 6 && z != 1
})
.map(|n| n.atom_index)
.collect();
if het_neighbors.len() >= 2 {
found = true;
break;
}
}
}
if found {
on_bits.push(57);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
for n in adjacency.neighbors_of(i) {
if aromatic_atoms[n.atom_index] {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order != BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(58);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(59);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 && adjacency.neighbors_of(i).len() >= 3 {
found = true;
break;
}
}
if found {
on_bits.push(60);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atom_in_ring(i) {
let ring_nbrs: Vec<&NeighborRef> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atom_in_ring(n.atom_index))
.collect();
let non_ring_nbrs: Vec<&NeighborRef> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| !atom_in_ring(n.atom_index))
.collect();
if !ring_nbrs.is_empty() && !non_ring_nbrs.is_empty() {
found = true;
break;
}
}
}
if found {
on_bits.push(61);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(62);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 && atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if !atom_in_ring(n.atom_index) {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(63);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && aromatic_atoms[i] {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 && aromatic_atoms[n.atom_index] {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(64);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let c_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6)
.count();
if c_neighbors >= 3 && adjacency.neighbors_of(i).len() >= 4 {
found = true;
break;
}
}
}
if found {
on_bits.push(65);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 16 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(66);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h_i = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h_i {
continue;
}
for n in adjacency.neighbors_of(i) {
let z_n = atomic_numbers[n.atom_index];
if z_n == 1 || z_n == 6 {
continue;
}
let has_h_n = has_hydrogens(n.atom_index, &adjacency, &atomic_numbers);
if has_h_n {
found = true;
break;
}
}
if found {
break;
}
}
if found {
on_bits.push(67);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
for n in adjacency.neighbors_of(i) {
let z_n = atomic_numbers[n.atom_index];
if z_n == 1 || z_n == 6 {
continue;
}
let has_h_n = has_hydrogens(n.atom_index, &adjacency, &atomic_numbers);
if has_h_n {
found = true;
break;
}
}
if found {
break;
}
}
if found {
on_bits.push(68);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
let het_neighbors: Vec<usize> = adjacency
.neighbors_of(i)
.iter()
.filter(|n| {
let z = atomic_numbers[n.atom_index];
z != 6 && z != 1
})
.map(|n| n.atom_index)
.collect();
if het_neighbors.len() >= 2 {
found = true;
break;
}
}
}
if found {
on_bits.push(69);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(70);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
if has_oxidizable_at_distance(i, 2, &adjacency, &atomic_numbers, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(71);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 {
for n in adjacency.neighbors_of(i) {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(72);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let methyl_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| {
atomic_numbers[n.atom_index] == 6
&& adjacency.neighbors_of(n.atom_index).len() == 1
})
.count();
if methyl_neighbors >= 2 {
found = true;
break;
}
}
if found {
on_bits.push(73);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if !atom_in_ring(n.atom_index) {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(74);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
let i_sub = adjacency.neighbors_of(i).len()
- if adjacency
.neighbors_of(i)
.iter()
.any(|nn| nn.atom_index == n.atom_index)
{
1
} else {
0
};
let n_sub = adjacency.neighbors_of(n.atom_index).len() - 1; if i_sub >= 1 && n_sub >= 1 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(75);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
if has_atom_at_distance(i, 1, &adjacency, &atomic_numbers, 7, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(76);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(77);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
if has_atom_at_distance(i, 2, &adjacency, &atomic_numbers, 7, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(78);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
if has_atom_at_distance(i, 3, &adjacency, &atomic_numbers, 7, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(79);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 16 && adjacency.neighbors_of(i).len() >= 3 {
found = true;
break;
}
}
if found {
on_bits.push(80);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
let z_n = atomic_numbers[n.atom_index];
if z_n != 6 && z_n != 1 {
let has_h_n = has_hydrogens(n.atom_index, &adjacency, &atomic_numbers);
if has_h_n {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(81);
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 5 {
for a in ri.atom_rings()[ring_idx].iter() {
let z = atomic_numbers[a.index()];
if z != 6 && z != 1 {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
on_bits.push(82);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && degree(i) == 1 {
found = true;
break;
}
}
if found {
on_bits.push(83);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
let c_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6)
.count();
if c_neighbors >= 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(84);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i != 6 && z_i != 1 {
let ch2_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6 && degree(n.atom_index) <= 2)
.count();
if ch2_neighbors >= 2 {
found = true;
break;
}
}
}
if found {
on_bits.push(85);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z == 9 || z == 17 || z == 35 || z == 53 {
if !atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if atom_in_ring(n.atom_index) {
found = true;
break;
}
if degree(n.atom_index) > 1 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(86);
}
}
if has_element(16) {
on_bits.push(87);
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
if has_oxidizable_at_distance(i, 3, &adjacency, &atomic_numbers, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(88);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h_i = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h_i {
continue;
}
if has_ch2_at_distance(i, 3, &adjacency, &atomic_numbers, &atomic_numbers) {
found = true;
break;
}
}
if found {
on_bits.push(89);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h_i = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h_i {
continue;
}
if has_ch2_at_distance(i, 4, &adjacency, &atomic_numbers, &atomic_numbers) {
found = true;
break;
}
}
if found {
on_bits.push(90);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let has_o = adjacency
.neighbors_of(i)
.iter()
.any(|n| atomic_numbers[n.atom_index] == 8);
let has_n = adjacency
.neighbors_of(i)
.iter()
.any(|n| atomic_numbers[n.atom_index] == 7);
let c_neighbors = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 6)
.count();
if has_o && has_n && c_neighbors >= 1 {
found = true;
break;
}
}
}
if found {
on_bits.push(91);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i != 6 && z_i != 1 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 && degree(n.atom_index) == 1 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(92);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i != 6 && z_i != 1 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(93);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
if has_oxidizable_at_distance(i, 2, &adjacency, &atomic_numbers, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(94);
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 5 {
found = true;
break;
}
}
}
if found {
on_bits.push(95);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
if has_oxidizable_at_distance(i, 3, &adjacency, &atomic_numbers, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(96);
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 6 {
for a in ri.atom_rings()[ring_idx].iter() {
let z = atomic_numbers[a.index()];
if z != 6 && z != 1 {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
on_bits.push(97);
}
}
{
if has_double_bond() {
on_bits.push(98);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 2 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(99);
}
}
{
let mut found = false;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() >= 8 {
found = true;
break;
}
}
}
if found {
on_bits.push(100);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i != 6 && z_i != 1 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(101);
}
}
if has_element(17) {
on_bits.push(102);
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let has_h_i = has_hydrogens(i, &adjacency, &atomic_numbers);
if !has_h_i {
continue;
}
for n in adjacency.neighbors_of(i) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 6 {
found = true;
break;
}
}
if found {
break;
}
}
if found {
break;
}
}
if found {
on_bits.push(103);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atom_in_ring(i) {
let ring_nbrs: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atom_in_ring(n.atom_index))
.count();
if ring_nbrs >= 3 {
found = true;
break;
}
}
}
if found {
on_bits.push(104);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
let het_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| {
let z = atomic_numbers[n.atom_index];
z != 6 && z != 1
})
.count();
if het_neighbors >= 3 {
found = true;
break;
}
}
if found {
on_bits.push(105);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 9 || z_i == 17 || z_i == 35 || z_i == 53 {
for n in adjacency.neighbors_of(i) {
if degree(n.atom_index) >= 3 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(106);
}
}
{
let mut found = false;
if has_element(6) {
let mut chain_found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 1 {
let mut prev = i;
let mut current = adjacency.neighbors_of(i)[0].atom_index;
let mut chain_len = 2i32;
loop {
let nbrs: Vec<usize> = adjacency
.neighbors_of(current)
.iter()
.map(|n| n.atom_index)
.filter(|&n| n != prev)
.collect();
if nbrs.is_empty() {
break;
}
if nbrs.len() > 1 {
break;
} prev = current;
current = nbrs[0];
chain_len += 1;
if chain_len >= 5 {
chain_found = true;
break;
}
}
if chain_found {
break;
}
}
}
found = chain_found;
}
if found {
on_bits.push(107);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(108);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let has_n = adjacency
.neighbors_of(i)
.iter()
.any(|n| atomic_numbers[n.atom_index] == 7);
let has_o = adjacency
.neighbors_of(i)
.iter()
.any(|n| atomic_numbers[n.atom_index] == 8);
if has_n && has_o {
found = true;
break;
}
}
}
if found {
on_bits.push(109);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 6 {
found = true;
break;
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(110);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if degree(i) >= 4 {
found = true;
break;
}
}
if found {
on_bits.push(111);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
for n in adjacency.neighbors_of(i) {
if aromatic_atoms[n.atom_index] {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order != BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(112);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 1 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 && degree(n.atom_index) >= 2 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(113);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 1 {
for n in adjacency.neighbors_of(i) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 6 {
found = true;
break;
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(114);
}
}
{
let mut found_116 = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 1 {
let mut visited = vec![false; num_atoms];
visited[i] = true;
let mut queue = vec![];
for n in adjacency.neighbors_of(i) {
queue.push((n.atom_index, 1usize));
visited[n.atom_index] = true;
}
while let Some((node, dist)) = queue.pop() {
if atomic_numbers[node] == 6 && dist >= 2 {
found_116 = true;
break;
}
if dist < 3 {
for n in adjacency.neighbors_of(node) {
if !visited[n.atom_index] {
visited[n.atom_index] = true;
queue.push((n.atom_index, dist + 1));
}
}
}
}
if found_116 {
break;
}
}
}
if found_116 {
on_bits.push(115);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
if has_oxidizable_at_distance(i, 1, &adjacency, &atomic_numbers, i) {
found = true;
break;
}
}
}
if found {
on_bits.push(116);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(117);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(118);
}
}
{
let mut het_in_ring_count = 0usize;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 && atom_in_ring(i) {
het_in_ring_count += 1;
if het_in_ring_count >= 2 {
break;
}
}
}
if het_in_ring_count >= 2 {
on_bits.push(119);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && atom_in_ring(i) {
found = true;
break;
}
}
if found {
on_bits.push(120);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && degree(i) >= 3 {
found = true;
break;
}
}
if found {
on_bits.push(121);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let o_neighbors: usize = adjacency
.neighbors_of(i)
.iter()
.filter(|n| atomic_numbers[n.atom_index] == 8)
.count();
if o_neighbors >= 2 {
found = true;
break;
}
}
}
if found {
on_bits.push(122);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
for n in adjacency.neighbors_of(i) {
let z_n = atomic_numbers[n.atom_index];
if z_n != 1 && z_n != 6 {
found = true;
break;
}
}
if found {
break;
}
}
if found {
on_bits.push(123);
}
}
{
let mut aro_ring_count = 0usize;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
let atoms_in_ring = &ri.atom_rings()[ring_idx];
let is_aro = atoms_in_ring.iter().all(|a| aromatic_atoms[a.index()]);
if is_aro {
aro_ring_count += 1;
if aro_ring_count >= 2 {
break;
}
}
}
}
if aro_ring_count >= 2 {
on_bits.push(124);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 && !atom_in_ring(i) {
found = true;
break;
}
}
if found {
on_bits.push(125);
}
}
{
let mut count = 0usize;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 && atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if !atom_in_ring(n.atom_index) {
count += 1;
break;
}
}
}
}
if count > 1 {
on_bits.push(126);
}
if count > 0 {
on_bits.push(142);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 2 {
let mut visited = vec![false; num_atoms];
visited[i] = true;
let mut queue = vec![];
for n in adjacency.neighbors_of(i) {
visited[n.atom_index] = true;
queue.push((n.atom_index, 1usize));
}
while let Some((node, dist)) = queue.pop() {
if dist >= 3 && atomic_numbers[node] == 6 {
found = true;
break;
}
if dist < 4 {
for n in adjacency.neighbors_of(node) {
if !visited[n.atom_index] {
visited[n.atom_index] = true;
queue.push((n.atom_index, dist + 1));
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(127);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
let mut visited = vec![false; num_atoms];
visited[i] = true;
let mut queue = vec![];
for n in adjacency.neighbors_of(i) {
visited[n.atom_index] = true;
queue.push((n.atom_index, 1usize));
}
while let Some((node, dist)) = queue.pop() {
if dist >= 2 && atomic_numbers[node] == 6 {
found = true;
break;
}
if dist < 3 {
for n in adjacency.neighbors_of(node) {
if !visited[n.atom_index] {
visited[n.atom_index] = true;
queue.push((n.atom_index, dist + 1));
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(128);
}
}
{
let mut count = 0usize;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i == 1 || z_i == 6 {
continue;
}
for n in adjacency.neighbors_of(i) {
let z_n = atomic_numbers[n.atom_index];
if z_n != 1 && z_n != 6 {
count += 1;
if count > 1 {
break;
}
}
}
if count > 1 {
break;
}
}
if count > 1 {
on_bits.push(129);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 {
if has_hydrogens(i, &adjacency, &atomic_numbers) {
found = true;
break;
}
}
}
if found {
on_bits.push(130);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
for n in adjacency.neighbors_of(i) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atomic_numbers[nn.atom_index] == 6 {
found = true;
break;
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(131);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if !atom_in_ring(n.atom_index) {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(132);
}
}
{
let mut found = false;
for &z in &atomic_numbers {
if z == 9 || z == 17 || z == 35 || z == 53 {
found = true;
break;
}
}
if found {
on_bits.push(133);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 {
for n in adjacency.neighbors_of(i) {
if aromatic_atoms[n.atom_index] {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order != BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(134);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
for n in adjacency.neighbors_of(i) {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(135);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && atom_in_ring(i) {
found = true;
break;
}
}
if found {
on_bits.push(136);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z_i = atomic_numbers[i];
if z_i != 6 && z_i != 1 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(137);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 && has_hydrogens(i, &adjacency, &atomic_numbers) {
found = true;
break;
}
}
if found {
on_bits.push(138);
}
}
if has_element(8) {
on_bits.push(139);
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && degree(i) == 1 {
found = true;
break;
}
}
if found {
on_bits.push(140);
}
}
if has_element(7) {
on_bits.push(141);
}
{
let mut found = false;
if has_aromatic_bond() {
for i in 0..num_atoms {
if aromatic_atoms[i] {
continue;
}
for n in adjacency.neighbors_of(i) {
if aromatic_atoms[n.atom_index] {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && !aromatic_atoms[nn.atom_index] {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
break;
}
}
}
if found {
on_bits.push(143);
}
}
{
let mut count = 0usize;
if let Some(ri) = ring_info {
for ring_idx in 0..ri.atom_rings().len() {
if ri.atom_rings()[ring_idx].len() == 6 {
count += 1;
if count > 1 {
break;
}
}
}
}
if count > 1 {
on_bits.push(144);
}
if count > 0 {
on_bits.push(162);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
found = true;
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(146);
}
}
{
let mut found = false;
for i in 0..num_atoms {
let z = atomic_numbers[i];
if z != 6 && z != 1 && degree(i) >= 3 {
found = true;
break;
}
}
if found {
on_bits.push(147);
}
}
{
let count = atomic_numbers
.iter()
.enumerate()
.filter(|&(i, &z)| z == 6 && degree(i) <= 1)
.count();
if count > 1 {
on_bits.push(148);
}
if count > 0 {
on_bits.push(159);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atom_in_ring(i) {
for n in adjacency.neighbors_of(i) {
if !atom_in_ring(n.atom_index) {
for nn in adjacency.neighbors_of(n.atom_index) {
if nn.atom_index != i && atom_in_ring(nn.atom_index) {
found = true;
break;
}
}
}
if found {
break;
}
}
}
if found {
break;
}
}
if found {
on_bits.push(149);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && has_hydrogens(i, &adjacency, &atomic_numbers) {
found = true;
break;
}
}
if found {
on_bits.push(150);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 8 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 6 {
let c_neighbors_of_c: usize = adjacency
.neighbors_of(n.atom_index)
.iter()
.filter(|nn| nn.atom_index != i && atomic_numbers[nn.atom_index] == 6)
.count();
if c_neighbors_of_c >= 2 {
found = true;
break;
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(151);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Double || *order == BondOrder::Aromatic {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(153);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 && !atom_in_ring(i) && degree(i) == 2 {
found = true;
break;
}
}
if found {
on_bits.push(154);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 7 && degree(i) >= 3 {
found = true;
break;
}
}
if found {
on_bits.push(155);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 8 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Single {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(156);
}
}
{
let mut found = false;
for i in 0..num_atoms {
if atomic_numbers[i] == 6 {
for n in adjacency.neighbors_of(i) {
if atomic_numbers[n.atom_index] == 7 {
if let Some(order) = bond_orders.get(n.bond.index()) {
if *order == BondOrder::Single {
found = true;
break;
}
}
}
}
}
if found {
break;
}
}
if found {
on_bits.push(157);
}
}
if has_element(7) {
on_bits.push(160);
}
{
let mut found = false;
for i in 0..num_atoms {
if atom_in_ring(i) {
found = true;
break;
}
}
if found {
on_bits.push(164);
}
}
{
let mut num_fragments = 0usize;
let mut visited = vec![false; num_atoms];
for i in 0..num_atoms {
if !visited[i] {
num_fragments += 1;
if num_fragments > 1 {
break;
}
let mut queue = vec![i];
visited[i] = true;
while let Some(node) = queue.pop() {
for n in adjacency.neighbors_of(node) {
if !visited[n.atom_index] {
visited[n.atom_index] = true;
queue.push(n.atom_index);
}
}
}
}
}
if num_fragments > 1 {
on_bits.push(165);
}
}
Fingerprint::from_on_bits(n_bits, on_bits)
}
/// Heuristically reports whether the atom at `atom_idx` still has room for hydrogens.
///
/// The atom's neighbor count in `adjacency` is compared against a fixed per-element
/// valence allowance; the result is `true` when the neighbor count is strictly below
/// that allowance. Bond orders and formal charges are not consulted here.
///
/// # Panics
///
/// Panics if `atom_idx` is out of bounds for `atomic_numbers`.
fn has_hydrogens(atom_idx: usize, adjacency: &AdjacencyList, atomic_numbers: &[u8]) -> bool {
    // Allowance keyed by atomic number. Every element not listed explicitly
    // (carbon and silicon included) falls through to the default of 4.
    let allowance: u32 = match atomic_numbers[atom_idx] {
        5 | 7 | 15 => 3,
        8 | 16 => 2,
        9 | 17 | 35 | 53 => 1,
        _ => 4,
    };
    let neighbor_count = adjacency.neighbors_of(atom_idx).len() as u32;
    neighbor_count < allowance
}
/// Reports whether an atom with atomic number `target_z` is found on level `distance`
/// of a breadth-first walk that starts at `start`.
///
/// Level 1 holds the direct neighbors of `start` (minus `exclude`); each further level
/// holds the not-yet-seen neighbors of the previous level, so an atom belongs to the
/// first level on which the walk reaches it. `exclude` is only filtered out among the
/// direct neighbors of `start`; it can still be reached on a deeper level through
/// another atom.
///
/// Returns `false` when `distance` is 0.
///
/// # Panics
///
/// Panics if `start`, or any neighbor index reported by `adjacency`, is out of bounds
/// for `atomic_numbers`.
fn has_atom_at_distance(
    start: usize,
    distance: usize,
    adjacency: &AdjacencyList,
    atomic_numbers: &[u8],
    target_z: u8,
    exclude: usize,
) -> bool {
    let mut seen = vec![false; atomic_numbers.len()];
    seen[start] = true;

    // Level 1: the direct neighbors of `start`, skipping `exclude`.
    let mut frontier: Vec<usize> = Vec::new();
    for nbr in adjacency.neighbors_of(start) {
        let idx = nbr.atom_index;
        if idx != exclude {
            seen[idx] = true;
            frontier.push(idx);
        }
    }

    if distance == 0 {
        return false;
    }

    // Advance the frontier one level at a time until it sits on level `distance`.
    let mut upcoming: Vec<usize> = Vec::new();
    for _ in 1..distance {
        for atom in frontier.drain(..) {
            for nbr in adjacency.neighbors_of(atom) {
                let idx = nbr.atom_index;
                if !seen[idx] {
                    seen[idx] = true;
                    upcoming.push(idx);
                }
            }
        }
        std::mem::swap(&mut frontier, &mut upcoming);
    }

    frontier.iter().any(|&atom| atomic_numbers[atom] == target_z)
}
/// Reports whether an oxygen atom (atomic number 8) is found at `distance` from `start`.
///
/// This is a thin wrapper: every argument is forwarded unchanged to
/// `has_atom_at_distance`, with the target atomic number fixed to 8. Despite the
/// "oxidizable" name, oxygen is the only element searched for.
fn has_oxidizable_at_distance(
    start: usize,
    distance: usize,
    adjacency: &AdjacencyList,
    atomic_numbers: &[u8],
    exclude: usize,
) -> bool {
    // Atomic number of oxygen, the fixed search target of this helper.
    const OXYGEN: u8 = 8;
    has_atom_at_distance(start, distance, adjacency, atomic_numbers, OXYGEN, exclude)
}
/// Reports whether a heteroatom (atomic number other than 1 or 6) is found on level
/// `distance` of a breadth-first walk that starts at `start`.
///
/// Level 1 holds the direct neighbors of `start` (minus `exclude`); each further level
/// holds the not-yet-seen neighbors of the previous level. When `need_h` is `true`, a
/// heteroatom only counts if `has_hydrogens` also accepts it. `_charges` is accepted
/// for signature compatibility but is not read.
///
/// Returns `false` when `distance` is 0.
///
/// # Panics
///
/// Panics if `start`, or any neighbor index reported by `adjacency`, is out of bounds
/// for `atomic_numbers`.
fn has_heteroatom_at_distance(
    start: usize,
    distance: usize,
    adjacency: &AdjacencyList,
    atomic_numbers: &[u8],
    _charges: &[i8],
    need_h: bool,
    exclude: usize,
) -> bool {
    let mut seen = vec![false; atomic_numbers.len()];
    seen[start] = true;

    // Level 1: the direct neighbors of `start`, skipping `exclude`.
    let mut frontier: Vec<usize> = Vec::new();
    for nbr in adjacency.neighbors_of(start) {
        let idx = nbr.atom_index;
        if idx != exclude {
            seen[idx] = true;
            frontier.push(idx);
        }
    }

    if distance == 0 {
        return false;
    }

    // Advance the frontier one level at a time until it sits on level `distance`.
    let mut upcoming: Vec<usize> = Vec::new();
    for _ in 1..distance {
        for atom in frontier.drain(..) {
            for nbr in adjacency.neighbors_of(atom) {
                let idx = nbr.atom_index;
                if !seen[idx] {
                    seen[idx] = true;
                    upcoming.push(idx);
                }
            }
        }
        std::mem::swap(&mut frontier, &mut upcoming);
    }

    frontier.iter().any(|&atom| {
        let z = atomic_numbers[atom];
        let is_hetero = !(z == 1 || z == 6);
        // The hydrogen check runs only for heteroatoms, and only when requested.
        is_hetero && (!need_h || has_hydrogens(atom, adjacency, atomic_numbers))
    })
}
/// Breadth-first search for a carbon atom (atomic number 6) that is first
/// reached exactly `distance` steps away from `start`.
///
/// NOTE(review): despite the name, only the atomic number is checked; the
/// hydrogen count is not verified and `_hydropattern` is not read. Confirm
/// whether a real CH2 test was intended.
///
/// Returns `false` when `distance` is 0, because no level is examined.
///
/// # Panics
///
/// Panics if `start` or any atom index reported by `adjacency` is out of
/// bounds for `atomic_numbers`.
fn has_ch2_at_distance(
    start: usize,
    distance: usize,
    adjacency: &AdjacencyList,
    atomic_numbers: &[u8],
    _hydropattern: &[u8],
) -> bool {
    const CARBON: u8 = 6;

    let mut seen = vec![false; atomic_numbers.len()];
    seen[start] = true;

    // Depth-1 level: every neighbour of `start`.
    let mut frontier: Vec<usize> = Vec::new();
    for nb in adjacency.neighbors_of(start) {
        seen[nb.atom_index] = true;
        frontier.push(nb.atom_index);
    }

    for depth in 1..=distance {
        if depth == distance {
            // Only atoms on the final level are candidates; nothing is expanded further.
            return frontier
                .iter()
                .any(|&atom| atomic_numbers[atom] == CARBON);
        }
        let mut next_level: Vec<usize> = Vec::new();
        for &atom in &frontier {
            for nb in adjacency.neighbors_of(atom) {
                if !seen[nb.atom_index] {
                    seen[nb.atom_index] = true;
                    next_level.push(nb.atom_index);
                }
            }
        }
        frontier = next_level;
    }
    false
}
#[cfg(test)]
mod tests {
    //! Unit tests for the Morgan fingerprint entry points.
    //!
    //! Every molecule is parsed with sanitization disabled (the second argument
    //! of `from_smiles_with_sanitize` is `false`).

    use super::*;
    use crate::Molecule;

    /// Default parameters with only `radius` and `n_bits` overridden.
    fn default_morgan_params(radius: u32, n_bits: usize) -> MorganFingerprintParams {
        MorganFingerprintParams {
            radius,
            n_bits,
            ..Default::default()
        }
    }

    /// Molecule parsed from the SMILES `C`.
    fn methane() -> Molecule {
        Molecule::from_smiles_with_sanitize("C", false).unwrap()
    }

    /// Molecule parsed from the SMILES `CC`.
    fn ethane() -> Molecule {
        Molecule::from_smiles_with_sanitize("CC", false).unwrap()
    }

    /// Molecule parsed from the aromatic SMILES `c1ccccc1`.
    fn benzene() -> Molecule {
        Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap()
    }

    /// An empty SMILES string yields a fingerprint with no on-bits.
    #[test]
    fn morgan_fingerprint_empty_molecule_returns_empty_fingerprint() {
        let mol = Molecule::from_smiles_with_sanitize("", false).unwrap();
        let fp = morgan_fingerprint(&mol, &default_morgan_params(2, 2048)).unwrap();
        assert_eq!(fp.on_bits(), Vec::<usize>::new());
    }

    /// `n_bits == 0` is rejected with `FingerprintError::EmptyFingerprint`.
    #[test]
    fn morgan_fingerprint_empty_params_n_bits_zero_returns_error() {
        let mol = methane();
        let params = MorganFingerprintParams {
            n_bits: 0,
            ..Default::default()
        };
        assert!(matches!(
            morgan_fingerprint(&mol, &params),
            Err(FingerprintError::EmptyFingerprint)
        ));
    }

    /// Two runs on methane at radius 0 agree and set at least one bit.
    #[test]
    fn morgan_fingerprint_methane_radius0_produces_deterministic_fingerprint() {
        let mol = methane();
        let fp_a = morgan_fingerprint(&mol, &default_morgan_params(0, 2048)).unwrap();
        let fp_b = morgan_fingerprint(&mol, &default_morgan_params(0, 2048)).unwrap();
        assert_eq!(fp_a, fp_b);
        assert!(!fp_a.on_bits().is_empty(), "expected at least one on-bit");
    }

    /// Tanimoto similarity of a fingerprint with itself is 1.0 (within 1e-9).
    #[test]
    fn morgan_fingerprint_tanimoto_self_is_one() {
        let mol = benzene();
        let fp = morgan_fingerprint(&mol, &default_morgan_params(2, 2048)).unwrap();
        let similarity = fp.tanimoto(&fp).unwrap();
        assert!(
            (similarity - 1.0).abs() < 1e-9,
            "tanimoto of fingerprint with itself should be 1.0, got {similarity}"
        );
    }

    /// The fingerprint width equals the requested `n_bits`.
    #[test]
    fn morgan_fingerprint_n_bits_matches_param() {
        let mol = benzene();
        for n_bits in [64, 256, 1024, 2048] {
            let fp = morgan_fingerprint(&mol, &default_morgan_params(2, n_bits)).unwrap();
            assert_eq!(fp.n_bits(), n_bits);
        }
    }

    /// Repeated runs agree for every radius from 0 to 3.
    #[test]
    fn morgan_fingerprint_radius_determinism() {
        let mol = benzene();
        for radius in 0..=3 {
            let fp_a = morgan_fingerprint(&mol, &default_morgan_params(radius, 2048)).unwrap();
            let fp_b = morgan_fingerprint(&mol, &default_morgan_params(radius, 2048)).unwrap();
            assert_eq!(fp_a, fp_b, "radius={radius} should be deterministic");
        }
    }

    /// Methane and ethane are distinguishable at radius 0.
    #[test]
    fn morgan_fingerprint_ethane_and_methane_differ() {
        let m = methane();
        let e = ethane();
        let fp_m = morgan_fingerprint(&m, &default_morgan_params(0, 2048)).unwrap();
        let fp_e = morgan_fingerprint(&e, &default_morgan_params(0, 2048)).unwrap();
        assert_ne!(
            fp_m, fp_e,
            "methane and ethane should have different fingerprints"
        );
    }

    /// Benzene and ethane are distinguishable at radius 2.
    #[test]
    fn morgan_fingerprint_benzene_and_ethane_differ() {
        let b = benzene();
        let e = ethane();
        let fp_b = morgan_fingerprint(&b, &default_morgan_params(2, 2048)).unwrap();
        let fp_e = morgan_fingerprint(&e, &default_morgan_params(2, 2048)).unwrap();
        assert_ne!(
            fp_b, fp_e,
            "benzene and ethane should have different fingerprints"
        );
    }

    /// A larger radius never yields fewer on-bits than radius 0 for ethane.
    #[test]
    fn morgan_fingerprint_radius_increases_on_bits() {
        let mol = ethane();
        let fp_r0 = morgan_fingerprint(&mol, &default_morgan_params(0, 2048)).unwrap();
        let fp_r2 = morgan_fingerprint(&mol, &default_morgan_params(2, 2048)).unwrap();
        assert!(
            fp_r2.on_bits().len() >= fp_r0.on_bits().len(),
            "larger radius should produce at least as many on-bits"
        );
    }

    /// With `collect_additional_output` set, the extra output is present, has
    /// one atom count per atom, and a non-empty bit-info map.
    #[test]
    fn morgan_fingerprint_with_output_produces_additional_data() {
        let mol = ethane();
        let params = MorganFingerprintParams {
            radius: 1,
            n_bits: 2048,
            collect_additional_output: true,
            ..Default::default()
        };
        let output = morgan_fingerprint_with_output(&mol, &params).unwrap();
        assert!(output.additional_output.is_some());
        let extra = output.additional_output.unwrap();
        assert_eq!(extra.atom_counts.len(), mol.num_atoms());
        assert!(!extra.bit_info_map.is_empty());
    }

    /// `from_atoms` restricts the atoms used: one index still sets bits, an
    /// empty list sets none.
    #[test]
    fn morgan_fingerprint_from_atoms_filters_by_allowed_indices() {
        let mol = ethane();
        let params = MorganFingerprintParams {
            radius: 0,
            n_bits: 2048,
            from_atoms: Some(vec![0]),
            ..Default::default()
        };
        let fp = morgan_fingerprint(&mol, &params).unwrap();
        assert!(!fp.on_bits().is_empty());

        let params_empty = MorganFingerprintParams {
            radius: 0,
            n_bits: 2048,
            from_atoms: Some(vec![]),
            ..Default::default()
        };
        let fp_empty = morgan_fingerprint(&mol, &params_empty).unwrap();
        assert!(
            fp_empty.on_bits().is_empty(),
            "no from_atoms → empty fingerprint"
        );
    }

    /// Ignoring an atom never increases the number of on-bits.
    #[test]
    fn morgan_fingerprint_ignore_atoms_excludes_indices() {
        let mol = ethane();
        let params_full = default_morgan_params(0, 2048);
        let params_exclude = MorganFingerprintParams {
            radius: 0,
            n_bits: 2048,
            ignore_atoms: Some(vec![1]),
            ..Default::default()
        };
        let fp_full = morgan_fingerprint(&mol, &params_full).unwrap();
        let fp_excluded = morgan_fingerprint(&mol, &params_exclude).unwrap();
        assert_ne!(fp_full.on_bits().len(), 0);
        assert!(
            fp_excluded.on_bits().len() <= fp_full.on_bits().len(),
            "excluding an atom should not increase on-bits"
        );
    }

    /// The `Feature` atom-invariant generator is deterministic and sets bits.
    #[test]
    fn morgan_fingerprint_feature_generator_produces_deterministic_fingerprint() {
        let mol = benzene();
        let params = MorganFingerprintParams {
            radius: 2,
            n_bits: 2048,
            atom_invariants_generator: MorganAtomInvariantsGenerator::Feature,
            ..Default::default()
        };
        let fp_a = morgan_fingerprint(&mol, &params).unwrap();
        let fp_b = morgan_fingerprint(&mol, &params).unwrap();
        assert_eq!(fp_a, fp_b, "feature invariants should be deterministic");
        assert!(
            !fp_a.on_bits().is_empty(),
            "expected on-bits from feature invariants"
        );
    }

    /// Custom atom invariants (one per ethane atom) are accepted and give a
    /// reproducible result.
    ///
    /// NOTE(review): only determinism is asserted; nothing here checks that the
    /// custom values actually replace the default invariants.
    #[test]
    fn morgan_fingerprint_custom_invariants_override_default() {
        let mol = ethane();
        let custom = vec![42u32, 99u32];
        let params = MorganFingerprintParams {
            radius: 0,
            n_bits: 2048,
            custom_atom_invariants: Some(custom),
            ..Default::default()
        };
        let fp_a = morgan_fingerprint(&mol, &params).unwrap();
        let fp_b = morgan_fingerprint(&mol, &params).unwrap();
        assert_eq!(fp_a, fp_b);
    }

    /// Fingerprinting the SMILES `[H][H]` succeeds.
    ///
    /// NOTE(review): as SMILES, `[H][H]` describes two atoms joined by one
    /// bond, while the test name says "zero bonds" — confirm the intended input.
    #[test]
    fn morgan_fingerprint_zero_bonds_molecule_does_not_panic() {
        let mol = Molecule::from_smiles_with_sanitize("[H][H]", false).unwrap();
        let fp = morgan_fingerprint(&mol, &default_morgan_params(2, 2048));
        assert!(fp.is_ok());
    }

    /// Count simulation sets at least as many bits as the standard mode.
    #[test]
    fn morgan_fingerprint_count_simulation_runs() {
        let mol = benzene();
        let params = MorganFingerprintParams {
            radius: 2,
            n_bits: 2048,
            count_simulation: true,
            count_bounds: vec![1, 2, 4, 8],
            ..Default::default()
        };
        let fp = morgan_fingerprint(&mol, &params).unwrap();
        assert!(!fp.on_bits().is_empty());
        let std_fp = morgan_fingerprint(&mol, &default_morgan_params(2, 2048)).unwrap();
        assert!(
            fp.on_bits().len() >= std_fp.on_bits().len(),
            "count-simulation should set at least as many bits as standard mode"
        );
    }

    /// Benzene can be fingerprinted straight after parsing.
    ///
    /// NOTE(review): the name refers to a derived cache, but the body only
    /// checks that the result has on-bits; no cache state is inspected.
    #[test]
    fn morgan_fingerprint_uses_topology_adjacency_without_derived_cache() {
        let mol = benzene();
        let fp = morgan_fingerprint(&mol, &default_morgan_params(2, 2048)).unwrap();
        assert!(!fp.on_bits().is_empty());
    }

    /// With `use_chirality` on, two SMILES differing only in `@@` vs `@` give
    /// fingerprints with Tanimoto similarity below 1.0.
    #[test]
    fn morgan_fingerprint_chirality_produces_different_fingerprints() {
        let r_mol = Molecule::from_smiles_with_sanitize("C[C@@H](O)CC", false).unwrap();
        let s_mol = Molecule::from_smiles_with_sanitize("C[C@H](O)CC", false).unwrap();
        let params = MorganFingerprintParams {
            radius: 2,
            n_bits: 2048,
            use_chirality: true,
            ..Default::default()
        };
        let fp_r = morgan_fingerprint(&r_mol, &params).unwrap();
        let fp_s = morgan_fingerprint(&s_mol, &params).unwrap();
        let tc = fp_r.tanimoto(&fp_s).unwrap();
        assert!(
            tc < 1.0,
            "R and S enantiomers should have tc < 1.0 with chirality, got {tc}"
        );
    }

    /// With `use_chirality` off, the same two SMILES give equal fingerprints.
    #[test]
    fn morgan_fingerprint_chirality_disabled_produces_identical_fingerprints() {
        let r_mol = Molecule::from_smiles_with_sanitize("C[C@@H](O)CC", false).unwrap();
        let s_mol = Molecule::from_smiles_with_sanitize("C[C@H](O)CC", false).unwrap();
        let params = MorganFingerprintParams {
            radius: 2,
            n_bits: 2048,
            use_chirality: false,
            ..Default::default()
        };
        let fp_r = morgan_fingerprint(&r_mol, &params).unwrap();
        let fp_s = morgan_fingerprint(&s_mol, &params).unwrap();
        assert_eq!(
            fp_r, fp_s,
            "R and S should have same fingerprint without chirality"
        );
    }

    /// A custom bond-invariant vector (one entry for ethane's single bond) is
    /// accepted and the result still has on-bits.
    ///
    /// NOTE(review): at radius 0 this does not show that the custom value
    /// changes the output — confirm whether a stronger assertion was intended.
    #[test]
    fn morgan_fingerprint_custom_bond_invariants_override_default() {
        let mol = ethane();
        let params = MorganFingerprintParams {
            radius: 0,
            n_bits: 2048,
            custom_bond_invariants: Some(vec![5u32]),
            ..Default::default()
        };
        let fp = morgan_fingerprint(&mol, &params).unwrap();
        assert!(!fp.on_bits().is_empty());
    }
}