use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReactiveSite {
pub atom_index: usize,
pub confidence: f64,
pub source: SiteSource,
pub role: SiteRole,
pub position: [f64; 3],
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SiteSource {
SmirksMapping,
FukuiFrontier,
ChargeHeuristic,
ProximityFallback,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SiteRole {
Nucleophile,
Electrophile,
Radical,
BondForming,
BondBreaking,
Unclassified,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReactiveSiteAnalysis {
pub fragment_sites: Vec<Vec<ReactiveSite>>,
pub approach_direction: Option<[f64; 3]>,
pub reactive_distance: Option<f64>,
pub bond_changes: Vec<BondChangeInfo>,
pub notes: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BondChangeInfo {
pub frag_a: usize,
pub atom_a: usize,
pub frag_b: usize,
pub atom_b: usize,
pub forming: bool,
}
pub fn identify_reactive_sites(
fragment_elements: &[&[u8]],
fragment_coords: &[&[f64]],
fragment_smiles: &[&str],
smirks: Option<&str>,
) -> Result<ReactiveSiteAnalysis, String> {
let n_frags = fragment_elements.len();
if n_frags == 0 {
return Err("No fragments provided".into());
}
let mut fragment_sites: Vec<Vec<ReactiveSite>> = vec![Vec::new(); n_frags];
let mut bond_changes = Vec::new();
let mut notes = Vec::new();
if let Some(smirks_str) = smirks {
if let Ok(transform) = crate::smirks::parse_smirks(smirks_str) {
notes.push(format!(
"SMIRKS parsed: {} atom maps, {} bond changes",
transform.atom_map.len(),
transform.bond_changes.len()
));
let smirks_sites = identify_sites_from_smirks(
&transform,
fragment_elements,
fragment_coords,
fragment_smiles,
);
if let Ok((sites, changes)) = smirks_sites {
for (frag_idx, frag_sites) in sites.into_iter().enumerate() {
if frag_idx < n_frags {
fragment_sites[frag_idx].extend(frag_sites);
}
}
bond_changes = changes;
notes.push("Reactive sites identified from SMIRKS atom mapping.".into());
} else {
notes.push("SMIRKS matching failed, falling back to Fukui analysis.".into());
}
}
}
for (frag_idx, (elems, coords)) in fragment_elements
.iter()
.zip(fragment_coords.iter())
.enumerate()
{
let n_atoms = elems.len();
let pos: Vec<[f64; 3]> = coords.chunks(3).map(|c| [c[0], c[1], c[2]]).collect();
let has_smirks_sites = !fragment_sites[frag_idx].is_empty();
if let Ok(ranking) = crate::compute_reactivity_ranking(elems, &pos) {
if let Some(nuc_site) = ranking.nucleophilic_attack_sites.first() {
let a = nuc_site.atom_index;
if a < n_atoms {
let site = ReactiveSite {
atom_index: a,
confidence: if has_smirks_sites { 0.5 } else { 0.8 },
source: SiteSource::FukuiFrontier,
role: SiteRole::Electrophile, position: pos[a],
};
if !has_smirks_sites {
fragment_sites[frag_idx].push(site);
}
}
}
if let Some(elec_site) = ranking.electrophilic_attack_sites.first() {
let a = elec_site.atom_index;
if a < n_atoms {
let site = ReactiveSite {
atom_index: a,
confidence: if has_smirks_sites { 0.5 } else { 0.8 },
source: SiteSource::FukuiFrontier,
role: SiteRole::Nucleophile, position: pos[a],
};
if !has_smirks_sites {
fragment_sites[frag_idx].push(site);
}
}
}
notes.push(format!(
"Fragment {}: Fukui reactivity ranking computed.",
frag_idx
));
}
}
for (frag_idx, (elems, coords)) in fragment_elements
.iter()
.zip(fragment_coords.iter())
.enumerate()
{
if !fragment_sites[frag_idx].is_empty() {
continue; }
let n_atoms = elems.len();
if let Ok(charge_result) = crate::compute_charges(fragment_smiles[frag_idx]) {
let charges = &charge_result.charges;
if let Some((idx, _)) = charges
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
{
if idx < n_atoms {
let pos: [f64; 3] = [coords[idx * 3], coords[idx * 3 + 1], coords[idx * 3 + 2]];
fragment_sites[frag_idx].push(ReactiveSite {
atom_index: idx,
confidence: 0.4,
source: SiteSource::ChargeHeuristic,
role: SiteRole::Nucleophile,
position: pos,
});
}
}
if let Some((idx, _)) = charges
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
{
if idx < n_atoms {
let pos: [f64; 3] = [coords[idx * 3], coords[idx * 3 + 1], coords[idx * 3 + 2]];
fragment_sites[frag_idx].push(ReactiveSite {
atom_index: idx,
confidence: 0.4,
source: SiteSource::ChargeHeuristic,
role: SiteRole::Electrophile,
position: pos,
});
}
}
}
if fragment_sites[frag_idx].is_empty() && n_atoms > 0 {
let (best_idx, _) = elems.iter().enumerate().max_by_key(|(_, &z)| z).unwrap();
let pos: [f64; 3] = [
coords[best_idx * 3],
coords[best_idx * 3 + 1],
coords[best_idx * 3 + 2],
];
fragment_sites[frag_idx].push(ReactiveSite {
atom_index: best_idx,
confidence: 0.2,
source: SiteSource::ProximityFallback,
role: SiteRole::Unclassified,
position: pos,
});
notes.push(format!(
"Fragment {}: fallback to heaviest atom (Z={}).",
frag_idx, elems[best_idx]
));
}
}
let (approach_direction, reactive_distance) = if n_frags >= 2 {
compute_approach_from_sites(&fragment_sites)
} else {
(None, None)
};
Ok(ReactiveSiteAnalysis {
fragment_sites,
approach_direction,
reactive_distance,
bond_changes,
notes,
})
}
fn identify_sites_from_smirks(
transform: &crate::smirks::SmirksTransform,
fragment_elements: &[&[u8]],
fragment_coords: &[&[f64]],
fragment_smiles: &[&str],
) -> Result<(Vec<Vec<ReactiveSite>>, Vec<BondChangeInfo>), String> {
let n_frags = fragment_elements.len();
let mut fragment_sites: Vec<Vec<ReactiveSite>> = vec![Vec::new(); n_frags];
let mut bond_changes = Vec::new();
let smirks_result = if n_frags == 1 {
crate::smirks::apply_smirks(&transform.smirks, fragment_smiles[0])
} else {
crate::smirks::apply_smirks_multi(&transform.smirks, fragment_smiles)
};
let result = smirks_result?;
if !result.success {
return Err("SMIRKS pattern did not match".into());
}
let mut frag_offsets = Vec::with_capacity(n_frags);
let mut offset = 0usize;
for elems in fragment_elements {
frag_offsets.push(offset);
offset += elems.len();
}
for (&map_num, &mol_atom_idx) in &result.atom_mapping {
let (frag_idx, local_idx) = find_fragment_for_atom(mol_atom_idx, fragment_elements);
if frag_idx < n_frags && local_idx < fragment_elements[frag_idx].len() {
let coords = fragment_coords[frag_idx];
let pos = [
coords[local_idx * 3],
coords[local_idx * 3 + 1],
coords[local_idx * 3 + 2],
];
let role = determine_role_from_smirks(map_num, &transform.bond_changes);
fragment_sites[frag_idx].push(ReactiveSite {
atom_index: local_idx,
confidence: 0.95,
source: SiteSource::SmirksMapping,
role,
position: pos,
});
}
}
for bc in &transform.bond_changes {
if let (Some(&atom_a_mol), Some(&atom_b_mol)) = (
result.atom_mapping.get(&bc.atom1_map),
result.atom_mapping.get(&bc.atom2_map),
) {
let (frag_a, local_a) = find_fragment_for_atom(atom_a_mol, fragment_elements);
let (frag_b, local_b) = find_fragment_for_atom(atom_b_mol, fragment_elements);
let forming = bc.old_order.is_none() && bc.new_order.is_some();
bond_changes.push(BondChangeInfo {
frag_a,
atom_a: local_a,
frag_b,
atom_b: local_b,
forming,
});
}
}
Ok((fragment_sites, bond_changes))
}
fn find_fragment_for_atom(atom_idx: usize, fragment_elements: &[&[u8]]) -> (usize, usize) {
let mut offset = 0;
for (frag_idx, elems) in fragment_elements.iter().enumerate() {
if atom_idx < offset + elems.len() {
return (frag_idx, atom_idx - offset);
}
offset += elems.len();
}
let last = fragment_elements.len().saturating_sub(1);
(last, atom_idx.saturating_sub(offset))
}
fn determine_role_from_smirks(
map_num: usize,
bond_changes: &[crate::smirks::BondChange],
) -> SiteRole {
for bc in bond_changes {
if bc.atom1_map == map_num || bc.atom2_map == map_num {
if bc.old_order.is_none() && bc.new_order.is_some() {
return SiteRole::BondForming;
}
if bc.old_order.is_some() && bc.new_order.is_none() {
return SiteRole::BondBreaking;
}
}
}
SiteRole::Unclassified
}
fn compute_approach_from_sites(
fragment_sites: &[Vec<ReactiveSite>],
) -> (Option<[f64; 3]>, Option<f64>) {
if fragment_sites.len() < 2 {
return (None, None);
}
let sites_a = &fragment_sites[0];
let sites_b = &fragment_sites[1];
if sites_a.is_empty() || sites_b.is_empty() {
return (None, None);
}
let mut best_score = -1.0f64;
let mut best_a = 0;
let mut best_b = 0;
for (ia, sa) in sites_a.iter().enumerate() {
for (ib, sb) in sites_b.iter().enumerate() {
let mut score = sa.confidence + sb.confidence;
score += role_complementarity(&sa.role, &sb.role);
if score > best_score {
best_score = score;
best_a = ia;
best_b = ib;
}
}
}
let pa = sites_a[best_a].position;
let pb = sites_b[best_b].position;
let dx = pb[0] - pa[0];
let dy = pb[1] - pa[1];
let dz = pb[2] - pa[2];
let dist = (dx * dx + dy * dy + dz * dz).sqrt();
if dist < 1e-6 {
return (None, None);
}
let direction = [dx / dist, dy / dist, dz / dist];
(Some(direction), Some(dist))
}
fn role_complementarity(a: &SiteRole, b: &SiteRole) -> f64 {
match (a, b) {
(SiteRole::Nucleophile, SiteRole::Electrophile)
| (SiteRole::Electrophile, SiteRole::Nucleophile) => 1.0,
(SiteRole::BondForming, SiteRole::BondForming) => 0.8,
(SiteRole::BondBreaking, SiteRole::BondBreaking) => 0.6,
(SiteRole::Radical, SiteRole::Radical) => 0.5,
_ => 0.0,
}
}
pub fn reactive_atom_pairs(analysis: &ReactiveSiteAnalysis) -> Vec<(usize, usize)> {
let mut pairs = Vec::new();
for bc in &analysis.bond_changes {
if bc.forming && bc.frag_a == 0 && bc.frag_b == 1 {
pairs.push((bc.atom_a, bc.atom_b));
} else if bc.forming && bc.frag_a == 1 && bc.frag_b == 0 {
pairs.push((bc.atom_b, bc.atom_a));
}
}
if pairs.is_empty() && analysis.fragment_sites.len() >= 2 {
let sites_a = &analysis.fragment_sites[0];
let sites_b = &analysis.fragment_sites[1];
if let (Some(sa), Some(sb)) = (
sites_a.iter().max_by(|a, b| {
a.confidence
.partial_cmp(&b.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
}),
sites_b.iter().max_by(|a, b| {
a.confidence
.partial_cmp(&b.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
}),
) {
pairs.push((sa.atom_index, sb.atom_index));
}
}
pairs
}