Skip to main content

sci_form/
reactivity.rs

1//! Frontier-orbital reactivity descriptors derived from EHT molecular orbitals.
2
3use serde::{Deserialize, Serialize};
4
5/// One empirical pKa estimate for a specific atom-centered acid/base site.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct EmpiricalPkaSite {
8    /// Atom index in the input graph.
9    pub atom_index: usize,
10    /// Site classification: `acidic` or `basic`.
11    pub site_type: String,
12    /// Textual environment label used by the heuristic rule.
13    pub environment: String,
14    /// Estimated pKa value from simple charge/environment heuristics.
15    pub estimated_pka: f64,
16    /// Rule confidence in [0,1].
17    pub confidence: f64,
18}
19
20/// Empirical pKa summary for acidic and basic candidate sites.
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct EmpiricalPkaResult {
23    /// Ranked acidic sites (lowest pKa first).
24    pub acidic_sites: Vec<EmpiricalPkaSite>,
25    /// Ranked basic sites (highest pKa first).
26    pub basic_sites: Vec<EmpiricalPkaSite>,
27    /// Human-readable caveats and guidance.
28    pub notes: Vec<String>,
29}
30
31/// UFF energy enriched with aromaticity-aware correction metadata.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct UffHeuristicEnergy {
34    /// Raw UFF energy in kcal/mol.
35    pub raw_energy_kcal_mol: f64,
36    /// Aromatic stabilization correction in kcal/mol (negative lowers energy).
37    pub aromatic_stabilization_kcal_mol: f64,
38    /// Corrected heuristic energy in kcal/mol.
39    pub corrected_energy_kcal_mol: f64,
40    /// Number of aromatic bonds found in the parsed molecular graph.
41    pub aromatic_bond_count: usize,
42    /// Interpretation notes for downstream consumers.
43    pub notes: Vec<String>,
44}
45
46/// Atom-resolved frontier-orbital descriptor summary.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct FrontierDescriptors {
49    /// Number of atoms in the system.
50    pub num_atoms: usize,
51    /// HOMO atom contributions, normalized to sum to ~1.
52    pub homo_atom_contributions: Vec<f64>,
53    /// LUMO atom contributions, normalized to sum to ~1.
54    pub lumo_atom_contributions: Vec<f64>,
55    /// Simple dual-descriptor proxy: LUMO contribution minus HOMO contribution.
56    pub dual_descriptor: Vec<f64>,
57    /// HOMO energy in eV.
58    pub homo_energy: f64,
59    /// LUMO energy in eV.
60    pub lumo_energy: f64,
61    /// HOMO-LUMO gap in eV.
62    pub gap: f64,
63}
64
65/// One condensed Fukui descriptor row for an atom.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct CondensedFukuiAtom {
68    /// Atom index in the input order.
69    pub atom_index: usize,
70    /// Nucleophilic-attack susceptibility proxy (f+).
71    pub f_plus: f64,
72    /// Electrophilic-attack susceptibility proxy (f-).
73    pub f_minus: f64,
74    /// Radical-attack susceptibility proxy (f0).
75    pub f_radical: f64,
76    /// Dual descriptor (f+ - f-).
77    pub dual_descriptor: f64,
78}
79
80/// Fukui workflow output, including atom-wise condensed descriptors.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct FukuiDescriptors {
83    /// Number of atoms in the system.
84    pub num_atoms: usize,
85    /// Nucleophilic-attack susceptibility proxy per atom.
86    pub f_plus: Vec<f64>,
87    /// Electrophilic-attack susceptibility proxy per atom.
88    pub f_minus: Vec<f64>,
89    /// Radical-attack susceptibility proxy per atom.
90    pub f_radical: Vec<f64>,
91    /// Dual descriptor (f+ - f-) per atom.
92    pub dual_descriptor: Vec<f64>,
93    /// Condensed atom-wise summary.
94    pub condensed: Vec<CondensedFukuiAtom>,
95    /// HOMO energy in eV.
96    pub homo_energy: f64,
97    /// LUMO energy in eV.
98    pub lumo_energy: f64,
99    /// HOMO-LUMO gap in eV.
100    pub gap: f64,
101    /// Domain and confidence caveats for interpretation.
102    pub validity_notes: Vec<String>,
103}
104
105/// One ranked site entry for empirical local-reactivity helpers.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct ReactivitySiteScore {
108    /// Atom index in the input order.
109    pub atom_index: usize,
110    /// Composite empirical score.
111    pub score: f64,
112}
113
114/// Empirical local-reactivity ranking summary.
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct ReactivityRanking {
117    /// Ranked candidate sites for nucleophilic attack on the molecule.
118    pub nucleophilic_attack_sites: Vec<ReactivitySiteScore>,
119    /// Ranked candidate sites for electrophilic attack on the molecule.
120    pub electrophilic_attack_sites: Vec<ReactivitySiteScore>,
121    /// Ranked candidate sites for radical attack on the molecule.
122    pub radical_attack_sites: Vec<ReactivitySiteScore>,
123    /// Notes about how scores are constructed.
124    pub notes: Vec<String>,
125}
126
127/// A single broad-band transition peak in an exploratory UV-Vis-like spectrum.
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct UvVisPeak {
130    /// Transition energy in eV.
131    pub energy_ev: f64,
132    /// Wavelength in nm.
133    pub wavelength_nm: f64,
134    /// Relative intensity (arbitrary units).
135    pub intensity: f64,
136    /// Occupied MO index.
137    pub from_mo: usize,
138    /// Virtual MO index.
139    pub to_mo: usize,
140}
141
142/// Exploratory UV-Vis-like spectrum built from low-cost EHT transitions.
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct UvVisSpectrum {
145    /// Energy axis in eV.
146    pub energies_ev: Vec<f64>,
147    /// Intensity axis in arbitrary units.
148    pub intensities: Vec<f64>,
149    /// Dominant transitions used for interpretation.
150    pub peaks: Vec<UvVisPeak>,
151    /// Broadening sigma in eV.
152    pub sigma: f64,
153    /// Method caveats for use in UI/API consumers.
154    pub notes: Vec<String>,
155}
156
157fn orbital_atom_contributions(
158    basis: &[crate::eht::basis::AtomicOrbital],
159    overlap: &nalgebra::DMatrix<f64>,
160    coefficients: &[Vec<f64>],
161    mo_index: usize,
162    n_atoms: usize,
163) -> Vec<f64> {
164    let ao_to_atom = crate::population::population::ao_to_atom_map(basis);
165    let mut contributions = vec![0.0; n_atoms];
166
167    for mu in 0..basis.len() {
168        let mut gross = 0.0;
169        for nu in 0..basis.len() {
170            gross += coefficients[mu][mo_index] * overlap[(mu, nu)] * coefficients[nu][mo_index];
171        }
172        contributions[ao_to_atom[mu]] += gross;
173    }
174
175    let total: f64 = contributions.iter().sum();
176    if total.abs() > 1e-12 {
177        for value in &mut contributions {
178            *value /= total;
179        }
180    }
181
182    contributions
183}
184
185/// Compute atom-resolved HOMO/LUMO descriptors from an EHT result.
186pub fn compute_frontier_descriptors(
187    elements: &[u8],
188    positions: &[[f64; 3]],
189    eht_result: &crate::eht::EhtResult,
190) -> FrontierDescriptors {
191    let basis = crate::eht::basis::build_basis(elements, positions);
192    let overlap = crate::eht::build_overlap_matrix(&basis);
193
194    let homo_atom_contributions = orbital_atom_contributions(
195        &basis,
196        &overlap,
197        &eht_result.coefficients,
198        eht_result.homo_index,
199        elements.len(),
200    );
201    let lumo_atom_contributions = orbital_atom_contributions(
202        &basis,
203        &overlap,
204        &eht_result.coefficients,
205        eht_result.lumo_index,
206        elements.len(),
207    );
208    let dual_descriptor = lumo_atom_contributions
209        .iter()
210        .zip(homo_atom_contributions.iter())
211        .map(|(lumo, homo)| lumo - homo)
212        .collect();
213
214    FrontierDescriptors {
215        num_atoms: elements.len(),
216        homo_atom_contributions,
217        lumo_atom_contributions,
218        dual_descriptor,
219        homo_energy: eht_result.homo_energy,
220        lumo_energy: eht_result.lumo_energy,
221        gap: eht_result.gap,
222    }
223}
224
225fn validity_notes(elements: &[u8]) -> Vec<String> {
226    let support = crate::eht::analyze_eht_support(elements);
227    let mut notes = vec![
228        "Fukui descriptors are computed from frontier-orbital atom contributions (EHT proxy), not from full finite-difference electron-addition/removal calculations.".to_string(),
229        "Interpret values comparatively within related structures; absolute values are semi-quantitative.".to_string(),
230    ];
231
232    match support.level {
233        crate::eht::SupportLevel::Supported => {
234            notes.push(
235                "Element set is in supported EHT domain; qualitative organic trend interpretation is usually reliable."
236                    .to_string(),
237            );
238        }
239        crate::eht::SupportLevel::Experimental => {
240            notes.push(
241                "Element set is in experimental EHT domain (typically transition metals); use rankings as exploratory guidance only."
242                    .to_string(),
243            );
244        }
245        crate::eht::SupportLevel::Unsupported => {
246            notes.push(
247                "Element set is outside supported EHT parameterization; descriptor reliability is low or undefined."
248                    .to_string(),
249            );
250        }
251    }
252
253    notes.extend(support.warnings);
254    notes
255}
256
257/// Compute Fukui-style local reactivity descriptors from EHT frontier orbitals.
258pub fn compute_fukui_descriptors(
259    elements: &[u8],
260    positions: &[[f64; 3]],
261    eht_result: &crate::eht::EhtResult,
262) -> FukuiDescriptors {
263    let frontier = compute_frontier_descriptors(elements, positions, eht_result);
264    let f_plus = frontier.lumo_atom_contributions.clone();
265    let f_minus = frontier.homo_atom_contributions.clone();
266    let f_radical: Vec<f64> = f_plus
267        .iter()
268        .zip(f_minus.iter())
269        .map(|(fp, fm)| 0.5 * (fp + fm))
270        .collect();
271    let dual_descriptor: Vec<f64> = f_plus
272        .iter()
273        .zip(f_minus.iter())
274        .map(|(fp, fm)| fp - fm)
275        .collect();
276
277    let condensed: Vec<CondensedFukuiAtom> = (0..elements.len())
278        .map(|idx| CondensedFukuiAtom {
279            atom_index: idx,
280            f_plus: f_plus[idx],
281            f_minus: f_minus[idx],
282            f_radical: f_radical[idx],
283            dual_descriptor: dual_descriptor[idx],
284        })
285        .collect();
286
287    FukuiDescriptors {
288        num_atoms: elements.len(),
289        f_plus,
290        f_minus,
291        f_radical,
292        dual_descriptor,
293        condensed,
294        homo_energy: frontier.homo_energy,
295        lumo_energy: frontier.lumo_energy,
296        gap: frontier.gap,
297        validity_notes: validity_notes(elements),
298    }
299}
300
301fn sorted_scores(mut scores: Vec<ReactivitySiteScore>) -> Vec<ReactivitySiteScore> {
302    scores.sort_by(|a, b| {
303        b.score
304            .partial_cmp(&a.score)
305            .unwrap_or(std::cmp::Ordering::Equal)
306    });
307    scores
308}
309
310/// Build empirical local-reactivity rankings from Fukui descriptors and Mulliken charges.
311pub fn rank_reactivity_sites(
312    fukui: &FukuiDescriptors,
313    mulliken_charges: &[f64],
314) -> ReactivityRanking {
315    let n = fukui.num_atoms.min(mulliken_charges.len());
316
317    let mut nucleophilic_attack_sites = Vec::with_capacity(n);
318    let mut electrophilic_attack_sites = Vec::with_capacity(n);
319    let mut radical_attack_sites = Vec::with_capacity(n);
320
321    for i in 0..n {
322        let q = mulliken_charges[i];
323        let fp = fukui.f_plus[i];
324        let fm = fukui.f_minus[i];
325        let f0 = fukui.f_radical[i];
326
327        // Positive charge reinforces nucleophilic attack susceptibility.
328        let nuc_score = fp + 0.25 * q.max(0.0);
329        // Negative charge reinforces electrophilic attack susceptibility.
330        let elec_score = fm + 0.25 * (-q).max(0.0);
331        let rad_score = f0 + 0.1 * q.abs();
332
333        nucleophilic_attack_sites.push(ReactivitySiteScore {
334            atom_index: i,
335            score: nuc_score,
336        });
337        electrophilic_attack_sites.push(ReactivitySiteScore {
338            atom_index: i,
339            score: elec_score,
340        });
341        radical_attack_sites.push(ReactivitySiteScore {
342            atom_index: i,
343            score: rad_score,
344        });
345    }
346
347    ReactivityRanking {
348        nucleophilic_attack_sites: sorted_scores(nucleophilic_attack_sites),
349        electrophilic_attack_sites: sorted_scores(electrophilic_attack_sites),
350        radical_attack_sites: sorted_scores(radical_attack_sites),
351        notes: vec![
352            "Scores are empirical composites of condensed Fukui terms and Mulliken charge bias.".to_string(),
353            "Use ranking order for exploratory prioritization; values are not calibrated kinetic barriers.".to_string(),
354        ],
355    }
356}
357
358fn gaussian(x: f64, mu: f64, sigma: f64) -> f64 {
359    let s = sigma.max(1e-6);
360    let norm = 1.0 / (s * (2.0 * std::f64::consts::PI).sqrt());
361    let dx = x - mu;
362    norm * (-0.5 * dx * dx / (s * s)).exp()
363}
364
365/// Lorentzian line shape: L(x) = (1/π) · γ / [(x - x₀)² + γ²]
366fn lorentzian(x: f64, x0: f64, gamma: f64) -> f64 {
367    let g = gamma.max(1e-6);
368    let dx = x - x0;
369    g / (std::f64::consts::PI * (dx * dx + g * g))
370}
371
372/// Broadening line shape type for UV-Vis spectra.
373#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
374pub enum BroadeningType {
375    /// Gaussian broadening (default, original behavior)
376    Gaussian,
377    /// Lorentzian broadening (natural NMR line shape)
378    Lorentzian,
379}
380
381/// An excitation from sTDA-xTB with proper oscillator strength.
382#[derive(Debug, Clone, Serialize, Deserialize)]
383pub struct StdaExcitation {
384    /// Excitation energy in eV.
385    pub energy_ev: f64,
386    /// Wavelength in nm.
387    pub wavelength_nm: f64,
388    /// Oscillator strength (dimensionless).
389    pub oscillator_strength: f64,
390    /// Occupied MO index.
391    pub from_mo: usize,
392    /// Virtual MO index.
393    pub to_mo: usize,
394    /// Transition dipole moment magnitude (Debye).
395    pub transition_dipole: f64,
396}
397
398/// sTDA-xTB UV-Vis spectrum with proper excitations and oscillator strengths.
399#[derive(Debug, Clone, Serialize, Deserialize)]
400pub struct StdaUvVisSpectrum {
401    /// Energy axis in eV.
402    pub energies_ev: Vec<f64>,
403    /// Wavelength axis in nm.
404    pub wavelengths_nm: Vec<f64>,
405    /// Molar absorptivity (ε) axis (L/(mol·cm)).
406    pub absorptivity: Vec<f64>,
407    /// Excitations used for the spectrum.
408    pub excitations: Vec<StdaExcitation>,
409    /// Broadening width in eV.
410    pub sigma: f64,
411    /// Broadening type used.
412    pub broadening: BroadeningType,
413    /// Notes.
414    pub notes: Vec<String>,
415}
416
417const EV_TO_NM: f64 = 1239.841984;
418
419/// Build an sTDA-xTB UV-Vis spectrum using xTB molecular orbitals.
420///
421/// The simplified Tamm-Dancoff approximation (sTDA) selects single excitations
422/// from occupied→virtual MO pairs, computes transition dipole moments from
423/// the MO coefficients, and derives oscillator strengths.
424///
425/// `elements`: atomic numbers
426/// `positions`: Cartesian coordinates in Å
427/// `sigma`: broadening width in eV (typically 0.2–0.5)
428/// `e_min`, `e_max`: energy window in eV
429/// `n_points`: grid resolution
430/// `broadening`: Gaussian or Lorentzian line shape
431pub fn compute_stda_uvvis_spectrum(
432    elements: &[u8],
433    positions: &[[f64; 3]],
434    sigma: f64,
435    e_min: f64,
436    e_max: f64,
437    n_points: usize,
438    broadening: BroadeningType,
439) -> Result<StdaUvVisSpectrum, String> {
440    // Keep energies and coefficients on the same electronic-structure model.
441    // Mixing xTB orbital energies with EHT MO coefficients produces inconsistent transition energies.
442    let eht = crate::eht::solve_eht(elements, positions, None)?;
443    let basis = crate::eht::basis::build_basis(elements, positions);
444    let orbital_energies = eht.energies.clone();
445    let coefficients = eht.coefficients.clone();
446    let n_electrons = eht.n_electrons;
447    let n_basis = orbital_energies.len();
448
449    let n_occ = n_electrons / 2;
450    let n_virt = n_basis.saturating_sub(n_occ);
451    if n_occ == 0 || n_virt == 0 {
452        return Err("No occupied or virtual orbitals for sTDA".to_string());
453    }
454
455    let n_points = n_points.max(2);
456    let span = (e_max - e_min).max(1e-6);
457    let step = span / (n_points as f64 - 1.0);
458    let energies_ev: Vec<f64> = (0..n_points).map(|i| e_min + step * i as f64).collect();
459    let wavelengths_nm: Vec<f64> = energies_ev
460        .iter()
461        .map(|&e| if e > 0.01 { EV_TO_NM / e } else { 0.0 })
462        .collect();
463    let mut absorptivity = vec![0.0; n_points];
464
465    // sTDA: select single excitations
466    let mut excitations = Vec::new();
467    let n_ao = coefficients.len();
468
469    // Energy window for selecting excitations
470    let e_window = e_max + 2.0 * sigma; // include transitions that could bleed into window
471
472    for occ in 0..n_occ.min(n_basis) {
473        for virt in n_occ..n_basis {
474            let delta_e = orbital_energies[virt] - orbital_energies[occ];
475            if delta_e <= 0.01 || delta_e > e_window {
476                continue;
477            }
478
479            // Transition dipole moment: μ_if = Σ_μ c_μi * c_μf * <μ|r|μ>
480            // One-center approximation: weight each AO pair by its AO center.
481            let mut tdm = [0.0f64; 3];
482            for mu in 0..n_ao {
483                let c_occ = coefficients[mu][occ];
484                let c_virt = coefficients[mu][virt];
485                let product = c_occ * c_virt;
486                if product.abs() < 1e-12 {
487                    continue;
488                }
489                if let Some(ao) = basis.get(mu) {
490                    tdm[0] += product * ao.center[0];
491                    tdm[1] += product * ao.center[1];
492                    tdm[2] += product * ao.center[2];
493                }
494            }
495
496            let tdm_mag = (tdm[0] * tdm[0] + tdm[1] * tdm[1] + tdm[2] * tdm[2]).sqrt();
497
498            // Oscillator strength: f = (2/3) * ΔE * |μ|²
499            // In atomic units. Convert ΔE from eV to Hartree for the formula.
500            let delta_e_ha = delta_e / 27.2114;
501            let f_osc = (2.0 / 3.0) * delta_e_ha * tdm_mag * tdm_mag;
502
503            if f_osc < 1e-8 {
504                continue;
505            }
506
507            excitations.push(StdaExcitation {
508                energy_ev: delta_e,
509                wavelength_nm: EV_TO_NM / delta_e,
510                oscillator_strength: f_osc,
511                from_mo: occ,
512                to_mo: virt,
513                transition_dipole: tdm_mag * 4.80321, // convert to Debye
514            });
515
516            // Add to spectrum with selected broadening
517            // ε(E) = (NA * π * e² / (2 * me * c * ln(10))) * f * g(E)
518            // Simplified: ε ∝ f * g(E-ΔE)
519            let scale = f_osc * 28700.0; // approximate ε scaling (L/(mol·cm))
520            for (idx, &e) in energies_ev.iter().enumerate() {
521                absorptivity[idx] += scale
522                    * match broadening {
523                        BroadeningType::Gaussian => gaussian(e, delta_e, sigma),
524                        BroadeningType::Lorentzian => lorentzian(e, delta_e, sigma),
525                    };
526            }
527        }
528    }
529
530    // Sort excitations by oscillator strength (descending)
531    excitations.sort_by(|a, b| {
532        b.oscillator_strength
533            .partial_cmp(&a.oscillator_strength)
534            .unwrap_or(std::cmp::Ordering::Equal)
535    });
536    excitations.truncate(50); // Keep top 50
537
538    let broadening_name = match broadening {
539        BroadeningType::Gaussian => "Gaussian",
540        BroadeningType::Lorentzian => "Lorentzian",
541    };
542
543    Ok(StdaUvVisSpectrum {
544        energies_ev,
545        wavelengths_nm,
546        absorptivity,
547        excitations,
548        sigma,
549        broadening,
550        notes: vec![
551            format!("sTDA UV-Vis spectrum using internally consistent EHT MO transitions with {} broadening (σ = {} eV).", broadening_name, sigma),
552            "Oscillator strengths derive from AO-center transition dipoles in a one-center approximation; deep-UV bands are more reliable than visible-edge intensities.".to_string(),
553            "Molar absorptivity (ε) values are semi-quantitative; use for trend analysis and peak identification.".to_string(),
554        ],
555    })
556}
557
558/// Build an exploratory UV-Vis-like spectrum from low-cost EHT occupied→virtual transitions.
559pub fn compute_uv_vis_like_spectrum(
560    eht_result: &crate::eht::EhtResult,
561    sigma: f64,
562    e_min: f64,
563    e_max: f64,
564    n_points: usize,
565) -> UvVisSpectrum {
566    let n_points = n_points.max(2);
567    let span = (e_max - e_min).max(1e-6);
568    let step = span / (n_points as f64 - 1.0);
569    let energies_ev: Vec<f64> = (0..n_points).map(|i| e_min + step * i as f64).collect();
570    let mut intensities = vec![0.0; n_points];
571
572    let mut peaks = Vec::new();
573    for occ in 0..=eht_result.homo_index {
574        for virt in eht_result.lumo_index..eht_result.energies.len() {
575            let delta_e = eht_result.energies[virt] - eht_result.energies[occ];
576            if delta_e <= 1e-6 {
577                continue;
578            }
579
580            let mut overlap_strength = 0.0;
581            for ao in 0..eht_result.coefficients.len() {
582                overlap_strength +=
583                    (eht_result.coefficients[ao][occ] * eht_result.coefficients[ao][virt]).abs();
584            }
585
586            let intensity = overlap_strength * overlap_strength;
587            if intensity <= 1e-12 {
588                continue;
589            }
590
591            if peaks.len() < 24 {
592                peaks.push(UvVisPeak {
593                    energy_ev: delta_e,
594                    wavelength_nm: 1239.841984 / delta_e,
595                    intensity,
596                    from_mo: occ,
597                    to_mo: virt,
598                });
599            }
600
601            for (idx, e) in energies_ev.iter().enumerate() {
602                intensities[idx] += intensity * gaussian(*e, delta_e, sigma);
603            }
604        }
605    }
606
607    peaks.sort_by(|a, b| {
608        b.intensity
609            .partial_cmp(&a.intensity)
610            .unwrap_or(std::cmp::Ordering::Equal)
611    });
612
613    UvVisSpectrum {
614        energies_ev,
615        intensities,
616        peaks,
617        sigma,
618        notes: vec![
619            "Exploratory UV-Vis-like spectrum from EHT MO energy differences and coefficient-overlap intensity proxy.".to_string(),
620            "This is a qualitative visualization aid, not a calibrated excited-state method (no CI/TDDFT).".to_string(),
621        ],
622    }
623}
624
625fn has_bond_order(
626    mol: &crate::graph::Molecule,
627    a: usize,
628    b: usize,
629    order: crate::graph::BondOrder,
630) -> bool {
631    let ia = petgraph::graph::NodeIndex::new(a);
632    let ib = petgraph::graph::NodeIndex::new(b);
633    if let Some(edge_idx) = mol.graph.find_edge(ia, ib) {
634        return mol.graph[edge_idx].order == order;
635    }
636    false
637}
638
639fn is_carboxylic_acid_oxygen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
640    let idx = petgraph::graph::NodeIndex::new(atom_idx);
641    let atom = &mol.graph[idx];
642    if atom.element != 8 {
643        return false;
644    }
645
646    let neighbors: Vec<_> = mol.graph.neighbors(idx).collect();
647    let has_h = neighbors.iter().any(|n| mol.graph[*n].element == 1);
648    if !has_h {
649        return false;
650    }
651
652    let carbon_neighbor = neighbors.iter().find(|n| mol.graph[**n].element == 6);
653    if let Some(c_idx) = carbon_neighbor {
654        let c_neighbors: Vec<_> = mol.graph.neighbors(*c_idx).collect();
655        let carbonyl_o_count = c_neighbors
656            .iter()
657            .filter(|n| mol.graph[**n].element == 8)
658            .filter(|n| {
659                has_bond_order(
660                    mol,
661                    c_idx.index(),
662                    n.index(),
663                    crate::graph::BondOrder::Double,
664                )
665            })
666            .count();
667        return carbonyl_o_count >= 1;
668    }
669
670    false
671}
672
673fn is_phenol_oxygen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
674    let idx = petgraph::graph::NodeIndex::new(atom_idx);
675    let atom = &mol.graph[idx];
676    if atom.element != 8 {
677        return false;
678    }
679    let neighbors: Vec<_> = mol.graph.neighbors(idx).collect();
680    let has_h = neighbors.iter().any(|n| mol.graph[*n].element == 1);
681    if !has_h {
682        return false;
683    }
684    neighbors.iter().any(|n| {
685        if mol.graph[*n].element != 6 {
686            return false;
687        }
688        mol.graph
689            .edges(*n)
690            .any(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
691    })
692}
693
694fn is_thiol_sulfur(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
695    let idx = petgraph::graph::NodeIndex::new(atom_idx);
696    let atom = &mol.graph[idx];
697    if atom.element != 16 {
698        return false;
699    }
700    mol.graph.neighbors(idx).any(|n| mol.graph[n].element == 1)
701}
702
703fn is_neutral_amine_nitrogen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
704    let idx = petgraph::graph::NodeIndex::new(atom_idx);
705    let atom = &mol.graph[idx];
706    if atom.element != 7 || atom.formal_charge != 0 {
707        return false;
708    }
709    if !matches!(atom.hybridization, crate::graph::Hybridization::SP3) {
710        return false;
711    }
712    !mol.graph
713        .edges(idx)
714        .any(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
715}
716
717fn is_aromatic_nitrogen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
718    let idx = petgraph::graph::NodeIndex::new(atom_idx);
719    let atom = &mol.graph[idx];
720    if atom.element != 7 {
721        return false;
722    }
723    mol.graph
724        .edges(idx)
725        .any(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
726}
727
728/// Estimate acidic/basic pKa sites from graph environment and Gasteiger charges.
729pub fn estimate_empirical_pka(mol: &crate::graph::Molecule, charges: &[f64]) -> EmpiricalPkaResult {
730    let n = mol.graph.node_count().min(charges.len());
731    let mut acidic_sites = Vec::new();
732    let mut basic_sites = Vec::new();
733
734    for atom_idx in 0..n {
735        let idx = petgraph::graph::NodeIndex::new(atom_idx);
736        let atom = &mol.graph[idx];
737        let q = charges[atom_idx];
738
739        if is_carboxylic_acid_oxygen(mol, atom_idx) {
740            acidic_sites.push(EmpiricalPkaSite {
741                atom_index: atom_idx,
742                site_type: "acidic".to_string(),
743                environment: "carboxylic_acid_oxygen".to_string(),
744                estimated_pka: (4.5 - 2.0 * q).clamp(-1.0, 14.0),
745                confidence: 0.82,
746            });
747        } else if is_phenol_oxygen(mol, atom_idx) {
748            acidic_sites.push(EmpiricalPkaSite {
749                atom_index: atom_idx,
750                site_type: "acidic".to_string(),
751                environment: "phenol_oxygen".to_string(),
752                estimated_pka: (10.0 - 1.5 * q).clamp(2.0, 16.0),
753                confidence: 0.68,
754            });
755        } else if is_thiol_sulfur(mol, atom_idx) {
756            acidic_sites.push(EmpiricalPkaSite {
757                atom_index: atom_idx,
758                site_type: "acidic".to_string(),
759                environment: "thiol_sulfur".to_string(),
760                estimated_pka: (10.5 - 1.2 * q).clamp(2.0, 16.0),
761                confidence: 0.64,
762            });
763        } else if atom.element == 7 && atom.formal_charge > 0 {
764            acidic_sites.push(EmpiricalPkaSite {
765                atom_index: atom_idx,
766                site_type: "acidic".to_string(),
767                environment: "ammonium_like".to_string(),
768                estimated_pka: (9.7 - 1.0 * q).clamp(4.0, 14.0),
769                confidence: 0.62,
770            });
771        }
772
773        if is_neutral_amine_nitrogen(mol, atom_idx) {
774            let h_count = mol
775                .graph
776                .neighbors(idx)
777                .filter(|n| mol.graph[*n].element == 1)
778                .count();
779            let base_pka = if h_count >= 2 {
780                10.8
781            } else if h_count == 1 {
782                10.4
783            } else {
784                9.8
785            };
786            basic_sites.push(EmpiricalPkaSite {
787                atom_index: atom_idx,
788                site_type: "basic".to_string(),
789                environment: "aliphatic_amine".to_string(),
790                estimated_pka: (base_pka - 2.5 * q).clamp(2.0, 14.5),
791                confidence: 0.75,
792            });
793        } else if is_aromatic_nitrogen(mol, atom_idx) && atom.formal_charge <= 0 {
794            basic_sites.push(EmpiricalPkaSite {
795                atom_index: atom_idx,
796                site_type: "basic".to_string(),
797                environment: "aromatic_nitrogen".to_string(),
798                estimated_pka: (5.2 - 1.8 * q).clamp(-1.0, 10.0),
799                confidence: 0.6,
800            });
801        }
802    }
803
804    acidic_sites.sort_by(|a, b| {
805        a.estimated_pka
806            .partial_cmp(&b.estimated_pka)
807            .unwrap_or(std::cmp::Ordering::Equal)
808    });
809    basic_sites.sort_by(|a, b| {
810        b.estimated_pka
811            .partial_cmp(&a.estimated_pka)
812            .unwrap_or(std::cmp::Ordering::Equal)
813    });
814
815    EmpiricalPkaResult {
816        acidic_sites,
817        basic_sites,
818        notes: vec![
819            "Empirical pKa estimates use simple graph environments plus Gasteiger-charge adjustments; values are coarse screening hints, not publication-grade predictions.".to_string(),
820            "Best use is relative ranking within related congeneric series under similar conditions.".to_string(),
821        ],
822    }
823}
824
825/// Apply a lightweight aromatic stabilization correction on top of raw UFF energy.
826pub fn apply_aromatic_uff_correction(
827    mol: &crate::graph::Molecule,
828    raw_energy_kcal_mol: f64,
829) -> UffHeuristicEnergy {
830    let aromatic_bond_count = mol
831        .graph
832        .edge_references()
833        .filter(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
834        .count();
835    let aromatic_stabilization_kcal_mol = -0.08 * aromatic_bond_count as f64;
836
837    UffHeuristicEnergy {
838        raw_energy_kcal_mol,
839        aromatic_stabilization_kcal_mol,
840        corrected_energy_kcal_mol: raw_energy_kcal_mol + aromatic_stabilization_kcal_mol,
841        aromatic_bond_count,
842        notes: vec![
843            "Aromatic correction is an empirical post-UFF heuristic tied to aromatic bond count and should be used for ranking guidance only.".to_string(),
844            "Raw UFF and corrected values are both reported so downstream workflows can choose their own policy.".to_string(),
845        ],
846    }
847}
848
849#[cfg(test)]
850mod tests {
851    use super::*;
852
853    #[test]
854    fn test_h2_frontier_contributions_are_symmetric() {
855        let elements = [1u8, 1];
856        let positions = [[0.0, 0.0, 0.0], [0.74, 0.0, 0.0]];
857        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
858        let descriptors = compute_frontier_descriptors(&elements, &positions, &eht);
859
860        assert!(
861            (descriptors.homo_atom_contributions[0] - descriptors.homo_atom_contributions[1]).abs()
862                < 1e-6
863        );
864        assert!(
865            (descriptors.lumo_atom_contributions[0] - descriptors.lumo_atom_contributions[1]).abs()
866                < 1e-6
867        );
868    }
869
870    #[test]
871    fn test_frontier_contributions_are_normalized() {
872        let elements = [8u8, 1, 1];
873        let positions = [[0.0, 0.0, 0.0], [0.757, 0.586, 0.0], [-0.757, 0.586, 0.0]];
874        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
875        let descriptors = compute_frontier_descriptors(&elements, &positions, &eht);
876
877        let homo_sum: f64 = descriptors.homo_atom_contributions.iter().sum();
878        let lumo_sum: f64 = descriptors.lumo_atom_contributions.iter().sum();
879        assert!((homo_sum - 1.0).abs() < 1e-6);
880        assert!((lumo_sum - 1.0).abs() < 1e-6);
881    }
882
883    #[test]
884    fn test_fukui_descriptors_are_consistent_and_normalized() {
885        let elements = [8u8, 1, 1];
886        let positions = [[0.0, 0.0, 0.0], [0.757, 0.586, 0.0], [-0.757, 0.586, 0.0]];
887        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
888        let fukui = compute_fukui_descriptors(&elements, &positions, &eht);
889
890        let f_plus_sum: f64 = fukui.f_plus.iter().sum();
891        let f_minus_sum: f64 = fukui.f_minus.iter().sum();
892        let f0_sum: f64 = fukui.f_radical.iter().sum();
893        assert!((f_plus_sum - 1.0).abs() < 1e-6);
894        assert!((f_minus_sum - 1.0).abs() < 1e-6);
895        assert!((f0_sum - 1.0).abs() < 1e-6);
896        assert_eq!(fukui.condensed.len(), elements.len());
897    }
898
899    #[test]
900    fn test_uv_vis_like_spectrum_has_requested_grid() {
901        let elements = [6u8, 6, 1, 1, 1, 1];
902        let positions = [
903            [0.0, 0.0, 0.0],
904            [1.34, 0.0, 0.0],
905            [-0.6, 0.92, 0.0],
906            [-0.6, -0.92, 0.0],
907            [1.94, 0.92, 0.0],
908            [1.94, -0.92, 0.0],
909        ];
910        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
911        let spec = compute_uv_vis_like_spectrum(&eht, 0.2, 0.5, 8.0, 300);
912        assert_eq!(spec.energies_ev.len(), 300);
913        assert_eq!(spec.intensities.len(), 300);
914    }
915
916    #[test]
917    fn test_empirical_pka_detects_carboxylic_acid_site() {
918        let mol = crate::graph::Molecule::from_smiles("CC(=O)O").unwrap();
919        let charges = crate::compute_charges("CC(=O)O").unwrap().charges;
920        let result = estimate_empirical_pka(&mol, &charges);
921
922        assert!(!result.acidic_sites.is_empty());
923        assert!(result
924            .acidic_sites
925            .iter()
926            .any(|site| site.environment == "carboxylic_acid_oxygen"));
927    }
928
929    #[test]
930    fn test_aromatic_uff_correction_is_negative_for_benzene() {
931        let mol = crate::graph::Molecule::from_smiles("c1ccccc1").unwrap();
932        let result = apply_aromatic_uff_correction(&mol, 10.0);
933        assert!(result.aromatic_bond_count >= 6);
934        assert!(result.aromatic_stabilization_kcal_mol < 0.0);
935        assert!(result.corrected_energy_kcal_mol < result.raw_energy_kcal_mol);
936    }
937}