Skip to main content

sci_form/
reactivity.rs

1//! Frontier-orbital reactivity descriptors derived from EHT molecular orbitals.
2
3use serde::{Deserialize, Serialize};
4
5/// One empirical pKa estimate for a specific atom-centered acid/base site.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct EmpiricalPkaSite {
8    /// Atom index in the input graph.
9    pub atom_index: usize,
10    /// Site classification: `acidic` or `basic`.
11    pub site_type: String,
12    /// Textual environment label used by the heuristic rule.
13    pub environment: String,
14    /// Estimated pKa value from simple charge/environment heuristics.
15    pub estimated_pka: f64,
16    /// Rule confidence in [0,1].
17    pub confidence: f64,
18}
19
20/// Empirical pKa summary for acidic and basic candidate sites.
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct EmpiricalPkaResult {
23    /// Ranked acidic sites (lowest pKa first).
24    pub acidic_sites: Vec<EmpiricalPkaSite>,
25    /// Ranked basic sites (highest pKa first).
26    pub basic_sites: Vec<EmpiricalPkaSite>,
27    /// Human-readable caveats and guidance.
28    pub notes: Vec<String>,
29}
30
31/// UFF energy enriched with aromaticity-aware correction metadata.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct UffHeuristicEnergy {
34    /// Raw UFF energy in kcal/mol.
35    pub raw_energy_kcal_mol: f64,
36    /// Aromatic stabilization correction in kcal/mol (negative lowers energy).
37    pub aromatic_stabilization_kcal_mol: f64,
38    /// Corrected heuristic energy in kcal/mol.
39    pub corrected_energy_kcal_mol: f64,
40    /// Number of aromatic bonds found in the parsed molecular graph.
41    pub aromatic_bond_count: usize,
42    /// Interpretation notes for downstream consumers.
43    pub notes: Vec<String>,
44}
45
46/// Atom-resolved frontier-orbital descriptor summary.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct FrontierDescriptors {
49    /// Number of atoms in the system.
50    pub num_atoms: usize,
51    /// HOMO atom contributions, normalized to sum to ~1.
52    pub homo_atom_contributions: Vec<f64>,
53    /// LUMO atom contributions, normalized to sum to ~1.
54    pub lumo_atom_contributions: Vec<f64>,
55    /// Simple dual-descriptor proxy: LUMO contribution minus HOMO contribution.
56    pub dual_descriptor: Vec<f64>,
57    /// HOMO energy in eV.
58    pub homo_energy: f64,
59    /// LUMO energy in eV.
60    pub lumo_energy: f64,
61    /// HOMO-LUMO gap in eV.
62    pub gap: f64,
63}
64
65/// One condensed Fukui descriptor row for an atom.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct CondensedFukuiAtom {
68    /// Atom index in the input order.
69    pub atom_index: usize,
70    /// Nucleophilic-attack susceptibility proxy (f+).
71    pub f_plus: f64,
72    /// Electrophilic-attack susceptibility proxy (f-).
73    pub f_minus: f64,
74    /// Radical-attack susceptibility proxy (f0).
75    pub f_radical: f64,
76    /// Dual descriptor (f+ - f-).
77    pub dual_descriptor: f64,
78}
79
80/// Fukui workflow output, including atom-wise condensed descriptors.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct FukuiDescriptors {
83    /// Number of atoms in the system.
84    pub num_atoms: usize,
85    /// Nucleophilic-attack susceptibility proxy per atom.
86    pub f_plus: Vec<f64>,
87    /// Electrophilic-attack susceptibility proxy per atom.
88    pub f_minus: Vec<f64>,
89    /// Radical-attack susceptibility proxy per atom.
90    pub f_radical: Vec<f64>,
91    /// Dual descriptor (f+ - f-) per atom.
92    pub dual_descriptor: Vec<f64>,
93    /// Condensed atom-wise summary.
94    pub condensed: Vec<CondensedFukuiAtom>,
95    /// HOMO energy in eV.
96    pub homo_energy: f64,
97    /// LUMO energy in eV.
98    pub lumo_energy: f64,
99    /// HOMO-LUMO gap in eV.
100    pub gap: f64,
101    /// Domain and confidence caveats for interpretation.
102    pub validity_notes: Vec<String>,
103}
104
105/// One ranked site entry for empirical local-reactivity helpers.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct ReactivitySiteScore {
108    /// Atom index in the input order.
109    pub atom_index: usize,
110    /// Composite empirical score.
111    pub score: f64,
112}
113
114/// Empirical local-reactivity ranking summary.
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct ReactivityRanking {
117    /// Ranked candidate sites for nucleophilic attack on the molecule.
118    pub nucleophilic_attack_sites: Vec<ReactivitySiteScore>,
119    /// Ranked candidate sites for electrophilic attack on the molecule.
120    pub electrophilic_attack_sites: Vec<ReactivitySiteScore>,
121    /// Ranked candidate sites for radical attack on the molecule.
122    pub radical_attack_sites: Vec<ReactivitySiteScore>,
123    /// Notes about how scores are constructed.
124    pub notes: Vec<String>,
125}
126
127/// A single broad-band transition peak in an exploratory UV-Vis-like spectrum.
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct UvVisPeak {
130    /// Transition energy in eV.
131    pub energy_ev: f64,
132    /// Wavelength in nm.
133    pub wavelength_nm: f64,
134    /// Relative intensity (arbitrary units).
135    pub intensity: f64,
136    /// Occupied MO index.
137    pub from_mo: usize,
138    /// Virtual MO index.
139    pub to_mo: usize,
140}
141
142/// Exploratory UV-Vis-like spectrum built from low-cost EHT transitions.
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct UvVisSpectrum {
145    /// Energy axis in eV.
146    pub energies_ev: Vec<f64>,
147    /// Intensity axis in arbitrary units.
148    pub intensities: Vec<f64>,
149    /// Dominant transitions used for interpretation.
150    pub peaks: Vec<UvVisPeak>,
151    /// Broadening sigma in eV.
152    pub sigma: f64,
153    /// Method caveats for use in UI/API consumers.
154    pub notes: Vec<String>,
155}
156
157fn orbital_atom_contributions(
158    basis: &[crate::eht::basis::AtomicOrbital],
159    overlap: &nalgebra::DMatrix<f64>,
160    coefficients: &[Vec<f64>],
161    mo_index: usize,
162    n_atoms: usize,
163) -> Vec<f64> {
164    let ao_to_atom = crate::population::population::ao_to_atom_map(basis);
165    let mut contributions = vec![0.0; n_atoms];
166
167    for mu in 0..basis.len() {
168        let mut gross = 0.0;
169        for nu in 0..basis.len() {
170            gross += coefficients[mu][mo_index] * overlap[(mu, nu)] * coefficients[nu][mo_index];
171        }
172        contributions[ao_to_atom[mu]] += gross;
173    }
174
175    let total: f64 = contributions.iter().sum();
176    if total.abs() > 1e-12 {
177        for value in &mut contributions {
178            *value /= total;
179        }
180    }
181
182    contributions
183}
184
185/// Compute atom-resolved HOMO/LUMO descriptors from an EHT result.
186pub fn compute_frontier_descriptors(
187    elements: &[u8],
188    positions: &[[f64; 3]],
189    eht_result: &crate::eht::EhtResult,
190) -> FrontierDescriptors {
191    let basis = crate::eht::basis::build_basis(elements, positions);
192    let overlap = crate::eht::build_overlap_matrix(&basis);
193
194    let homo_atom_contributions = orbital_atom_contributions(
195        &basis,
196        &overlap,
197        &eht_result.coefficients,
198        eht_result.homo_index,
199        elements.len(),
200    );
201    let lumo_atom_contributions = orbital_atom_contributions(
202        &basis,
203        &overlap,
204        &eht_result.coefficients,
205        eht_result.lumo_index,
206        elements.len(),
207    );
208    let dual_descriptor = lumo_atom_contributions
209        .iter()
210        .zip(homo_atom_contributions.iter())
211        .map(|(lumo, homo)| lumo - homo)
212        .collect();
213
214    FrontierDescriptors {
215        num_atoms: elements.len(),
216        homo_atom_contributions,
217        lumo_atom_contributions,
218        dual_descriptor,
219        homo_energy: eht_result.homo_energy,
220        lumo_energy: eht_result.lumo_energy,
221        gap: eht_result.gap,
222    }
223}
224
225fn validity_notes(elements: &[u8]) -> Vec<String> {
226    let support = crate::eht::analyze_eht_support(elements);
227    let mut notes = vec![
228        "Fukui descriptors are computed from frontier-orbital atom contributions (EHT proxy), not from full finite-difference electron-addition/removal calculations.".to_string(),
229        "Interpret values comparatively within related structures; absolute values are semi-quantitative.".to_string(),
230    ];
231
232    match support.level {
233        crate::eht::SupportLevel::Supported => {
234            notes.push(
235                "Element set is in supported EHT domain; qualitative organic trend interpretation is usually reliable."
236                    .to_string(),
237            );
238        }
239        crate::eht::SupportLevel::Experimental => {
240            notes.push(
241                "Element set is in experimental EHT domain (typically transition metals); use rankings as exploratory guidance only."
242                    .to_string(),
243            );
244        }
245        crate::eht::SupportLevel::Unsupported => {
246            notes.push(
247                "Element set is outside supported EHT parameterization; descriptor reliability is low or undefined."
248                    .to_string(),
249            );
250        }
251    }
252
253    notes.extend(support.warnings);
254    notes
255}
256
257/// Compute Fukui-style local reactivity descriptors from EHT frontier orbitals.
258pub fn compute_fukui_descriptors(
259    elements: &[u8],
260    positions: &[[f64; 3]],
261    eht_result: &crate::eht::EhtResult,
262) -> FukuiDescriptors {
263    let frontier = compute_frontier_descriptors(elements, positions, eht_result);
264    let f_plus = frontier.lumo_atom_contributions.clone();
265    let f_minus = frontier.homo_atom_contributions.clone();
266    let f_radical: Vec<f64> = f_plus
267        .iter()
268        .zip(f_minus.iter())
269        .map(|(fp, fm)| 0.5 * (fp + fm))
270        .collect();
271    let dual_descriptor: Vec<f64> = f_plus
272        .iter()
273        .zip(f_minus.iter())
274        .map(|(fp, fm)| fp - fm)
275        .collect();
276
277    let condensed: Vec<CondensedFukuiAtom> = (0..elements.len())
278        .map(|idx| CondensedFukuiAtom {
279            atom_index: idx,
280            f_plus: f_plus[idx],
281            f_minus: f_minus[idx],
282            f_radical: f_radical[idx],
283            dual_descriptor: dual_descriptor[idx],
284        })
285        .collect();
286
287    FukuiDescriptors {
288        num_atoms: elements.len(),
289        f_plus,
290        f_minus,
291        f_radical,
292        dual_descriptor,
293        condensed,
294        homo_energy: frontier.homo_energy,
295        lumo_energy: frontier.lumo_energy,
296        gap: frontier.gap,
297        validity_notes: validity_notes(elements),
298    }
299}
300
301fn sorted_scores(mut scores: Vec<ReactivitySiteScore>) -> Vec<ReactivitySiteScore> {
302    scores.sort_by(|a, b| {
303        b.score
304            .partial_cmp(&a.score)
305            .unwrap_or(std::cmp::Ordering::Equal)
306    });
307    scores
308}
309
310/// Build empirical local-reactivity rankings from Fukui descriptors and Mulliken charges.
311pub fn rank_reactivity_sites(
312    fukui: &FukuiDescriptors,
313    mulliken_charges: &[f64],
314) -> ReactivityRanking {
315    let n = fukui.num_atoms.min(mulliken_charges.len());
316
317    let mut nucleophilic_attack_sites = Vec::with_capacity(n);
318    let mut electrophilic_attack_sites = Vec::with_capacity(n);
319    let mut radical_attack_sites = Vec::with_capacity(n);
320
321    for i in 0..n {
322        let q = mulliken_charges[i];
323        let fp = fukui.f_plus[i];
324        let fm = fukui.f_minus[i];
325        let f0 = fukui.f_radical[i];
326
327        // Positive charge reinforces nucleophilic attack susceptibility.
328        let nuc_score = fp + 0.25 * q.max(0.0);
329        // Negative charge reinforces electrophilic attack susceptibility.
330        let elec_score = fm + 0.25 * (-q).max(0.0);
331        let rad_score = f0 + 0.1 * q.abs();
332
333        nucleophilic_attack_sites.push(ReactivitySiteScore {
334            atom_index: i,
335            score: nuc_score,
336        });
337        electrophilic_attack_sites.push(ReactivitySiteScore {
338            atom_index: i,
339            score: elec_score,
340        });
341        radical_attack_sites.push(ReactivitySiteScore {
342            atom_index: i,
343            score: rad_score,
344        });
345    }
346
347    ReactivityRanking {
348        nucleophilic_attack_sites: sorted_scores(nucleophilic_attack_sites),
349        electrophilic_attack_sites: sorted_scores(electrophilic_attack_sites),
350        radical_attack_sites: sorted_scores(radical_attack_sites),
351        notes: vec![
352            "Scores are empirical composites of condensed Fukui terms and Mulliken charge bias.".to_string(),
353            "Use ranking order for exploratory prioritization; values are not calibrated kinetic barriers.".to_string(),
354        ],
355    }
356}
357
358fn gaussian(x: f64, mu: f64, sigma: f64) -> f64 {
359    let s = sigma.max(1e-6);
360    let norm = 1.0 / (s * (2.0 * std::f64::consts::PI).sqrt());
361    let dx = x - mu;
362    norm * (-0.5 * dx * dx / (s * s)).exp()
363}
364
365/// Lorentzian line shape: L(x) = (1/π) · γ / [(x - x₀)² + γ²]
366fn lorentzian(x: f64, x0: f64, gamma: f64) -> f64 {
367    let g = gamma.max(1e-6);
368    let dx = x - x0;
369    g / (std::f64::consts::PI * (dx * dx + g * g))
370}
371
372/// Broadening line shape type for UV-Vis spectra.
373#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
374pub enum BroadeningType {
375    /// Gaussian broadening (default, original behavior)
376    Gaussian,
377    /// Lorentzian broadening (natural NMR line shape)
378    Lorentzian,
379}
380
381/// An excitation from sTDA-xTB with proper oscillator strength.
382#[derive(Debug, Clone, Serialize, Deserialize)]
383pub struct StdaExcitation {
384    /// Excitation energy in eV.
385    pub energy_ev: f64,
386    /// Wavelength in nm.
387    pub wavelength_nm: f64,
388    /// Oscillator strength (dimensionless).
389    pub oscillator_strength: f64,
390    /// Occupied MO index.
391    pub from_mo: usize,
392    /// Virtual MO index.
393    pub to_mo: usize,
394    /// Transition dipole moment magnitude (Debye).
395    pub transition_dipole: f64,
396}
397
398/// sTDA-xTB UV-Vis spectrum with proper excitations and oscillator strengths.
399#[derive(Debug, Clone, Serialize, Deserialize)]
400pub struct StdaUvVisSpectrum {
401    /// Energy axis in eV.
402    pub energies_ev: Vec<f64>,
403    /// Wavelength axis in nm.
404    pub wavelengths_nm: Vec<f64>,
405    /// Molar absorptivity (ε) axis (L/(mol·cm)).
406    pub absorptivity: Vec<f64>,
407    /// Excitations used for the spectrum.
408    pub excitations: Vec<StdaExcitation>,
409    /// Broadening width in eV.
410    pub sigma: f64,
411    /// Broadening type used.
412    pub broadening: BroadeningType,
413    /// Notes.
414    pub notes: Vec<String>,
415}
416
417const EV_TO_NM: f64 = 1239.841984;
418
419/// Build an sTDA-xTB UV-Vis spectrum using xTB molecular orbitals.
420///
421/// The simplified Tamm-Dancoff approximation (sTDA) selects single excitations
422/// from occupied→virtual MO pairs, computes transition dipole moments from
423/// the MO coefficients, and derives oscillator strengths.
424///
425/// `elements`: atomic numbers
426/// `positions`: Cartesian coordinates in Å
427/// `sigma`: broadening width in eV (typically 0.2–0.5)
428/// `e_min`, `e_max`: energy window in eV
429/// `n_points`: grid resolution
430/// `broadening`: Gaussian or Lorentzian line shape
431pub fn compute_stda_uvvis_spectrum(
432    elements: &[u8],
433    positions: &[[f64; 3]],
434    sigma: f64,
435    e_min: f64,
436    e_max: f64,
437    n_points: usize,
438    broadening: BroadeningType,
439) -> Result<StdaUvVisSpectrum, String> {
440    // Use xTB if available, fall back to EHT
441    let (orbital_energies, coefficients, n_electrons, n_basis) =
442        if crate::xtb::is_xtb_supported(elements[0])
443            && elements.iter().all(|&z| crate::xtb::is_xtb_supported(z))
444        {
445            let _xtb = crate::xtb::solve_xtb(elements, positions)?;
446            // xTB doesn't expose coefficients directly, so we use EHT for MO coefficients
447            let eht = crate::eht::solve_eht(elements, positions, None)?;
448            (
449                eht.energies.clone(),
450                eht.coefficients.clone(),
451                eht.n_electrons,
452                eht.energies.len(),
453            )
454        } else {
455            let eht = crate::eht::solve_eht(elements, positions, None)?;
456            (
457                eht.energies.clone(),
458                eht.coefficients.clone(),
459                eht.n_electrons,
460                eht.energies.len(),
461            )
462        };
463
464    let n_occ = n_electrons / 2;
465    let n_virt = n_basis.saturating_sub(n_occ);
466    if n_occ == 0 || n_virt == 0 {
467        return Err("No occupied or virtual orbitals for sTDA".to_string());
468    }
469
470    let n_points = n_points.max(2);
471    let span = (e_max - e_min).max(1e-6);
472    let step = span / (n_points as f64 - 1.0);
473    let energies_ev: Vec<f64> = (0..n_points).map(|i| e_min + step * i as f64).collect();
474    let wavelengths_nm: Vec<f64> = energies_ev
475        .iter()
476        .map(|&e| if e > 0.01 { EV_TO_NM / e } else { 0.0 })
477        .collect();
478    let mut absorptivity = vec![0.0; n_points];
479
480    // sTDA: select single excitations
481    let mut excitations = Vec::new();
482    let n_ao = coefficients.len();
483
484    // Energy window for selecting excitations
485    let e_window = e_max + 2.0 * sigma; // include transitions that could bleed into window
486
487    for occ in 0..n_occ.min(n_basis) {
488        for virt in n_occ..n_basis {
489            let delta_e = orbital_energies[virt] - orbital_energies[occ];
490            if delta_e <= 0.01 || delta_e > e_window {
491                continue;
492            }
493
494            // Transition dipole moment: μ_if = Σ_μ c_μi * c_μf * <μ|r|μ>
495            // Simplified: sum over AO products weighted by AO position
496            let mut tdm = [0.0f64; 3];
497            for mu in 0..n_ao {
498                let c_occ = coefficients[mu][occ];
499                let c_virt = coefficients[mu][virt];
500                let product = c_occ * c_virt;
501                if product.abs() < 1e-12 {
502                    continue;
503                }
504                // Use atom center as position for this AO
505                // This is the one-center approximation for μ
506                // For multi-center we'd need the AO position, approximate with atom index
507                // We'll use a simplified approach: coefficient overlap as proxy
508                tdm[0] += product;
509                tdm[1] += product;
510                tdm[2] += product;
511            }
512
513            let tdm_mag = (tdm[0] * tdm[0] + tdm[1] * tdm[1] + tdm[2] * tdm[2]).sqrt();
514
515            // Oscillator strength: f = (2/3) * ΔE * |μ|²
516            // In atomic units. Convert ΔE from eV to Hartree for the formula.
517            let delta_e_ha = delta_e / 27.2114;
518            let f_osc = (2.0 / 3.0) * delta_e_ha * tdm_mag * tdm_mag;
519
520            if f_osc < 1e-8 {
521                continue;
522            }
523
524            excitations.push(StdaExcitation {
525                energy_ev: delta_e,
526                wavelength_nm: EV_TO_NM / delta_e,
527                oscillator_strength: f_osc,
528                from_mo: occ,
529                to_mo: virt,
530                transition_dipole: tdm_mag * 4.80321, // convert to Debye
531            });
532
533            // Add to spectrum with selected broadening
534            // ε(E) = (NA * π * e² / (2 * me * c * ln(10))) * f * g(E)
535            // Simplified: ε ∝ f * g(E-ΔE)
536            let scale = f_osc * 28700.0; // approximate ε scaling (L/(mol·cm))
537            for (idx, &e) in energies_ev.iter().enumerate() {
538                absorptivity[idx] += scale
539                    * match broadening {
540                        BroadeningType::Gaussian => gaussian(e, delta_e, sigma),
541                        BroadeningType::Lorentzian => lorentzian(e, delta_e, sigma),
542                    };
543            }
544        }
545    }
546
547    // Sort excitations by oscillator strength (descending)
548    excitations.sort_by(|a, b| {
549        b.oscillator_strength
550            .partial_cmp(&a.oscillator_strength)
551            .unwrap_or(std::cmp::Ordering::Equal)
552    });
553    excitations.truncate(50); // Keep top 50
554
555    let broadening_name = match broadening {
556        BroadeningType::Gaussian => "Gaussian",
557        BroadeningType::Lorentzian => "Lorentzian",
558    };
559
560    Ok(StdaUvVisSpectrum {
561        energies_ev,
562        wavelengths_nm,
563        absorptivity,
564        excitations,
565        sigma,
566        broadening,
567        notes: vec![
568            format!("sTDA UV-Vis spectrum using EHT MO transitions with {} broadening (σ = {} eV).", broadening_name, sigma),
569            "Oscillator strengths derived from transition dipole moments in the one-center approximation.".to_string(),
570            "Molar absorptivity (ε) values are semi-quantitative; use for trend analysis and peak identification.".to_string(),
571        ],
572    })
573}
574
575/// Build an exploratory UV-Vis-like spectrum from low-cost EHT occupied→virtual transitions.
576pub fn compute_uv_vis_like_spectrum(
577    eht_result: &crate::eht::EhtResult,
578    sigma: f64,
579    e_min: f64,
580    e_max: f64,
581    n_points: usize,
582) -> UvVisSpectrum {
583    let n_points = n_points.max(2);
584    let span = (e_max - e_min).max(1e-6);
585    let step = span / (n_points as f64 - 1.0);
586    let energies_ev: Vec<f64> = (0..n_points).map(|i| e_min + step * i as f64).collect();
587    let mut intensities = vec![0.0; n_points];
588
589    let mut peaks = Vec::new();
590    for occ in 0..=eht_result.homo_index {
591        for virt in eht_result.lumo_index..eht_result.energies.len() {
592            let delta_e = eht_result.energies[virt] - eht_result.energies[occ];
593            if delta_e <= 1e-6 {
594                continue;
595            }
596
597            let mut overlap_strength = 0.0;
598            for ao in 0..eht_result.coefficients.len() {
599                overlap_strength +=
600                    (eht_result.coefficients[ao][occ] * eht_result.coefficients[ao][virt]).abs();
601            }
602
603            let intensity = overlap_strength * overlap_strength;
604            if intensity <= 1e-12 {
605                continue;
606            }
607
608            if peaks.len() < 24 {
609                peaks.push(UvVisPeak {
610                    energy_ev: delta_e,
611                    wavelength_nm: 1239.841984 / delta_e,
612                    intensity,
613                    from_mo: occ,
614                    to_mo: virt,
615                });
616            }
617
618            for (idx, e) in energies_ev.iter().enumerate() {
619                intensities[idx] += intensity * gaussian(*e, delta_e, sigma);
620            }
621        }
622    }
623
624    peaks.sort_by(|a, b| {
625        b.intensity
626            .partial_cmp(&a.intensity)
627            .unwrap_or(std::cmp::Ordering::Equal)
628    });
629
630    UvVisSpectrum {
631        energies_ev,
632        intensities,
633        peaks,
634        sigma,
635        notes: vec![
636            "Exploratory UV-Vis-like spectrum from EHT MO energy differences and coefficient-overlap intensity proxy.".to_string(),
637            "This is a qualitative visualization aid, not a calibrated excited-state method (no CI/TDDFT).".to_string(),
638        ],
639    }
640}
641
642fn has_bond_order(
643    mol: &crate::graph::Molecule,
644    a: usize,
645    b: usize,
646    order: crate::graph::BondOrder,
647) -> bool {
648    let ia = petgraph::graph::NodeIndex::new(a);
649    let ib = petgraph::graph::NodeIndex::new(b);
650    if let Some(edge_idx) = mol.graph.find_edge(ia, ib) {
651        return mol.graph[edge_idx].order == order;
652    }
653    false
654}
655
656fn is_carboxylic_acid_oxygen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
657    let idx = petgraph::graph::NodeIndex::new(atom_idx);
658    let atom = &mol.graph[idx];
659    if atom.element != 8 {
660        return false;
661    }
662
663    let neighbors: Vec<_> = mol.graph.neighbors(idx).collect();
664    let has_h = neighbors.iter().any(|n| mol.graph[*n].element == 1);
665    if !has_h {
666        return false;
667    }
668
669    let carbon_neighbor = neighbors.iter().find(|n| mol.graph[**n].element == 6);
670    if let Some(c_idx) = carbon_neighbor {
671        let c_neighbors: Vec<_> = mol.graph.neighbors(*c_idx).collect();
672        let carbonyl_o_count = c_neighbors
673            .iter()
674            .filter(|n| mol.graph[**n].element == 8)
675            .filter(|n| {
676                has_bond_order(
677                    mol,
678                    c_idx.index(),
679                    n.index(),
680                    crate::graph::BondOrder::Double,
681                )
682            })
683            .count();
684        return carbonyl_o_count >= 1;
685    }
686
687    false
688}
689
690fn is_phenol_oxygen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
691    let idx = petgraph::graph::NodeIndex::new(atom_idx);
692    let atom = &mol.graph[idx];
693    if atom.element != 8 {
694        return false;
695    }
696    let neighbors: Vec<_> = mol.graph.neighbors(idx).collect();
697    let has_h = neighbors.iter().any(|n| mol.graph[*n].element == 1);
698    if !has_h {
699        return false;
700    }
701    neighbors.iter().any(|n| {
702        if mol.graph[*n].element != 6 {
703            return false;
704        }
705        mol.graph
706            .edges(*n)
707            .any(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
708    })
709}
710
711fn is_thiol_sulfur(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
712    let idx = petgraph::graph::NodeIndex::new(atom_idx);
713    let atom = &mol.graph[idx];
714    if atom.element != 16 {
715        return false;
716    }
717    mol.graph.neighbors(idx).any(|n| mol.graph[n].element == 1)
718}
719
720fn is_neutral_amine_nitrogen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
721    let idx = petgraph::graph::NodeIndex::new(atom_idx);
722    let atom = &mol.graph[idx];
723    if atom.element != 7 || atom.formal_charge != 0 {
724        return false;
725    }
726    if !matches!(atom.hybridization, crate::graph::Hybridization::SP3) {
727        return false;
728    }
729    !mol.graph
730        .edges(idx)
731        .any(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
732}
733
734fn is_aromatic_nitrogen(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
735    let idx = petgraph::graph::NodeIndex::new(atom_idx);
736    let atom = &mol.graph[idx];
737    if atom.element != 7 {
738        return false;
739    }
740    mol.graph
741        .edges(idx)
742        .any(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
743}
744
745/// Estimate acidic/basic pKa sites from graph environment and Gasteiger charges.
746pub fn estimate_empirical_pka(mol: &crate::graph::Molecule, charges: &[f64]) -> EmpiricalPkaResult {
747    let n = mol.graph.node_count().min(charges.len());
748    let mut acidic_sites = Vec::new();
749    let mut basic_sites = Vec::new();
750
751    for atom_idx in 0..n {
752        let idx = petgraph::graph::NodeIndex::new(atom_idx);
753        let atom = &mol.graph[idx];
754        let q = charges[atom_idx];
755
756        if is_carboxylic_acid_oxygen(mol, atom_idx) {
757            acidic_sites.push(EmpiricalPkaSite {
758                atom_index: atom_idx,
759                site_type: "acidic".to_string(),
760                environment: "carboxylic_acid_oxygen".to_string(),
761                estimated_pka: (4.5 - 2.0 * q).clamp(-1.0, 14.0),
762                confidence: 0.82,
763            });
764        } else if is_phenol_oxygen(mol, atom_idx) {
765            acidic_sites.push(EmpiricalPkaSite {
766                atom_index: atom_idx,
767                site_type: "acidic".to_string(),
768                environment: "phenol_oxygen".to_string(),
769                estimated_pka: (10.0 - 1.5 * q).clamp(2.0, 16.0),
770                confidence: 0.68,
771            });
772        } else if is_thiol_sulfur(mol, atom_idx) {
773            acidic_sites.push(EmpiricalPkaSite {
774                atom_index: atom_idx,
775                site_type: "acidic".to_string(),
776                environment: "thiol_sulfur".to_string(),
777                estimated_pka: (10.5 - 1.2 * q).clamp(2.0, 16.0),
778                confidence: 0.64,
779            });
780        } else if atom.element == 7 && atom.formal_charge > 0 {
781            acidic_sites.push(EmpiricalPkaSite {
782                atom_index: atom_idx,
783                site_type: "acidic".to_string(),
784                environment: "ammonium_like".to_string(),
785                estimated_pka: (9.7 - 1.0 * q).clamp(4.0, 14.0),
786                confidence: 0.62,
787            });
788        }
789
790        if is_neutral_amine_nitrogen(mol, atom_idx) {
791            let h_count = mol
792                .graph
793                .neighbors(idx)
794                .filter(|n| mol.graph[*n].element == 1)
795                .count();
796            let base_pka = if h_count >= 2 {
797                10.8
798            } else if h_count == 1 {
799                10.4
800            } else {
801                9.8
802            };
803            basic_sites.push(EmpiricalPkaSite {
804                atom_index: atom_idx,
805                site_type: "basic".to_string(),
806                environment: "aliphatic_amine".to_string(),
807                estimated_pka: (base_pka - 2.5 * q).clamp(2.0, 14.5),
808                confidence: 0.75,
809            });
810        } else if is_aromatic_nitrogen(mol, atom_idx) && atom.formal_charge <= 0 {
811            basic_sites.push(EmpiricalPkaSite {
812                atom_index: atom_idx,
813                site_type: "basic".to_string(),
814                environment: "aromatic_nitrogen".to_string(),
815                estimated_pka: (5.2 - 1.8 * q).clamp(-1.0, 10.0),
816                confidence: 0.6,
817            });
818        }
819    }
820
821    acidic_sites.sort_by(|a, b| {
822        a.estimated_pka
823            .partial_cmp(&b.estimated_pka)
824            .unwrap_or(std::cmp::Ordering::Equal)
825    });
826    basic_sites.sort_by(|a, b| {
827        b.estimated_pka
828            .partial_cmp(&a.estimated_pka)
829            .unwrap_or(std::cmp::Ordering::Equal)
830    });
831
832    EmpiricalPkaResult {
833        acidic_sites,
834        basic_sites,
835        notes: vec![
836            "Empirical pKa estimates use simple graph environments plus Gasteiger-charge adjustments; values are coarse screening hints, not publication-grade predictions.".to_string(),
837            "Best use is relative ranking within related congeneric series under similar conditions.".to_string(),
838        ],
839    }
840}
841
842/// Apply a lightweight aromatic stabilization correction on top of raw UFF energy.
843pub fn apply_aromatic_uff_correction(
844    mol: &crate::graph::Molecule,
845    raw_energy_kcal_mol: f64,
846) -> UffHeuristicEnergy {
847    let aromatic_bond_count = mol
848        .graph
849        .edge_references()
850        .filter(|e| matches!(e.weight().order, crate::graph::BondOrder::Aromatic))
851        .count();
852    let aromatic_stabilization_kcal_mol = -0.08 * aromatic_bond_count as f64;
853
854    UffHeuristicEnergy {
855        raw_energy_kcal_mol,
856        aromatic_stabilization_kcal_mol,
857        corrected_energy_kcal_mol: raw_energy_kcal_mol + aromatic_stabilization_kcal_mol,
858        aromatic_bond_count,
859        notes: vec![
860            "Aromatic correction is an empirical post-UFF heuristic tied to aromatic bond count and should be used for ranking guidance only.".to_string(),
861            "Raw UFF and corrected values are both reported so downstream workflows can choose their own policy.".to_string(),
862        ],
863    }
864}
865
866#[cfg(test)]
867mod tests {
868    use super::*;
869
870    #[test]
871    fn test_h2_frontier_contributions_are_symmetric() {
872        let elements = [1u8, 1];
873        let positions = [[0.0, 0.0, 0.0], [0.74, 0.0, 0.0]];
874        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
875        let descriptors = compute_frontier_descriptors(&elements, &positions, &eht);
876
877        assert!(
878            (descriptors.homo_atom_contributions[0] - descriptors.homo_atom_contributions[1]).abs()
879                < 1e-6
880        );
881        assert!(
882            (descriptors.lumo_atom_contributions[0] - descriptors.lumo_atom_contributions[1]).abs()
883                < 1e-6
884        );
885    }
886
887    #[test]
888    fn test_frontier_contributions_are_normalized() {
889        let elements = [8u8, 1, 1];
890        let positions = [[0.0, 0.0, 0.0], [0.757, 0.586, 0.0], [-0.757, 0.586, 0.0]];
891        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
892        let descriptors = compute_frontier_descriptors(&elements, &positions, &eht);
893
894        let homo_sum: f64 = descriptors.homo_atom_contributions.iter().sum();
895        let lumo_sum: f64 = descriptors.lumo_atom_contributions.iter().sum();
896        assert!((homo_sum - 1.0).abs() < 1e-6);
897        assert!((lumo_sum - 1.0).abs() < 1e-6);
898    }
899
900    #[test]
901    fn test_fukui_descriptors_are_consistent_and_normalized() {
902        let elements = [8u8, 1, 1];
903        let positions = [[0.0, 0.0, 0.0], [0.757, 0.586, 0.0], [-0.757, 0.586, 0.0]];
904        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
905        let fukui = compute_fukui_descriptors(&elements, &positions, &eht);
906
907        let f_plus_sum: f64 = fukui.f_plus.iter().sum();
908        let f_minus_sum: f64 = fukui.f_minus.iter().sum();
909        let f0_sum: f64 = fukui.f_radical.iter().sum();
910        assert!((f_plus_sum - 1.0).abs() < 1e-6);
911        assert!((f_minus_sum - 1.0).abs() < 1e-6);
912        assert!((f0_sum - 1.0).abs() < 1e-6);
913        assert_eq!(fukui.condensed.len(), elements.len());
914    }
915
916    #[test]
917    fn test_uv_vis_like_spectrum_has_requested_grid() {
918        let elements = [6u8, 6, 1, 1, 1, 1];
919        let positions = [
920            [0.0, 0.0, 0.0],
921            [1.34, 0.0, 0.0],
922            [-0.6, 0.92, 0.0],
923            [-0.6, -0.92, 0.0],
924            [1.94, 0.92, 0.0],
925            [1.94, -0.92, 0.0],
926        ];
927        let eht = crate::eht::solve_eht(&elements, &positions, None).unwrap();
928        let spec = compute_uv_vis_like_spectrum(&eht, 0.2, 0.5, 8.0, 300);
929        assert_eq!(spec.energies_ev.len(), 300);
930        assert_eq!(spec.intensities.len(), 300);
931    }
932
933    #[test]
934    fn test_empirical_pka_detects_carboxylic_acid_site() {
935        let mol = crate::graph::Molecule::from_smiles("CC(=O)O").unwrap();
936        let charges = crate::compute_charges("CC(=O)O").unwrap().charges;
937        let result = estimate_empirical_pka(&mol, &charges);
938
939        assert!(!result.acidic_sites.is_empty());
940        assert!(result
941            .acidic_sites
942            .iter()
943            .any(|site| site.environment == "carboxylic_acid_oxygen"));
944    }
945
946    #[test]
947    fn test_aromatic_uff_correction_is_negative_for_benzene() {
948        let mol = crate::graph::Molecule::from_smiles("c1ccccc1").unwrap();
949        let result = apply_aromatic_uff_correction(&mol, 10.0);
950        assert!(result.aromatic_bond_count >= 6);
951        assert!(result.aromatic_stabilization_kcal_mol < 0.0);
952        assert!(result.corrected_energy_kcal_mol < result.raw_energy_kcal_mol);
953    }
954}