Skip to main content

sci_form/nmr/
spectrum.rs

1//! NMR spectrum generation with Lorentzian broadening.
2//!
3//! Combines predicted chemical shifts and J-couplings to produce
4//! a synthetic 1D NMR spectrum with Lorentzian line shapes.
5
6use serde::{Deserialize, Serialize};
7
8use super::coupling::JCoupling;
9use super::nucleus::NmrNucleus;
10use super::shifts::{ChemicalShift, NmrShiftResult};
11
12#[derive(Debug, Clone)]
13struct ShiftGroup {
14    shift_ppm: f64,
15    environment: String,
16    atom_indices: Vec<usize>,
17    confidence: f64,
18    exchangeable: bool,
19}
20
21#[derive(Debug, Clone)]
22struct CouplingGroup {
23    n_equivalent_neighbors: usize,
24    average_j_hz: f64,
25}
26
27/// A single peak in the NMR spectrum.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct NmrPeak {
30    /// Chemical shift in ppm.
31    pub shift_ppm: f64,
32    /// Relative intensity (integration).
33    pub intensity: f64,
34    /// Atom index.
35    pub atom_index: usize,
36    /// All equivalent atom indices contributing to this signal.
37    pub atom_indices: Vec<usize>,
38    /// Multiplicity label (s, d, t, q, m).
39    pub multiplicity: String,
40    /// Environment description.
41    pub environment: String,
42}
43
44/// Complete NMR spectrum.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct NmrSpectrum {
47    /// Chemical shift axis (ppm), typically decreasing.
48    pub ppm_axis: Vec<f64>,
49    /// Intensity axis.
50    pub intensities: Vec<f64>,
51    /// Identified peaks.
52    pub peaks: Vec<NmrPeak>,
53    /// Nucleus type.
54    pub nucleus: NmrNucleus,
55    /// Line width parameter (ppm).
56    pub gamma: f64,
57    /// Peak group integrations (relative areas, normalized to largest = 1.0).
58    pub integrations: Vec<PeakIntegration>,
59    /// Notes.
60    pub notes: Vec<String>,
61}
62
63/// Integration result for a group of equivalent peaks.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct PeakIntegration {
66    /// Center of the peak group (ppm).
67    pub center_ppm: f64,
68    /// Integration bounds (ppm_low, ppm_high).
69    pub bounds: (f64, f64),
70    /// Raw area (sum of intensities × step).
71    pub raw_area: f64,
72    /// Relative area (normalized so largest group = 1.0).
73    pub relative_area: f64,
74    /// Number of equivalent atoms contributing.
75    pub n_atoms: usize,
76}
77
78/// Default FWHM (full-width at half-maximum) for each nucleus type, in Hz.
79fn default_fwhm_hz(nucleus: NmrNucleus) -> f64 {
80    nucleus.default_fwhm_hz()
81}
82
83/// Default spectrometer frequency for each nucleus (MHz).
84fn default_frequency_mhz(nucleus: NmrNucleus) -> f64 {
85    nucleus.default_frequency_mhz()
86}
87
88/// Lorentzian line shape: L(x) = (1/π) · γ / [(x - x₀)² + γ²]
89fn lorentzian(x: f64, x0: f64, gamma: f64) -> f64 {
90    let g = gamma.max(1e-8);
91    let dx = x - x0;
92    g / (std::f64::consts::PI * (dx * dx + g * g))
93}
94
95/// Determine multiplicity from J-coupling count.
96#[cfg(test)]
97fn multiplicity_label(n_couplings: usize) -> &'static str {
98    match n_couplings {
99        0 => "s", // singlet
100        1 => "d", // doublet
101        2 => "t", // triplet
102        3 => "q", // quartet
103        _ => "m", // multiplet
104    }
105}
106
107fn shift_group_tolerance(nucleus: NmrNucleus) -> f64 {
108    match nucleus {
109        NmrNucleus::H1 | NmrNucleus::H2 | NmrNucleus::H3 => 1e-3,
110        NmrNucleus::C13 => 1e-2,
111        _ => 5e-3,
112    }
113}
114
115fn is_exchangeable_environment(environment: &str) -> bool {
116    environment.contains("O-H") || environment.contains("N-H") || environment.contains("S-H")
117}
118
119fn build_shift_groups(active_shifts: &[ChemicalShift], nucleus: NmrNucleus) -> Vec<ShiftGroup> {
120    let tolerance = shift_group_tolerance(nucleus);
121    let mut groups: Vec<ShiftGroup> = Vec::new();
122
123    for shift in active_shifts {
124        if let Some(group) = groups.iter_mut().find(|group| {
125            group.environment == shift.environment
126                && (group.shift_ppm - shift.shift_ppm).abs() <= tolerance
127        }) {
128            let count = group.atom_indices.len() as f64;
129            group.shift_ppm = (group.shift_ppm * count + shift.shift_ppm) / (count + 1.0);
130            group.confidence = group.confidence.min(shift.confidence);
131            group.atom_indices.push(shift.atom_index);
132        } else {
133            groups.push(ShiftGroup {
134                shift_ppm: shift.shift_ppm,
135                environment: shift.environment.clone(),
136                atom_indices: vec![shift.atom_index],
137                confidence: shift.confidence,
138                exchangeable: is_exchangeable_environment(&shift.environment),
139            });
140        }
141    }
142
143    groups.sort_by(|left, right| {
144        left.shift_ppm
145            .partial_cmp(&right.shift_ppm)
146            .unwrap_or(std::cmp::Ordering::Equal)
147    });
148    groups
149}
150
151fn build_coupling_groups(
152    source_group: &ShiftGroup,
153    all_groups: &[ShiftGroup],
154    couplings: &[JCoupling],
155) -> Vec<CouplingGroup> {
156    if source_group.exchangeable {
157        return Vec::new();
158    }
159
160    let representative = match source_group.atom_indices.first() {
161        Some(index) => *index,
162        None => return Vec::new(),
163    };
164
165    let mut groups = Vec::new();
166
167    for target_group in all_groups {
168        if std::ptr::eq(source_group, target_group) || target_group.exchangeable {
169            continue;
170        }
171
172        let target_couplings: Vec<f64> = couplings
173            .iter()
174            .filter(|coupling| coupling.n_bonds == 3)
175            .filter_map(|coupling| {
176                if (coupling.h1_index == representative
177                    && target_group.atom_indices.contains(&coupling.h2_index))
178                    || (coupling.h2_index == representative
179                        && target_group.atom_indices.contains(&coupling.h1_index))
180                {
181                    Some(coupling.j_hz.abs())
182                } else {
183                    None
184                }
185            })
186            .filter(|j_hz| *j_hz >= 0.5)
187            .collect();
188
189        if target_couplings.is_empty() {
190            continue;
191        }
192
193        let average_j_hz = target_couplings.iter().sum::<f64>() / target_couplings.len() as f64;
194        groups.push(CouplingGroup {
195            n_equivalent_neighbors: target_couplings.len(),
196            average_j_hz,
197        });
198    }
199
200    groups.sort_by(|left, right| {
201        right
202            .average_j_hz
203            .partial_cmp(&left.average_j_hz)
204            .unwrap_or(std::cmp::Ordering::Equal)
205    });
206    groups
207}
208
209fn merge_nearby_lines(lines: Vec<(f64, f64)>) -> Vec<(f64, f64)> {
210    if lines.is_empty() {
211        return lines;
212    }
213
214    let mut sorted = lines;
215    sorted.sort_by(|left, right| {
216        left.0
217            .partial_cmp(&right.0)
218            .unwrap_or(std::cmp::Ordering::Equal)
219    });
220
221    let mut merged: Vec<(f64, f64)> = Vec::new();
222    for (position, intensity) in sorted {
223        if let Some((prev_position, prev_intensity)) = merged.last_mut() {
224            if (position - *prev_position).abs() <= 1e-6 {
225                let total = *prev_intensity + intensity;
226                *prev_position = (*prev_position * *prev_intensity + position * intensity) / total;
227                *prev_intensity = total;
228                continue;
229            }
230        }
231        merged.push((position, intensity));
232    }
233
234    merged
235}
236
237fn split_lines(
238    center_ppm: f64,
239    total_intensity: f64,
240    coupling_groups: &[CouplingGroup],
241    spectrometer_frequency_mhz: f64,
242) -> Vec<(f64, f64)> {
243    let mut lines = vec![(center_ppm, total_intensity)];
244
245    for group in coupling_groups {
246        let j_ppm = group.average_j_hz / spectrometer_frequency_mhz.max(1e-6);
247        let coeffs = pascal_row(group.n_equivalent_neighbors);
248        let coeff_sum = coeffs.iter().sum::<f64>().max(1e-12);
249        let mut split = Vec::with_capacity(lines.len() * coeffs.len());
250
251        for (line_center, line_intensity) in &lines {
252            for (index, coeff) in coeffs.iter().enumerate() {
253                let offset = (index as f64 - group.n_equivalent_neighbors as f64 / 2.0) * j_ppm;
254                split.push((line_center + offset, line_intensity * coeff / coeff_sum));
255            }
256        }
257
258        lines = merge_nearby_lines(split);
259    }
260
261    lines
262}
263
264fn multiplicity_code(n_equivalent_neighbors: usize) -> &'static str {
265    match n_equivalent_neighbors {
266        0 => "s",
267        1 => "d",
268        2 => "t",
269        3 => "q",
270        4 => "quint",
271        _ => "m",
272    }
273}
274
275fn format_multiplicity(coupling_groups: &[CouplingGroup]) -> String {
276    if coupling_groups.is_empty() {
277        return "s".to_string();
278    }
279
280    let mut codes: Vec<&str> = coupling_groups
281        .iter()
282        .map(|group| multiplicity_code(group.n_equivalent_neighbors))
283        .collect();
284    let base = if codes.len() == 1 {
285        codes.remove(0).to_string()
286    } else if codes.len() == 2 && codes.iter().all(|code| *code != "m" && *code != "quint") {
287        codes.join("")
288    } else {
289        "m".to_string()
290    };
291
292    let j_values = coupling_groups
293        .iter()
294        .map(|group| format!("{:.1}", group.average_j_hz))
295        .collect::<Vec<_>>()
296        .join(", ");
297
298    format!("{} (J≈{} Hz)", base, j_values)
299}
300
301/// Generate an NMR spectrum from shift predictions and J-couplings.
302///
303/// `shifts`: predicted chemical shifts
304/// `couplings`: predicted J-coupling constants
305/// `nucleus`: which nucleus to display
306/// `gamma`: Lorentzian line width in ppm (if 0.0, uses nucleus-specific FWHM default)
307/// `ppm_min`, `ppm_max`: spectral window
308/// `n_points`: grid resolution
309pub fn compute_nmr_spectrum(
310    shifts: &NmrShiftResult,
311    couplings: &[JCoupling],
312    nucleus: NmrNucleus,
313    gamma: f64,
314    ppm_min: f64,
315    ppm_max: f64,
316    n_points: usize,
317) -> NmrSpectrum {
318    compute_nmr_spectrum_for_shifts(
319        shifts.shifts_for_nucleus(nucleus),
320        couplings,
321        nucleus,
322        gamma,
323        ppm_min,
324        ppm_max,
325        n_points,
326    )
327}
328
329pub fn compute_nmr_spectrum_for_shifts(
330    active_shifts: &[ChemicalShift],
331    couplings: &[JCoupling],
332    nucleus: NmrNucleus,
333    gamma: f64,
334    ppm_min: f64,
335    ppm_max: f64,
336    n_points: usize,
337) -> NmrSpectrum {
338    let n_points = n_points.max(2);
339    let step = (ppm_max - ppm_min) / (n_points as f64 - 1.0);
340    // NMR convention: ppm axis runs from high to low
341    let ppm_axis: Vec<f64> = (0..n_points).map(|i| ppm_max - step * i as f64).collect();
342    let mut intensities = vec![0.0; n_points];
343
344    // Use nucleus-specific FWHM if gamma is 0 or very small
345    let effective_gamma = if gamma > 1e-6 {
346        gamma
347    } else {
348        default_fwhm_hz(nucleus) / default_frequency_mhz(nucleus)
349    };
350
351    let shift_groups = build_shift_groups(active_shifts, nucleus);
352    let spectrometer_frequency_mhz = default_frequency_mhz(nucleus);
353    let mut peaks = Vec::with_capacity(shift_groups.len());
354
355    for group in &shift_groups {
356        let peak_intensity = group.atom_indices.len() as f64;
357        let coupling_groups = if matches!(nucleus, NmrNucleus::H1) {
358            build_coupling_groups(group, &shift_groups, couplings)
359        } else {
360            Vec::new()
361        };
362
363        peaks.push(NmrPeak {
364            shift_ppm: group.shift_ppm,
365            intensity: peak_intensity,
366            atom_index: group.atom_indices[0],
367            atom_indices: group.atom_indices.clone(),
368            multiplicity: format_multiplicity(&coupling_groups),
369            environment: group.environment.clone(),
370        });
371
372        let lines = if matches!(nucleus, NmrNucleus::H1) {
373            split_lines(
374                group.shift_ppm,
375                peak_intensity,
376                &coupling_groups,
377                spectrometer_frequency_mhz,
378            )
379        } else {
380            vec![(group.shift_ppm, peak_intensity)]
381        };
382
383        for (line_ppm, line_intensity) in lines {
384            for (idx, &ppm) in ppm_axis.iter().enumerate() {
385                intensities[idx] += line_intensity * lorentzian(ppm, line_ppm, effective_gamma);
386            }
387        }
388    }
389
390    // Compute integrations for each peak group
391    let integration_width = effective_gamma * 10.0; // integrate ±10γ around each peak
392    let integrations =
393        compute_integrations(&peaks, &ppm_axis, &intensities, step, integration_width);
394
395    let mut notes = vec![
396        format!(
397            "{} NMR spectrum with Lorentzian broadening (γ = {:.4} ppm, FWHM = {:.1} Hz).",
398            nucleus.unicode_label(),
399            effective_gamma,
400            effective_gamma * default_frequency_mhz(nucleus)
401        ),
402        "Chemical shifts come from the fast empirical inference layer. ¹H uses explicit vicinal J-coupling splitting; other nuclei are rendered as screening-level singlets unless a dedicated model is available.".to_string(),
403        "Equivalent nuclei are grouped before rendering, and exchangeable O-H/N-H/S-H couplings are suppressed in the default 1H spectrum.".to_string(),
404        "First-order splitting uses explicit coupling groups; higher-order effects (roofing, strong coupling) are not modeled.".to_string(),
405    ];
406    if nucleus.is_quadrupolar() {
407        notes.push(
408            "Quadrupolar nucleus selected: linewidths and positions are approximate relative indicators, not quantitative simulations of relaxation or isotope abundance.".to_string(),
409        );
410    }
411
412    NmrSpectrum {
413        ppm_axis,
414        intensities,
415        peaks,
416        nucleus,
417        gamma: effective_gamma,
418        integrations,
419        notes,
420    }
421}
422
423/// Compute peak integrations for groups of peaks in the spectrum.
424fn compute_integrations(
425    peaks: &[NmrPeak],
426    ppm_axis: &[f64],
427    intensities: &[f64],
428    step: f64,
429    integration_width: f64,
430) -> Vec<PeakIntegration> {
431    if peaks.is_empty() || ppm_axis.is_empty() {
432        return Vec::new();
433    }
434
435    let mut integrations: Vec<PeakIntegration> = peaks
436        .iter()
437        .map(|peak| {
438            let low = peak.shift_ppm - integration_width;
439            let high = peak.shift_ppm + integration_width;
440
441            let raw_area: f64 = ppm_axis
442                .iter()
443                .zip(intensities.iter())
444                .filter(|(&ppm, _)| ppm >= low && ppm <= high)
445                .map(|(_, &intensity)| intensity * step)
446                .sum();
447
448            PeakIntegration {
449                center_ppm: peak.shift_ppm,
450                bounds: (low, high),
451                raw_area,
452                relative_area: 0.0,
453                n_atoms: peak.intensity.round().max(1.0) as usize,
454            }
455        })
456        .collect();
457
458    // Normalize relative areas using the equivalent-atom count to keep areas chemically meaningful.
459    let max_atoms = integrations
460        .iter()
461        .map(|integration| integration.n_atoms)
462        .max()
463        .unwrap_or(1);
464    let max_area = integrations
465        .iter()
466        .map(|i| i.raw_area)
467        .fold(0.0f64, f64::max);
468
469    if max_atoms > 0 {
470        for int in &mut integrations {
471            int.relative_area = int.n_atoms as f64 / max_atoms as f64;
472            if max_area <= 1e-30 {
473                int.raw_area = int.n_atoms as f64;
474            }
475        }
476    }
477
478    integrations
479}
480
481/// Pascal's triangle row n: [C(n,0), C(n,1), ..., C(n,n)].
482fn pascal_row(n: usize) -> Vec<f64> {
483    let mut row = vec![1.0];
484    for k in 1..=n {
485        let prev = row[k - 1];
486        row.push(prev * (n - k + 1) as f64 / k as f64);
487    }
488    row
489}
490
491#[cfg(test)]
492mod tests {
493    use super::*;
494
495    #[test]
496    fn test_lorentzian_normalization() {
497        // Integral of Lorentzian should be 1 (approximately)
498        let gamma = 0.02;
499        let n = 10000;
500        let dx = 20.0 / n as f64;
501        let integral: f64 = (0..n)
502            .map(|i| {
503                let x = -10.0 + i as f64 * dx;
504                lorentzian(x, 0.0, gamma) * dx
505            })
506            .sum();
507        assert!(
508            (integral - 1.0).abs() < 0.01,
509            "Lorentzian integral = {}, expected ~1.0",
510            integral
511        );
512    }
513
514    #[test]
515    fn test_pascal_row() {
516        assert_eq!(pascal_row(0), vec![1.0]);
517        assert_eq!(pascal_row(1), vec![1.0, 1.0]);
518        assert_eq!(pascal_row(2), vec![1.0, 2.0, 1.0]);
519        assert_eq!(pascal_row(3), vec![1.0, 3.0, 3.0, 1.0]);
520    }
521
522    #[test]
523    fn test_nmr_spectrum_h1_ethanol() {
524        let mol = crate::graph::Molecule::from_smiles("CCO").unwrap();
525        let shifts = super::super::shifts::predict_chemical_shifts(&mol);
526        let couplings = super::super::coupling::predict_j_couplings(&mol, &[]);
527
528        let spectrum =
529            compute_nmr_spectrum(&shifts, &couplings, NmrNucleus::H1, 0.02, 0.0, 12.0, 1000);
530
531        assert_eq!(spectrum.ppm_axis.len(), 1000);
532        assert_eq!(spectrum.intensities.len(), 1000);
533        assert!(!spectrum.peaks.is_empty());
534
535        // Verify ppm axis goes from high to low (NMR convention)
536        assert!(spectrum.ppm_axis[0] > spectrum.ppm_axis[999]);
537    }
538
539    #[test]
540    fn test_nmr_spectrum_c13_benzene() {
541        let mol = crate::graph::Molecule::from_smiles("c1ccccc1").unwrap();
542        let shifts = super::super::shifts::predict_chemical_shifts(&mol);
543        let couplings = super::super::coupling::predict_j_couplings(&mol, &[]);
544
545        let spectrum =
546            compute_nmr_spectrum(&shifts, &couplings, NmrNucleus::C13, 1.0, 0.0, 220.0, 1000);
547
548        assert!(!spectrum.peaks.is_empty(), "Benzene should have ¹³C peaks");
549
550        // All aromatic C should cluster near 128 ppm
551        for peak in &spectrum.peaks {
552            assert!(
553                (peak.shift_ppm - 128.0).abs() < 20.0,
554                "Benzene ¹³C peak at {} ppm should be near 128",
555                peak.shift_ppm
556            );
557        }
558    }
559
560    #[test]
561    fn test_multiplicity_labels() {
562        assert_eq!(multiplicity_label(0), "s");
563        assert_eq!(multiplicity_label(1), "d");
564        assert_eq!(multiplicity_label(2), "t");
565        assert_eq!(multiplicity_label(3), "q");
566        assert_eq!(multiplicity_label(4), "m");
567    }
568
569    #[test]
570    fn test_shift_grouping_and_integrations_for_ethanol() {
571        let spectrum = crate::compute_nmr_spectrum("CCO", "1H", 0.02, 0.0, 12.0, 1000).unwrap();
572
573        assert_eq!(
574            spectrum.peaks.len(),
575            3,
576            "ethanol should collapse to CH3, CH2, and OH groups"
577        );
578
579        let mut atom_counts: Vec<usize> = spectrum
580            .integrations
581            .iter()
582            .map(|integration| integration.n_atoms)
583            .collect();
584        atom_counts.sort_unstable();
585        assert_eq!(atom_counts, vec![1, 2, 3]);
586
587        assert!(
588            spectrum
589                .peaks
590                .iter()
591                .any(|peak| peak.environment.contains("methyl")
592                    && peak.multiplicity.starts_with('t')),
593            "methyl group should appear as a triplet-like signal: {:?}",
594            spectrum
595                .peaks
596                .iter()
597                .map(|peak| (&peak.environment, &peak.multiplicity))
598                .collect::<Vec<_>>()
599        );
600        assert!(
601            spectrum
602                .peaks
603                .iter()
604                .any(|peak| peak.environment.contains("methylene")
605                    && peak.multiplicity.starts_with('q')),
606            "methylene group should appear as a quartet-like signal: {:?}",
607            spectrum
608                .peaks
609                .iter()
610                .map(|peak| (&peak.environment, &peak.multiplicity))
611                .collect::<Vec<_>>()
612        );
613        assert!(
614            spectrum
615                .peaks
616                .iter()
617                .any(|peak| peak.environment.contains("O-H") && peak.multiplicity == "s"),
618            "exchangeable alcohol proton should default to a singlet: {:?}",
619            spectrum
620                .peaks
621                .iter()
622                .map(|peak| (&peak.environment, &peak.multiplicity))
623                .collect::<Vec<_>>()
624        );
625    }
626}