use super::nucleus::NmrNucleus;
use crate::graph::{BondOrder, Hybridization, Molecule};
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChemicalShift {
pub atom_index: usize,
pub element: u8,
pub shift_ppm: f64,
pub environment: String,
pub confidence: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NucleusShiftSeries {
pub nucleus: NmrNucleus,
pub shifts: Vec<ChemicalShift>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NmrShiftResult {
pub h_shifts: Vec<ChemicalShift>,
pub c_shifts: Vec<ChemicalShift>,
pub f_shifts: Vec<ChemicalShift>,
pub p_shifts: Vec<ChemicalShift>,
pub n_shifts: Vec<ChemicalShift>,
pub b_shifts: Vec<ChemicalShift>,
pub si_shifts: Vec<ChemicalShift>,
pub se_shifts: Vec<ChemicalShift>,
pub o_shifts: Vec<ChemicalShift>,
pub s_shifts: Vec<ChemicalShift>,
pub other_shifts: Vec<NucleusShiftSeries>,
pub notes: Vec<String>,
}
impl NmrShiftResult {
pub fn shifts_for_nucleus(&self, nucleus: NmrNucleus) -> &[ChemicalShift] {
match nucleus {
NmrNucleus::H1 => &self.h_shifts,
NmrNucleus::C13 => &self.c_shifts,
NmrNucleus::F19 => &self.f_shifts,
NmrNucleus::P31 => &self.p_shifts,
NmrNucleus::N15 => &self.n_shifts,
NmrNucleus::B11 => &self.b_shifts,
NmrNucleus::Si29 => &self.si_shifts,
NmrNucleus::Se77 => &self.se_shifts,
NmrNucleus::O17 => &self.o_shifts,
NmrNucleus::S33 => &self.s_shifts,
_ => self
.other_shifts
.iter()
.find(|series| series.nucleus == nucleus)
.map(|series| series.shifts.as_slice())
.unwrap_or(&[]),
}
}
}
fn electronegativity(z: u8) -> f64 {
match z {
1 => 2.20, 5 => 2.04, 6 => 2.55, 7 => 3.04, 8 => 3.44, 9 => 3.98, 13 => 1.61, 14 => 1.90, 15 => 2.19, 16 => 2.58, 17 => 3.16, 22 => 1.54, 24 => 1.66, 25 => 1.55, 26 => 1.83, 27 => 1.88, 28 => 1.91, 29 => 1.90, 30 => 1.65, 32 => 2.01, 33 => 2.18, 34 => 2.55, 35 => 2.96, 44 => 2.20, 46 => 2.20, 47 => 1.93, 53 => 2.66, 78 => 2.28, 79 => 2.54, _ => 2.00,
}
}
#[derive(Debug, Clone, Copy)]
struct LocalShiftEnvironment {
heavy_neighbors: usize,
hydrogen_neighbors: usize,
hetero_neighbors: usize,
pi_bonds: usize,
aromatic: bool,
formal_charge: i8,
mean_neighbor_atomic_number: f64,
electronegativity_delta: f64,
}
fn local_shift_environment(mol: &Molecule, idx: NodeIndex) -> LocalShiftEnvironment {
let mut heavy_neighbors = 0usize;
let mut hydrogen_neighbors = 0usize;
let mut hetero_neighbors = 0usize;
let mut neighbor_atomic_number_sum = 0.0;
let mut electronegativity_delta = 0.0;
for neighbor in mol.graph.neighbors(idx) {
let element = mol.graph[neighbor].element;
neighbor_atomic_number_sum += element as f64;
electronegativity_delta +=
electronegativity(element) - electronegativity(mol.graph[idx].element);
if element == 1 {
hydrogen_neighbors += 1;
} else {
heavy_neighbors += 1;
if !matches!(element, 1 | 6) {
hetero_neighbors += 1;
}
}
}
let pi_bonds = mol
.graph
.edges(idx)
.filter(|edge| matches!(edge.weight().order, BondOrder::Double | BondOrder::Triple))
.count();
let aromatic = mol
.graph
.edges(idx)
.any(|edge| matches!(edge.weight().order, BondOrder::Aromatic));
let neighbor_count = mol.graph.neighbors(idx).count().max(1) as f64;
LocalShiftEnvironment {
heavy_neighbors,
hydrogen_neighbors,
hetero_neighbors,
pi_bonds,
aromatic,
formal_charge: mol.graph[idx].formal_charge,
mean_neighbor_atomic_number: neighbor_atomic_number_sum / neighbor_count,
electronegativity_delta,
}
}
fn apply_isotope_offset(shift_ppm: f64, nucleus: NmrNucleus) -> f64 {
match nucleus {
NmrNucleus::H2 => shift_ppm + 0.05,
NmrNucleus::H3 => shift_ppm + 0.10,
NmrNucleus::B10 => shift_ppm - 2.0,
NmrNucleus::N14 => shift_ppm + 12.0,
NmrNucleus::Cl37 => shift_ppm + 1.0,
NmrNucleus::Br81 => shift_ppm + 1.5,
NmrNucleus::Li6 => shift_ppm - 0.5,
NmrNucleus::K40 => shift_ppm + 1.5,
NmrNucleus::K41 => shift_ppm + 0.6,
NmrNucleus::Ti47 => shift_ppm - 2.0,
NmrNucleus::Ti49 => shift_ppm + 2.0,
NmrNucleus::V50 => shift_ppm - 4.0,
NmrNucleus::Mo97 => shift_ppm + 2.0,
NmrNucleus::Ru99 => shift_ppm - 3.0,
NmrNucleus::Ag109 => shift_ppm + 1.0,
NmrNucleus::Cd113 => shift_ppm + 1.0,
NmrNucleus::In113 => shift_ppm - 2.0,
NmrNucleus::Sn115 => shift_ppm - 3.0,
NmrNucleus::Sn117 => shift_ppm - 1.0,
NmrNucleus::Sb123 => shift_ppm + 3.0,
NmrNucleus::Te123 => shift_ppm - 5.0,
NmrNucleus::Xe131 => shift_ppm + 8.0,
NmrNucleus::Ba135 => shift_ppm - 4.0,
NmrNucleus::Hg201 => shift_ppm + 2.0,
NmrNucleus::Tl203 => shift_ppm - 2.0,
_ => shift_ppm,
}
}
fn generic_relative_shift(
mol: &Molecule,
idx: NodeIndex,
nucleus: NmrNucleus,
) -> (f64, String, f64) {
let env = local_shift_environment(mol, idx);
let (min_ppm, max_ppm) = nucleus.empirical_range();
let span = max_ppm - min_ppm;
let sensitivity = if nucleus.is_primary_target() {
0.08 * span
} else if nucleus.is_quadrupolar() {
0.03 * span
} else {
0.05 * span
};
let mut shift_ppm = nucleus.empirical_center();
shift_ppm += env.electronegativity_delta * sensitivity * 0.35;
shift_ppm += env.hetero_neighbors as f64 * sensitivity * 0.22;
shift_ppm += env.pi_bonds as f64 * sensitivity * 0.20;
shift_ppm += env.heavy_neighbors as f64 * sensitivity * 0.08;
shift_ppm += env.hydrogen_neighbors as f64 * sensitivity * 0.03;
shift_ppm += env.formal_charge as f64 * sensitivity * 0.45;
if env.aromatic {
shift_ppm += sensitivity * 0.25;
}
shift_ppm +=
(env.mean_neighbor_atomic_number - nucleus.atomic_number() as f64) * 0.02 * sensitivity;
shift_ppm = apply_isotope_offset(shift_ppm, nucleus).clamp(min_ppm, max_ppm);
let environment = format!(
"relative_{}_coord{}_hetero{}_pi{}_q{}",
nucleus.element_symbol(),
env.heavy_neighbors,
env.hetero_neighbors,
env.pi_bonds,
env.formal_charge,
);
let confidence = if nucleus.is_primary_target() {
0.70
} else if nucleus.is_quadrupolar() {
0.28
} else {
0.38
};
(shift_ppm, environment, confidence)
}
fn predict_shift_for_nucleus(
mol: &Molecule,
idx: NodeIndex,
nucleus: NmrNucleus,
) -> (f64, String, f64) {
match nucleus {
NmrNucleus::H1 => predict_h_shift(mol, idx),
NmrNucleus::H2 | NmrNucleus::H3 => {
let (shift_ppm, environment, confidence) = predict_h_shift(mol, idx);
(
apply_isotope_offset(shift_ppm, nucleus),
format!("{}_relative", environment),
confidence * 0.92,
)
}
NmrNucleus::C13 => predict_c_shift(mol, idx),
NmrNucleus::F19 => predict_f_shift(mol, idx),
NmrNucleus::P31 => predict_p_shift(mol, idx),
NmrNucleus::N15 => predict_n_shift(mol, idx),
NmrNucleus::N14 => {
let (shift_ppm, environment, confidence) = predict_n_shift(mol, idx);
(
apply_isotope_offset(shift_ppm, nucleus),
format!("{}_relative", environment),
confidence * 0.85,
)
}
NmrNucleus::B11 => predict_b_shift(mol, idx),
NmrNucleus::B10 => {
let (shift_ppm, environment, confidence) = predict_b_shift(mol, idx);
(
apply_isotope_offset(shift_ppm, nucleus),
format!("{}_relative", environment),
confidence * 0.85,
)
}
NmrNucleus::Si29 => predict_si_shift(mol, idx),
NmrNucleus::Se77 => predict_se_shift(mol, idx),
NmrNucleus::O17 => predict_o_shift(mol, idx),
NmrNucleus::S33 => predict_s_shift(mol, idx),
_ => generic_relative_shift(mol, idx, nucleus),
}
}
fn predict_h_shift(mol: &Molecule, h_idx: NodeIndex) -> (f64, String, f64) {
let neighbors: Vec<NodeIndex> = mol.graph.neighbors(h_idx).collect();
if neighbors.is_empty() {
return (0.0, "isolated_hydrogen".to_string(), 0.3);
}
let parent = neighbors[0];
let parent_elem = mol.graph[parent].element;
let parent_hyb = &mol.graph[parent].hybridization;
match parent_elem {
6 => {
let is_aromatic = mol
.graph
.edges(parent)
.any(|e| matches!(e.weight().order, BondOrder::Aromatic));
if is_aromatic {
let mut shift = 7.27;
for nb in mol.graph.neighbors(parent) {
if nb == h_idx {
continue;
}
let nb_elem = mol.graph[nb].element;
let en_diff = electronegativity(nb_elem) - electronegativity(6);
shift += en_diff * 0.3;
}
(shift, "aromatic_C-H".to_string(), 0.80)
} else {
match parent_hyb {
Hybridization::SP3 => {
let h_count = mol
.graph
.neighbors(parent)
.filter(|n| mol.graph[*n].element == 1)
.count();
let base = match h_count {
3 => 0.90, 2 => 1.30, _ => 1.50, };
let mut shift = base;
for nb in mol.graph.neighbors(parent) {
if mol.graph[nb].element == 1 {
continue;
}
let nb_elem = mol.graph[nb].element;
let en_diff = electronegativity(nb_elem) - electronegativity(6);
if en_diff > 0.3 {
shift += en_diff * 0.8;
}
}
for nb in mol.graph.neighbors(parent) {
if mol.graph[nb].element != 6 {
continue;
}
for nb2_edge in mol.graph.edges(nb) {
let nb2 = if nb2_edge.source() == nb {
nb2_edge.target()
} else {
nb2_edge.source()
};
if mol.graph[nb2].element == 8
&& matches!(nb2_edge.weight().order, BondOrder::Double)
{
shift += 0.5;
break;
}
}
}
let env = if h_count >= 3 {
"methyl"
} else if h_count == 2 {
"methylene"
} else {
"methine"
};
(shift, format!("sp3_{}", env), 0.75)
}
Hybridization::SP2 => {
let mut shift = 5.25;
for nb in mol.graph.neighbors(parent) {
if nb == h_idx {
continue;
}
let nb_elem = mol.graph[nb].element;
let en_diff = electronegativity(nb_elem) - electronegativity(6);
shift += en_diff * 0.5;
if nb_elem == 8
&& mol.graph.edges(parent).any(|e| {
let other = if e.source() == parent {
e.target()
} else {
e.source()
};
mol.graph[other].element == 8
&& matches!(e.weight().order, BondOrder::Double)
})
{
shift = 9.50; return (shift, "aldehyde_C-H".to_string(), 0.82);
}
}
(shift, "alkene_C-H".to_string(), 0.70)
}
Hybridization::SP => {
(2.50, "alkyne_C-H".to_string(), 0.75)
}
_ => (1.5, "unknown_C-H".to_string(), 0.40),
}
}
}
8 => {
let is_carboxylic = mol.graph.neighbors(parent).any(|nb| {
if mol.graph[nb].element != 6 {
return false;
}
mol.graph.edges(nb).any(|e| {
let other = if e.source() == nb {
e.target()
} else {
e.source()
};
mol.graph[other].element == 8 && matches!(e.weight().order, BondOrder::Double)
})
});
if is_carboxylic {
(11.0, "carboxylic_acid_O-H".to_string(), 0.70)
} else {
let is_phenol = mol.graph.neighbors(parent).any(|nb| {
mol.graph[nb].element == 6
&& mol
.graph
.edges(nb)
.any(|e| matches!(e.weight().order, BondOrder::Aromatic))
});
if is_phenol {
(5.5, "phenol_O-H".to_string(), 0.60)
} else {
(2.5, "alcohol_O-H".to_string(), 0.55)
}
}
}
7 => {
let is_aromatic_n = mol
.graph
.edges(parent)
.any(|e| matches!(e.weight().order, BondOrder::Aromatic));
if is_aromatic_n {
(8.5, "aromatic_N-H".to_string(), 0.55)
} else {
let h_on_n = mol
.graph
.neighbors(parent)
.filter(|n| mol.graph[*n].element == 1)
.count();
match h_on_n {
1 => (2.2, "secondary_amine_N-H".to_string(), 0.50),
_ => (1.5, "primary_amine_N-H".to_string(), 0.50),
}
}
}
16 => (1.8, "thiol_S-H".to_string(), 0.50),
_ => (2.0, "other_X-H".to_string(), 0.30),
}
}
fn predict_c_shift(mol: &Molecule, c_idx: NodeIndex) -> (f64, String, f64) {
let hyb = &mol.graph[c_idx].hybridization;
let is_aromatic = mol
.graph
.edges(c_idx)
.any(|e| matches!(e.weight().order, BondOrder::Aromatic));
let has_carbonyl_o = mol.graph.edges(c_idx).any(|e| {
let other = if e.source() == c_idx {
e.target()
} else {
e.source()
};
mol.graph[other].element == 8 && matches!(e.weight().order, BondOrder::Double)
});
if has_carbonyl_o {
let has_oh = mol.graph.neighbors(c_idx).any(|nb| {
if mol.graph[nb].element != 8 {
return false;
}
mol.graph.neighbors(nb).any(|n| mol.graph[n].element == 1)
});
if has_oh {
return (175.0, "carboxylic_acid_C".to_string(), 0.75);
}
let has_n = mol
.graph
.neighbors(c_idx)
.any(|nb| mol.graph[nb].element == 7);
if has_n {
return (170.0, "amide_C=O".to_string(), 0.72);
}
let has_h = mol
.graph
.neighbors(c_idx)
.any(|nb| mol.graph[nb].element == 1);
if has_h {
return (200.0, "aldehyde_C=O".to_string(), 0.75);
}
return (205.0, "ketone_C=O".to_string(), 0.73);
}
if is_aromatic {
let mut shift = 128.0;
for nb in mol.graph.neighbors(c_idx) {
let nb_elem = mol.graph[nb].element;
match nb_elem {
7 => shift += 5.0, 8 => shift -= 10.0, 9 => shift -= 5.0, 17 => shift += 2.0, 35 => shift += 3.0, _ => {}
}
}
return (shift, "aromatic_C".to_string(), 0.78);
}
match hyb {
Hybridization::SP3 => {
let mut shift = 20.0;
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(c_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_heavy = heavy_neighbors.len();
shift += (n_heavy as f64 - 1.0) * 8.0;
for &nb_elem in &heavy_neighbors {
let en_diff = electronegativity(nb_elem) - electronegativity(6);
if en_diff > 0.3 {
shift += en_diff * 10.0;
}
}
let env = match n_heavy {
0 | 1 => "methyl_C",
2 => "methylene_C",
3 => "methine_C",
_ => "quaternary_C",
};
(shift.clamp(0.0, 80.0), env.to_string(), 0.70)
}
Hybridization::SP2 => {
let mut shift = 125.0;
for nb in mol.graph.neighbors(c_idx) {
let nb_elem = mol.graph[nb].element;
let en_diff = electronegativity(nb_elem) - electronegativity(6);
shift += en_diff * 8.0;
}
(shift.clamp(80.0, 165.0), "alkene_C".to_string(), 0.65)
}
Hybridization::SP => {
let mut shift = 80.0;
let has_h = mol
.graph
.neighbors(c_idx)
.any(|nb| mol.graph[nb].element == 1);
if has_h {
shift = 70.0; }
(shift, "alkyne_C".to_string(), 0.70)
}
_ => (30.0, "unknown_C".to_string(), 0.40),
}
}
fn predict_f_shift(mol: &Molecule, f_idx: NodeIndex) -> (f64, String, f64) {
let neighbors: Vec<NodeIndex> = mol.graph.neighbors(f_idx).collect();
if neighbors.is_empty() {
return (-100.0, "isolated_F".to_string(), 0.30);
}
let parent = neighbors[0];
let parent_elem = mol.graph[parent].element;
let is_aromatic = mol
.graph
.edges(parent)
.any(|e| matches!(e.weight().order, BondOrder::Aromatic));
match parent_elem {
6 if is_aromatic => {
let mut shift = -113.0;
for nb in mol.graph.neighbors(parent) {
if nb == f_idx {
continue;
}
let nb_elem = mol.graph[nb].element;
let en_diff = electronegativity(nb_elem) - electronegativity(6);
shift -= en_diff * 3.0;
}
(shift, "aryl_C-F".to_string(), 0.65)
}
6 => {
let n_f_on_parent = mol
.graph
.neighbors(parent)
.filter(|n| mol.graph[*n].element == 9)
.count();
let shift = match n_f_on_parent {
3 => -63.0, 2 => -105.0, _ => -170.0, };
let env = match n_f_on_parent {
3 => "CF3",
2 => "CF2",
_ => "CHF",
};
(shift, format!("sp3_{}", env), 0.60)
}
_ => (-100.0, "other_X-F".to_string(), 0.40),
}
}
fn predict_p_shift(mol: &Molecule, p_idx: NodeIndex) -> (f64, String, f64) {
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(p_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
let n_n = heavy_neighbors.iter().filter(|&&e| e == 7).count();
let has_double_o = mol.graph.edges(p_idx).any(|e| {
let other = if e.source() == p_idx {
e.target()
} else {
e.source()
};
mol.graph[other].element == 8 && matches!(e.weight().order, BondOrder::Double)
});
if has_double_o && n_o >= 3 {
(0.0, "phosphate_P=O".to_string(), 0.65)
} else if has_double_o && n_o >= 1 {
(30.0, "phosphonate_P=O".to_string(), 0.60)
} else if n_c >= 3 {
(-20.0, "trialkylphosphine".to_string(), 0.55)
} else if n_c >= 1 && n_o >= 1 {
(140.0, "phosphite".to_string(), 0.55)
} else if n_n >= 1 {
(25.0, "phosphoramide".to_string(), 0.50)
} else {
(0.0, "other_P".to_string(), 0.40)
}
}
fn predict_n_shift(mol: &Molecule, n_idx: NodeIndex) -> (f64, String, f64) {
let is_aromatic = mol
.graph
.edges(n_idx)
.any(|e| matches!(e.weight().order, BondOrder::Aromatic));
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(n_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_h = mol
.graph
.neighbors(n_idx)
.filter(|nb| mol.graph[*nb].element == 1)
.count();
let has_double = mol
.graph
.edges(n_idx)
.any(|e| matches!(e.weight().order, BondOrder::Double));
if is_aromatic {
(310.0, "aromatic_N".to_string(), 0.55)
} else if has_double {
(320.0, "imine_N=C".to_string(), 0.50)
} else if heavy_neighbors.contains(&8) && has_double {
(370.0, "nitro_N".to_string(), 0.50)
} else if n_h == 0 && heavy_neighbors.len() >= 3 {
(40.0, "tertiary_amine_N".to_string(), 0.55)
} else if n_h == 1 {
(50.0, "secondary_amine_N".to_string(), 0.50)
} else if n_h >= 2 {
(20.0, "primary_amine_N".to_string(), 0.55)
} else {
(30.0, "other_N".to_string(), 0.40)
}
}
fn predict_b_shift(mol: &Molecule, b_idx: NodeIndex) -> (f64, String, f64) {
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(b_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
let n_f = heavy_neighbors.iter().filter(|&&e| e == 9).count();
let n_h = mol
.graph
.neighbors(b_idx)
.filter(|nb| mol.graph[*nb].element == 1)
.count();
if n_f >= 3 {
(0.0, "BF3".to_string(), 0.65) } else if n_o >= 2 {
(20.0, "boronic_acid_B(OH)2".to_string(), 0.55)
} else if n_c >= 2 && n_h == 0 {
(70.0, "triorganoborane_BR3".to_string(), 0.50)
} else if n_h >= 2 {
(-20.0, "borohydride_BH".to_string(), 0.50)
} else {
(30.0, "other_B".to_string(), 0.40)
}
}
fn predict_si_shift(mol: &Molecule, si_idx: NodeIndex) -> (f64, String, f64) {
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(si_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
let n_cl = heavy_neighbors.iter().filter(|&&e| e == 17).count();
if n_c == 4 {
(0.0, "tetraalkylsilane_SiR4".to_string(), 0.65)
} else if n_o >= 4 {
(-110.0, "silicate_SiO4".to_string(), 0.55)
} else if n_o >= 2 {
(-50.0, "alkoxysilane_Si(OR)2".to_string(), 0.50)
} else if n_cl >= 1 {
(0.0 + n_cl as f64 * 10.0, "chlorosilane".to_string(), 0.50)
} else if n_c >= 1 && n_o >= 1 {
(-20.0, "organosiloxane".to_string(), 0.50)
} else {
(0.0, "other_Si".to_string(), 0.40)
}
}
fn predict_se_shift(mol: &Molecule, se_idx: NodeIndex) -> (f64, String, f64) {
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(se_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
let n_h = mol
.graph
.neighbors(se_idx)
.filter(|nb| mol.graph[*nb].element == 1)
.count();
if n_c >= 2 {
(0.0, "dialkyl_selenide".to_string(), 0.50)
} else if n_h >= 1 {
(-50.0, "selenol_SeH".to_string(), 0.45)
} else {
(200.0, "other_Se".to_string(), 0.35)
}
}
fn predict_o_shift(mol: &Molecule, o_idx: NodeIndex) -> (f64, String, f64) {
let is_double = mol
.graph
.edges(o_idx)
.any(|e| matches!(e.weight().order, BondOrder::Double));
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(o_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_h = mol
.graph
.neighbors(o_idx)
.filter(|nb| mol.graph[*nb].element == 1)
.count();
if is_double {
(550.0, "carbonyl_O=C".to_string(), 0.50)
} else if n_h >= 1 && heavy_neighbors.len() == 1 {
(0.0, "alcohol_O-H".to_string(), 0.55)
} else if heavy_neighbors.len() >= 2 {
(50.0, "ether_R-O-R".to_string(), 0.50)
} else {
(0.0, "other_O".to_string(), 0.40)
}
}
fn predict_s_shift(mol: &Molecule, s_idx: NodeIndex) -> (f64, String, f64) {
let is_double = mol
.graph
.edges(s_idx)
.any(|e| matches!(e.weight().order, BondOrder::Double));
let heavy_neighbors: Vec<u8> = mol
.graph
.neighbors(s_idx)
.filter(|nb| mol.graph[*nb].element != 1)
.map(|nb| mol.graph[nb].element)
.collect();
let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
let n_h = mol
.graph
.neighbors(s_idx)
.filter(|nb| mol.graph[*nb].element == 1)
.count();
if n_o >= 3 {
(-10.0, "sulfonate_SO3".to_string(), 0.50)
} else if n_o >= 2 && is_double {
(0.0, "sulfone_SO2".to_string(), 0.50)
} else if n_o >= 1 && is_double {
(200.0, "sulfoxide_SO".to_string(), 0.45)
} else if is_double {
(300.0, "thioketone_C=S".to_string(), 0.45)
} else if n_c >= 2 {
(20.0, "dialkyl_sulfide".to_string(), 0.50)
} else if n_h >= 1 {
(-10.0, "thiol_SH".to_string(), 0.50)
} else {
(0.0, "other_S".to_string(), 0.40)
}
}
pub fn predict_chemical_shifts_for_nucleus(
mol: &Molecule,
nucleus: NmrNucleus,
) -> Vec<ChemicalShift> {
let n = mol.graph.node_count();
let mut shifts = Vec::new();
for atom_idx in 0..n {
let idx = NodeIndex::new(atom_idx);
let element = mol.graph[idx].element;
if element != nucleus.atomic_number() {
continue;
}
let (shift_ppm, environment, confidence) = predict_shift_for_nucleus(mol, idx, nucleus);
shifts.push(ChemicalShift {
atom_index: atom_idx,
element,
shift_ppm,
environment,
confidence,
});
}
shifts
}
pub fn predict_chemical_shifts(mol: &Molecule) -> NmrShiftResult {
let n = mol.graph.node_count();
let mut h_shifts = Vec::new();
let mut c_shifts = Vec::new();
let mut f_shifts = Vec::new();
let mut p_shifts = Vec::new();
let mut n_shifts = Vec::new();
let mut b_shifts = Vec::new();
let mut si_shifts = Vec::new();
let mut se_shifts = Vec::new();
let mut o_shifts = Vec::new();
let mut s_shifts = Vec::new();
let mut other_shifts: BTreeMap<NmrNucleus, Vec<ChemicalShift>> = BTreeMap::new();
for atom_idx in 0..n {
let idx = NodeIndex::new(atom_idx);
let elem = mol.graph[idx].element;
let Some(nucleus) = NmrNucleus::default_for_element(elem) else {
continue;
};
let (shift, env, conf) = predict_shift_for_nucleus(mol, idx, nucleus);
let cs = ChemicalShift {
atom_index: atom_idx,
element: elem,
shift_ppm: shift,
environment: env,
confidence: conf,
};
match nucleus {
NmrNucleus::H1 => h_shifts.push(cs),
NmrNucleus::C13 => c_shifts.push(cs),
NmrNucleus::F19 => f_shifts.push(cs),
NmrNucleus::P31 => p_shifts.push(cs),
NmrNucleus::N15 => n_shifts.push(cs),
NmrNucleus::B11 => b_shifts.push(cs),
NmrNucleus::Si29 => si_shifts.push(cs),
NmrNucleus::Se77 => se_shifts.push(cs),
NmrNucleus::O17 => o_shifts.push(cs),
NmrNucleus::S33 => s_shifts.push(cs),
_ => {
other_shifts.entry(nucleus).or_default().push(cs);
}
}
}
let other_shifts = other_shifts
.into_iter()
.map(|(nucleus, shifts)| NucleusShiftSeries { nucleus, shifts })
.collect();
NmrShiftResult {
h_shifts,
c_shifts,
f_shifts,
p_shifts,
n_shifts,
b_shifts,
si_shifts,
se_shifts,
o_shifts,
s_shifts,
other_shifts,
notes: vec![
"Chemical shifts predicted using empirical additivity rules based on local atomic environment.".to_string(),
"¹H and ¹³C remain the main accuracy targets. Other nuclei use fast relative inference intended for screening and trend inspection.".to_string(),
format!(
"Representative nuclei are exported through legacy fields plus other_shifts. Full per-nucleus access is available for {} nuclei.",
NmrNucleus::ALL.len()
),
"Quadrupolar nuclei are treated as quick relative estimates only; relative intensities and isotope abundances are not modeled in this path.".to_string(),
],
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ethanol_h_shifts() {
let mol = Molecule::from_smiles("CCO").unwrap();
let result = predict_chemical_shifts(&mol);
assert!(!result.h_shifts.is_empty(), "Should have H shifts");
assert!(!result.c_shifts.is_empty(), "Should have C shifts");
for shift in &result.h_shifts {
assert!(
shift.shift_ppm >= 0.0 && shift.shift_ppm <= 15.0,
"H shift {} out of range for atom {}",
shift.shift_ppm,
shift.atom_index
);
}
}
#[test]
fn test_benzene_h_shifts() {
let mol = Molecule::from_smiles("c1ccccc1").unwrap();
let result = predict_chemical_shifts(&mol);
let aromatic_h: Vec<&ChemicalShift> = result
.h_shifts
.iter()
.filter(|s| s.environment.contains("aromatic"))
.collect();
assert!(
!aromatic_h.is_empty(),
"Benzene should have aromatic H shifts"
);
for shift in &aromatic_h {
assert!(
(shift.shift_ppm - 7.27).abs() < 1.5,
"Aromatic H shift {} should be near 7.27 ppm",
shift.shift_ppm
);
}
}
#[test]
fn test_acetic_acid_c_shifts() {
let mol = Molecule::from_smiles("CC(=O)O").unwrap();
let result = predict_chemical_shifts(&mol);
let carbonyl_c: Vec<&ChemicalShift> = result
.c_shifts
.iter()
.filter(|s| s.environment.contains("carboxylic") || s.environment.contains("C=O"))
.collect();
assert!(
!carbonyl_c.is_empty(),
"Acetic acid should have a carbonyl C shift"
);
}
#[test]
fn test_benzene_c13_shift() {
let mol = Molecule::from_smiles("c1ccccc1").unwrap();
let result = predict_chemical_shifts(&mol);
for shift in &result.c_shifts {
assert!(
(shift.shift_ppm - 128.0).abs() < 15.0,
"Aromatic C shift {} should be near 128 ppm",
shift.shift_ppm
);
}
}
#[test]
fn test_aldehyde_h_shift() {
let mol = Molecule::from_smiles("CC=O").unwrap();
let result = predict_chemical_shifts(&mol);
let _aldehyde_h: Vec<&ChemicalShift> = result
.h_shifts
.iter()
.filter(|s| s.environment.contains("aldehyde") || s.shift_ppm > 9.0)
.collect();
assert!(!result.h_shifts.is_empty());
}
}