1use super::nucleus::NmrNucleus;
10use crate::graph::{BondOrder, Hybridization, Molecule};
11use petgraph::graph::NodeIndex;
12use petgraph::visit::EdgeRef;
13use serde::{Deserialize, Serialize};
14use std::collections::BTreeMap;
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct ChemicalShift {
19 pub atom_index: usize,
21 pub element: u8,
23 pub shift_ppm: f64,
25 pub environment: String,
27 pub confidence: f64,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct NucleusShiftSeries {
33 pub nucleus: NmrNucleus,
35 pub shifts: Vec<ChemicalShift>,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct NmrShiftResult {
42 pub h_shifts: Vec<ChemicalShift>,
44 pub c_shifts: Vec<ChemicalShift>,
46 pub f_shifts: Vec<ChemicalShift>,
48 pub p_shifts: Vec<ChemicalShift>,
50 pub n_shifts: Vec<ChemicalShift>,
52 pub b_shifts: Vec<ChemicalShift>,
54 pub si_shifts: Vec<ChemicalShift>,
56 pub se_shifts: Vec<ChemicalShift>,
58 pub o_shifts: Vec<ChemicalShift>,
60 pub s_shifts: Vec<ChemicalShift>,
62 pub other_shifts: Vec<NucleusShiftSeries>,
64 pub notes: Vec<String>,
66}
67
68impl NmrShiftResult {
69 pub fn shifts_for_nucleus(&self, nucleus: NmrNucleus) -> &[ChemicalShift] {
70 match nucleus {
71 NmrNucleus::H1 => &self.h_shifts,
72 NmrNucleus::C13 => &self.c_shifts,
73 NmrNucleus::F19 => &self.f_shifts,
74 NmrNucleus::P31 => &self.p_shifts,
75 NmrNucleus::N15 => &self.n_shifts,
76 NmrNucleus::B11 => &self.b_shifts,
77 NmrNucleus::Si29 => &self.si_shifts,
78 NmrNucleus::Se77 => &self.se_shifts,
79 NmrNucleus::O17 => &self.o_shifts,
80 NmrNucleus::S33 => &self.s_shifts,
81 _ => self
82 .other_shifts
83 .iter()
84 .find(|series| series.nucleus == nucleus)
85 .map(|series| series.shifts.as_slice())
86 .unwrap_or(&[]),
87 }
88 }
89}
90
91fn electronegativity(z: u8) -> f64 {
94 match z {
95 1 => 2.20, 5 => 2.04, 6 => 2.55, 7 => 3.04, 8 => 3.44, 9 => 3.98, 13 => 1.61, 14 => 1.90, 15 => 2.19, 16 => 2.58, 17 => 3.16, 22 => 1.54, 24 => 1.66, 25 => 1.55, 26 => 1.83, 27 => 1.88, 28 => 1.91, 29 => 1.90, 30 => 1.65, 32 => 2.01, 33 => 2.18, 34 => 2.55, 35 => 2.96, 44 => 2.20, 46 => 2.20, 47 => 1.93, 53 => 2.66, 78 => 2.28, 79 => 2.54, _ => 2.00,
125 }
126}
127
128#[derive(Debug, Clone, Copy)]
129struct LocalShiftEnvironment {
130 heavy_neighbors: usize,
131 hydrogen_neighbors: usize,
132 hetero_neighbors: usize,
133 pi_bonds: usize,
134 aromatic: bool,
135 formal_charge: i8,
136 mean_neighbor_atomic_number: f64,
137 electronegativity_delta: f64,
138}
139
140fn local_shift_environment(mol: &Molecule, idx: NodeIndex) -> LocalShiftEnvironment {
141 let mut heavy_neighbors = 0usize;
142 let mut hydrogen_neighbors = 0usize;
143 let mut hetero_neighbors = 0usize;
144 let mut neighbor_atomic_number_sum = 0.0;
145 let mut electronegativity_delta = 0.0;
146
147 for neighbor in mol.graph.neighbors(idx) {
148 let element = mol.graph[neighbor].element;
149 neighbor_atomic_number_sum += element as f64;
150 electronegativity_delta +=
151 electronegativity(element) - electronegativity(mol.graph[idx].element);
152
153 if element == 1 {
154 hydrogen_neighbors += 1;
155 } else {
156 heavy_neighbors += 1;
157 if !matches!(element, 1 | 6) {
158 hetero_neighbors += 1;
159 }
160 }
161 }
162
163 let pi_bonds = mol
164 .graph
165 .edges(idx)
166 .filter(|edge| matches!(edge.weight().order, BondOrder::Double | BondOrder::Triple))
167 .count();
168 let aromatic = mol
169 .graph
170 .edges(idx)
171 .any(|edge| matches!(edge.weight().order, BondOrder::Aromatic));
172 let neighbor_count = mol.graph.neighbors(idx).count().max(1) as f64;
173
174 LocalShiftEnvironment {
175 heavy_neighbors,
176 hydrogen_neighbors,
177 hetero_neighbors,
178 pi_bonds,
179 aromatic,
180 formal_charge: mol.graph[idx].formal_charge,
181 mean_neighbor_atomic_number: neighbor_atomic_number_sum / neighbor_count,
182 electronegativity_delta,
183 }
184}
185
186fn apply_isotope_offset(shift_ppm: f64, nucleus: NmrNucleus) -> f64 {
187 match nucleus {
188 NmrNucleus::H2 => shift_ppm + 0.05,
189 NmrNucleus::H3 => shift_ppm + 0.10,
190 NmrNucleus::B10 => shift_ppm - 2.0,
191 NmrNucleus::N14 => shift_ppm + 12.0,
192 NmrNucleus::Cl37 => shift_ppm + 1.0,
193 NmrNucleus::Br81 => shift_ppm + 1.5,
194 NmrNucleus::Li6 => shift_ppm - 0.5,
195 NmrNucleus::K40 => shift_ppm + 1.5,
196 NmrNucleus::K41 => shift_ppm + 0.6,
197 NmrNucleus::Ti47 => shift_ppm - 2.0,
198 NmrNucleus::Ti49 => shift_ppm + 2.0,
199 NmrNucleus::V50 => shift_ppm - 4.0,
200 NmrNucleus::Mo97 => shift_ppm + 2.0,
201 NmrNucleus::Ru99 => shift_ppm - 3.0,
202 NmrNucleus::Ag109 => shift_ppm + 1.0,
203 NmrNucleus::Cd113 => shift_ppm + 1.0,
204 NmrNucleus::In113 => shift_ppm - 2.0,
205 NmrNucleus::Sn115 => shift_ppm - 3.0,
206 NmrNucleus::Sn117 => shift_ppm - 1.0,
207 NmrNucleus::Sb123 => shift_ppm + 3.0,
208 NmrNucleus::Te123 => shift_ppm - 5.0,
209 NmrNucleus::Xe131 => shift_ppm + 8.0,
210 NmrNucleus::Ba135 => shift_ppm - 4.0,
211 NmrNucleus::Hg201 => shift_ppm + 2.0,
212 NmrNucleus::Tl203 => shift_ppm - 2.0,
213 _ => shift_ppm,
214 }
215}
216
217fn generic_relative_shift(
218 mol: &Molecule,
219 idx: NodeIndex,
220 nucleus: NmrNucleus,
221) -> (f64, String, f64) {
222 let env = local_shift_environment(mol, idx);
223 let (min_ppm, max_ppm) = nucleus.empirical_range();
224 let span = max_ppm - min_ppm;
225 let sensitivity = if nucleus.is_primary_target() {
226 0.08 * span
227 } else if nucleus.is_quadrupolar() {
228 0.03 * span
229 } else {
230 0.05 * span
231 };
232
233 let mut shift_ppm = nucleus.empirical_center();
234 shift_ppm += env.electronegativity_delta * sensitivity * 0.35;
235 shift_ppm += env.hetero_neighbors as f64 * sensitivity * 0.22;
236 shift_ppm += env.pi_bonds as f64 * sensitivity * 0.20;
237 shift_ppm += env.heavy_neighbors as f64 * sensitivity * 0.08;
238 shift_ppm += env.hydrogen_neighbors as f64 * sensitivity * 0.03;
239 shift_ppm += env.formal_charge as f64 * sensitivity * 0.45;
240 if env.aromatic {
241 shift_ppm += sensitivity * 0.25;
242 }
243
244 shift_ppm +=
245 (env.mean_neighbor_atomic_number - nucleus.atomic_number() as f64) * 0.02 * sensitivity;
246 shift_ppm = apply_isotope_offset(shift_ppm, nucleus).clamp(min_ppm, max_ppm);
247
248 let environment = format!(
249 "relative_{}_coord{}_hetero{}_pi{}_q{}",
250 nucleus.element_symbol(),
251 env.heavy_neighbors,
252 env.hetero_neighbors,
253 env.pi_bonds,
254 env.formal_charge,
255 );
256 let confidence = if nucleus.is_primary_target() {
257 0.70
258 } else if nucleus.is_quadrupolar() {
259 0.28
260 } else {
261 0.38
262 };
263
264 (shift_ppm, environment, confidence)
265}
266
267fn predict_shift_for_nucleus(
268 mol: &Molecule,
269 idx: NodeIndex,
270 nucleus: NmrNucleus,
271) -> (f64, String, f64) {
272 match nucleus {
273 NmrNucleus::H1 => predict_h_shift(mol, idx),
274 NmrNucleus::H2 | NmrNucleus::H3 => {
275 let (shift_ppm, environment, confidence) = predict_h_shift(mol, idx);
276 (
277 apply_isotope_offset(shift_ppm, nucleus),
278 format!("{}_relative", environment),
279 confidence * 0.92,
280 )
281 }
282 NmrNucleus::C13 => predict_c_shift(mol, idx),
283 NmrNucleus::F19 => predict_f_shift(mol, idx),
284 NmrNucleus::P31 => predict_p_shift(mol, idx),
285 NmrNucleus::N15 => predict_n_shift(mol, idx),
286 NmrNucleus::N14 => {
287 let (shift_ppm, environment, confidence) = predict_n_shift(mol, idx);
288 (
289 apply_isotope_offset(shift_ppm, nucleus),
290 format!("{}_relative", environment),
291 confidence * 0.85,
292 )
293 }
294 NmrNucleus::B11 => predict_b_shift(mol, idx),
295 NmrNucleus::B10 => {
296 let (shift_ppm, environment, confidence) = predict_b_shift(mol, idx);
297 (
298 apply_isotope_offset(shift_ppm, nucleus),
299 format!("{}_relative", environment),
300 confidence * 0.85,
301 )
302 }
303 NmrNucleus::Si29 => predict_si_shift(mol, idx),
304 NmrNucleus::Se77 => predict_se_shift(mol, idx),
305 NmrNucleus::O17 => predict_o_shift(mol, idx),
306 NmrNucleus::S33 => predict_s_shift(mol, idx),
307 _ => generic_relative_shift(mol, idx, nucleus),
308 }
309}
310
311fn predict_h_shift(mol: &Molecule, h_idx: NodeIndex) -> (f64, String, f64) {
323 let neighbors: Vec<NodeIndex> = mol.graph.neighbors(h_idx).collect();
325 if neighbors.is_empty() {
326 return (0.0, "isolated_hydrogen".to_string(), 0.3);
327 }
328
329 let parent = neighbors[0];
330 let parent_elem = mol.graph[parent].element;
331 let parent_hyb = &mol.graph[parent].hybridization;
332
333 match parent_elem {
334 6 => {
336 let is_aromatic = mol
337 .graph
338 .edges(parent)
339 .any(|e| matches!(e.weight().order, BondOrder::Aromatic));
340
341 if is_aromatic {
342 let mut shift = 7.27;
344 for nb in mol.graph.neighbors(parent) {
346 if nb == h_idx {
347 continue;
348 }
349 let nb_elem = mol.graph[nb].element;
350 let en_diff = electronegativity(nb_elem) - electronegativity(6);
351 shift += en_diff * 0.3;
352 }
353 (shift, "aromatic_C-H".to_string(), 0.80)
354 } else {
355 match parent_hyb {
356 Hybridization::SP3 => {
357 let h_count = mol
359 .graph
360 .neighbors(parent)
361 .filter(|n| mol.graph[*n].element == 1)
362 .count();
363
364 let base = match h_count {
365 3 => 0.90, 2 => 1.30, _ => 1.50, };
369
370 let mut shift = base;
372 for nb in mol.graph.neighbors(parent) {
373 if mol.graph[nb].element == 1 {
374 continue;
375 }
376 let nb_elem = mol.graph[nb].element;
377 let en_diff = electronegativity(nb_elem) - electronegativity(6);
378 if en_diff > 0.3 {
379 shift += en_diff * 0.8;
380 }
381 }
382
383 for nb in mol.graph.neighbors(parent) {
385 if mol.graph[nb].element != 6 {
386 continue;
387 }
388 for nb2_edge in mol.graph.edges(nb) {
389 let nb2 = if nb2_edge.source() == nb {
390 nb2_edge.target()
391 } else {
392 nb2_edge.source()
393 };
394 if mol.graph[nb2].element == 8
395 && matches!(nb2_edge.weight().order, BondOrder::Double)
396 {
397 shift += 0.5;
398 break;
399 }
400 }
401 }
402
403 let env = if h_count >= 3 {
404 "methyl"
405 } else if h_count == 2 {
406 "methylene"
407 } else {
408 "methine"
409 };
410 (shift, format!("sp3_{}", env), 0.75)
411 }
412 Hybridization::SP2 => {
413 let mut shift = 5.25;
415 for nb in mol.graph.neighbors(parent) {
417 if nb == h_idx {
418 continue;
419 }
420 let nb_elem = mol.graph[nb].element;
421 let en_diff = electronegativity(nb_elem) - electronegativity(6);
422 shift += en_diff * 0.5;
423
424 if nb_elem == 8
426 && mol.graph.edges(parent).any(|e| {
427 let other = if e.source() == parent {
428 e.target()
429 } else {
430 e.source()
431 };
432 mol.graph[other].element == 8
433 && matches!(e.weight().order, BondOrder::Double)
434 })
435 {
436 shift = 9.50; return (shift, "aldehyde_C-H".to_string(), 0.82);
438 }
439 }
440 (shift, "alkene_C-H".to_string(), 0.70)
441 }
442 Hybridization::SP => {
443 (2.50, "alkyne_C-H".to_string(), 0.75)
445 }
446 _ => (1.5, "unknown_C-H".to_string(), 0.40),
447 }
448 }
449 }
450 8 => {
452 let is_carboxylic = mol.graph.neighbors(parent).any(|nb| {
454 if mol.graph[nb].element != 6 {
455 return false;
456 }
457 mol.graph.edges(nb).any(|e| {
458 let other = if e.source() == nb {
459 e.target()
460 } else {
461 e.source()
462 };
463 mol.graph[other].element == 8 && matches!(e.weight().order, BondOrder::Double)
464 })
465 });
466
467 if is_carboxylic {
468 (11.0, "carboxylic_acid_O-H".to_string(), 0.70)
469 } else {
470 let is_phenol = mol.graph.neighbors(parent).any(|nb| {
472 mol.graph[nb].element == 6
473 && mol
474 .graph
475 .edges(nb)
476 .any(|e| matches!(e.weight().order, BondOrder::Aromatic))
477 });
478 if is_phenol {
479 (5.5, "phenol_O-H".to_string(), 0.60)
480 } else {
481 (2.5, "alcohol_O-H".to_string(), 0.55)
482 }
483 }
484 }
485 7 => {
487 let is_aromatic_n = mol
488 .graph
489 .edges(parent)
490 .any(|e| matches!(e.weight().order, BondOrder::Aromatic));
491 if is_aromatic_n {
492 (8.5, "aromatic_N-H".to_string(), 0.55)
493 } else {
494 let h_on_n = mol
495 .graph
496 .neighbors(parent)
497 .filter(|n| mol.graph[*n].element == 1)
498 .count();
499 match h_on_n {
500 1 => (2.2, "secondary_amine_N-H".to_string(), 0.50),
501 _ => (1.5, "primary_amine_N-H".to_string(), 0.50),
502 }
503 }
504 }
505 16 => (1.8, "thiol_S-H".to_string(), 0.50),
507 _ => (2.0, "other_X-H".to_string(), 0.30),
508 }
509}
510
511fn predict_c_shift(mol: &Molecule, c_idx: NodeIndex) -> (f64, String, f64) {
521 let hyb = &mol.graph[c_idx].hybridization;
522 let is_aromatic = mol
523 .graph
524 .edges(c_idx)
525 .any(|e| matches!(e.weight().order, BondOrder::Aromatic));
526
527 let has_carbonyl_o = mol.graph.edges(c_idx).any(|e| {
529 let other = if e.source() == c_idx {
530 e.target()
531 } else {
532 e.source()
533 };
534 mol.graph[other].element == 8 && matches!(e.weight().order, BondOrder::Double)
535 });
536
537 if has_carbonyl_o {
538 let has_oh = mol.graph.neighbors(c_idx).any(|nb| {
540 if mol.graph[nb].element != 8 {
541 return false;
542 }
543 mol.graph.neighbors(nb).any(|n| mol.graph[n].element == 1)
544 });
545
546 if has_oh {
547 return (175.0, "carboxylic_acid_C".to_string(), 0.75);
549 }
550
551 let has_n = mol
552 .graph
553 .neighbors(c_idx)
554 .any(|nb| mol.graph[nb].element == 7);
555 if has_n {
556 return (170.0, "amide_C=O".to_string(), 0.72);
558 }
559
560 let has_h = mol
562 .graph
563 .neighbors(c_idx)
564 .any(|nb| mol.graph[nb].element == 1);
565 if has_h {
566 return (200.0, "aldehyde_C=O".to_string(), 0.75);
567 }
568 return (205.0, "ketone_C=O".to_string(), 0.73);
569 }
570
571 if is_aromatic {
572 let mut shift = 128.0;
573 for nb in mol.graph.neighbors(c_idx) {
575 let nb_elem = mol.graph[nb].element;
576 match nb_elem {
577 7 => shift += 5.0, 8 => shift -= 10.0, 9 => shift -= 5.0, 17 => shift += 2.0, 35 => shift += 3.0, _ => {}
583 }
584 }
585 return (shift, "aromatic_C".to_string(), 0.78);
586 }
587
588 match hyb {
589 Hybridization::SP3 => {
590 let mut shift = 20.0;
591 let heavy_neighbors: Vec<u8> = mol
592 .graph
593 .neighbors(c_idx)
594 .filter(|nb| mol.graph[*nb].element != 1)
595 .map(|nb| mol.graph[nb].element)
596 .collect();
597
598 let n_heavy = heavy_neighbors.len();
600 shift += (n_heavy as f64 - 1.0) * 8.0;
601
602 for &nb_elem in &heavy_neighbors {
604 let en_diff = electronegativity(nb_elem) - electronegativity(6);
605 if en_diff > 0.3 {
606 shift += en_diff * 10.0;
607 }
608 }
609
610 let env = match n_heavy {
611 0 | 1 => "methyl_C",
612 2 => "methylene_C",
613 3 => "methine_C",
614 _ => "quaternary_C",
615 };
616 (shift.clamp(0.0, 80.0), env.to_string(), 0.70)
617 }
618 Hybridization::SP2 => {
619 let mut shift = 125.0;
621 for nb in mol.graph.neighbors(c_idx) {
622 let nb_elem = mol.graph[nb].element;
623 let en_diff = electronegativity(nb_elem) - electronegativity(6);
624 shift += en_diff * 8.0;
625 }
626 (shift.clamp(80.0, 165.0), "alkene_C".to_string(), 0.65)
627 }
628 Hybridization::SP => {
629 let mut shift = 80.0;
631 let has_h = mol
632 .graph
633 .neighbors(c_idx)
634 .any(|nb| mol.graph[nb].element == 1);
635 if has_h {
636 shift = 70.0; }
638 (shift, "alkyne_C".to_string(), 0.70)
639 }
640 _ => (30.0, "unknown_C".to_string(), 0.40),
641 }
642}
643
644fn predict_f_shift(mol: &Molecule, f_idx: NodeIndex) -> (f64, String, f64) {
647 let neighbors: Vec<NodeIndex> = mol.graph.neighbors(f_idx).collect();
648 if neighbors.is_empty() {
649 return (-100.0, "isolated_F".to_string(), 0.30);
650 }
651 let parent = neighbors[0];
652 let parent_elem = mol.graph[parent].element;
653 let is_aromatic = mol
654 .graph
655 .edges(parent)
656 .any(|e| matches!(e.weight().order, BondOrder::Aromatic));
657
658 match parent_elem {
659 6 if is_aromatic => {
660 let mut shift = -113.0;
662 for nb in mol.graph.neighbors(parent) {
663 if nb == f_idx {
664 continue;
665 }
666 let nb_elem = mol.graph[nb].element;
667 let en_diff = electronegativity(nb_elem) - electronegativity(6);
668 shift -= en_diff * 3.0;
669 }
670 (shift, "aryl_C-F".to_string(), 0.65)
671 }
672 6 => {
673 let n_f_on_parent = mol
675 .graph
676 .neighbors(parent)
677 .filter(|n| mol.graph[*n].element == 9)
678 .count();
679 let shift = match n_f_on_parent {
680 3 => -63.0, 2 => -105.0, _ => -170.0, };
684 let env = match n_f_on_parent {
685 3 => "CF3",
686 2 => "CF2",
687 _ => "CHF",
688 };
689 (shift, format!("sp3_{}", env), 0.60)
690 }
691 _ => (-100.0, "other_X-F".to_string(), 0.40),
692 }
693}
694
695fn predict_p_shift(mol: &Molecule, p_idx: NodeIndex) -> (f64, String, f64) {
698 let heavy_neighbors: Vec<u8> = mol
699 .graph
700 .neighbors(p_idx)
701 .filter(|nb| mol.graph[*nb].element != 1)
702 .map(|nb| mol.graph[nb].element)
703 .collect();
704 let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
705 let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
706 let n_n = heavy_neighbors.iter().filter(|&&e| e == 7).count();
707 let has_double_o = mol.graph.edges(p_idx).any(|e| {
708 let other = if e.source() == p_idx {
709 e.target()
710 } else {
711 e.source()
712 };
713 mol.graph[other].element == 8 && matches!(e.weight().order, BondOrder::Double)
714 });
715
716 if has_double_o && n_o >= 3 {
717 (0.0, "phosphate_P=O".to_string(), 0.65)
719 } else if has_double_o && n_o >= 1 {
720 (30.0, "phosphonate_P=O".to_string(), 0.60)
722 } else if n_c >= 3 {
723 (-20.0, "trialkylphosphine".to_string(), 0.55)
725 } else if n_c >= 1 && n_o >= 1 {
726 (140.0, "phosphite".to_string(), 0.55)
728 } else if n_n >= 1 {
729 (25.0, "phosphoramide".to_string(), 0.50)
731 } else {
732 (0.0, "other_P".to_string(), 0.40)
733 }
734}
735
736fn predict_n_shift(mol: &Molecule, n_idx: NodeIndex) -> (f64, String, f64) {
740 let is_aromatic = mol
741 .graph
742 .edges(n_idx)
743 .any(|e| matches!(e.weight().order, BondOrder::Aromatic));
744 let heavy_neighbors: Vec<u8> = mol
745 .graph
746 .neighbors(n_idx)
747 .filter(|nb| mol.graph[*nb].element != 1)
748 .map(|nb| mol.graph[nb].element)
749 .collect();
750 let n_h = mol
751 .graph
752 .neighbors(n_idx)
753 .filter(|nb| mol.graph[*nb].element == 1)
754 .count();
755 let has_double = mol
756 .graph
757 .edges(n_idx)
758 .any(|e| matches!(e.weight().order, BondOrder::Double));
759
760 if is_aromatic {
761 (310.0, "aromatic_N".to_string(), 0.55)
763 } else if has_double {
764 (320.0, "imine_N=C".to_string(), 0.50)
766 } else if heavy_neighbors.contains(&8) && has_double {
767 (370.0, "nitro_N".to_string(), 0.50)
769 } else if n_h == 0 && heavy_neighbors.len() >= 3 {
770 (40.0, "tertiary_amine_N".to_string(), 0.55)
772 } else if n_h == 1 {
773 (50.0, "secondary_amine_N".to_string(), 0.50)
775 } else if n_h >= 2 {
776 (20.0, "primary_amine_N".to_string(), 0.55)
778 } else {
779 (30.0, "other_N".to_string(), 0.40)
780 }
781}
782
783fn predict_b_shift(mol: &Molecule, b_idx: NodeIndex) -> (f64, String, f64) {
786 let heavy_neighbors: Vec<u8> = mol
787 .graph
788 .neighbors(b_idx)
789 .filter(|nb| mol.graph[*nb].element != 1)
790 .map(|nb| mol.graph[nb].element)
791 .collect();
792 let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
793 let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
794 let n_f = heavy_neighbors.iter().filter(|&&e| e == 9).count();
795 let n_h = mol
796 .graph
797 .neighbors(b_idx)
798 .filter(|nb| mol.graph[*nb].element == 1)
799 .count();
800
801 if n_f >= 3 {
802 (0.0, "BF3".to_string(), 0.65) } else if n_o >= 2 {
804 (20.0, "boronic_acid_B(OH)2".to_string(), 0.55)
805 } else if n_c >= 2 && n_h == 0 {
806 (70.0, "triorganoborane_BR3".to_string(), 0.50)
807 } else if n_h >= 2 {
808 (-20.0, "borohydride_BH".to_string(), 0.50)
809 } else {
810 (30.0, "other_B".to_string(), 0.40)
811 }
812}
813
814fn predict_si_shift(mol: &Molecule, si_idx: NodeIndex) -> (f64, String, f64) {
817 let heavy_neighbors: Vec<u8> = mol
818 .graph
819 .neighbors(si_idx)
820 .filter(|nb| mol.graph[*nb].element != 1)
821 .map(|nb| mol.graph[nb].element)
822 .collect();
823 let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
824 let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
825 let n_cl = heavy_neighbors.iter().filter(|&&e| e == 17).count();
826
827 if n_c == 4 {
828 (0.0, "tetraalkylsilane_SiR4".to_string(), 0.65)
830 } else if n_o >= 4 {
831 (-110.0, "silicate_SiO4".to_string(), 0.55)
833 } else if n_o >= 2 {
834 (-50.0, "alkoxysilane_Si(OR)2".to_string(), 0.50)
836 } else if n_cl >= 1 {
837 (0.0 + n_cl as f64 * 10.0, "chlorosilane".to_string(), 0.50)
839 } else if n_c >= 1 && n_o >= 1 {
840 (-20.0, "organosiloxane".to_string(), 0.50)
841 } else {
842 (0.0, "other_Si".to_string(), 0.40)
843 }
844}
845
846fn predict_se_shift(mol: &Molecule, se_idx: NodeIndex) -> (f64, String, f64) {
849 let heavy_neighbors: Vec<u8> = mol
850 .graph
851 .neighbors(se_idx)
852 .filter(|nb| mol.graph[*nb].element != 1)
853 .map(|nb| mol.graph[nb].element)
854 .collect();
855 let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
856 let n_h = mol
857 .graph
858 .neighbors(se_idx)
859 .filter(|nb| mol.graph[*nb].element == 1)
860 .count();
861
862 if n_c >= 2 {
863 (0.0, "dialkyl_selenide".to_string(), 0.50)
865 } else if n_h >= 1 {
866 (-50.0, "selenol_SeH".to_string(), 0.45)
868 } else {
869 (200.0, "other_Se".to_string(), 0.35)
870 }
871}
872
873fn predict_o_shift(mol: &Molecule, o_idx: NodeIndex) -> (f64, String, f64) {
876 let is_double = mol
877 .graph
878 .edges(o_idx)
879 .any(|e| matches!(e.weight().order, BondOrder::Double));
880 let heavy_neighbors: Vec<u8> = mol
881 .graph
882 .neighbors(o_idx)
883 .filter(|nb| mol.graph[*nb].element != 1)
884 .map(|nb| mol.graph[nb].element)
885 .collect();
886 let n_h = mol
887 .graph
888 .neighbors(o_idx)
889 .filter(|nb| mol.graph[*nb].element == 1)
890 .count();
891
892 if is_double {
893 (550.0, "carbonyl_O=C".to_string(), 0.50)
895 } else if n_h >= 1 && heavy_neighbors.len() == 1 {
896 (0.0, "alcohol_O-H".to_string(), 0.55)
898 } else if heavy_neighbors.len() >= 2 {
899 (50.0, "ether_R-O-R".to_string(), 0.50)
901 } else {
902 (0.0, "other_O".to_string(), 0.40)
903 }
904}
905
906fn predict_s_shift(mol: &Molecule, s_idx: NodeIndex) -> (f64, String, f64) {
909 let is_double = mol
910 .graph
911 .edges(s_idx)
912 .any(|e| matches!(e.weight().order, BondOrder::Double));
913 let heavy_neighbors: Vec<u8> = mol
914 .graph
915 .neighbors(s_idx)
916 .filter(|nb| mol.graph[*nb].element != 1)
917 .map(|nb| mol.graph[nb].element)
918 .collect();
919 let n_o = heavy_neighbors.iter().filter(|&&e| e == 8).count();
920 let n_c = heavy_neighbors.iter().filter(|&&e| e == 6).count();
921 let n_h = mol
922 .graph
923 .neighbors(s_idx)
924 .filter(|nb| mol.graph[*nb].element == 1)
925 .count();
926
927 if n_o >= 3 {
928 (-10.0, "sulfonate_SO3".to_string(), 0.50)
930 } else if n_o >= 2 && is_double {
931 (0.0, "sulfone_SO2".to_string(), 0.50)
933 } else if n_o >= 1 && is_double {
934 (200.0, "sulfoxide_SO".to_string(), 0.45)
936 } else if is_double {
937 (300.0, "thioketone_C=S".to_string(), 0.45)
939 } else if n_c >= 2 {
940 (20.0, "dialkyl_sulfide".to_string(), 0.50)
942 } else if n_h >= 1 {
943 (-10.0, "thiol_SH".to_string(), 0.50)
945 } else {
946 (0.0, "other_S".to_string(), 0.40)
947 }
948}
949
950pub fn predict_chemical_shifts_for_nucleus(
951 mol: &Molecule,
952 nucleus: NmrNucleus,
953) -> Vec<ChemicalShift> {
954 let n = mol.graph.node_count();
955 let mut shifts = Vec::new();
956
957 for atom_idx in 0..n {
958 let idx = NodeIndex::new(atom_idx);
959 let element = mol.graph[idx].element;
960 if element != nucleus.atomic_number() {
961 continue;
962 }
963
964 let (shift_ppm, environment, confidence) = predict_shift_for_nucleus(mol, idx, nucleus);
965 shifts.push(ChemicalShift {
966 atom_index: atom_idx,
967 element,
968 shift_ppm,
969 environment,
970 confidence,
971 });
972 }
973
974 shifts
975}
976
977pub fn predict_chemical_shifts(mol: &Molecule) -> NmrShiftResult {
979 let n = mol.graph.node_count();
980 let mut h_shifts = Vec::new();
981 let mut c_shifts = Vec::new();
982 let mut f_shifts = Vec::new();
983 let mut p_shifts = Vec::new();
984 let mut n_shifts = Vec::new();
985 let mut b_shifts = Vec::new();
986 let mut si_shifts = Vec::new();
987 let mut se_shifts = Vec::new();
988 let mut o_shifts = Vec::new();
989 let mut s_shifts = Vec::new();
990 let mut other_shifts: BTreeMap<NmrNucleus, Vec<ChemicalShift>> = BTreeMap::new();
991
992 for atom_idx in 0..n {
993 let idx = NodeIndex::new(atom_idx);
994 let elem = mol.graph[idx].element;
995
996 let Some(nucleus) = NmrNucleus::default_for_element(elem) else {
997 continue;
998 };
999
1000 let (shift, env, conf) = predict_shift_for_nucleus(mol, idx, nucleus);
1001
1002 let cs = ChemicalShift {
1003 atom_index: atom_idx,
1004 element: elem,
1005 shift_ppm: shift,
1006 environment: env,
1007 confidence: conf,
1008 };
1009
1010 match nucleus {
1011 NmrNucleus::H1 => h_shifts.push(cs),
1012 NmrNucleus::C13 => c_shifts.push(cs),
1013 NmrNucleus::F19 => f_shifts.push(cs),
1014 NmrNucleus::P31 => p_shifts.push(cs),
1015 NmrNucleus::N15 => n_shifts.push(cs),
1016 NmrNucleus::B11 => b_shifts.push(cs),
1017 NmrNucleus::Si29 => si_shifts.push(cs),
1018 NmrNucleus::Se77 => se_shifts.push(cs),
1019 NmrNucleus::O17 => o_shifts.push(cs),
1020 NmrNucleus::S33 => s_shifts.push(cs),
1021 _ => {
1022 other_shifts.entry(nucleus).or_default().push(cs);
1023 }
1024 }
1025 }
1026
1027 let other_shifts = other_shifts
1028 .into_iter()
1029 .map(|(nucleus, shifts)| NucleusShiftSeries { nucleus, shifts })
1030 .collect();
1031
1032 NmrShiftResult {
1033 h_shifts,
1034 c_shifts,
1035 f_shifts,
1036 p_shifts,
1037 n_shifts,
1038 b_shifts,
1039 si_shifts,
1040 se_shifts,
1041 o_shifts,
1042 s_shifts,
1043 other_shifts,
1044 notes: vec![
1045 "Chemical shifts predicted using empirical additivity rules based on local atomic environment.".to_string(),
1046 "¹H and ¹³C remain the main accuracy targets. Other nuclei use fast relative inference intended for screening and trend inspection.".to_string(),
1047 format!(
1048 "Representative nuclei are exported through legacy fields plus other_shifts. Full per-nucleus access is available for {} nuclei.",
1049 NmrNucleus::ALL.len()
1050 ),
1051 "Quadrupolar nuclei are treated as quick relative estimates only; relative intensities and isotope abundances are not modeled in this path.".to_string(),
1052 ],
1053 }
1054}
1055
1056#[cfg(test)]
1057mod tests {
1058 use super::*;
1059
1060 #[test]
1061 fn test_ethanol_h_shifts() {
1062 let mol = Molecule::from_smiles("CCO").unwrap();
1063 let result = predict_chemical_shifts(&mol);
1064
1065 assert!(!result.h_shifts.is_empty(), "Should have H shifts");
1066 assert!(!result.c_shifts.is_empty(), "Should have C shifts");
1067
1068 for shift in &result.h_shifts {
1070 assert!(
1071 shift.shift_ppm >= 0.0 && shift.shift_ppm <= 15.0,
1072 "H shift {} out of range for atom {}",
1073 shift.shift_ppm,
1074 shift.atom_index
1075 );
1076 }
1077 }
1078
1079 #[test]
1080 fn test_benzene_h_shifts() {
1081 let mol = Molecule::from_smiles("c1ccccc1").unwrap();
1082 let result = predict_chemical_shifts(&mol);
1083
1084 let aromatic_h: Vec<&ChemicalShift> = result
1086 .h_shifts
1087 .iter()
1088 .filter(|s| s.environment.contains("aromatic"))
1089 .collect();
1090 assert!(
1091 !aromatic_h.is_empty(),
1092 "Benzene should have aromatic H shifts"
1093 );
1094 for shift in &aromatic_h {
1095 assert!(
1096 (shift.shift_ppm - 7.27).abs() < 1.5,
1097 "Aromatic H shift {} should be near 7.27 ppm",
1098 shift.shift_ppm
1099 );
1100 }
1101 }
1102
1103 #[test]
1104 fn test_acetic_acid_c_shifts() {
1105 let mol = Molecule::from_smiles("CC(=O)O").unwrap();
1106 let result = predict_chemical_shifts(&mol);
1107
1108 let carbonyl_c: Vec<&ChemicalShift> = result
1110 .c_shifts
1111 .iter()
1112 .filter(|s| s.environment.contains("carboxylic") || s.environment.contains("C=O"))
1113 .collect();
1114 assert!(
1115 !carbonyl_c.is_empty(),
1116 "Acetic acid should have a carbonyl C shift"
1117 );
1118 }
1119
1120 #[test]
1121 fn test_benzene_c13_shift() {
1122 let mol = Molecule::from_smiles("c1ccccc1").unwrap();
1123 let result = predict_chemical_shifts(&mol);
1124
1125 for shift in &result.c_shifts {
1127 assert!(
1128 (shift.shift_ppm - 128.0).abs() < 15.0,
1129 "Aromatic C shift {} should be near 128 ppm",
1130 shift.shift_ppm
1131 );
1132 }
1133 }
1134
1135 #[test]
1136 fn test_aldehyde_h_shift() {
1137 let mol = Molecule::from_smiles("CC=O").unwrap();
1138 let result = predict_chemical_shifts(&mol);
1139
1140 let _aldehyde_h: Vec<&ChemicalShift> = result
1143 .h_shifts
1144 .iter()
1145 .filter(|s| s.environment.contains("aldehyde") || s.shift_ppm > 9.0)
1146 .collect();
1147 assert!(!result.h_shifts.is_empty());
1149 }
1150}