1use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone)]
11pub struct GroupFrequency {
12 pub group: &'static str,
14 pub freq_min: f64,
16 pub freq_max: f64,
18 pub intensity: &'static str,
20 pub vibration_type: &'static str,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct PeakAssignment {
27 pub frequency: f64,
29 pub intensity: f64,
31 pub assignments: Vec<FunctionalGroupMatch>,
33 pub confidence: f64,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct FunctionalGroupMatch {
40 pub group: String,
42 pub vibration_type: String,
44 pub match_quality: f64,
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct AssignmentResult {
51 pub assignments: Vec<PeakAssignment>,
53 pub functional_groups: Vec<String>,
55}
56
57const GROUP_FREQUENCIES: &[GroupFrequency] = &[
59 GroupFrequency {
61 group: "O-H (free)",
62 freq_min: 3590.0,
63 freq_max: 3650.0,
64 intensity: "strong",
65 vibration_type: "stretch",
66 },
67 GroupFrequency {
68 group: "O-H (H-bonded)",
69 freq_min: 3200.0,
70 freq_max: 3570.0,
71 intensity: "strong, broad",
72 vibration_type: "stretch",
73 },
74 GroupFrequency {
75 group: "O-H (carboxylic)",
76 freq_min: 2500.0,
77 freq_max: 3300.0,
78 intensity: "strong, broad",
79 vibration_type: "stretch",
80 },
81 GroupFrequency {
83 group: "N-H (primary amine)",
84 freq_min: 3300.0,
85 freq_max: 3500.0,
86 intensity: "medium",
87 vibration_type: "stretch",
88 },
89 GroupFrequency {
90 group: "N-H (secondary amine)",
91 freq_min: 3310.0,
92 freq_max: 3350.0,
93 intensity: "medium",
94 vibration_type: "stretch",
95 },
96 GroupFrequency {
97 group: "N-H (amide)",
98 freq_min: 3180.0,
99 freq_max: 3360.0,
100 intensity: "medium",
101 vibration_type: "stretch",
102 },
103 GroupFrequency {
105 group: "C-H (sp3)",
106 freq_min: 2850.0,
107 freq_max: 2960.0,
108 intensity: "strong",
109 vibration_type: "stretch",
110 },
111 GroupFrequency {
112 group: "C-H (sp2)",
113 freq_min: 3010.0,
114 freq_max: 3100.0,
115 intensity: "medium",
116 vibration_type: "stretch",
117 },
118 GroupFrequency {
119 group: "C-H (sp, alkyne)",
120 freq_min: 3300.0,
121 freq_max: 3320.0,
122 intensity: "strong",
123 vibration_type: "stretch",
124 },
125 GroupFrequency {
126 group: "C-H (aldehyde)",
127 freq_min: 2700.0,
128 freq_max: 2830.0,
129 intensity: "weak",
130 vibration_type: "stretch",
131 },
132 GroupFrequency {
134 group: "C≡C",
135 freq_min: 2100.0,
136 freq_max: 2260.0,
137 intensity: "weak",
138 vibration_type: "stretch",
139 },
140 GroupFrequency {
141 group: "C≡N",
142 freq_min: 2210.0,
143 freq_max: 2260.0,
144 intensity: "medium",
145 vibration_type: "stretch",
146 },
147 GroupFrequency {
149 group: "C=O (ketone)",
150 freq_min: 1705.0,
151 freq_max: 1725.0,
152 intensity: "strong",
153 vibration_type: "stretch",
154 },
155 GroupFrequency {
156 group: "C=O (aldehyde)",
157 freq_min: 1720.0,
158 freq_max: 1740.0,
159 intensity: "strong",
160 vibration_type: "stretch",
161 },
162 GroupFrequency {
163 group: "C=O (carboxylic acid)",
164 freq_min: 1700.0,
165 freq_max: 1725.0,
166 intensity: "strong",
167 vibration_type: "stretch",
168 },
169 GroupFrequency {
170 group: "C=O (ester)",
171 freq_min: 1735.0,
172 freq_max: 1750.0,
173 intensity: "strong",
174 vibration_type: "stretch",
175 },
176 GroupFrequency {
177 group: "C=O (amide)",
178 freq_min: 1630.0,
179 freq_max: 1680.0,
180 intensity: "strong",
181 vibration_type: "stretch",
182 },
183 GroupFrequency {
184 group: "C=C (alkene)",
185 freq_min: 1620.0,
186 freq_max: 1680.0,
187 intensity: "variable",
188 vibration_type: "stretch",
189 },
190 GroupFrequency {
191 group: "C=C (aromatic)",
192 freq_min: 1450.0,
193 freq_max: 1600.0,
194 intensity: "medium",
195 vibration_type: "stretch",
196 },
197 GroupFrequency {
198 group: "C=N",
199 freq_min: 1600.0,
200 freq_max: 1660.0,
201 intensity: "medium",
202 vibration_type: "stretch",
203 },
204 GroupFrequency {
205 group: "N=O (nitro)",
206 freq_min: 1515.0,
207 freq_max: 1560.0,
208 intensity: "strong",
209 vibration_type: "asymm. stretch",
210 },
211 GroupFrequency {
212 group: "N=O (nitro)",
213 freq_min: 1340.0,
214 freq_max: 1385.0,
215 intensity: "strong",
216 vibration_type: "symm. stretch",
217 },
218 GroupFrequency {
219 group: "C=O (acid chloride)",
220 freq_min: 1780.0,
221 freq_max: 1815.0,
222 intensity: "strong",
223 vibration_type: "stretch",
224 },
225 GroupFrequency {
226 group: "C=O (anhydride)",
227 freq_min: 1760.0,
228 freq_max: 1810.0,
229 intensity: "strong",
230 vibration_type: "asymm. stretch",
231 },
232 GroupFrequency {
233 group: "C=O (anhydride)",
234 freq_min: 1740.0,
235 freq_max: 1780.0,
236 intensity: "strong",
237 vibration_type: "symm. stretch",
238 },
239 GroupFrequency {
240 group: "N=C=O (isocyanate)",
241 freq_min: 2250.0,
242 freq_max: 2275.0,
243 intensity: "strong",
244 vibration_type: "stretch",
245 },
246 GroupFrequency {
247 group: "C=C=O (ketene)",
248 freq_min: 2100.0,
249 freq_max: 2140.0,
250 intensity: "strong",
251 vibration_type: "stretch",
252 },
253 GroupFrequency {
255 group: "C-H (methyl)",
256 freq_min: 1370.0,
257 freq_max: 1380.0,
258 intensity: "medium",
259 vibration_type: "bend",
260 },
261 GroupFrequency {
262 group: "C-H (methylene)",
263 freq_min: 1440.0,
264 freq_max: 1465.0,
265 intensity: "medium",
266 vibration_type: "bend",
267 },
268 GroupFrequency {
269 group: "O-H",
270 freq_min: 1200.0,
271 freq_max: 1440.0,
272 intensity: "strong",
273 vibration_type: "bend",
274 },
275 GroupFrequency {
276 group: "N-H",
277 freq_min: 1550.0,
278 freq_max: 1640.0,
279 intensity: "medium",
280 vibration_type: "bend",
281 },
282 GroupFrequency {
283 group: "H-O-H bend",
284 freq_min: 1590.0,
285 freq_max: 1665.0,
286 intensity: "strong",
287 vibration_type: "bend",
288 },
289 GroupFrequency {
291 group: "C-O (alcohol)",
292 freq_min: 1000.0,
293 freq_max: 1100.0,
294 intensity: "strong",
295 vibration_type: "stretch",
296 },
297 GroupFrequency {
298 group: "C-O (ether)",
299 freq_min: 1070.0,
300 freq_max: 1150.0,
301 intensity: "strong",
302 vibration_type: "stretch",
303 },
304 GroupFrequency {
305 group: "C-N",
306 freq_min: 1020.0,
307 freq_max: 1250.0,
308 intensity: "medium",
309 vibration_type: "stretch",
310 },
311 GroupFrequency {
312 group: "C-F",
313 freq_min: 1000.0,
314 freq_max: 1350.0,
315 intensity: "strong",
316 vibration_type: "stretch",
317 },
318 GroupFrequency {
319 group: "C-Cl",
320 freq_min: 600.0,
321 freq_max: 800.0,
322 intensity: "strong",
323 vibration_type: "stretch",
324 },
325 GroupFrequency {
326 group: "C-Br",
327 freq_min: 500.0,
328 freq_max: 680.0,
329 intensity: "strong",
330 vibration_type: "stretch",
331 },
332 GroupFrequency {
333 group: "C-I",
334 freq_min: 485.0,
335 freq_max: 600.0,
336 intensity: "strong",
337 vibration_type: "stretch",
338 },
339 GroupFrequency {
341 group: "S=O (sulfoxide)",
342 freq_min: 1030.0,
343 freq_max: 1070.0,
344 intensity: "strong",
345 vibration_type: "stretch",
346 },
347 GroupFrequency {
348 group: "S=O (sulfone)",
349 freq_min: 1120.0,
350 freq_max: 1160.0,
351 intensity: "strong",
352 vibration_type: "asymm. stretch",
353 },
354 GroupFrequency {
355 group: "P=O",
356 freq_min: 1100.0,
357 freq_max: 1300.0,
358 intensity: "strong",
359 vibration_type: "stretch",
360 },
361 GroupFrequency {
362 group: "P-O-C",
363 freq_min: 900.0,
364 freq_max: 1050.0,
365 intensity: "strong",
366 vibration_type: "stretch",
367 },
368 GroupFrequency {
370 group: "M-H",
371 freq_min: 1600.0,
372 freq_max: 2100.0,
373 intensity: "medium",
374 vibration_type: "stretch",
375 },
376 GroupFrequency {
377 group: "M-CO (bridging carbonyl)",
378 freq_min: 1750.0,
379 freq_max: 1850.0,
380 intensity: "strong",
381 vibration_type: "stretch",
382 },
383 GroupFrequency {
384 group: "M-CO (terminal carbonyl)",
385 freq_min: 1850.0,
386 freq_max: 2120.0,
387 intensity: "strong",
388 vibration_type: "stretch",
389 },
390 GroupFrequency {
391 group: "M=O (terminal oxo)",
392 freq_min: 850.0,
393 freq_max: 1030.0,
394 intensity: "strong",
395 vibration_type: "stretch",
396 },
397 GroupFrequency {
398 group: "M-C",
399 freq_min: 400.0,
400 freq_max: 650.0,
401 intensity: "medium",
402 vibration_type: "stretch",
403 },
404 GroupFrequency {
405 group: "M-N",
406 freq_min: 400.0,
407 freq_max: 650.0,
408 intensity: "medium",
409 vibration_type: "stretch",
410 },
411 GroupFrequency {
412 group: "M-O",
413 freq_min: 350.0,
414 freq_max: 700.0,
415 intensity: "strong",
416 vibration_type: "stretch",
417 },
418 GroupFrequency {
419 group: "M-P",
420 freq_min: 350.0,
421 freq_max: 500.0,
422 intensity: "medium",
423 vibration_type: "stretch",
424 },
425 GroupFrequency {
426 group: "M-S",
427 freq_min: 250.0,
428 freq_max: 500.0,
429 intensity: "medium",
430 vibration_type: "stretch",
431 },
432 GroupFrequency {
433 group: "M-Cl",
434 freq_min: 250.0,
435 freq_max: 400.0,
436 intensity: "strong",
437 vibration_type: "stretch",
438 },
439 GroupFrequency {
440 group: "M-Br",
441 freq_min: 180.0,
442 freq_max: 320.0,
443 intensity: "medium",
444 vibration_type: "stretch",
445 },
446 GroupFrequency {
447 group: "M-I",
448 freq_min: 150.0,
449 freq_max: 260.0,
450 intensity: "medium",
451 vibration_type: "stretch",
452 },
453];
454
455pub fn assign_peaks(
457 frequencies: &[f64],
458 intensities: &[f64],
459 elements: &[u8],
460 mode_displacements: Option<&[Vec<[f64; 3]>]>,
461) -> AssignmentResult {
462 let mut assignments = Vec::new();
463 let mut found_groups = std::collections::BTreeSet::new();
464
465 for (i, &freq) in frequencies.iter().enumerate() {
466 if freq < 150.0 {
467 continue; }
469
470 let intensity = if i < intensities.len() {
471 intensities[i]
472 } else {
473 0.0
474 };
475
476 let mut matches = Vec::new();
477
478 for gf in GROUP_FREQUENCIES {
479 if freq >= gf.freq_min && freq <= gf.freq_max {
480 let mode_matches = mode_displacements
482 .and_then(|all_modes| all_modes.get(i))
483 .map(|mode| group_matches_mode(gf.group, elements, mode))
484 .unwrap_or(true);
485
486 if group_is_possible(gf.group, elements) && mode_matches {
487 let center = (gf.freq_min + gf.freq_max) / 2.0;
488 let range = gf.freq_max - gf.freq_min;
489 let match_quality = 1.0 - (freq - center).abs() / (range / 2.0 + 1.0);
490
491 matches.push(FunctionalGroupMatch {
492 group: gf.group.to_string(),
493 vibration_type: gf.vibration_type.to_string(),
494 match_quality: match_quality.clamp(0.0, 1.0),
495 });
496
497 found_groups.insert(gf.group.to_string());
498 }
499 }
500 }
501
502 let confidence = if matches.is_empty() {
503 0.0
504 } else {
505 matches.iter().map(|m| m.match_quality).fold(0.0, f64::max)
506 };
507
508 assignments.push(PeakAssignment {
509 frequency: freq,
510 intensity,
511 assignments: matches,
512 confidence,
513 });
514 }
515
516 AssignmentResult {
517 assignments,
518 functional_groups: found_groups.into_iter().collect(),
519 }
520}
521
522fn dominant_mode_elements(
523 elements: &[u8],
524 mode_displacement: &[[f64; 3]],
525) -> std::collections::BTreeSet<u8> {
526 let amplitudes: Vec<f64> = mode_displacement
527 .iter()
528 .map(|vector| {
529 (vector[0] * vector[0] + vector[1] * vector[1] + vector[2] * vector[2]).sqrt()
530 })
531 .collect();
532 let max_amplitude = amplitudes.iter().copied().fold(0.0_f64, f64::max);
533 if max_amplitude <= 1e-8 {
534 return std::collections::BTreeSet::new();
535 }
536
537 amplitudes
538 .iter()
539 .enumerate()
540 .filter(|(_, amplitude)| **amplitude >= max_amplitude * 0.35)
541 .filter_map(|(index, _)| elements.get(index).copied())
542 .collect()
543}
544
545fn group_matches_mode(group: &str, elements: &[u8], mode_displacement: &[[f64; 3]]) -> bool {
546 let dominant = dominant_mode_elements(elements, mode_displacement);
547 if dominant.is_empty() {
548 return true;
549 }
550
551 let has = |z: u8| dominant.contains(&z);
552 let has_metal = dominant.iter().copied().any(is_metal_atomic_number);
553
554 if group.starts_with("M-") || group.starts_with("M=") {
555 if !has_metal {
556 return false;
557 }
558 if group.contains("-H") {
559 return has(1);
560 }
561 if group.contains("-CO") {
562 return has(6) && has(8);
563 }
564 if group.contains("=O") || group.contains("-O") {
565 return has(8);
566 }
567 if group.contains("-N") {
568 return has(7);
569 }
570 if group.contains("-S") {
571 return has(16);
572 }
573 if group.contains("-P") {
574 return has(15);
575 }
576 if group.contains("-Cl") {
577 return has(17);
578 }
579 if group.contains("-Br") {
580 return has(35);
581 }
582 if group.contains("-I") {
583 return has(53);
584 }
585 if group == "M-C" {
586 return has(6);
587 }
588 return true;
589 }
590
591 if group.contains("O-H") || group == "H-O-H bend" {
592 return has(8) && has(1);
593 }
594 if group.contains("N-H") {
595 return has(7) && has(1);
596 }
597 if group.contains("C-H") {
598 return has(6) && has(1);
599 }
600 if group.contains("C=O") {
601 return has(6) && has(8);
602 }
603 if group.contains("C-O") {
604 return has(6) && has(8);
605 }
606 if group.contains("C-N") || group.contains("C=N") {
607 return has(6) && has(7);
608 }
609 if group.contains("C≡N") {
610 return has(6) && has(7);
611 }
612 if group.contains("C≡C") || group.contains("C=C") {
613 return has(6);
614 }
615 if group.contains("S=O") {
616 return has(16) && has(8);
617 }
618 if group.contains("P=O") {
619 return has(15) && has(8);
620 }
621 if group.contains("P-O-C") {
622 return has(15) && has(8) && has(6);
623 }
624 if group.contains("C-F") {
625 return has(6) && has(9);
626 }
627 if group.contains("C-Cl") {
628 return has(6) && has(17);
629 }
630 if group.contains("C-Br") {
631 return has(6) && has(35);
632 }
633 if group.contains("C-I") {
634 return has(6) && has(53);
635 }
636
637 true
638}
639
640fn group_is_possible(group: &str, elements: &[u8]) -> bool {
642 let has = |z: u8| elements.contains(&z);
643 let has_metal = elements.iter().copied().any(is_metal_atomic_number);
644
645 if group.starts_with("M-") || group.starts_with("M=") {
646 if !has_metal {
647 return false;
648 }
649 if group.contains("-H") {
650 return has(1);
651 }
652 if group.contains("-CO") {
653 return has(6) && has(8);
654 }
655 if group.contains("=O") || group.contains("-O") {
656 return has(8);
657 }
658 if group.contains("-N") {
659 return has(7);
660 }
661 if group.contains("-S") {
662 return has(16);
663 }
664 if group.contains("-P") {
665 return has(15);
666 }
667 if group.contains("-Cl") {
668 return has(17);
669 }
670 if group.contains("-Br") {
671 return has(35);
672 }
673 if group.contains("-I") {
674 return has(53);
675 }
676 if group == "M-C" {
677 return has(6);
678 }
679
680 return true;
681 }
682
683 if group.contains("O-H") || group.contains("C-O") || group.contains("C=O") {
684 return has(8); }
686 if group.contains("N-H")
687 || group.contains("C-N")
688 || group.contains("C=N")
689 || group.contains("C≡N")
690 || group.contains("N=O")
691 {
692 return has(7); }
694 if group.contains("C-F") {
695 return has(9); }
697 if group.contains("C-Cl") {
698 return has(17); }
700 if group.contains("C-Br") {
701 return has(35); }
703 if group.contains("C-I") {
704 return has(53); }
706 if group.contains("S=O") {
707 return has(16); }
709 if group.contains("P=O") || group.contains("P-O") {
710 return has(15); }
712 if group.contains("C-H") || group.contains("C=C") || group.contains("C≡C") {
713 return has(6); }
715
716 true }
718
719fn is_metal_atomic_number(z: u8) -> bool {
720 matches!(z, 3 | 4 | 11 | 12 | 13 | 19 | 20 | 21..=31 | 37..=50 | 55..=84 | 87..=103)
721}
722
723#[cfg(test)]
724mod tests {
725 use super::*;
726
727 #[test]
728 fn test_peak_assignment_carbonyl() {
729 let freqs = vec![1720.0, 2950.0, 1450.0];
730 let intensities = vec![1.0, 0.5, 0.3];
731 let elements = vec![6u8, 6, 8, 1, 1, 1, 1]; let result = assign_peaks(&freqs, &intensities, &elements, None);
734 assert!(!result.assignments.is_empty());
735
736 let carbonyl = &result.assignments[0];
738 assert!(carbonyl.assignments.iter().any(|a| a.group.contains("C=O")));
739 }
740
741 #[test]
742 fn test_group_possible() {
743 assert!(group_is_possible("O-H (free)", &[6, 8, 1]));
744 assert!(!group_is_possible("C-Cl", &[6, 8, 1]));
745 assert!(group_is_possible("C-H (sp3)", &[6, 1]));
746 }
747
748 #[test]
749 fn test_peak_assignment_metal_ligand_bands() {
750 let freqs = vec![295.0, 520.0, 1985.0];
751 let intensities = vec![0.4, 0.7, 1.0];
752 let elements = vec![26u8, 17, 8, 6];
753
754 let result = assign_peaks(&freqs, &intensities, &elements, None);
755
756 assert!(result.assignments[0]
757 .assignments
758 .iter()
759 .any(|a| a.group == "M-Cl"));
760 assert!(result.assignments[1]
761 .assignments
762 .iter()
763 .any(|a| a.group == "M-O"));
764 assert!(result.assignments[2]
765 .assignments
766 .iter()
767 .any(|a| a.group == "M-CO (terminal carbonyl)"));
768 }
769}