Skip to main content

sci_form/ir/
peak_assignment.rs

1//! Automatic IR peak assignment using group frequency tables.
2//!
3//! Maps computed vibrational frequencies to chemical functional groups
4//! by matching against standard IR group frequency ranges and analyzing
5//! the normal mode character (which atoms move).
6
7use serde::{Deserialize, Serialize};
8
9/// A group frequency range for functional group identification.
10#[derive(Debug, Clone)]
11pub struct GroupFrequency {
12    /// Functional group name.
13    pub group: &'static str,
14    /// Lower frequency bound (cm⁻¹).
15    pub freq_min: f64,
16    /// Upper frequency bound (cm⁻¹).
17    pub freq_max: f64,
18    /// Expected intensity: "strong", "medium", "weak", "variable".
19    pub intensity: &'static str,
20    /// Description of the vibration type.
21    pub vibration_type: &'static str,
22}
23
24/// Assigned IR peak with functional group identification.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct PeakAssignment {
27    /// Frequency (cm⁻¹).
28    pub frequency: f64,
29    /// Computed intensity.
30    pub intensity: f64,
31    /// Assigned functional group(s).
32    pub assignments: Vec<FunctionalGroupMatch>,
33    /// Confidence score (0-1).
34    pub confidence: f64,
35}
36
37/// A functional group match for a peak.
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct FunctionalGroupMatch {
40    /// Group name.
41    pub group: String,
42    /// Vibration type (stretch, bend, etc.).
43    pub vibration_type: String,
44    /// How well the frequency matches (0-1).
45    pub match_quality: f64,
46}
47
48/// IR peak assignment result.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct AssignmentResult {
51    /// Assigned peaks.
52    pub assignments: Vec<PeakAssignment>,
53    /// Identified functional groups in the molecule.
54    pub functional_groups: Vec<String>,
55}
56
57/// Standard IR group frequency table.
58const GROUP_FREQUENCIES: &[GroupFrequency] = &[
59    // O-H stretches
60    GroupFrequency {
61        group: "O-H (free)",
62        freq_min: 3590.0,
63        freq_max: 3650.0,
64        intensity: "strong",
65        vibration_type: "stretch",
66    },
67    GroupFrequency {
68        group: "O-H (H-bonded)",
69        freq_min: 3200.0,
70        freq_max: 3570.0,
71        intensity: "strong, broad",
72        vibration_type: "stretch",
73    },
74    GroupFrequency {
75        group: "O-H (carboxylic)",
76        freq_min: 2500.0,
77        freq_max: 3300.0,
78        intensity: "strong, broad",
79        vibration_type: "stretch",
80    },
81    // N-H stretches
82    GroupFrequency {
83        group: "N-H (primary amine)",
84        freq_min: 3300.0,
85        freq_max: 3500.0,
86        intensity: "medium",
87        vibration_type: "stretch",
88    },
89    GroupFrequency {
90        group: "N-H (secondary amine)",
91        freq_min: 3310.0,
92        freq_max: 3350.0,
93        intensity: "medium",
94        vibration_type: "stretch",
95    },
96    GroupFrequency {
97        group: "N-H (amide)",
98        freq_min: 3180.0,
99        freq_max: 3360.0,
100        intensity: "medium",
101        vibration_type: "stretch",
102    },
103    // C-H stretches
104    GroupFrequency {
105        group: "C-H (sp3)",
106        freq_min: 2850.0,
107        freq_max: 2960.0,
108        intensity: "strong",
109        vibration_type: "stretch",
110    },
111    GroupFrequency {
112        group: "C-H (sp2)",
113        freq_min: 3010.0,
114        freq_max: 3100.0,
115        intensity: "medium",
116        vibration_type: "stretch",
117    },
118    GroupFrequency {
119        group: "C-H (sp, alkyne)",
120        freq_min: 3300.0,
121        freq_max: 3320.0,
122        intensity: "strong",
123        vibration_type: "stretch",
124    },
125    GroupFrequency {
126        group: "C-H (aldehyde)",
127        freq_min: 2700.0,
128        freq_max: 2830.0,
129        intensity: "weak",
130        vibration_type: "stretch",
131    },
132    // Triple bonds
133    GroupFrequency {
134        group: "C≡C",
135        freq_min: 2100.0,
136        freq_max: 2260.0,
137        intensity: "weak",
138        vibration_type: "stretch",
139    },
140    GroupFrequency {
141        group: "C≡N",
142        freq_min: 2210.0,
143        freq_max: 2260.0,
144        intensity: "medium",
145        vibration_type: "stretch",
146    },
147    // Double bonds
148    GroupFrequency {
149        group: "C=O (ketone)",
150        freq_min: 1705.0,
151        freq_max: 1725.0,
152        intensity: "strong",
153        vibration_type: "stretch",
154    },
155    GroupFrequency {
156        group: "C=O (aldehyde)",
157        freq_min: 1720.0,
158        freq_max: 1740.0,
159        intensity: "strong",
160        vibration_type: "stretch",
161    },
162    GroupFrequency {
163        group: "C=O (carboxylic acid)",
164        freq_min: 1700.0,
165        freq_max: 1725.0,
166        intensity: "strong",
167        vibration_type: "stretch",
168    },
169    GroupFrequency {
170        group: "C=O (ester)",
171        freq_min: 1735.0,
172        freq_max: 1750.0,
173        intensity: "strong",
174        vibration_type: "stretch",
175    },
176    GroupFrequency {
177        group: "C=O (amide)",
178        freq_min: 1630.0,
179        freq_max: 1680.0,
180        intensity: "strong",
181        vibration_type: "stretch",
182    },
183    GroupFrequency {
184        group: "C=C (alkene)",
185        freq_min: 1620.0,
186        freq_max: 1680.0,
187        intensity: "variable",
188        vibration_type: "stretch",
189    },
190    GroupFrequency {
191        group: "C=C (aromatic)",
192        freq_min: 1450.0,
193        freq_max: 1600.0,
194        intensity: "medium",
195        vibration_type: "stretch",
196    },
197    GroupFrequency {
198        group: "C=N",
199        freq_min: 1600.0,
200        freq_max: 1660.0,
201        intensity: "medium",
202        vibration_type: "stretch",
203    },
204    GroupFrequency {
205        group: "N=O (nitro)",
206        freq_min: 1515.0,
207        freq_max: 1560.0,
208        intensity: "strong",
209        vibration_type: "asymm. stretch",
210    },
211    GroupFrequency {
212        group: "N=O (nitro)",
213        freq_min: 1340.0,
214        freq_max: 1385.0,
215        intensity: "strong",
216        vibration_type: "symm. stretch",
217    },
218    GroupFrequency {
219        group: "C=O (acid chloride)",
220        freq_min: 1780.0,
221        freq_max: 1815.0,
222        intensity: "strong",
223        vibration_type: "stretch",
224    },
225    GroupFrequency {
226        group: "C=O (anhydride)",
227        freq_min: 1760.0,
228        freq_max: 1810.0,
229        intensity: "strong",
230        vibration_type: "asymm. stretch",
231    },
232    GroupFrequency {
233        group: "C=O (anhydride)",
234        freq_min: 1740.0,
235        freq_max: 1780.0,
236        intensity: "strong",
237        vibration_type: "symm. stretch",
238    },
239    GroupFrequency {
240        group: "N=C=O (isocyanate)",
241        freq_min: 2250.0,
242        freq_max: 2275.0,
243        intensity: "strong",
244        vibration_type: "stretch",
245    },
246    GroupFrequency {
247        group: "C=C=O (ketene)",
248        freq_min: 2100.0,
249        freq_max: 2140.0,
250        intensity: "strong",
251        vibration_type: "stretch",
252    },
253    // Bending
254    GroupFrequency {
255        group: "C-H (methyl)",
256        freq_min: 1370.0,
257        freq_max: 1380.0,
258        intensity: "medium",
259        vibration_type: "bend",
260    },
261    GroupFrequency {
262        group: "C-H (methylene)",
263        freq_min: 1440.0,
264        freq_max: 1465.0,
265        intensity: "medium",
266        vibration_type: "bend",
267    },
268    GroupFrequency {
269        group: "O-H",
270        freq_min: 1200.0,
271        freq_max: 1440.0,
272        intensity: "strong",
273        vibration_type: "bend",
274    },
275    GroupFrequency {
276        group: "N-H",
277        freq_min: 1550.0,
278        freq_max: 1640.0,
279        intensity: "medium",
280        vibration_type: "bend",
281    },
282    GroupFrequency {
283        group: "H-O-H bend",
284        freq_min: 1590.0,
285        freq_max: 1665.0,
286        intensity: "strong",
287        vibration_type: "bend",
288    },
289    // Single bond stretches
290    GroupFrequency {
291        group: "C-O (alcohol)",
292        freq_min: 1000.0,
293        freq_max: 1100.0,
294        intensity: "strong",
295        vibration_type: "stretch",
296    },
297    GroupFrequency {
298        group: "C-O (ether)",
299        freq_min: 1070.0,
300        freq_max: 1150.0,
301        intensity: "strong",
302        vibration_type: "stretch",
303    },
304    GroupFrequency {
305        group: "C-N",
306        freq_min: 1020.0,
307        freq_max: 1250.0,
308        intensity: "medium",
309        vibration_type: "stretch",
310    },
311    GroupFrequency {
312        group: "C-F",
313        freq_min: 1000.0,
314        freq_max: 1350.0,
315        intensity: "strong",
316        vibration_type: "stretch",
317    },
318    GroupFrequency {
319        group: "C-Cl",
320        freq_min: 600.0,
321        freq_max: 800.0,
322        intensity: "strong",
323        vibration_type: "stretch",
324    },
325    GroupFrequency {
326        group: "C-Br",
327        freq_min: 500.0,
328        freq_max: 680.0,
329        intensity: "strong",
330        vibration_type: "stretch",
331    },
332    GroupFrequency {
333        group: "C-I",
334        freq_min: 485.0,
335        freq_max: 600.0,
336        intensity: "strong",
337        vibration_type: "stretch",
338    },
339    // Fingerprint region
340    GroupFrequency {
341        group: "S=O (sulfoxide)",
342        freq_min: 1030.0,
343        freq_max: 1070.0,
344        intensity: "strong",
345        vibration_type: "stretch",
346    },
347    GroupFrequency {
348        group: "S=O (sulfone)",
349        freq_min: 1120.0,
350        freq_max: 1160.0,
351        intensity: "strong",
352        vibration_type: "asymm. stretch",
353    },
354    GroupFrequency {
355        group: "P=O",
356        freq_min: 1100.0,
357        freq_max: 1300.0,
358        intensity: "strong",
359        vibration_type: "stretch",
360    },
361    GroupFrequency {
362        group: "P-O-C",
363        freq_min: 900.0,
364        freq_max: 1050.0,
365        intensity: "strong",
366        vibration_type: "stretch",
367    },
368    // Metal-ligand and coordination bands
369    GroupFrequency {
370        group: "M-H",
371        freq_min: 1600.0,
372        freq_max: 2100.0,
373        intensity: "medium",
374        vibration_type: "stretch",
375    },
376    GroupFrequency {
377        group: "M-CO (bridging carbonyl)",
378        freq_min: 1750.0,
379        freq_max: 1850.0,
380        intensity: "strong",
381        vibration_type: "stretch",
382    },
383    GroupFrequency {
384        group: "M-CO (terminal carbonyl)",
385        freq_min: 1850.0,
386        freq_max: 2120.0,
387        intensity: "strong",
388        vibration_type: "stretch",
389    },
390    GroupFrequency {
391        group: "M=O (terminal oxo)",
392        freq_min: 850.0,
393        freq_max: 1030.0,
394        intensity: "strong",
395        vibration_type: "stretch",
396    },
397    GroupFrequency {
398        group: "M-C",
399        freq_min: 400.0,
400        freq_max: 650.0,
401        intensity: "medium",
402        vibration_type: "stretch",
403    },
404    GroupFrequency {
405        group: "M-N",
406        freq_min: 400.0,
407        freq_max: 650.0,
408        intensity: "medium",
409        vibration_type: "stretch",
410    },
411    GroupFrequency {
412        group: "M-O",
413        freq_min: 350.0,
414        freq_max: 700.0,
415        intensity: "strong",
416        vibration_type: "stretch",
417    },
418    GroupFrequency {
419        group: "M-P",
420        freq_min: 350.0,
421        freq_max: 500.0,
422        intensity: "medium",
423        vibration_type: "stretch",
424    },
425    GroupFrequency {
426        group: "M-S",
427        freq_min: 250.0,
428        freq_max: 500.0,
429        intensity: "medium",
430        vibration_type: "stretch",
431    },
432    GroupFrequency {
433        group: "M-Cl",
434        freq_min: 250.0,
435        freq_max: 400.0,
436        intensity: "strong",
437        vibration_type: "stretch",
438    },
439    GroupFrequency {
440        group: "M-Br",
441        freq_min: 180.0,
442        freq_max: 320.0,
443        intensity: "medium",
444        vibration_type: "stretch",
445    },
446    GroupFrequency {
447        group: "M-I",
448        freq_min: 150.0,
449        freq_max: 260.0,
450        intensity: "medium",
451        vibration_type: "stretch",
452    },
453];
454
455/// Assign peaks from vibrational analysis to functional groups.
456pub fn assign_peaks(
457    frequencies: &[f64],
458    intensities: &[f64],
459    elements: &[u8],
460    mode_displacements: Option<&[Vec<[f64; 3]>]>,
461) -> AssignmentResult {
462    let mut assignments = Vec::new();
463    let mut found_groups = std::collections::BTreeSet::new();
464
465    for (i, &freq) in frequencies.iter().enumerate() {
466        if freq < 150.0 {
467            continue; // Skip lattice-like modes below the useful far-IR window
468        }
469
470        let intensity = if i < intensities.len() {
471            intensities[i]
472        } else {
473            0.0
474        };
475
476        let mut matches = Vec::new();
477
478        for gf in GROUP_FREQUENCIES {
479            if freq >= gf.freq_min && freq <= gf.freq_max {
480                // Check if the molecule contains elements consistent with this group
481                let mode_matches = mode_displacements
482                    .and_then(|all_modes| all_modes.get(i))
483                    .map(|mode| group_matches_mode(gf.group, elements, mode))
484                    .unwrap_or(true);
485
486                if group_is_possible(gf.group, elements) && mode_matches {
487                    let center = (gf.freq_min + gf.freq_max) / 2.0;
488                    let range = gf.freq_max - gf.freq_min;
489                    let match_quality = 1.0 - (freq - center).abs() / (range / 2.0 + 1.0);
490
491                    matches.push(FunctionalGroupMatch {
492                        group: gf.group.to_string(),
493                        vibration_type: gf.vibration_type.to_string(),
494                        match_quality: match_quality.clamp(0.0, 1.0),
495                    });
496
497                    found_groups.insert(gf.group.to_string());
498                }
499            }
500        }
501
502        let confidence = if matches.is_empty() {
503            0.0
504        } else {
505            matches.iter().map(|m| m.match_quality).fold(0.0, f64::max)
506        };
507
508        assignments.push(PeakAssignment {
509            frequency: freq,
510            intensity,
511            assignments: matches,
512            confidence,
513        });
514    }
515
516    AssignmentResult {
517        assignments,
518        functional_groups: found_groups.into_iter().collect(),
519    }
520}
521
522fn dominant_mode_elements(
523    elements: &[u8],
524    mode_displacement: &[[f64; 3]],
525) -> std::collections::BTreeSet<u8> {
526    let amplitudes: Vec<f64> = mode_displacement
527        .iter()
528        .map(|vector| {
529            (vector[0] * vector[0] + vector[1] * vector[1] + vector[2] * vector[2]).sqrt()
530        })
531        .collect();
532    let max_amplitude = amplitudes.iter().copied().fold(0.0_f64, f64::max);
533    if max_amplitude <= 1e-8 {
534        return std::collections::BTreeSet::new();
535    }
536
537    amplitudes
538        .iter()
539        .enumerate()
540        .filter(|(_, amplitude)| **amplitude >= max_amplitude * 0.35)
541        .filter_map(|(index, _)| elements.get(index).copied())
542        .collect()
543}
544
545fn group_matches_mode(group: &str, elements: &[u8], mode_displacement: &[[f64; 3]]) -> bool {
546    let dominant = dominant_mode_elements(elements, mode_displacement);
547    if dominant.is_empty() {
548        return true;
549    }
550
551    let has = |z: u8| dominant.contains(&z);
552    let has_metal = dominant.iter().copied().any(is_metal_atomic_number);
553
554    if group.starts_with("M-") || group.starts_with("M=") {
555        if !has_metal {
556            return false;
557        }
558        if group.contains("-H") {
559            return has(1);
560        }
561        if group.contains("-CO") {
562            return has(6) && has(8);
563        }
564        if group.contains("=O") || group.contains("-O") {
565            return has(8);
566        }
567        if group.contains("-N") {
568            return has(7);
569        }
570        if group.contains("-S") {
571            return has(16);
572        }
573        if group.contains("-P") {
574            return has(15);
575        }
576        if group.contains("-Cl") {
577            return has(17);
578        }
579        if group.contains("-Br") {
580            return has(35);
581        }
582        if group.contains("-I") {
583            return has(53);
584        }
585        if group == "M-C" {
586            return has(6);
587        }
588        return true;
589    }
590
591    if group.contains("O-H") || group == "H-O-H bend" {
592        return has(8) && has(1);
593    }
594    if group.contains("N-H") {
595        return has(7) && has(1);
596    }
597    if group.contains("C-H") {
598        return has(6) && has(1);
599    }
600    if group.contains("C=O") {
601        return has(6) && has(8);
602    }
603    if group.contains("C-O") {
604        return has(6) && has(8);
605    }
606    if group.contains("C-N") || group.contains("C=N") {
607        return has(6) && has(7);
608    }
609    if group.contains("C≡N") {
610        return has(6) && has(7);
611    }
612    if group.contains("C≡C") || group.contains("C=C") {
613        return has(6);
614    }
615    if group.contains("S=O") {
616        return has(16) && has(8);
617    }
618    if group.contains("P=O") {
619        return has(15) && has(8);
620    }
621    if group.contains("P-O-C") {
622        return has(15) && has(8) && has(6);
623    }
624    if group.contains("C-F") {
625        return has(6) && has(9);
626    }
627    if group.contains("C-Cl") {
628        return has(6) && has(17);
629    }
630    if group.contains("C-Br") {
631        return has(6) && has(35);
632    }
633    if group.contains("C-I") {
634        return has(6) && has(53);
635    }
636
637    true
638}
639
640/// Check if a functional group is possible given the elements present.
641fn group_is_possible(group: &str, elements: &[u8]) -> bool {
642    let has = |z: u8| elements.contains(&z);
643    let has_metal = elements.iter().copied().any(is_metal_atomic_number);
644
645    if group.starts_with("M-") || group.starts_with("M=") {
646        if !has_metal {
647            return false;
648        }
649        if group.contains("-H") {
650            return has(1);
651        }
652        if group.contains("-CO") {
653            return has(6) && has(8);
654        }
655        if group.contains("=O") || group.contains("-O") {
656            return has(8);
657        }
658        if group.contains("-N") {
659            return has(7);
660        }
661        if group.contains("-S") {
662            return has(16);
663        }
664        if group.contains("-P") {
665            return has(15);
666        }
667        if group.contains("-Cl") {
668            return has(17);
669        }
670        if group.contains("-Br") {
671            return has(35);
672        }
673        if group.contains("-I") {
674            return has(53);
675        }
676        if group == "M-C" {
677            return has(6);
678        }
679
680        return true;
681    }
682
683    if group.contains("O-H") || group.contains("C-O") || group.contains("C=O") {
684        return has(8); // Oxygen
685    }
686    if group.contains("N-H")
687        || group.contains("C-N")
688        || group.contains("C=N")
689        || group.contains("C≡N")
690        || group.contains("N=O")
691    {
692        return has(7); // Nitrogen
693    }
694    if group.contains("C-F") {
695        return has(9); // Fluorine
696    }
697    if group.contains("C-Cl") {
698        return has(17); // Chlorine
699    }
700    if group.contains("C-Br") {
701        return has(35); // Bromine
702    }
703    if group.contains("C-I") {
704        return has(53); // Iodine
705    }
706    if group.contains("S=O") {
707        return has(16); // Sulfur
708    }
709    if group.contains("P=O") || group.contains("P-O") {
710        return has(15); // Phosphorus
711    }
712    if group.contains("C-H") || group.contains("C=C") || group.contains("C≡C") {
713        return has(6); // Carbon (almost always true)
714    }
715
716    true // Default: possible
717}
718
719fn is_metal_atomic_number(z: u8) -> bool {
720    matches!(z, 3 | 4 | 11 | 12 | 13 | 19 | 20 | 21..=31 | 37..=50 | 55..=84 | 87..=103)
721}
722
723#[cfg(test)]
724mod tests {
725    use super::*;
726
727    #[test]
728    fn test_peak_assignment_carbonyl() {
729        let freqs = vec![1720.0, 2950.0, 1450.0];
730        let intensities = vec![1.0, 0.5, 0.3];
731        let elements = vec![6u8, 6, 8, 1, 1, 1, 1]; // acetaldehyde-like
732
733        let result = assign_peaks(&freqs, &intensities, &elements, None);
734        assert!(!result.assignments.is_empty());
735
736        // 1720 cm⁻¹ should match C=O
737        let carbonyl = &result.assignments[0];
738        assert!(carbonyl.assignments.iter().any(|a| a.group.contains("C=O")));
739    }
740
741    #[test]
742    fn test_group_possible() {
743        assert!(group_is_possible("O-H (free)", &[6, 8, 1]));
744        assert!(!group_is_possible("C-Cl", &[6, 8, 1]));
745        assert!(group_is_possible("C-H (sp3)", &[6, 1]));
746    }
747
748    #[test]
749    fn test_peak_assignment_metal_ligand_bands() {
750        let freqs = vec![295.0, 520.0, 1985.0];
751        let intensities = vec![0.4, 0.7, 1.0];
752        let elements = vec![26u8, 17, 8, 6];
753
754        let result = assign_peaks(&freqs, &intensities, &elements, None);
755
756        assert!(result.assignments[0]
757            .assignments
758            .iter()
759            .any(|a| a.group == "M-Cl"));
760        assert!(result.assignments[1]
761            .assignments
762            .iter()
763            .any(|a| a.group == "M-O"));
764        assert!(result.assignments[2]
765            .assignments
766            .iter()
767            .any(|a| a.group == "M-CO (terminal carbonyl)"));
768    }
769}