1use crate::types::OrganicInorganic;
12use serde::{Deserialize, Serialize};
13
/// Functional groups that [`detect_functional_groups`] can report for a SMILES string.
///
/// Variants are declared in the same order in which `detect_functional_groups` checks for
/// them, which is also the order in which they appear in its result. Because of
/// `rename_all = "snake_case"`, serde reads and writes each variant under its snake_case
/// name (for example `CarboxylicAcid` as `"carboxylic_acid"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FunctionalGroup {
    /// Acid anhydride linkage, `C(=O)OC(=O)` (open-chain or ring-closed).
    Anhydride,
    /// Isocyanate, `N=C=O`.
    Isocyanate,
    /// Nitrile, `C#N`.
    Nitrile,
    /// Nitro group, e.g. `[N+](=O)[O-]` or `N(=O)=O`.
    Nitro,
    /// Epoxide: three-membered C–C–O ring such as `C1CO1`.
    Epoxide,
    /// Sulphonic acid, `S(=O)(=O)O`.
    SulphonicAcid,
    /// Phosphorus–oxygen group such as `OP(=O)`.
    Phosphate,
    /// Amide, `C(=O)N`.
    Amide,
    /// Ester, `C(=O)OC`.
    Ester,
    /// Carboxylic acid, `C(=O)O`.
    CarboxylicAcid,
    /// Aldehyde: carbonyl at the end of a chain, e.g. a trailing `C=O`.
    Aldehyde,
    /// Ketone, `C(=O)C`.
    Ketone,
    /// Phenol: oxygen attached to an aromatic carbon, e.g. `Oc1ccccc1`.
    Phenol,
    /// Thiol, `[SH]` or a trailing `CS`.
    Thiol,
    /// Sulphide (thioether), `CSC`.
    Sulphide,
    /// Alcohol: hydroxyl on a non-aromatic carbon, e.g. a trailing `CO`.
    Alcohol,
    /// Ether, `COC`.
    Ether,
    /// Amine: nitrogen next to carbon, e.g. `CN`.
    Amine,
    /// Halide: F, Cl, Br or I next to carbon, e.g. `CCl`.
    Halide,
    /// At least one aromatic (lowercase) atom symbol is present.
    AromaticRing,
}
67
68impl FunctionalGroup {
69 pub fn label(self) -> &'static str {
71 match self {
72 Self::Anhydride => "Anhydride",
73 Self::Isocyanate => "Isocyanate",
74 Self::Nitrile => "Nitrile",
75 Self::Nitro => "Nitro",
76 Self::Epoxide => "Epoxide",
77 Self::SulphonicAcid => "SulphonicAcid",
78 Self::Phosphate => "Phosphate",
79 Self::Amide => "Amide",
80 Self::Ester => "Ester",
81 Self::CarboxylicAcid => "CarboxylicAcid",
82 Self::Aldehyde => "Aldehyde",
83 Self::Ketone => "Ketone",
84 Self::Phenol => "Phenol",
85 Self::Thiol => "Thiol",
86 Self::Sulphide => "Sulphide",
87 Self::Alcohol => "Alcohol",
88 Self::Ether => "Ether",
89 Self::Amine => "Amine",
90 Self::Halide => "Halide",
91 Self::AromaticRing => "AromaticRing",
92 }
93 }
94}
95
96pub fn classify_organic(smiles: &str) -> OrganicInorganic {
107 if !smiles.chars().any(|c| c == 'C' || c == 'c') {
109 return OrganicInorganic::Inorganic;
110 }
111
112 let normalised = smiles.replace(' ', "");
114 let inorganic_exact: &[&str] = &[
115 "O=C=O", "[O-]C(=O)[O-]", "[O-]C([O-])=O",
118 "[C-]#[O+]", "[C+]#[O-]",
120 "S=C=S", "[C-]#N", "[N+]#[C-]",
123 "C(=O)([O-])[O-]", ];
125 if inorganic_exact.iter().any(|p| normalised == *p) {
126 return OrganicInorganic::Inorganic;
127 }
128
129 let metal_symbols: &[&str] = &[
132 "[Fe]", "[Co]", "[Ni]", "[Cr]", "[Mn]", "[Mo]", "[W]",
133 "[Ti]", "[V]", "[Ru]", "[Rh]", "[Pd]", "[Os]", "[Ir]",
134 "[Pt]", "[Zn]", "[Al]", "[Pb]", "[Sn]", "[Hg]", "[Tl]",
135 ];
136 for metal in metal_symbols {
139 if smiles.contains(metal) {
140 let idx = smiles.find(metal).unwrap_or(usize::MAX);
143 let after = smiles.get(idx + metal.len()..).unwrap_or("");
144 let before = smiles.get(..idx).unwrap_or("");
145 let bonded = after.starts_with('C')
146 || after.starts_with('c')
147 || before.ends_with('C')
148 || before.ends_with('c');
149 if bonded {
150 return OrganicInorganic::Organometallic;
151 }
152 }
153 }
154
155 OrganicInorganic::Organic
156}
157
158pub fn detect_functional_groups(smiles: &str) -> Vec<FunctionalGroup> {
173 let mut groups: Vec<FunctionalGroup> = Vec::new();
174
175 let any = |patterns: &[&str]| -> bool { patterns.iter().any(|p| smiles.contains(p)) };
177
178 let cyclic_anhydride = (1u8..=9).any(|n| {
182 smiles.contains(&format!("O=C{}OC(=O)", n))
183 });
184 if smiles.contains("C(=O)OC(=O)") || cyclic_anhydride {
185 groups.push(FunctionalGroup::Anhydride);
186 }
187
188 if any(&["N=C=O", "O=C=N"]) {
190 groups.push(FunctionalGroup::Isocyanate);
191 }
192
193 if any(&["C#N", "N#C"]) {
195 groups.push(FunctionalGroup::Nitrile);
196 }
197
198 if any(&[
201 "O=[N+]([O-])", "[N+](=O)[O-]", "N(=O)=O",
204 "[N+]([O-])=O",
205 "[N+](=O)([O-])",
206 ]) {
207 groups.push(FunctionalGroup::Nitro);
208 }
209
210 if any(&[
214 "C1CO1", "C1OC1", "[C@@H]1O[C@H]1", "[C@H]1O[C@@H]1",
218 ]) {
219 groups.push(FunctionalGroup::Epoxide);
220 }
221
222 if any(&["S(=O)(=O)O", "S(=O)(=O)[OH]", "S(O)(=O)=O", "[S](=O)(=O)O"]) {
224 groups.push(FunctionalGroup::SulphonicAcid);
225 }
226
227 if smiles.contains('P')
229 && any(&["P(=O)(O)", "P(=O)([O", "P(O)(O)", "P([OH])", "OP(=O)", "P(=O)O"])
230 {
231 groups.push(FunctionalGroup::Phosphate);
232 }
233
234 if any(&[
237 "NC(=O)", "NC(C", "C(N)=O", "C(=O)N", "C(=O)[NH", "[NH]C(=O)", "[NH2]C(=O)",
239 "N)=O", ]) {
241 let has_iso = groups.contains(&FunctionalGroup::Isocyanate);
243 let has_nitrile = groups.contains(&FunctionalGroup::Nitrile);
244 if !has_iso && !has_nitrile {
245 groups.push(FunctionalGroup::Amide);
246 }
247 }
248
249 let has_anhydride = groups.contains(&FunctionalGroup::Anhydride);
252 if !has_anhydride
253 && any(&[
254 "OC(C)=O", "OC(=O)C", "C(=O)OC", "C(=O)Oc", "OC(CC", "OC(c", ])
257 {
258 groups.push(FunctionalGroup::Ester);
259 }
260
261 let has_ester = groups.contains(&FunctionalGroup::Ester);
264 if !has_ester && !has_anhydride {
265 let has_acid_pattern = any(&[
268 "C(=O)O", "C(O)=O", "C(=O)[OH]", ]);
272 if has_acid_pattern {
274 groups.push(FunctionalGroup::CarboxylicAcid);
275 }
276 }
277
278 let has_higher_carbonyl = groups.iter().any(|g| {
282 matches!(
283 g,
284 FunctionalGroup::Amide
285 | FunctionalGroup::Ester
286 | FunctionalGroup::CarboxylicAcid
287 | FunctionalGroup::Anhydride
288 )
289 });
290 if !has_higher_carbonyl {
291 let aldehyde = smiles.ends_with("C=O")
292 || smiles.ends_with("[CH]=O")
293 || smiles.starts_with("O=C") || any(&["[CH]=O", "[CHO]"]);
295 if aldehyde {
296 groups.push(FunctionalGroup::Aldehyde);
297 }
298 }
299
300 if !has_higher_carbonyl {
303 let has_aldehyde = groups.contains(&FunctionalGroup::Aldehyde);
304 if !has_aldehyde
305 && any(&[
306 "C(C)=O", "C(CC)=O", "C(CCC)=O",
308 "C(c)=O", "c(=O)C", "C(=O)C", ])
312 {
313 groups.push(FunctionalGroup::Ketone);
314 }
315 }
316
317 if any(&[
319 "c1ccccc1O", "Oc1ccccc1",
320 "c(O)", "c([OH])", "Oc1cc", "Oc1ccc", "c1cc(O)", "c1ccc(O)",
323 ]) {
324 groups.push(FunctionalGroup::Phenol);
325 }
326
327 if any(&["[SH]", "C[SH]", "c[SH]"])
330 || smiles.ends_with("CS")
331 || smiles.ends_with("cS")
332 {
333 groups.push(FunctionalGroup::Thiol);
334 }
335
336 let has_sulphonic = groups.contains(&FunctionalGroup::SulphonicAcid);
338 let has_thiol = groups.contains(&FunctionalGroup::Thiol);
339 if !has_sulphonic
340 && !has_thiol
341 && smiles.contains('S')
342 && any(&["CSC", "cSC", "CSc", "cSc", "C(S)C"])
343 {
344 groups.push(FunctionalGroup::Sulphide);
345 }
346
347 let has_phenol = groups.contains(&FunctionalGroup::Phenol);
350 let has_acid = groups.contains(&FunctionalGroup::CarboxylicAcid);
351 let has_ester2 = groups.contains(&FunctionalGroup::Ester);
352 let has_anhydride2 = groups.contains(&FunctionalGroup::Anhydride);
353 if !has_phenol && !has_acid && !has_ester2 && !has_anhydride2 {
354 let alcohol = any(&["[OH]", "C[OH]"])
355 || smiles.ends_with("CO")
356 || smiles.ends_with("CCO")
357 || smiles.ends_with("O") || any(&["C(O)", "C([OH])"]);
359 if alcohol {
360 groups.push(FunctionalGroup::Alcohol);
361 }
362 }
363
364 let has_epoxide = groups.contains(&FunctionalGroup::Epoxide);
367 let has_ester3 = groups.contains(&FunctionalGroup::Ester);
368 let has_acid2 = groups.contains(&FunctionalGroup::CarboxylicAcid);
369 if !has_epoxide && !has_ester3 && !has_acid2 && !has_anhydride {
370 if any(&["COC", "cOC", "COc", "cOc"]) {
371 groups.push(FunctionalGroup::Ether);
372 }
373 }
374
375 let has_amide = groups.contains(&FunctionalGroup::Amide);
378 let has_nitrile = groups.contains(&FunctionalGroup::Nitrile);
379 let has_nitro = groups.contains(&FunctionalGroup::Nitro);
380 if smiles.contains('N')
381 && !has_nitrile
382 && !has_nitro
383 {
384 let amine = any(&[
386 "CN", "NC", "[NH2]", "[NH3+]", "[NH]", "cN", "Nc",
387 ]);
388 if amine && (!has_amide || any(&["[NH2]", "[NH3+]", "CN(", "N(C)C"])) {
390 groups.push(FunctionalGroup::Amine);
391 }
392 }
393
394 if any(&[
396 "CF", "CCl", "CBr", "CI",
397 "Fc", "Clc", "Brc", "Ic",
398 "[F]", "[Cl]", "[Br]", "[I]",
399 "c[F]", "c[Cl]", "c[Br]", "c[I]",
400 "CF3", "CCl3", "CHF", "CHCl", "CHBr",
401 ]) {
402 groups.push(FunctionalGroup::Halide);
403 }
404
405 if smiles.chars().any(|c| matches!(c, 'c' | 'n' | 'o' | 's' | 'p')) {
407 groups.push(FunctionalGroup::AromaticRing);
408 }
409
410 groups
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// Every functional group the detector reports for `smiles`.
    fn fg(smiles: &str) -> Vec<FunctionalGroup> {
        detect_functional_groups(smiles)
    }

    /// Whether the detector reports `g` for `smiles`.
    fn has(smiles: &str, g: FunctionalGroup) -> bool {
        fg(smiles).iter().any(|found| *found == g)
    }

    // ---- classify_organic -------------------------------------------------------------

    #[test]
    fn co2_is_inorganic() {
        let class = classify_organic("O=C=O");
        assert_eq!(class, OrganicInorganic::Inorganic);
    }

    #[test]
    fn water_is_inorganic() {
        let class = classify_organic("O");
        assert_eq!(class, OrganicInorganic::Inorganic);
    }

    #[test]
    fn ethanol_is_organic() {
        let class = classify_organic("CCO");
        assert_eq!(class, OrganicInorganic::Organic);
    }

    #[test]
    fn benzene_is_organic() {
        let class = classify_organic("c1ccccc1");
        assert_eq!(class, OrganicInorganic::Organic);
    }

    // ---- detect_functional_groups: groups that exclude a neighbouring group -----------

    #[test]
    fn acetic_acid_detected() {
        let found = fg("CC(=O)O");
        assert!(found.contains(&FunctionalGroup::CarboxylicAcid));
        assert!(!found.contains(&FunctionalGroup::Ester));
    }

    #[test]
    fn ethyl_acetate_detected_as_ester() {
        let found = fg("CCOC(C)=O");
        assert!(found.contains(&FunctionalGroup::Ester));
        assert!(!found.contains(&FunctionalGroup::CarboxylicAcid));
    }

    #[test]
    fn phthalic_anhydride_detected() {
        let found = fg("O=C1OC(=O)c2ccccc21");
        assert!(found.contains(&FunctionalGroup::Anhydride));
        assert!(!found.contains(&FunctionalGroup::Ester));
    }

    #[test]
    fn acetaldehyde_detected() {
        let found = fg("CC=O");
        assert!(found.contains(&FunctionalGroup::Aldehyde));
        assert!(!found.contains(&FunctionalGroup::Ketone));
    }

    #[test]
    fn acetone_detected_as_ketone() {
        let found = fg("CC(C)=O");
        assert!(found.contains(&FunctionalGroup::Ketone));
        assert!(!found.contains(&FunctionalGroup::Aldehyde));
    }

    #[test]
    fn ethanol_detected_as_alcohol() {
        let found = fg("CCO");
        assert!(found.contains(&FunctionalGroup::Alcohol));
        assert!(!found.contains(&FunctionalGroup::Ether));
    }

    #[test]
    fn dimethyl_ether_detected() {
        let found = fg("COC");
        assert!(found.contains(&FunctionalGroup::Ether));
        assert!(!found.contains(&FunctionalGroup::Alcohol));
    }

    #[test]
    fn acetamide_detected() {
        let found = fg("CC(N)=O");
        assert!(found.contains(&FunctionalGroup::Amide));
        assert!(!found.contains(&FunctionalGroup::Ketone));
    }

    // ---- detect_functional_groups: single-group presence checks -----------------------

    #[test]
    fn methylamine_detected() {
        assert!(has("CN", FunctionalGroup::Amine));
    }

    #[test]
    fn acetonitrile_detected() {
        assert!(has("CC#N", FunctionalGroup::Nitrile));
    }

    #[test]
    fn chloromethane_detected() {
        assert!(has("CCl", FunctionalGroup::Halide));
    }

    #[test]
    fn ethylene_oxide_detected() {
        assert!(has("C1CO1", FunctionalGroup::Epoxide));
    }

    #[test]
    fn benzene_detected_as_aromatic() {
        assert!(has("c1ccccc1", FunctionalGroup::AromaticRing));
    }

    #[test]
    fn phenol_detected() {
        assert!(has("Oc1ccccc1", FunctionalGroup::Phenol));
    }

    #[test]
    fn nitrobenzene_detected() {
        assert!(has("O=[N+]([O-])c1ccccc1", FunctionalGroup::Nitro));
    }

    #[test]
    fn ethanesulfonic_acid_detected() {
        assert!(has("CCS(=O)(=O)O", FunctionalGroup::SulphonicAcid));
    }

    #[test]
    fn dimethyl_sulfide_detected() {
        assert!(has("CSC", FunctionalGroup::Sulphide));
    }

    #[test]
    fn methanethiol_detected() {
        assert!(has("C[SH]", FunctionalGroup::Thiol));
    }

    #[test]
    fn isocyanate_detected() {
        assert!(has("CN=C=O", FunctionalGroup::Isocyanate));
    }

    #[test]
    fn trimethyl_phosphate_detected() {
        assert!(has("COP(=O)(OC)OC", FunctionalGroup::Phosphate));
    }
}