1use crate::smiles::detector::{FunctionalGroup, StructuralFeatures};
17use crate::types::OrganicInorganic;
18
19#[derive(Debug, Clone, serde::Serialize)]
25pub struct HeadingHint {
26 pub chapter: u8,
28
29 pub heading: Option<u16>,
32
33 pub subheading: Option<String>,
37
38 pub rationale: &'static str,
40
41 pub confidence: f32,
45}
46
47static PRIORITY_MAP: &[(FunctionalGroup, u8, u16, &str, f32)] = &[
56 (
58 FunctionalGroup::Anhydride,
59 29, 2915,
60 "Acid anhydride → HS 29.15–29.17 (acyclic/aromatic acid anhydrides); \
61 use 29.17 for aromatic anhydrides",
62 0.65,
63 ),
64 (
65 FunctionalGroup::Isocyanate,
66 29, 2929,
67 "Isocyanate / carbodiimide → HS 29.29",
68 0.70,
69 ),
70 (
71 FunctionalGroup::Epoxide,
72 29, 2910,
73 "Epoxide → HS 29.10",
74 0.70,
75 ),
76 (
77 FunctionalGroup::SulphonicAcid,
78 29, 2904,
79 "Organo-sulphonic acid → HS 29.04 (sulphonated derivatives)",
80 0.68,
81 ),
82 (
83 FunctionalGroup::Nitrile,
84 29, 2926,
85 "Nitrile → HS 29.26",
86 0.70,
87 ),
88 (
89 FunctionalGroup::Phosphate,
90 29, 2920,
91 "Organophosphate / phosphonate ester → HS 29.20",
92 0.62,
93 ),
94 (
96 FunctionalGroup::Amide,
97 29, 2924,
98 "Amide → HS 29.24 (amide-function compounds)",
99 0.67,
100 ),
101 (
102 FunctionalGroup::CarboxylicAcid,
103 29, 2915,
104 "Carboxylic acid → HS 29.15 (acyclic), 29.16 (cyclic), 29.17 (aromatic), \
105 or 29.18 (other with additional functions); heading depends on chain length / ring",
106 0.60,
107 ),
108 (
109 FunctionalGroup::Ester,
110 29, 2915,
111 "Ester → HS 29.15–29.17 (depends on parent acid type and chain length)",
112 0.55,
113 ),
114 (
115 FunctionalGroup::Aldehyde,
116 29, 2912,
117 "Aldehyde → HS 29.12",
118 0.67,
119 ),
120 (
121 FunctionalGroup::Ketone,
122 29, 2914,
123 "Ketone / quinone → HS 29.14",
124 0.67,
125 ),
126 (
128 FunctionalGroup::Phenol,
129 29, 2907,
130 "Phenol → HS 29.07",
131 0.67,
132 ),
133 (
134 FunctionalGroup::Alcohol,
135 29, 2905,
136 "Alcohol → HS 29.05 (acyclic) or 29.06 (cyclic); \
137 polyols may fall under 29.05 subheading",
138 0.60,
139 ),
140 (
142 FunctionalGroup::Thiol,
143 29, 2930,
144 "Thiol (mercaptan) → HS 29.30 (organo-sulphur compounds)",
145 0.65,
146 ),
147 (
148 FunctionalGroup::Sulphide,
149 29, 2930,
150 "Thioether / sulphide → HS 29.30 (organo-sulphur compounds)",
151 0.65,
152 ),
153 (
155 FunctionalGroup::Amine,
156 29, 2921,
157 "Amine → HS 29.21",
158 0.63,
159 ),
160 (
161 FunctionalGroup::Nitro,
162 29, 2904,
163 "Nitro / nitroso compound → HS 29.04",
164 0.60,
165 ),
166 (
167 FunctionalGroup::Ether,
168 29, 2909,
169 "Ether → HS 29.09",
170 0.63,
171 ),
172 (
173 FunctionalGroup::Halide,
174 29, 2903,
175 "Organohalide → HS 29.03",
176 0.65,
177 ),
178 (
180 FunctionalGroup::AromaticRing,
181 29, 0, "Aromatic compound → Chapter 29; heading depends on substituents",
183 0.40,
184 ),
185];
186
187pub fn map_to_heading(
201 groups: &[FunctionalGroup],
202 organic_class: &OrganicInorganic,
203) -> HeadingHint {
204 if matches!(organic_class, OrganicInorganic::Inorganic) {
206 return HeadingHint {
207 chapter: 28,
208 heading: None,
209 subheading: None,
210 rationale: "Inorganic compound → Chapter 28; \
211 heading depends on element / salt type",
212 confidence: 0.55,
213 };
214 }
215
216 if matches!(organic_class, OrganicInorganic::Organometallic) {
218 return HeadingHint {
219 chapter: 29,
220 heading: Some(2931),
221 subheading: None,
222 rationale: "Organometallic compound → HS 29.31",
223 confidence: 0.62,
224 };
225 }
226
227 for &(group, chapter, heading_code, rationale, confidence) in PRIORITY_MAP {
229 if groups.contains(&group) {
230 let heading = if heading_code == 0 { None } else { Some(heading_code) };
231 return HeadingHint { chapter, heading, subheading: None, rationale, confidence };
232 }
233 }
234
235 HeadingHint {
237 chapter: 29,
238 heading: None,
239 subheading: None,
240 rationale: "Organic compound with no detected functional groups → \
241 Chapter 29 (unsubstituted hydrocarbon) or Chapter 38",
242 confidence: 0.35,
243 }
244}
245
246pub fn map_to_subheading(
256 groups: &[FunctionalGroup],
257 organic_class: &OrganicInorganic,
258 feat: &StructuralFeatures,
259) -> HeadingHint {
260 if !matches!(organic_class, OrganicInorganic::Organic) {
262 return map_to_heading(groups, organic_class);
263 }
264
265 if groups.contains(&FunctionalGroup::Ketone) {
267 return subheading_ketone(feat);
268 }
269
270 if groups.contains(&FunctionalGroup::Alcohol) {
272 return subheading_alcohol(feat);
273 }
274
275 if groups.contains(&FunctionalGroup::CarboxylicAcid) {
277 return subheading_acid(feat);
278 }
279
280 if groups.contains(&FunctionalGroup::Aldehyde) {
282 return subheading_aldehyde(feat);
283 }
284
285 map_to_heading(groups, organic_class)
287}
288
289fn subheading_ketone(f: &StructuralFeatures) -> HeadingHint {
295 let (code, rationale, conf) = if f.has_aromatic_ring {
296 if f.carbon_count == 8 && f.carbonyl_count == 1 {
298 ("291431", "Phenyl methyl ketone (acetophenone) → HS 29.14.31", 0.82_f32)
299 } else {
300 ("291439", "Other aromatic ketone → HS 29.14.39", 0.65)
301 }
302 } else if f.has_ring {
303 match f.carbon_count {
305 10 => ("291421", "Camphor (cyclic C10 ketone) → HS 29.14.21", 0.78),
306 6 => ("291422", "Cyclohexanone → HS 29.14.22", 0.85),
307 7 => ("291423", "Methylcyclohexanone → HS 29.14.23", 0.78),
308 _ => ("291429", "Other cycloaliphatic/cycloterpenic ketone → HS 29.14.29", 0.65),
309 }
310 } else {
311 if f.has_halogen {
313 ("291479", "Halogenated ketone derivative → HS 29.14.79", 0.68)
314 } else if f.hydroxyl_count > 0 {
315 ("291440", "Ketone-alcohol or ketone-aldehyde → HS 29.14.40", 0.70)
316 } else {
317 match f.carbon_count {
318 3 => ("291411", "Acetone (3C acyclic ketone) → HS 29.14.11", 0.87),
319 4 => ("291412", "Butanone / MEK (4C acyclic ketone) → HS 29.14.12", 0.83),
320 6 => ("291413",
321 "4-Methylpentan-2-one / MIBK candidate (6C acyclic ketone) → HS 29.14.13; \
322 verify branching pattern",
323 0.72),
324 _ => ("291419", "Other acyclic ketone without other O → HS 29.14.19", 0.68),
325 }
326 }
327 };
328 HeadingHint {
329 chapter: 29,
330 heading: Some(2914),
331 subheading: Some(code.to_string()),
332 rationale,
333 confidence: conf,
334 }
335}
336
337fn subheading_alcohol(f: &StructuralFeatures) -> HeadingHint {
339 let oh = f.hydroxyl_count.max(1); let (code, chapter, heading, rationale, conf): (&str, u8, u16, &'static str, f32) =
342 if oh >= 3 {
343 match f.carbon_count {
344 3 => ("290541", 29, 2905, "Glycerol (3C triol) → HS 29.05.41", 0.90),
345 _ => ("290549", 29, 2905, "Other polyol → HS 29.05.49", 0.65),
346 }
347 } else if oh == 2 {
348 match f.carbon_count {
349 2 => ("290531", 29, 2905, "Ethylene glycol (2C diol) → HS 29.05.31", 0.88),
350 3 => ("290532", 29, 2905, "Propylene glycol (3C diol) → HS 29.05.32", 0.85),
351 _ => ("290539", 29, 2905, "Other diol → HS 29.05.39", 0.68),
352 }
353 } else {
354 if f.has_cc_double_bond {
356 ("290529", 29, 2905,
357 "Unsaturated monohydric acyclic alcohol → HS 29.05.29", 0.65)
358 } else {
359 match f.carbon_count {
360 1 => ("290511", 29, 2905,
361 "Methanol (1C) → HS 29.05.11", 0.90),
362 2 => ("220710", 22, 2207,
363 "Ethanol (2C) → HS 22.07.10 (undenatured ethyl alcohol ≥ 80 %); \
364 verify concentration — denatured → 22.07.20, dilute → 22.08",
365 0.85),
366 3 => ("290512", 29, 2905,
367 "Propan-1-ol (3C saturated monohydric) → HS 29.05.12", 0.82),
368 4 => ("290513", 29, 2905,
369 "Butan-1-ol (4C primary alcohol) → HS 29.05.13; \
370 other butanols → 29.05.14",
371 0.75),
372 8 => ("290516", 29, 2905,
373 "Octanol and isomers → HS 29.05.16", 0.78),
374 12 | 16 | 18 => ("290517", 29, 2905,
375 "Dodecan-1-ol / hexadecan-1-ol / octadecan-1-ol \
376 → HS 29.05.17",
377 0.75),
378 _ => ("290519", 29, 2905,
379 "Other saturated monohydric acyclic alcohol → HS 29.05.19", 0.65),
380 }
381 }
382 };
383
384 HeadingHint {
385 chapter,
386 heading: Some(heading),
387 subheading: Some(code.to_string()),
388 rationale,
389 confidence: conf,
390 }
391}
392
393fn subheading_acid(f: &StructuralFeatures) -> HeadingHint {
395 let (code, heading, rationale, conf): (&str, u16, &'static str, f32) =
396 if f.has_aromatic_ring {
397 match f.carbon_count {
398 7 => ("291631", 2916,
399 "Benzoic acid (7C aromatic acid) → HS 29.16.31", 0.85),
400 8 => ("291634", 2916,
401 "Phenylacetic acid (8C aromatic acid) → HS 29.16.34", 0.78),
402 _ => ("291639", 2916,
403 "Other aromatic monocarboxylic acid → HS 29.16.39", 0.65),
404 }
405 } else if f.has_cc_double_bond {
406 match f.carbon_count {
408 3 => ("291611", 2916,
409 "Acrylic acid (3C unsaturated) → HS 29.16.11", 0.87),
410 4 => ("291613", 2916,
411 "Methacrylic acid (4C unsaturated, branch C=C) → HS 29.16.13; \
412 esters → 29.16.14",
413 0.82),
414 _ => ("291619", 2916,
415 "Other unsaturated aliphatic monocarboxylic acid → HS 29.16.19", 0.65),
416 }
417 } else {
418 let extra_oh = f.hydroxyl_count.saturating_sub(1);
421 if extra_oh >= 1 {
422 ("291819", 2918,
423 "Carboxylic acid with additional oxygen function → HS 29.18.19", 0.65)
424 } else {
425 match f.carbon_count {
426 1 => ("291511", 2915, "Formic acid (1C) → HS 29.15.11", 0.90),
427 2 => ("291521", 2915, "Acetic acid (2C) → HS 29.15.21", 0.90),
428 3 => ("291550", 2915, "Propionic acid (3C) → HS 29.15.50", 0.87),
429 4 => ("291560", 2915,
430 "Butanoic / butyric acid (4C) → HS 29.15.60", 0.83),
431 16 | 18 => ("291570", 2915,
432 "Palmitic / stearic acid (C16/C18) → HS 29.15.70", 0.80),
433 _ => ("291590", 2915,
434 "Other saturated acyclic monocarboxylic acid → HS 29.15.90", 0.65),
435 }
436 }
437 };
438
439 HeadingHint {
440 chapter: 29,
441 heading: Some(heading),
442 subheading: Some(code.to_string()),
443 rationale,
444 confidence: conf,
445 }
446}
447
448fn subheading_aldehyde(f: &StructuralFeatures) -> HeadingHint {
450 let (code, rationale, conf): (&str, &'static str, f32) = if f.has_aromatic_ring {
451 match f.carbon_count {
452 7 => ("291211", "Benzaldehyde (7C aromatic aldehyde) → HS 29.12.11", 0.85),
453 _ => ("291219", "Other aromatic aldehyde → HS 29.12.19", 0.65),
454 }
455 } else {
456 match f.carbon_count {
457 1 => ("291211", "Formaldehyde → HS 29.12.11", 0.82),
458 2 => ("291212", "Acetaldehyde (2C) → HS 29.12.12", 0.85),
459 3 => ("291219", "Propanal / acrolein candidate (3C) → HS 29.12.19", 0.72),
460 _ => ("291219", "Other aliphatic aldehyde → HS 29.12.19", 0.65),
461 }
462 };
463 HeadingHint {
464 chapter: 29,
465 heading: Some(2912),
466 subheading: Some(code.to_string()),
467 rationale,
468 confidence: conf,
469 }
470}
471
472#[cfg(test)]
477mod tests {
478 use super::*;
479
480 fn hint(groups: &[FunctionalGroup]) -> HeadingHint {
481 map_to_heading(groups, &OrganicInorganic::Organic)
482 }
483
484 #[test]
485 fn inorganic_gives_ch28() {
486 let h = map_to_heading(&[], &OrganicInorganic::Inorganic);
487 assert_eq!(h.chapter, 28);
488 assert!(h.heading.is_none());
489 }
490
491 #[test]
492 fn organometallic_gives_2931() {
493 let h = map_to_heading(&[], &OrganicInorganic::Organometallic);
494 assert_eq!(h.heading, Some(2931));
495 }
496
497 #[test]
498 fn anhydride_wins_over_acid() {
499 let h = hint(&[FunctionalGroup::Anhydride, FunctionalGroup::CarboxylicAcid]);
500 assert_eq!(h.heading, Some(2915));
502 assert!(h.rationale.to_lowercase().contains("anhydride"));
503 }
504
505 #[test]
506 fn aldehyde_maps_to_2912() {
507 let h = hint(&[FunctionalGroup::Aldehyde]);
508 assert_eq!(h.heading, Some(2912));
509 }
510
511 #[test]
512 fn ketone_maps_to_2914() {
513 let h = hint(&[FunctionalGroup::Ketone]);
514 assert_eq!(h.heading, Some(2914));
515 }
516
517 #[test]
518 fn alcohol_maps_to_2905() {
519 let h = hint(&[FunctionalGroup::Alcohol]);
520 assert_eq!(h.heading, Some(2905));
521 }
522
523 #[test]
524 fn nitrile_maps_to_2926() {
525 let h = hint(&[FunctionalGroup::Nitrile]);
526 assert_eq!(h.heading, Some(2926));
527 }
528
529 #[test]
530 fn amine_maps_to_2921() {
531 let h = hint(&[FunctionalGroup::Amine]);
532 assert_eq!(h.heading, Some(2921));
533 }
534
535 #[test]
536 fn halide_maps_to_2903() {
537 let h = hint(&[FunctionalGroup::Halide]);
538 assert_eq!(h.heading, Some(2903));
539 }
540
541 #[test]
542 fn no_groups_gives_low_confidence() {
543 let h = hint(&[]);
544 assert!(h.confidence < 0.50);
545 }
546
547 #[test]
548 fn isocyanate_maps_to_2929() {
549 let h = hint(&[FunctionalGroup::Isocyanate]);
550 assert_eq!(h.heading, Some(2929));
551 }
552
553 #[test]
554 fn epoxide_maps_to_2910() {
555 let h = hint(&[FunctionalGroup::Epoxide]);
556 assert_eq!(h.heading, Some(2910));
557 }
558
559 fn feat(carbon: u32, oh: u32, co: u32, ring: bool, arom: bool, cc: bool, hal: bool)
562 -> StructuralFeatures
563 {
564 StructuralFeatures {
565 carbon_count: carbon,
566 hydroxyl_count: oh,
567 carbonyl_count: co,
568 has_ring: ring,
569 has_aromatic_ring: arom,
570 has_cc_double_bond: cc,
571 has_halogen: hal,
572 }
573 }
574
575 #[test]
576 fn acetone_subheading_291411() {
577 let f = feat(3, 0, 1, false, false, false, false);
578 let h = map_to_subheading(
579 &[FunctionalGroup::Ketone], &OrganicInorganic::Organic, &f,
580 );
581 assert_eq!(h.subheading.as_deref(), Some("291411"));
582 assert!(h.confidence >= 0.85);
583 }
584
585 #[test]
586 fn mek_subheading_291412() {
587 let f = feat(4, 0, 1, false, false, false, false);
588 let h = map_to_subheading(
589 &[FunctionalGroup::Ketone], &OrganicInorganic::Organic, &f,
590 );
591 assert_eq!(h.subheading.as_deref(), Some("291412"));
592 }
593
594 #[test]
595 fn cyclohexanone_subheading_291422() {
596 let f = feat(6, 0, 1, true, false, false, false);
597 let h = map_to_subheading(
598 &[FunctionalGroup::Ketone], &OrganicInorganic::Organic, &f,
599 );
600 assert_eq!(h.subheading.as_deref(), Some("291422"));
601 assert!(h.confidence >= 0.80);
602 }
603
604 #[test]
605 fn methanol_subheading_290511() {
606 let f = feat(1, 1, 0, false, false, false, false);
607 let h = map_to_subheading(
608 &[FunctionalGroup::Alcohol], &OrganicInorganic::Organic, &f,
609 );
610 assert_eq!(h.subheading.as_deref(), Some("290511"));
611 }
612
613 #[test]
614 fn ethanol_subheading_220710() {
615 let f = feat(2, 1, 0, false, false, false, false);
616 let h = map_to_subheading(
617 &[FunctionalGroup::Alcohol], &OrganicInorganic::Organic, &f,
618 );
619 assert_eq!(h.subheading.as_deref(), Some("220710"));
621 assert_eq!(h.chapter, 22);
622 }
623
624 #[test]
625 fn ethylene_glycol_subheading_290531() {
626 let f = feat(2, 2, 0, false, false, false, false);
627 let h = map_to_subheading(
628 &[FunctionalGroup::Alcohol], &OrganicInorganic::Organic, &f,
629 );
630 assert_eq!(h.subheading.as_deref(), Some("290531"));
631 }
632
633 #[test]
634 fn glycerol_subheading_290541() {
635 let f = feat(3, 3, 0, false, false, false, false);
636 let h = map_to_subheading(
637 &[FunctionalGroup::Alcohol], &OrganicInorganic::Organic, &f,
638 );
639 assert_eq!(h.subheading.as_deref(), Some("290541"));
640 }
641
642 #[test]
643 fn acetic_acid_subheading_291521() {
644 let f = feat(2, 1, 1, false, false, false, false);
645 let h = map_to_subheading(
646 &[FunctionalGroup::CarboxylicAcid], &OrganicInorganic::Organic, &f,
647 );
648 assert_eq!(h.subheading.as_deref(), Some("291521"));
649 }
650
651 #[test]
652 fn formic_acid_subheading_291511() {
653 let f = feat(1, 1, 1, false, false, false, false);
654 let h = map_to_subheading(
655 &[FunctionalGroup::CarboxylicAcid], &OrganicInorganic::Organic, &f,
656 );
657 assert_eq!(h.subheading.as_deref(), Some("291511"));
658 }
659
660 #[test]
661 fn acrylic_acid_subheading_291611() {
662 let f = feat(3, 1, 1, false, false, true, false);
663 let h = map_to_subheading(
664 &[FunctionalGroup::CarboxylicAcid], &OrganicInorganic::Organic, &f,
665 );
666 assert_eq!(h.subheading.as_deref(), Some("291611"));
667 assert_eq!(h.heading, Some(2916));
668 }
669
670 #[test]
671 fn methacrylic_acid_subheading_291613() {
672 let f = feat(4, 1, 1, false, false, true, false);
673 let h = map_to_subheading(
674 &[FunctionalGroup::CarboxylicAcid], &OrganicInorganic::Organic, &f,
675 );
676 assert_eq!(h.subheading.as_deref(), Some("291613"));
677 }
678
679 #[test]
680 fn benzoic_acid_subheading_291631() {
681 let f = feat(7, 1, 1, true, true, false, false);
682 let h = map_to_subheading(
683 &[FunctionalGroup::CarboxylicAcid], &OrganicInorganic::Organic, &f,
684 );
685 assert_eq!(h.subheading.as_deref(), Some("291631"));
686 }
687
688 #[test]
689 fn benzaldehyde_subheading_291211() {
690 let f = feat(7, 0, 1, true, true, false, false);
691 let h = map_to_subheading(
692 &[FunctionalGroup::Aldehyde], &OrganicInorganic::Organic, &f,
693 );
694 assert_eq!(h.subheading.as_deref(), Some("291211"));
695 }
696
697 #[test]
698 fn inorganic_subheading_falls_back_to_heading_only() {
699 let f = feat(0, 0, 0, false, false, false, false);
700 let h = map_to_subheading(&[], &OrganicInorganic::Inorganic, &f);
701 assert!(h.subheading.is_none());
702 assert_eq!(h.chapter, 28);
703 }
704}