Skip to main content

hs_predict/smiles/
chapter_map.rs

1//! Maps detected functional groups to HS chapter / heading hints.
2//!
3//! The mapping is intentionally approximate; predictions from this module
4//! carry confidence ≤ 0.70 and are tagged with
5//! [`PredictionSource::RuleEngine`](crate::types::PredictionSource::RuleEngine).
6//!
7//! # Priority order
8//! More specific functional groups take precedence (e.g. anhydride >
9//! carboxylic acid > alcohol). The first matching rule wins.
10//!
11//! # HS structure used
12//! - Chapter 28 — inorganic chemicals (when `organic_class` is `Inorganic`)
13//! - Chapter 29 — organic chemicals (sub-headings by functional group)
//! - Chapter 38 — misc. chemical preparations (named only as an alternative in the
//!   fallback rationale; the fallback hint itself still reports Chapter 29)
15
16use crate::smiles::detector::FunctionalGroup;
17use crate::types::OrganicInorganic;
18
19// ─────────────────────────────────────────────────────────────────────────────
20// HeadingHint
21// ─────────────────────────────────────────────────────────────────────────────
22
23/// HS chapter / heading hint derived from SMILES functional group analysis.
24#[derive(Debug, Clone, serde::Serialize)]
25pub struct HeadingHint {
26    /// HS chapter number (e.g. `28`, `29`).
27    pub chapter: u8,
28
29    /// Four-digit HS heading (e.g. `2912` for aldehydes).
30    /// `None` when only the chapter can be determined.
31    pub heading: Option<u16>,
32
33    /// Human-readable rationale for the hint.
34    pub rationale: &'static str,
35
36    /// Confidence in [0.0, 1.0].
37    /// Capped at 0.70 because SMILES pattern matching is approximate.
38    pub confidence: f32,
39}
40
41// ─────────────────────────────────────────────────────────────────────────────
42// Mapping table (priority-ordered)
43// ─────────────────────────────────────────────────────────────────────────────
44
45/// Priority-ordered mapping: (FunctionalGroup, chapter, heading, rationale, confidence).
46///
47/// The first entry whose group is present in the detected set wins.
48/// Groups higher in the list are more specific (e.g. anhydride before acid).
49static PRIORITY_MAP: &[(FunctionalGroup, u8, u16, &str, f32)] = &[
50    // ── High-specificity groups ─────────────────────────────────────────
51    (
52        FunctionalGroup::Anhydride,
53        29, 2915,
54        "Acid anhydride → HS 29.15–29.17 (acyclic/aromatic acid anhydrides); \
55         use 29.17 for aromatic anhydrides",
56        0.65,
57    ),
58    (
59        FunctionalGroup::Isocyanate,
60        29, 2929,
61        "Isocyanate / carbodiimide → HS 29.29",
62        0.70,
63    ),
64    (
65        FunctionalGroup::Epoxide,
66        29, 2910,
67        "Epoxide → HS 29.10",
68        0.70,
69    ),
70    (
71        FunctionalGroup::SulphonicAcid,
72        29, 2904,
73        "Organo-sulphonic acid → HS 29.04 (sulphonated derivatives)",
74        0.68,
75    ),
76    (
77        FunctionalGroup::Nitrile,
78        29, 2926,
79        "Nitrile → HS 29.26",
80        0.70,
81    ),
82    (
83        FunctionalGroup::Phosphate,
84        29, 2920,
85        "Organophosphate / phosphonate ester → HS 29.20",
86        0.62,
87    ),
88    // ── Carbonyl groups ─────────────────────────────────────────────────
89    (
90        FunctionalGroup::Amide,
91        29, 2924,
92        "Amide → HS 29.24 (amide-function compounds)",
93        0.67,
94    ),
95    (
96        FunctionalGroup::CarboxylicAcid,
97        29, 2915,
98        "Carboxylic acid → HS 29.15 (acyclic), 29.16 (cyclic), 29.17 (aromatic), \
99         or 29.18 (other with additional functions); heading depends on chain length / ring",
100        0.60,
101    ),
102    (
103        FunctionalGroup::Ester,
104        29, 2915,
105        "Ester → HS 29.15–29.17 (depends on parent acid type and chain length)",
106        0.55,
107    ),
108    (
109        FunctionalGroup::Aldehyde,
110        29, 2912,
111        "Aldehyde → HS 29.12",
112        0.67,
113    ),
114    (
115        FunctionalGroup::Ketone,
116        29, 2914,
117        "Ketone / quinone → HS 29.14",
118        0.67,
119    ),
120    // ── OH groups ───────────────────────────────────────────────────────
121    (
122        FunctionalGroup::Phenol,
123        29, 2907,
124        "Phenol → HS 29.07",
125        0.67,
126    ),
127    (
128        FunctionalGroup::Alcohol,
129        29, 2905,
130        "Alcohol → HS 29.05 (acyclic) or 29.06 (cyclic); \
131         polyols may fall under 29.05 subheading",
132        0.60,
133    ),
134    // ── Organo-sulphur ──────────────────────────────────────────────────
135    (
136        FunctionalGroup::Thiol,
137        29, 2930,
138        "Thiol (mercaptan) → HS 29.30 (organo-sulphur compounds)",
139        0.65,
140    ),
141    (
142        FunctionalGroup::Sulphide,
143        29, 2930,
144        "Thioether / sulphide → HS 29.30 (organo-sulphur compounds)",
145        0.65,
146    ),
147    // ── N-function, O-function, halide ────────────────────────────────
148    (
149        FunctionalGroup::Amine,
150        29, 2921,
151        "Amine → HS 29.21",
152        0.63,
153    ),
154    (
155        FunctionalGroup::Nitro,
156        29, 2904,
157        "Nitro / nitroso compound → HS 29.04",
158        0.60,
159    ),
160    (
161        FunctionalGroup::Ether,
162        29, 2909,
163        "Ether → HS 29.09",
164        0.63,
165    ),
166    (
167        FunctionalGroup::Halide,
168        29, 2903,
169        "Organohalide → HS 29.03",
170        0.65,
171    ),
172    // ── Aromatic (lowest organic priority) ──────────────────────────────
173    (
174        FunctionalGroup::AromaticRing,
175        29, 0,    // heading unknown — only chapter hint
176        "Aromatic compound → Chapter 29; heading depends on substituents",
177        0.40,
178    ),
179];
180
181// ─────────────────────────────────────────────────────────────────────────────
182// Public API
183// ─────────────────────────────────────────────────────────────────────────────
184
185/// Derive an HS chapter / heading hint from functional group analysis.
186///
187/// # Arguments
188/// - `groups` — functional groups detected by [`detect_functional_groups`](crate::smiles::detector::detect_functional_groups).
189/// - `organic_class` — result of [`classify_organic`](crate::smiles::detector::classify_organic).
190///
191/// # Returns
192/// The first matching entry in the priority table, or a Chapter-28/29 generic
193/// fallback if no specific match is found.
194pub fn map_to_heading(
195    groups: &[FunctionalGroup],
196    organic_class: &OrganicInorganic,
197) -> HeadingHint {
198    // ── Inorganic branch ─────────────────────────────────────────────────
199    if matches!(organic_class, OrganicInorganic::Inorganic) {
200        return HeadingHint {
201            chapter: 28,
202            heading: None,
203            rationale: "Inorganic compound → Chapter 28; \
204                        heading depends on element / salt type",
205            confidence: 0.55,
206        };
207    }
208
209    // ── Organometallic branch ────────────────────────────────────────────
210    if matches!(organic_class, OrganicInorganic::Organometallic) {
211        return HeadingHint {
212            chapter: 29,
213            heading: Some(2931),
214            rationale: "Organometallic compound → HS 29.31",
215            confidence: 0.62,
216        };
217    }
218
219    // ── Organic: use priority table ──────────────────────────────────────
220    for &(group, chapter, heading_code, rationale, confidence) in PRIORITY_MAP {
221        if groups.contains(&group) {
222            let heading = if heading_code == 0 { None } else { Some(heading_code) };
223            return HeadingHint { chapter, heading, rationale, confidence };
224        }
225    }
226
227    // ── Fallback: generic organic (Chapter 38 or unclassified Ch.29) ─────
228    HeadingHint {
229        chapter: 29,
230        heading: None,
231        rationale: "Organic compound with no detected functional groups → \
232                    Chapter 29 (unsubstituted hydrocarbon) or Chapter 38",
233        confidence: 0.35,
234    }
235}
236
237// ─────────────────────────────────────────────────────────────────────────────
238// Tests
239// ─────────────────────────────────────────────────────────────────────────────
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: map `groups` for a compound classified as plain organic.
    fn hint(groups: &[FunctionalGroup]) -> HeadingHint {
        map_to_heading(groups, &OrganicInorganic::Organic)
    }

    #[test]
    fn inorganic_gives_ch28() {
        let h = map_to_heading(&[], &OrganicInorganic::Inorganic);
        assert_eq!(h.chapter, 28);
        assert!(h.heading.is_none());
    }

    #[test]
    fn organometallic_gives_2931() {
        let h = map_to_heading(&[], &OrganicInorganic::Organometallic);
        assert_eq!(h.heading, Some(2931));
    }

    #[test]
    fn non_organic_classes_ignore_groups() {
        // The class decides on its own; detected groups must not override it.
        let groups = [FunctionalGroup::Aldehyde];

        let inorganic = map_to_heading(&groups, &OrganicInorganic::Inorganic);
        assert_eq!(inorganic.chapter, 28);
        assert!(inorganic.heading.is_none());

        let organometallic = map_to_heading(&groups, &OrganicInorganic::Organometallic);
        assert_eq!(organometallic.chapter, 29);
        assert_eq!(organometallic.heading, Some(2931));
    }

    #[test]
    fn anhydride_wins_over_acid() {
        let h = hint(&[FunctionalGroup::Anhydride, FunctionalGroup::CarboxylicAcid]);
        // Anhydride is higher priority → heading 2915, not a different one
        assert_eq!(h.heading, Some(2915));
        assert!(h.rationale.to_lowercase().contains("anhydride"));
    }

    #[test]
    fn aldehyde_maps_to_2912() {
        let h = hint(&[FunctionalGroup::Aldehyde]);
        assert_eq!(h.heading, Some(2912));
    }

    #[test]
    fn ketone_maps_to_2914() {
        let h = hint(&[FunctionalGroup::Ketone]);
        assert_eq!(h.heading, Some(2914));
    }

    #[test]
    fn alcohol_maps_to_2905() {
        let h = hint(&[FunctionalGroup::Alcohol]);
        assert_eq!(h.heading, Some(2905));
    }

    #[test]
    fn nitrile_maps_to_2926() {
        let h = hint(&[FunctionalGroup::Nitrile]);
        assert_eq!(h.heading, Some(2926));
    }

    #[test]
    fn amine_maps_to_2921() {
        let h = hint(&[FunctionalGroup::Amine]);
        assert_eq!(h.heading, Some(2921));
    }

    #[test]
    fn halide_maps_to_2903() {
        let h = hint(&[FunctionalGroup::Halide]);
        assert_eq!(h.heading, Some(2903));
    }

    #[test]
    fn no_groups_gives_low_confidence() {
        let h = hint(&[]);
        assert!(h.confidence < 0.50);
    }

    #[test]
    fn no_groups_falls_back_to_ch29_without_heading() {
        let h = hint(&[]);
        assert_eq!(h.chapter, 29);
        assert!(h.heading.is_none());
    }

    #[test]
    fn isocyanate_maps_to_2929() {
        let h = hint(&[FunctionalGroup::Isocyanate]);
        assert_eq!(h.heading, Some(2929));
    }

    #[test]
    fn epoxide_maps_to_2910() {
        let h = hint(&[FunctionalGroup::Epoxide]);
        assert_eq!(h.heading, Some(2910));
    }

    #[test]
    fn aromatic_only_gives_chapter_without_heading() {
        // The table stores heading 0 for aromatic rings; callers must see `None`.
        let h = hint(&[FunctionalGroup::AromaticRing]);
        assert_eq!(h.chapter, 29);
        assert!(h.heading.is_none());
    }

    #[test]
    fn priority_table_respects_confidence_cap() {
        // Rule-engine predictions are documented as never exceeding 0.70.
        for &(_, _, _, rationale, confidence) in PRIORITY_MAP {
            assert!(
                confidence > 0.0 && confidence <= 0.70,
                "confidence {confidence} out of range for rule: {rationale}"
            );
        }
    }
}
323}