Skip to main content

hs_predict/smiles/
chapter_map.rs

//! Maps detected functional groups to HS chapter / heading / subheading hints.
//!
//! The mapping is intentionally approximate. Heading-only hints carry
//! confidence ≤ 0.70; hints that resolve a specific 6-digit subheading may
//! reach 0.90. Predictions from this module are tagged with
//! [`PredictionSource::RuleEngine`](crate::types::PredictionSource::RuleEngine).
//!
//! # Priority order
//! More specific functional groups take precedence (e.g. anhydride >
//! carboxylic acid > alcohol). The first matching rule wins.
//!
//! # HS structure used
//! - Chapter 22 — undenatured ethyl alcohol (22.07), the only subheading hint
//!   that leaves Chapters 28/29
//! - Chapter 28 — inorganic chemicals (when `organic_class` is `Inorganic`)
//! - Chapter 29 — organic chemicals (sub-headings by functional group)
//! - Chapter 38 — misc. chemical preparations (named as an alternative in the
//!   fallback rationale; the fallback hint itself reports Chapter 29)
15
16use crate::smiles::detector::{FunctionalGroup, StructuralFeatures};
17use crate::types::OrganicInorganic;
18
19// ─────────────────────────────────────────────────────────────────────────────
20// HeadingHint
21// ─────────────────────────────────────────────────────────────────────────────
22
23/// HS chapter / heading hint derived from SMILES functional group analysis.
24#[derive(Debug, Clone, serde::Serialize)]
25pub struct HeadingHint {
26    /// HS chapter number (e.g. `28`, `29`).
27    pub chapter: u8,
28
29    /// Four-digit HS heading (e.g. `2914` for ketones).
30    /// `None` when only the chapter can be determined.
31    pub heading: Option<u16>,
32
33    /// Six-digit HS subheading when structural features allow it
34    /// (e.g. `"291411"` for acetone).  `None` when only the 4-digit
35    /// heading can be determined.
36    pub subheading: Option<String>,
37
38    /// Human-readable rationale for the hint.
39    pub rationale: &'static str,
40
41    /// Confidence in [0.0, 1.0].
42    /// Capped at 0.70 for heading-only results; up to 0.90 when a
43    /// specific 6-digit subheading is identified.
44    pub confidence: f32,
45}
46
47// ─────────────────────────────────────────────────────────────────────────────
48// Mapping table (priority-ordered)
49// ─────────────────────────────────────────────────────────────────────────────
50
51/// Priority-ordered mapping: (FunctionalGroup, chapter, heading, rationale, confidence).
52///
53/// The first entry whose group is present in the detected set wins.
54/// Groups higher in the list are more specific (e.g. anhydride before acid).
55static PRIORITY_MAP: &[(FunctionalGroup, u8, u16, &str, f32)] = &[
56    // ── High-specificity groups ─────────────────────────────────────────
57    (
58        FunctionalGroup::Anhydride,
59        29, 2915,
60        "Acid anhydride → HS 29.15–29.17 (acyclic/aromatic acid anhydrides); \
61         use 29.17 for aromatic anhydrides",
62        0.65,
63    ),
64    (
65        FunctionalGroup::Isocyanate,
66        29, 2929,
67        "Isocyanate / carbodiimide → HS 29.29",
68        0.70,
69    ),
70    (
71        FunctionalGroup::Epoxide,
72        29, 2910,
73        "Epoxide → HS 29.10",
74        0.70,
75    ),
76    (
77        FunctionalGroup::SulphonicAcid,
78        29, 2904,
79        "Organo-sulphonic acid → HS 29.04 (sulphonated derivatives)",
80        0.68,
81    ),
82    (
83        FunctionalGroup::Nitrile,
84        29, 2926,
85        "Nitrile → HS 29.26",
86        0.70,
87    ),
88    (
89        FunctionalGroup::Phosphate,
90        29, 2920,
91        "Organophosphate / phosphonate ester → HS 29.20",
92        0.62,
93    ),
94    // ── Carbonyl groups ─────────────────────────────────────────────────
95    (
96        FunctionalGroup::Amide,
97        29, 2924,
98        "Amide → HS 29.24 (amide-function compounds)",
99        0.67,
100    ),
101    (
102        FunctionalGroup::CarboxylicAcid,
103        29, 2915,
104        "Carboxylic acid → HS 29.15 (acyclic), 29.16 (cyclic), 29.17 (aromatic), \
105         or 29.18 (other with additional functions); heading depends on chain length / ring",
106        0.60,
107    ),
108    (
109        FunctionalGroup::Ester,
110        29, 2915,
111        "Ester → HS 29.15–29.17 (depends on parent acid type and chain length)",
112        0.55,
113    ),
114    (
115        FunctionalGroup::Aldehyde,
116        29, 2912,
117        "Aldehyde → HS 29.12",
118        0.67,
119    ),
120    (
121        FunctionalGroup::Ketone,
122        29, 2914,
123        "Ketone / quinone → HS 29.14",
124        0.67,
125    ),
126    // ── OH groups ───────────────────────────────────────────────────────
127    (
128        FunctionalGroup::Phenol,
129        29, 2907,
130        "Phenol → HS 29.07",
131        0.67,
132    ),
133    (
134        FunctionalGroup::Alcohol,
135        29, 2905,
136        "Alcohol → HS 29.05 (acyclic) or 29.06 (cyclic); \
137         polyols may fall under 29.05 subheading",
138        0.60,
139    ),
140    // ── Organo-sulphur ──────────────────────────────────────────────────
141    (
142        FunctionalGroup::Thiol,
143        29, 2930,
144        "Thiol (mercaptan) → HS 29.30 (organo-sulphur compounds)",
145        0.65,
146    ),
147    (
148        FunctionalGroup::Sulphide,
149        29, 2930,
150        "Thioether / sulphide → HS 29.30 (organo-sulphur compounds)",
151        0.65,
152    ),
153    // ── N-function, O-function, halide ────────────────────────────────
154    (
155        FunctionalGroup::Amine,
156        29, 2921,
157        "Amine → HS 29.21",
158        0.63,
159    ),
160    (
161        FunctionalGroup::Nitro,
162        29, 2904,
163        "Nitro / nitroso compound → HS 29.04",
164        0.60,
165    ),
166    (
167        FunctionalGroup::Ether,
168        29, 2909,
169        "Ether → HS 29.09",
170        0.63,
171    ),
172    (
173        FunctionalGroup::Halide,
174        29, 2903,
175        "Organohalide → HS 29.03",
176        0.65,
177    ),
178    // ── Aromatic (lowest organic priority) ──────────────────────────────
179    (
180        FunctionalGroup::AromaticRing,
181        29, 0,    // heading unknown — only chapter hint
182        "Aromatic compound → Chapter 29; heading depends on substituents",
183        0.40,
184    ),
185];
186
187// ─────────────────────────────────────────────────────────────────────────────
188// Public API
189// ─────────────────────────────────────────────────────────────────────────────
190
191/// Derive an HS chapter / heading hint from functional group analysis.
192///
193/// # Arguments
194/// - `groups` — functional groups detected by [`detect_functional_groups`](crate::smiles::detector::detect_functional_groups).
195/// - `organic_class` — result of [`classify_organic`](crate::smiles::detector::classify_organic).
196///
197/// # Returns
198/// The first matching entry in the priority table, or a Chapter-28/29 generic
199/// fallback if no specific match is found.
200pub fn map_to_heading(
201    groups: &[FunctionalGroup],
202    organic_class: &OrganicInorganic,
203) -> HeadingHint {
204    // ── Inorganic branch ─────────────────────────────────────────────────
205    if matches!(organic_class, OrganicInorganic::Inorganic) {
206        return HeadingHint {
207            chapter: 28,
208            heading: None,
209            subheading: None,
210            rationale: "Inorganic compound → Chapter 28; \
211                        heading depends on element / salt type",
212            confidence: 0.55,
213        };
214    }
215
216    // ── Organometallic branch ────────────────────────────────────────────
217    if matches!(organic_class, OrganicInorganic::Organometallic) {
218        return HeadingHint {
219            chapter: 29,
220            heading: Some(2931),
221            subheading: None,
222            rationale: "Organometallic compound → HS 29.31",
223            confidence: 0.62,
224        };
225    }
226
227    // ── Organic: use priority table ──────────────────────────────────────
228    for &(group, chapter, heading_code, rationale, confidence) in PRIORITY_MAP {
229        if groups.contains(&group) {
230            let heading = if heading_code == 0 { None } else { Some(heading_code) };
231            return HeadingHint { chapter, heading, subheading: None, rationale, confidence };
232        }
233    }
234
235    // ── Fallback: generic organic (Chapter 38 or unclassified Ch.29) ─────
236    HeadingHint {
237        chapter: 29,
238        heading: None,
239        subheading: None,
240        rationale: "Organic compound with no detected functional groups → \
241                    Chapter 29 (unsubstituted hydrocarbon) or Chapter 38",
242        confidence: 0.35,
243    }
244}
245
246/// Derive a 6-digit HS subheading when structural features permit.
247///
248/// Combines functional-group detection with atom counts and ring/bond
249/// information.  Covers the three most common Chapter 29 groups in
250/// chemical trade: **ketones** (29.14), **alcohols** (29.05 / 22.07),
251/// and **carboxylic acids** (29.15 / 29.16).
252///
253/// When no 6-digit code can be determined, falls back to
254/// [`map_to_heading`] (returns `subheading: None`).
255pub fn map_to_subheading(
256    groups: &[FunctionalGroup],
257    organic_class: &OrganicInorganic,
258    feat: &StructuralFeatures,
259) -> HeadingHint {
260    // Only applies to pure organic compounds.
261    if !matches!(organic_class, OrganicInorganic::Organic) {
262        return map_to_heading(groups, organic_class);
263    }
264
265    // ── v0.5.2: pure hydrocarbons and simple chloroalkanes ───────────────
266    if feat.is_pure_hydrocarbon {
267        return subheading_hydrocarbon(feat);
268    }
269    if feat.is_chloro_hydrocarbon && !feat.has_ring && !feat.has_cc_double_bond {
270        return subheading_chloroalkane(feat);
271    }
272
273    // ── Ketones (HS 29.14) ────────────────────────────────────────────────
274    if groups.contains(&FunctionalGroup::Ketone) {
275        return subheading_ketone(feat);
276    }
277
278    // ── Alcohols (HS 29.05 / 22.07) ──────────────────────────────────────
279    if groups.contains(&FunctionalGroup::Alcohol) {
280        return subheading_alcohol(feat);
281    }
282
283    // ── Carboxylic acids (HS 29.15 / 29.16) ──────────────────────────────
284    if groups.contains(&FunctionalGroup::CarboxylicAcid) {
285        return subheading_acid(feat);
286    }
287
288    // ── Aldehydes (HS 29.12) ──────────────────────────────────────────────
289    if groups.contains(&FunctionalGroup::Aldehyde) {
290        return subheading_aldehyde(feat);
291    }
292
293    // No specific subheading logic — fall back to heading-only.
294    map_to_heading(groups, organic_class)
295}
296
297// ─────────────────────────────────────────────────────────────────────────────
298// Subheading decision trees
299// ─────────────────────────────────────────────────────────────────────────────
300
301/// HS 29.14 — ketones and quinones.
302fn subheading_ketone(f: &StructuralFeatures) -> HeadingHint {
303    let (code, rationale, conf) = if f.has_aromatic_ring {
304        // Aromatic ketones
305        if f.carbon_count == 8 && f.carbonyl_count == 1 {
306            ("291431", "Phenyl methyl ketone (acetophenone) → HS 29.14.31", 0.82_f32)
307        } else {
308            ("291439", "Other aromatic ketone → HS 29.14.39", 0.65)
309        }
310    } else if f.has_ring {
311        // Cycloaliphatic ketones
312        match f.carbon_count {
313            10 => ("291421", "Camphor (cyclic C10 ketone) → HS 29.14.21", 0.78),
314            6  => ("291422", "Cyclohexanone → HS 29.14.22", 0.85),
315            7  => ("291423", "Methylcyclohexanone → HS 29.14.23", 0.78),
316            _  => ("291429", "Other cycloaliphatic/cycloterpenic ketone → HS 29.14.29", 0.65),
317        }
318    } else {
319        // Acyclic ketones
320        if f.has_halogen {
321            ("291479", "Halogenated ketone derivative → HS 29.14.79", 0.68)
322        } else if f.hydroxyl_count > 0 {
323            ("291440", "Ketone-alcohol or ketone-aldehyde → HS 29.14.40", 0.70)
324        } else {
325            match f.carbon_count {
326                3 => ("291411", "Acetone (3C acyclic ketone) → HS 29.14.11", 0.87),
327                4 => ("291412", "Butanone / MEK (4C acyclic ketone) → HS 29.14.12", 0.83),
328                6 => ("291413",
329                      "4-Methylpentan-2-one / MIBK candidate (6C acyclic ketone) → HS 29.14.13; \
330                       verify branching pattern",
331                      0.72),
332                _ => ("291419", "Other acyclic ketone without other O → HS 29.14.19", 0.68),
333            }
334        }
335    };
336    HeadingHint {
337        chapter: 29,
338        heading: Some(2914),
339        subheading: Some(code.to_string()),
340        rationale,
341        confidence: conf,
342    }
343}
344
345/// HS 29.05 — acyclic alcohols; 22.07 — ethanol special case.
346fn subheading_alcohol(f: &StructuralFeatures) -> HeadingHint {
347    let oh = f.hydroxyl_count.max(1); // guard zero-count edge case
348
349    let (code, chapter, heading, rationale, conf): (&str, u8, u16, &'static str, f32) =
350        if oh >= 3 {
351            match f.carbon_count {
352                3 => ("290541", 29, 2905, "Glycerol (3C triol) → HS 29.05.41", 0.90),
353                _ => ("290549", 29, 2905, "Other polyol → HS 29.05.49", 0.65),
354            }
355        } else if oh == 2 {
356            match f.carbon_count {
357                2 => ("290531", 29, 2905, "Ethylene glycol (2C diol) → HS 29.05.31", 0.88),
358                3 => ("290532", 29, 2905, "Propylene glycol (3C diol) → HS 29.05.32", 0.85),
359                _ => ("290539", 29, 2905, "Other diol → HS 29.05.39", 0.68),
360            }
361        } else {
362            // Monohydric alcohol
363            if f.has_cc_double_bond {
364                ("290529", 29, 2905,
365                 "Unsaturated monohydric acyclic alcohol → HS 29.05.29", 0.65)
366            } else {
367                match f.carbon_count {
368                    1 => ("290511", 29, 2905,
369                          "Methanol (1C) → HS 29.05.11", 0.90),
370                    2 => ("220710", 22, 2207,
371                          "Ethanol (2C) → HS 22.07.10 (undenatured ethyl alcohol ≥ 80 %); \
372                           verify concentration — denatured → 22.07.20, dilute → 22.08",
373                          0.85),
374                    3 => ("290512", 29, 2905,
375                          "Propan-1-ol (3C saturated monohydric) → HS 29.05.12", 0.82),
376                    4 => ("290513", 29, 2905,
377                          "Butan-1-ol (4C primary alcohol) → HS 29.05.13; \
378                           other butanols → 29.05.14",
379                          0.75),
380                    8 => ("290516", 29, 2905,
381                          "Octanol and isomers → HS 29.05.16", 0.78),
382                    12 | 16 | 18 => ("290517", 29, 2905,
383                                     "Dodecan-1-ol / hexadecan-1-ol / octadecan-1-ol \
384                                      → HS 29.05.17",
385                                     0.75),
386                    _ => ("290519", 29, 2905,
387                          "Other saturated monohydric acyclic alcohol → HS 29.05.19", 0.65),
388                }
389            }
390        };
391
392    HeadingHint {
393        chapter,
394        heading: Some(heading),
395        subheading: Some(code.to_string()),
396        rationale,
397        confidence: conf,
398    }
399}
400
401/// HS 29.15 (saturated aliphatic) / 29.16 (unsaturated or aromatic) acids.
402fn subheading_acid(f: &StructuralFeatures) -> HeadingHint {
403    let (code, heading, rationale, conf): (&str, u16, &'static str, f32) =
404        if f.has_aromatic_ring {
405            match f.carbon_count {
406                7 => ("291631", 2916,
407                      "Benzoic acid (7C aromatic acid) → HS 29.16.31", 0.85),
408                8 => ("291634", 2916,
409                      "Phenylacetic acid (8C aromatic acid) → HS 29.16.34", 0.78),
410                _ => ("291639", 2916,
411                      "Other aromatic monocarboxylic acid → HS 29.16.39", 0.65),
412            }
413        } else if f.has_cc_double_bond {
414            // Unsaturated aliphatic → heading 2916
415            match f.carbon_count {
416                3 => ("291611", 2916,
417                      "Acrylic acid (3C unsaturated) → HS 29.16.11", 0.87),
418                4 => ("291613", 2916,
419                      "Methacrylic acid (4C unsaturated, branch C=C) → HS 29.16.13; \
420                       esters → 29.16.14",
421                      0.82),
422                _ => ("291619", 2916,
423                      "Other unsaturated aliphatic monocarboxylic acid → HS 29.16.19", 0.65),
424            }
425        } else {
426            // Saturated aliphatic → heading 2915
427            // hydroxyl_count includes the acid –OH; extra OH means hydroxy-acid (2918)
428            let extra_oh = f.hydroxyl_count.saturating_sub(1);
429            if extra_oh >= 1 {
430                ("291819", 2918,
431                 "Carboxylic acid with additional oxygen function → HS 29.18.19", 0.65)
432            } else {
433                match f.carbon_count {
434                    1  => ("291511", 2915, "Formic acid (1C) → HS 29.15.11", 0.90),
435                    2  => ("291521", 2915, "Acetic acid (2C) → HS 29.15.21", 0.90),
436                    3  => ("291550", 2915, "Propionic acid (3C) → HS 29.15.50", 0.87),
437                    4  => ("291560", 2915,
438                           "Butanoic / butyric acid (4C) → HS 29.15.60", 0.83),
439                    16 | 18 => ("291570", 2915,
440                                "Palmitic / stearic acid (C16/C18) → HS 29.15.70", 0.80),
441                    _  => ("291590", 2915,
442                           "Other saturated acyclic monocarboxylic acid → HS 29.15.90", 0.65),
443                }
444            }
445        };
446
447    HeadingHint {
448        chapter: 29,
449        heading: Some(heading),
450        subheading: Some(code.to_string()),
451        rationale,
452        confidence: conf,
453    }
454}
455
456/// HS 29.12 — aldehydes.
457fn subheading_aldehyde(f: &StructuralFeatures) -> HeadingHint {
458    let (code, rationale, conf): (&str, &'static str, f32) = if f.has_aromatic_ring {
459        match f.carbon_count {
460            7 => ("291211", "Benzaldehyde (7C aromatic aldehyde) → HS 29.12.11", 0.85),
461            _ => ("291219", "Other aromatic aldehyde → HS 29.12.19", 0.65),
462        }
463    } else {
464        match f.carbon_count {
465            1 => ("291211", "Formaldehyde → HS 29.12.11", 0.82),
466            2 => ("291212", "Acetaldehyde (2C) → HS 29.12.12", 0.85),
467            3 => ("291219", "Propanal / acrolein candidate (3C) → HS 29.12.19", 0.72),
468            _ => ("291219", "Other aliphatic aldehyde → HS 29.12.19", 0.65),
469        }
470    };
471    HeadingHint {
472        chapter: 29,
473        heading: Some(2912),
474        subheading: Some(code.to_string()),
475        rationale,
476        confidence: conf,
477    }
478}
479
480/// Resolve a pure hydrocarbon (no heteroatoms) to a 6-digit HS subheading.
481/// HS 2901 = acyclic hydrocarbons; HS 2902 = cyclic hydrocarbons.
482fn subheading_hydrocarbon(f: &StructuralFeatures) -> HeadingHint {
483    if f.has_aromatic_ring {
484        // HS 2902 — cyclic aromatic hydrocarbons
485        let (sub, rationale, conf) = match (f.carbon_count, f.cc_double_bond_count) {
486            (6, _) => ("290220", "benzene", 0.85_f32),
487            (7, _) => ("290230", "toluene", 0.82_f32),
488            (8, n) if n >= 1 => ("290250", "styrene", 0.82_f32),
489            (8, _) => ("290244", "xylene (isomer undetermined)", 0.65_f32),
490            (9, _) => ("290270", "cumene (isopropylbenzene)", 0.75_f32),
491            _      => ("290290", "other aromatic hydrocarbon", 0.60_f32),
492        };
493        return HeadingHint {
494            chapter: 29,
495            heading: Some(2902),
496            subheading: Some(sub.to_string()),
497            rationale,
498            confidence: conf,
499        };
500    }
501
502    if f.has_ring {
503        // HS 2902 — cyclic non-aromatic (cycloalkanes/cycloalkenes)
504        let (sub, rationale, conf) = match f.carbon_count {
505            6 => ("290211", "cyclohexane", 0.85_f32),
506            _ => ("290219", "other cycloalkane", 0.65_f32),
507        };
508        return HeadingHint {
509            chapter: 29,
510            heading: Some(2902),
511            subheading: Some(sub.to_string()),
512            rationale,
513            confidence: conf,
514        };
515    }
516
517    // HS 2901 — acyclic hydrocarbons
518    if f.has_triple_bond {
519        return HeadingHint {
520            chapter: 29,
521            heading: Some(2901),
522            subheading: Some("290129".to_string()),
523            rationale: "alkyne (acyclic unsaturated)",
524            confidence: 0.65,
525        };
526    }
527
528    let (sub, rationale, conf) = match (f.cc_double_bond_count, f.carbon_count) {
529        (0, _) => ("290110", "saturated acyclic hydrocarbon (alkane)", 0.72_f32),
530        (1, 2) => ("290121", "ethylene", 0.90_f32),
531        (1, 3) => ("290122", "propylene", 0.88_f32),
532        (1, 4) => ("290123", "butylene", 0.85_f32),
533        (1, _) => ("290129", "other unsaturated acyclic hydrocarbon", 0.70_f32),
534        (n, 4) if n >= 2 => ("290124", "buta-1,3-diene", 0.88_f32),
535        (n, 5) if n >= 2 => ("290124", "isoprene (2-methylbuta-1,3-diene)", 0.87_f32),
536        _      => ("290129", "other diene or polyene", 0.68_f32),
537    };
538    HeadingHint {
539        chapter: 29,
540        heading: Some(2901),
541        subheading: Some(sub.to_string()),
542        rationale,
543        confidence: conf,
544    }
545}
546
547/// Resolve a simple chlorinated hydrocarbon (C + H + Cl only, saturated,
548/// acyclic) to a 6-digit HS subheading within 2903.
549fn subheading_chloroalkane(f: &StructuralFeatures) -> HeadingHint {
550    let (sub, rationale, conf) = match (f.carbon_count, f.chlorine_count) {
551        (1, 1) => ("290311", "chloromethane (methyl chloride)", 0.85_f32),
552        (1, 2) => ("290312", "dichloromethane (methylene chloride)", 0.90_f32),
553        (1, 3) => ("290313", "chloroform (trichloromethane)", 0.90_f32),
554        (1, 4) => ("290314", "carbon tetrachloride", 0.90_f32),
555        (2, 1) => ("290311", "chloroethane (ethyl chloride)", 0.82_f32),
556        // 1,2-DCE vs 1,1-DCE cannot be distinguished by substring matching
557        (2, 2) => ("290315", "ethylene dichloride (1,2-DCE, most likely isomer)", 0.72_f32),
558        _      => ("290319", "other chlorinated hydrocarbon", 0.60_f32),
559    };
560    HeadingHint {
561        chapter: 29,
562        heading: Some(2903),
563        subheading: Some(sub.to_string()),
564        rationale,
565        confidence: conf,
566    }
567}
568
569// ─────────────────────────────────────────────────────────────────────────────
570// Tests
571// ─────────────────────────────────────────────────────────────────────────────
572
#[cfg(test)]
mod tests {
    use super::*;
    use crate::smiles::classify_smiles;

    /// Heading-level hint for an organic compound with the given groups.
    fn hint(groups: &[FunctionalGroup]) -> HeadingHint {
        map_to_heading(groups, &OrganicInorganic::Organic)
    }

    // ── map_to_heading ────────────────────────────────────────────────────

    #[test]
    fn inorganic_gives_ch28() {
        let h = map_to_heading(&[], &OrganicInorganic::Inorganic);
        assert_eq!(h.chapter, 28);
        assert!(h.heading.is_none());
    }

    #[test]
    fn organometallic_gives_2931() {
        let h = map_to_heading(&[], &OrganicInorganic::Organometallic);
        assert_eq!(h.heading, Some(2931));
    }

    #[test]
    fn anhydride_wins_over_acid() {
        let h = hint(&[FunctionalGroup::Anhydride, FunctionalGroup::CarboxylicAcid]);
        // Both rows name heading 2915, so the rationale is what proves that
        // the anhydride row (higher priority) was the one selected.
        assert_eq!(h.heading, Some(2915));
        assert!(h.rationale.to_lowercase().contains("anhydride"));
    }

    #[test]
    fn aldehyde_maps_to_2912() {
        let h = hint(&[FunctionalGroup::Aldehyde]);
        assert_eq!(h.heading, Some(2912));
    }

    #[test]
    fn ketone_maps_to_2914() {
        let h = hint(&[FunctionalGroup::Ketone]);
        assert_eq!(h.heading, Some(2914));
    }

    #[test]
    fn alcohol_maps_to_2905() {
        let h = hint(&[FunctionalGroup::Alcohol]);
        assert_eq!(h.heading, Some(2905));
    }

    #[test]
    fn nitrile_maps_to_2926() {
        let h = hint(&[FunctionalGroup::Nitrile]);
        assert_eq!(h.heading, Some(2926));
    }

    #[test]
    fn amine_maps_to_2921() {
        let h = hint(&[FunctionalGroup::Amine]);
        assert_eq!(h.heading, Some(2921));
    }

    #[test]
    fn halide_maps_to_2903() {
        let h = hint(&[FunctionalGroup::Halide]);
        assert_eq!(h.heading, Some(2903));
    }

    #[test]
    fn no_groups_gives_low_confidence() {
        let h = hint(&[]);
        assert!(h.confidence < 0.50);
    }

    #[test]
    fn isocyanate_maps_to_2929() {
        let h = hint(&[FunctionalGroup::Isocyanate]);
        assert_eq!(h.heading, Some(2929));
    }

    #[test]
    fn epoxide_maps_to_2910() {
        let h = hint(&[FunctionalGroup::Epoxide]);
        assert_eq!(h.heading, Some(2910));
    }

    // ── map_to_subheading ─────────────────────────────────────────────────

    /// Build a feature set by hand; fields without a parameter are zero/false.
    fn feat(carbon: u32, oh: u32, co: u32, ring: bool, arom: bool, cc: bool, hal: bool)
        -> StructuralFeatures
    {
        StructuralFeatures {
            carbon_count: carbon,
            hydroxyl_count: oh,
            carbonyl_count: co,
            has_ring: ring,
            has_aromatic_ring: arom,
            has_cc_double_bond: cc,
            has_halogen: hal,
            cc_double_bond_count: 0,
            has_triple_bond: false,
            chlorine_count: 0,
            is_pure_hydrocarbon: false,
            is_chloro_hydrocarbon: false,
        }
    }

    /// Subheading-level hint for an organic compound with a single group.
    fn sub(group: FunctionalGroup, f: &StructuralFeatures) -> HeadingHint {
        map_to_subheading(&[group], &OrganicInorganic::Organic, f)
    }

    #[test]
    fn acetone_subheading_291411() {
        let f = feat(3, 0, 1, false, false, false, false);
        let h = sub(FunctionalGroup::Ketone, &f);
        assert_eq!(h.subheading.as_deref(), Some("291411"));
        assert!(h.confidence >= 0.85);
    }

    #[test]
    fn mek_subheading_291412() {
        let f = feat(4, 0, 1, false, false, false, false);
        let h = sub(FunctionalGroup::Ketone, &f);
        assert_eq!(h.subheading.as_deref(), Some("291412"));
    }

    #[test]
    fn cyclohexanone_subheading_291422() {
        let f = feat(6, 0, 1, true, false, false, false);
        let h = sub(FunctionalGroup::Ketone, &f);
        assert_eq!(h.subheading.as_deref(), Some("291422"));
        assert!(h.confidence >= 0.80);
    }

    #[test]
    fn methanol_subheading_290511() {
        let f = feat(1, 1, 0, false, false, false, false);
        let h = sub(FunctionalGroup::Alcohol, &f);
        assert_eq!(h.subheading.as_deref(), Some("290511"));
    }

    #[test]
    fn ethanol_subheading_220710() {
        let f = feat(2, 1, 0, false, false, false, false);
        let h = sub(FunctionalGroup::Alcohol, &f);
        // Ethanol goes to Ch. 22, not Ch. 29
        assert_eq!(h.subheading.as_deref(), Some("220710"));
        assert_eq!(h.chapter, 22);
    }

    #[test]
    fn ethylene_glycol_subheading_290531() {
        let f = feat(2, 2, 0, false, false, false, false);
        let h = sub(FunctionalGroup::Alcohol, &f);
        assert_eq!(h.subheading.as_deref(), Some("290531"));
    }

    #[test]
    fn glycerol_subheading_290545() {
        let f = feat(3, 3, 0, false, false, false, false);
        let h = sub(FunctionalGroup::Alcohol, &f);
        // HS 2905.45 is glycerol; 2905.41 is trimethylolpropane.
        assert_eq!(h.subheading.as_deref(), Some("290545"));
    }

    #[test]
    fn acetic_acid_subheading_291521() {
        let f = feat(2, 1, 1, false, false, false, false);
        let h = sub(FunctionalGroup::CarboxylicAcid, &f);
        assert_eq!(h.subheading.as_deref(), Some("291521"));
    }

    #[test]
    fn formic_acid_subheading_291511() {
        let f = feat(1, 1, 1, false, false, false, false);
        let h = sub(FunctionalGroup::CarboxylicAcid, &f);
        assert_eq!(h.subheading.as_deref(), Some("291511"));
    }

    #[test]
    fn acrylic_acid_subheading_291611() {
        let f = feat(3, 1, 1, false, false, true, false);
        let h = sub(FunctionalGroup::CarboxylicAcid, &f);
        assert_eq!(h.subheading.as_deref(), Some("291611"));
        assert_eq!(h.heading, Some(2916));
    }

    #[test]
    fn methacrylic_acid_subheading_291613() {
        let f = feat(4, 1, 1, false, false, true, false);
        let h = sub(FunctionalGroup::CarboxylicAcid, &f);
        assert_eq!(h.subheading.as_deref(), Some("291613"));
    }

    #[test]
    fn benzoic_acid_subheading_291631() {
        let f = feat(7, 1, 1, true, true, false, false);
        let h = sub(FunctionalGroup::CarboxylicAcid, &f);
        assert_eq!(h.subheading.as_deref(), Some("291631"));
    }

    #[test]
    fn benzaldehyde_subheading_291221() {
        let f = feat(7, 0, 1, true, true, false, false);
        let h = sub(FunctionalGroup::Aldehyde, &f);
        // HS 2912.21 is benzaldehyde; 2912.11 is methanal (formaldehyde).
        assert_eq!(h.subheading.as_deref(), Some("291221"));
    }

    #[test]
    fn inorganic_subheading_falls_back_to_heading_only() {
        let f = feat(0, 0, 0, false, false, false, false);
        let h = map_to_subheading(&[], &OrganicInorganic::Inorganic, &f);
        assert!(h.subheading.is_none());
        assert_eq!(h.chapter, 28);
    }

    // ── v0.5.2 hydrocarbon / chloroalkane subheadings ────────────────────

    #[test]
    fn isoprene_subheading_290124() {
        let r = classify_smiles("C=CC(C)=C").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290124"));
        assert!(r.heading_hint.confidence >= 0.85);
    }

    #[test]
    fn buta13diene_subheading_290124() {
        let r = classify_smiles("C=CC=C").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290124"));
    }

    #[test]
    fn cyclohexane_subheading_290211() {
        let r = classify_smiles("C1CCCCC1").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290211"));
        assert!(r.heading_hint.confidence >= 0.85);
    }

    #[test]
    fn ethylene_subheading_290121() {
        let r = classify_smiles("C=C").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290121"));
        assert!(r.heading_hint.confidence >= 0.88);
    }

    #[test]
    fn propylene_subheading_290122() {
        let r = classify_smiles("CC=C").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290122"));
    }

    #[test]
    fn hexane_subheading_290110() {
        let r = classify_smiles("CCCCCC").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290110"));
    }

    #[test]
    fn benzene_subheading_290220() {
        let r = classify_smiles("c1ccccc1").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290220"));
        assert_eq!(r.heading_hint.chapter, 29);
    }

    #[test]
    fn toluene_subheading_290230() {
        let r = classify_smiles("Cc1ccccc1").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290230"));
    }

    #[test]
    fn styrene_subheading_290250() {
        let r = classify_smiles("C=Cc1ccccc1").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290250"));
    }

    #[test]
    fn dcm_subheading_290312() {
        let r = classify_smiles("ClCCl").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290312"));
        assert!(r.heading_hint.confidence >= 0.88);
    }

    #[test]
    fn chloroform_subheading_290313() {
        let r = classify_smiles("ClC(Cl)Cl").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290313"));
    }

    #[test]
    fn ccl4_subheading_290314() {
        let r = classify_smiles("ClC(Cl)(Cl)Cl").unwrap();
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290314"));
    }
}
881    fn ccl4_subheading_290314() {
882        let r = classify_smiles("ClC(Cl)(Cl)Cl").unwrap();
883        assert_eq!(r.heading_hint.subheading.as_deref(), Some("290314"));
884    }
885}