Skip to main content

hs_predict/
mixture.rs

1//! Mixture classification using the WCO General Rules for Interpretation (GRI).
2//!
3//! Mixtures and preparations of chemical substances must be classified according
4//! to the General Rules for the Interpretation of the Harmonized System (GRIs):
5//!
6//! | GRI | Principle |
7//! |-----|-----------|
8//! | 3a  | Most specific description (when all components share the same chapter, use the most specific heading) |
//! | 3b  | Essential character (classify by the component that gives the mixture its essential character; here, the dominant component by weight, strictly > 50 % w/w) |
10//! | 3c  | Last heading numerically (when 3b cannot determine essential character, use the heading that occurs last among those equally worthy of consideration) |
11//!
12//! ## Chapter priority (special cases handled before GRI 3)
13//! Before applying GRI 3, the intended use is checked. Some uses are so
14//! determinative that they override structural analysis:
15//!
16//! | Intended use | Chapter |
17//! |---|---|
18//! | Pharmaceutical | Ch. 30 |
//! | Agricultural (pesticide formulation) | Ch. 38 (heading 38.08) |
20//! | Cosmetic / beauty | Ch. 33 |
21//! | Food additive / food preparation | Ch. 21 |
22
23use crate::error::{HsPredictError, Result};
24use crate::rules::chapter38::{
25    classify_by_intended_use, special_chapter_by_use, CHAPTER38_CATCH_ALL_CODE,
26    CHAPTER38_CATCH_ALL_DESC,
27};
28use crate::rules::jp_table::{find_jp_rule, JP_TARIFF_YEAR};
29use crate::types::{
30    GrayZone, HsPrediction, IntendedUse, MixtureComponent, ProductDescription,
31    PredictionSource, RecommendedAction,
32};
33
34// ─────────────────────────────────────────────────────────────────────────────
35// Public entry point
36// ─────────────────────────────────────────────────────────────────────────────
37
38/// Classify a mixture product using GRI 3 rules.
39///
40/// This function is called by [`HsPipeline::classify`](crate::pipeline::HsPipeline::classify)
41/// when `product.is_mixture()` is `true`.
42///
43/// ## Classification steps
44/// 1. Special use check (Ch. 30 / 33 / 21 / 38.08).
45/// 2. Classify each component individually via `classify_component`.
46/// 3. GRI 3a: all components in the same chapter → most specific heading.
47/// 4. GRI 3b: dominant component (>50% w/w) → that component's classification.
48/// 5. GRI 3c fallback: last heading by number (low confidence, gray zone set).
49pub(crate) fn classify_mixture(
50    product: &ProductDescription,
51    classify_component: impl Fn(&ProductDescription) -> Result<HsPrediction>,
52) -> Result<HsPrediction> {
53    let components = product
54        .mixture_components
55        .as_ref()
56        .filter(|v| !v.is_empty())
57        .ok_or(HsPredictError::MissingIdentifier)?;
58
59    // ── Step 0: Intended-use special chapters ─────────────────────────────
60    if let Some(ref intended_use) = product.intended_use {
61        // Non-Ch.38 special chapters (pharma, cosmetic, food)
62        if let Some((hs_code, desc, confidence)) = special_chapter_by_use(intended_use) {
63            return Ok(PredictionBuilder {
64                hs_code: hs_code.to_string(),
65                heading_description: desc.to_string(),
66                confidence,
67                source: PredictionSource::EmbeddedRule {
68                    rule_id: "chapter38::special_use".to_string(),
69                },
70                notes: vec![format!(
71                    "Mixture classified by intended use ({}); verify with Chapter Notes.",
72                    intended_use_label(intended_use)
73                )],
74                gray_zone: None,
75                recommended_action: RecommendedAction::VerifyWithLlm,
76            }
77            .build());
78        }
79
80        // Ch. 38 agricultural preparations
81        if let Some((hs_code, desc, confidence)) = classify_by_intended_use(intended_use) {
82            return Ok(PredictionBuilder {
83                hs_code: hs_code.to_string(),
84                heading_description: desc.to_string(),
85                confidence,
86                source: PredictionSource::EmbeddedRule {
87                    rule_id: "chapter38::agricultural".to_string(),
88                },
89                notes: vec![
90                    "Mixture classified by agricultural intended use → Ch. 38.08.".to_string(),
91                    "Verify: active ingredient type and concentration may shift the sub-heading."
92                        .to_string(),
93                ],
94                gray_zone: Some(GrayZone::Chapter29vs38),
95                recommended_action: RecommendedAction::PriorConsultation,
96            }
97            .build());
98        }
99    }
100
101    // ── Step 1: Classify each component individually ──────────────────────
102    let mut component_preds: Vec<(Option<f64>, HsPrediction)> = Vec::new();
103    let mut unclassified_count = 0usize;
104
105    for comp in components {
106        let comp_product = component_to_product(comp);
107        match classify_component(&comp_product) {
108            Ok(pred) => {
109                component_preds.push((comp.weight_fraction_pct, pred));
110            }
111            Err(_) => {
112                // Could not classify this component — track but continue
113                unclassified_count += 1;
114            }
115        }
116    }
117
118    // If we couldn't classify any component, fall back to Ch.38 catch-all
119    if component_preds.is_empty() {
120        return Ok(ch38_catch_all(
121            vec![
122                "No components could be individually classified.".to_string(),
123                "Review each component's CAS/SMILES and consult a trade-compliance expert."
124                    .to_string(),
125            ],
126            0.35,
127        ));
128    }
129
130    // ── Step 2: GRI 3a — all components in the same chapter ──────────────
131    if let Some(gri3a_result) = try_gri3a(&component_preds) {
132        return Ok(gri3a_result);
133    }
134
135    // ── Step 3: GRI 3b — dominant component (> 50 % w/w) ─────────────────
136    if let Some(gri3b_result) = try_gri3b(&component_preds) {
137        return Ok(gri3b_result);
138    }
139
140    // ── Step 4: GRI 3c — last heading numerically ─────────────────────────
141    Ok(gri3c(&component_preds, unclassified_count))
142}
143
144// ─────────────────────────────────────────────────────────────────────────────
145// GRI implementations
146// ─────────────────────────────────────────────────────────────────────────────
147
148/// GRI 3a: all components share the same 2-digit chapter → use the most
149/// specific heading (the one with the highest confidence).
150fn try_gri3a(
151    component_preds: &[(Option<f64>, HsPrediction)],
152) -> Option<HsPrediction> {
153    if component_preds.is_empty() {
154        return None;
155    }
156
157    let first_chapter = &component_preds[0].1.hs_code[..2];
158    let all_same_chapter = component_preds
159        .iter()
160        .all(|(_, p)| &p.hs_code[..2] == first_chapter);
161
162    if !all_same_chapter {
163        return None;
164    }
165
166    // All in the same chapter → pick the most specific (highest confidence).
167    // Use `unwrap_or(Ordering::Equal)` so that NaN confidences (which can arise
168    // from user-constructed or LLM-supplied `HsPrediction`s) do not panic.
169    let best = component_preds
170        .iter()
171        .max_by(|(_, a), (_, b)| {
172            a.confidence
173                .partial_cmp(&b.confidence)
174                .unwrap_or(std::cmp::Ordering::Equal)
175        })?;
176
177    let pred = &best.1;
178    let confidence = (pred.confidence * 0.90).min(0.85); // slightly lower for mixture
179    let recommended_action = if confidence >= 0.85 {
180        RecommendedAction::Accept
181    } else {
182        RecommendedAction::VerifyWithLlm
183    };
184
185    Some(
186        PredictionBuilder {
187            hs_code: pred.hs_code.clone(),
188            heading_description: pred.heading_description.clone(),
189            confidence,
190            source: PredictionSource::RuleEngine {
191                matched_rules: vec!["GRI-3a: all components same chapter".to_string()],
192            },
193            notes: vec![format!(
194                "GRI 3a applied: all {} component(s) are in Chapter {}. \
195                 Most specific heading selected by confidence.",
196                component_preds.len(),
197                first_chapter
198            )],
199            gray_zone: None, // no gray zone when chapter is unambiguous
200            recommended_action,
201        }
202        .build(),
203    )
204}
205
206/// GRI 3b: if one component exceeds 50 % w/w, it determines the essential
207/// character and the mixture is classified with that component's HS code.
208fn try_gri3b(
209    component_preds: &[(Option<f64>, HsPrediction)],
210) -> Option<HsPrediction> {
211    // Find a component with known weight fraction > 50 %
212    let dominant = component_preds
213        .iter()
214        .find(|(frac, _)| frac.map(|f| f > 50.0).unwrap_or(false));
215
216    let (frac, pred) = dominant?;
217    let fraction = frac.unwrap();
218    let confidence = (pred.confidence * 0.88).min(0.82); // slightly lower for mixture
219
220    // A dominant organic component in Ch.29 still has Ch.29 vs Ch.38 risk
221    let gray_zone = if pred.hs_code.starts_with("29") {
222        Some(GrayZone::Chapter29vs38)
223    } else {
224        None
225    };
226
227    let recommended_action = match (&gray_zone, confidence >= 0.75) {
228        (Some(_), _) => RecommendedAction::PriorConsultation,
229        (None, true) => RecommendedAction::VerifyWithLlm,
230        (None, false) => RecommendedAction::ExpertReview,
231    };
232
233    let mut notes = vec![format!(
234        "GRI 3b applied: dominant component ({:.1}% w/w) determines essential character.",
235        fraction
236    )];
237    if gray_zone.is_some() {
238        notes.push(
239            "Chapter 29 vs 38 boundary: verify whether this mixture is sold as a \
240             pure substance (Ch. 29) or as a prepared formulation (Ch. 38)."
241                .to_string(),
242        );
243    }
244
245    Some(
246        PredictionBuilder {
247            hs_code: pred.hs_code.clone(),
248            heading_description: pred.heading_description.clone(),
249            confidence,
250            source: PredictionSource::RuleEngine {
251                matched_rules: vec![format!("GRI-3b: dominant component {:.1}% w/w", fraction)],
252            },
253            notes,
254            gray_zone,
255            recommended_action,
256        }
257        .build(),
258    )
259}
260
261/// GRI 3c: when essential character cannot be determined, use the heading that
262/// occurs last in numeric order among all candidate headings.
263fn gri3c(
264    component_preds: &[(Option<f64>, HsPrediction)],
265    unclassified_count: usize,
266) -> HsPrediction {
267    // Sort by HS code string (lexicographic = numeric for 6-digit codes)
268    let last = component_preds
269        .iter()
270        .max_by(|(_, a), (_, b)| a.hs_code.cmp(&b.hs_code));
271
272    if let Some((_, pred)) = last {
273        let mut notes = vec![
274            "GRI 3c applied: essential character could not be determined (no dominant \
275             component >50% w/w); last heading by numeric order was used."
276                .to_string(),
277            "Confidence is LOW. An advance ruling (事前教示) from customs is strongly \
278             recommended before making a declaration."
279                .to_string(),
280        ];
281        if unclassified_count > 0 {
282            notes.push(format!(
283                "{} component(s) could not be classified individually and were excluded.",
284                unclassified_count
285            ));
286        }
287
288        PredictionBuilder {
289            hs_code: pred.hs_code.clone(),
290            heading_description: pred.heading_description.clone(),
291            confidence: 0.40,
292            source: PredictionSource::RuleEngine {
293                matched_rules: vec!["GRI-3c: last heading numerically".to_string()],
294            },
295            notes,
296            gray_zone: Some(GrayZone::MixtureEssentialCharacterUnclear),
297            recommended_action: RecommendedAction::PriorConsultation,
298        }
299        .build()
300    } else {
301        ch38_catch_all(
302            vec![
303                "GRI 3c could not be applied (no components classified).".to_string(),
304                "Ch. 38 NEC catch-all used as last resort.".to_string(),
305            ],
306            0.30,
307        )
308    }
309}
310
311// ─────────────────────────────────────────────────────────────────────────────
312// Helpers
313// ─────────────────────────────────────────────────────────────────────────────
314
315/// Build a Ch.38 catch-all prediction for unclassifiable mixtures.
316fn ch38_catch_all(notes: Vec<String>, confidence: f32) -> HsPrediction {
317    PredictionBuilder {
318        hs_code: CHAPTER38_CATCH_ALL_CODE.to_string(),
319        heading_description: CHAPTER38_CATCH_ALL_DESC.to_string(),
320        confidence,
321        source: PredictionSource::RuleEngine {
322            matched_rules: vec!["chapter38::catch_all".to_string()],
323        },
324        notes,
325        gray_zone: Some(GrayZone::Chapter29vs38),
326        recommended_action: RecommendedAction::PriorConsultation,
327    }
328    .build()
329}
330
331/// Convert a [`MixtureComponent`] into a [`ProductDescription`] suitable for
332/// single-substance classification.
333fn component_to_product(comp: &MixtureComponent) -> ProductDescription {
334    ProductDescription {
335        identifier: comp.substance.clone(),
336        physical_form: None, // form not relevant for component classification
337        purity_pct: None,
338        purity_type: None,
339        mixture_components: None, // treat as pure substance
340        intended_use: None,
341        additional_context: None,
342    }
343}
344
345/// Builder for [`HsPrediction`] used by mixture classification.
346///
347/// All fields use named-struct literal initialisation at the call site, which
348/// avoids the readability cost of a 9-argument function. The JP tariff lookup
349/// is performed centrally in [`PredictionBuilder::build`].
350struct PredictionBuilder {
351    hs_code: String,
352    heading_description: String,
353    confidence: f32,
354    source: PredictionSource,
355    notes: Vec<String>,
356    gray_zone: Option<GrayZone>,
357    recommended_action: RecommendedAction,
358}
359
360impl PredictionBuilder {
361    /// Finalise the builder into an [`HsPrediction`], populating Japan-specific
362    /// tariff fields from the embedded JP table.
363    fn build(self) -> HsPrediction {
364        let jp = find_jp_rule(&self.hs_code);
365        HsPrediction {
366            hs_code: self.hs_code,
367            heading_description: self.heading_description,
368            confidence: self.confidence,
369            source: self.source,
370            notes: self.notes,
371            alternatives: vec![],
372            recommended_action: self.recommended_action,
373            gray_zone: self.gray_zone,
374            jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
375            jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
376        }
377    }
378}
379
380fn intended_use_label(use_: &IntendedUse) -> &'static str {
381    match use_ {
382        IntendedUse::Pharmaceutical => "pharmaceutical",
383        IntendedUse::Agricultural => "agricultural",
384        IntendedUse::Cosmetic => "cosmetic",
385        IntendedUse::Food => "food",
386        IntendedUse::Industrial => "industrial",
387        IntendedUse::Other(_) => "other",
388    }
389}
390
391// ─────────────────────────────────────────────────────────────────────────────
392// Tests
393// ─────────────────────────────────────────────────────────────────────────────
394
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{MixtureComponent, SubstanceIdentifier};

    /// Fabricate a bare prediction with the given code and confidence; every
    /// other field gets a neutral placeholder value.
    fn make_pred(hs_code: &str, confidence: f32) -> HsPrediction {
        let heading_description = format!("Test heading for {}", hs_code);
        HsPrediction {
            hs_code: hs_code.to_string(),
            heading_description,
            confidence,
            source: PredictionSource::EmbeddedRule { rule_id: "test".to_string() },
            notes: Vec::new(),
            alternatives: Vec::new(),
            recommended_action: RecommendedAction::Accept,
            gray_zone: None,
            jp_tariff_code: None,
            jp_tariff_year: None,
        }
    }

    /// Component identified by CAS number, with an optional weight fraction.
    fn comp(cas: &str, weight_pct: Option<f64>) -> MixtureComponent {
        MixtureComponent {
            substance: SubstanceIdentifier::from_cas(cas),
            weight_fraction_pct: weight_pct,
            volume_fraction_pct: None,
            is_solvent: false,
        }
    }

    /// Mixture product with a default identifier, the given components and
    /// intended use, and nothing else set.
    fn mixture_product(
        components: Vec<MixtureComponent>,
        intended_use: Option<IntendedUse>,
    ) -> ProductDescription {
        ProductDescription {
            identifier: SubstanceIdentifier::default(),
            physical_form: None,
            purity_pct: None,
            purity_type: None,
            mixture_components: Some(components),
            intended_use,
            additional_context: None,
        }
    }

    /// Component classifier stub: every component becomes 290511 @ 0.97.
    fn stub_classifier(_product: &ProductDescription) -> Result<HsPrediction> {
        Ok(make_pred("290511", 0.97))
    }

    #[test]
    fn gri3a_same_chapter_picks_highest_confidence() {
        let candidates = [
            (Some(40.0), make_pred("290511", 0.97)), // methanol Ch.29
            (Some(60.0), make_pred("290531", 0.90)), // ethylene glycol Ch.29
        ];
        let outcome = try_gri3a(&candidates).unwrap();
        // The higher-confidence component wins, and no gray zone is raised.
        assert_eq!(outcome.hs_code, "290511");
        assert!(outcome.gray_zone.is_none());
    }

    #[test]
    fn gri3a_different_chapters_returns_none() {
        let candidates = [
            (Some(50.0), make_pred("281511", 0.97)), // NaOH Ch.28
            (Some(50.0), make_pred("290511", 0.97)), // methanol Ch.29
        ];
        assert!(try_gri3a(&candidates).is_none());
    }

    #[test]
    fn gri3b_dominant_component_wins() {
        let candidates = [
            (Some(70.0), make_pred("280700", 0.97)), // sulphuric acid 70%
            (Some(30.0), make_pred("290531", 0.97)), // ethylene glycol 30%
        ];
        let outcome = try_gri3b(&candidates).unwrap();
        assert_eq!(outcome.hs_code, "280700");
    }

    #[test]
    fn gri3b_no_dominant_returns_none() {
        let candidates = [
            (Some(40.0), make_pred("280700", 0.97)),
            (Some(40.0), make_pred("290511", 0.97)),
        ];
        assert!(try_gri3b(&candidates).is_none());
    }

    #[test]
    fn gri3b_ch29_sets_gray_zone() {
        let candidates = [
            (Some(60.0), make_pred("290531", 0.97)), // ethylene glycol 60%
            (Some(40.0), make_pred("280700", 0.90)), // sulphuric acid 40%
        ];
        let outcome = try_gri3b(&candidates).unwrap();
        assert_eq!(outcome.gray_zone, Some(GrayZone::Chapter29vs38));
        assert_eq!(outcome.recommended_action, RecommendedAction::PriorConsultation);
    }

    #[test]
    fn gri3c_picks_last_heading_numerically() {
        let candidates = [
            (Some(35.0), make_pred("280700", 0.90)),
            (Some(35.0), make_pred("290511", 0.90)), // greatest code
            (Some(30.0), make_pred("280610", 0.90)),
        ];
        let outcome = gri3c(&candidates, 0);
        assert_eq!(outcome.hs_code, "290511");
        assert_eq!(outcome.gray_zone, Some(GrayZone::MixtureEssentialCharacterUnclear));
        assert_eq!(outcome.recommended_action, RecommendedAction::PriorConsultation);
        assert!(outcome.confidence <= 0.40);
    }

    #[test]
    fn pharmaceutical_use_gives_ch30() {
        let product = mixture_product(
            vec![comp("64-17-5", Some(50.0))],
            Some(IntendedUse::Pharmaceutical),
        );
        let outcome = classify_mixture(&product, stub_classifier).unwrap();
        assert_eq!(&outcome.hs_code[..2], "30");
    }

    #[test]
    fn agricultural_use_gives_ch38() {
        let product = mixture_product(
            vec![comp("64-17-5", Some(50.0))],
            Some(IntendedUse::Agricultural),
        );
        let outcome = classify_mixture(&product, stub_classifier).unwrap();
        assert_eq!(&outcome.hs_code[..2], "38");
        assert_eq!(outcome.recommended_action, RecommendedAction::PriorConsultation);
    }

    #[test]
    fn empty_components_returns_error() {
        let product = mixture_product(Vec::new(), None);
        let outcome = classify_mixture(&product, stub_classifier);
        assert!(outcome.is_err());
    }

    /// When no component has a weight, GRI 3b cannot pick a dominant one, so
    /// GRI 3c decides.
    #[test]
    fn all_unknown_weights_falls_to_gri3c() {
        let candidates = [
            (None, make_pred("280700", 0.90)), // sulphuric acid — no weight
            (None, make_pred("290511", 0.90)), // methanol — no weight, greater code
        ];
        // GRI 3b requires a weight fraction.
        assert!(
            try_gri3b(&candidates).is_none(),
            "GRI 3b must return None when all weights are unknown"
        );
        // GRI 3c takes the greatest code.
        let outcome = gri3c(&candidates, 0);
        assert_eq!(outcome.hs_code, "290511");
        assert_eq!(outcome.gray_zone, Some(GrayZone::MixtureEssentialCharacterUnclear));
        assert_eq!(outcome.recommended_action, RecommendedAction::PriorConsultation);
    }

    /// One component means one chapter, so GRI 3a applies trivially.
    #[test]
    fn single_component_mixture_classifies_via_gri3a() {
        let candidates = [(Some(100.0), make_pred("290511", 0.97))];
        let outcome = try_gri3a(&candidates);
        assert!(outcome.is_some(), "GRI 3a must succeed for a single-component mixture");
        assert_eq!(outcome.unwrap().hs_code, "290511");
    }

    /// Boundary: exactly 50.0% w/w is not dominant, because the comparison is
    /// strictly greater-than.
    #[test]
    fn gri3b_exactly_50pct_is_not_dominant() {
        let candidates = [
            (Some(50.0), make_pred("280700", 0.97)),
            (Some(50.0), make_pred("290511", 0.97)),
        ];
        assert!(
            try_gri3b(&candidates).is_none(),
            "50.0% is not strictly > 50.0; GRI 3b must return None"
        );
    }

    /// A NaN confidence must not make the GRI 3a comparator panic.
    #[test]
    fn gri3a_nan_confidence_does_not_panic() {
        let candidates = [
            (Some(50.0), make_pred("290511", f32::NAN)),
            (Some(50.0), make_pred("290512", 0.80)),
        ];
        // Only the absence of a panic is checked; the selected code is not.
        let _ = try_gri3a(&candidates);
    }
}