Skip to main content

hs_predict/
pipeline.rs

//! Main classification pipeline.
//!
//! Runs classification in priority order:
//! 0. Mixture branch (v0.5) — delegated to [`crate::mixture`]
//! 1. User-provided CAS → HS mappings (confidence = 1.0)
//! 2. Embedded static rule table (CAS + shape + purity)
//! 3. SMILES-based rule engine (v0.3)
//! 4. LLM fallback via the [`LlmClassifier`](crate::llm::LlmClassifier) trait hook
//!    (v0.4, `llm` feature)
8
9use std::collections::HashMap;
10#[cfg(feature = "llm")]
11use std::sync::Arc;
12
13use crate::error::{HsPredictError, Result};
14use crate::rules::jp_table::{find_jp_rule, JP_TARIFF_YEAR};
15use crate::rules::matcher::find_best_rule;
16use crate::types::{
17    GrayZone, HsPrediction, OrganicInorganic, PhysicalForm, ProductDescription,
18    PredictionSource, RecommendedAction,
19};
20
/// Tunable confidence thresholds for the classification pipeline.
///
/// A prediction's confidence is compared against the two thresholds with
/// `>=`, highest first:
/// - at or above `confidence_threshold_direct` the result is accepted as-is;
/// - at or above `confidence_threshold_llm_required` it is returned with
///   `RecommendedAction::VerifyWithLlm`;
/// - anything lower is flagged for expert review.
#[derive(Clone, Debug)]
pub struct PipelineConfig {
    /// Minimum confidence at which a result is returned directly, without
    /// asking for LLM confirmation.
    pub confidence_threshold_direct: f32,

    /// Minimum confidence for a rule-based result to be usable at all.
    /// The SMILES stage drops hints that score below this value, and the
    /// value is reported as `threshold` in `LowConfidenceNoLlm` errors.
    pub confidence_threshold_llm_required: f32,
}
33
34impl Default for PipelineConfig {
35    fn default() -> Self {
36        Self {
37            confidence_threshold_direct: 0.85,
38            confidence_threshold_llm_required: 0.50,
39        }
40    }
41}
42
43/// Main HS code classification pipeline.
44///
45/// # Example — direct (sync)
46/// ```rust,no_run
47/// use hs_predict::pipeline::HsPipeline;
48/// use hs_predict::types::{ProductDescription, SubstanceIdentifier, PhysicalForm};
49///
50/// let pipeline = HsPipeline::new();
51///
52/// let product = ProductDescription {
53///     identifier: SubstanceIdentifier::from_cas("1310-73-2"),
54///     physical_form: Some(PhysicalForm::Solid),
55///     purity_pct: None,
56///     purity_type: None,
57///     mixture_components: None,
58///     intended_use: None,
59///     additional_context: None,
60/// };
61///
62/// let prediction = pipeline.classify(&product).unwrap();
63/// assert_eq!(&prediction.hs_code, "281511");
64/// ```
65///
66/// # Example — with PubChem enrichment (async, `pubchem` feature)
67/// ```rust,no_run
68/// # #[cfg(feature = "pubchem")]
69/// # async fn example() -> hs_predict::Result<()> {
70/// use hs_predict::pipeline::HsPipeline;
71/// use hs_predict::pubchem::PubChemClient;
72/// use hs_predict::types::{ProductDescription, SubstanceIdentifier, PhysicalForm};
73///
74/// let pipeline = HsPipeline::new().with_pubchem(PubChemClient::new());
75///
76/// let mut product = ProductDescription {
77///     identifier: SubstanceIdentifier::from_cas("1310-73-2"),
78///     physical_form: Some(PhysicalForm::Solid),
79///     purity_pct: None,
80///     purity_type: None,
81///     mixture_components: None,
82///     intended_use: None,
83///     additional_context: None,
84/// };
85///
86/// pipeline.enrich(&mut product).await?;   // fills SMILES, InChI, IUPAC name …
87/// let prediction = pipeline.classify(&product)?;
88/// println!("{}", prediction.display());   // "28.15.11"
89/// # Ok(())
90/// # }
91/// ```
92///
93/// # Example — with LLM fallback (async, `llm` feature)
94/// ```rust,no_run
95/// # #[cfg(feature = "llm")]
96/// # async fn example() -> hs_predict::Result<()> {
97/// use hs_predict::pipeline::HsPipeline;
98/// use hs_predict::llm::{LlmClassifier, LlmPrompt, LlmResponse};
99/// use futures::future::BoxFuture;
100///
101/// struct MyClient;
102/// impl LlmClassifier for MyClient {
103///     fn classify<'a>(&'a self, prompt: &'a LlmPrompt) -> BoxFuture<'a, hs_predict::Result<LlmResponse>> {
104///         Box::pin(async move { todo!() })
105///     }
106/// }
107///
108/// let pipeline = HsPipeline::new().with_llm(MyClient);
109/// use hs_predict::types::{ProductDescription, SubstanceIdentifier};
110/// let product = ProductDescription {
111///     identifier: SubstanceIdentifier::from_cas("12-34-5"),
112///     physical_form: None, purity_pct: None, purity_type: None,
113///     mixture_components: None, intended_use: None, additional_context: None,
114/// };
115/// let prediction = pipeline.classify_with_llm(&product).await?;
116/// println!("{}", prediction.display());
117/// # Ok(())
118/// # }
119/// ```
120#[derive(Default)]
121pub struct HsPipeline {
122    /// User-supplied CAS → HS code overrides. Highest priority.
123    user_mappings: HashMap<String, String>,
124
125    config: PipelineConfig,
126
127    /// PubChem client for identifier enrichment (v0.2, `pubchem` feature).
128    #[cfg(feature = "pubchem")]
129    pubchem: Option<std::sync::Arc<crate::pubchem::PubChemClient>>,
130
131    /// LLM classifier hook (v0.4, `llm` feature).
132    #[cfg(feature = "llm")]
133    llm: Option<Arc<dyn crate::llm::LlmClassifier>>,
134}
135
136impl std::fmt::Debug for HsPipeline {
137    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
138        let mut s = f.debug_struct("HsPipeline");
139        s.field("user_mappings", &self.user_mappings);
140        s.field("config", &self.config);
141        #[cfg(feature = "pubchem")]
142        s.field("pubchem", &self.pubchem.as_ref().map(|_| "<PubChemClient>"));
143        #[cfg(feature = "llm")]
144        s.field("llm", &self.llm.as_ref().map(|_| "<dyn LlmClassifier>"));
145        s.finish()
146    }
147}
148
149impl HsPipeline {
150    /// Create a pipeline with default configuration.
151    pub fn new() -> Self {
152        Self::default()
153    }
154
155    /// Add a user-provided CAS → HS code mapping.
156    ///
157    /// These mappings override the embedded rule table with `confidence = 1.0`.
158    ///
159    /// The `hs_code` must be exactly 6 ASCII digits (e.g. `"281511"`).
160    /// If the code does not satisfy this constraint the mapping is silently
161    /// ignored and the pipeline is returned unchanged.
162    pub fn with_mapping(mut self, cas: impl Into<String>, hs_code: impl Into<String>) -> Self {
163        let hs_code = hs_code.into();
164        let valid = hs_code.len() == 6 && hs_code.chars().all(|c| c.is_ascii_digit());
165        if valid {
166            self.user_mappings.insert(cas.into(), hs_code);
167        }
168        self
169    }
170
171    /// Override the default pipeline configuration.
172    pub fn with_config(mut self, config: PipelineConfig) -> Self {
173        self.config = config;
174        self
175    }
176
177    /// Attach an [`LlmClassifier`](crate::llm::LlmClassifier) implementation to
178    /// enable the LLM fallback (Priority 4).
179    ///
180    /// The LLM is called by [`classify_with_llm`](Self::classify_with_llm) when
181    /// the rule-based pipeline returns a result with
182    /// `recommended_action != Accept`, or returns
183    /// [`LowConfidenceNoLlm`](crate::HsPredictError::LowConfidenceNoLlm).
184    ///
185    /// Requires the **`llm`** Cargo feature.
186    #[cfg(feature = "llm")]
187    pub fn with_llm(mut self, client: impl crate::llm::LlmClassifier + 'static) -> Self {
188        self.llm = Some(Arc::new(client));
189        self
190    }
191
192    /// Attach a [`PubChemClient`](crate::pubchem::PubChemClient) to enable
193    /// automatic identifier enrichment before classification.
194    ///
195    /// Requires the **`pubchem`** Cargo feature.
196    #[cfg(feature = "pubchem")]
197    pub fn with_pubchem(mut self, client: crate::pubchem::PubChemClient) -> Self {
198        self.pubchem = Some(std::sync::Arc::new(client));
199        self
200    }
201
202    /// Enrich a [`ProductDescription`] with PubChem data.
203    ///
204    /// Fills in any missing fields of the main identifier and each mixture
205    /// component's identifier (SMILES, InChI, InChIKey, IUPAC name, CID).
206    ///
207    /// This is a **best-effort** operation:
208    /// - "Not found" and "no usable identifier" results are silently ignored.
209    /// - Network / parse errors **are** propagated.
210    /// - If no PubChem client is configured, returns `Ok(())` immediately.
211    ///
212    /// Requires the **`pubchem`** Cargo feature.
213    #[cfg(feature = "pubchem")]
214    pub async fn enrich(&self, product: &mut ProductDescription) -> Result<()> {
215        let Some(ref client) = self.pubchem else {
216            return Ok(());
217        };
218
219        client.enrich(&mut product.identifier).await?;
220
221        if let Some(ref mut comps) = product.mixture_components {
222            for comp in comps.iter_mut() {
223                client.enrich(&mut comp.substance).await?;
224            }
225        }
226
227        Ok(())
228    }
229
230    /// Classify a product and return an HS code prediction.
231    ///
232    /// Priority order:
233    /// 0. Mixture branch (v0.5) — GRI 3a/3b/3c via [`crate::mixture`]
234    /// 1. User-provided mapping
235    /// 2. Embedded static rule table
236    /// 3. (v0.3) SMILES rule engine
237    /// 4. (v0.4) LLM fallback
238    pub fn classify(&self, product: &ProductDescription) -> Result<HsPrediction> {
239        // ── Priority 0: Mixture branch (v0.5) ────────────────────────────
240        if product.is_mixture() {
241            return crate::mixture::classify_mixture(product, |comp| self.classify(comp));
242        }
243
244        // ── Priority 1: User-provided mappings ────────────────────────
245        if let Some(ref cas) = product.identifier.cas {
246            if let Some(hs_code) = self.user_mappings.get(cas.as_str()) {
247                let jp = find_jp_rule(hs_code);
248                return Ok(HsPrediction {
249                    hs_code: hs_code.clone(),
250                    heading_description: String::new(),
251                    confidence: 1.0,
252                    source: PredictionSource::UserMapping,
253                    notes: vec!["From user-provided mapping".to_string()],
254                    alternatives: vec![],
255                    recommended_action: RecommendedAction::Accept,
256                    gray_zone: None,
257                    jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
258                    jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
259                });
260            }
261        }
262
263        // ── Priority 2: Embedded static rule table ────────────────────
264        if let Some(ref cas) = product.identifier.cas {
265            if let Some(rule) = find_best_rule(
266                cas,
267                product.physical_form.as_ref(),
268                product.purity_pct,
269            ) {
270                let gray_zone = self.detect_gray_zone(product, rule.hs_code, None);
271                let action = self.recommended_action_with_gz(rule.confidence, gray_zone.as_ref());
272                let jp = find_jp_rule(rule.hs_code);
273                return Ok(HsPrediction {
274                    hs_code: rule.hs_code.to_string(),
275                    heading_description: rule.heading_description.to_string(),
276                    confidence: rule.confidence,
277                    source: PredictionSource::EmbeddedRule {
278                        rule_id: format!("{}:{}", rule.cas, rule.hs_code),
279                    },
280                    notes: self.build_notes(product),
281                    alternatives: vec![],
282                    recommended_action: action,
283                    gray_zone,
284                    jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
285                    jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
286                });
287            }
288        }
289
290        // ── Priority 3: SMILES-based rule engine ─────────────────────────
291        if let Some(ref smiles) = product.identifier.smiles {
292            if let Some(classification) = crate::smiles::classify_smiles(smiles) {
293                let hint = &classification.heading_hint;
294                // Only emit a result when we have at least a 4-digit heading
295                // and confidence meets the LLM-required threshold.
296                if let Some(heading) = hint.heading {
297                    if hint.confidence >= self.config.confidence_threshold_llm_required {
298                        // Pad to 6 digits with "00" sub-heading (best guess)
299                        let hs_code = format!("{:04}00", heading);
300                        let jp = find_jp_rule(&hs_code);
301
302                        // Detect gray zone using the pre-computed organic class.
303                        let gray_zone = self.detect_gray_zone(
304                            product,
305                            &hs_code,
306                            Some(&classification.organic_class),
307                        );
308                        let action =
309                            self.recommended_action_with_gz(hint.confidence, gray_zone.as_ref());
310
311                        let mut notes = self.build_notes(product);
312                        notes.push(
313                            "Heading is derived from SMILES functional-group analysis. \
314                             Sub-heading (last two digits) is a placeholder — \
315                             verify the exact 6-digit code with the product specification."
316                                .to_string(),
317                        );
318
319                        let matched_rules: Vec<String> = classification
320                            .functional_groups
321                            .iter()
322                            .map(|g| g.label().to_string())
323                            .collect();
324
325                        return Ok(HsPrediction {
326                            hs_code,
327                            heading_description: hint.rationale.to_string(),
328                            confidence: hint.confidence,
329                            source: PredictionSource::RuleEngine { matched_rules },
330                            notes,
331                            alternatives: vec![],
332                            recommended_action: action,
333                            gray_zone,
334                            jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
335                            jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
336                        });
337                    }
338                }
339            }
340        }
341
342        // ── Priority 4: LLM fallback ─────────────────────────────────
343        // (async path — use classify_with_llm for LLM support)
344        Err(HsPredictError::LowConfidenceNoLlm {
345            confidence: 0.0,
346            threshold: self.config.confidence_threshold_llm_required,
347        })
348    }
349
350    /// Classify a batch of products concurrently.
351    ///
352    /// Returns one `Result<HsPrediction>` per input, in the same order.
353    /// Uses synchronous [`classify`](Self::classify) internally — for LLM-backed
354    /// batch classification see `classify_batch_with_llm` (future work).
355    pub fn classify_batch(&self, products: &[ProductDescription]) -> Vec<Result<HsPrediction>> {
356        products.iter().map(|p| self.classify(p)).collect()
357    }
358
359    /// Classify a batch of products using the async LLM path.
360    ///
361    /// Each product is classified via [`classify_with_llm`](Self::classify_with_llm).
362    /// All requests are issued concurrently.
363    ///
364    /// Requires the **`llm`** Cargo feature.
365    #[cfg(feature = "llm")]
366    pub async fn classify_batch_with_llm(
367        &self,
368        products: &[ProductDescription],
369    ) -> Vec<Result<HsPrediction>> {
370        use futures::future::join_all;
371        let futures: Vec<_> = products.iter().map(|p| self.classify_with_llm(p)).collect();
372        join_all(futures).await
373    }
374
375    /// Classify a product, falling back to the configured LLM when the
376    /// rule-based pipeline returns a low-confidence or uncertain result.
377    ///
378    /// # Priority order (same as [`classify`](Self::classify) + LLM)
379    ///
380    /// 1. User-provided mapping → `Accept` → return immediately.
381    /// 2. Embedded static rule table → `Accept` → return immediately.
382    /// 3. SMILES rule engine → `Accept` → return immediately.
383    /// 4. Any result with `recommended_action != Accept`, or
384    ///    `LowConfidenceNoLlm` → forward to LLM.
385    ///
386    /// If no LLM client has been configured via [`with_llm`](Self::with_llm),
387    /// returns [`HsPredictError::LlmNotConfigured`].
388    ///
389    /// # Validation
390    /// The LLM's `hs_code` must be exactly 6 ASCII digits; otherwise
391    /// [`HsPredictError::ValidationFailed`] is returned.
392    ///
393    /// # Chapter consistency
394    /// If the LLM chapter differs from the SMILES engine's chapter hint, a
395    /// warning note is appended — this is **not** a hard error.
396    ///
397    /// Requires the **`llm`** Cargo feature.
398    #[cfg(feature = "llm")]
399    pub async fn classify_with_llm(
400        &self,
401        product: &ProductDescription,
402    ) -> Result<HsPrediction> {
403        use crate::llm::PromptBuilder;
404        use crate::types::AlternativePrediction;
405
406        // First try the synchronous rule-based pipeline.
407        let needs_llm = match self.classify(product) {
408            Ok(pred) if pred.recommended_action == RecommendedAction::Accept => {
409                return Ok(pred);
410            }
411            Ok(_pred) => true,  // low-confidence result → try LLM
412            Err(HsPredictError::LowConfidenceNoLlm { .. }) => true,
413            Err(e) => return Err(e),
414        };
415
416        debug_assert!(needs_llm);
417
418        // Require a configured LLM client.
419        let llm = self
420            .llm
421            .as_ref()
422            .ok_or(HsPredictError::LlmNotConfigured)?;
423
424        // Build prompt and call the LLM.
425        let prompt = PromptBuilder::new().build(product);
426        let resp = llm.classify(&prompt).await?;
427
428        // Validate: must be exactly 6 ASCII digits.
429        if resp.hs_code.len() != 6 || !resp.hs_code.chars().all(|c| c.is_ascii_digit()) {
430            return Err(HsPredictError::ValidationFailed { code: resp.hs_code });
431        }
432
433        // Chapter consistency check (warning only).
434        let mut notes = self.build_notes(product);
435        if let Some(ref analysis) = prompt.smiles_analysis {
436            let llm_chapter = &resp.hs_code[..2];
437            let expected_chapter = format!("{:02}", analysis.heading_hint.chapter);
438            if llm_chapter != expected_chapter {
439                notes.push(format!(
440                    "Chapter mismatch: LLM returned Chapter {} but SMILES engine \
441                     suggested Chapter {}. Verify with Chapter Notes.",
442                    llm_chapter, expected_chapter
443                ));
444            }
445        }
446
447        notes.push(format!("LLM rationale: {}", resp.rationale));
448
449        let jp = find_jp_rule(&resp.hs_code);
450        let action = self.recommended_action(resp.confidence);
451
452        // Only include alternatives whose hs_code passes the same 6-digit
453        // format check applied to the primary result.
454        let alternatives = resp
455            .alternatives
456            .into_iter()
457            .filter(|a| a.hs_code.len() == 6 && a.hs_code.chars().all(|c| c.is_ascii_digit()))
458            .map(|a| AlternativePrediction {
459                hs_code: a.hs_code,
460                confidence: a.confidence,
461                reason: a.reason,
462            })
463            .collect();
464
465        Ok(HsPrediction {
466            hs_code: resp.hs_code,
467            heading_description: String::new(),
468            confidence: resp.confidence,
469            source: PredictionSource::LlmApi { model: String::new() },
470            notes,
471            alternatives,
472            recommended_action: action,
473            gray_zone: None, // LLM response does not carry gray-zone information
474            jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
475            jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
476        })
477    }
478
479    // ─── Private helpers ──────────────────────────────────────────────
480
481    fn recommended_action(&self, confidence: f32) -> RecommendedAction {
482        if confidence >= self.config.confidence_threshold_direct {
483            RecommendedAction::Accept
484        } else if confidence >= self.config.confidence_threshold_llm_required {
485            RecommendedAction::VerifyWithLlm
486        } else {
487            RecommendedAction::ExpertReview
488        }
489    }
490
491    /// Like `recommended_action` but upgrades to `PriorConsultation` when a
492    /// gray zone is present and the confidence does not reach the "direct" threshold.
493    fn recommended_action_with_gz(
494        &self,
495        confidence: f32,
496        gray_zone: Option<&GrayZone>,
497    ) -> RecommendedAction {
498        let base = self.recommended_action(confidence);
499        if gray_zone.is_some() && base != RecommendedAction::Accept {
500            // Gray zone identified → recommend an advance ruling (事前教示)
501            RecommendedAction::PriorConsultation
502        } else {
503            base
504        }
505    }
506
507    /// Detect whether a prediction falls in a well-known gray zone.
508    ///
509    /// When `organic_class` is `Some`, the supplied classification is used
510    /// (e.g. when the SMILES engine has already analysed the structure);
511    /// otherwise the classification is re-derived from
512    /// `product.identifier.smiles` when available.
513    fn detect_gray_zone(
514        &self,
515        product: &ProductDescription,
516        hs_code: &str,
517        organic_class: Option<&OrganicInorganic>,
518    ) -> Option<GrayZone> {
519        let chapter = &hs_code[..2];
520
521        // Chapter 28 / 29 boundary: organometallic or borderline compound
522        if chapter == "28" && self.is_organometallic(product, organic_class) {
523            return Some(GrayZone::Chapter28vs29);
524        }
525
526        // Chapter 29 result but product is used industrially → Ch.29 vs Ch.38
527        if chapter == "29" {
528            use crate::types::IntendedUse;
529            if let Some(IntendedUse::Industrial) = &product.intended_use {
530                return Some(GrayZone::Chapter29vs38);
531            }
532        }
533
534        None
535    }
536
537    /// Whether the product is an organometallic compound — either via the
538    /// pre-computed `organic_class` (preferred) or by re-deriving from SMILES.
539    fn is_organometallic(
540        &self,
541        product: &ProductDescription,
542        organic_class: Option<&OrganicInorganic>,
543    ) -> bool {
544        match organic_class {
545            Some(oc) => matches!(oc, OrganicInorganic::Organometallic),
546            None => product.identifier.smiles.as_deref().is_some_and(|s| {
547                matches!(
548                    crate::smiles::detector::classify_organic(s),
549                    OrganicInorganic::Organometallic,
550                )
551            }),
552        }
553    }
554
555    /// Build supplementary notes about shape / purity caveats.
556    fn build_notes(&self, product: &ProductDescription) -> Vec<String> {
557        let mut notes = Vec::new();
558
559        match &product.physical_form {
560            None | Some(PhysicalForm::Unknown) => {
561                notes.push(
562                    "Physical form not specified — the HS subheading may differ \
563                     (e.g. solid vs. solution).".to_string(),
564                );
565            }
566            Some(PhysicalForm::Solution { concentration_pct_ww: None, .. }) => {
567                notes.push(
568                    "Solution concentration not specified — subheading may differ \
569                     (e.g. fuming vs. standard grade).".to_string(),
570                );
571            }
572            _ => {}
573        }
574
575        if product.purity_pct.is_none() {
576            notes.push(
577                "Purity not specified — some headings require a minimum purity threshold."
578                    .to_string(),
579            );
580        }
581
582        notes
583    }
584}
585
586// ─────────────────────────────────────────────────────────────────────────────
587// Pipeline integration tests
588// ─────────────────────────────────────────────────────────────────────────────
589
590#[cfg(all(test, feature = "mock"))]
591mod tests {
592    use super::*;
593    use crate::llm::MockLlmClassifier;
594    use crate::types::{SubstanceIdentifier};
595
596    /// A product with no static rule and a SMILES → triggers LLM path.
597    fn unknown_organic() -> ProductDescription {
598        ProductDescription {
599            identifier: SubstanceIdentifier {
600                cas: None,
601                smiles: Some("CC(O)=O".to_string()), // acetic acid SMILES, unknown CAS
602                iupac_name: None,
603                inchi: None,
604                inchi_key: None,
605                cid: None,
606            },
607            physical_form: None,
608            purity_pct: None,
609            purity_type: None,
610            mixture_components: None,
611            intended_use: None,
612            additional_context: None,
613        }
614    }
615
616    #[tokio::test]
617    async fn classify_with_llm_mock_returns_6_digit_code() {
618        let pipeline = HsPipeline::new().with_llm(MockLlmClassifier::new());
619        let product = unknown_organic();
620        let pred = pipeline.classify_with_llm(&product).await.unwrap();
621        assert_eq!(pred.hs_code.len(), 6);
622        assert!(pred.hs_code.chars().all(|c| c.is_ascii_digit()));
623    }
624
625    #[tokio::test]
626    async fn classify_with_llm_mock_chapter_29_for_smiles_acid() {
627        let pipeline = HsPipeline::new().with_llm(MockLlmClassifier::new());
628        let product = unknown_organic();
629        let pred = pipeline.classify_with_llm(&product).await.unwrap();
630        assert!(
631            pred.hs_code.starts_with("29"),
632            "acetic acid SMILES should yield Chapter 29, got {}",
633            pred.hs_code
634        );
635    }
636
637    #[tokio::test]
638    async fn classify_with_llm_no_client_returns_error() {
639        let pipeline = HsPipeline::new(); // no LLM attached
640        let product = unknown_organic();
641        let err = pipeline.classify_with_llm(&product).await.unwrap_err();
642        assert!(
643            matches!(err, HsPredictError::LlmNotConfigured),
644            "expected LlmNotConfigured, got {:?}",
645            err
646        );
647    }
648
649    #[tokio::test]
650    async fn classify_with_llm_skips_llm_for_high_confidence_rule() {
651        // NaOH solid → static rule, confidence = 1.0 → should NOT call LLM
652        let pipeline = HsPipeline::new()
653            .with_llm(MockLlmClassifier::with_default("999999", 0.1));
654        let product = ProductDescription {
655            identifier: SubstanceIdentifier::from_cas("1310-73-2"),
656            physical_form: Some(crate::types::PhysicalForm::Solid),
657            purity_pct: None,
658            purity_type: None,
659            mixture_components: None,
660            intended_use: None,
661            additional_context: None,
662        };
663        let pred = pipeline.classify_with_llm(&product).await.unwrap();
664        // Should be the static rule result, not the mock's "999999"
665        assert_eq!(pred.hs_code, "281511", "static rule should win over LLM");
666    }
667
668    #[tokio::test]
669    async fn classify_with_llm_invalid_code_returns_validation_error() {
670        // Mock returning an invalid code
671        struct BadMock;
672        impl crate::llm::LlmClassifier for BadMock {
673            fn classify<'a>(
674                &'a self,
675                _prompt: &'a crate::llm::LlmPrompt,
676            ) -> futures::future::BoxFuture<'a, crate::Result<crate::llm::LlmResponse>> {
677                Box::pin(async {
678                    Ok(crate::llm::LlmResponse {
679                        hs_code: "BAD!!".to_string(),
680                        confidence: 0.5,
681                        rationale: "bad".to_string(),
682                        alternatives: vec![],
683                    })
684                })
685            }
686        }
687        let pipeline = HsPipeline::new().with_llm(BadMock);
688        let product = unknown_organic();
689        let err = pipeline.classify_with_llm(&product).await.unwrap_err();
690        assert!(
691            matches!(err, HsPredictError::ValidationFailed { .. }),
692            "expected ValidationFailed, got {:?}",
693            err
694        );
695    }
696}