hs_predict/pipeline.rs
1//! Main classification pipeline.
2//!
//! Runs classification in priority order:
//! 0. Mixture branch (v0.5) — mixtures are delegated to [`crate::mixture`]
//! 1. User-provided CAS → HS mappings (confidence = 1.0)
//! 2. Embedded static rule table (CAS + shape + purity)
//! 3. SMILES-based rule engine (v0.3)
//! 4. LLM fallback via the [`LlmClassifier`](crate::llm::LlmClassifier) trait hook (v0.4, `llm` feature)
8
9use std::collections::HashMap;
10#[cfg(feature = "llm")]
11use std::sync::Arc;
12
13use crate::error::{HsPredictError, Result};
14use crate::rules::jp_table::{find_jp_rule, JP_TARIFF_YEAR};
15use crate::rules::matcher::find_best_rule;
16use crate::types::{
17 GrayZone, HsPrediction, OrganicInorganic, PhysicalForm, ProductDescription,
18 PredictionSource, RecommendedAction,
19};
20
/// Confidence thresholds that steer the classification pipeline.
///
/// The two values split the confidence scale into three bands, which the
/// pipeline maps onto a `RecommendedAction`:
///
/// | confidence `c`                                   | action          |
/// |--------------------------------------------------|-----------------|
/// | `c >= confidence_threshold_direct`               | `Accept`        |
/// | `confidence_threshold_llm_required <= c < direct`| `VerifyWithLlm` |
/// | `c < confidence_threshold_llm_required`          | `ExpertReview`  |
#[derive(Clone, Debug)]
pub struct PipelineConfig {
    /// Lower bound (inclusive) of the "accept directly" band: a result at or
    /// above this confidence is returned without asking for LLM confirmation.
    pub confidence_threshold_direct: f32,

    /// Lower bound (inclusive) of the "verify with LLM" band.
    ///
    /// Results between this value and `confidence_threshold_direct` are
    /// returned with `RecommendedAction::VerifyWithLlm`; results below it are
    /// flagged for expert review. The SMILES rule engine also uses this value
    /// as the minimum confidence required to emit a prediction at all.
    pub confidence_threshold_llm_required: f32,
}
33
34impl Default for PipelineConfig {
35 fn default() -> Self {
36 Self {
37 confidence_threshold_direct: 0.85,
38 confidence_threshold_llm_required: 0.50,
39 }
40 }
41}
42
43/// Main HS code classification pipeline.
44///
45/// # Example — direct (sync)
46/// ```rust,no_run
47/// use hs_predict::pipeline::HsPipeline;
48/// use hs_predict::types::{ProductDescription, SubstanceIdentifier, PhysicalForm};
49///
50/// let pipeline = HsPipeline::new();
51///
52/// let product = ProductDescription {
53/// identifier: SubstanceIdentifier::from_cas("1310-73-2"),
54/// physical_form: Some(PhysicalForm::Solid),
55/// purity_pct: None,
56/// purity_type: None,
57/// mixture_components: None,
58/// intended_use: None,
59/// additional_context: None,
60/// };
61///
62/// let prediction = pipeline.classify(&product).unwrap();
63/// assert_eq!(&prediction.hs_code, "281511");
64/// ```
65///
66/// # Example — with PubChem enrichment (async, `pubchem` feature)
67/// ```rust,no_run
68/// # #[cfg(feature = "pubchem")]
69/// # async fn example() -> hs_predict::Result<()> {
70/// use hs_predict::pipeline::HsPipeline;
71/// use hs_predict::pubchem::PubChemClient;
72/// use hs_predict::types::{ProductDescription, SubstanceIdentifier, PhysicalForm};
73///
74/// let pipeline = HsPipeline::new().with_pubchem(PubChemClient::new());
75///
76/// let mut product = ProductDescription {
77/// identifier: SubstanceIdentifier::from_cas("1310-73-2"),
78/// physical_form: Some(PhysicalForm::Solid),
79/// purity_pct: None,
80/// purity_type: None,
81/// mixture_components: None,
82/// intended_use: None,
83/// additional_context: None,
84/// };
85///
86/// pipeline.enrich(&mut product).await?; // fills SMILES, InChI, IUPAC name …
87/// let prediction = pipeline.classify(&product)?;
88/// println!("{}", prediction.display()); // "28.15.11"
89/// # Ok(())
90/// # }
91/// ```
92///
93/// # Example — with LLM fallback (async, `llm` feature)
94/// ```rust,no_run
95/// # #[cfg(feature = "llm")]
96/// # async fn example() -> hs_predict::Result<()> {
97/// use hs_predict::pipeline::HsPipeline;
98/// use hs_predict::llm::{LlmClassifier, LlmPrompt, LlmResponse};
99/// use futures::future::BoxFuture;
100///
101/// struct MyClient;
102/// impl LlmClassifier for MyClient {
103/// fn classify<'a>(&'a self, prompt: &'a LlmPrompt) -> BoxFuture<'a, hs_predict::Result<LlmResponse>> {
104/// Box::pin(async move { todo!() })
105/// }
106/// }
107///
108/// let pipeline = HsPipeline::new().with_llm(MyClient);
109/// use hs_predict::types::{ProductDescription, SubstanceIdentifier};
110/// let product = ProductDescription {
111/// identifier: SubstanceIdentifier::from_cas("12-34-5"),
112/// physical_form: None, purity_pct: None, purity_type: None,
113/// mixture_components: None, intended_use: None, additional_context: None,
114/// };
115/// let prediction = pipeline.classify_with_llm(&product).await?;
116/// println!("{}", prediction.display());
117/// # Ok(())
118/// # }
119/// ```
120#[derive(Default)]
121pub struct HsPipeline {
122 /// User-supplied CAS → HS code overrides. Highest priority.
123 user_mappings: HashMap<String, String>,
124
125 config: PipelineConfig,
126
127 /// PubChem client for identifier enrichment (v0.2, `pubchem` feature).
128 #[cfg(feature = "pubchem")]
129 pubchem: Option<std::sync::Arc<crate::pubchem::PubChemClient>>,
130
131 /// LLM classifier hook (v0.4, `llm` feature).
132 #[cfg(feature = "llm")]
133 llm: Option<Arc<dyn crate::llm::LlmClassifier>>,
134}
135
136impl std::fmt::Debug for HsPipeline {
137 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
138 let mut s = f.debug_struct("HsPipeline");
139 s.field("user_mappings", &self.user_mappings);
140 s.field("config", &self.config);
141 #[cfg(feature = "pubchem")]
142 s.field("pubchem", &self.pubchem.as_ref().map(|_| "<PubChemClient>"));
143 #[cfg(feature = "llm")]
144 s.field("llm", &self.llm.as_ref().map(|_| "<dyn LlmClassifier>"));
145 s.finish()
146 }
147}
148
149impl HsPipeline {
150 /// Create a pipeline with default configuration.
151 pub fn new() -> Self {
152 Self::default()
153 }
154
155 /// Add a user-provided CAS → HS code mapping.
156 ///
157 /// These mappings override the embedded rule table with `confidence = 1.0`.
158 ///
159 /// The `hs_code` must be exactly 6 ASCII digits (e.g. `"281511"`).
160 /// If the code does not satisfy this constraint the mapping is silently
161 /// ignored and the pipeline is returned unchanged.
162 pub fn with_mapping(mut self, cas: impl Into<String>, hs_code: impl Into<String>) -> Self {
163 let hs_code = hs_code.into();
164 let valid = hs_code.len() == 6 && hs_code.chars().all(|c| c.is_ascii_digit());
165 if valid {
166 self.user_mappings.insert(cas.into(), hs_code);
167 }
168 self
169 }
170
171 /// Override the default pipeline configuration.
172 pub fn with_config(mut self, config: PipelineConfig) -> Self {
173 self.config = config;
174 self
175 }
176
177 /// Attach an [`LlmClassifier`](crate::llm::LlmClassifier) implementation to
178 /// enable the LLM fallback (Priority 4).
179 ///
180 /// The LLM is called by [`classify_with_llm`](Self::classify_with_llm) when
181 /// the rule-based pipeline returns a result with
182 /// `recommended_action != Accept`, or returns
183 /// [`LowConfidenceNoLlm`](crate::HsPredictError::LowConfidenceNoLlm).
184 ///
185 /// Requires the **`llm`** Cargo feature.
186 #[cfg(feature = "llm")]
187 pub fn with_llm(mut self, client: impl crate::llm::LlmClassifier + 'static) -> Self {
188 self.llm = Some(Arc::new(client));
189 self
190 }
191
192 /// Attach a [`PubChemClient`](crate::pubchem::PubChemClient) to enable
193 /// automatic identifier enrichment before classification.
194 ///
195 /// Requires the **`pubchem`** Cargo feature.
196 #[cfg(feature = "pubchem")]
197 pub fn with_pubchem(mut self, client: crate::pubchem::PubChemClient) -> Self {
198 self.pubchem = Some(std::sync::Arc::new(client));
199 self
200 }
201
202 /// Enrich a [`ProductDescription`] with PubChem data.
203 ///
204 /// Fills in any missing fields of the main identifier and each mixture
205 /// component's identifier (SMILES, InChI, InChIKey, IUPAC name, CID).
206 ///
207 /// This is a **best-effort** operation:
208 /// - "Not found" and "no usable identifier" results are silently ignored.
209 /// - Network / parse errors **are** propagated.
210 /// - If no PubChem client is configured, returns `Ok(())` immediately.
211 ///
212 /// Requires the **`pubchem`** Cargo feature.
213 #[cfg(feature = "pubchem")]
214 pub async fn enrich(&self, product: &mut ProductDescription) -> Result<()> {
215 let Some(ref client) = self.pubchem else {
216 return Ok(());
217 };
218
219 client.enrich(&mut product.identifier).await?;
220
221 if let Some(ref mut comps) = product.mixture_components {
222 for comp in comps.iter_mut() {
223 client.enrich(&mut comp.substance).await?;
224 }
225 }
226
227 Ok(())
228 }
229
230 /// Classify a product and return an HS code prediction.
231 ///
232 /// Priority order:
233 /// 0. Mixture branch (v0.5) — GRI 3a/3b/3c via [`crate::mixture`]
234 /// 1. User-provided mapping
235 /// 2. Embedded static rule table
236 /// 3. (v0.3) SMILES rule engine
237 /// 4. (v0.4) LLM fallback
238 pub fn classify(&self, product: &ProductDescription) -> Result<HsPrediction> {
239 // ── Priority 0: Mixture branch (v0.5) ────────────────────────────
240 if product.is_mixture() {
241 return crate::mixture::classify_mixture(product, |comp| self.classify(comp));
242 }
243
244 // ── Priority 1: User-provided mappings ────────────────────────
245 if let Some(ref cas) = product.identifier.cas {
246 if let Some(hs_code) = self.user_mappings.get(cas.as_str()) {
247 let jp = find_jp_rule(hs_code);
248 return Ok(HsPrediction {
249 hs_code: hs_code.clone(),
250 heading_description: String::new(),
251 confidence: 1.0,
252 source: PredictionSource::UserMapping,
253 notes: vec!["From user-provided mapping".to_string()],
254 alternatives: vec![],
255 recommended_action: RecommendedAction::Accept,
256 gray_zone: None,
257 jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
258 jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
259 });
260 }
261 }
262
263 // ── Priority 2: Embedded static rule table ────────────────────
264 if let Some(ref cas) = product.identifier.cas {
265 if let Some(rule) = find_best_rule(
266 cas,
267 product.physical_form.as_ref(),
268 product.purity_pct,
269 ) {
270 let gray_zone = self.detect_gray_zone(product, rule.hs_code, None);
271 let action = self.recommended_action_with_gz(rule.confidence, gray_zone.as_ref());
272 let jp = find_jp_rule(rule.hs_code);
273 return Ok(HsPrediction {
274 hs_code: rule.hs_code.to_string(),
275 heading_description: rule.heading_description.to_string(),
276 confidence: rule.confidence,
277 source: PredictionSource::EmbeddedRule {
278 rule_id: format!("{}:{}", rule.cas, rule.hs_code),
279 },
280 notes: self.build_notes(product),
281 alternatives: vec![],
282 recommended_action: action,
283 gray_zone,
284 jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
285 jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
286 });
287 }
288 }
289
290 // ── Priority 3: SMILES-based rule engine ─────────────────────────
291 if let Some(ref smiles) = product.identifier.smiles {
292 if let Some(classification) = crate::smiles::classify_smiles(smiles) {
293 let hint = &classification.heading_hint;
294 // Only emit a result when we have at least a 4-digit heading
295 // and confidence meets the LLM-required threshold.
296 if let Some(heading) = hint.heading {
297 if hint.confidence >= self.config.confidence_threshold_llm_required {
298 // Pad to 6 digits with "00" sub-heading (best guess)
299 let hs_code = format!("{:04}00", heading);
300 let jp = find_jp_rule(&hs_code);
301
302 // Detect gray zone using the pre-computed organic class.
303 let gray_zone = self.detect_gray_zone(
304 product,
305 &hs_code,
306 Some(&classification.organic_class),
307 );
308 let action =
309 self.recommended_action_with_gz(hint.confidence, gray_zone.as_ref());
310
311 let mut notes = self.build_notes(product);
312 notes.push(
313 "Heading is derived from SMILES functional-group analysis. \
314 Sub-heading (last two digits) is a placeholder — \
315 verify the exact 6-digit code with the product specification."
316 .to_string(),
317 );
318
319 let matched_rules: Vec<String> = classification
320 .functional_groups
321 .iter()
322 .map(|g| g.label().to_string())
323 .collect();
324
325 return Ok(HsPrediction {
326 hs_code,
327 heading_description: hint.rationale.to_string(),
328 confidence: hint.confidence,
329 source: PredictionSource::RuleEngine { matched_rules },
330 notes,
331 alternatives: vec![],
332 recommended_action: action,
333 gray_zone,
334 jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
335 jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
336 });
337 }
338 }
339 }
340 }
341
342 // ── Priority 4: LLM fallback ─────────────────────────────────
343 // (async path — use classify_with_llm for LLM support)
344 Err(HsPredictError::LowConfidenceNoLlm {
345 confidence: 0.0,
346 threshold: self.config.confidence_threshold_llm_required,
347 })
348 }
349
350 /// Classify a batch of products concurrently.
351 ///
352 /// Returns one `Result<HsPrediction>` per input, in the same order.
353 /// Uses synchronous [`classify`](Self::classify) internally — for LLM-backed
354 /// batch classification see `classify_batch_with_llm` (future work).
355 pub fn classify_batch(&self, products: &[ProductDescription]) -> Vec<Result<HsPrediction>> {
356 products.iter().map(|p| self.classify(p)).collect()
357 }
358
359 /// Classify a batch of products using the async LLM path.
360 ///
361 /// Each product is classified via [`classify_with_llm`](Self::classify_with_llm).
362 /// All requests are issued concurrently.
363 ///
364 /// Requires the **`llm`** Cargo feature.
365 #[cfg(feature = "llm")]
366 pub async fn classify_batch_with_llm(
367 &self,
368 products: &[ProductDescription],
369 ) -> Vec<Result<HsPrediction>> {
370 use futures::future::join_all;
371 let futures: Vec<_> = products.iter().map(|p| self.classify_with_llm(p)).collect();
372 join_all(futures).await
373 }
374
375 /// Classify a product, falling back to the configured LLM when the
376 /// rule-based pipeline returns a low-confidence or uncertain result.
377 ///
378 /// # Priority order (same as [`classify`](Self::classify) + LLM)
379 ///
380 /// 1. User-provided mapping → `Accept` → return immediately.
381 /// 2. Embedded static rule table → `Accept` → return immediately.
382 /// 3. SMILES rule engine → `Accept` → return immediately.
383 /// 4. Any result with `recommended_action != Accept`, or
384 /// `LowConfidenceNoLlm` → forward to LLM.
385 ///
386 /// If no LLM client has been configured via [`with_llm`](Self::with_llm),
387 /// returns [`HsPredictError::LlmNotConfigured`].
388 ///
389 /// # Validation
390 /// The LLM's `hs_code` must be exactly 6 ASCII digits; otherwise
391 /// [`HsPredictError::ValidationFailed`] is returned.
392 ///
393 /// # Chapter consistency
394 /// If the LLM chapter differs from the SMILES engine's chapter hint, a
395 /// warning note is appended — this is **not** a hard error.
396 ///
397 /// Requires the **`llm`** Cargo feature.
398 #[cfg(feature = "llm")]
399 pub async fn classify_with_llm(
400 &self,
401 product: &ProductDescription,
402 ) -> Result<HsPrediction> {
403 use crate::llm::PromptBuilder;
404 use crate::types::AlternativePrediction;
405
406 // First try the synchronous rule-based pipeline.
407 let needs_llm = match self.classify(product) {
408 Ok(pred) if pred.recommended_action == RecommendedAction::Accept => {
409 return Ok(pred);
410 }
411 Ok(_pred) => true, // low-confidence result → try LLM
412 Err(HsPredictError::LowConfidenceNoLlm { .. }) => true,
413 Err(e) => return Err(e),
414 };
415
416 debug_assert!(needs_llm);
417
418 // Require a configured LLM client.
419 let llm = self
420 .llm
421 .as_ref()
422 .ok_or(HsPredictError::LlmNotConfigured)?;
423
424 // Build prompt and call the LLM.
425 let prompt = PromptBuilder::new().build(product);
426 let resp = llm.classify(&prompt).await?;
427
428 // Validate: must be exactly 6 ASCII digits.
429 if resp.hs_code.len() != 6 || !resp.hs_code.chars().all(|c| c.is_ascii_digit()) {
430 return Err(HsPredictError::ValidationFailed { code: resp.hs_code });
431 }
432
433 // Chapter consistency check (warning only).
434 let mut notes = self.build_notes(product);
435 if let Some(ref analysis) = prompt.smiles_analysis {
436 let llm_chapter = &resp.hs_code[..2];
437 let expected_chapter = format!("{:02}", analysis.heading_hint.chapter);
438 if llm_chapter != expected_chapter {
439 notes.push(format!(
440 "Chapter mismatch: LLM returned Chapter {} but SMILES engine \
441 suggested Chapter {}. Verify with Chapter Notes.",
442 llm_chapter, expected_chapter
443 ));
444 }
445 }
446
447 notes.push(format!("LLM rationale: {}", resp.rationale));
448
449 let jp = find_jp_rule(&resp.hs_code);
450 let action = self.recommended_action(resp.confidence);
451
452 // Only include alternatives whose hs_code passes the same 6-digit
453 // format check applied to the primary result.
454 let alternatives = resp
455 .alternatives
456 .into_iter()
457 .filter(|a| a.hs_code.len() == 6 && a.hs_code.chars().all(|c| c.is_ascii_digit()))
458 .map(|a| AlternativePrediction {
459 hs_code: a.hs_code,
460 confidence: a.confidence,
461 reason: a.reason,
462 })
463 .collect();
464
465 Ok(HsPrediction {
466 hs_code: resp.hs_code,
467 heading_description: String::new(),
468 confidence: resp.confidence,
469 source: PredictionSource::LlmApi { model: String::new() },
470 notes,
471 alternatives,
472 recommended_action: action,
473 gray_zone: None, // LLM response does not carry gray-zone information
474 jp_tariff_code: jp.map(|r| r.jp_code.to_string()),
475 jp_tariff_year: jp.map(|_| JP_TARIFF_YEAR),
476 })
477 }
478
479 // ─── Private helpers ──────────────────────────────────────────────
480
481 fn recommended_action(&self, confidence: f32) -> RecommendedAction {
482 if confidence >= self.config.confidence_threshold_direct {
483 RecommendedAction::Accept
484 } else if confidence >= self.config.confidence_threshold_llm_required {
485 RecommendedAction::VerifyWithLlm
486 } else {
487 RecommendedAction::ExpertReview
488 }
489 }
490
491 /// Like `recommended_action` but upgrades to `PriorConsultation` when a
492 /// gray zone is present and the confidence does not reach the "direct" threshold.
493 fn recommended_action_with_gz(
494 &self,
495 confidence: f32,
496 gray_zone: Option<&GrayZone>,
497 ) -> RecommendedAction {
498 let base = self.recommended_action(confidence);
499 if gray_zone.is_some() && base != RecommendedAction::Accept {
500 // Gray zone identified → recommend an advance ruling (事前教示)
501 RecommendedAction::PriorConsultation
502 } else {
503 base
504 }
505 }
506
507 /// Detect whether a prediction falls in a well-known gray zone.
508 ///
509 /// When `organic_class` is `Some`, the supplied classification is used
510 /// (e.g. when the SMILES engine has already analysed the structure);
511 /// otherwise the classification is re-derived from
512 /// `product.identifier.smiles` when available.
513 fn detect_gray_zone(
514 &self,
515 product: &ProductDescription,
516 hs_code: &str,
517 organic_class: Option<&OrganicInorganic>,
518 ) -> Option<GrayZone> {
519 let chapter = &hs_code[..2];
520
521 // Chapter 28 / 29 boundary: organometallic or borderline compound
522 if chapter == "28" && self.is_organometallic(product, organic_class) {
523 return Some(GrayZone::Chapter28vs29);
524 }
525
526 // Chapter 29 result but product is used industrially → Ch.29 vs Ch.38
527 if chapter == "29" {
528 use crate::types::IntendedUse;
529 if let Some(IntendedUse::Industrial) = &product.intended_use {
530 return Some(GrayZone::Chapter29vs38);
531 }
532 }
533
534 None
535 }
536
537 /// Whether the product is an organometallic compound — either via the
538 /// pre-computed `organic_class` (preferred) or by re-deriving from SMILES.
539 fn is_organometallic(
540 &self,
541 product: &ProductDescription,
542 organic_class: Option<&OrganicInorganic>,
543 ) -> bool {
544 match organic_class {
545 Some(oc) => matches!(oc, OrganicInorganic::Organometallic),
546 None => product.identifier.smiles.as_deref().is_some_and(|s| {
547 matches!(
548 crate::smiles::detector::classify_organic(s),
549 OrganicInorganic::Organometallic,
550 )
551 }),
552 }
553 }
554
555 /// Build supplementary notes about shape / purity caveats.
556 fn build_notes(&self, product: &ProductDescription) -> Vec<String> {
557 let mut notes = Vec::new();
558
559 match &product.physical_form {
560 None | Some(PhysicalForm::Unknown) => {
561 notes.push(
562 "Physical form not specified — the HS subheading may differ \
563 (e.g. solid vs. solution).".to_string(),
564 );
565 }
566 Some(PhysicalForm::Solution { concentration_pct_ww: None, .. }) => {
567 notes.push(
568 "Solution concentration not specified — subheading may differ \
569 (e.g. fuming vs. standard grade).".to_string(),
570 );
571 }
572 _ => {}
573 }
574
575 if product.purity_pct.is_none() {
576 notes.push(
577 "Purity not specified — some headings require a minimum purity threshold."
578 .to_string(),
579 );
580 }
581
582 notes
583 }
584}
585
586// ─────────────────────────────────────────────────────────────────────────────
587// Pipeline integration tests
588// ─────────────────────────────────────────────────────────────────────────────
589
#[cfg(all(test, feature = "mock"))]
mod tests {
    use super::*;
    use crate::llm::{LlmClassifier, LlmPrompt, LlmResponse, MockLlmClassifier};
    use crate::types::SubstanceIdentifier;

    /// Wrap an identifier in a product description with every optional
    /// attribute left unset.
    fn product_for(identifier: SubstanceIdentifier) -> ProductDescription {
        ProductDescription {
            identifier,
            physical_form: None,
            purity_pct: None,
            purity_type: None,
            mixture_components: None,
            intended_use: None,
            additional_context: None,
        }
    }

    /// A product identified only by SMILES (acetic acid, no CAS number), so
    /// the CAS-keyed stages cannot match it. The tests below rely on it
    /// reaching the LLM path.
    fn unknown_organic() -> ProductDescription {
        product_for(SubstanceIdentifier {
            cas: None,
            smiles: Some("CC(O)=O".to_string()),
            iupac_name: None,
            inchi: None,
            inchi_key: None,
            cid: None,
        })
    }

    /// Solid sodium hydroxide identified by CAS number; the test below
    /// expects the embedded rule table to classify it without the LLM.
    fn solid_naoh() -> ProductDescription {
        let mut product = product_for(SubstanceIdentifier::from_cas("1310-73-2"));
        product.physical_form = Some(crate::types::PhysicalForm::Solid);
        product
    }

    #[tokio::test]
    async fn classify_with_llm_mock_returns_6_digit_code() {
        let pipeline = HsPipeline::new().with_llm(MockLlmClassifier::new());

        let pred = pipeline
            .classify_with_llm(&unknown_organic())
            .await
            .unwrap();

        assert_eq!(pred.hs_code.len(), 6);
        assert!(pred.hs_code.chars().all(|c| c.is_ascii_digit()));
    }

    #[tokio::test]
    async fn classify_with_llm_mock_chapter_29_for_smiles_acid() {
        let pipeline = HsPipeline::new().with_llm(MockLlmClassifier::new());

        let pred = pipeline
            .classify_with_llm(&unknown_organic())
            .await
            .unwrap();

        assert!(
            pred.hs_code.starts_with("29"),
            "acetic acid SMILES should yield Chapter 29, got {}",
            pred.hs_code
        );
    }

    #[tokio::test]
    async fn classify_with_llm_no_client_returns_error() {
        // No LLM attached: the fallback must fail with LlmNotConfigured.
        let pipeline = HsPipeline::new();

        let err = pipeline
            .classify_with_llm(&unknown_organic())
            .await
            .unwrap_err();

        assert!(
            matches!(err, HsPredictError::LlmNotConfigured),
            "expected LlmNotConfigured, got {:?}",
            err
        );
    }

    #[tokio::test]
    async fn classify_with_llm_skips_llm_for_high_confidence_rule() {
        // The mock would answer "999999"; seeing "281511" instead shows that
        // the static rule result was accepted and the LLM answer was not used.
        let mock = MockLlmClassifier::with_default("999999", 0.1);
        let pipeline = HsPipeline::new().with_llm(mock);

        let pred = pipeline.classify_with_llm(&solid_naoh()).await.unwrap();

        assert_eq!(pred.hs_code, "281511", "static rule should win over LLM");
    }

    #[tokio::test]
    async fn classify_with_llm_invalid_code_returns_validation_error() {
        /// Classifier stub that always answers with a malformed HS code.
        struct BadMock;

        impl LlmClassifier for BadMock {
            fn classify<'a>(
                &'a self,
                _prompt: &'a LlmPrompt,
            ) -> futures::future::BoxFuture<'a, crate::Result<LlmResponse>> {
                Box::pin(async {
                    Ok(LlmResponse {
                        hs_code: "BAD!!".to_string(),
                        confidence: 0.5,
                        rationale: "bad".to_string(),
                        alternatives: vec![],
                    })
                })
            }
        }

        let pipeline = HsPipeline::new().with_llm(BadMock);

        let err = pipeline
            .classify_with_llm(&unknown_organic())
            .await
            .unwrap_err();

        assert!(
            matches!(err, HsPredictError::ValidationFailed { .. }),
            "expected ValidationFailed, got {:?}",
            err
        );
    }
}