Skip to main content

hs_predict/
types.rs

1use serde::{Deserialize, Serialize};
2
3// ─────────────────────────────────────────────
4// Language
5// ─────────────────────────────────────────────
6
/// UI language for session question prompts.
///
/// Defaults to English. Pass [`Language::Ja`] to
/// [`ClassificationSession::with_language`](crate::session::ClassificationSession::with_language)
/// for Japanese prompts.
///
/// With serde the variant names are written in lowercase (`"en"`, `"ja"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Language {
    /// English. This is the value produced by [`Default`].
    #[default]
    En,
    /// Japanese.
    Ja,
}
21
22// ─────────────────────────────────────────────
23// Substance identifier
24// ─────────────────────────────────────────────
25
/// Set of identifiers for a single chemical compound.
///
/// Provide at least one field. When multiple fields are set, the pipeline
/// uses them in priority order: CAS → SMILES → InChIKey → InChI → IUPAC name.
///
/// **Important**: `iupac_name` must be an IUPAC systematic name.
/// Trade names and common aliases (e.g. "caustic soda") are not accepted
/// because they cannot be reliably resolved in PubChem.
///
/// Every field is optional and the derived [`Default`] leaves all of them
/// `None`. None of the string fields is validated by this type itself.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SubstanceIdentifier {
    /// CAS registry number (e.g. `"1310-73-2"`).
    pub cas: Option<String>,
    /// Canonical SMILES string (e.g. `"[Na+].[OH-]"`).
    pub smiles: Option<String>,
    /// IUPAC systematic name (e.g. `"sodium hydroxide"`).
    ///
    /// Do not use trade names or common aliases.
    pub iupac_name: Option<String>,
    /// InChI string (e.g. `"InChI=1S/Na.H2O/h;1H/q+1;/p-1"`).
    pub inchi: Option<String>,
    /// 27-character InChIKey (e.g. `"HEMHJVSKTPXQMS-UHFFFAOYSA-M"`).
    pub inchi_key: Option<String>,
    /// PubChem Compound ID — set automatically after a PubChem lookup.
    pub cid: Option<u64>,
}
51
52impl SubstanceIdentifier {
53    pub fn from_cas(cas: impl Into<String>) -> Self {
54        Self { cas: Some(cas.into()), ..Default::default() }
55    }
56
57    pub fn from_smiles(smiles: impl Into<String>) -> Self {
58        Self { smiles: Some(smiles.into()), ..Default::default() }
59    }
60
61    pub fn from_iupac_name(name: impl Into<String>) -> Self {
62        Self { iupac_name: Some(name.into()), ..Default::default() }
63    }
64
65    /// Returns `true` when no identifier field has been set.
66    pub fn is_empty(&self) -> bool {
67        self.cas.is_none()
68            && self.smiles.is_none()
69            && self.iupac_name.is_none()
70            && self.inchi.is_none()
71            && self.inchi_key.is_none()
72            && self.cid.is_none()
73    }
74
75    /// Short display string for logging and error messages.
76    pub fn display_name(&self) -> String {
77        if let Some(ref n) = self.iupac_name {
78            return n.clone();
79        }
80        if let Some(ref cas) = self.cas {
81            return format!("CAS:{}", cas);
82        }
83        if let Some(cid) = self.cid {
84            return format!("CID:{}", cid);
85        }
86        if let Some(ref s) = self.smiles {
87            // Use char-based slicing to avoid panicking on multi-byte UTF-8 sequences.
88            let short: String = s.chars().take(20).collect();
89            return format!("SMILES:{}", short);
90        }
91        "(unknown)".to_string()
92    }
93}
94
95// ─────────────────────────────────────────────
96// Physical form
97// ─────────────────────────────────────────────
98
/// Physical state / form of the chemical product.
///
/// The same compound can have different HS subheadings depending on its form.
/// For example, sodium hydroxide solid → 2815.11, aqueous solution → 2815.12.
///
/// With serde this enum is internally tagged: the variant name is written in
/// `snake_case` under a `"type"` key (e.g. `{"type": "solid"}`), alongside
/// the variant's own fields where it has any.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PhysicalForm {
    /// Solid bulk material (lumps, pellets, flakes, rods, …).
    Solid,
    /// Fine-grained powder.
    Powder {
        /// Median particle size in micrometres. `None` if unknown.
        particle_size_um: Option<f64>,
    },
    /// Coarser granulated product.
    Granules,
    /// Pure liquid (not a solution).
    Liquid,
    /// Solution of the substance in a solvent.
    Solution {
        /// Solvent IUPAC name. `None` implies water.
        solvent: Option<String>,
        /// Solute concentration in w/w%. `None` if unknown.
        concentration_pct_ww: Option<f64>,
    },
    /// Gas or vapour.
    Gas,
    /// Thin metal sheet.
    Foil {
        /// Thickness in millimetres. `None` if unknown.
        thickness_mm: Option<f64>,
    },
    /// Cast metal product (ingot, billet, slab, …).
    Ingot,
    /// Form not yet determined (initial session value).
    Unknown,
}
136
137impl PhysicalForm {
138    /// Returns `true` if this is a solution variant.
139    pub fn is_solution(&self) -> bool {
140        matches!(self, PhysicalForm::Solution { .. })
141    }
142
143    /// Returns the concentration (w/w%) if this is a solution with known concentration.
144    pub fn concentration_pct(&self) -> Option<f64> {
145        if let PhysicalForm::Solution { concentration_pct_ww, .. } = self {
146            *concentration_pct_ww
147        } else {
148            None
149        }
150    }
151}
152
153// ─────────────────────────────────────────────
154// Purity
155// ─────────────────────────────────────────────
156
/// Qualitative purity grade of a product, or an explicit numeric purity.
///
/// With serde this enum is internally tagged: the variant name is written in
/// `snake_case` under a `"type"` key.
///
/// NOTE(review): serde documents the internally tagged representation as
/// unsupported for newtype variants that wrap a non-map value. Serializing
/// [`PurityType::Specified`] (which wraps a bare `f64`) is therefore expected
/// to fail at runtime — confirm with a round-trip test and, if so, pick a
/// representation that can carry the number.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PurityType {
    /// Reagent grade.
    ReagentGrade,
    /// Technical grade.
    TechnicalGrade,
    /// Pharmaceutical grade (PhEur / USP / JP, etc.).
    ///
    /// `standard` presumably names the pharmacopoeia the product complies
    /// with; `None` when not given — TODO confirm against callers.
    PharmaceuticalGrade { standard: Option<String> },
    /// Food grade.
    FoodGrade,
    /// Electronics grade.
    ElectronicsGrade,
    /// Numeric purity value in % (0.0–100.0).
    ///
    /// The range is not enforced by the type.
    Specified(f64),
}
169
170// ─────────────────────────────────────────────
171// Mixture component
172// ─────────────────────────────────────────────
173
/// A single component of a mixture product.
///
/// Set either `weight_fraction_pct` or `volume_fraction_pct`, not both.
/// The two fields are independent `Option`s, so this rule is a convention
/// for callers and is not enforced by the type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponent {
    /// Identifier for this component substance.
    pub substance: SubstanceIdentifier,
    /// Weight fraction in w/w%. The sum of all components need not equal 100
    /// (remaining fraction may be unknown).
    pub weight_fraction_pct: Option<f64>,
    /// Volume fraction in v/v%. Mutually exclusive with `weight_fraction_pct`.
    pub volume_fraction_pct: Option<f64>,
    /// Marks this component as the solvent (for solution products).
    pub is_solvent: bool,
}
189
190// ─────────────────────────────────────────────
191// Product description (pipeline input)
192// ─────────────────────────────────────────────
193
/// Complete description of a product for HS code classification.
///
/// Build this struct via [`ClassificationSession`](crate::session::ClassificationSession)
/// or fill it directly and pass it to
/// [`HsPipeline::classify`](crate::pipeline::HsPipeline::classify).
///
/// Only `identifier` is mandatory at the type level; every other field is an
/// `Option` whose `None` meaning is given on the field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductDescription {
    /// Primary identifier (CAS / SMILES / IUPAC name / InChI, etc.).
    pub identifier: SubstanceIdentifier,

    /// Physical form of the product. `None` means unknown.
    pub physical_form: Option<PhysicalForm>,

    /// Purity in % (0.0–100.0). `None` means unspecified.
    ///
    /// The range is not enforced by the type.
    pub purity_pct: Option<f64>,

    /// Qualitative purity category.
    ///
    /// NOTE(review): [`PurityType::Specified`] also carries a numeric purity;
    /// how it relates to `purity_pct` is not visible here — confirm.
    pub purity_type: Option<PurityType>,

    /// Component list for mixture products. `None` means pure substance.
    ///
    /// An empty list is treated like `None` by
    /// [`ProductDescription::is_mixture`].
    pub mixture_components: Option<Vec<MixtureComponent>>,

    /// Intended end-use of the product.
    pub intended_use: Option<IntendedUse>,

    /// Free-form additional context forwarded to the LLM prompt.
    pub additional_context: Option<String>,
}
222
/// Intended end-use category (influences chapter selection for special cases
/// such as pharmaceuticals → Ch. 30, fertilisers → Ch. 31).
///
/// With serde the variant names are written in `snake_case`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntendedUse {
    /// Industrial use.
    Industrial,
    /// Pharmaceutical use.
    Pharmaceutical,
    /// Agricultural use.
    Agricultural,
    /// Food use.
    Food,
    /// Cosmetic use.
    Cosmetic,
    /// A use not covered by the other variants; the wrapped `String` is
    /// presumably a free-form description — TODO confirm against callers.
    Other(String),
}
235
236impl ProductDescription {
237    /// Returns `true` if the product has mixture components set.
238    pub fn is_mixture(&self) -> bool {
239        self.mixture_components
240            .as_ref()
241            .map(|v| !v.is_empty())
242            .unwrap_or(false)
243    }
244}
245
246// ─────────────────────────────────────────────
247// Prediction result
248// ─────────────────────────────────────────────
249
/// HS code prediction result returned by the classification pipeline.
///
/// The helper methods on this type ([`HsPrediction::chapter`],
/// [`HsPrediction::heading`], [`HsPrediction::display`]) all derive their
/// output from `hs_code`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HsPrediction {
    /// Six-digit HS 2022 code without punctuation (e.g. `"281511"`).
    ///
    /// Stored as a plain `String`; the format is not enforced by the type.
    pub hs_code: String,
    /// Official HS 2022 heading description for this code.
    pub heading_description: String,
    /// Confidence score in the range [0.0, 1.0].
    pub confidence: f32,
    /// Which part of the pipeline produced this prediction.
    pub source: PredictionSource,
    /// Supplementary notes (shape caveats, concentration notes, etc.).
    pub notes: Vec<String>,
    /// Alternative HS codes worth considering.
    pub alternatives: Vec<AlternativePrediction>,
    /// Recommended next action for the user.
    pub recommended_action: RecommendedAction,

    /// Classification boundary risk indicator (v0.5).
    ///
    /// `Some` when the prediction falls in a well-known misclassification gray
    /// zone (e.g. Chapter 29 vs 38 for organic preparations).  When present,
    /// consider requesting an advance ruling from customs authorities (in
    /// Japan: the *jizen kyōji* advance ruling system).
    pub gray_zone: Option<GrayZone>,

    /// Nine-digit Japan statistical item code (*tōkei hinmoku bangō*).
    ///
    /// Based on the Japan Customs tariff schedule (*jikkō kanzeiritsu-hyō*).
    /// Updated annually; the year used is indicated by the `jp_tariff_year`
    /// field.
    /// `None` when no Japan-specific code is registered for this HS heading.
    pub jp_tariff_code: Option<String>,

    /// Tariff schedule year used for the `jp_tariff_code` field (e.g. `2026`).
    pub jp_tariff_year: Option<u16>,
}
285
286impl HsPrediction {
287    /// Two-digit chapter code (e.g. `"28"`).
288    ///
289    /// Returns `&self.hs_code` unchanged if the code is shorter than 2 ASCII
290    /// digits (which should not happen for valid predictions).
291    pub fn chapter(&self) -> &str {
292        self.hs_code.get(..2).unwrap_or(&self.hs_code)
293    }
294
295    /// Four-digit heading code (e.g. `"2815"`).
296    ///
297    /// Returns `&self.hs_code` unchanged if the code is shorter than 4 ASCII
298    /// digits (which should not happen for valid predictions).
299    pub fn heading(&self) -> &str {
300        self.hs_code.get(..4).unwrap_or(&self.hs_code)
301    }
302
303    /// Dot-separated display string (e.g. `"28.15.11"`).
304    pub fn display(&self) -> String {
305        let c = &self.hs_code;
306        if c.len() == 6 {
307            format!("{}.{}.{}", &c[..2], &c[2..4], &c[4..6])
308        } else {
309            c.clone()
310        }
311    }
312}
313
/// An alternative HS code candidate, as listed in
/// [`HsPrediction::alternatives`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlternativePrediction {
    /// HS code of the alternative. Presumably in the same six-digit,
    /// unpunctuated format as [`HsPrediction::hs_code`] — TODO confirm.
    pub hs_code: String,
    /// Confidence score for this alternative. Presumably in the range
    /// [0.0, 1.0] like [`HsPrediction::confidence`] — TODO confirm.
    pub confidence: f32,
    /// Text explaining why this alternative is worth considering.
    pub reason: String,
}
320
/// Which part of the pipeline produced the prediction.
///
/// With serde this enum is internally tagged: the variant name is written in
/// `snake_case` under a `"type"` key, alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PredictionSource {
    /// From the user's own CAS → HS mapping (highest trust).
    UserMapping,
    /// From the embedded compile-time rule table.
    ///
    /// `rule_id` presumably identifies the table entry that matched — TODO
    /// confirm against the rule table.
    EmbeddedRule { rule_id: String },
    /// From the SMILES-based rule engine (v0.3).
    ///
    /// `matched_rules` presumably lists the identifiers of the rules that
    /// fired — TODO confirm against the rule engine.
    RuleEngine { matched_rules: Vec<String> },
    /// From an LLM API call (v0.4).
    ///
    /// `model` presumably names the LLM that was called — TODO confirm.
    LlmApi { model: String },
    /// Combined rule-engine pre-classification + LLM final decision.
    ///
    /// Carries both a `rule_id` and a `model`; presumably with the same
    /// meaning as in the `EmbeddedRule` and `LlmApi` variants — TODO confirm.
    Hybrid { rule_id: String, model: String },
}
336
/// Recommended follow-up action for the customs practitioner.
///
/// With serde the variant names are written in `snake_case`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecommendedAction {
    /// High-confidence result — safe to use in a customs declaration.
    Accept,
    /// Moderate-confidence result — recommend LLM or manual review.
    VerifyWithLlm,
    /// A formal advance ruling (prior consultation; in Japan the *jizen
    /// kyōji* system) is recommended.
    ///
    /// Applied when a [`GrayZone`] boundary is detected or when mixture
    /// classification falls back to GRI 3c (last heading by number).
    /// Contact your local customs authority for a binding ruling before declaration.
    PriorConsultation,
    /// Low-confidence result — consult a qualified trade-compliance expert.
    ExpertReview,
}
354
/// Identifies a classification boundary where misclassification risk is elevated.
///
/// When present in [`HsPrediction::gray_zone`], consider requesting a formal
/// advance ruling (binding tariff information; in Japan the *jizen kyōji*
/// system) from customs authorities before making a customs declaration.
/// Misclassification of chemicals can result in retroactive duty assessments
/// going back up to five years.
///
/// With serde the variant names are written in `snake_case`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GrayZone {
    /// Chapter 29 (pure organic chemicals) vs Chapter 38 (prepared/mixed products).
    ///
    /// Occurs when an organic compound is part of a formulation, has industrial
    /// use, or when purity / presentation may shift classification to Ch. 38.
    /// Multi-function additives and flame-retardant mixtures are common examples.
    Chapter29vs38,

    /// Chapter 28 (inorganic chemicals) vs Chapter 29 (organic chemicals).
    ///
    /// Occurs for organometallic compounds or borderline organic/inorganic cases
    /// where the presence of metal–carbon bonds determines the correct chapter.
    Chapter28vs29,

    /// Mixture where essential character (GRI 3b) is ambiguous.
    ///
    /// No single component exceeds 50 % w/w, so GRI 3c (last heading by number)
    /// was applied with low confidence. An expert or advance ruling is strongly
    /// recommended.
    MixtureEssentialCharacterUnclear,
}
384
385// ─────────────────────────────────────────────
386// Organic / inorganic classification
387// ─────────────────────────────────────────────
388
/// Result of SMILES-based organic / inorganic detection (v0.3).
///
/// With serde the variant names are written in `snake_case`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrganicInorganic {
    /// Detected as an organic compound.
    Organic,
    /// Detected as an inorganic compound.
    Inorganic,
    /// Compound with a direct metal–carbon bond.
    Organometallic,
    /// The detection could not place the compound in any other category.
    Unknown,
}