Skip to main content

hs_predict/
types.rs

1use serde::{Deserialize, Serialize};
2
3// ─────────────────────────────────────────────
4// Language
5// ─────────────────────────────────────────────
6
/// UI language for session question prompts.
///
/// Defaults to English. Pass [`Language::Ja`] to
/// [`ClassificationSession::with_language`](crate::session::ClassificationSession::with_language)
/// for Japanese prompts.
///
/// # Serialization
///
/// Because of `rename_all = "lowercase"`, the variants are written as the
/// strings `"en"` and `"ja"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Language {
    /// English (default); this is the value produced by `Language::default()`.
    #[default]
    En,
    /// Japanese (日本語)
    Ja,
}
21
22// ─────────────────────────────────────────────
23// Substance identifier
24// ─────────────────────────────────────────────
25
/// Set of identifiers for a single chemical compound.
///
/// Provide at least one field. When multiple fields are set, the pipeline
/// uses them in priority order: CAS → SMILES → InChIKey → InChI → IUPAC name.
///
/// **Important**: `iupac_name` must be an IUPAC systematic name.
/// Trade names and common aliases (e.g. "caustic soda") are not accepted
/// because they cannot be reliably resolved in PubChem.
///
/// Every field is optional and the type itself does not enforce the
/// "at least one field" rule: `SubstanceIdentifier::default()` yields a value
/// with all fields `None`, which `is_empty()` reports as empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SubstanceIdentifier {
    /// CAS registry number (e.g. `"1310-73-2"`).
    pub cas: Option<String>,
    /// Canonical SMILES string (e.g. `"[Na+].[OH-]"`).
    pub smiles: Option<String>,
    /// IUPAC systematic name (e.g. `"sodium hydroxide"`).
    ///
    /// Do not use trade names or common aliases.
    pub iupac_name: Option<String>,
    /// InChI string (e.g. `"InChI=1S/Na.H2O/h;1H/q+1;/p-1"`).
    pub inchi: Option<String>,
    /// 27-character InChIKey (e.g. `"HEMHJVSKTPXQMS-UHFFFAOYSA-M"`).
    ///
    /// The length is not validated by this type; it is stored as a plain `String`.
    pub inchi_key: Option<String>,
    /// PubChem Compound ID — set automatically after a PubChem lookup.
    pub cid: Option<u64>,
}
51
52impl SubstanceIdentifier {
53    pub fn from_cas(cas: impl Into<String>) -> Self {
54        Self { cas: Some(cas.into()), ..Default::default() }
55    }
56
57    pub fn from_smiles(smiles: impl Into<String>) -> Self {
58        Self { smiles: Some(smiles.into()), ..Default::default() }
59    }
60
61    pub fn from_iupac_name(name: impl Into<String>) -> Self {
62        Self { iupac_name: Some(name.into()), ..Default::default() }
63    }
64
65    /// Returns `true` when no identifier field has been set.
66    pub fn is_empty(&self) -> bool {
67        self.cas.is_none()
68            && self.smiles.is_none()
69            && self.iupac_name.is_none()
70            && self.inchi.is_none()
71            && self.inchi_key.is_none()
72            && self.cid.is_none()
73    }
74
75    /// Short display string for logging and error messages.
76    pub fn display_name(&self) -> String {
77        if let Some(ref n) = self.iupac_name {
78            return n.clone();
79        }
80        if let Some(ref cas) = self.cas {
81            return format!("CAS:{}", cas);
82        }
83        if let Some(cid) = self.cid {
84            return format!("CID:{}", cid);
85        }
86        if let Some(ref s) = self.smiles {
87            let short = if s.len() > 20 { &s[..20] } else { s.as_str() };
88            return format!("SMILES:{}", short);
89        }
90        "(unknown)".to_string()
91    }
92}
93
94// ─────────────────────────────────────────────
95// Physical form
96// ─────────────────────────────────────────────
97
/// Physical state / form of the chemical product.
///
/// The same compound can have different HS subheadings depending on its form.
/// For example, sodium hydroxide solid → 2815.11, aqueous solution → 2815.12.
///
/// # Serialization
///
/// The enum is internally tagged: each value is written as a map whose
/// `"type"` entry holds the snake_case variant name (e.g. `"solution"`),
/// with the variant's own fields, if any, alongside it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PhysicalForm {
    /// Solid bulk material (lumps, pellets, flakes, rods, …).
    Solid,
    /// Fine-grained powder.
    Powder {
        /// Median particle size in micrometres. `None` if unknown.
        particle_size_um: Option<f64>,
    },
    /// Coarser granulated product.
    Granules,
    /// Pure liquid (not a solution).
    Liquid,
    /// Solution of the substance in a solvent.
    Solution {
        /// Solvent IUPAC name. `None` implies water.
        solvent: Option<String>,
        /// Solute concentration in w/w%. `None` if unknown.
        ///
        /// The value is not range-checked by this type.
        concentration_pct_ww: Option<f64>,
    },
    /// Gas or vapour.
    Gas,
    /// Thin metal sheet.
    Foil {
        /// Thickness in millimetres. `None` if unknown.
        thickness_mm: Option<f64>,
    },
    /// Cast metal product (ingot, billet, slab, …).
    Ingot,
    /// Form not yet determined (initial session value).
    Unknown,
}
135
136impl PhysicalForm {
137    /// Returns `true` if this is a solution variant.
138    pub fn is_solution(&self) -> bool {
139        matches!(self, PhysicalForm::Solution { .. })
140    }
141
142    /// Returns the concentration (w/w%) if this is a solution with known concentration.
143    pub fn concentration_pct(&self) -> Option<f64> {
144        if let PhysicalForm::Solution { concentration_pct_ww, .. } = self {
145            *concentration_pct_ww
146        } else {
147            None
148        }
149    }
150}
151
152// ─────────────────────────────────────────────
153// Purity
154// ─────────────────────────────────────────────
155
/// Qualitative purity category of a product, or an explicit numeric purity.
///
/// # Serialization
///
/// The enum is internally tagged: the snake_case variant name is stored
/// under the `"type"` key (e.g. `"reagent_grade"`).
//
// NOTE(review): serde documents the internally tagged representation as
// supporting unit variants, struct variants, and newtype variants that wrap a
// struct or map. `Specified(f64)` wraps a bare float, so serializing or
// deserializing that variant is expected to fail at runtime. Confirm with a
// round-trip test before relying on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PurityType {
    /// Reagent grade.
    ReagentGrade,
    /// Technical grade.
    TechnicalGrade,
    /// Pharmaceutical grade (PhEur / USP / JP, etc.).
    ///
    /// `standard` optionally names the standard; `None` if not given.
    PharmaceuticalGrade { standard: Option<String> },
    /// Food grade.
    FoodGrade,
    /// Electronics grade.
    ElectronicsGrade,
    /// Numeric purity value in % (0.0–100.0).
    ///
    /// The range is a documented expectation only; the type accepts any `f64`.
    Specified(f64),
}
168
169// ─────────────────────────────────────────────
170// Mixture component
171// ─────────────────────────────────────────────
172
/// A single component of a mixture product.
///
/// Set either `weight_fraction_pct` or `volume_fraction_pct`, not both.
///
/// This rule is a convention for callers: both fields are independent
/// `Option`s and the type does not prevent setting both or neither.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponent {
    /// Identifier for this component substance.
    pub substance: SubstanceIdentifier,
    /// Weight fraction in w/w%. The sum of all components need not equal 100
    /// (remaining fraction may be unknown).
    pub weight_fraction_pct: Option<f64>,
    /// Volume fraction in v/v%. Mutually exclusive with `weight_fraction_pct`.
    pub volume_fraction_pct: Option<f64>,
    /// Marks this component as the solvent (for solution products).
    ///
    /// This field has no serde default, so it must be present when
    /// deserializing.
    pub is_solvent: bool,
}
188
189// ─────────────────────────────────────────────
190// Product description (pipeline input)
191// ─────────────────────────────────────────────
192
/// Complete description of a product for HS code classification.
///
/// Build this struct via [`ClassificationSession`](crate::session::ClassificationSession)
/// or fill it directly and pass it to
/// [`HsPipeline::classify`](crate::pipeline::HsPipeline::classify).
///
/// Only `identifier` is mandatory; every other field is an `Option` and may
/// be left as `None`. The struct does not derive `Default`, so all fields
/// must be spelled out when constructing it by hand.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductDescription {
    /// Primary identifier (CAS / SMILES / IUPAC name / InChI, etc.).
    pub identifier: SubstanceIdentifier,

    /// Physical form of the product. `None` means unknown.
    pub physical_form: Option<PhysicalForm>,

    /// Purity in % (0.0–100.0). `None` means unspecified.
    ///
    /// The range is not enforced by the type.
    pub purity_pct: Option<f64>,

    /// Qualitative purity category.
    pub purity_type: Option<PurityType>,

    /// Component list for mixture products. `None` means pure substance.
    pub mixture_components: Option<Vec<MixtureComponent>>,

    /// Intended end-use of the product.
    pub intended_use: Option<IntendedUse>,

    /// Free-form additional context forwarded to the LLM prompt.
    pub additional_context: Option<String>,
}
221
/// Intended end-use category (influences chapter selection for special cases
/// such as pharmaceuticals → Ch. 30, fertilisers → Ch. 31).
///
/// # Serialization
///
/// No `tag` attribute is set, so serde's default externally tagged form
/// applies with snake_case names: unit variants are written as plain strings
/// (e.g. `"industrial"`), and `Other` as a single-entry map keyed by `"other"`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntendedUse {
    /// Industrial use.
    Industrial,
    /// Pharmaceutical use.
    Pharmaceutical,
    /// Agricultural use.
    Agricultural,
    /// Food use.
    Food,
    /// Cosmetic use.
    Cosmetic,
    /// Any other use, carried as free-form text.
    Other(String),
}
234
235impl ProductDescription {
236    /// Returns `true` if the product has mixture components set.
237    pub fn is_mixture(&self) -> bool {
238        self.mixture_components
239            .as_ref()
240            .map(|v| !v.is_empty())
241            .unwrap_or(false)
242    }
243}
244
245// ─────────────────────────────────────────────
246// Prediction result
247// ─────────────────────────────────────────────
248
/// HS code prediction result returned by the classification pipeline.
///
/// All fields are public and none is validated by the type: for example
/// `hs_code` is a plain `String`, so its six-digit format is a convention
/// that producers are expected to follow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HsPrediction {
    /// Six-digit HS 2022 code without punctuation (e.g. `"281511"`).
    pub hs_code: String,
    /// Official HS 2022 heading description for this code.
    pub heading_description: String,
    /// Confidence score in the range [0.0, 1.0].
    pub confidence: f32,
    /// Which part of the pipeline produced this prediction.
    pub source: PredictionSource,
    /// Supplementary notes (shape caveats, concentration notes, etc.).
    pub notes: Vec<String>,
    /// Alternative HS codes worth considering.
    pub alternatives: Vec<AlternativePrediction>,
    /// Recommended next action for the user.
    pub recommended_action: RecommendedAction,

    /// Nine-digit Japan statistical item code (統計品目番号).
    ///
    /// Based on Japan Customs' tariff schedule (実行関税率表). Updated annually;
    /// the year used is indicated by the `jp_tariff_year` field.
    /// `None` when no Japan-specific code is registered for this HS heading.
    pub jp_tariff_code: Option<String>,

    /// Tariff schedule year used for the `jp_tariff_code` field (e.g. `2026`).
    pub jp_tariff_year: Option<u16>,
}
277
278impl HsPrediction {
279    /// Two-digit chapter code (e.g. `"28"`).
280    pub fn chapter(&self) -> &str {
281        &self.hs_code[..2]
282    }
283
284    /// Four-digit heading code (e.g. `"2815"`).
285    pub fn heading(&self) -> &str {
286        &self.hs_code[..4]
287    }
288
289    /// Dot-separated display string (e.g. `"28.15.11"`).
290    pub fn display(&self) -> String {
291        let c = &self.hs_code;
292        if c.len() == 6 {
293            format!("{}.{}.{}", &c[..2], &c[2..4], &c[4..6])
294        } else {
295            c.clone()
296        }
297    }
298}
299
/// A secondary HS code candidate attached to an [`HsPrediction`] through its
/// `alternatives` list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlternativePrediction {
    /// HS code of the alternative.
    // NOTE(review): presumably the same six-digit, unpunctuated format as
    // `HsPrediction::hs_code` — confirm against the producer.
    pub hs_code: String,
    /// Confidence score for this alternative.
    // NOTE(review): presumably in [0.0, 1.0] like `HsPrediction::confidence` — confirm.
    pub confidence: f32,
    /// Text explaining why this alternative is listed.
    pub reason: String,
}
306
/// Which part of the pipeline produced the prediction.
///
/// # Serialization
///
/// The enum is internally tagged: the snake_case variant name is stored
/// under the `"type"` key (e.g. `"embedded_rule"`), next to the variant's
/// own fields.
///
/// Unlike several other enums in this file, this type does not derive
/// `PartialEq`, so values cannot be compared with `==`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PredictionSource {
    /// From the user's own CAS → HS mapping (highest trust).
    UserMapping,
    /// From the embedded compile-time rule table.
    ///
    /// `rule_id` identifies the rule that matched.
    EmbeddedRule { rule_id: String },
    /// From the SMILES-based rule engine (v0.3).
    ///
    /// `matched_rules` lists the rules that matched.
    RuleEngine { matched_rules: Vec<String> },
    /// From an LLM API call (v0.4).
    ///
    /// `model` names the model that was called.
    LlmApi { model: String },
    /// Combined rule-engine pre-classification + LLM final decision.
    ///
    /// Carries both the rule identifier and the model name.
    Hybrid { rule_id: String, model: String },
}
322
/// Recommended follow-up action for the customs practitioner.
///
/// The variants are listed from most to least confident result.
///
/// # Serialization
///
/// Variants are written as snake_case strings: `"accept"`,
/// `"verify_with_llm"`, and `"expert_review"`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecommendedAction {
    /// High-confidence result — safe to use in a customs declaration.
    Accept,
    /// Moderate-confidence result — recommend LLM or manual review.
    VerifyWithLlm,
    /// Low-confidence result — consult a qualified trade-compliance expert.
    ExpertReview,
}
334
335// ─────────────────────────────────────────────
336// Organic / inorganic classification
337// ─────────────────────────────────────────────
338
/// Result of SMILES-based organic / inorganic detection (v0.3).
///
/// # Serialization
///
/// Variants are written as snake_case strings: `"organic"`, `"inorganic"`,
/// `"organometallic"`, and `"unknown"`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrganicInorganic {
    /// Classified as an organic compound.
    Organic,
    /// Classified as an inorganic compound.
    Inorganic,
    /// Compound with a direct metal–carbon bond.
    Organometallic,
    /// Classification could not be determined.
    Unknown,
}