hs_predict/types.rs
1use serde::{Deserialize, Serialize};
2
3// ─────────────────────────────────────────────
4// Language
5// ─────────────────────────────────────────────
6
/// UI language for session question prompts.
///
/// Defaults to English ([`Language::En`] carries `#[default]`). Pass
/// [`Language::Ja`] to
/// [`ClassificationSession::with_language`](crate::session::ClassificationSession::with_language)
/// for Japanese prompts.
///
/// Because of `rename_all = "lowercase"`, serde represents the variants as
/// the strings `"en"` and `"ja"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Language {
    /// English (default).
    #[default]
    En,
    /// Japanese (日本語).
    Ja,
}
21
22// ─────────────────────────────────────────────
23// Substance identifier
24// ─────────────────────────────────────────────
25
/// Set of identifiers for a single chemical compound.
///
/// Provide at least one field. When multiple fields are set, the pipeline
/// uses them in priority order: CAS → SMILES → InChIKey → InChI → IUPAC name.
///
/// **Important**: `iupac_name` must be an IUPAC systematic name.
/// Trade names and common aliases (e.g. "caustic soda") are not accepted
/// because they cannot be reliably resolved in PubChem.
///
/// Every field is optional; the derived `Default` yields an identifier with
/// all fields set to `None`. None of the string formats described below is
/// validated by this type itself.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SubstanceIdentifier {
    /// CAS registry number (e.g. `"1310-73-2"`).
    pub cas: Option<String>,
    /// Canonical SMILES string (e.g. `"[Na+].[OH-]"`).
    pub smiles: Option<String>,
    /// IUPAC systematic name (e.g. `"sodium hydroxide"`).
    ///
    /// Do not use trade names or common aliases.
    pub iupac_name: Option<String>,
    /// InChI string (e.g. `"InChI=1S/Na.H2O/h;1H/q+1;/p-1"`).
    pub inchi: Option<String>,
    /// 27-character InChIKey (e.g. `"HEMHJVSKTPXQMS-UHFFFAOYSA-M"`).
    pub inchi_key: Option<String>,
    /// PubChem Compound ID — set automatically after a PubChem lookup.
    pub cid: Option<u64>,
}
51
52impl SubstanceIdentifier {
53 pub fn from_cas(cas: impl Into<String>) -> Self {
54 Self { cas: Some(cas.into()), ..Default::default() }
55 }
56
57 pub fn from_smiles(smiles: impl Into<String>) -> Self {
58 Self { smiles: Some(smiles.into()), ..Default::default() }
59 }
60
61 pub fn from_iupac_name(name: impl Into<String>) -> Self {
62 Self { iupac_name: Some(name.into()), ..Default::default() }
63 }
64
65 /// Returns `true` when no identifier field has been set.
66 pub fn is_empty(&self) -> bool {
67 self.cas.is_none()
68 && self.smiles.is_none()
69 && self.iupac_name.is_none()
70 && self.inchi.is_none()
71 && self.inchi_key.is_none()
72 && self.cid.is_none()
73 }
74
75 /// Short display string for logging and error messages.
76 pub fn display_name(&self) -> String {
77 if let Some(ref n) = self.iupac_name {
78 return n.clone();
79 }
80 if let Some(ref cas) = self.cas {
81 return format!("CAS:{}", cas);
82 }
83 if let Some(cid) = self.cid {
84 return format!("CID:{}", cid);
85 }
86 if let Some(ref s) = self.smiles {
87 let short = if s.len() > 20 { &s[..20] } else { s.as_str() };
88 return format!("SMILES:{}", short);
89 }
90 "(unknown)".to_string()
91 }
92}
93
94// ─────────────────────────────────────────────
95// Physical form
96// ─────────────────────────────────────────────
97
/// Physical state / form of the chemical product.
///
/// The same compound can have different HS subheadings depending on its form.
/// For example, sodium hydroxide solid → 2815.11, aqueous solution → 2815.12.
///
/// Serialized with serde's internally tagged representation: the variant
/// name is stored in snake_case under a `"type"` key (e.g.
/// `{"type": "solution", ...}`), alongside the variant's own fields.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PhysicalForm {
    /// Solid bulk material (lumps, pellets, flakes, rods, …).
    Solid,
    /// Fine-grained powder.
    Powder {
        /// Median particle size in micrometres. `None` if unknown.
        particle_size_um: Option<f64>,
    },
    /// Coarser granulated product.
    Granules,
    /// Pure liquid (not a solution).
    Liquid,
    /// Solution of the substance in a solvent.
    Solution {
        /// Solvent IUPAC name. `None` implies water.
        solvent: Option<String>,
        /// Solute concentration in w/w%. `None` if unknown.
        concentration_pct_ww: Option<f64>,
    },
    /// Gas or vapour.
    Gas,
    /// Thin metal sheet.
    Foil {
        /// Thickness in millimetres. `None` if unknown.
        thickness_mm: Option<f64>,
    },
    /// Cast metal product (ingot, billet, slab, …).
    Ingot,
    /// Form not yet determined (initial session value).
    Unknown,
}
135
136impl PhysicalForm {
137 /// Returns `true` if this is a solution variant.
138 pub fn is_solution(&self) -> bool {
139 matches!(self, PhysicalForm::Solution { .. })
140 }
141
142 /// Returns the concentration (w/w%) if this is a solution with known concentration.
143 pub fn concentration_pct(&self) -> Option<f64> {
144 if let PhysicalForm::Solution { concentration_pct_ww, .. } = self {
145 *concentration_pct_ww
146 } else {
147 None
148 }
149 }
150}
151
152// ─────────────────────────────────────────────
153// Purity
154// ─────────────────────────────────────────────
155
/// Qualitative purity category of a product.
///
/// Serialized with serde's internally tagged representation: the variant
/// name is stored in snake_case under a `"type"` key.
///
/// NOTE(review): serde's internally tagged representation does not support
/// newtype variants that wrap a primitive. Serializing or deserializing
/// [`PurityType::Specified`] is therefore expected to fail at runtime —
/// confirm with a round-trip test. A struct variant or adjacent tagging
/// (`tag` + `content`) would fix it, at the cost of changing the public
/// variant shape or the wire format.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PurityType {
    /// Reagent grade.
    ReagentGrade,
    /// Technical grade.
    TechnicalGrade,
    /// Pharmaceutical grade (PhEur / USP / JP, etc.).
    ///
    /// `standard` presumably names the pharmacopoeia the product complies
    /// with; `None` when unspecified — TODO confirm against callers.
    PharmaceuticalGrade { standard: Option<String> },
    /// Food grade.
    FoodGrade,
    /// Electronics grade.
    ElectronicsGrade,
    /// Numeric purity value in % (0.0–100.0).
    ///
    /// The range is documented only; the type does not enforce it.
    Specified(f64),
}
168
169// ─────────────────────────────────────────────
170// Mixture component
171// ─────────────────────────────────────────────
172
/// A single component of a mixture product.
///
/// Set either `weight_fraction_pct` or `volume_fraction_pct`, not both.
/// This exclusivity is a usage convention: both fields are independent
/// `Option`s and the type itself does not enforce it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponent {
    /// Identifier for this component substance.
    pub substance: SubstanceIdentifier,
    /// Weight fraction in w/w%. The sum of all components need not equal 100
    /// (remaining fraction may be unknown).
    pub weight_fraction_pct: Option<f64>,
    /// Volume fraction in v/v%. Mutually exclusive with `weight_fraction_pct`.
    pub volume_fraction_pct: Option<f64>,
    /// Marks this component as the solvent (for solution products).
    pub is_solvent: bool,
}
188
189// ─────────────────────────────────────────────
190// Product description (pipeline input)
191// ─────────────────────────────────────────────
192
/// Complete description of a product for HS code classification.
///
/// Build this struct via [`ClassificationSession`](crate::session::ClassificationSession)
/// or fill it directly and pass it to
/// [`HsPipeline::classify`](crate::pipeline::HsPipeline::classify).
///
/// Only `identifier` is mandatory; every other field is an `Option` whose
/// `None` value means "not provided".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductDescription {
    /// Primary identifier (CAS / SMILES / IUPAC name / InChI, etc.).
    pub identifier: SubstanceIdentifier,

    /// Physical form of the product. `None` means unknown.
    ///
    /// Note that [`PhysicalForm::Unknown`] also exists, so "unknown" can be
    /// expressed in two ways.
    pub physical_form: Option<PhysicalForm>,

    /// Purity in % (0.0–100.0). `None` means unspecified.
    ///
    /// The range is documented only; the type does not enforce it.
    pub purity_pct: Option<f64>,

    /// Qualitative purity category.
    pub purity_type: Option<PurityType>,

    /// Component list for mixture products. `None` means pure substance.
    pub mixture_components: Option<Vec<MixtureComponent>>,

    /// Intended end-use of the product.
    pub intended_use: Option<IntendedUse>,

    /// Free-form additional context forwarded to the LLM prompt.
    pub additional_context: Option<String>,
}
221
/// Intended end-use category (influences chapter selection for special cases
/// such as pharmaceuticals → Ch. 30, fertilisers → Ch. 31).
///
/// Uses serde's default externally tagged representation with snake_case
/// names: unit variants become plain strings (e.g. `"industrial"`), and
/// [`IntendedUse::Other`] becomes `{"other": "<text>"}`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntendedUse {
    /// Industrial use.
    Industrial,
    /// Pharmaceutical use.
    Pharmaceutical,
    /// Agricultural use.
    Agricultural,
    /// Food use.
    Food,
    /// Cosmetic use.
    Cosmetic,
    /// Any other use; the string is presumably a free-form description
    /// supplied by the caller — TODO confirm.
    Other(String),
}
234
235impl ProductDescription {
236 /// Returns `true` if the product has mixture components set.
237 pub fn is_mixture(&self) -> bool {
238 self.mixture_components
239 .as_ref()
240 .map(|v| !v.is_empty())
241 .unwrap_or(false)
242 }
243}
244
245// ─────────────────────────────────────────────
246// Prediction result
247// ─────────────────────────────────────────────
248
/// HS code prediction result returned by the classification pipeline.
///
/// The formats and ranges described on the fields below are conventions;
/// the field types (`String`, `f32`, …) do not enforce them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HsPrediction {
    /// Six-digit HS 2022 code without punctuation (e.g. `"281511"`).
    pub hs_code: String,
    /// Official HS 2022 heading description for this code.
    pub heading_description: String,
    /// Confidence score in the range [0.0, 1.0].
    pub confidence: f32,
    /// Which part of the pipeline produced this prediction.
    pub source: PredictionSource,
    /// Supplementary notes (shape caveats, concentration notes, etc.).
    pub notes: Vec<String>,
    /// Alternative HS codes worth considering.
    pub alternatives: Vec<AlternativePrediction>,
    /// Recommended next action for the user.
    pub recommended_action: RecommendedAction,

    /// Nine-digit Japan statistical item code (統計品目番号).
    ///
    /// Based on Japan Customs 実行関税率表 (the applied tariff schedule).
    /// Updated annually; the year used is indicated by the `jp_tariff_year`
    /// field.
    /// `None` when no Japan-specific code is registered for this HS heading.
    pub jp_tariff_code: Option<String>,

    /// Tariff schedule year used for the `jp_tariff_code` field (e.g. `2026`).
    pub jp_tariff_year: Option<u16>,
}
277
278impl HsPrediction {
279 /// Two-digit chapter code (e.g. `"28"`).
280 pub fn chapter(&self) -> &str {
281 &self.hs_code[..2]
282 }
283
284 /// Four-digit heading code (e.g. `"2815"`).
285 pub fn heading(&self) -> &str {
286 &self.hs_code[..4]
287 }
288
289 /// Dot-separated display string (e.g. `"28.15.11"`).
290 pub fn display(&self) -> String {
291 let c = &self.hs_code;
292 if c.len() == 6 {
293 format!("{}.{}.{}", &c[..2], &c[2..4], &c[4..6])
294 } else {
295 c.clone()
296 }
297 }
298}
299
/// An alternative HS code candidate, as listed in
/// [`HsPrediction::alternatives`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlternativePrediction {
    /// Candidate HS code. Presumably in the same six-digit, unpunctuated
    /// format as [`HsPrediction::hs_code`] — TODO confirm.
    pub hs_code: String,
    /// Confidence score for this candidate. Presumably in [0.0, 1.0] like
    /// [`HsPrediction::confidence`] — TODO confirm.
    pub confidence: f32,
    /// Explanation of why this alternative is worth considering.
    pub reason: String,
}
306
/// Which part of the pipeline produced the prediction.
///
/// Serialized with serde's internally tagged representation: the variant
/// name is stored in snake_case under a `"type"` key, next to the variant's
/// own fields (e.g. `{"type": "llm_api", "model": "..."}`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PredictionSource {
    /// From the user's own CAS → HS mapping (highest trust).
    UserMapping,
    /// From the embedded compile-time rule table.
    ///
    /// `rule_id` presumably identifies the matching table entry — TODO confirm.
    EmbeddedRule { rule_id: String },
    /// From the SMILES-based rule engine (v0.3).
    ///
    /// `matched_rules` presumably lists the identifiers of the rules that
    /// fired — TODO confirm.
    RuleEngine { matched_rules: Vec<String> },
    /// From an LLM API call (v0.4).
    ///
    /// `model` presumably names the LLM that was queried — TODO confirm.
    LlmApi { model: String },
    /// Combined rule-engine pre-classification + LLM final decision.
    Hybrid { rule_id: String, model: String },
}
322
/// Recommended follow-up action for the customs practitioner.
///
/// Serialized by serde as a snake_case string (`"accept"`,
/// `"verify_with_llm"`, `"expert_review"`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecommendedAction {
    /// High-confidence result — safe to use in a customs declaration.
    Accept,
    /// Moderate-confidence result — recommend LLM or manual review.
    VerifyWithLlm,
    /// Low-confidence result — consult a qualified trade-compliance expert.
    ExpertReview,
}
334
335// ─────────────────────────────────────────────
336// Organic / inorganic classification
337// ─────────────────────────────────────────────
338
/// Result of SMILES-based organic / inorganic detection (v0.3).
///
/// Serialized by serde as a snake_case string (`"organic"`, `"inorganic"`,
/// `"organometallic"`, `"unknown"`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrganicInorganic {
    /// Organic compound.
    Organic,
    /// Inorganic compound.
    Inorganic,
    /// Compound with a direct metal–carbon bond.
    Organometallic,
    /// Presumably used when the class could not be determined — TODO confirm.
    Unknown,
}