hs_predict/types.rs
1use serde::{Deserialize, Serialize};
2
3// ─────────────────────────────────────────────
4// Language
5// ─────────────────────────────────────────────
6
7/// UI language for session question prompts.
8///
9/// Defaults to English. Pass [`Language::Ja`] to
10/// [`ClassificationSession::with_language`](crate::session::ClassificationSession::with_language)
11/// for Japanese prompts.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
13#[serde(rename_all = "lowercase")]
14pub enum Language {
15 /// English (default)
16 #[default]
17 En,
18 /// Japanese (日本語)
19 Ja,
20}
21
22// ─────────────────────────────────────────────
23// Substance identifier
24// ─────────────────────────────────────────────
25
26/// Set of identifiers for a single chemical compound.
27///
28/// Provide at least one field. When multiple fields are set, the pipeline
29/// uses them in priority order: CAS → SMILES → InChIKey → InChI → IUPAC name.
30///
31/// **Important**: `iupac_name` must be an IUPAC systematic name.
32/// Trade names and common aliases (e.g. "caustic soda") are not accepted
33/// because they cannot be reliably resolved in PubChem.
34#[derive(Debug, Clone, Default, Serialize, Deserialize)]
35pub struct SubstanceIdentifier {
36 /// CAS registry number (e.g. `"1310-73-2"`).
37 pub cas: Option<String>,
38 /// Canonical SMILES string (e.g. `"[Na+].[OH-]"`).
39 pub smiles: Option<String>,
40 /// IUPAC systematic name (e.g. `"sodium hydroxide"`).
41 ///
42 /// Do not use trade names or common aliases.
43 pub iupac_name: Option<String>,
44 /// InChI string (e.g. `"InChI=1S/Na.H2O/h;1H/q+1;/p-1"`).
45 pub inchi: Option<String>,
46 /// 27-character InChIKey (e.g. `"HEMHJVSKTPXQMS-UHFFFAOYSA-M"`).
47 pub inchi_key: Option<String>,
48 /// PubChem Compound ID — set automatically after a PubChem lookup.
49 pub cid: Option<u64>,
50}
51
52impl SubstanceIdentifier {
53 pub fn from_cas(cas: impl Into<String>) -> Self {
54 Self { cas: Some(cas.into()), ..Default::default() }
55 }
56
57 pub fn from_smiles(smiles: impl Into<String>) -> Self {
58 Self { smiles: Some(smiles.into()), ..Default::default() }
59 }
60
61 pub fn from_iupac_name(name: impl Into<String>) -> Self {
62 Self { iupac_name: Some(name.into()), ..Default::default() }
63 }
64
65 /// Returns `true` when no identifier field has been set.
66 pub fn is_empty(&self) -> bool {
67 self.cas.is_none()
68 && self.smiles.is_none()
69 && self.iupac_name.is_none()
70 && self.inchi.is_none()
71 && self.inchi_key.is_none()
72 && self.cid.is_none()
73 }
74
75 /// Short display string for logging and error messages.
76 pub fn display_name(&self) -> String {
77 if let Some(ref n) = self.iupac_name {
78 return n.clone();
79 }
80 if let Some(ref cas) = self.cas {
81 return format!("CAS:{}", cas);
82 }
83 if let Some(cid) = self.cid {
84 return format!("CID:{}", cid);
85 }
86 if let Some(ref s) = self.smiles {
87 // Use char-based slicing to avoid panicking on multi-byte UTF-8 sequences.
88 let short: String = s.chars().take(20).collect();
89 return format!("SMILES:{}", short);
90 }
91 "(unknown)".to_string()
92 }
93}
94
95// ─────────────────────────────────────────────
96// Physical form
97// ─────────────────────────────────────────────
98
99/// Physical state / form of the chemical product.
100///
101/// The same compound can have different HS subheadings depending on its form.
102/// For example, sodium hydroxide solid → 2815.11, aqueous solution → 2815.12.
103#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
104#[serde(tag = "type", rename_all = "snake_case")]
105pub enum PhysicalForm {
106 /// Solid bulk material (lumps, pellets, flakes, rods, …).
107 Solid,
108 /// Fine-grained powder.
109 Powder {
110 /// Median particle size in micrometres. `None` if unknown.
111 particle_size_um: Option<f64>,
112 },
113 /// Coarser granulated product.
114 Granules,
115 /// Pure liquid (not a solution).
116 Liquid,
117 /// Solution of the substance in a solvent.
118 Solution {
119 /// Solvent IUPAC name. `None` implies water.
120 solvent: Option<String>,
121 /// Solute concentration in w/w%. `None` if unknown.
122 concentration_pct_ww: Option<f64>,
123 },
124 /// Gas or vapour.
125 Gas,
126 /// Thin metal sheet.
127 Foil {
128 /// Thickness in millimetres. `None` if unknown.
129 thickness_mm: Option<f64>,
130 },
131 /// Cast metal product (ingot, billet, slab, …).
132 Ingot,
133 /// Form not yet determined (initial session value).
134 Unknown,
135}
136
137impl PhysicalForm {
138 /// Returns `true` if this is a solution variant.
139 pub fn is_solution(&self) -> bool {
140 matches!(self, PhysicalForm::Solution { .. })
141 }
142
143 /// Returns the concentration (w/w%) if this is a solution with known concentration.
144 pub fn concentration_pct(&self) -> Option<f64> {
145 if let PhysicalForm::Solution { concentration_pct_ww, .. } = self {
146 *concentration_pct_ww
147 } else {
148 None
149 }
150 }
151}
152
153// ─────────────────────────────────────────────
154// Purity
155// ─────────────────────────────────────────────
156
157#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
158#[serde(tag = "type", rename_all = "snake_case")]
159pub enum PurityType {
160 ReagentGrade,
161 TechnicalGrade,
162 /// Pharmaceutical grade (PhEur / USP / JP, etc.).
163 PharmaceuticalGrade { standard: Option<String> },
164 FoodGrade,
165 ElectronicsGrade,
166 /// Numeric purity value in % (0.0–100.0).
167 Specified(f64),
168}
169
170// ─────────────────────────────────────────────
171// Mixture component
172// ─────────────────────────────────────────────
173
174/// A single component of a mixture product.
175///
176/// Set either `weight_fraction_pct` or `volume_fraction_pct`, not both.
177#[derive(Debug, Clone, Serialize, Deserialize)]
178pub struct MixtureComponent {
179 /// Identifier for this component substance.
180 pub substance: SubstanceIdentifier,
181 /// Weight fraction in w/w%. The sum of all components need not equal 100
182 /// (remaining fraction may be unknown).
183 pub weight_fraction_pct: Option<f64>,
184 /// Volume fraction in v/v%. Mutually exclusive with `weight_fraction_pct`.
185 pub volume_fraction_pct: Option<f64>,
186 /// Marks this component as the solvent (for solution products).
187 pub is_solvent: bool,
188}
189
190// ─────────────────────────────────────────────
191// Product description (pipeline input)
192// ─────────────────────────────────────────────
193
194/// Complete description of a product for HS code classification.
195///
196/// Build this struct via [`ClassificationSession`](crate::session::ClassificationSession)
197/// or fill it directly and pass it to
198/// [`HsPipeline::classify`](crate::pipeline::HsPipeline::classify).
199#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct ProductDescription {
201 /// Primary identifier (CAS / SMILES / IUPAC name / InChI, etc.).
202 pub identifier: SubstanceIdentifier,
203
204 /// Physical form of the product. `None` means unknown.
205 pub physical_form: Option<PhysicalForm>,
206
207 /// Purity in % (0.0–100.0). `None` means unspecified.
208 pub purity_pct: Option<f64>,
209
210 /// Qualitative purity category.
211 pub purity_type: Option<PurityType>,
212
213 /// Component list for mixture products. `None` means pure substance.
214 pub mixture_components: Option<Vec<MixtureComponent>>,
215
216 /// Intended end-use of the product.
217 pub intended_use: Option<IntendedUse>,
218
219 /// Free-form additional context forwarded to the LLM prompt.
220 pub additional_context: Option<String>,
221}
222
223/// Intended end-use category (influences chapter selection for special cases
224/// such as pharmaceuticals → Ch. 30, fertilisers → Ch. 31).
225#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
226#[serde(rename_all = "snake_case")]
227pub enum IntendedUse {
228 Industrial,
229 Pharmaceutical,
230 Agricultural,
231 Food,
232 Cosmetic,
233 Other(String),
234}
235
236impl ProductDescription {
237 /// Returns `true` if the product has mixture components set.
238 pub fn is_mixture(&self) -> bool {
239 self.mixture_components
240 .as_ref()
241 .map(|v| !v.is_empty())
242 .unwrap_or(false)
243 }
244}
245
246// ─────────────────────────────────────────────
247// Prediction result
248// ─────────────────────────────────────────────
249
250/// HS code prediction result returned by the classification pipeline.
251#[derive(Debug, Clone, Serialize, Deserialize)]
252pub struct HsPrediction {
253 /// Six-digit HS 2022 code without punctuation (e.g. `"281511"`).
254 pub hs_code: String,
255 /// Official HS 2022 heading description for this code.
256 pub heading_description: String,
257 /// Confidence score in the range [0.0, 1.0].
258 pub confidence: f32,
259 /// Which part of the pipeline produced this prediction.
260 pub source: PredictionSource,
261 /// Supplementary notes (shape caveats, concentration notes, etc.).
262 pub notes: Vec<String>,
263 /// Alternative HS codes worth considering.
264 pub alternatives: Vec<AlternativePrediction>,
265 /// Recommended next action for the user.
266 pub recommended_action: RecommendedAction,
267
268 /// Classification boundary risk indicator (v0.5).
269 ///
270 /// `Some` when the prediction falls in a well-known misclassification gray
271 /// zone (e.g. Chapter 29 vs 38 for organic preparations). When present,
272 /// consider requesting an advance ruling from customs authorities (事前教示).
273 pub gray_zone: Option<GrayZone>,
274
275 /// Nine-digit Japan statistical item code (統計品目番号).
276 ///
277 /// Based on Japan Customs 実行関税率表. Updated annually; the year used
278 /// is indicated by the `jp_tariff_year` field.
279 /// `None` when no Japan-specific code is registered for this HS heading.
280 pub jp_tariff_code: Option<String>,
281
282 /// Tariff schedule year used for the `jp_tariff_code` field (e.g. `2026`).
283 pub jp_tariff_year: Option<u16>,
284}
285
286impl HsPrediction {
287 /// Two-digit chapter code (e.g. `"28"`).
288 ///
289 /// Returns `&self.hs_code` unchanged if the code is shorter than 2 ASCII
290 /// digits (which should not happen for valid predictions).
291 pub fn chapter(&self) -> &str {
292 self.hs_code.get(..2).unwrap_or(&self.hs_code)
293 }
294
295 /// Four-digit heading code (e.g. `"2815"`).
296 ///
297 /// Returns `&self.hs_code` unchanged if the code is shorter than 4 ASCII
298 /// digits (which should not happen for valid predictions).
299 pub fn heading(&self) -> &str {
300 self.hs_code.get(..4).unwrap_or(&self.hs_code)
301 }
302
303 /// Dot-separated display string (e.g. `"28.15.11"`).
304 pub fn display(&self) -> String {
305 let c = &self.hs_code;
306 if c.len() == 6 {
307 format!("{}.{}.{}", &c[..2], &c[2..4], &c[4..6])
308 } else {
309 c.clone()
310 }
311 }
312}
313
314#[derive(Debug, Clone, Serialize, Deserialize)]
315pub struct AlternativePrediction {
316 pub hs_code: String,
317 pub confidence: f32,
318 pub reason: String,
319}
320
321/// Which part of the pipeline produced the prediction.
322#[derive(Debug, Clone, Serialize, Deserialize)]
323#[serde(tag = "type", rename_all = "snake_case")]
324pub enum PredictionSource {
325 /// From the user's own CAS → HS mapping (highest trust).
326 UserMapping,
327 /// From the embedded compile-time rule table.
328 EmbeddedRule { rule_id: String },
329 /// From the SMILES-based rule engine (v0.3).
330 RuleEngine { matched_rules: Vec<String> },
331 /// From an LLM API call (v0.4).
332 LlmApi { model: String },
333 /// Combined rule-engine pre-classification + LLM final decision.
334 Hybrid { rule_id: String, model: String },
335}
336
337/// Recommended follow-up action for the customs practitioner.
338#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
339#[serde(rename_all = "snake_case")]
340pub enum RecommendedAction {
341 /// High-confidence result — safe to use in a customs declaration.
342 Accept,
343 /// Moderate-confidence result — recommend LLM or manual review.
344 VerifyWithLlm,
345 /// A formal advance ruling (prior consultation / 事前教示) is recommended.
346 ///
347 /// Applied when a [`GrayZone`] boundary is detected or when mixture
348 /// classification falls back to GRI 3c (last heading by number).
349 /// Contact your local customs authority for a binding ruling before declaration.
350 PriorConsultation,
351 /// Low-confidence result — consult a qualified trade-compliance expert.
352 ExpertReview,
353}
354
355/// Identifies a classification boundary where misclassification risk is elevated.
356///
357/// When present in [`HsPrediction::gray_zone`], consider requesting a formal
358/// advance ruling (事前教示 / binding tariff information) from customs authorities
359/// before making a customs declaration. Misclassification of chemicals can result
360/// in retroactive duty assessments going back up to five years.
361#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
362#[serde(rename_all = "snake_case")]
363pub enum GrayZone {
364 /// Chapter 29 (pure organic chemicals) vs Chapter 38 (prepared/mixed products).
365 ///
366 /// Occurs when an organic compound is part of a formulation, has industrial
367 /// use, or when purity / presentation may shift classification to Ch. 38.
368 /// Multi-function additives and flame-retardant mixtures are common examples.
369 Chapter29vs38,
370
371 /// Chapter 28 (inorganic chemicals) vs Chapter 29 (organic chemicals).
372 ///
373 /// Occurs for organometallic compounds or borderline organic/inorganic cases
374 /// where the presence of metal–carbon bonds determines the correct chapter.
375 Chapter28vs29,
376
377 /// Mixture where essential character (GRI 3b) is ambiguous.
378 ///
379 /// No single component exceeds 50 % w/w, so GRI 3c (last heading by number)
380 /// was applied with low confidence. An expert or advance ruling is strongly
381 /// recommended.
382 MixtureEssentialCharacterUnclear,
383}
384
385// ─────────────────────────────────────────────
386// Organic / inorganic classification
387// ─────────────────────────────────────────────
388
389/// Result of SMILES-based organic / inorganic detection (v0.3).
390#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
391#[serde(rename_all = "snake_case")]
392pub enum OrganicInorganic {
393 Organic,
394 Inorganic,
395 /// Compound with a direct metal–carbon bond.
396 Organometallic,
397 Unknown,
398}