hs_predict/session/state.rs
1use serde::{Deserialize, Serialize};
2use crate::types::{IntendedUse, OrganicInorganic, PhysicalForm, SubstanceIdentifier};
3
4/// Classification state accumulated across session Q&A rounds.
5///
6/// Each field starts as `None` and is filled in as the user answers questions.
7/// The pipeline reads this state (via [`to_product_description`](super::ClassificationSession::to_product_description))
8/// when the session is complete.
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct ClassificationState {
11 // ── Identifier ───────────────────────────────────────────────
12 /// Identifier entered by the user in the first question.
13 pub identifier: SubstanceIdentifier,
14
15 // ── Mixture ──────────────────────────────────────────────────
16 /// Whether the product is a mixture. `None` = not yet answered.
17 pub is_mixture: Option<bool>,
18
19 /// Number of components (set when `is_mixture` = true).
20 pub component_count: Option<usize>,
21
22 /// Components collected so far (built up one at a time).
23 pub components: Vec<PartialComponent>,
24
25 /// Index of the component currently being entered.
26 pub current_component_index: usize,
27
28 // ── Physical form ─────────────────────────────────────────────
29 pub physical_form: Option<PhysicalForm>,
30
31 // ── Purity ────────────────────────────────────────────────────
32 pub purity_pct: Option<f64>,
33
34 // ── Chemistry ─────────────────────────────────────────────────
35 /// Organic / inorganic classification. `None` = not yet answered.
36 pub organic_inorganic: Option<OrganicInorganic>,
37
38 /// HS chapter hint narrowed down by the rule engine (two-digit string, e.g. `"28"`).
39 pub chapter_hint: Option<String>,
40
41 // ── Intended use ──────────────────────────────────────────────
42 pub intended_use: Option<IntendedUse>,
43
44 // ── Functional groups (organic compounds without SMILES) ──────
45 /// Functional group keys selected by the user (e.g. `"carboxylic_acid"`).
46 pub detected_functional_groups: Vec<String>,
47
48 // ── Completion ────────────────────────────────────────────────
49 /// Set to `true` when `next_question()` returns `None`.
50 pub is_complete: bool,
51}
52
53/// Partially-filled mixture component (accumulated during session).
54#[derive(Debug, Clone, Default, Serialize, Deserialize)]
55pub struct PartialComponent {
56 pub identifier: SubstanceIdentifier,
57 /// Weight fraction in w/w%.
58 pub weight_fraction_pct: Option<f64>,
59 pub is_solvent: bool,
60}
61
62impl ClassificationState {
63 /// Returns `true` if at least one identifier field has been set.
64 pub fn has_identifier(&self) -> bool {
65 !self.identifier.is_empty()
66 }
67
68 /// Rough confidence estimate based on how many fields are known.
69 ///
70 /// Used to decide whether to emit `SessionResult::RequiresLlm`
71 /// when all questions have been answered.
72 pub fn confidence_estimate(&self) -> f32 {
73 let mut score: f32 = 0.0;
74
75 // Identifier quality
76 if self.identifier.cas.is_some() {
77 score += 0.40; // CAS is the most reliable identifier
78 } else if self.identifier.smiles.is_some() {
79 score += 0.30;
80 } else if self.identifier.iupac_name.is_some()
81 || self.identifier.inchi.is_some()
82 || self.identifier.inchi_key.is_some()
83 {
84 score += 0.25;
85 }
86
87 // Physical form known
88 if self.physical_form.is_some() {
89 score += 0.15;
90 }
91
92 // Organic / inorganic known
93 if self.organic_inorganic.is_some() {
94 score += 0.15;
95 }
96
97 // HS chapter narrowed down
98 if self.chapter_hint.is_some() {
99 score += 0.15;
100 }
101
102 // Intended use known — pharmaceutical / agricultural constrain the chapter strongly
103 if matches!(
104 self.intended_use,
105 Some(IntendedUse::Pharmaceutical) | Some(IntendedUse::Agricultural)
106 ) {
107 score += 0.10;
108 }
109
110 // All mixture components collected
111 if self.is_mixture == Some(true) {
112 let expected = self.component_count.unwrap_or(0);
113 let filled = self.components.iter().filter(|c| !c.identifier.is_empty()).count();
114 if expected > 0 && filled >= expected {
115 score += 0.05;
116 }
117 }
118
119 score.min(1.0)
120 }
121
122 /// Returns `true` if all expected mixture components have been entered.
123 ///
124 /// Always returns `true` for non-mixture products.
125 pub fn all_components_filled(&self) -> bool {
126 if self.is_mixture != Some(true) {
127 return true;
128 }
129 let expected = self.component_count.unwrap_or(0);
130 if expected == 0 {
131 return false;
132 }
133 self.components.len() >= expected
134 && self.components.iter().all(|c| !c.identifier.is_empty())
135 }
136
137 /// Returns `true` if the current mixture component has an identifier.
138 pub fn current_component_has_identifier(&self) -> bool {
139 self.components
140 .get(self.current_component_index)
141 .map(|c| !c.identifier.is_empty())
142 .unwrap_or(false)
143 }
144
145 /// Returns `true` if the current mixture component has a weight fraction.
146 pub fn current_component_has_fraction(&self) -> bool {
147 self.components
148 .get(self.current_component_index)
149 .map(|c| c.weight_fraction_pct.is_some())
150 .unwrap_or(false)
151 }
152}