Skip to main content

hs_predict/session/
state.rs

1use serde::{Deserialize, Serialize};
2use crate::types::{IntendedUse, OrganicInorganic, PhysicalForm, SubstanceIdentifier};
3
4/// Classification state accumulated across session Q&A rounds.
5///
6/// Each field starts as `None` and is filled in as the user answers questions.
7/// The pipeline reads this state (via [`to_product_description`](super::ClassificationSession::to_product_description))
8/// when the session is complete.
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct ClassificationState {
11    // ── Identifier ───────────────────────────────────────────────
12    /// Identifier entered by the user in the first question.
13    pub identifier: SubstanceIdentifier,
14
15    // ── Mixture ──────────────────────────────────────────────────
16    /// Whether the product is a mixture. `None` = not yet answered.
17    pub is_mixture: Option<bool>,
18
19    /// Number of components (set when `is_mixture` = true).
20    pub component_count: Option<usize>,
21
22    /// Components collected so far (built up one at a time).
23    pub components: Vec<PartialComponent>,
24
25    /// Index of the component currently being entered.
26    pub current_component_index: usize,
27
28    // ── Physical form ─────────────────────────────────────────────
29    pub physical_form: Option<PhysicalForm>,
30
31    // ── Purity ────────────────────────────────────────────────────
32    pub purity_pct: Option<f64>,
33
34    // ── Chemistry ─────────────────────────────────────────────────
35    /// Organic / inorganic classification. `None` = not yet answered.
36    pub organic_inorganic: Option<OrganicInorganic>,
37
38    /// HS chapter hint narrowed down by the rule engine (two-digit string, e.g. `"28"`).
39    pub chapter_hint: Option<String>,
40
41    // ── Intended use ──────────────────────────────────────────────
42    pub intended_use: Option<IntendedUse>,
43
44    // ── Functional groups (organic compounds without SMILES) ──────
45    /// Functional group keys selected by the user (e.g. `"carboxylic_acid"`).
46    pub detected_functional_groups: Vec<String>,
47
48    // ── Completion ────────────────────────────────────────────────
49    /// Set to `true` when `next_question()` returns `None`.
50    pub is_complete: bool,
51}
52
53/// Partially-filled mixture component (accumulated during session).
54#[derive(Debug, Clone, Default, Serialize, Deserialize)]
55pub struct PartialComponent {
56    pub identifier: SubstanceIdentifier,
57    /// Weight fraction in w/w%.
58    pub weight_fraction_pct: Option<f64>,
59    pub is_solvent: bool,
60}
61
62impl ClassificationState {
63    /// Returns `true` if at least one identifier field has been set.
64    pub fn has_identifier(&self) -> bool {
65        !self.identifier.is_empty()
66    }
67
68    /// Rough confidence estimate based on how many fields are known.
69    ///
70    /// Used to decide whether to emit `SessionResult::RequiresLlm`
71    /// when all questions have been answered.
72    pub fn confidence_estimate(&self) -> f32 {
73        let mut score: f32 = 0.0;
74
75        // Identifier quality
76        if self.identifier.cas.is_some() {
77            score += 0.40; // CAS is the most reliable identifier
78        } else if self.identifier.smiles.is_some() {
79            score += 0.30;
80        } else if self.identifier.iupac_name.is_some()
81            || self.identifier.inchi.is_some()
82            || self.identifier.inchi_key.is_some()
83        {
84            score += 0.25;
85        }
86
87        // Physical form known
88        if self.physical_form.is_some() {
89            score += 0.15;
90        }
91
92        // Organic / inorganic known
93        if self.organic_inorganic.is_some() {
94            score += 0.15;
95        }
96
97        // HS chapter narrowed down
98        if self.chapter_hint.is_some() {
99            score += 0.15;
100        }
101
102        // Intended use known — pharmaceutical / agricultural constrain the chapter strongly
103        if matches!(
104            self.intended_use,
105            Some(IntendedUse::Pharmaceutical) | Some(IntendedUse::Agricultural)
106        ) {
107            score += 0.10;
108        }
109
110        // All mixture components collected
111        if self.is_mixture == Some(true) {
112            let expected = self.component_count.unwrap_or(0);
113            let filled = self.components.iter().filter(|c| !c.identifier.is_empty()).count();
114            if expected > 0 && filled >= expected {
115                score += 0.05;
116            }
117        }
118
119        score.min(1.0)
120    }
121
122    /// Returns `true` if all expected mixture components have been entered.
123    ///
124    /// Always returns `true` for non-mixture products.
125    pub fn all_components_filled(&self) -> bool {
126        if self.is_mixture != Some(true) {
127            return true;
128        }
129        let expected = self.component_count.unwrap_or(0);
130        if expected == 0 {
131            return false;
132        }
133        self.components.len() >= expected
134            && self.components.iter().all(|c| !c.identifier.is_empty())
135    }
136
137    /// Returns `true` if the current mixture component has an identifier.
138    pub fn current_component_has_identifier(&self) -> bool {
139        self.components
140            .get(self.current_component_index)
141            .map(|c| !c.identifier.is_empty())
142            .unwrap_or(false)
143    }
144
145    /// Returns `true` if the current mixture component has a weight fraction.
146    pub fn current_component_has_fraction(&self) -> bool {
147        self.components
148            .get(self.current_component_index)
149            .map(|c| c.weight_fraction_pct.is_some())
150            .unwrap_or(false)
151    }
152}