Skip to main content

hs_predict/llm/
prompt.rs

1//! Prompt builder for LLM-based HS code classification.
2//!
3//! [`PromptBuilder`] converts a [`ProductDescription`](crate::types::ProductDescription)
4//! into a ready-to-send [`LlmPrompt`](super::LlmPrompt) in English or Japanese.
5
6use crate::types::{Language, PhysicalForm, ProductDescription};
7use super::LlmPrompt;
8
9// ─────────────────────────────────────────────────────────────────────────────
10// PromptBuilder
11// ─────────────────────────────────────────────────────────────────────────────
12
13/// Builds the system and user prompt texts from a [`ProductDescription`].
14///
15/// # Example
16/// ```rust
17/// # #[cfg(feature = "llm")]
18/// # {
19/// use hs_predict::llm::PromptBuilder;
20/// use hs_predict::types::{ProductDescription, SubstanceIdentifier, PhysicalForm, Language};
21///
22/// let product = ProductDescription {
23///     identifier: SubstanceIdentifier::from_cas("64-19-7"),
24///     physical_form: Some(PhysicalForm::Liquid),
25///     purity_pct: Some(99.8),
26///     purity_type: None,
27///     mixture_components: None,
28///     intended_use: None,
29///     additional_context: None,
30/// };
31///
32/// let prompt = PromptBuilder::new().build(&product);
33/// assert!(prompt.system_text.contains("HS 2022"));
34/// assert!(prompt.user_text.contains("64-19-7"));
35/// # }
36/// ```
37#[derive(Debug, Clone, Default)]
38pub struct PromptBuilder {
39    language: Language,
40}
41
42impl PromptBuilder {
43    /// Create a new builder that emits English prompts.
44    pub fn new() -> Self {
45        Self { language: Language::En }
46    }
47
48    /// Set the output language.
49    pub fn with_language(mut self, language: Language) -> Self {
50        self.language = language;
51        self
52    }
53
54    /// Build the [`LlmPrompt`] from the given product description.
55    pub fn build(&self, product: &ProductDescription) -> LlmPrompt {
56        let smiles_analysis = product
57            .identifier
58            .smiles
59            .as_deref()
60            .and_then(crate::smiles::classify_smiles);
61
62        let system_text = match self.language {
63            Language::En => self.system_text_en(),
64            Language::Ja => self.system_text_ja(),
65        };
66
67        let user_text = match self.language {
68            Language::En => self.user_text_en(product, smiles_analysis.as_ref()),
69            Language::Ja => self.user_text_ja(product, smiles_analysis.as_ref()),
70        };
71
72        LlmPrompt {
73            system_text,
74            user_text,
75            smiles_analysis,
76        }
77    }
78
79    // ─── System prompts ───────────────────────────────────────────────
80
81    fn system_text_en(&self) -> String {
82        r#"You are an expert customs classification specialist with deep knowledge of the
83Harmonized System (HS) 2022 nomenclature, particularly Chapters 28 and 29 for
84chemical products.
85
86Your task is to assign a six-digit HS 2022 code to the chemical product described
87in the user message.
88
89## Output format
90
91Respond with **only** a JSON object — no prose, no markdown:
92
93```json
94{
95  "hs_code":    "<6 ASCII digits, no dots>",
96  "confidence": <float 0.0–1.0>,
97  "rationale":  "<1–3 sentences explaining the classification>",
98  "alternatives": [
99    { "hs_code": "<6 digits>", "confidence": <float>, "reason": "<brief>" }
100  ]
101}
102```
103
104`alternatives` may be an empty array `[]`.
105
106## Confidence guide
107
108| Score | Meaning |
109|-------|---------|
110| ≥ 0.90 | Certain of the 6-digit sub-heading |
111| ≥ 0.70 | Certain of the 4-digit heading, sub-heading uncertain |
112| ≥ 0.50 | Chapter correct, heading uncertain |
113| < 0.50 | Significant uncertainty — classify to the most likely heading |
114
115## Rules
116
117- Use HS 2022 edition.
118- If a SMILES-derived heading hint is provided, treat it as a cross-check, not
119  authoritative — rule 1 of HS Explanatory Notes takes precedence over chemical
120  structure alone.
121- Always verify Chapter Notes and Section Notes before finalising.
122- For mixtures, classify by the component that gives the mixture its essential
123  character (GRI 3b) unless a specific mixture heading applies.
124"#.to_string()
125    }
126
127    fn system_text_ja(&self) -> String {
128        r#"あなたは輸出入通関の専門家であり、HS 2022 品目表(特に第28類・第29類の化学品)に
129精通しています。
130
131ユーザーメッセージに記載された化学品に対して、6桁の HS 2022 コードを付与してください。
132
133## 出力形式
134
135**JSON オブジェクトのみ**を返答してください(文章・マークダウン不要):
136
137```json
138{
139  "hs_code":    "<6桁の数字、ドットなし>",
140  "confidence": <0.0〜1.0 の小数>,
141  "rationale":  "<分類根拠を1〜3文で>",
142  "alternatives": [
143    { "hs_code": "<6桁>", "confidence": <小数>, "reason": "<簡潔な理由>" }
144  ]
145}
146```
147
148`alternatives` は空配列 `[]` でも可。
149
150## 信頼度の目安
151
152| スコア | 意味 |
153|--------|------|
154| ≥ 0.90 | 6桁の細分まで確実 |
155| ≥ 0.70 | 4桁の号まで確実、細分は不確実 |
156| ≥ 0.50 | 類は正しいが号が不確実 |
157| < 0.50 | 大きな不確実性あり — 最も可能性の高い号に分類 |
158
159## ルール
160
161- HS 2022年版を使用すること。
162- SMILES由来のヘッディングヒントが提供された場合は参考情報として扱い、
163  HS解説書の通則1を優先すること。
164- 分類確定前に類注および部注を確認すること。
165- 混合物の場合、特定の混合物号がない限り、本質的特性を与える成分で分類(通則3(b))。
166"#.to_string()
167    }
168
169    // ─── User prompts ─────────────────────────────────────────────────
170
171    fn user_text_en(
172        &self,
173        product: &ProductDescription,
174        smiles_analysis: Option<&crate::smiles::SmilesClassification>,
175    ) -> String {
176        let mut parts: Vec<String> = Vec::new();
177
178        parts.push("## Product to classify".to_string());
179        parts.push(String::new());
180
181        // Identifiers
182        let id = &product.identifier;
183        if let Some(ref cas) = id.cas {
184            parts.push(format!("- **CAS**: {}", cas));
185        }
186        if let Some(ref iupac) = id.iupac_name {
187            parts.push(format!("- **IUPAC name**: {}", iupac));
188        }
189        if let Some(ref smiles) = id.smiles {
190            parts.push(format!("- **SMILES**: {}", smiles));
191        }
192        if let Some(ref inchi) = id.inchi {
193            parts.push(format!("- **InChI**: {}", inchi));
194        }
195        if let Some(ref inchikey) = id.inchi_key {
196            parts.push(format!("- **InChIKey**: {}", inchikey));
197        }
198
199        // Physical form
200        if let Some(ref form) = product.physical_form {
201            parts.push(format!("- **Physical form**: {}", physical_form_en(form)));
202        }
203
204        // Purity
205        if let Some(purity) = product.purity_pct {
206            parts.push(format!("- **Purity**: {:.1}%", purity));
207        }
208
209        // Intended use
210        if let Some(ref use_) = product.intended_use {
211            parts.push(format!("- **Intended use**: {:?}", use_));
212        }
213
214        // Mixture components
215        if let Some(ref comps) = product.mixture_components {
216            parts.push("- **Mixture components**:".to_string());
217            for c in comps {
218                let frac = c
219                    .weight_fraction_pct
220                    .map(|f| format!(" ({:.1}% w/w)", f))
221                    .unwrap_or_default();
222                let name = c.substance.cas.as_deref()
223                    .or(c.substance.iupac_name.as_deref())
224                    .unwrap_or("unknown");
225                parts.push(format!("  - {}{}", name, frac));
226            }
227        }
228
229        // Additional context
230        if let Some(ref ctx) = product.additional_context {
231            parts.push(format!("- **Additional context**: {}", ctx));
232        }
233
234        // SMILES analysis hint
235        if let Some(analysis) = smiles_analysis {
236            parts.push(String::new());
237            parts.push("## SMILES pre-analysis hint".to_string());
238            parts.push(String::new());
239            parts.push(format!(
240                "- **Organic class**: {}",
241                format!("{:?}", analysis.organic_class)
242            ));
243            if !analysis.functional_groups.is_empty() {
244                let groups: Vec<&str> = analysis
245                    .functional_groups
246                    .iter()
247                    .map(|g| g.label())
248                    .collect();
249                parts.push(format!("- **Functional groups detected**: {}", groups.join(", ")));
250            }
251            let hint = &analysis.heading_hint;
252            if let Some(heading) = hint.heading {
253                parts.push(format!(
254                    "- **Heading hint**: {}.{:02} ({}, confidence {:.2})",
255                    heading / 100,
256                    heading % 100,
257                    hint.rationale,
258                    hint.confidence
259                ));
260            } else {
261                parts.push(format!(
262                    "- **Chapter hint**: Ch.{:02} (confidence {:.2})",
263                    hint.chapter, hint.confidence
264                ));
265            }
266            parts.push(String::new());
267            parts.push(
268                "_This hint is derived from SMILES pattern matching and is provided for \
269                 cross-checking only. Apply the HS Explanatory Notes authoritatively._"
270                    .to_string(),
271            );
272        }
273
274        parts.join("\n")
275    }
276
277    fn user_text_ja(
278        &self,
279        product: &ProductDescription,
280        smiles_analysis: Option<&crate::smiles::SmilesClassification>,
281    ) -> String {
282        let mut parts: Vec<String> = Vec::new();
283
284        parts.push("## 分類対象品目".to_string());
285        parts.push(String::new());
286
287        let id = &product.identifier;
288        if let Some(ref cas) = id.cas {
289            parts.push(format!("- **CAS番号**: {}", cas));
290        }
291        if let Some(ref iupac) = id.iupac_name {
292            parts.push(format!("- **IUPAC名**: {}", iupac));
293        }
294        if let Some(ref smiles) = id.smiles {
295            parts.push(format!("- **SMILES**: {}", smiles));
296        }
297        if let Some(ref inchi) = id.inchi {
298            parts.push(format!("- **InChI**: {}", inchi));
299        }
300        if let Some(ref inchikey) = id.inchi_key {
301            parts.push(format!("- **InChIKey**: {}", inchikey));
302        }
303
304        if let Some(ref form) = product.physical_form {
305            parts.push(format!("- **物理的形状**: {}", physical_form_ja(form)));
306        }
307
308        if let Some(purity) = product.purity_pct {
309            parts.push(format!("- **純度**: {:.1}%", purity));
310        }
311
312        if let Some(ref use_) = product.intended_use {
313            parts.push(format!("- **用途**: {:?}", use_));
314        }
315
316        if let Some(ref comps) = product.mixture_components {
317            parts.push("- **混合成分**:".to_string());
318            for c in comps {
319                let frac = c
320                    .weight_fraction_pct
321                    .map(|f| format!(" ({:.1}% w/w)", f))
322                    .unwrap_or_default();
323                let name = c.substance.cas.as_deref()
324                    .or(c.substance.iupac_name.as_deref())
325                    .unwrap_or("不明");
326                parts.push(format!("  - {}{}", name, frac));
327            }
328        }
329
330        if let Some(ref ctx) = product.additional_context {
331            parts.push(format!("- **補足情報**: {}", ctx));
332        }
333
334        if let Some(analysis) = smiles_analysis {
335            parts.push(String::new());
336            parts.push("## SMILES 事前解析ヒント".to_string());
337            parts.push(String::new());
338            parts.push(format!(
339                "- **有機/無機区分**: {}",
340                format!("{:?}", analysis.organic_class)
341            ));
342            if !analysis.functional_groups.is_empty() {
343                let groups: Vec<&str> = analysis
344                    .functional_groups
345                    .iter()
346                    .map(|g| g.label())
347                    .collect();
348                parts.push(format!("- **検出官能基**: {}", groups.join("、")));
349            }
350            let hint = &analysis.heading_hint;
351            if let Some(heading) = hint.heading {
352                parts.push(format!(
353                    "- **号ヒント**: {}.{:02}({}、信頼度 {:.2})",
354                    heading / 100,
355                    heading % 100,
356                    hint.rationale,
357                    hint.confidence
358                ));
359            } else {
360                parts.push(format!(
361                    "- **類ヒント**: 第{:02}類(信頼度 {:.2})",
362                    hint.chapter, hint.confidence
363                ));
364            }
365            parts.push(String::new());
366            parts.push(
367                "_このヒントはSMILESパターンマッチングによるもので、参考情報です。\
368                 HS解説書を正式な根拠として適用してください。_"
369                    .to_string(),
370            );
371        }
372
373        parts.join("\n")
374    }
375}
376
377// ─────────────────────────────────────────────────────────────────────────────
378// Helpers
379// ─────────────────────────────────────────────────────────────────────────────
380
381fn physical_form_en(form: &PhysicalForm) -> &'static str {
382    match form {
383        PhysicalForm::Solid => "Solid",
384        PhysicalForm::Powder { .. } => "Powder",
385        PhysicalForm::Granules => "Granules",
386        PhysicalForm::Liquid => "Liquid",
387        PhysicalForm::Solution { .. } => "Solution",
388        PhysicalForm::Gas => "Gas",
389        PhysicalForm::Foil { .. } => "Foil",
390        PhysicalForm::Ingot => "Ingot",
391        PhysicalForm::Unknown => "Unknown",
392    }
393}
394
395fn physical_form_ja(form: &PhysicalForm) -> &'static str {
396    match form {
397        PhysicalForm::Solid => "固体",
398        PhysicalForm::Powder { .. } => "粉末",
399        PhysicalForm::Granules => "顆粒",
400        PhysicalForm::Liquid => "液体",
401        PhysicalForm::Solution { .. } => "溶液",
402        PhysicalForm::Gas => "気体",
403        PhysicalForm::Foil { .. } => "箔",
404        PhysicalForm::Ingot => "インゴット",
405        PhysicalForm::Unknown => "不明",
406    }
407}
408
409// ─────────────────────────────────────────────────────────────────────────────
410// Tests
411// ─────────────────────────────────────────────────────────────────────────────
412
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{ProductDescription, SubstanceIdentifier};

    /// Fixture: 99.5 % liquid acetic acid identified by CAS, IUPAC name and SMILES.
    fn acetic_acid() -> ProductDescription {
        let identifier = SubstanceIdentifier {
            cas: Some("64-19-7".to_string()),
            iupac_name: Some("acetic acid".to_string()),
            smiles: Some("CC(O)=O".to_string()),
            inchi: None,
            inchi_key: None,
            cid: None,
        };
        ProductDescription {
            identifier,
            physical_form: Some(PhysicalForm::Liquid),
            purity_pct: Some(99.5),
            purity_type: None,
            mixture_components: None,
            intended_use: None,
            additional_context: None,
        }
    }

    /// Prompt for the fixture from a freshly created (English) builder.
    fn english_prompt() -> LlmPrompt {
        PromptBuilder::new().build(&acetic_acid())
    }

    /// Prompt for the fixture from a builder switched to Japanese.
    fn japanese_prompt() -> LlmPrompt {
        PromptBuilder::new()
            .with_language(Language::Ja)
            .build(&acetic_acid())
    }

    #[test]
    fn en_system_prompt_contains_hs_2022() {
        let prompt = english_prompt();
        assert!(prompt.system_text.contains("HS 2022"));
    }

    #[test]
    fn en_user_text_contains_cas() {
        let prompt = english_prompt();
        assert!(prompt.user_text.contains("64-19-7"));
    }

    #[test]
    fn en_user_text_contains_purity() {
        let prompt = english_prompt();
        assert!(prompt.user_text.contains("99.5"));
    }

    #[test]
    fn en_user_text_contains_smiles_hint() {
        let prompt = english_prompt();
        // acetic acid SMILES → carboxylic acid → heading 29.15
        let markers = ["Heading hint", "heading hint", "SMILES pre-analysis"];
        assert!(markers.iter().any(|marker| prompt.user_text.contains(*marker)));
    }

    #[test]
    fn smiles_analysis_populated_when_smiles_present() {
        let prompt = english_prompt();
        assert!(prompt.smiles_analysis.is_some());
    }

    #[test]
    fn smiles_analysis_none_when_no_smiles() {
        // A CAS-only identifier carries no SMILES to analyse.
        let cas_only = ProductDescription {
            identifier: SubstanceIdentifier::from_cas("64-19-7"),
            physical_form: None,
            purity_pct: None,
            purity_type: None,
            mixture_components: None,
            intended_use: None,
            additional_context: None,
        };
        let prompt = PromptBuilder::new().build(&cas_only);
        assert!(prompt.smiles_analysis.is_none());
    }

    #[test]
    fn ja_system_prompt_contains_hs_2022_ja() {
        let prompt = japanese_prompt();
        assert!(prompt.system_text.contains("HS 2022"));
    }

    #[test]
    fn ja_user_text_contains_cas() {
        let prompt = japanese_prompt();
        assert!(prompt.user_text.contains("64-19-7"));
    }

    #[test]
    fn mixture_components_listed() {
        use crate::types::MixtureComponent;

        let sodium_hydroxide = MixtureComponent {
            substance: SubstanceIdentifier::from_cas("1310-73-2"),
            weight_fraction_pct: Some(30.0),
            volume_fraction_pct: None,
            is_solvent: false,
        };
        let solution = ProductDescription {
            identifier: SubstanceIdentifier::from_cas("7732-18-5"),
            physical_form: Some(PhysicalForm::Solution {
                concentration_pct_ww: Some(30.0),
                solvent: None,
            }),
            purity_pct: None,
            purity_type: None,
            mixture_components: Some(vec![sodium_hydroxide]),
            intended_use: None,
            additional_context: None,
        };

        let prompt = PromptBuilder::new().build(&solution);
        for expected in ["1310-73-2", "30.0"] {
            assert!(prompt.user_text.contains(expected));
        }
    }
}
522        let p = PromptBuilder::new().build(&product);
523        assert!(p.user_text.contains("1310-73-2"));
524        assert!(p.user_text.contains("30.0"));
525    }
526}