1use crate::types::{Language, PhysicalForm, ProductDescription};
7use super::LlmPrompt;
8
/// Builder for the system/user prompt pair sent to the LLM for a product.
///
/// NOTE(review): `Default` is derived, so `PromptBuilder::default()` uses
/// `Language::default()`. `PromptBuilder::new()` sets `Language::En`
/// explicitly; confirm the two agree.
#[derive(Debug, Clone, Default)]
pub struct PromptBuilder {
    /// Language in which the prompt texts are rendered.
    language: Language,
}
41
42impl PromptBuilder {
43 pub fn new() -> Self {
45 Self { language: Language::En }
46 }
47
48 pub fn with_language(mut self, language: Language) -> Self {
50 self.language = language;
51 self
52 }
53
54 pub fn build(&self, product: &ProductDescription) -> LlmPrompt {
56 let smiles_analysis = product
57 .identifier
58 .smiles
59 .as_deref()
60 .and_then(crate::smiles::classify_smiles);
61
62 let system_text = match self.language {
63 Language::En => self.system_text_en(),
64 Language::Ja => self.system_text_ja(),
65 };
66
67 let user_text = match self.language {
68 Language::En => self.user_text_en(product, smiles_analysis.as_ref()),
69 Language::Ja => self.user_text_ja(product, smiles_analysis.as_ref()),
70 };
71
72 LlmPrompt {
73 system_text,
74 user_text,
75 smiles_analysis,
76 }
77 }
78
79 fn system_text_en(&self) -> String {
82 r#"You are an expert customs classification specialist with deep knowledge of the
83Harmonized System (HS) 2022 nomenclature, particularly Chapters 28 and 29 for
84chemical products.
85
86Your task is to assign a six-digit HS 2022 code to the chemical product described
87in the user message.
88
89## Output format
90
91Respond with **only** a JSON object — no prose, no markdown:
92
93```json
94{
95 "hs_code": "<6 ASCII digits, no dots>",
96 "confidence": <float 0.0–1.0>,
97 "rationale": "<1–3 sentences explaining the classification>",
98 "alternatives": [
99 { "hs_code": "<6 digits>", "confidence": <float>, "reason": "<brief>" }
100 ]
101}
102```
103
104`alternatives` may be an empty array `[]`.
105
106## Confidence guide
107
108| Score | Meaning |
109|-------|---------|
110| ≥ 0.90 | Certain of the 6-digit sub-heading |
111| ≥ 0.70 | Certain of the 4-digit heading, sub-heading uncertain |
112| ≥ 0.50 | Chapter correct, heading uncertain |
113| < 0.50 | Significant uncertainty — classify to the most likely heading |
114
115## Rules
116
117- Use HS 2022 edition.
118- If a SMILES-derived heading hint is provided, treat it as a cross-check, not
119 authoritative — rule 1 of HS Explanatory Notes takes precedence over chemical
120 structure alone.
121- Always verify Chapter Notes and Section Notes before finalising.
122- For mixtures, classify by the component that gives the mixture its essential
123 character (GRI 3b) unless a specific mixture heading applies.
124"#.to_string()
125 }
126
127 fn system_text_ja(&self) -> String {
128 r#"あなたは輸出入通関の専門家であり、HS 2022 品目表(特に第28類・第29類の化学品)に
129精通しています。
130
131ユーザーメッセージに記載された化学品に対して、6桁の HS 2022 コードを付与してください。
132
133## 出力形式
134
135**JSON オブジェクトのみ**を返答してください(文章・マークダウン不要):
136
137```json
138{
139 "hs_code": "<6桁の数字、ドットなし>",
140 "confidence": <0.0〜1.0 の小数>,
141 "rationale": "<分類根拠を1〜3文で>",
142 "alternatives": [
143 { "hs_code": "<6桁>", "confidence": <小数>, "reason": "<簡潔な理由>" }
144 ]
145}
146```
147
148`alternatives` は空配列 `[]` でも可。
149
150## 信頼度の目安
151
152| スコア | 意味 |
153|--------|------|
154| ≥ 0.90 | 6桁の細分まで確実 |
155| ≥ 0.70 | 4桁の号まで確実、細分は不確実 |
156| ≥ 0.50 | 類は正しいが号が不確実 |
157| < 0.50 | 大きな不確実性あり — 最も可能性の高い号に分類 |
158
159## ルール
160
161- HS 2022年版を使用すること。
162- SMILES由来のヘッディングヒントが提供された場合は参考情報として扱い、
163 HS解説書の通則1を優先すること。
164- 分類確定前に類注および部注を確認すること。
165- 混合物の場合、特定の混合物号がない限り、本質的特性を与える成分で分類(通則3(b))。
166"#.to_string()
167 }
168
169 fn user_text_en(
172 &self,
173 product: &ProductDescription,
174 smiles_analysis: Option<&crate::smiles::SmilesClassification>,
175 ) -> String {
176 let mut parts: Vec<String> = Vec::new();
177
178 parts.push("## Product to classify".to_string());
179 parts.push(String::new());
180
181 let id = &product.identifier;
183 if let Some(ref cas) = id.cas {
184 parts.push(format!("- **CAS**: {}", cas));
185 }
186 if let Some(ref iupac) = id.iupac_name {
187 parts.push(format!("- **IUPAC name**: {}", iupac));
188 }
189 if let Some(ref smiles) = id.smiles {
190 parts.push(format!("- **SMILES**: {}", smiles));
191 }
192 if let Some(ref inchi) = id.inchi {
193 parts.push(format!("- **InChI**: {}", inchi));
194 }
195 if let Some(ref inchikey) = id.inchi_key {
196 parts.push(format!("- **InChIKey**: {}", inchikey));
197 }
198
199 if let Some(ref form) = product.physical_form {
201 parts.push(format!("- **Physical form**: {}", physical_form_en(form)));
202 }
203
204 if let Some(purity) = product.purity_pct {
206 parts.push(format!("- **Purity**: {:.1}%", purity));
207 }
208
209 if let Some(ref use_) = product.intended_use {
211 parts.push(format!("- **Intended use**: {:?}", use_));
212 }
213
214 if let Some(ref comps) = product.mixture_components {
216 parts.push("- **Mixture components**:".to_string());
217 for c in comps {
218 let frac = c
219 .weight_fraction_pct
220 .map(|f| format!(" ({:.1}% w/w)", f))
221 .unwrap_or_default();
222 let name = c.substance.cas.as_deref()
223 .or(c.substance.iupac_name.as_deref())
224 .unwrap_or("unknown");
225 parts.push(format!(" - {}{}", name, frac));
226 }
227 }
228
229 if let Some(ref ctx) = product.additional_context {
231 parts.push(format!("- **Additional context**: {}", ctx));
232 }
233
234 if let Some(analysis) = smiles_analysis {
236 parts.push(String::new());
237 parts.push("## SMILES pre-analysis hint".to_string());
238 parts.push(String::new());
239 parts.push(format!(
240 "- **Organic class**: {}",
241 format!("{:?}", analysis.organic_class)
242 ));
243 if !analysis.functional_groups.is_empty() {
244 let groups: Vec<&str> = analysis
245 .functional_groups
246 .iter()
247 .map(|g| g.label())
248 .collect();
249 parts.push(format!("- **Functional groups detected**: {}", groups.join(", ")));
250 }
251 let hint = &analysis.heading_hint;
252 if let Some(heading) = hint.heading {
253 parts.push(format!(
254 "- **Heading hint**: {}.{:02} ({}, confidence {:.2})",
255 heading / 100,
256 heading % 100,
257 hint.rationale,
258 hint.confidence
259 ));
260 } else {
261 parts.push(format!(
262 "- **Chapter hint**: Ch.{:02} (confidence {:.2})",
263 hint.chapter, hint.confidence
264 ));
265 }
266 parts.push(String::new());
267 parts.push(
268 "_This hint is derived from SMILES pattern matching and is provided for \
269 cross-checking only. Apply the HS Explanatory Notes authoritatively._"
270 .to_string(),
271 );
272 }
273
274 parts.join("\n")
275 }
276
277 fn user_text_ja(
278 &self,
279 product: &ProductDescription,
280 smiles_analysis: Option<&crate::smiles::SmilesClassification>,
281 ) -> String {
282 let mut parts: Vec<String> = Vec::new();
283
284 parts.push("## 分類対象品目".to_string());
285 parts.push(String::new());
286
287 let id = &product.identifier;
288 if let Some(ref cas) = id.cas {
289 parts.push(format!("- **CAS番号**: {}", cas));
290 }
291 if let Some(ref iupac) = id.iupac_name {
292 parts.push(format!("- **IUPAC名**: {}", iupac));
293 }
294 if let Some(ref smiles) = id.smiles {
295 parts.push(format!("- **SMILES**: {}", smiles));
296 }
297 if let Some(ref inchi) = id.inchi {
298 parts.push(format!("- **InChI**: {}", inchi));
299 }
300 if let Some(ref inchikey) = id.inchi_key {
301 parts.push(format!("- **InChIKey**: {}", inchikey));
302 }
303
304 if let Some(ref form) = product.physical_form {
305 parts.push(format!("- **物理的形状**: {}", physical_form_ja(form)));
306 }
307
308 if let Some(purity) = product.purity_pct {
309 parts.push(format!("- **純度**: {:.1}%", purity));
310 }
311
312 if let Some(ref use_) = product.intended_use {
313 parts.push(format!("- **用途**: {:?}", use_));
314 }
315
316 if let Some(ref comps) = product.mixture_components {
317 parts.push("- **混合成分**:".to_string());
318 for c in comps {
319 let frac = c
320 .weight_fraction_pct
321 .map(|f| format!(" ({:.1}% w/w)", f))
322 .unwrap_or_default();
323 let name = c.substance.cas.as_deref()
324 .or(c.substance.iupac_name.as_deref())
325 .unwrap_or("不明");
326 parts.push(format!(" - {}{}", name, frac));
327 }
328 }
329
330 if let Some(ref ctx) = product.additional_context {
331 parts.push(format!("- **補足情報**: {}", ctx));
332 }
333
334 if let Some(analysis) = smiles_analysis {
335 parts.push(String::new());
336 parts.push("## SMILES 事前解析ヒント".to_string());
337 parts.push(String::new());
338 parts.push(format!(
339 "- **有機/無機区分**: {}",
340 format!("{:?}", analysis.organic_class)
341 ));
342 if !analysis.functional_groups.is_empty() {
343 let groups: Vec<&str> = analysis
344 .functional_groups
345 .iter()
346 .map(|g| g.label())
347 .collect();
348 parts.push(format!("- **検出官能基**: {}", groups.join("、")));
349 }
350 let hint = &analysis.heading_hint;
351 if let Some(heading) = hint.heading {
352 parts.push(format!(
353 "- **号ヒント**: {}.{:02}({}、信頼度 {:.2})",
354 heading / 100,
355 heading % 100,
356 hint.rationale,
357 hint.confidence
358 ));
359 } else {
360 parts.push(format!(
361 "- **類ヒント**: 第{:02}類(信頼度 {:.2})",
362 hint.chapter, hint.confidence
363 ));
364 }
365 parts.push(String::new());
366 parts.push(
367 "_このヒントはSMILESパターンマッチングによるもので、参考情報です。\
368 HS解説書を正式な根拠として適用してください。_"
369 .to_string(),
370 );
371 }
372
373 parts.join("\n")
374 }
375}
376
377fn physical_form_en(form: &PhysicalForm) -> &'static str {
382 match form {
383 PhysicalForm::Solid => "Solid",
384 PhysicalForm::Powder { .. } => "Powder",
385 PhysicalForm::Granules => "Granules",
386 PhysicalForm::Liquid => "Liquid",
387 PhysicalForm::Solution { .. } => "Solution",
388 PhysicalForm::Gas => "Gas",
389 PhysicalForm::Foil { .. } => "Foil",
390 PhysicalForm::Ingot => "Ingot",
391 PhysicalForm::Unknown => "Unknown",
392 }
393}
394
395fn physical_form_ja(form: &PhysicalForm) -> &'static str {
396 match form {
397 PhysicalForm::Solid => "固体",
398 PhysicalForm::Powder { .. } => "粉末",
399 PhysicalForm::Granules => "顆粒",
400 PhysicalForm::Liquid => "液体",
401 PhysicalForm::Solution { .. } => "溶液",
402 PhysicalForm::Gas => "気体",
403 PhysicalForm::Foil { .. } => "箔",
404 PhysicalForm::Ingot => "インゴット",
405 PhysicalForm::Unknown => "不明",
406 }
407}
408
#[cfg(test)]
mod tests {
    //! Unit tests for `PromptBuilder`: language selection, field rendering,
    //! mixture components and the optional SMILES hint section.

    use super::*;
    use crate::types::{MixtureComponent, ProductDescription, SubstanceIdentifier};

    /// Glacial acetic acid with CAS, IUPAC name and SMILES set.
    fn acetic_acid() -> ProductDescription {
        ProductDescription {
            identifier: SubstanceIdentifier {
                cas: Some("64-19-7".to_string()),
                iupac_name: Some("acetic acid".to_string()),
                smiles: Some("CC(O)=O".to_string()),
                inchi: None,
                inchi_key: None,
                cid: None,
            },
            physical_form: Some(PhysicalForm::Liquid),
            purity_pct: Some(99.5),
            purity_type: None,
            mixture_components: None,
            intended_use: None,
            additional_context: None,
        }
    }

    /// Same product without a SMILES string, so no hint section is rendered and
    /// the complete user text is predictable.
    fn acetic_acid_without_smiles() -> ProductDescription {
        let mut product = acetic_acid();
        product.identifier.smiles = None;
        product
    }

    /// A product whose only mixture component carries no identifier at all.
    fn mixture_with_unnamed_component() -> ProductDescription {
        ProductDescription {
            identifier: SubstanceIdentifier::from_cas("7732-18-5"),
            physical_form: None,
            purity_pct: None,
            purity_type: None,
            mixture_components: Some(vec![MixtureComponent {
                substance: SubstanceIdentifier {
                    cas: None,
                    iupac_name: None,
                    smiles: None,
                    inchi: None,
                    inchi_key: None,
                    cid: None,
                },
                weight_fraction_pct: None,
                volume_fraction_pct: None,
                is_solvent: false,
            }]),
            intended_use: None,
            additional_context: None,
        }
    }

    #[test]
    fn en_system_prompt_contains_hs_2022() {
        let p = PromptBuilder::new().build(&acetic_acid());
        assert!(p.system_text.contains("HS 2022"));
    }

    #[test]
    fn en_user_text_contains_cas() {
        let p = PromptBuilder::new().build(&acetic_acid());
        assert!(p.user_text.contains("64-19-7"));
    }

    #[test]
    fn en_user_text_contains_purity() {
        let p = PromptBuilder::new().build(&acetic_acid());
        assert!(p.user_text.contains("99.5"));
    }

    #[test]
    fn en_user_text_contains_smiles_hint() {
        let p = PromptBuilder::new().build(&acetic_acid());
        // The section header is emitted whenever a SMILES analysis exists, so it is
        // the one marker that reliably identifies the hint section.
        assert!(p.user_text.contains("## SMILES pre-analysis hint"));
    }

    #[test]
    fn smiles_analysis_populated_when_smiles_present() {
        let p = PromptBuilder::new().build(&acetic_acid());
        assert!(p.smiles_analysis.is_some());
    }

    #[test]
    fn smiles_analysis_none_when_no_smiles() {
        let product = ProductDescription {
            identifier: SubstanceIdentifier::from_cas("64-19-7"),
            physical_form: None,
            purity_pct: None,
            purity_type: None,
            mixture_components: None,
            intended_use: None,
            additional_context: None,
        };
        let p = PromptBuilder::new().build(&product);
        assert!(p.smiles_analysis.is_none());
    }

    #[test]
    fn ja_system_prompt_contains_hs_2022_ja() {
        let p = PromptBuilder::new()
            .with_language(Language::Ja)
            .build(&acetic_acid());
        assert!(p.system_text.contains("HS 2022"));
    }

    #[test]
    fn ja_user_text_contains_cas() {
        let p = PromptBuilder::new()
            .with_language(Language::Ja)
            .build(&acetic_acid());
        assert!(p.user_text.contains("64-19-7"));
    }

    #[test]
    fn mixture_components_listed() {
        let product = ProductDescription {
            identifier: SubstanceIdentifier::from_cas("7732-18-5"),
            physical_form: Some(PhysicalForm::Solution {
                concentration_pct_ww: Some(30.0),
                solvent: None,
            }),
            purity_pct: None,
            purity_type: None,
            mixture_components: Some(vec![MixtureComponent {
                substance: SubstanceIdentifier::from_cas("1310-73-2"),
                weight_fraction_pct: Some(30.0),
                volume_fraction_pct: None,
                is_solvent: false,
            }]),
            intended_use: None,
            additional_context: None,
        };
        let p = PromptBuilder::new().build(&product);
        assert!(p.user_text.contains("1310-73-2"));
        assert!(p.user_text.contains("30.0"));
        // Name and weight fraction belong to the same bullet.
        assert!(p.user_text.contains("- 1310-73-2 (30.0% w/w)"));
    }

    #[test]
    fn en_user_text_without_smiles_is_exact() {
        let p = PromptBuilder::new().build(&acetic_acid_without_smiles());
        assert!(p.smiles_analysis.is_none());
        assert_eq!(
            p.user_text,
            "## Product to classify\n\
             \n\
             - **CAS**: 64-19-7\n\
             - **IUPAC name**: acetic acid\n\
             - **Physical form**: Liquid\n\
             - **Purity**: 99.5%"
        );
    }

    #[test]
    fn ja_user_text_without_smiles_is_exact() {
        let p = PromptBuilder::new()
            .with_language(Language::Ja)
            .build(&acetic_acid_without_smiles());
        assert_eq!(
            p.user_text,
            "## 分類対象品目\n\
             \n\
             - **CAS番号**: 64-19-7\n\
             - **IUPAC名**: acetic acid\n\
             - **物理的形状**: 液体\n\
             - **純度**: 99.5%"
        );
    }

    #[test]
    fn ja_user_text_contains_smiles_hint() {
        let p = PromptBuilder::new()
            .with_language(Language::Ja)
            .build(&acetic_acid());
        assert!(p.user_text.contains("- **SMILES**: CC(O)=O"));
        assert!(p.user_text.contains("## SMILES 事前解析ヒント"));
    }

    #[test]
    fn unnamed_mixture_component_uses_localised_placeholder() {
        let product = mixture_with_unnamed_component();

        let en = PromptBuilder::new().build(&product);
        assert!(en.user_text.contains("- **Mixture components**:"));
        assert!(en.user_text.contains("- unknown"));

        let ja = PromptBuilder::new()
            .with_language(Language::Ja)
            .build(&product);
        assert!(ja.user_text.contains("- **混合成分**:"));
        assert!(ja.user_text.contains("- 不明"));
    }
}