1use crate::types::{Language, PhysicalForm, ProductDescription};
7use super::LlmPrompt;
8
/// Maximum number of characters of free-text context copied into a prompt.
const MAX_CONTEXT_CHARS: usize = 500;

/// Flattens caller-supplied free text into a single bounded line.
///
/// * Tabs, line breaks and the other whitespace control characters, plus the
///   Unicode line/paragraph separators (U+2028, U+2029), become one ASCII
///   space each, so the words on either side stay separated and the text
///   cannot start a new line.
/// * Every remaining control character is dropped.
/// * The result is cut after [`MAX_CONTEXT_CHARS`] characters (counted as
///   `char`s, so a multi-byte character is never split).
fn sanitize_context(ctx: &str) -> String {
    ctx.chars()
        .filter_map(|c| {
            if c == '\u{2028}' || c == '\u{2029}' || (c.is_control() && c.is_whitespace()) {
                // Deleting these outright would glue neighbouring words together.
                Some(' ')
            } else if c.is_control() {
                None
            } else {
                Some(c)
            }
        })
        .take(MAX_CONTEXT_CHARS)
        .collect()
}
35
36#[derive(Debug, Clone, Default)]
61pub struct PromptBuilder {
62 language: Language,
63}
64
65impl PromptBuilder {
66 pub fn new() -> Self {
68 Self { language: Language::En }
69 }
70
71 pub fn with_language(mut self, language: Language) -> Self {
73 self.language = language;
74 self
75 }
76
77 pub fn build(&self, product: &ProductDescription) -> LlmPrompt {
79 let smiles_analysis = product
80 .identifier
81 .smiles
82 .as_deref()
83 .and_then(crate::smiles::classify_smiles);
84
85 let system_text = match self.language {
86 Language::En => self.system_text_en(),
87 Language::Ja => self.system_text_ja(),
88 };
89
90 let user_text = match self.language {
91 Language::En => self.user_text_en(product, smiles_analysis.as_ref()),
92 Language::Ja => self.user_text_ja(product, smiles_analysis.as_ref()),
93 };
94
95 LlmPrompt {
96 system_text,
97 user_text,
98 smiles_analysis,
99 }
100 }
101
102 fn system_text_en(&self) -> String {
105 r#"You are an expert customs classification specialist with deep knowledge of the
106Harmonized System (HS) 2022 nomenclature, particularly Chapters 28 and 29 for
107chemical products.
108
109Your task is to assign a six-digit HS 2022 code to the chemical product described
110in the user message.
111
112## Output format
113
114Respond with **only** a JSON object — no prose, no markdown:
115
116```json
117{
118 "hs_code": "<6 ASCII digits, no dots>",
119 "confidence": <float 0.0–1.0>,
120 "rationale": "<1–3 sentences explaining the classification>",
121 "alternatives": [
122 { "hs_code": "<6 digits>", "confidence": <float>, "reason": "<brief>" }
123 ]
124}
125```
126
127`alternatives` may be an empty array `[]`.
128
129## Confidence guide
130
131| Score | Meaning |
132|-------|---------|
133| ≥ 0.90 | Certain of the 6-digit sub-heading |
134| ≥ 0.70 | Certain of the 4-digit heading, sub-heading uncertain |
135| ≥ 0.50 | Chapter correct, heading uncertain |
136| < 0.50 | Significant uncertainty — classify to the most likely heading |
137
138## Rules
139
140- Use HS 2022 edition.
141- If a SMILES-derived heading hint is provided, treat it as a cross-check, not
142 authoritative — rule 1 of HS Explanatory Notes takes precedence over chemical
143 structure alone.
144- Always verify Chapter Notes and Section Notes before finalising.
145- For mixtures, classify by the component that gives the mixture its essential
146 character (GRI 3b) unless a specific mixture heading applies.
147"#.to_string()
148 }
149
150 fn system_text_ja(&self) -> String {
151 r#"あなたは輸出入通関の専門家であり、HS 2022 品目表(特に第28類・第29類の化学品)に
152精通しています。
153
154ユーザーメッセージに記載された化学品に対して、6桁の HS 2022 コードを付与してください。
155
156## 出力形式
157
158**JSON オブジェクトのみ**を返答してください(文章・マークダウン不要):
159
160```json
161{
162 "hs_code": "<6桁の数字、ドットなし>",
163 "confidence": <0.0〜1.0 の小数>,
164 "rationale": "<分類根拠を1〜3文で>",
165 "alternatives": [
166 { "hs_code": "<6桁>", "confidence": <小数>, "reason": "<簡潔な理由>" }
167 ]
168}
169```
170
171`alternatives` は空配列 `[]` でも可。
172
173## 信頼度の目安
174
175| スコア | 意味 |
176|--------|------|
177| ≥ 0.90 | 6桁の細分まで確実 |
178| ≥ 0.70 | 4桁の号まで確実、細分は不確実 |
179| ≥ 0.50 | 類は正しいが号が不確実 |
180| < 0.50 | 大きな不確実性あり — 最も可能性の高い号に分類 |
181
182## ルール
183
184- HS 2022年版を使用すること。
185- SMILES由来のヘッディングヒントが提供された場合は参考情報として扱い、
186 HS解説書の通則1を優先すること。
187- 分類確定前に類注および部注を確認すること。
188- 混合物の場合、特定の混合物号がない限り、本質的特性を与える成分で分類(通則3(b))。
189"#.to_string()
190 }
191
192 fn user_text_en(
195 &self,
196 product: &ProductDescription,
197 smiles_analysis: Option<&crate::smiles::SmilesClassification>,
198 ) -> String {
199 let mut parts: Vec<String> = Vec::new();
200
201 parts.push("## Product to classify".to_string());
202 parts.push(String::new());
203
204 let id = &product.identifier;
206 if let Some(ref cas) = id.cas {
207 parts.push(format!("- **CAS**: {}", cas));
208 }
209 if let Some(ref iupac) = id.iupac_name {
210 parts.push(format!("- **IUPAC name**: {}", iupac));
211 }
212 if let Some(ref smiles) = id.smiles {
213 parts.push(format!("- **SMILES**: {}", smiles));
214 }
215 if let Some(ref inchi) = id.inchi {
216 parts.push(format!("- **InChI**: {}", inchi));
217 }
218 if let Some(ref inchikey) = id.inchi_key {
219 parts.push(format!("- **InChIKey**: {}", inchikey));
220 }
221
222 if let Some(ref form) = product.physical_form {
224 parts.push(format!("- **Physical form**: {}", physical_form_en(form)));
225 }
226
227 if let Some(purity) = product.purity_pct {
229 parts.push(format!("- **Purity**: {:.1}%", purity));
230 }
231
232 if let Some(ref use_) = product.intended_use {
234 parts.push(format!("- **Intended use**: {:?}", use_));
235 }
236
237 if let Some(ref comps) = product.mixture_components {
239 parts.push("- **Mixture components**:".to_string());
240 for c in comps {
241 let frac = c
242 .weight_fraction_pct
243 .map(|f| format!(" ({:.1}% w/w)", f))
244 .unwrap_or_default();
245 let name = c.substance.cas.as_deref()
246 .or(c.substance.iupac_name.as_deref())
247 .unwrap_or("unknown");
248 parts.push(format!(" - {}{}", name, frac));
249 }
250 }
251
252 if let Some(ref ctx) = product.additional_context {
255 parts.push(format!("- **Additional context**: {}", sanitize_context(ctx)));
256 }
257
258 if let Some(analysis) = smiles_analysis {
260 parts.push(String::new());
261 parts.push("## SMILES pre-analysis hint".to_string());
262 parts.push(String::new());
263 parts.push(format!("- **Organic class**: {:?}", analysis.organic_class));
264 if !analysis.functional_groups.is_empty() {
265 let groups: Vec<&str> = analysis
266 .functional_groups
267 .iter()
268 .map(|g| g.label())
269 .collect();
270 parts.push(format!("- **Functional groups detected**: {}", groups.join(", ")));
271 }
272 let hint = &analysis.heading_hint;
273 if let Some(heading) = hint.heading {
274 parts.push(format!(
275 "- **Heading hint**: {}.{:02} ({}, confidence {:.2})",
276 heading / 100,
277 heading % 100,
278 hint.rationale,
279 hint.confidence
280 ));
281 } else {
282 parts.push(format!(
283 "- **Chapter hint**: Ch.{:02} (confidence {:.2})",
284 hint.chapter, hint.confidence
285 ));
286 }
287 parts.push(String::new());
288 parts.push(
289 "_This hint is derived from SMILES pattern matching and is provided for \
290 cross-checking only. Apply the HS Explanatory Notes authoritatively._"
291 .to_string(),
292 );
293 }
294
295 parts.join("\n")
296 }
297
298 fn user_text_ja(
299 &self,
300 product: &ProductDescription,
301 smiles_analysis: Option<&crate::smiles::SmilesClassification>,
302 ) -> String {
303 let mut parts: Vec<String> = Vec::new();
304
305 parts.push("## 分類対象品目".to_string());
306 parts.push(String::new());
307
308 let id = &product.identifier;
309 if let Some(ref cas) = id.cas {
310 parts.push(format!("- **CAS番号**: {}", cas));
311 }
312 if let Some(ref iupac) = id.iupac_name {
313 parts.push(format!("- **IUPAC名**: {}", iupac));
314 }
315 if let Some(ref smiles) = id.smiles {
316 parts.push(format!("- **SMILES**: {}", smiles));
317 }
318 if let Some(ref inchi) = id.inchi {
319 parts.push(format!("- **InChI**: {}", inchi));
320 }
321 if let Some(ref inchikey) = id.inchi_key {
322 parts.push(format!("- **InChIKey**: {}", inchikey));
323 }
324
325 if let Some(ref form) = product.physical_form {
326 parts.push(format!("- **物理的形状**: {}", physical_form_ja(form)));
327 }
328
329 if let Some(purity) = product.purity_pct {
330 parts.push(format!("- **純度**: {:.1}%", purity));
331 }
332
333 if let Some(ref use_) = product.intended_use {
334 parts.push(format!("- **用途**: {:?}", use_));
335 }
336
337 if let Some(ref comps) = product.mixture_components {
338 parts.push("- **混合成分**:".to_string());
339 for c in comps {
340 let frac = c
341 .weight_fraction_pct
342 .map(|f| format!(" ({:.1}% w/w)", f))
343 .unwrap_or_default();
344 let name = c.substance.cas.as_deref()
345 .or(c.substance.iupac_name.as_deref())
346 .unwrap_or("不明");
347 parts.push(format!(" - {}{}", name, frac));
348 }
349 }
350
351 if let Some(ref ctx) = product.additional_context {
353 parts.push(format!("- **補足情報**: {}", sanitize_context(ctx)));
354 }
355
356 if let Some(analysis) = smiles_analysis {
357 parts.push(String::new());
358 parts.push("## SMILES 事前解析ヒント".to_string());
359 parts.push(String::new());
360 parts.push(format!("- **有機/無機区分**: {:?}", analysis.organic_class));
361 if !analysis.functional_groups.is_empty() {
362 let groups: Vec<&str> = analysis
363 .functional_groups
364 .iter()
365 .map(|g| g.label())
366 .collect();
367 parts.push(format!("- **検出官能基**: {}", groups.join("、")));
368 }
369 let hint = &analysis.heading_hint;
370 if let Some(heading) = hint.heading {
371 parts.push(format!(
372 "- **号ヒント**: {}.{:02}({}、信頼度 {:.2})",
373 heading / 100,
374 heading % 100,
375 hint.rationale,
376 hint.confidence
377 ));
378 } else {
379 parts.push(format!(
380 "- **類ヒント**: 第{:02}類(信頼度 {:.2})",
381 hint.chapter, hint.confidence
382 ));
383 }
384 parts.push(String::new());
385 parts.push(
386 "_このヒントはSMILESパターンマッチングによるもので、参考情報です。\
387 HS解説書を正式な根拠として適用してください。_"
388 .to_string(),
389 );
390 }
391
392 parts.join("\n")
393 }
394}
395
396fn physical_form_en(form: &PhysicalForm) -> &'static str {
401 match form {
402 PhysicalForm::Solid => "Solid",
403 PhysicalForm::Powder { .. } => "Powder",
404 PhysicalForm::Granules => "Granules",
405 PhysicalForm::Liquid => "Liquid",
406 PhysicalForm::Solution { .. } => "Solution",
407 PhysicalForm::Gas => "Gas",
408 PhysicalForm::Foil { .. } => "Foil",
409 PhysicalForm::Ingot => "Ingot",
410 PhysicalForm::Unknown => "Unknown",
411 }
412}
413
414fn physical_form_ja(form: &PhysicalForm) -> &'static str {
415 match form {
416 PhysicalForm::Solid => "固体",
417 PhysicalForm::Powder { .. } => "粉末",
418 PhysicalForm::Granules => "顆粒",
419 PhysicalForm::Liquid => "液体",
420 PhysicalForm::Solution { .. } => "溶液",
421 PhysicalForm::Gas => "気体",
422 PhysicalForm::Foil { .. } => "箔",
423 PhysicalForm::Ingot => "インゴット",
424 PhysicalForm::Unknown => "不明",
425 }
426}
427
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{MixtureComponent, ProductDescription, SubstanceIdentifier};

    /// Product that has an identifier and nothing else.
    fn bare_product(identifier: SubstanceIdentifier) -> ProductDescription {
        ProductDescription {
            identifier,
            physical_form: None,
            purity_pct: None,
            purity_type: None,
            mixture_components: None,
            intended_use: None,
            additional_context: None,
        }
    }

    /// Liquid acetic acid, 99.5 % pure, identified by CAS, name and SMILES.
    fn acetic_acid() -> ProductDescription {
        let identifier = SubstanceIdentifier {
            cas: Some("64-19-7".to_string()),
            iupac_name: Some("acetic acid".to_string()),
            smiles: Some("CC(O)=O".to_string()),
            inchi: None,
            inchi_key: None,
            cid: None,
        };
        ProductDescription {
            physical_form: Some(PhysicalForm::Liquid),
            purity_pct: Some(99.5),
            ..bare_product(identifier)
        }
    }

    /// Prompt produced by a default (English) builder.
    fn english(product: &ProductDescription) -> LlmPrompt {
        PromptBuilder::new().build(product)
    }

    /// Prompt produced by a builder switched to Japanese.
    fn japanese(product: &ProductDescription) -> LlmPrompt {
        PromptBuilder::new()
            .with_language(Language::Ja)
            .build(product)
    }

    #[test]
    fn en_system_prompt_contains_hs_2022() {
        let prompt = english(&acetic_acid());
        assert!(prompt.system_text.contains("HS 2022"));
    }

    #[test]
    fn en_user_text_contains_cas() {
        let prompt = english(&acetic_acid());
        assert!(prompt.user_text.contains("64-19-7"));
    }

    #[test]
    fn en_user_text_contains_purity() {
        let prompt = english(&acetic_acid());
        assert!(prompt.user_text.contains("99.5"));
    }

    #[test]
    fn en_user_text_contains_smiles_hint() {
        let prompt = english(&acetic_acid());
        // Any one of these markers shows that the hint section was rendered.
        let markers = ["Heading hint", "heading hint", "SMILES pre-analysis"];
        assert!(markers
            .iter()
            .any(|marker| prompt.user_text.contains(*marker)));
    }

    #[test]
    fn smiles_analysis_populated_when_smiles_present() {
        let prompt = english(&acetic_acid());
        assert!(prompt.smiles_analysis.is_some());
    }

    #[test]
    fn smiles_analysis_none_when_no_smiles() {
        let product = bare_product(SubstanceIdentifier::from_cas("64-19-7"));
        let prompt = english(&product);
        assert!(prompt.smiles_analysis.is_none());
    }

    #[test]
    fn ja_system_prompt_contains_hs_2022_ja() {
        let prompt = japanese(&acetic_acid());
        assert!(prompt.system_text.contains("HS 2022"));
    }

    #[test]
    fn ja_user_text_contains_cas() {
        let prompt = japanese(&acetic_acid());
        assert!(prompt.user_text.contains("64-19-7"));
    }

    #[test]
    fn mixture_components_listed() {
        let sodium_hydroxide = MixtureComponent {
            substance: SubstanceIdentifier::from_cas("1310-73-2"),
            weight_fraction_pct: Some(30.0),
            volume_fraction_pct: None,
            is_solvent: false,
        };
        let product = ProductDescription {
            physical_form: Some(PhysicalForm::Solution {
                concentration_pct_ww: Some(30.0),
                solvent: None,
            }),
            mixture_components: Some(vec![sodium_hydroxide]),
            ..bare_product(SubstanceIdentifier::from_cas("7732-18-5"))
        };

        let prompt = english(&product);
        assert!(prompt.user_text.contains("1310-73-2"));
        assert!(prompt.user_text.contains("30.0"));
    }
}