// sds_converter_core/lib.rs
//! LLM-based bidirectional conversion between Safety Data Sheet (SDS) documents and
//! the Japanese MHLW SDS Data Exchange Format v1.0 (JIS Z 7253 / GHS).
//!
//! # Quick start
//!
//! ```no_run
//! use sds_converter_core::{
//!     AnthropicBackend, LlmConfig,
//!     convert_to_json, convert_to_json_with_report, ConvertConfig, Language,
//! };
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let backend = AnthropicBackend::new(
//!         std::env::var("ANTHROPIC_API_KEY")?,
//!         LlmConfig::default(),
//!     );
//!     let config = ConvertConfig {
//!         source_language: Some(Language::Japanese),
//!         output_language: Language::Japanese,
//!         ..Default::default()
//!     };
//!     // `convert_to_json_with_report` returns structured metadata (language, sections, notes).
//!     let (sds, report) =
//!         convert_to_json_with_report(std::path::Path::new("input.pdf"), &backend, &config).await?;
//!     for w in &report.warnings { eprintln!("WARN: {w}"); }
//!     eprintln!("Populated sections: {:?}", report.populated_sections);
//!     eprintln!("Standardization notes: {:?}", report.standardization_notes);
//!     std::fs::write("output.json", serde_json::to_string_pretty(&sds)?)?;
//!     std::fs::write("output_report.json", serde_json::to_string_pretty(&report)?)?;
//!     Ok(())
//! }
//! ```
//!
//! # Features
//!
//! - **SDS → JSON**: PDF/DOCX/XLSX/TXT → MHLW standard JSON via LLM (parallel extraction,
//!   automatic retry, JSON repair).
//! - **JSON → DOCX**: Generates a JIS Z 7253-compliant Word document with localized headings.
//! - **Multilingual**: `ja` / `en` / `zh-CN` / `zh-TW` source documents and output headings.
//! - **Pluggable LLM**: Ships with [`AnthropicBackend`] and [`OpenAiCompatBackend`].
//!   Implement [`converter::LlmBackend`] to bring your own.

44pub mod converter;
45pub mod enrichment;
46pub mod error;
47pub mod ghs_codes;
48pub mod language;
49pub mod schema;
50
51pub use converter::{
52 AnthropicBackend, AnyBackend, ConvertConfig, ConversionReport, LlmBackend, LlmConfig,
53 OpenAiCompatBackend, build_any_backend,
54 convert_bytes_to_json, convert_bytes_to_json_with_report,
55 convert_from_json, convert_from_template,
56 convert_pdf_to_json_vision, convert_to_json, convert_to_json_with_report,
57 convert_url_to_json,
58 extract_sds_from_pdf_vision, fill_template, openai_compat_url,
59};
60pub use converter::extractor::{
61 detect_format_str, detect_language_from_file, detect_language_from_url,
62 extract_text, extract_text_from_url, extract_text_limited,
63};
64pub use converter::validator::validate;
65pub use enrichment::{CasInfo, CasWarning, enrich_composition, lookup_cas};
66pub use error::SdsError;
67pub use ghs_codes::{h_code_description, is_valid_h_code, is_valid_p_code, p_code_description};
68pub use language::{detect_language, Language};
69pub use schema::SdsRoot;