Skip to main content

sds_converter_core/
lib.rs

1//! LLM-based bidirectional conversion between Safety Data Sheet (SDS) documents and
2//! the Japanese MHLW SDS Data Exchange Format v1.0 (JIS Z 7253 / GHS).
3//!
4//! # Quick start
5//!
6//! ```no_run
7//! use sds_converter_core::{
8//!     AnthropicBackend, LlmConfig,
9//!     convert_to_json_with_report, ConvertConfig, Language,
10//! };
11//!
12//! #[tokio::main]
13//! async fn main() -> anyhow::Result<()> {
14//!     let backend = AnthropicBackend::new(
15//!         std::env::var("ANTHROPIC_API_KEY")?,
16//!         LlmConfig::default(),
17//!     );
18//!     let config = ConvertConfig {
19//!         source_language: Some(Language::Japanese),
20//!         output_language: Language::Japanese,
21//!         ..Default::default()
22//!     };
23//!     // `convert_to_json_with_report` returns structured metadata (language, sections, notes).
24//!     let (sds, report) =
25//!         convert_to_json_with_report(std::path::Path::new("input.pdf"), &backend, &config).await?;
26//!     for w in &report.warnings { eprintln!("WARN: {w}"); }
27//!     eprintln!("Populated sections: {:?}", report.populated_sections);
28//!     eprintln!("Standardization notes: {:?}", report.standardization_notes);
29//!     std::fs::write("output.json", serde_json::to_string_pretty(&sds)?)?;
30//!     std::fs::write("output_report.json", serde_json::to_string_pretty(&report)?)?;
31//!     Ok(())
32//! }
33//! ```
34//!
35//! # Features
36//!
37//! - **SDS → JSON**: PDF/DOCX/XLSX/TXT → MHLW standard JSON via LLM (parallel extraction,
38//!   automatic retry, JSON repair).
39//! - **JSON → DOCX**: Generates a JIS Z 7253-compliant Word document with localized headings.
40//! - **Multilingual**: `ja` / `en` / `zh-CN` / `zh-TW` source documents and output headings.
41//! - **Pluggable LLM**: Ships with [`AnthropicBackend`] and [`OpenAiCompatBackend`].
42//!   Implement the re-exported [`LlmBackend`] trait to bring your own.
43
44pub mod converter;
45pub mod enrichment;
46pub mod error;
47pub mod ghs_codes;
48pub mod language;
49pub mod schema;
50
51pub use converter::{
52    AnthropicBackend, AnyBackend, ConvertConfig, ConversionReport, LlmBackend, LlmConfig,
53    OpenAiCompatBackend, build_any_backend,
54    convert_bytes_to_json, convert_bytes_to_json_with_report,
55    convert_from_json, convert_from_template,
56    convert_pdf_to_json_vision, convert_to_json, convert_to_json_with_report,
57    convert_url_to_json,
58    extract_sds_from_pdf_vision, fill_template, openai_compat_url,
59};
60pub use converter::extractor::{
61    detect_format_str, detect_language_from_file, detect_language_from_url,
62    extract_text, extract_text_from_url, extract_text_limited,
63};
64pub use converter::validator::validate;
65pub use enrichment::{CasInfo, CasWarning, enrich_composition, lookup_cas};
66pub use error::SdsError;
67pub use ghs_codes::{h_code_description, is_valid_h_code, is_valid_p_code, p_code_description};
68pub use language::{detect_language, Language};
69pub use schema::SdsRoot;