Expand description
LLM-based bidirectional conversion between Safety Data Sheet (SDS) documents and the Japanese MHLW SDS Data Exchange Format v1.0 (JIS Z 7253 / GHS).
§Quick start
use sds_converter_core::{
AnthropicBackend, LlmConfig,
convert_to_json, convert_to_json_with_report, ConvertConfig, Language,
};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let backend = AnthropicBackend::new(
std::env::var("ANTHROPIC_API_KEY")?,
LlmConfig::default(),
);
let config = ConvertConfig {
source_language: Some(Language::Japanese),
output_language: Language::Japanese,
..Default::default()
};
// `convert_to_json_with_report` returns structured metadata (language, sections, notes).
let (sds, report) =
convert_to_json_with_report(std::path::Path::new("input.pdf"), &backend, &config).await?;
for w in &report.warnings { eprintln!("WARN: {w}"); }
eprintln!("Populated sections: {:?}", report.populated_sections);
eprintln!("Standardization notes: {:?}", report.standardization_notes);
std::fs::write("output.json", serde_json::to_string_pretty(&sds)?)?;
std::fs::write("output_report.json", serde_json::to_string_pretty(&report)?)?;
Ok(())
}
§Features
- SDS → JSON: PDF/DOCX/XLSX/TXT → MHLW standard JSON via LLM (parallel extraction, automatic retry, JSON repair).
- JSON → DOCX: Generates a JIS Z 7253-compliant Word document with localized headings.
- Multilingual: `ja` / `en` / `zh-CN` / `zh-TW` source documents and output headings.
- Pluggable LLM: Ships with `AnthropicBackend` and `OpenAiCompatBackend`. Implement `converter::LlmBackend` to bring your own.
Re-exports§
pub use converter::AnthropicBackend;
pub use converter::AnyBackend;
pub use converter::ConvertConfig;
pub use converter::ConversionReport;
pub use converter::LlmBackend;
pub use converter::LlmConfig;
pub use converter::OpenAiCompatBackend;
pub use converter::build_any_backend;
pub use converter::convert_bytes_to_json;
pub use converter::convert_bytes_to_json_with_report;
pub use converter::convert_from_json;
pub use converter::convert_from_template;
pub use converter::convert_pdf_to_json_vision;
pub use converter::convert_to_json;
pub use converter::convert_to_json_with_report;
pub use converter::convert_url_to_json;
pub use converter::extract_sds_from_pdf_vision;
pub use converter::fill_template;
pub use converter::openai_compat_url;
pub use converter::extractor::detect_format_str;
pub use converter::extractor::detect_language_from_file;
pub use converter::extractor::detect_language_from_url;
pub use converter::extractor::extract_text;
pub use converter::extractor::extract_text_from_url;
pub use converter::extractor::extract_text_limited;
pub use converter::validator::validate;
pub use enrichment::CasInfo;
pub use enrichment::CasWarning;
pub use enrichment::enrich_composition;
pub use enrichment::lookup_cas;
pub use error::SdsError;
pub use ghs_codes::h_code_description;
pub use ghs_codes::is_valid_h_code;
pub use ghs_codes::is_valid_p_code;
pub use ghs_codes::p_code_description;
pub use language::detect_language;
pub use language::Language;
pub use schema::SdsRoot;