Skip to main content

Crate sds_converter_core

Crate sds_converter_core 

Source
Expand description

LLM-based bidirectional conversion between Safety Data Sheet (SDS) documents and the Japanese MHLW SDS Data Exchange Format v1.0 (JIS Z 7253 / GHS).

§Quick start

use sds_converter_core::{
    AnthropicBackend, LlmConfig,
    convert_to_json, convert_to_json_with_report, ConvertConfig, Language,
};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Read the API key from the environment; `?` propagates the error if it cannot be read.
    let api_key = std::env::var("ANTHROPIC_API_KEY")?;
    let backend = AnthropicBackend::new(api_key, LlmConfig::default());

    // Japanese source and Japanese output; every other option keeps its default.
    let config = ConvertConfig {
        source_language: Some(Language::Japanese),
        output_language: Language::Japanese,
        ..Default::default()
    };

    // `convert_to_json_with_report` yields the converted SDS together with a report;
    // the report fields read below are `warnings`, `populated_sections`, and
    // `standardization_notes`.
    let input = std::path::Path::new("input.pdf");
    let (sds, report) = convert_to_json_with_report(input, &backend, &config).await?;

    // Surface the report on stderr.
    report.warnings.iter().for_each(|w| eprintln!("WARN: {w}"));
    eprintln!("Populated sections: {:?}", report.populated_sections);
    eprintln!("Standardization notes: {:?}", report.standardization_notes);

    // Persist the SDS first, then the report, each as pretty-printed JSON.
    let sds_json = serde_json::to_string_pretty(&sds)?;
    std::fs::write("output.json", sds_json)?;
    let report_json = serde_json::to_string_pretty(&report)?;
    std::fs::write("output_report.json", report_json)?;

    Ok(())
}

§Features

  • SDS → JSON: PDF/DOCX/XLSX/TXT → MHLW standard JSON via LLM (parallel extraction, automatic retry, JSON repair).
  • JSON → DOCX: Generates a JIS Z 7253-compliant Word document with localized headings.
  • Multilingual: supports ja / en / zh-CN / zh-TW for both source documents and output headings.
  • Pluggable LLM: Ships with AnthropicBackend and OpenAiCompatBackend. Implement converter::LlmBackend to bring your own.

Re-exports§

pub use converter::AnthropicBackend;
pub use converter::AnyBackend;
pub use converter::ConvertConfig;
pub use converter::ConversionReport;
pub use converter::LlmBackend;
pub use converter::LlmConfig;
pub use converter::OpenAiCompatBackend;
pub use converter::build_any_backend;
pub use converter::convert_bytes_to_json;
pub use converter::convert_bytes_to_json_with_report;
pub use converter::convert_from_json;
pub use converter::convert_from_template;
pub use converter::convert_pdf_to_json_vision;
pub use converter::convert_to_json;
pub use converter::convert_to_json_with_report;
pub use converter::convert_url_to_json;
pub use converter::extract_sds_from_pdf_vision;
pub use converter::fill_template;
pub use converter::openai_compat_url;
pub use converter::extractor::detect_format_str;
pub use converter::extractor::detect_language_from_file;
pub use converter::extractor::detect_language_from_url;
pub use converter::extractor::extract_text;
pub use converter::extractor::extract_text_from_url;
pub use converter::extractor::extract_text_limited;
pub use converter::validator::validate;
pub use enrichment::CasInfo;
pub use enrichment::CasWarning;
pub use enrichment::enrich_composition;
pub use enrichment::lookup_cas;
pub use error::SdsError;
pub use ghs_codes::h_code_description;
pub use ghs_codes::is_valid_h_code;
pub use ghs_codes::is_valid_p_code;
pub use ghs_codes::p_code_description;
pub use language::detect_language;
pub use language::Language;
pub use schema::SdsRoot;

Modules§

converter
enrichment
error
ghs_codes
GHS Rev.10 H-code and P-code validation and descriptions.
language
schema