pub mod extractor;
pub mod generator;
pub mod html;
pub mod llm;
pub mod pdf;
pub mod template;
pub mod validator;
use std::path::Path;
use crate::error::SdsError;
use crate::language::Language;
use crate::schema::SdsRoot;
pub use extractor::InputFormat;
pub use generator::generate_docx;
pub use pdf::generate_pdf;
pub use llm::{
openai_compat_url, AnthropicBackend, AnyBackend, build_any_backend,
extract_sds_from_pdf_vision, LlmBackend, LlmConfig, OpenAiCompatBackend,
};
pub use template::fill_template;
/// Options shared by the conversion entry points in this module.
#[derive(Clone, Debug)]
pub struct ConvertConfig {
    /// Language of the input document, forwarded to the LLM extraction step.
    ///
    /// `None` leaves the source language unspecified; how the LLM layer
    /// treats that case is decided in the `llm` module.
    pub source_language: Option<Language>,
    /// Language handed to [`generate_docx`] by [`convert_from_json`].
    pub output_language: Language,
    /// Character limit passed to the extractor's `*_limited` functions.
    ///
    /// NOTE(review): whether over-long input is truncated or rejected is
    /// determined by the extractor — confirm there.
    pub max_chars: usize,
}
impl Default for ConvertConfig {
fn default() -> Self {
Self {
source_language: None,
output_language: Language::default(),
max_chars: 80_000,
}
}
}
/// Converts the document at `input_path` into a structured [`SdsRoot`].
///
/// The pipeline is: extract text (limited by `config.max_chars`), send it to
/// the LLM `backend` together with `config.source_language`, then run the
/// validator over the result.
///
/// Returns the parsed SDS together with all warnings: those reported by the
/// LLM extraction step first, followed by the validator's findings.
///
/// # Errors
///
/// Returns [`SdsError::Extract`] when the extracted text is empty or
/// whitespace-only, and propagates any error from the extractor or the LLM
/// extraction step.
pub async fn convert_to_json<B: LlmBackend + Sync>(
    input_path: &Path,
    backend: &B,
    config: &ConvertConfig,
) -> Result<(SdsRoot, Vec<String>), SdsError> {
    let document_text = extractor::extract_text_limited(input_path, config.max_chars).await?;

    // Nothing usable came out of the extractor: fail before spending an LLM call.
    let has_content = !document_text.trim().is_empty();
    if !has_content {
        return Err(SdsError::Extract(
            "No text extracted — document may be image-only or empty".into(),
        ));
    }

    let (sds, llm_warnings) =
        llm::extract_sds_from_text(backend, &document_text, config.source_language).await?;

    // LLM warnings come first, validator findings are appended after them.
    let mut warnings = llm_warnings;
    warnings.extend(validator::validate(&sds));

    Ok((sds, warnings))
}
/// Converts an in-memory document into a structured [`SdsRoot`].
///
/// The bytes are written to a temporary file on a blocking worker thread and
/// that file is then processed by [`convert_to_json`]. The temporary file's
/// suffix is the lowercased extension of `filename` (with a leading dot), or
/// empty when `filename` has no UTF-8 extension.
///
/// # Errors
///
/// Returns [`SdsError::Extract`] when the temporary file cannot be created,
/// written, or flushed, or when the blocking task fails to complete; otherwise
/// propagates whatever [`convert_to_json`] returns.
pub async fn convert_bytes_to_json<B: LlmBackend + Sync>(
    data: &[u8],
    filename: &str,
    backend: &B,
    config: &ConvertConfig,
) -> Result<(SdsRoot, Vec<String>), SdsError> {
    let suffix = match Path::new(filename).extension().and_then(|ext| ext.to_str()) {
        Some(ext) => format!(".{}", ext.to_ascii_lowercase()),
        None => String::new(),
    };

    // The blocking task must own its inputs, hence the copy of `data`.
    let payload = data.to_vec();
    let write_temp_file = move || {
        use std::io::Write as _;

        let mut file = tempfile::Builder::new()
            .suffix(&suffix)
            .tempfile()
            .map_err(|err| SdsError::Extract(format!("tempfile create: {err}")))?;
        file.write_all(&payload)
            .map_err(|err| SdsError::Extract(format!("tempfile write: {err}")))?;
        file.flush()
            .map_err(|err| SdsError::Extract(format!("tempfile flush: {err}")))?;
        Ok::<_, SdsError>(file.into_temp_path())
    };

    let temp_path = match tokio::task::spawn_blocking(write_temp_file).await {
        Ok(written) => written?,
        Err(join_err) => {
            return Err(SdsError::Extract(format!(
                "spawn_blocking panicked: {join_err}"
            )));
        }
    };

    // `temp_path` stays alive until the conversion below has finished.
    convert_to_json(temp_path.as_ref(), backend, config).await
}
pub fn convert_from_json(
sds: &SdsRoot,
output_path: &Path,
config: &ConvertConfig,
) -> Result<(), SdsError> {
generate_docx(sds, output_path, config.output_language)
}
/// Converts the content found at `url` into a structured [`SdsRoot`].
///
/// Text is obtained through the extractor's URL entry point (limited by
/// `config.max_chars`), sent to the LLM `backend` together with
/// `config.source_language`, and the result is then validated.
///
/// Returns the parsed SDS together with all warnings: those reported by the
/// LLM extraction step first, followed by the validator's findings.
///
/// # Errors
///
/// Returns [`SdsError::Extract`] when the extracted text is empty or
/// whitespace-only, and propagates any error from the extractor or the LLM
/// extraction step.
pub async fn convert_url_to_json<B: LlmBackend + Sync>(
    url: &str,
    backend: &B,
    config: &ConvertConfig,
) -> Result<(SdsRoot, Vec<String>), SdsError> {
    let page_text = extractor::extract_text_from_url_limited(url, config.max_chars).await?;

    // An all-whitespace page is treated the same as an empty one.
    if page_text.chars().all(char::is_whitespace) {
        return Err(SdsError::Extract(
            "No text extracted from URL — page may be empty or JavaScript-rendered".into(),
        ));
    }

    let (sds, llm_warnings) =
        llm::extract_sds_from_text(backend, &page_text, config.source_language).await?;

    // LLM warnings first, validator findings after.
    let warnings = llm_warnings
        .into_iter()
        .chain(validator::validate(&sds))
        .collect();

    Ok((sds, warnings))
}
pub async fn convert_pdf_to_json_vision(
input_path: &Path,
api_key: &str,
llm_config: &LlmConfig,
config: &ConvertConfig,
) -> Result<(SdsRoot, Vec<String>), SdsError> {
let bytes = std::fs::read(input_path)
.map_err(|e| SdsError::Extract(format!("reading PDF: {e}")))?;
let (sds, mut warnings) =
llm::extract_sds_from_pdf_vision(api_key, llm_config, &bytes, config.source_language)
.await?;
let validation_warnings = validator::validate(&sds);
warnings.extend(validation_warnings);
Ok((sds, warnings))
}
pub fn convert_from_template(
sds: &SdsRoot,
template_path: &Path,
output_path: &Path,
) -> Result<(), SdsError> {
fill_template(sds, template_path, output_path)
}