use anyhow::{Context, Result};
use kreuzberg::{LlmConfig, StructuredExtractionConfig, extract_file_sync};
use std::path::PathBuf;
use crate::WireFormat;
/// Arguments for the structured-extraction command.
///
/// Consumed by value by [`extract_structured_command`], which destructures
/// every field.
pub struct ExtractStructuredArgs {
    /// Document to run structured extraction on.
    pub path: PathBuf,
    /// File containing the JSON schema the output should conform to.
    /// It is read as text and parsed as JSON before extraction starts.
    pub schema_path: PathBuf,
    /// Model identifier, forwarded unchanged to `LlmConfig::model`.
    pub model: String,
    /// Optional API key, forwarded unchanged to `LlmConfig::api_key`.
    pub api_key: Option<String>,
    /// Optional prompt, forwarded to `StructuredExtractionConfig::prompt`.
    pub prompt: Option<String>,
    /// Name attached to the schema; `"extraction"` is used when `None`.
    pub schema_name: Option<String>,
    /// Forwarded to `StructuredExtractionConfig::strict`.
    pub strict: bool,
    /// Optional configuration file path, handed to `load_config` to build
    /// the base extraction configuration.
    pub config_path: Option<PathBuf>,
    /// Wire format used when printing the structured output.
    pub format: WireFormat,
}
/// Runs structured extraction on a document and prints the result to stdout.
///
/// The command:
/// 1. reads `schema_path` and parses it as JSON,
/// 2. loads the base configuration via `super::load_config(config_path)` and
///    sets its `structured_extraction` section from the remaining arguments,
/// 3. calls `extract_file_sync` on `path`,
/// 4. prints the structured output in the requested [`WireFormat`].
///
/// # Errors
///
/// Returns an error when:
/// - the schema file cannot be read or does not contain valid JSON,
/// - `super::load_config` fails,
/// - `extract_file_sync` fails,
/// - extraction succeeds but yields no structured output,
/// - the structured output cannot be serialized to the requested format.
pub fn extract_structured_command(args: ExtractStructuredArgs) -> Result<()> {
    let ExtractStructuredArgs {
        path,
        schema_path,
        model,
        api_key,
        prompt,
        schema_name,
        strict,
        config_path,
        format,
    } = args;

    // Load and validate the schema first so a bad schema fails before any
    // configuration loading or extraction work happens.
    let schema_str = std::fs::read_to_string(&schema_path).with_context(|| {
        format!(
            "Failed to read JSON schema file '{}'. Ensure the file exists and is readable.",
            schema_path.display()
        )
    })?;
    let schema: serde_json::Value = serde_json::from_str(&schema_str).with_context(|| {
        format!(
            "Failed to parse JSON schema from '{}'. Ensure the file contains valid JSON.",
            schema_path.display()
        )
    })?;

    let mut config = super::load_config(config_path)?;

    // Only the model and API key come from the command arguments; every other
    // LLM setting is left as `None`.
    let llm_config = LlmConfig {
        model,
        api_key,
        base_url: None,
        timeout_secs: None,
        max_retries: None,
        temperature: None,
        max_tokens: None,
    };

    // Overwrites any `structured_extraction` section that `load_config`
    // may have produced.
    config.structured_extraction = Some(StructuredExtractionConfig {
        schema,
        schema_name: schema_name.unwrap_or_else(|| "extraction".to_string()),
        schema_description: None,
        strict,
        prompt,
        llm: llm_config,
    });

    // Pass the path itself rather than a `to_string_lossy()` copy: the lossy
    // conversion replaces non-UTF-8 bytes with U+FFFD, which would make the
    // extractor look for a different (usually nonexistent) file.
    let result = extract_file_sync(&path, None, &config).with_context(|| {
        format!(
            "Failed to extract structured data from '{}'. Ensure the file is readable and the LLM configuration is correct.",
            path.display()
        )
    })?;

    let structured = result.structured_output.with_context(|| {
        "Structured extraction completed but returned no structured output. \
         This may indicate the LLM failed to produce valid structured data matching the schema."
    })?;

    match format {
        WireFormat::Json => {
            println!(
                "{}",
                serde_json::to_string_pretty(&structured)
                    .context("Failed to serialize structured output to JSON")?
            );
        }
        WireFormat::Toon => {
            println!(
                "{}",
                serde_toon::to_string(&structured)
                    .context("Failed to serialize structured output to TOON")?
            );
        }
        // Text output is rendered as pretty-printed JSON, same as `Json`;
        // only the error context differs.
        WireFormat::Text => {
            println!(
                "{}",
                serde_json::to_string_pretty(&structured)
                    .context("Failed to serialize structured output to text")?
            );
        }
    }

    Ok(())
}