use rusqlite::Connection;
use std::fs::File;
use std::io::{BufWriter, Write};
use anyhow::Result;
/// Exports the `llm_input` / `llm_output` columns of the `program_logs` table
/// in a SQLite database to a text file, one JSON object per line.
///
/// Every row read from the table is written as a single line containing a
/// JSON object with two keys: `"instruction"` (taken from `llm_input`) and
/// `"output"` (taken from `llm_output`). After all rows are written and the
/// buffer is flushed, a confirmation message is printed to stdout.
///
/// # Arguments
///
/// * `database_path` - path handed to `Connection::open`.
/// * `output_path` - path handed to `File::create` for the generated file.
///
/// # Errors
///
/// Any failure is propagated to the caller: opening the database, preparing
/// or running the query, reading either column as a `String`, creating the
/// output file, or writing/flushing it.
pub fn generate_dataset(database_path: &str, output_path: &str) -> Result<()> {
    let db = Connection::open(database_path)?;
    let mut query = db.prepare("SELECT llm_input, llm_output FROM program_logs")?;

    // Each row is mapped to an owned (llm_input, llm_output) tuple; the
    // resulting iterator is consumed one item at a time further down.
    let mut pairs = query.query_map([], |record| {
        Ok((record.get::<_, String>(0)?, record.get::<_, String>(1)?))
    })?;

    // The output file is created only after the query was set up without error.
    let mut sink = BufWriter::new(File::create(output_path)?);

    pairs.try_for_each(|pair| -> Result<()> {
        let (instruction, output) = pair?;
        let entry = serde_json::json!({
            "instruction": instruction,
            "output": output
        });
        writeln!(sink, "{}", entry)?;
        Ok(())
    })?;

    // Flush explicitly so a failed final write surfaces as an error here
    // instead of being dropped together with the writer.
    sink.flush()?;

    println!("Dataset generated and saved to {}", output_path);
    Ok(())
}