llmprogram 0.1.0

A Rust library that provides a structured and powerful way to create and run programs that use Large Language Models (LLMs). It uses a YAML-based configuration to define the behavior of your LLM programs, making them easy to create, manage, and share.
Documentation
use rusqlite::Connection;
use std::fs::File;
use std::io::{BufWriter, Write};
use anyhow::Result;

/// Generate an instruction dataset for LLM fine-tuning from a SQLite log file.
/// 
/// # Arguments
/// 
/// * `database_path` - The path to the SQLite database file
/// * `output_path` - The path to write the generated dataset to
/// 
/// # Returns
/// 
/// * `Result<()>` - Ok if successful, Err if there was an error
pub fn generate_dataset(database_path: &str, output_path: &str) -> Result<()> {
    // Open the database
    let conn = Connection::open(database_path)?;
    
    // Query the program logs
    let mut stmt = conn.prepare("SELECT llm_input, llm_output FROM program_logs")?;
    let rows = stmt.query_map([], |row| {
        let llm_input: String = row.get(0)?;
        let llm_output: String = row.get(1)?;
        Ok((llm_input, llm_output))
    })?;
    
    // Create the output file
    let file = File::create(output_path)?;
    let mut writer = BufWriter::new(file);
    
    // Process each row and write to the output file
    for row in rows {
        let (llm_input, llm_output) = row?;
        
        // Create a JSON object with instruction and output
        let dataset_entry = serde_json::json!({
            "instruction": llm_input,
            "output": llm_output
        });
        
        // Write to file as JSONL (JSON Lines format)
        writeln!(writer, "{}", dataset_entry)?;
    }
    
    // Flush the writer to ensure all data is written
    writer.flush()?;
    
    println!("Dataset generated and saved to {}", output_path);
    
    Ok(())
}