llmprogram 0.1.0

A Rust library for building and running programs that use Large Language Models (LLMs). Each program's behavior is defined in a YAML configuration file, which makes programs easy to create, manage, and share.
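
To give a feel for the format, here is a hypothetical program definition. The field names below are illustrative assumptions, not the crate's confirmed schema; consult the generated docs for the real one.

```yaml
# Hypothetical sketch of a program definition.
# Field names are assumptions, not the crate's confirmed schema.
name: summarizer
model: gpt-4o-mini
system_prompt: |
  You are a concise technical summarizer.
prompt_template: |
  Summarize the following article in three bullet points:
  {{article}}
```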
The example below is a complete command-line interface built on the library:

```rust
use clap::{Parser, Subcommand};
use llmprogram::LLMProgram;
use std::collections::HashMap;
use serde_json::Value;
use futures_util::StreamExt;

#[derive(Parser)]
#[clap(name = "llmprogram", version = "0.1.0", author = "Your Name")]
struct Cli {
    #[clap(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// Run an LLM program with inputs
    Run {
        /// Path to the program YAML file
        program_path: String,
        
        /// Path to JSON/YAML file containing inputs
        #[clap(short, long)]
        inputs: Option<String>,
        
        /// JSON string of inputs
        #[clap(long)]
        input_json: Option<String>,
        
        /// Path to output file (default: stdout)
        #[clap(short, long)]
        output: Option<String>,
        
        /// Stream the response
        #[clap(short, long)]
        stream: bool,
    },
    
    /// Generate an LLM program YAML file based on description
    GenerateYaml {
        /// Description of what the LLM program should do
        description: String,
        
        /// Example of the input the program will receive
        #[clap(long)]
        example_input: Option<String>,
        
        /// Example of the output the program should generate
        #[clap(long)]
        example_output: Option<String>,
        
        /// Path to output YAML file (default: stdout)
        #[clap(short, long)]
        output: Option<String>,
        
        /// OpenAI API key (optional, defaults to OPENAI_API_KEY env var)
        #[clap(long)]
        api_key: Option<String>,
    },
    
    /// Show analytics data
    Analytics {
        /// Path to the analytics database
        #[clap(long, default_value = "llmprogram_analytics.db")]
        db_path: String,
        
        /// Filter by program name
        #[clap(long)]
        program: Option<String>,
        
        /// Filter by model name
        #[clap(long)]
        model: Option<String>,
    },
    
    /// Generate an instruction dataset for LLM fine-tuning
    GenerateDataset {
        /// The path to the SQLite database file
        database_path: String,
        
        /// The path to write the generated dataset to
        output_path: String,
    },
}
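
// Example invocations (clap kebab-cases the subcommand names):
//
//   llmprogram run program.yaml --inputs inputs.json
//   llmprogram run program.yaml --input-json '{"article": "..."}' --stream
//   llmprogram generate-yaml "Summarize articles" -o summarizer.yaml
//   llmprogram analytics --program summarizer
//   llmprogram generate-dataset llmprogram_analytics.db dataset.jsonl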

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();
    
    match &cli.command {
        Commands::Run { 
            program_path, 
            inputs, 
            input_json, 
            output, 
            stream 
        } => {
            run_program(program_path, inputs, input_json, output, *stream).await?;
        },
        Commands::GenerateYaml { 
            description, 
            example_input, 
            example_output, 
            output,
            api_key,
        } => {
            generate_yaml_program_command(
                description,
                example_input.as_deref(),
                example_output.as_deref(),
                output.as_deref(),
                api_key.as_deref(),
            ).await?;
        },
        Commands::Analytics { 
            db_path, 
            program, 
            model 
        } => {
            show_analytics(db_path, program.as_deref(), model.as_deref()).await?;
        },
        Commands::GenerateDataset { 
            database_path, 
            output_path 
        } => {
            generate_dataset_command(database_path, output_path)?;
        },
    }
    
    Ok(())
}

async fn run_program(
    program_path: &str,
    inputs_file: &Option<String>,
    input_json: &Option<String>,
    output_file: &Option<String>,
    stream: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    // Load inputs
    let inputs_value = if let Some(json_str) = input_json {
        serde_json::from_str(json_str)?
    } else if let Some(file_path) = inputs_file {
        let content = std::fs::read_to_string(file_path)?;
        if file_path.ends_with(".yaml") || file_path.ends_with(".yml") {
            serde_yaml::from_str(&content)?
        } else {
            serde_json::from_str(&content)?
        }
    } else {
        // Try to read from stdin
        use std::io::Read;
        let mut buffer = String::new();
        std::io::stdin().read_to_string(&mut buffer)?;
        serde_json::from_str(&buffer)?
    };
    
    // Create the program, enabling the Redis-backed response cache
    let program = LLMProgram::new_with_options(
        program_path,
        None, // Use default API key (e.g. the OPENAI_API_KEY env var)
        None, // Use default base URL
        true, // Enable cache
        "redis://localhost:6379", // Cache backend
    )?;
    
    match inputs_value {
        // If it's an array, treat as batch processing
        Value::Array(inputs_array) => {
            // Convert to Vec<HashMap<String, Value>>
            let mut inputs_list = Vec::new();
            for input_value in inputs_array {
                if let Value::Object(map) = input_value {
                    inputs_list.push(map.into_iter().collect());
                } else {
                    return Err("Batch inputs must be an array of objects".into());
                }
            }
            
            println!("Running batch processing with {} inputs...", inputs_list.len());
            let results = program.batch_process(&inputs_list).await?;
            
            // Output results
            let output_str = serde_json::to_string_pretty(&results)?;
            if let Some(file_path) = output_file {
                std::fs::write(file_path, output_str)?;
                println!("Results saved to {}", file_path);
            } else {
                println!("{}", output_str);
            }
        }
        // If it's an object, treat as single input
        Value::Object(inputs_map) => {
            let inputs_map: HashMap<String, Value> = inputs_map.into_iter().collect();
            
            if stream {
                println!("Streaming response:");
                let mut stream = program.stream(&inputs_map).await?;
                while let Some(result) = stream.next().await {
                    match result {
                        Ok(chunk) => {
                            if let Some(data) = chunk.get("data") {
                                println!("{}", data);
                            }
                        }
                        Err(e) => {
                            eprintln!("Error in stream: {}", e);
                            break;
                        }
                    }
                }
            } else {
                let result = program.run(&inputs_map).await?;
                
                // Output result
                let output_str = serde_json::to_string_pretty(&result)?;
                if let Some(file_path) = output_file {
                    std::fs::write(file_path, output_str)?;
                    println!("Results saved to {}", file_path);
                } else {
                    println!("{}", output_str);
                }
            }
        }
        _ => return Err("Inputs must be a JSON object or array".into()),
    }
    
    Ok(())
}

async fn generate_yaml_program_command(
    description: &str,
    example_input: Option<&str>,
    example_output: Option<&str>,
    output_file: Option<&str>,
    api_key: Option<&str>,
) -> Result<(), Box<dyn std::error::Error>> {
    llmprogram::yaml_generator::generate_yaml_program(
        description,
        example_input,
        example_output,
        output_file,
        api_key,
    ).await?;
    Ok(())
}

async fn show_analytics(
    db_path: &str,
    program_name: Option<&str>,
    model_name: Option<&str>,
) -> Result<(), Box<dyn std::error::Error>> {
    let analytics_engine = llmprogram::analytics::AnalyticsEngine::new(db_path)?;
    
    println!("=== LLM Call Statistics ===");
    let llm_stats = analytics_engine.get_llm_call_stats(program_name, model_name)?;
    if !llm_stats.is_empty() {
        for stat in llm_stats {
            println!("Program: {}", stat.program_name);
            println!("  Model: {}", stat.model_name);
            println!("  Calls: {}", stat.call_count);
            println!("  Tokens: {} (prompt: {}, completion: {})", 
                stat.total_tokens, stat.total_prompt_tokens, stat.total_completion_tokens);
            println!("  Avg Execution Time: {:.2}ms", stat.avg_execution_time_ms);
            println!("  Cache Hits: {}", stat.cache_hits);
            println!("  Unique Users: {}", stat.unique_users);
            println!();
        }
    } else {
        println!("No LLM call data found.");
    }
    
    println!("=== Program Usage Statistics ===");
    let program_stats = analytics_engine.get_program_usage_stats(program_name)?;
    if !program_stats.is_empty() {
        for stat in program_stats {
            println!("Program: {}", stat.program_name);
            println!("  Usage Count: {}", stat.usage_count);
            println!("  Successful Calls: {}", stat.successful_calls);
            println!("  Failed Calls: {}", stat.failed_calls);
            println!("  Avg Execution Time: {:.2}ms", stat.avg_execution_time_ms);
            println!("  Unique Users: {}", stat.unique_users);
            println!();
        }
    } else {
        println!("No program usage data found.");
    }
    
    println!("=== Token Usage Statistics ===");
    let token_stats = analytics_engine.get_token_usage_stats(program_name, model_name)?;
    if !token_stats.is_empty() {
        for stat in token_stats {
            println!("Program: {}", stat.program_name);
            println!("  Model: {}", stat.model_name);
            println!("  Tokens: {} (prompt: {}, completion: {})", 
                stat.total_tokens, stat.total_prompt_tokens, stat.total_completion_tokens);
            println!("  Estimated Cost: ${:.4}", stat.total_cost_estimate);
            println!("  Unique Users: {}", stat.unique_users);
            println!();
        }
    } else {
        println!("No token usage data found.");
    }
    
    Ok(())
}

fn generate_dataset_command(
    database_path: &str,
    output_path: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    llmprogram::dataset_generator::generate_dataset(database_path, output_path)?;
    Ok(())
}
```
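
The CLI above only wraps library calls, so the same `LLMProgram` API can be embedded directly. A minimal sketch using the calls that appear in the example (the program path and input key are placeholders):

```rust
use llmprogram::LLMProgram;
use serde_json::Value;
use std::collections::HashMap;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Load a program definition; None/None fall back to the default
    // API key and base URL, and caching is backed by a local Redis.
    let program = LLMProgram::new_with_options(
        "summarizer.yaml", // placeholder path
        None,
        None,
        true,
        "redis://localhost:6379",
    )?;

    // A single run takes a map of named inputs; "article" is a
    // placeholder for whatever the program's template expects.
    let mut inputs: HashMap<String, Value> = HashMap::new();
    inputs.insert("article".to_string(), Value::String("...".into()));

    let result = program.run(&inputs).await?;
    println!("{}", serde_json::to_string_pretty(&result)?);
    Ok(())
}
```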