interpolize 1.5.0

A Rust program that scrapes Discord, learns how your friends talk, and generates new messages in their collective voice. Yes, this is what we're doing with our lives.
use anyhow::Result;
use clap::{Parser, Subcommand};

mod config;
mod scraper;
mod storage;
mod embed;
mod markov;
mod interpolate;
mod shell;

// NOTE: clap's derive macros turn `///` doc comments into `--help` text, so the doc
// comments below are user-facing output, not just source documentation.

/// Scrapes Discord, learns how your friends talk, and generates new messages in their
/// collective voice.
#[derive(Parser)]
#[command(name = "interpolize")]
struct Cli {
    /// Path to the configuration file.
    #[arg(short, long, default_value = "config.toml")]
    config: String,

    /// Which operation to perform.
    #[command(subcommand)]
    command: Command,
}

// NOTE: as with `Cli`, the `///` doc comments here are rendered by clap as the
// per-subcommand descriptions in `--help`.

/// The operations the binary can perform; `main` dispatches on this.
#[derive(Subcommand)]
enum Command {
    /// Scrape messages and merge them into the local message store (messages.toml).
    Scrape,
    /// Train embeddings on the stored messages and save them to the configured path.
    Train,
    /// Run the chat shell on top of the stored messages.
    Chat,
    /// Answer a single question and print the result to stdout.
    Ask {
        /// The question; all trailing words are joined with single spaces.
        #[arg(trailing_var_arg = true)]
        question: Vec<String>,
    },
}

#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();
    let config = config::Config::load(&cli.config)?;

    match cli.command {
        Command::Scrape => {
            let scraper = scraper::Scraper::new(&config)?;
            let incoming = scraper.scrape_all(&config).await?;
            let mut existing = storage::load("messages.toml")?;
            storage::merge(&mut existing, incoming);
            storage::save("messages.toml", &existing)?;
            eprintln!("saved {} total messages", existing.len());
        }

        Command::Train => {
            let messages = storage::load("messages.toml")?;
            if messages.is_empty() {
                anyhow::bail!("no messages found, run scrape first");
            }

            let embeddings = embed::Embeddings::train(
                &messages,
                config.embeddings.vector_dim,
                config.embeddings.window_size,
            );
            embeddings.save(&config.embeddings.storage_path)?;
            eprintln!("embeddings saved to {}", config.embeddings.storage_path);
        }

        Command::Chat => {
            let messages = storage::load("messages.toml")?;
            if messages.is_empty() {
                anyhow::bail!("no messages found, run scrape first");
            }

            let embeddings = embed::Embeddings::load(&config.embeddings.storage_path)
                .unwrap_or_else(|_| {
                    eprintln!("no embeddings found, training now...");
                    let e = embed::Embeddings::train(
                        &messages,
                        config.embeddings.vector_dim,
                        config.embeddings.window_size,
                    );
                    let _ = e.save(&config.embeddings.storage_path);
                    e
                });

            let texts: Vec<&str> = messages.iter().map(|m| m.content.as_str()).collect();
            let markov = markov::Markov::train(&texts, 5);

            let shell = shell::Shell::new(&config, &embeddings, &messages, markov);
            shell.run()?;
        }

        Command::Ask { question } => {
            let question = question.join(" ");
            let messages = storage::load("messages.toml")?;
            if messages.is_empty() {
                anyhow::bail!("no messages found, run scrape first");
            }

            let embeddings = embed::Embeddings::load(&config.embeddings.storage_path)
                .unwrap_or_else(|_| {
                    eprintln!("no embeddings found, training now...");
                    let e = embed::Embeddings::train(
                        &messages,
                        config.embeddings.vector_dim,
                        config.embeddings.window_size,
                    );
                    let _ = e.save(&config.embeddings.storage_path);
                    e
                });

            let texts: Vec<&str> = messages.iter().map(|m| m.content.as_str()).collect();
            let markov = markov::Markov::train(&texts, 5);

            let shell = shell::Shell::new(&config, &embeddings, &messages, markov);
            let answer = shell.answer(&question)?;
            println!("{}", answer);
        }
    }

    Ok(())
}