use anyhow::Result;
use clap::{Parser, Subcommand};
mod config;
mod scraper;
mod storage;
mod embed;
mod markov;
mod interpolate;
mod shell;
// Top-level command-line arguments of the `interpolize` binary.
//
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would alter the `--help` output.
#[derive(Parser)]
#[command(name = "interpolize")]
struct Cli {
    // Path of the configuration file handed to `config::Config::load`.
    // The flag names are written out explicitly; they equal what clap would
    // infer from the field name (`-c` / `--config`).
    #[arg(short = 'c', long = "config", default_value = "config.toml")]
    config: String,

    // The operation to perform; see `Command`.
    #[command(subcommand)]
    command: Command,
}
#[derive(Subcommand)]
enum Command {
Scrape,
Train,
Chat,
}
/// Program entry point: parses the command line, loads the configuration file
/// named by `--config`, and runs the selected subcommand.
///
/// # Errors
///
/// Propagates any error from loading the config, scraping, reading or writing
/// the message store, saving embeddings in `train`, or running the shell.
/// `train` and `chat` additionally fail with "no messages found, run scrape
/// first" when the message store is empty.
#[tokio::main]
async fn main() -> Result<()> {
    // Single definition of the message store path shared by all subcommands,
    // so the read and write sides can never drift apart.
    const MESSAGES_PATH: &str = "messages.toml";

    let cli = Cli::parse();
    let config = config::Config::load(&cli.config)?;

    match cli.command {
        Command::Scrape => {
            let scraper = scraper::Scraper::new(&config)?;
            let incoming = scraper.scrape_all(&config).await?;

            // Fold the freshly scraped batch into what is already stored,
            // then write the combined set back.
            let mut existing = storage::load(MESSAGES_PATH)?;
            storage::merge(&mut existing, incoming);
            storage::save(MESSAGES_PATH, &existing)?;
            eprintln!("saved {} total messages", existing.len());
        }
        Command::Train => {
            let messages = storage::load(MESSAGES_PATH)?;
            if messages.is_empty() {
                anyhow::bail!("no messages found, run scrape first");
            }

            let embeddings = embed::Embeddings::train(
                &messages,
                config.embeddings.vector_dim,
                config.embeddings.window_size,
            );
            embeddings.save(&config.embeddings.storage_path)?;
            eprintln!("embeddings saved to {}", config.embeddings.storage_path);
        }
        Command::Chat => {
            let messages = storage::load(MESSAGES_PATH)?;
            if messages.is_empty() {
                anyhow::bail!("no messages found, run scrape first");
            }

            let embeddings = match embed::Embeddings::load(&config.embeddings.storage_path) {
                Ok(loaded) => loaded,
                // Any load failure (not only a missing file) falls back to
                // training from the stored messages.
                Err(_) => {
                    eprintln!("no embeddings found, training now...");
                    let trained = embed::Embeddings::train(
                        &messages,
                        config.embeddings.vector_dim,
                        config.embeddings.window_size,
                    );
                    // Saving is best-effort: chat can continue with the
                    // in-memory embeddings, but the failure is reported
                    // instead of being silently dropped.
                    if let Err(err) = trained.save(&config.embeddings.storage_path) {
                        eprintln!(
                            "warning: could not save embeddings to {}: {}",
                            config.embeddings.storage_path, err
                        );
                    }
                    trained
                }
            };

            let texts: Vec<&str> = messages.iter().map(|m| m.content.as_str()).collect();
            // NOTE(review): the literal 5 is presumably the chain order /
            // n-gram size — confirm against `markov::Markov::train`.
            let markov = markov::Markov::train(&texts, 5);
            let shell = shell::Shell::new(&config, &embeddings, &messages, markov);
            shell.run()?;
        }
    }

    Ok(())
}