// mcpkill 0.1.0
//
// Universal MCP proxy — semantic cache + chunking to kill token waste.
use anyhow::Result;
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};

/// fastembed model variant (all-MiniLM-L6-v2) passed to `InitOptions` when the embedder is built.
const MODEL: EmbeddingModel = EmbeddingModel::AllMiniLML6V2;

/// Wraps fastembed TextEmbedding (all-MiniLM-L6-v2, ~23 MB, runs locally via ONNX).
///
/// The model is downloaded once, on first use, into `$FASTEMBED_CACHE_DIR`
/// (`.fastembed_cache/` relative to the current directory when the env var is unset).
pub struct Embedder {
    /// Loaded fastembed model that every embedding call goes through.
    model: TextEmbedding,
}

impl Embedder {
    /// Initialises the embedding model.
    ///
    /// When the model is not yet in the local cache, a notice is printed to
    /// stderr before initialisation so a slow first start is not mistaken for
    /// a hang. Download progress display is enabled on the fastembed options.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `TextEmbedding::try_new` if the model
    /// cannot be initialised.
    pub fn new() -> Result<Self> {
        // Warn the user on first run so they don't think it's frozen.
        if !model_is_cached() {
            let dir = std::env::var("FASTEMBED_CACHE_DIR")
                .unwrap_or_else(|_| ".fastembed_cache".to_string());
            eprintln!(
                "[mcpkill] First run: downloading embedding model (~23 MB) to {dir}"
            );
        }

        let options = InitOptions::new(MODEL).with_show_download_progress(true);
        let model = TextEmbedding::try_new(options)?;

        Ok(Self { model })
    }

    /// Embed a single text. Returns a 384-dim vector.
    ///
    /// # Errors
    ///
    /// Returns an error if the model fails, or if it unexpectedly yields no
    /// embedding for the input (previously this case panicked).
    pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
        let embedding = self
            .model
            .embed(vec![text], None)?
            .into_iter()
            .next()
            // A std error is used so `?` can convert it without extra imports.
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "embedding model returned no vector for the input text",
                )
            })?;
        Ok(embedding)
    }

    /// Embed a batch of texts efficiently.
    ///
    /// Returns one vector per input, in the order the model produces them.
    /// An empty `texts` slice yields an empty result without invoking the model.
    ///
    /// # Errors
    ///
    /// Returns the error reported by the model if embedding fails.
    pub fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        // Nothing to embed: skip the model call entirely.
        if texts.is_empty() {
            return Ok(Vec::new());
        }
        self.model.embed(texts.to_vec(), None)
    }
}

/// Returns `true` if the model directory is already present in the fastembed cache.
///
/// fastembed stores models under `$FASTEMBED_CACHE_DIR/models--Qdrant--all-MiniLM-L6-v2-onnx/`
/// (defaulting to `.fastembed_cache/` relative to the current directory when the env var is unset).
///
/// Only the existence of that directory is checked; the files inside it are
/// not inspected. The path is assembled with `Path::join`, so an empty
/// `FASTEMBED_CACHE_DIR` resolves relative to the current directory instead
/// of pointing at the filesystem root.
pub fn model_is_cached() -> bool {
    let cache_dir = std::env::var("FASTEMBED_CACHE_DIR")
        .unwrap_or_else(|_| ".fastembed_cache".to_string());
    std::path::Path::new(&cache_dir)
        .join("models--Qdrant--all-MiniLM-L6-v2-onnx")
        .exists()
}