// ruve-db 0.1.1
//
// A hybrid vector and full-text search database with HNSW approximate nearest-neighbour indexing and BM25
// Documentation
use ruve::database::Database;

/// Scratch directory owned by a single test.
///
/// The wrapped path is created by [`TempDir::new`] and the whole tree is
/// removed again when the value is dropped.
struct TempDir(std::path::PathBuf);

impl TempDir {
    /// Creates a fresh directory under the system temp dir and returns its handle.
    ///
    /// The directory name combines `label`, the process id, the nanosecond
    /// timestamp and a per-process sequence number, so repeated calls with the
    /// same label never share a directory within one test run.
    ///
    /// # Panics
    ///
    /// Panics if the directory cannot be created.
    fn new(label: &str) -> Self {
        // Per-process sequence number: keeps names distinct even when two calls
        // observe the same clock reading (coarse timers, parallel tests).
        static NEXT_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
        let seq = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

        // A clock set before the epoch only costs us the timestamp component;
        // the process id and sequence number still keep the name unique.
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_nanos());

        let path = std::env::temp_dir().join(format!(
            "ruve_test_{}_{}_{}_{}",
            label,
            std::process::id(),
            nanos,
            seq
        ));
        std::fs::create_dir_all(&path).expect("failed to create temporary test directory");
        TempDir(path)
    }
}

impl Drop for TempDir {
    /// Removes the directory tree; failures are ignored on purpose so that a
    /// cleanup problem never masks the outcome of the test itself.
    fn drop(&mut self) {
        let _ = std::fs::remove_dir_all(&self.0);
    }
}

/// Opens a `Database` whose five backing files all live inside `dir`.
///
/// Panics if any of the joined paths is not valid UTF-8.
fn make_db(dir: &TempDir) -> Database {
    // Resolve a file name inside the temp dir to an owned UTF-8 path string.
    let path_of = |file: &str| -> String {
        let full = dir.0.join(file);
        full.to_str().unwrap().to_string()
    };

    let data = path_of("data.bin");
    let index = path_of("index.json");
    let bm25 = path_of("bm25.json");
    let hnsw = path_of("hnsw.json");
    let graph = path_of("graph.bin");

    Database::new(&data, &index, &bm25, &hnsw, &graph)
}


/// A database opened on an empty directory yields no hits from either the
/// vector search or the text search.
#[test]
fn empty_db_searches_return_empty() {
    let tmp = TempDir::new("empty");
    let database = make_db(&tmp);

    let vector_hits = database.search_hnsw(&[1.0, 0.0], 5);
    assert!(vector_hits.is_empty());

    let text_hits = database.text_search("anything", 5);
    assert!(text_hits.is_empty());
}


/// Stores three axis-aligned vectors and checks that querying with the first
/// one puts its own record ("red") at the top of the result list.
#[test]
fn nearest_neighbour_is_most_similar_vector() {
    let tmp = TempDir::new("hnsw");
    let mut database = make_db(&tmp);

    // One record per coordinate axis, inserted in the same order as before.
    database.insert_raw(vec![1.0, 0.0, 0.0], "red", None);
    database.insert_raw(vec![0.0, 1.0, 0.0], "green", None);
    database.insert_raw(vec![0.0, 0.0, 1.0], "blue", None);

    let hits = database.search_hnsw(&[1.0, 0.0, 0.0], 3);
    assert!(!hits.is_empty());

    let best = &hits[0];
    assert_eq!(best.metadata.as_deref(), Some("red"));
}

/// With ten records stored, a search for three neighbours must return at
/// least one hit and never more than three.
#[test]
fn hnsw_respects_k_limit() {
    let dir = TempDir::new("hnsw_k");
    let mut db = make_db(&dir);

    for i in 0..10 {
        db.insert_raw(vec![i as f32, 0.0], &format!("doc {i}"), None);
    }

    let results = db.search_hnsw(&[1.0, 0.0], 3);

    // Without this check the upper-bound assertion below would also pass for a
    // search that silently returns nothing.
    assert!(
        !results.is_empty(),
        "search over a populated index returned no results"
    );
    assert!(
        results.len() <= 3,
        "expected at most 3 results, got {}",
        results.len()
    );
}

/// A database holding exactly one record returns that record, and only that
/// record, even when more neighbours are requested.
#[test]
fn single_record_hnsw_search_works() {
    let tmp = TempDir::new("hnsw_single");
    let mut database = make_db(&tmp);
    database.insert_raw(vec![1.0, 0.0], "only one", None);

    let hits = database.search_hnsw(&[1.0, 0.0], 5);

    assert_eq!(hits.len(), 1);
    let only = &hits[0];
    assert_eq!(only.metadata.as_deref(), Some("only one"));
}

// ── insert + BM25 text search ─────────────────────────────────────────────────

/// Of two stored documents only the one containing the query term "rust" is
/// returned by the text search.
#[test]
fn text_search_returns_matching_document() {
    let tmp = TempDir::new("bm25");
    let mut database = make_db(&tmp);

    database.insert_raw(vec![1.0, 0.0], "rust programming language", None);
    database.insert_raw(vec![0.0, 1.0], "python scripting language", None);

    let hits = database.text_search("rust", 5);

    assert_eq!(hits.len(), 1);
    let matched = &hits[0];
    assert_eq!(matched.metadata.as_deref(), Some("rust programming language"));
}

/// Searching for a term that occurs in no stored document gives no hits.
#[test]
fn text_search_returns_empty_for_unknown_term() {
    let tmp = TempDir::new("bm25_miss");
    let mut database = make_db(&tmp);
    database.insert_raw(vec![1.0], "rust programming", None);

    let hits = database.text_search("haskell", 5);
    assert!(hits.is_empty());
}

/// Both documents contain "vector"; the one repeating the term three times is
/// expected to be ranked first.
#[test]
fn text_search_ranks_more_relevant_doc_first() {
    let tmp = TempDir::new("bm25_rank");
    let mut database = make_db(&tmp);

    database.insert_raw(vec![1.0, 0.0], "vector search", None);
    database.insert_raw(vec![0.0, 1.0], "vector vector vector database", None);

    let hits = database.text_search("vector", 5);

    assert_eq!(hits.len(), 2);
    let top = &hits[0];
    assert_eq!(top.metadata.as_deref(), Some("vector vector vector database"));
}


/// A caller-supplied id is used as the key in `db.index` and is reported as
/// the `id` of the matching text-search result.
#[test]
fn custom_id_is_preserved_in_index_and_search() {
    let dir = TempDir::new("custom_id");
    let mut db = make_db(&dir);

    db.insert_raw(vec![1.0, 0.0], "document", Some("my-key"));
    assert!(db.index.contains_key("my-key"));

    let results = db.text_search("document", 1);
    // Check the length first so a missing hit is reported as a failed
    // assertion rather than an index-out-of-bounds panic on `results[0]`.
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].id, "my-key");
}


/// Deleting an existing id reports success and removes the key from `index`.
#[test]
fn delete_removes_record_from_index() {
    let tmp = TempDir::new("delete");
    let mut database = make_db(&tmp);
    database.insert_raw(vec![1.0], "gone", Some("del-id"));

    let was_deleted = database.delete("del-id");
    assert!(was_deleted);

    let still_indexed = database.index.contains_key("del-id");
    assert!(!still_indexed);
}

/// After a record is deleted, its text is no longer found by the text search.
#[test]
fn delete_removes_record_from_text_search() {
    let dir = TempDir::new("delete_bm25");
    let mut db = make_db(&dir);

    db.insert_raw(vec![1.0], "unique token zxyq", Some("del-id"));

    // Precondition: the token must be searchable before deletion, otherwise
    // the final assertion would pass without the delete doing anything.
    assert!(
        !db.text_search("zxyq", 5).is_empty(),
        "record should be searchable before it is deleted"
    );

    // Check the return value so a failed delete is reported as such.
    assert!(db.delete("del-id"), "delete should succeed for an existing id");

    assert!(db.text_search("zxyq", 5).is_empty());
}

/// Deleting an id that was never inserted reports `false`.
#[test]
fn delete_returns_false_for_unknown_id() {
    let tmp = TempDir::new("delete_miss");
    let mut database = make_db(&tmp);

    let was_deleted = database.delete("does-not-exist");
    assert!(!was_deleted);
}

/// After `wipe`, the id index is empty and neither search path returns hits.
#[test]
fn wipe_empties_all_indices() {
    let tmp = TempDir::new("wipe");
    let mut database = make_db(&tmp);

    database.insert_raw(vec![1.0, 0.0], "some text", None);
    database.insert_raw(vec![0.0, 1.0], "more text", None);

    database.wipe();

    assert!(database.index.is_empty());

    let vector_hits = database.search_hnsw(&[1.0, 0.0], 5);
    assert!(vector_hits.is_empty());

    let text_hits = database.text_search("some", 5);
    assert!(text_hits.is_empty());
}

/// A record inserted through one `Database` handle is visible, by id and by
/// text search, through a second handle opened on the same files.
#[test]
fn data_survives_db_reopen() {
    let tmp = TempDir::new("persist");

    // First handle: write one record, then release the handle before reopening.
    let mut writer = make_db(&tmp);
    writer.insert_raw(vec![1.0, 0.0], "persisted record", Some("persist-id"));
    drop(writer);

    // Second handle over the same files.
    let reader = make_db(&tmp);
    assert!(reader.index.contains_key("persist-id"));

    let hits = reader.text_search("persisted", 1);
    assert_eq!(hits.len(), 1);
    let found = &hits[0];
    assert_eq!(found.id, "persist-id");
}