ruve-db 0.1.1

A hybrid vector and full-text search database with HNSW approximate nearest-neighbour indexing and BM25
Documentation
use rustyline::DefaultEditor;
use rustyline::error::ReadlineError;

use ruve::database::Database;
use ruve::embedder::Embedder;
use ruve::storage::retrieve_record;

/// Parses a bracketed, comma-separated list of `f32` values out of `s`.
///
/// The vector literal is delimited by the first `[` in `s` and the first `]` that
/// follows it; any text before the `[` is ignored. Each comma-separated component is
/// trimmed and parsed as `f32`.
///
/// Returns the parsed vector together with the trimmed text that follows the closing
/// bracket. Returns `None` when there is no `[`, when no `]` follows it, or when any
/// component fails to parse as `f32` (which includes an empty `[]`).
fn parse_vector(s: &str) -> Option<(Vec<f32>, &str)> {
    let s = s.trim();
    // Look for the closing bracket only *after* the opening one: a stray `]` earlier in
    // the input must not be paired with a later `[` (that used to produce an inverted
    // slice range and panic).
    let (_, after_open) = s.split_once('[')?;
    let (inner, remainder) = after_open.split_once(']')?;
    let vector = inner
        .split(',')
        .map(|n| n.trim().parse::<f32>())
        .collect::<Result<Vec<_>, _>>()
        .ok()?;
    Some((vector, remainder.trim()))
}

/// Number of results returned by `search` when the command does not specify `k`.
const DEFAULT_K: usize = 5;

/// Splits the argument of a `search` sub-command into the query text and result count.
///
/// The last space-separated token is taken as `k` only when it parses as a `usize`.
/// Otherwise the whole argument is the query and `k` is `DEFAULT_K`, so a multi-word
/// query given without a count keeps its final word.
fn split_query_and_k(args: &str) -> (&str, usize) {
    let args = args.trim();
    if let Some((query, last)) = args.rsplit_once(' ') {
        if let Ok(k) = last.parse::<usize>() {
            return (query.trim_end(), k);
        }
    }
    (args, DEFAULT_K)
}

/// Entry point: opens the database files under `data/`, then runs an interactive
/// read-eval-print loop until `quit`/`exit`, Ctrl-C, Ctrl-D, or a readline error.
///
/// Each input line is split into a command word and its argument; see the `help`
/// arm for the list of supported commands.
fn main() {
    // Needed for `flush()` on stdout while printing the `load` progress line.
    use std::io::Write;

    let mut db = Database::new(
        "data/data.bin",
        "data/index.json",
        "data/bm25_index.json",
        "data/hnsw.json",
        "data/hnsw_graph.bin",
    );
    let embedder = Embedder::new();

    let mut rl = DefaultEditor::new().expect("failed to initialise the line editor");

    println!("RuVe v0.1.0 — type help for available commands, quit to exit");

    loop {
        match rl.readline("ruve> ") {
            Ok(line) => {
                let line = line.trim();
                if line.is_empty() {
                    continue;
                }

                // History is a convenience; failing to record an entry is not fatal.
                rl.add_history_entry(line).ok();

                // Left-trim the argument so repeated spaces after the command word do
                // not leak into ids, file names, or the search mode.
                let (cmd, rest) = match line.split_once(' ') {
                    Some((cmd, rest)) => (cmd, rest.trim_start()),
                    None => (line, ""),
                };

                match cmd {
                    "quit" | "exit" => break,
                    "help" => {
                        println!("Commands:");
                        println!("  insert <text>                      insert a record (auto-embed)");
                        println!("  insert raw [1.0, 2.0] <text>       insert with a raw vector");
                        println!("  search vec <query> <k>             vector search, k defaults to 5");
                        println!("  search text <query> <k>            text search, k defaults to 5");
                        println!("  delete <id>                        delete a record by id");
                        println!("  wipe                               delete all records");
                        println!("  load <filename>                    load and index a book from books/");
                        println!("  list                               list all records");
                        println!("  quit, exit                         exit");
                    }
                    "list" => {
                        for (id, offset) in &db.index {
                            let record = retrieve_record(*offset, &db.data_path);
                            println!("{}{:?}", id, record.metadata);
                        }
                    }
                    "load" => {
                        if rest.is_empty() {
                            eprintln!("usage: load <filename>");
                        } else {
                            let path = format!("books/{}", rest);
                            match std::fs::read_to_string(&path) {
                                Err(e) => eprintln!("could not read {path}: {e}"),
                                Ok(text) => {
                                    // One record per non-blank line of the file.
                                    let lines: Vec<&str> = text
                                        .lines()
                                        .map(str::trim)
                                        .filter(|l| !l.is_empty())
                                        .collect();
                                    let total = lines.len();
                                    for (i, entry) in lines.into_iter().enumerate() {
                                        db.insert_raw(embedder.embed(entry), entry, None);
                                        // `\r` rewrites the same terminal line; flush so
                                        // the progress shows up without a newline.
                                        print!("\rindexing {}/{total}", i + 1);
                                        std::io::stdout().flush().ok();
                                    }
                                    println!("\ndone — {total} records inserted");
                                }
                            }
                        }
                    }
                    "delete" => {
                        if rest.is_empty() {
                            eprintln!("usage: delete <id>");
                        } else if db.delete(rest) {
                            println!("deleted");
                        } else {
                            eprintln!("no record with id: {rest}");
                        }
                    }
                    "wipe" => {
                        db.wipe();
                        println!("wiped");
                    }
                    "insert" => {
                        if rest.is_empty() {
                            eprintln!("usage: insert <text>");
                        } else if let Some(raw) = rest.strip_prefix("raw ") {
                            match parse_vector(raw) {
                                None => eprintln!("usage: insert raw [1.0, 2.0, ...] <text>"),
                                Some((vector, text)) => {
                                    db.insert_raw(vector, text, None);
                                    println!("inserted");
                                }
                            }
                        } else {
                            db.insert_raw(embedder.embed(rest), rest, None);
                            println!("inserted");
                        }
                    }
                    "search" => {
                        let (mode, args) = rest.split_once(' ').unwrap_or((rest, ""));
                        match mode {
                            // A search needs a query, just as the other commands need
                            // their argument.
                            "vec" | "text" if args.trim().is_empty() => {
                                eprintln!("usage: search vec <query> <k>  |  search text <query> <k>");
                            }
                            "vec" => {
                                let (query_text, k) = split_query_and_k(args);
                                let vector = embedder.embed(query_text);
                                println!("query vector dim: {}", vector.len());
                                let results = db.search_scored(&vector, k);
                                for (score, r) in &results {
                                    println!("{:.4} | dim={} | {}{:?}", score, r.vector.len(), r.id, r.metadata);
                                }
                                if results.is_empty() {
                                    println!("no results");
                                }
                            }
                            "text" => {
                                let (query_text, k) = split_query_and_k(args);
                                let results = db.text_search(query_text, k);
                                for r in &results {
                                    println!("{}{:?}", r.id, r.metadata);
                                }
                                if results.is_empty() {
                                    println!("no results");
                                }
                            }
                            _ => eprintln!("usage: search vec <query> <k>  |  search text <query> <k>"),
                        }
                    }
                    _ => eprintln!("unknown command: {cmd}"),
                }
            }
            // Ctrl-C and Ctrl-D both end the session quietly.
            Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break,
            Err(e) => {
                eprintln!("error: {e}");
                break;
            }
        }
    }
}