use crate::cli::{
export_csv, export_json, format_error, format_search_results, format_stats, format_success,
ProgressTracker,
};
use crate::config::Config;
use anyhow::{Context, Result};
use colored::*;
use ruvector_core::{
types::{DbOptions, SearchQuery, VectorEntry},
VectorDB,
};
use std::path::{Path, PathBuf};
use std::time::Instant;
/// Create a new vector database at `path` with the given dimensionality.
///
/// Storage path and dimensions override whatever `config` supplies; the
/// handle is dropped immediately (creation persists the files on disk).
pub fn create_database(path: &str, dimensions: usize, config: &Config) -> Result<()> {
    let mut options = config.to_db_options();
    options.storage_path = path.to_string();
    options.dimensions = dimensions;

    println!(
        "{}",
        format_success(&format!("Creating database at: {}", path))
    );
    println!(" Dimensions: {}", dimensions.to_string().cyan());
    println!(" Distance metric: {:?}", options.distance_metric);

    // Opening with `new` performs the actual creation; the handle itself
    // is not needed afterwards.
    let _db = VectorDB::new(options).context("Failed to create database")?;
    println!("{}", format_success("Database created successfully!"));
    Ok(())
}
/// Bulk-load vectors from `input_file` into the database at `db_path`.
///
/// `format` selects the parser ("json", "csv", or "npy"); anything else is
/// rejected. Entries are inserted in batches of `config.cli.batch_size`,
/// optionally driving a progress bar, and a throughput summary is printed.
pub fn insert_vectors(
    db_path: &str,
    input_file: &str,
    format: &str,
    config: &Config,
    show_progress: bool,
) -> Result<()> {
    let mut options = config.to_db_options();
    options.storage_path = db_path.to_string();
    let db = VectorDB::new(options).context("Failed to open database")?;

    // Dispatch on the declared input format.
    let entries = match format {
        "json" => parse_json_file(input_file)?,
        "csv" => parse_csv_file(input_file)?,
        "npy" => parse_npy_file(input_file)?,
        other => return Err(anyhow::anyhow!("Unsupported format: {}", other)),
    };

    let total = entries.len();
    println!(
        "{}",
        format_success(&format!("Loaded {} vectors from {}", total, input_file))
    );

    let start = Instant::now();
    let tracker = ProgressTracker::new();
    // Only build a bar when progress reporting was requested.
    let bar = show_progress.then(|| tracker.create_bar(total as u64, "Inserting vectors..."));

    let mut done = 0usize;
    for batch in entries.chunks(config.cli.batch_size) {
        db.insert_batch(batch.to_vec())
            .context("Failed to insert batch")?;
        done += batch.len();
        if let Some(b) = bar.as_ref() {
            b.set_position(done as u64);
        }
    }
    if let Some(b) = bar {
        b.finish_with_message("Insertion complete!");
    }

    let elapsed = start.elapsed();
    println!(
        "{}",
        format_success(&format!(
            "Inserted {} vectors in {:.2}s ({:.0} vectors/sec)",
            total,
            elapsed.as_secs_f64(),
            total as f64 / elapsed.as_secs_f64()
        ))
    );
    Ok(())
}
/// Run a k-nearest-neighbour search and print the formatted results plus
/// the elapsed query time in milliseconds.
pub fn search_vectors(
    db_path: &str,
    query_vector: Vec<f32>,
    k: usize,
    config: &Config,
    show_vectors: bool,
) -> Result<()> {
    let mut options = config.to_db_options();
    options.storage_path = db_path.to_string();
    let db = VectorDB::new(options).context("Failed to open database")?;

    let query = SearchQuery {
        vector: query_vector,
        k,
        filter: None,
        ef_search: None,
    };

    let start = Instant::now();
    let results = db.search(query).context("Failed to search")?;
    let millis = start.elapsed().as_secs_f64() * 1000.0;

    println!("{}", format_search_results(&results, show_vectors));
    println!(
        "\n{}",
        format!("Search completed in {:.2}ms", millis).dimmed()
    );
    Ok(())
}
/// Print database statistics (count, dimensions, metric) and, when HNSW
/// indexing is configured, its tuning parameters.
pub fn show_info(db_path: &str, config: &Config) -> Result<()> {
    let mut options = config.to_db_options();
    options.storage_path = db_path.to_string();
    let db = VectorDB::new(options).context("Failed to open database")?;

    let count = db.len().context("Failed to get count")?;
    let opts = db.options();
    let metric = format!("{:?}", opts.distance_metric);
    println!("{}", format_stats(count, opts.dimensions, &metric));

    if let Some(hnsw) = opts.hnsw_config.as_ref() {
        println!("{}", "HNSW Configuration:".bold().green());
        println!(" M: {}", hnsw.m.to_string().cyan());
        println!(
            " ef_construction: {}",
            hnsw.ef_construction.to_string().cyan()
        );
        println!(" ef_search: {}", hnsw.ef_search.to_string().cyan());
    }
    Ok(())
}
/// Benchmark search throughput against the database at `db_path`.
///
/// Generates `num_queries` uniform-random query vectors, runs up to 10
/// warm-up searches (errors deliberately ignored), then times `num_queries`
/// k=10 searches and reports total time, QPS, and mean latency.
pub fn run_benchmark(db_path: &str, config: &Config, num_queries: usize) -> Result<()> {
    let mut db_options = config.to_db_options();
    db_options.storage_path = db_path.to_string();
    let db = VectorDB::new(db_options).context("Failed to open database")?;
    let dimensions = db.options().dimensions;

    println!("{}", "Running benchmark...".bold().green());
    println!(" Queries: {}", num_queries.to_string().cyan());
    println!(" Dimensions: {}", dimensions.to_string().cyan());

    use rand::Rng;
    let mut rng = rand::thread_rng();
    let queries: Vec<Vec<f32>> = (0..num_queries)
        .map(|_| (0..dimensions).map(|_| rng.gen()).collect())
        .collect();

    // Warm-up: prime caches / index structures; results and errors ignored.
    for query in queries.iter().take(10) {
        let _ = db.search(SearchQuery {
            vector: query.clone(),
            k: 10,
            filter: None,
            ef_search: None,
        });
    }

    let start = Instant::now();
    for query in &queries {
        db.search(SearchQuery {
            vector: query.clone(),
            k: 10,
            filter: None,
            ef_search: None,
        })
        .context("Search failed")?;
    }
    let elapsed = start.elapsed();
    let qps = num_queries as f64 / elapsed.as_secs_f64();
    let avg_latency = elapsed.as_secs_f64() * 1000.0 / num_queries as f64;

    println!("\n{}", "Benchmark Results:".bold().green());
    println!(" Total time: {:.2}s", elapsed.as_secs_f64());
    // BUG FIX: `{:.0}` / `{:.2}` were previously applied to the *colored
    // string*, and for Display strings a precision is a maximum-length
    // truncation — the QPS printed as empty and latency was cut to 2 chars.
    // Format the number first, then colorize the resulting string.
    println!(" Queries per second: {}", format!("{:.0}", qps).cyan());
    println!(" Average latency: {}ms", format!("{:.2}", avg_latency).cyan());
    Ok(())
}
/// Export the database at `db_path` to `output_file` in the given `format`.
///
/// # Errors
/// Currently always returns an error: export is blocked on a
/// `VectorDB::all_ids()` API that does not exist yet. The database is still
/// opened first so path/config problems surface with a precise message.
pub fn export_database(
    db_path: &str,
    output_file: &str,
    format: &str,
    config: &Config,
) -> Result<()> {
    let mut db_options = config.to_db_options();
    db_options.storage_path = db_path.to_string();
    // Bind as `_db`: the handle is only opened for validation today.
    let _db = VectorDB::new(db_options).context("Failed to open database")?;
    // `format` is accepted for forward compatibility but not yet consumed;
    // silence the unused-parameter warning explicitly.
    let _ = format;
    println!(
        "{}",
        format_success(&format!("Exporting database to: {}", output_file))
    );
    Err(anyhow::anyhow!(
        "Export functionality requires VectorDB::all_ids() method. This will be implemented in a future update."
    ))
}
/// Import vectors from an external vector-database export.
///
/// Supported `source` values are "faiss", "pinecone", and "weaviate", but
/// every path is currently a stub. The remaining parameters are reserved
/// for the real implementations (underscore-prefixed to avoid warnings;
/// positional call sites are unaffected).
///
/// # Errors
/// Always returns an error: either "not yet implemented" for a recognized
/// source, or "Unsupported source" otherwise.
pub fn import_from_external(
    _db_path: &str,
    source: &str,
    _source_path: &str,
    _config: &Config,
) -> Result<()> {
    println!(
        "{}",
        format_success(&format!("Importing from {} database", source))
    );
    // The match is the tail expression; no `return` needed per arm.
    match source {
        "faiss" => Err(anyhow::anyhow!("FAISS import not yet implemented")),
        "pinecone" => Err(anyhow::anyhow!("Pinecone import not yet implemented")),
        "weaviate" => Err(anyhow::anyhow!("Weaviate import not yet implemented")),
        other => Err(anyhow::anyhow!("Unsupported source: {}", other)),
    }
}
/// Read `path` and deserialize it as a JSON array of `VectorEntry` records.
fn parse_json_file(path: &str) -> Result<Vec<VectorEntry>> {
    let text = std::fs::read_to_string(path).context("Failed to read JSON file")?;
    let entries = serde_json::from_str(&text).context("Failed to parse JSON")?;
    Ok(entries)
}
/// Parse a CSV file into vector entries.
///
/// Expected columns (header row consumed by the `csv` reader):
/// 0: optional id (empty or absent -> `None`),
/// 1: required JSON-encoded vector, e.g. `[0.1, 0.2]`,
/// 2: optional JSON metadata object (empty or absent -> `None`).
fn parse_csv_file(path: &str) -> Result<Vec<VectorEntry>> {
    let mut reader = csv::Reader::from_path(path).context("Failed to open CSV file")?;
    let mut entries = Vec::new();
    for result in reader.records() {
        let record = result.context("Failed to read CSV record")?;
        // Single lookup instead of the previous check-then-unwrap pair.
        let id = record
            .get(0)
            .filter(|s| !s.is_empty())
            .map(str::to_string);
        let vector: Vec<f32> =
            serde_json::from_str(record.get(1).context("Missing vector column")?)
                .context("Failed to parse vector")?;
        let metadata = match record.get(2) {
            Some(s) if !s.is_empty() => {
                Some(serde_json::from_str(s).context("Failed to parse metadata")?)
            }
            _ => None,
        };
        entries.push(VectorEntry {
            id,
            vector,
            metadata,
        });
    }
    Ok(entries)
}
/// Load a 2-D float32 `.npy` matrix; each row becomes one `VectorEntry`
/// with a synthetic id of the form `vec_{row_index}` and no metadata.
fn parse_npy_file(path: &str) -> Result<Vec<VectorEntry>> {
    use ndarray::Array2;
    use ndarray_npy::ReadNpyExt;

    let file = std::fs::File::open(path).context("Failed to open NPY file")?;
    let matrix: Array2<f32> = Array2::read_npy(file).context("Failed to read NPY file")?;

    let mut entries = Vec::with_capacity(matrix.nrows());
    for (index, row) in matrix.outer_iter().enumerate() {
        entries.push(VectorEntry {
            id: Some(format!("vec_{}", index)),
            vector: row.to_vec(),
            metadata: None,
        });
    }
    Ok(entries)
}