use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// A dense vector stored for GPU-accelerated similarity search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuVector {
    /// Identifier for this vector (presumably unique within an index — confirm against the store).
    pub id: String,
    /// The vector components, one `f32` per dimension.
    pub data: Vec<f32>,
    /// Arbitrary string key/value annotations attached to this vector.
    pub metadata: HashMap<String, String>,
}
impl GpuVector {
    /// Approximate per-entry bookkeeping cost (two `String` headers plus a
    /// hash-map slot) used by [`memory_size`](Self::memory_size). Keeps the
    /// previous flat 32-byte heuristic as the fixed overhead component.
    const MAP_ENTRY_OVERHEAD: usize = 32;

    /// Creates a vector with no metadata attached.
    pub fn new(id: String, data: Vec<f32>) -> Self {
        // Delegate so there is a single construction path.
        Self::with_metadata(id, data, HashMap::new())
    }

    /// Creates a vector carrying arbitrary string metadata.
    pub fn with_metadata(id: String, data: Vec<f32>, metadata: HashMap<String, String>) -> Self {
        Self { id, data, metadata }
    }

    /// Number of components in the vector.
    pub fn dimension(&self) -> usize {
        self.data.len()
    }

    /// Estimated heap footprint of this vector in bytes.
    ///
    /// Counts the raw `f32` payload, the id bytes, and the metadata
    /// key/value bytes plus a fixed per-entry overhead. The previous
    /// estimate charged a flat 32 bytes per metadata entry regardless of
    /// string length, so long keys/values went entirely unaccounted for.
    pub fn memory_size(&self) -> usize {
        let metadata_bytes: usize = self
            .metadata
            .iter()
            .map(|(k, v)| k.len() + v.len() + Self::MAP_ENTRY_OVERHEAD)
            .sum();
        self.data.len() * std::mem::size_of::<f32>() + self.id.len() + metadata_bytes
    }
}
impl From<&GpuVector> for Vec<f32> {
    /// Copies out the raw component data, leaving the source vector intact.
    fn from(vector: &GpuVector) -> Self {
        vector.data.to_vec()
    }
}
/// Distance metric used for GPU similarity search.
///
/// Serde serializes variants in lowercase ("cosine", "euclidean",
/// "dotproduct"). The `Display` impl elsewhere in this file emits the
/// snake_case "dot_product", which previously could not be deserialized
/// back; the `alias` below accepts it on input without changing the
/// serialized output, so displayed values now round-trip.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum GpuDistanceMetric {
    Cosine,
    Euclidean,
    /// Inner-product similarity.
    #[serde(alias = "dot_product")]
    DotProduct,
}
impl std::fmt::Display for GpuDistanceMetric {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
GpuDistanceMetric::Cosine => write!(f, "cosine"),
GpuDistanceMetric::Euclidean => write!(f, "euclidean"),
GpuDistanceMetric::DotProduct => write!(f, "dot_product"),
}
}
}
/// A single hit returned by a GPU similarity search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuSearchResult {
    /// Id of the matched vector.
    pub id: String,
    /// Match score — whether higher or lower is better depends on the
    /// metric used; confirm against the search implementation.
    pub score: f32,
    /// Position of the match — presumably an index into the backing
    /// storage; NOTE(review): confirm against the producer.
    pub index: usize,
}
/// Static and runtime information describing a GPU device.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuDeviceInfo {
    /// Human-readable device name.
    pub name: String,
    /// Free-form name of the compute backend in use.
    pub backend: String,
    /// Total VRAM on the device, in bytes.
    pub total_vram_bytes: u64,
    /// VRAM currently free for allocation, in bytes.
    pub available_vram_bytes: u64,
    /// VRAM currently in use, in bytes.
    pub used_vram_bytes: u64,
    /// Driver version string as reported by the backend.
    pub driver_version: String,
    /// Compute capability (CUDA-style), when the backend reports one.
    pub compute_capability: Option<String>,
    /// Maximum number of threads per block supported by the device.
    pub max_threads_per_block: u32,
    /// Maximum shared memory per block, in bytes.
    pub max_shared_memory_per_block: u64,
    /// Backend-specific ordinal of the device.
    pub device_id: i32,
    /// PCI bus id, when available.
    pub pci_bus_id: Option<String>,
}
impl GpuDeviceInfo {
    /// Bytes per mebibyte, used by the `*_mb` helpers below.
    const MB: u64 = 1024 * 1024;

    /// Fraction of total VRAM in use, expressed as a percentage.
    /// Reports 0.0 for a device with no VRAM at all, avoiding a
    /// division by zero.
    pub fn vram_usage_percent(&self) -> f64 {
        match self.total_vram_bytes {
            0 => 0.0,
            total => (self.used_vram_bytes as f64 / total as f64) * 100.0,
        }
    }

    /// Whether at least `required_bytes` of VRAM remain free.
    pub fn has_available_vram(&self, required_bytes: u64) -> bool {
        required_bytes <= self.available_vram_bytes
    }

    /// Free VRAM in whole mebibytes (truncating division).
    pub fn available_vram_mb(&self) -> u64 {
        self.available_vram_bytes / Self::MB
    }

    /// Total VRAM in whole mebibytes (truncating division).
    pub fn total_vram_mb(&self) -> u64 {
        self.total_vram_bytes / Self::MB
    }
}
/// Feature and size limits advertised by a GPU backend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuCapabilities {
    /// Whether the backend can build and search HNSW indexes.
    pub supports_hnsw: bool,
    /// Whether batched operations are supported.
    pub supports_batch: bool,
    /// Largest vector dimension the backend accepts.
    pub max_dimension: usize,
    /// Largest batch size the backend accepts.
    pub max_batch_size: usize,
}
/// Snapshot of GPU memory usage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuMemoryStats {
    /// Total amount allocated — presumably bytes; confirm against the producer.
    pub total_allocated: usize,
    /// Amount still available — presumably bytes; confirm against the producer.
    pub available: usize,
    /// Utilization figure; whether this is a 0–1 ratio or a percentage is
    /// not shown here — NOTE(review): confirm against the producer.
    pub utilization: f32,
    /// Number of live buffers.
    pub buffer_count: usize,
}
/// Tuning parameters for an HNSW (Hierarchical Navigable Small World) index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HnswConfig {
    /// Maximum neighbors kept per node (the HNSW "M" parameter).
    pub max_connections: usize,
    /// Candidate-list size while building the graph (efConstruction).
    pub ef_construction: usize,
    /// Candidate-list size at query time (efSearch).
    pub ef_search: usize,
    /// Upper bound on the number of graph layers.
    pub max_level: usize,
    /// Multiplier controlling the layer-assignment distribution.
    pub level_multiplier: f32,
    /// Optional RNG seed for reproducible builds; `None` leaves seeding
    /// to the implementation.
    pub seed: Option<u64>,
}
impl Default for HnswConfig {
fn default() -> Self {
Self {
max_connections: 16,
ef_construction: 100,
ef_search: 50,
max_level: 8,
level_multiplier: 0.5,
seed: None,
}
}
}
/// Tuning parameters for an IVF (inverted-file) index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IvfConfig {
    /// Number of inverted lists (coarse clusters).
    pub n_list: usize,
    /// Number of lists probed per query.
    pub nprobe: usize,
    /// Maximum number of vectors sampled to train the coarse quantizer.
    pub training_sample_size: usize,
    /// Number of k-means iterations used during training.
    pub kmeans_iters: usize,
    /// Optional RNG seed for reproducible training; `None` leaves seeding
    /// to the implementation.
    pub seed: Option<u64>,
}
impl Default for IvfConfig {
fn default() -> Self {
Self {
n_list: 1024,
nprobe: 64,
training_sample_size: 256 * 1024,
kmeans_iters: 20,
seed: None,
}
}
}
impl IvfConfig {
    /// Derives an IVF configuration sized for a dataset of `n` vectors.
    ///
    /// Uses the common sqrt(n) heuristic for the number of inverted
    /// lists, bounded to [16, 65_536]; probes one sixteenth of the
    /// lists (at least one); and caps the training sample at 256
    /// vectors per list without exceeding the dataset itself.
    #[must_use]
    pub fn for_dataset_size(n: usize) -> Self {
        let lists = ((n as f64).sqrt().ceil() as usize).clamp(16, 65_536);
        let probes = std::cmp::max(lists / 16, 1);
        let sample = std::cmp::min(256 * lists, n);
        Self {
            n_list: lists,
            nprobe: probes,
            training_sample_size: sample,
            kmeans_iters: 20,
            seed: None,
        }
    }
}
/// Bookkeeping record linking a stored vector back to its source entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorMetadata {
    /// Id the vector was originally inserted under.
    pub original_id: String,
    /// Position assigned to the vector — presumably an index into the
    /// backing storage; NOTE(review): confirm against the producer.
    pub index: usize,
    /// Timestamp of creation/insertion; unit and epoch are not shown
    /// here — NOTE(review): confirm against the producer.
    pub timestamp: u64,
}