#![cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
use super::context::{ComputePipeline, IntelContext, SgemvPushConstants};
use crate::error::{HiveGpuError, Result};
use crate::traits::GpuVectorStorage;
use crate::types::{GpuDistanceMetric, GpuSearchResult, GpuVector};
use ash::vk;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tracing::{debug, info};
const MIN_INITIAL_VECTORS: usize = 1024;
const MIN_INITIAL_BYTES: usize = 1024 * 1024;
/// A Vulkan buffer plus its backing allocation and (optional) persistent
/// host mapping. `host_ptr` is non-null only for allocations mapped at
/// creation time (see `allocate_host_visible_buffer`).
pub(crate) struct VulkanBuffer {
// Buffer handle bound to `memory` at offset 0.
pub buffer: vk::Buffer,
// Device memory backing `buffer`.
pub memory: vk::DeviceMemory,
// Usable size in bytes. Set from the allocation size reported by
// `vkGetBufferMemoryRequirements`, which may exceed the requested size.
pub size_bytes: usize,
// Persistently mapped pointer, or null when the buffer is not host-mapped.
pub host_ptr: *mut u8,
}
// SAFETY: NOTE(review) — these impls are required because of the raw
// pointer. They are sound only if callers never perform unsynchronized
// concurrent writes through `host_ptr`; confirm all cross-thread use is
// externally synchronized.
unsafe impl Send for VulkanBuffer {}
unsafe impl Sync for VulkanBuffer {}
impl VulkanBuffer {
/// Releases the buffer: unmaps (if host-mapped), destroys the buffer
/// handle, then frees the device memory. Consumes `self` so the handles
/// cannot be reused.
///
/// NOTE(review): assumes the GPU has finished all work referencing this
/// buffer — callers submit with a blocking fence wait, but confirm for any
/// new call sites.
pub(crate) fn destroy(self, device: &ash::Device) {
// SAFETY: all handles were created from `device`; unmap must precede free.
unsafe {
if !self.host_ptr.is_null() {
device.unmap_memory(self.memory);
}
device.destroy_buffer(self.buffer, None);
device.free_memory(self.memory, None);
}
}
}
pub(crate) fn allocate_host_visible_buffer(
context: &IntelContext,
size_bytes: usize,
) -> Result<VulkanBuffer> {
let size_bytes = size_bytes.max(1);
let device = context.device();
let buffer_info = vk::BufferCreateInfo::default()
.size(size_bytes as u64)
.usage(
vk::BufferUsageFlags::STORAGE_BUFFER
| vk::BufferUsageFlags::TRANSFER_SRC
| vk::BufferUsageFlags::TRANSFER_DST,
)
.sharing_mode(vk::SharingMode::EXCLUSIVE);
let buffer = unsafe { device.create_buffer(&buffer_info, None) }
.map_err(|e| HiveGpuError::VulkanError(format!("create_buffer: {e:?}")))?;
let reqs = unsafe { device.get_buffer_memory_requirements(buffer) };
let memory_type_index = pick_memory_type(
context,
reqs.memory_type_bits,
vk::MemoryPropertyFlags::HOST_VISIBLE | vk::MemoryPropertyFlags::HOST_COHERENT,
)
.ok_or_else(|| {
HiveGpuError::VulkanError("no host-visible memory type available".to_string())
})?;
let alloc_info = vk::MemoryAllocateInfo::default()
.allocation_size(reqs.size)
.memory_type_index(memory_type_index);
let memory = unsafe { device.allocate_memory(&alloc_info, None) }.map_err(|e| {
unsafe {
device.destroy_buffer(buffer, None);
}
HiveGpuError::VulkanError(format!("allocate_memory: {e:?}"))
})?;
unsafe {
device
.bind_buffer_memory(buffer, memory, 0)
.map_err(|e| HiveGpuError::VulkanError(format!("bind_buffer_memory: {e:?}")))?;
}
let host_ptr = unsafe {
device
.map_memory(memory, 0, reqs.size, vk::MemoryMapFlags::empty())
.map_err(|e| HiveGpuError::VulkanError(format!("map_memory: {e:?}")))?
as *mut u8
};
Ok(VulkanBuffer {
buffer,
memory,
size_bytes: reqs.size as usize,
host_ptr,
})
}
/// Finds the first memory type allowed by `type_bits` that has all of the
/// `required` property flags, returning its index.
fn pick_memory_type(
    context: &IntelContext,
    type_bits: u32,
    required: vk::MemoryPropertyFlags,
) -> Option<u32> {
    let props = context.memory_properties();
    let active = &props.memory_types[..props.memory_type_count as usize];
    active
        .iter()
        .enumerate()
        .find(|(index, ty)| {
            // Bit `index` of `type_bits` marks the type as acceptable.
            (type_bits >> index) & 1 == 1 && ty.property_flags.contains(required)
        })
        .map(|(index, _)| index as u32)
}
/// Copies `data` into the start of a host-mapped buffer.
///
/// # Errors
/// Fails if the buffer has no host mapping or `data` does not fit.
pub(crate) fn write_f32_slice(buf: &VulkanBuffer, data: &[f32]) -> Result<()> {
    if buf.host_ptr.is_null() {
        return Err(HiveGpuError::VulkanError(
            "write_f32_slice requires a host-visible buffer".to_string(),
        ));
    }
    let bytes = std::mem::size_of_val(data);
    if bytes > buf.size_bytes {
        return Err(HiveGpuError::VulkanError(format!(
            "write overflows buffer: {bytes} > {size}",
            size = buf.size_bytes
        )));
    }
    // SAFETY: `host_ptr` is valid for `size_bytes` bytes, `bytes` was
    // bounds-checked above, and the two ranges cannot overlap.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr().cast::<u8>(), buf.host_ptr, bytes);
    }
    Ok(())
}
/// Copies `data` into a host-mapped buffer starting at `byte_offset`.
///
/// # Errors
/// Fails if the buffer has no host mapping, or if `byte_offset + len(data)`
/// overflows or exceeds the buffer size. The end offset is computed with
/// `checked_add`: in release builds the original `byte_offset + bytes`
/// could wrap, pass the bounds check, and write out of bounds.
pub(crate) fn write_f32_slice_at(
    buf: &VulkanBuffer,
    byte_offset: usize,
    data: &[f32],
) -> Result<()> {
    if buf.host_ptr.is_null() {
        return Err(HiveGpuError::VulkanError(
            "write_f32_slice_at requires a host-visible buffer".to_string(),
        ));
    }
    let bytes = std::mem::size_of_val(data);
    let overflow_err = || {
        HiveGpuError::VulkanError(format!(
            "offset write overflows buffer: {byte_offset}+{bytes} > {size}",
            size = buf.size_bytes
        ))
    };
    let end = byte_offset.checked_add(bytes).ok_or_else(overflow_err)?;
    if end > buf.size_bytes {
        return Err(overflow_err());
    }
    // SAFETY: `byte_offset + bytes <= size_bytes` was verified above, so the
    // destination range lies within the mapping; the ranges cannot overlap.
    unsafe {
        std::ptr::copy_nonoverlapping(
            data.as_ptr() as *const u8,
            buf.host_ptr.add(byte_offset),
            bytes,
        );
    }
    Ok(())
}
/// Reads `count` f32 values from the start of a host-mapped buffer.
///
/// # Errors
/// Fails if the buffer has no host mapping, if `count * 4` overflows
/// `usize` (the original unchecked multiply could wrap and bypass the
/// bounds check), or if the requested range exceeds the buffer.
pub(crate) fn read_f32_vec(buf: &VulkanBuffer, count: usize) -> Result<Vec<f32>> {
    if buf.host_ptr.is_null() {
        return Err(HiveGpuError::VulkanError(
            "read_f32_vec requires a host-visible buffer".to_string(),
        ));
    }
    let bytes = count
        .checked_mul(std::mem::size_of::<f32>())
        .ok_or_else(|| {
            HiveGpuError::VulkanError(format!("read size overflows usize: {count} f32s"))
        })?;
    if bytes > buf.size_bytes {
        return Err(HiveGpuError::VulkanError(format!(
            "read overflows buffer: {bytes} > {size}",
            size = buf.size_bytes
        )));
    }
    let mut out = vec![0f32; count];
    // SAFETY: `bytes <= size_bytes` was verified above, and `out` owns
    // exactly `bytes` writable bytes; the ranges cannot overlap.
    unsafe {
        std::ptr::copy_nonoverlapping(
            buf.host_ptr as *const u8,
            out.as_mut_ptr() as *mut u8,
            bytes,
        );
    }
    Ok(out)
}
pub(crate) fn dtod_copy(
context: &IntelContext,
src: &VulkanBuffer,
dst: &VulkanBuffer,
bytes: usize,
) -> Result<()> {
if bytes == 0 {
return Ok(());
}
let device = context.device();
let cb = begin_one_shot_command(context)?;
let region = [vk::BufferCopy {
src_offset: 0,
dst_offset: 0,
size: bytes as u64,
}];
unsafe {
device.cmd_copy_buffer(cb, src.buffer, dst.buffer, ®ion);
}
end_and_submit_one_shot(context, cb)
}
pub(crate) fn dispatch_three_buffer_compute<P: bytemuck_like::PodLike>(
context: &IntelContext,
pipeline: ComputePipeline,
buffers: [&VulkanBuffer; 3],
push_constants: P,
grid: (u32, u32, u32),
) -> Result<()> {
let device = context.device();
let layouts = [pipeline.set_layout];
let ds_alloc = vk::DescriptorSetAllocateInfo::default()
.descriptor_pool(context.descriptor_pool())
.set_layouts(&layouts);
let descriptor_sets = unsafe { device.allocate_descriptor_sets(&ds_alloc) }
.map_err(|e| HiveGpuError::VulkanError(format!("allocate_descriptor_sets: {e:?}")))?;
let descriptor_set = descriptor_sets[0];
let buffer_infos: [vk::DescriptorBufferInfo; 3] = [
vk::DescriptorBufferInfo::default()
.buffer(buffers[0].buffer)
.offset(0)
.range(vk::WHOLE_SIZE),
vk::DescriptorBufferInfo::default()
.buffer(buffers[1].buffer)
.offset(0)
.range(vk::WHOLE_SIZE),
vk::DescriptorBufferInfo::default()
.buffer(buffers[2].buffer)
.offset(0)
.range(vk::WHOLE_SIZE),
];
let writes = [
vk::WriteDescriptorSet::default()
.dst_set(descriptor_set)
.dst_binding(0)
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
.buffer_info(std::slice::from_ref(&buffer_infos[0])),
vk::WriteDescriptorSet::default()
.dst_set(descriptor_set)
.dst_binding(1)
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
.buffer_info(std::slice::from_ref(&buffer_infos[1])),
vk::WriteDescriptorSet::default()
.dst_set(descriptor_set)
.dst_binding(2)
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
.buffer_info(std::slice::from_ref(&buffer_infos[2])),
];
unsafe {
device.update_descriptor_sets(&writes, &[]);
}
let cb = begin_one_shot_command(context)?;
unsafe {
device.cmd_bind_pipeline(cb, vk::PipelineBindPoint::COMPUTE, pipeline.pipeline);
device.cmd_bind_descriptor_sets(
cb,
vk::PipelineBindPoint::COMPUTE,
pipeline.layout,
0,
&[descriptor_set],
&[],
);
let pc_bytes = std::slice::from_raw_parts(
&push_constants as *const P as *const u8,
std::mem::size_of::<P>(),
);
device.cmd_push_constants(
cb,
pipeline.layout,
vk::ShaderStageFlags::COMPUTE,
0,
pc_bytes,
);
device.cmd_dispatch(cb, grid.0, grid.1, grid.2);
}
end_and_submit_one_shot(context, cb)?;
unsafe {
device
.free_descriptor_sets(context.descriptor_pool(), &[descriptor_set])
.map_err(|e| HiveGpuError::VulkanError(format!("free_descriptor_sets: {e:?}")))?;
}
Ok(())
}
/// Allocates one primary command buffer from the shared pool and puts it in
/// the recording state with ONE_TIME_SUBMIT.
///
/// # Errors
/// Returns `VulkanError` on allocation or begin failure. On begin failure
/// the command buffer is returned to the pool (the original leaked it).
fn begin_one_shot_command(context: &IntelContext) -> Result<vk::CommandBuffer> {
    let device = context.device();
    let alloc = vk::CommandBufferAllocateInfo::default()
        .command_pool(context.command_pool())
        .level(vk::CommandBufferLevel::PRIMARY)
        .command_buffer_count(1);
    let buffers = unsafe { device.allocate_command_buffers(&alloc) }
        .map_err(|e| HiveGpuError::VulkanError(format!("allocate_command_buffers: {e:?}")))?;
    let cb = buffers[0];
    let begin =
        vk::CommandBufferBeginInfo::default().flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT);
    if let Err(e) = unsafe { device.begin_command_buffer(cb, &begin) } {
        // Don't leak the freshly allocated command buffer on failure.
        unsafe { device.free_command_buffers(context.command_pool(), &buffers) };
        return Err(HiveGpuError::VulkanError(format!("begin_command_buffer: {e:?}")));
    }
    Ok(cb)
}
/// Ends recording on `cb`, submits it, and blocks until the GPU signals the
/// fence. The fence and command buffer are always released, including on
/// every error path (the original leaked both when end/create/submit/wait
/// failed, because `?` returned before the cleanup calls).
fn end_and_submit_one_shot(context: &IntelContext, cb: vk::CommandBuffer) -> Result<()> {
    let device = context.device();
    let cmd_buffers = [cb];
    if let Err(e) = unsafe { device.end_command_buffer(cb) } {
        unsafe { device.free_command_buffers(context.command_pool(), &cmd_buffers) };
        return Err(HiveGpuError::VulkanError(format!("end_command_buffer: {e:?}")));
    }
    let fence = match unsafe { device.create_fence(&vk::FenceCreateInfo::default(), None) } {
        Ok(fence) => fence,
        Err(e) => {
            unsafe { device.free_command_buffers(context.command_pool(), &cmd_buffers) };
            return Err(HiveGpuError::VulkanError(format!("create_fence: {e:?}")));
        }
    };
    let submit = [vk::SubmitInfo::default().command_buffers(&cmd_buffers)];
    // Submit and wait; defer cleanup so both handles are released whether or
    // not this succeeds.
    let result = unsafe {
        device
            .queue_submit(context.queue(), &submit, fence)
            .map_err(|e| HiveGpuError::VulkanError(format!("queue_submit: {e:?}")))
            .and_then(|_| {
                device
                    .wait_for_fences(&[fence], true, u64::MAX)
                    .map_err(|e| HiveGpuError::VulkanError(format!("wait_for_fences: {e:?}")))
            })
    };
    unsafe {
        device.destroy_fence(fence, None);
        device.free_command_buffers(context.command_pool(), &cmd_buffers);
    }
    result
}
/// Dispatches `pipeline` with three storage-buffer bindings, each described
/// as `(buffer, byte_offset, byte_range)`, pushes `push_constants`, and
/// blocks until the GPU completes.
///
/// # Errors
/// Returns `VulkanError` on descriptor allocation, recording, or submission
/// failure. The descriptor set is freed on every path — the original leaked
/// it whenever `begin_one_shot_command` or the submit failed, because `?`
/// returned before `free_descriptor_sets`.
pub(crate) fn dispatch_three_buffer_compute_ranged<P: bytemuck_like::PodLike>(
    context: &IntelContext,
    pipeline: ComputePipeline,
    bindings: [(&VulkanBuffer, u64, u64); 3],
    push_constants: P,
    grid: (u32, u32, u32),
) -> Result<()> {
    let device = context.device();
    let layouts = [pipeline.set_layout];
    let ds_alloc = vk::DescriptorSetAllocateInfo::default()
        .descriptor_pool(context.descriptor_pool())
        .set_layouts(&layouts);
    let descriptor_sets = unsafe { device.allocate_descriptor_sets(&ds_alloc) }
        .map_err(|e| HiveGpuError::VulkanError(format!("allocate_descriptor_sets: {e:?}")))?;
    let descriptor_set = descriptor_sets[0];
    // One STORAGE_BUFFER write per binding index 0..=2.
    let buffer_infos: Vec<vk::DescriptorBufferInfo> = bindings
        .iter()
        .map(|(buf, offset, range)| {
            vk::DescriptorBufferInfo::default()
                .buffer(buf.buffer)
                .offset(*offset)
                .range(*range)
        })
        .collect();
    let writes: Vec<vk::WriteDescriptorSet> = buffer_infos
        .iter()
        .enumerate()
        .map(|(binding, info)| {
            vk::WriteDescriptorSet::default()
                .dst_set(descriptor_set)
                .dst_binding(binding as u32)
                .descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
                .buffer_info(std::slice::from_ref(info))
        })
        .collect();
    unsafe { device.update_descriptor_sets(&writes, &[]) };
    // Requires the pool to have been created with FREE_DESCRIPTOR_SET (the
    // original relied on this too).
    let free_set = || unsafe {
        device.free_descriptor_sets(context.descriptor_pool(), &[descriptor_set])
    };
    let cb = match begin_one_shot_command(context) {
        Ok(cb) => cb,
        Err(e) => {
            let _ = free_set();
            return Err(e);
        }
    };
    unsafe {
        device.cmd_bind_pipeline(cb, vk::PipelineBindPoint::COMPUTE, pipeline.pipeline);
        device.cmd_bind_descriptor_sets(
            cb,
            vk::PipelineBindPoint::COMPUTE,
            pipeline.layout,
            0,
            &[descriptor_set],
            &[],
        );
        // SAFETY: P: Copy; we read size_of::<P>() bytes from a live value.
        // NOTE(review): any padding bytes in P are read too — confirm push
        // constant structs are padding-free.
        let pc_bytes = std::slice::from_raw_parts(
            &push_constants as *const P as *const u8,
            std::mem::size_of::<P>(),
        );
        device.cmd_push_constants(cb, pipeline.layout, vk::ShaderStageFlags::COMPUTE, 0, pc_bytes);
        device.cmd_dispatch(cb, grid.0, grid.1, grid.2);
    }
    let submit_result = end_and_submit_one_shot(context, cb);
    let free_result = free_set();
    submit_result?;
    free_result.map_err(|e| HiveGpuError::VulkanError(format!("free_descriptor_sets: {e:?}")))?;
    Ok(())
}
/// Minimal stand-in for the `bytemuck` crate: marks types whose raw bytes
/// may be handed to `cmd_push_constants`.
pub(crate) mod bytemuck_like {
/// Marker for push-constant payloads.
///
/// NOTE(review): the blanket impl below makes *every* `Copy` type qualify,
/// including types with padding — the dispatch helpers read the full
/// `size_of::<P>()` bytes, so confirm push-constant structs (e.g.
/// `SgemvPushConstants`) contain no padding bytes.
pub trait PodLike: Copy {}
impl<T: Copy> PodLike for T {}
}
/// GPU-backed vector store for Intel iGPUs: vectors live row-major in one
/// host-visible Vulkan buffer; ids, payloads, and norms are tracked on the
/// host. Removal is soft (tombstones in `removed_indices`); slots are not
/// reclaimed or compacted.
pub struct IntelVectorStorage {
// Shared device/queue/pipeline context.
context: Arc<IntelContext>,
// Backing buffer; `Some` for the whole life of the store (taken in Drop).
storage: Option<VulkanBuffer>,
// Number of vector slots the buffer can hold (not bytes).
buffer_capacity: usize,
// Total slots ever written, including soft-removed ones.
vector_count: usize,
// Components per vector; fixed at construction.
dimension: usize,
// Scoring metric applied on top of the raw GPU dot products.
metric: GpuDistanceMetric,
// id -> slot index. Entries persist after soft removal.
vector_id_map: HashMap<String, usize>,
// slot index -> id (parallel to buffer rows).
index_to_id: Vec<String>,
// Soft-removed slot indices, filtered out of search results.
removed_indices: HashSet<usize>,
// id -> user metadata; dropped on removal.
payloads: HashMap<String, HashMap<String, String>>,
// Squared L2 norm per slot, used for cosine/euclidean scoring.
norms_sq: Vec<f32>,
}
impl std::fmt::Debug for IntelVectorStorage {
    /// Compact summary of the store's counters; omits raw GPU handles,
    /// id maps, and payloads.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("IntelVectorStorage");
        out.field("vector_count", &self.vector_count);
        out.field("buffer_capacity", &self.buffer_capacity);
        out.field("dimension", &self.dimension);
        out.field("metric", &self.metric);
        out.field("removed", &self.removed_indices.len());
        out.finish()
    }
}
impl IntelVectorStorage {
/// Creates an empty store for `dimension`-sized vectors scored with
/// `metric`. The initial GPU allocation covers at least
/// `MIN_INITIAL_VECTORS` slots and at least `MIN_INITIAL_BYTES` bytes.
///
/// # Errors
/// * `InvalidConfiguration` if `dimension` is zero or so large that byte
///   sizes overflow `usize` (the original's unchecked `dimension * 4`
///   could wrap — even to zero, which would then divide by zero).
/// * `VulkanError` if the backing buffer cannot be allocated.
pub fn new(
    context: Arc<IntelContext>,
    dimension: usize,
    metric: GpuDistanceMetric,
) -> Result<Self> {
    if dimension == 0 {
        return Err(HiveGpuError::InvalidConfiguration(
            "dimension must be > 0".to_string(),
        ));
    }
    let per_vector_bytes = dimension
        .checked_mul(std::mem::size_of::<f32>())
        .ok_or_else(|| HiveGpuError::InvalidConfiguration("dimension overflow".to_string()))?;
    let min_vectors_by_bytes = (MIN_INITIAL_BYTES / per_vector_bytes).max(1);
    let capacity = MIN_INITIAL_VECTORS.max(min_vectors_by_bytes);
    let bytes = capacity
        .checked_mul(per_vector_bytes)
        .ok_or_else(|| HiveGpuError::InvalidConfiguration("capacity overflow".to_string()))?;
    let storage = allocate_host_visible_buffer(&context, bytes)?;
    debug!(
        "intel storage created: dim={} capacity={} bytes={}",
        dimension, capacity, bytes
    );
    Ok(Self {
        context,
        storage: Some(storage),
        buffer_capacity: capacity,
        vector_count: 0,
        dimension,
        metric,
        vector_id_map: HashMap::new(),
        index_to_id: Vec::new(),
        removed_indices: HashSet::new(),
        payloads: HashMap::new(),
        norms_sq: Vec::new(),
    })
}
/// Validates a candidate vector before insertion: dimension match,
/// non-empty unique id of bounded length, and finite components.
///
/// # Errors
/// `DimensionMismatch` or `InvalidConfiguration` describing the first
/// failed check.
fn validate_vector(&self, vector: &GpuVector) -> Result<()> {
    if vector.data.len() != self.dimension {
        return Err(HiveGpuError::DimensionMismatch {
            expected: self.dimension,
            actual: vector.data.len(),
        });
    }
    if vector.id.is_empty() {
        return Err(HiveGpuError::InvalidConfiguration(
            "vector id must be non-empty".to_string(),
        ));
    }
    // `str::len()` counts UTF-8 bytes; the original message claimed "chars",
    // which is wrong for multi-byte ids — say what is actually checked.
    if vector.id.len() > 256 {
        return Err(HiveGpuError::InvalidConfiguration(
            "vector id must be <= 256 bytes".to_string(),
        ));
    }
    if self.vector_id_map.contains_key(&vector.id) {
        return Err(HiveGpuError::InvalidConfiguration(format!(
            "duplicate vector id: {}",
            vector.id
        )));
    }
    if let Some(i) = vector.data.iter().position(|v| !v.is_finite()) {
        return Err(HiveGpuError::InvalidConfiguration(format!(
            "non-finite component at index {i} in vector {}",
            vector.id
        )));
    }
    Ok(())
}
/// Grows the backing buffer (if needed) so that `additional` more vectors
/// fit, copying the live prefix into the new allocation.
///
/// Growth factor tapers with size (2.0 / 1.5 / 1.2). Uses f64 — the
/// original f32 math loses integer precision above 2^24 slots — and checked
/// byte arithmetic (the original's `slots * 4` was unchecked). Also frees
/// the new buffer if the device-to-device copy fails (the original leaked
/// it on that path).
///
/// # Errors
/// `InvalidConfiguration` on arithmetic overflow; `VulkanError` on
/// allocation or copy failure.
fn ensure_capacity(&mut self, additional: usize) -> Result<()> {
    let required = self
        .vector_count
        .checked_add(additional)
        .ok_or_else(|| HiveGpuError::InvalidConfiguration("capacity overflow".to_string()))?;
    if required <= self.buffer_capacity {
        return Ok(());
    }
    let mut new_capacity = self.buffer_capacity;
    while new_capacity < required {
        let factor = if new_capacity < 1_000 {
            2.0f64
        } else if new_capacity < 10_000 {
            1.5f64
        } else {
            1.2f64
        };
        new_capacity = ((new_capacity as f64) * factor).ceil() as usize;
        // Guarantee progress (and loop termination) for any factor.
        new_capacity = new_capacity.max(required);
    }
    let slots = new_capacity
        .checked_mul(self.dimension)
        .ok_or_else(|| HiveGpuError::InvalidConfiguration("slots overflow".to_string()))?;
    let bytes = slots
        .checked_mul(std::mem::size_of::<f32>())
        .ok_or_else(|| HiveGpuError::InvalidConfiguration("byte size overflow".to_string()))?;
    let new_buf = allocate_host_visible_buffer(&self.context, bytes)?;
    if self.vector_count > 0 {
        let live_bytes = self.vector_count * self.dimension * std::mem::size_of::<f32>();
        let old = self.storage.as_ref().expect("storage live");
        if let Err(e) = dtod_copy(&self.context, old, &new_buf, live_bytes) {
            new_buf.destroy(self.context.device());
            return Err(e);
        }
    }
    if let Some(old) = self.storage.take() {
        old.destroy(self.context.device());
    }
    info!(
        "intel storage expand: {} -> {} vectors ({:.2} MiB)",
        self.buffer_capacity,
        new_capacity,
        bytes as f64 / (1024.0 * 1024.0)
    );
    self.storage = Some(new_buf);
    self.buffer_capacity = new_capacity;
    Ok(())
}
/// Raw GPU buffer backing all stored vectors, laid out row-major
/// (`vector_count` rows of `dimension` f32s).
/// Panics only if called after the storage has been taken (mid-`Drop`).
#[allow(dead_code)] pub(crate) fn storage_buffer(&self) -> &VulkanBuffer {
self.storage.as_ref().expect("storage initialised")
}
/// Squared L2 norm of each stored vector, indexed by insertion slot
/// (includes soft-removed slots); used for cosine/euclidean scoring.
#[allow(dead_code)] pub(crate) fn norms_sq(&self) -> &[f32] {
&self.norms_sq
}
/// Computes the raw dot product of `query` against every stored slot
/// (including soft-removed ones) on the GPU, returning one score per slot.
///
/// # Errors
/// `DimensionMismatch` / `InvalidConfiguration` for bad queries;
/// `VulkanError` on GPU failure. Temporary buffers are destroyed on every
/// path — the original leaked `query_buf` and `scores_buf` whenever the
/// write, dispatch, or readback failed, because `?` returned before the
/// `destroy` calls.
pub(crate) fn gpu_scores(&self, query: &[f32]) -> Result<Vec<f32>> {
    if self.vector_count == 0 {
        return Ok(Vec::new());
    }
    if query.len() != self.dimension {
        return Err(HiveGpuError::DimensionMismatch {
            expected: self.dimension,
            actual: query.len(),
        });
    }
    for (i, &v) in query.iter().enumerate() {
        if !v.is_finite() {
            return Err(HiveGpuError::InvalidConfiguration(format!(
                "non-finite query component at index {i}"
            )));
        }
    }
    let query_bytes = query.len() * std::mem::size_of::<f32>();
    let query_buf = allocate_host_visible_buffer(&self.context, query_bytes)?;
    let scores_bytes = self.vector_count * std::mem::size_of::<f32>();
    let scores_buf = match allocate_host_visible_buffer(&self.context, scores_bytes) {
        Ok(buf) => buf,
        Err(e) => {
            query_buf.destroy(self.context.device());
            return Err(e);
        }
    };
    // Run the fallible steps in a closure so cleanup below happens on both
    // success and failure.
    let result = (|| {
        write_f32_slice(&query_buf, query)?;
        let pc = SgemvPushConstants {
            dimension: self.dimension as u32,
            n_vectors: self.vector_count as u32,
        };
        // One thread per stored vector, 256 threads per workgroup.
        let grid = (((self.vector_count as u32) + 255) / 256, 1, 1);
        let storage_buf = self.storage.as_ref().expect("storage live");
        dispatch_three_buffer_compute(
            &self.context,
            self.context.sgemv_dot(),
            [storage_buf, &query_buf, &scores_buf],
            pc,
            grid,
        )?;
        read_f32_vec(&scores_buf, self.vector_count)
    })();
    query_buf.destroy(self.context.device());
    scores_buf.destroy(self.context.device());
    result
}
/// Converts raw dot products in place into metric-specific scores:
/// cosine divides by the norm product (0.0 for zero norms), euclidean
/// rewrites each score as the squared distance
/// `|v|^2 - 2*v.q + |q|^2` clamped at 0; dot product is left untouched.
fn apply_metric(&self, raw_scores: &mut [f32], query: &[f32]) {
    let query_norm_sq: f32 = query.iter().map(|&c| c * c).sum();
    match self.metric {
        GpuDistanceMetric::DotProduct => {}
        GpuDistanceMetric::Cosine => {
            let query_norm = query_norm_sq.sqrt();
            for (score, &norm_sq) in raw_scores.iter_mut().zip(self.norms_sq.iter()) {
                let denom = norm_sq.sqrt() * query_norm;
                *score = if denom > 0.0 { *score / denom } else { 0.0 };
            }
        }
        GpuDistanceMetric::Euclidean => {
            for (score, &norm_sq) in raw_scores.iter_mut().zip(self.norms_sq.iter()) {
                *score = (norm_sq - 2.0 * *score + query_norm_sq).max(0.0);
            }
        }
    }
}
/// Drops soft-removed slots, orders by metric (euclidean: smaller distance
/// first; others: larger score first), keeps the best `limit`, and maps
/// euclidean squared distances to a `1 / (1 + d)` similarity.
fn select_top_k(&self, mut scored: Vec<(usize, f32)>, limit: usize) -> Vec<GpuSearchResult> {
    scored.retain(|(idx, _)| !self.removed_indices.contains(idx));
    let ascending = matches!(self.metric, GpuDistanceMetric::Euclidean);
    scored.sort_by(|a, b| {
        // NaN-safe: incomparable pairs are treated as equal.
        let ord = a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal);
        if ascending { ord } else { ord.reverse() }
    });
    scored.truncate(limit);
    let mut results = Vec::with_capacity(scored.len());
    for (index, score) in scored {
        let similarity = if ascending {
            1.0 / (1.0 + score.sqrt())
        } else {
            score
        };
        results.push(GpuSearchResult {
            id: self.index_to_id[index].clone(),
            score: similarity,
            index,
        });
    }
    results
}
}
impl Drop for IntelVectorStorage {
/// Releases the backing GPU buffer.
/// NOTE(review): assumes the device has finished all work on this buffer
/// (dispatches here block on a fence) — confirm for any async call sites.
fn drop(&mut self) {
if let Some(buf) = self.storage.take() {
buf.destroy(self.context.device());
}
}
}
impl GpuVectorStorage for IntelVectorStorage {
/// Validates and appends a batch of vectors, returning the slot index
/// assigned to each. The whole batch is validated (including intra-batch
/// duplicate ids) before any state changes; vector data is written to the
/// GPU buffer in one contiguous copy.
fn add_vectors(&mut self, vectors: &[GpuVector]) -> Result<Vec<usize>> {
    if vectors.is_empty() {
        return Ok(Vec::new());
    }
    let mut batch_ids = HashSet::with_capacity(vectors.len());
    for vector in vectors {
        self.validate_vector(vector)?;
        if !batch_ids.insert(vector.id.as_str()) {
            return Err(HiveGpuError::InvalidConfiguration(format!(
                "duplicate vector id within batch: {}",
                vector.id
            )));
        }
    }
    self.ensure_capacity(vectors.len())?;
    // Flatten the batch so it lands in the buffer with a single copy.
    let flat: Vec<f32> = vectors
        .iter()
        .flat_map(|v| v.data.iter().copied())
        .collect();
    let offset_bytes = self.vector_count * self.dimension * std::mem::size_of::<f32>();
    write_f32_slice_at(
        self.storage.as_ref().expect("storage live"),
        offset_bytes,
        &flat,
    )?;
    let mut assigned = Vec::with_capacity(vectors.len());
    for vector in vectors {
        let slot = self.vector_count;
        self.vector_id_map.insert(vector.id.clone(), slot);
        self.index_to_id.push(vector.id.clone());
        self.payloads.insert(vector.id.clone(), vector.metadata.clone());
        self.norms_sq
            .push(vector.data.iter().map(|&c| c * c).sum::<f32>());
        self.vector_count += 1;
        assigned.push(slot);
    }
    Ok(assigned)
}
/// Scores `query` against all stored vectors on the GPU and returns the
/// best `limit` results for the configured metric.
fn search(&self, query: &[f32], limit: usize) -> Result<Vec<GpuSearchResult>> {
    if limit == 0 || self.vector_count == 0 {
        return Ok(Vec::new());
    }
    let mut raw = self.gpu_scores(query)?;
    self.apply_metric(&mut raw, query);
    let indexed: Vec<(usize, f32)> = raw.into_iter().enumerate().collect();
    Ok(self.select_top_k(indexed, limit))
}
/// Soft-removes the given ids (tombstones them; slots are not reclaimed).
///
/// All ids are resolved before anything is mutated, so a missing id leaves
/// the store unchanged — the original removed earlier ids and then
/// returned `Err`, leaving the operation half-applied.
///
/// # Errors
/// `VectorNotFound` for the first unknown id.
fn remove_vectors(&mut self, ids: &[String]) -> Result<()> {
    // Phase 1: resolve every id (fail atomically on the first unknown one).
    let mut indices = Vec::with_capacity(ids.len());
    for id in ids {
        match self.vector_id_map.get(id) {
            Some(&index) => indices.push(index),
            None => return Err(HiveGpuError::VectorNotFound(id.clone())),
        }
    }
    // Phase 2: apply the removals.
    for (id, index) in ids.iter().zip(indices) {
        self.removed_indices.insert(index);
        self.payloads.remove(id);
    }
    Ok(())
}
/// Number of live vectors (total slots written minus soft removals).
fn vector_count(&self) -> usize {
self.vector_count.saturating_sub(self.removed_indices.len())
}
/// Dimensionality every stored vector (and query) must have.
fn dimension(&self) -> usize {
self.dimension
}
/// Reads a vector back from the GPU buffer by id. Returns `Ok(None)` for
/// unknown or soft-removed ids.
fn get_vector(&self, id: &str) -> Result<Option<GpuVector>> {
    let index = match self.vector_id_map.get(id) {
        Some(&i) if !self.removed_indices.contains(&i) => i,
        _ => return Ok(None),
    };
    let storage = self.storage.as_ref().expect("storage live");
    if storage.host_ptr.is_null() {
        return Err(HiveGpuError::VulkanError(
            "storage buffer not host-visible".to_string(),
        ));
    }
    let row_bytes = self.dimension * std::mem::size_of::<f32>();
    let offset_bytes = index * row_bytes;
    let mut data = vec![0f32; self.dimension];
    // SAFETY: `index < vector_count <= buffer_capacity`, so the row lies
    // within the mapped allocation; `data` owns exactly `row_bytes` bytes.
    unsafe {
        std::ptr::copy_nonoverlapping(
            storage.host_ptr.add(offset_bytes) as *const u8,
            data.as_mut_ptr() as *mut u8,
            row_bytes,
        );
    }
    Ok(Some(GpuVector {
        id: id.to_string(),
        data,
        metadata: self.payloads.get(id).cloned().unwrap_or_default(),
    }))
}
/// Resets the store to empty while keeping the GPU allocation, so the
/// existing capacity is reused by subsequent inserts. Always succeeds.
fn clear(&mut self) -> Result<()> {
    self.vector_count = 0;
    self.buffer_capacity = self.buffer_capacity.max(MIN_INITIAL_VECTORS);
    // Forget all host-side bookkeeping; stale bytes in the buffer are
    // unreachable once the count is zero.
    self.norms_sq.clear();
    self.vector_id_map.clear();
    self.index_to_id.clear();
    self.removed_indices.clear();
    self.payloads.clear();
    Ok(())
}
}