pub struct GpuAccelerator { /* private fields */ }
GPU-accelerated vector search engine.
Upload vectors once, then run many searches against them. If GPU initialization fails, callers should fall back to CPU SIMD.
Vectors are automatically split into chunks when they exceed the device’s
max_storage_buffer_binding_size (typically 128 MB). Searches dispatch
against each chunk and merge results transparently.
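As a rough sketch of the chunking arithmetic (an illustration, not the crate's actual code): each chunk holds as many whole vectors as fit under the binding limit, so a chunk boundary never splits a vector.

```rust
// Sketch (assumed behavior, not the crate's internals): split a flat [N×D]
// f32 array into index ranges that each fit under `max_binding_size` bytes,
// cutting only at whole-vector boundaries.
fn chunk_ranges(num_vectors: usize, dim: usize, max_binding_size: usize) -> Vec<(usize, usize)> {
    let bytes_per_vector = dim * std::mem::size_of::<f32>();
    let vectors_per_chunk = (max_binding_size / bytes_per_vector).max(1);
    let mut ranges = Vec::new();
    let mut start = 0;
    while start < num_vectors {
        let end = (start + vectors_per_chunk).min(num_vectors);
        ranges.push((start, end));
        start = end;
    }
    ranges
}
```

With a 128 MB limit and 1024-dimensional f32 vectors (4 KB each), one chunk would hold 32,768 vectors.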
Implementations
impl GpuAccelerator
pub fn is_available() -> bool
Check if any GPU is available on this system.
pub fn new() -> Result<Self>
Initialize the best available GPU device.
Requests the adapter’s maximum buffer limits so that chunking only kicks in when truly necessary.
pub fn device_info(&self) -> &DeviceInfo
Returns information about the active GPU device.
pub fn max_storage_buffer_binding_size(&self) -> u32
Returns the device's maximum storage buffer binding size in bytes; uploads larger than this are split into chunks.
pub fn upload_vectors(&mut self, vectors: &[f32], dim: usize) -> Result<()>
Upload a flat array of vectors to GPU memory. Automatically splits into chunks when data exceeds the binding limit.
pub fn upload_norms(&mut self, norms: &[f32]) -> Result<()>
Upload pre-computed L2 norms, split to match vector chunk layout.
pub fn cosine_search(
    &self,
    query: &[f32],
    k: usize,
) -> Result<Vec<(usize, f32)>>
Cosine similarity search: returns top-k (index, score) pairs, highest first. Dispatches against each vector chunk and merges results.
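A CPU reference (`cosine_topk` is a hypothetical name; this is not the crate's code) for the documented result: top-k `(index, score)` pairs, highest score first, with per-chunk indices offset back into the global vector array when results are merged.

```rust
// CPU sketch of cosine top-k across chunks. `chunks` is a list of flat
// [n_i×dim] slices; returned indices are global across all chunks.
fn cosine_topk(chunks: &[&[f32]], dim: usize, query: &[f32], k: usize) -> Vec<(usize, f32)> {
    let qn = query.iter().map(|x| x * x).sum::<f32>().sqrt();
    let mut all = Vec::new();
    let mut offset = 0;
    for chunk in chunks {
        for (i, v) in chunk.chunks_exact(dim).enumerate() {
            let dot: f32 = v.iter().zip(query).map(|(a, b)| a * b).sum();
            let vn = v.iter().map(|x| x * x).sum::<f32>().sqrt();
            all.push((offset + i, dot / (qn * vn)));
        }
        offset += chunk.len() / dim;
    }
    // Highest similarity first, then keep the top k.
    all.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    all.truncate(k);
    all
}
```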
pub fn batch_cosine_search(
    &self,
    queries: &[Vec<f32>],
    k: usize,
) -> Result<Vec<Vec<(usize, f32)>>>
Batch cosine search: multiple queries at once.
pub fn l2_search(&self, query: &[f32], k: usize) -> Result<Vec<(usize, f32)>>
L2 distance search: returns top-k (index, distance) pairs, smallest first.
pub fn compute_norms(&self) -> Result<Vec<f32>>
Compute L2 norms for all uploaded vectors on GPU.
pub fn compute_norms_gpu(&self, vectors: &[f32], dim: usize) -> Result<Vec<f32>>
Compute L2 norms from raw vectors (not previously uploaded). Handles chunking automatically for large inputs.
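The CPU equivalent of what both norm methods compute is one L2 norm per row of the flat `[N×D]` array — a sketch:

```rust
// CPU sketch of per-vector L2 norms over a flat [N×D] f32 array.
fn l2_norms(vectors: &[f32], dim: usize) -> Vec<f32> {
    vectors
        .chunks_exact(dim)
        .map(|v| v.iter().map(|x| x * x).sum::<f32>().sqrt())
        .collect()
}
```

These are the values `upload_norms` expects, letting `cosine_search` skip recomputing them per query.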
pub fn batch_dot_product(
    &self,
    queries_flat: &[f32],
    num_queries: usize,
) -> Result<Vec<f32>>
Batch dot product: queries [Q×D] × vectors [N×D] -> flat [Q×N] scores.
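A CPU reference for the flat `[Q×N]` layout described above (a sketch, not the crate's code): the score for query `q` against vector `n` lives at index `q * N + n`.

```rust
// CPU sketch of the batched dot product: queries [Q×D] × vectors [N×D]
// -> flat [Q×N], row-major by query.
fn batch_dot(queries: &[f32], vectors: &[f32], dim: usize) -> Vec<f32> {
    let nq = queries.len() / dim;
    let nv = vectors.len() / dim;
    let mut out = vec![0.0f32; nq * nv];
    for q in 0..nq {
        for n in 0..nv {
            out[q * nv + n] = (0..dim)
                .map(|d| queries[q * dim + d] * vectors[n * dim + d])
                .sum();
        }
    }
    out
}
```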
pub fn distance_matrix(
    &self,
    queries: &[f32],
    vectors: &[f32],
    dim: usize,
) -> Result<Vec<Vec<f32>>>
Compute L2 distance matrix: queries × vectors -> Q×N distances. Uses 16×16 workgroup tiling for cache efficiency.
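A CPU sketch of the Q×N result shape (the 16×16 workgroup tiling is a GPU-side optimization and is not reproduced here): row `q` holds the L2 distances from query `q` to every vector.

```rust
// CPU sketch of the L2 distance matrix: one row per query, one column
// per vector, each entry sqrt(sum((q_d - v_d)^2)).
fn l2_distance_matrix(queries: &[f32], vectors: &[f32], dim: usize) -> Vec<Vec<f32>> {
    queries
        .chunks_exact(dim)
        .map(|q| {
            vectors
                .chunks_exact(dim)
                .map(|v| {
                    q.iter()
                        .zip(v)
                        .map(|(a, b)| (a - b) * (a - b))
                        .sum::<f32>()
                        .sqrt()
                })
                .collect()
        })
        .collect()
}
```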
pub fn f16_to_f32_batch(&self, f16_bits: &[u16]) -> Result<Vec<f32>>
Convert f16 values (as raw u16 bits) to f32 on the GPU.
pub fn f32_to_f16_batch(&self, values: &[f32]) -> Result<Vec<u16>>
Convert f32 values to f16 (as raw u16 bits) on the GPU.
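The `u16` bit pattern is standard IEEE 754 binary16: 1 sign bit, 5 exponent bits (bias 15), 10 mantissa bits. A scalar CPU decoder for that layout (an illustration of the format, not the crate's kernel):

```rust
// Decode one IEEE 754 binary16 value from its raw u16 bits into an f32.
fn f16_bits_to_f32(bits: u16) -> f32 {
    let sign = ((bits >> 15) & 1) as u32;
    let exp = ((bits >> 10) & 0x1f) as u32;
    let frac = (bits & 0x3ff) as u32;
    let f32_bits = match exp {
        0 if frac == 0 => sign << 31, // signed zero
        0 => {
            // Subnormal: renormalize into f32's normal range.
            let shift = frac.leading_zeros() - 22;
            let frac = (frac << (shift + 1)) & 0x3ff;
            (sign << 31) | ((127 - 15 - shift) << 23) | (frac << 13)
        }
        0x1f => (sign << 31) | 0x7f80_0000 | (frac << 13), // inf / NaN
        _ => (sign << 31) | ((exp + 127 - 15) << 23) | (frac << 13), // normal
    };
    f32::from_bits(f32_bits)
}
```

Every f16 value is exactly representable as an f32, so this direction is lossless; the reverse (`f32_to_f16_batch`) necessarily rounds.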