1#[cfg(feature = "gpu")]
4use crate::gpu::GpuAccelerator;
5use crate::hnsw::HnswIndex;
6use crate::Vector;
7use anyhow::Result;
8use std::sync::Arc;
9
/// Snapshot of GPU runtime statistics for a single accelerator.
///
/// NOTE(review): only `gpu_memory_used` is filled from a live device query
/// (`gpu_performance_stats`); the timing/throughput fields are currently
/// placeholder zeros until kernel instrumentation is wired in.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GpuPerformanceStats {
    // Device memory currently in use — presumably bytes; 0 when the query fails.
    pub gpu_memory_used: usize,
    // Kernel execution time (units TBD). Placeholder — always 0.0 today.
    pub kernel_execution_time: f64,
    // Host<->device transfer time (units TBD). Placeholder — always 0.0 today.
    pub memory_transfer_time: f64,
    // Vectors scored per second. Placeholder — always 0.0 today.
    pub throughput_vectors_per_second: f64,
}
18
#[cfg(feature = "gpu")]
impl HnswIndex {
    /// Computes the distance from `query` to each candidate node, routing to
    /// a GPU path when the batch is large enough and an accelerator exists.
    ///
    /// Batches below `gpu_batch_threshold` always take the CPU path, where
    /// transfer overhead would dominate any device speedup.
    pub fn gpu_batch_distance_calculation(
        &self,
        query: &Vector,
        candidates: &[usize],
    ) -> Result<Vec<f32>> {
        if candidates.len() < self.config().gpu_batch_threshold {
            return self.cpu_batch_distance_calculation(query, candidates);
        }

        if let Some(accelerator) = self.gpu_accelerator() {
            self.single_gpu_distance_calculation(accelerator, query, candidates)
        } else if !self.multi_gpu_accelerators().is_empty() {
            self.multi_gpu_distance_calculation(query, candidates)
        } else {
            // GPU feature compiled in, but no device was initialized.
            self.cpu_batch_distance_calculation(query, candidates)
        }
    }

    /// Batch distance computation on the primary accelerator.
    ///
    /// NOTE(review): placeholder — the accelerator handle is unused and the
    /// call falls through to the CPU implementation.
    pub fn single_gpu_distance_calculation(
        &self,
        _accelerator: &Arc<GpuAccelerator>,
        query: &Vector,
        candidates: &[usize],
    ) -> Result<Vec<f32>> {
        self.cpu_batch_distance_calculation(query, candidates)
    }

    /// Batch distance computation spread across multiple accelerators.
    ///
    /// NOTE(review): placeholder — after the empty-device guard this also
    /// falls through to the CPU implementation.
    pub fn multi_gpu_distance_calculation(
        &self,
        query: &Vector,
        candidates: &[usize],
    ) -> Result<Vec<f32>> {
        if self.multi_gpu_accelerators().is_empty() {
            return self.cpu_batch_distance_calculation(query, candidates);
        }

        self.cpu_batch_distance_calculation(query, candidates)
    }

    /// K-nearest-neighbour search that opportunistically uses the GPU.
    ///
    /// Falls back to the regular `search_knn` when no GPU is enabled, or when
    /// the request is too small (`k` below the batch threshold or fewer than
    /// 1000 indexed nodes) for the GPU path to pay off.
    pub fn gpu_search(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
        // Empty index or no entry point: nothing to search.
        if self.nodes().is_empty() || self.entry_point().is_none() {
            return Ok(Vec::new());
        }

        if !self.is_gpu_enabled() {
            return self.search_knn(query, k);
        }

        if k >= self.config().gpu_batch_threshold && self.nodes().len() >= 1000 {
            return self.gpu_accelerated_search_large(query, k);
        }

        self.search_knn(query, k)
    }

    /// Brute-force top-k over every node via the batched distance path,
    /// followed by partial selection of the k nearest.
    fn gpu_accelerated_search_large(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
        // Score every node; the batch entry point decides GPU vs CPU itself.
        let candidate_ids: Vec<usize> = (0..self.nodes().len()).collect();
        let distances = self.gpu_batch_distance_calculation(query, &candidate_ids)?;

        let mut scored: Vec<(usize, f32)> =
            candidate_ids.into_iter().zip(distances).collect();

        // O(n) partial selection of the k smallest distances, then sort only
        // that prefix — cheaper than sorting all n candidates. `total_cmp`
        // yields a total order even for NaN (NaN sorts last), unlike the old
        // `partial_cmp(..).unwrap_or(Equal)` which made the ordering
        // nondeterministic whenever a NaN distance appeared.
        if k < scored.len() {
            scored.select_nth_unstable_by(k, |a, b| a.1.total_cmp(&b.1));
            scored.truncate(k);
        }
        scored.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));

        // Map internal ids back to URIs; ids missing from `nodes()` are skipped.
        let results: Vec<(String, f32)> = scored
            .into_iter()
            .filter_map(|(id, distance)| {
                self.nodes()
                    .get(id)
                    .map(|node| (node.uri.clone(), distance))
            })
            .collect();

        Ok(results)
    }

    /// True when at least one accelerator (single- or multi-GPU) is configured.
    pub fn is_gpu_enabled(&self) -> bool {
        self.gpu_accelerator().is_some() || !self.multi_gpu_accelerators().is_empty()
    }

    /// Stats snapshot for the primary accelerator, or `None` when only
    /// multi-GPU (or no GPU) is configured.
    ///
    /// The timing/throughput fields are placeholder zeros until kernel-level
    /// instrumentation is added; only `gpu_memory_used` is queried live.
    pub fn gpu_performance_stats(&self) -> Option<GpuPerformanceStats> {
        self.gpu_accelerator()
            .map(|accelerator| GpuPerformanceStats {
                gpu_memory_used: accelerator.get_memory_usage().unwrap_or(0),
                kernel_execution_time: 0.0,
                memory_transfer_time: 0.0,
                throughput_vectors_per_second: 0.0,
            })
    }

    /// Warms up GPU kernels so first-query latency is paid ahead of time.
    ///
    /// NOTE(review): currently a no-op beyond the enablement check.
    pub fn warmup_gpu(&self) -> Result<()> {
        if !self.is_gpu_enabled() {
            return Ok(());
        }

        Ok(())
    }

    /// Preloads index vectors into device memory.
    ///
    /// NOTE(review): currently a no-op beyond the enablement check.
    pub fn preload_to_gpu(&self) -> Result<()> {
        if !self.is_gpu_enabled() {
            return Ok(());
        }

        Ok(())
    }
}
180
/// CPU-only fallback compiled when the `gpu` feature is disabled, so the
/// `gpu_batch_distance_calculation` entry point exists under either feature.
#[cfg(not(feature = "gpu"))]
impl HnswIndex {
    /// Delegates straight to the CPU batch implementation; the signature
    /// matches the GPU-enabled variant so call sites compile unchanged.
    pub fn gpu_batch_distance_calculation(
        &self,
        query: &Vector,
        candidates: &[usize],
    ) -> Result<Vec<f32>> {
        self.cpu_batch_distance_calculation(query, candidates)
    }
}