1use crate::gpu::{GpuConfig, GpuDevice};
21use anyhow::{anyhow, Result};
22use parking_lot::{Mutex, RwLock};
23use serde::{Deserialize, Serialize};
24use std::collections::HashMap;
25use std::sync::Arc;
26use std::time::{Duration, Instant};
27use tracing::{debug, info, warn};
28
/// Shared cache of distance rows, keyed by an index pair — presumably
/// (query batch id, database batch id); verify against callers.
/// NOTE(review): only ever instantiated in this file, never read or written.
type ComputationCache = Arc<RwLock<HashMap<(usize, usize), Vec<Vec<f32>>>>>;
31
/// Tunables for (nominally) GPU-accelerated HNSW index construction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuIndexBuilderConfig {
    /// HNSW connectivity parameter M: target links per node on upper
    /// layers; layer 0 is allowed 2*M (see `build()`).
    pub m: usize,
    /// Candidate-list width during insertion; upper layers use half of it.
    pub ef_construction: usize,
    /// Hard cap on graph layers; assigned layers are clamped to this - 1.
    pub num_layers: usize,
    /// Device ordinal passed to `GpuDevice::get_device_info`.
    pub gpu_device_id: i32,
    /// Vectors per processing batch (feeds the `batches_processed` stat).
    pub batch_size: usize,
    /// Request mixed-precision math. NOTE(review): in this file it is only
    /// echoed into the build stats — confirm the GPU path honors it.
    pub mixed_precision: bool,
    /// Request tensor-core kernels (also only echoed into stats here).
    pub tensor_cores: bool,
    /// Number of device streams for overlapped work.
    pub num_streams: usize,
    /// Device memory the build may use, in MiB.
    pub gpu_memory_budget_mb: usize,
    /// Use asynchronous host<->device copies.
    pub async_transfers: bool,
    /// Metric used for every distance computation.
    pub distance_metric: GpuDistanceMetric,
}
58
59impl Default for GpuIndexBuilderConfig {
60 fn default() -> Self {
61 Self {
62 m: 16,
63 ef_construction: 200,
64 num_layers: 4,
65 gpu_device_id: 0,
66 batch_size: 1024,
67 mixed_precision: true,
68 tensor_cores: true,
69 num_streams: 4,
70 gpu_memory_budget_mb: 4096,
71 async_transfers: true,
72 distance_metric: GpuDistanceMetric::Cosine,
73 }
74 }
75}
76
/// Distance functions supported by the builder.
///
/// Lower values always mean "closer" — inner product is negated to fit that
/// convention. The `F16` variants presumably select half-precision GPU
/// kernels; on the CPU paths in this file they compute exactly the same f32
/// math as their full-precision counterparts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum GpuDistanceMetric {
    /// 1 - cos(a, b); degenerate (near-zero-norm) inputs score 1.0.
    Cosine,
    /// Standard L2 distance.
    Euclidean,
    /// Negated dot product.
    InnerProduct,
    /// Cosine distance, half-precision variant.
    CosineF16,
    /// Euclidean distance, half-precision variant.
    EuclideanF16,
}
91
/// Timing and resource metrics captured for a single `build()` run.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GpuIndexBuildStats {
    /// Total vectors inserted into the graph.
    pub vectors_indexed: usize,
    /// Wall-clock duration of the whole build, in milliseconds.
    pub build_time_ms: u64,
    /// Time spent in the construction loop (labelled "GPU compute", though
    /// the visible implementation runs it on the CPU), in milliseconds.
    pub gpu_compute_time_ms: u64,
    /// Time attributed to host<->device transfer (simulated), in ms.
    pub transfer_time_ms: u64,
    /// Time spent assembling the final graph/stats, in milliseconds.
    pub graph_assembly_time_ms: u64,
    /// Number of batches implied by `batch_size` (ceiling division).
    pub batches_processed: usize,
    /// Estimated peak device memory: raw f32 payload (dim * n * 4 bytes).
    pub peak_gpu_memory_bytes: usize,
    /// Reported utilization. NOTE(review): hard-coded to 85.0 in `build()`,
    /// not a measurement.
    pub gpu_utilization_pct: f32,
    /// Vectors indexed per second over the whole build.
    pub throughput_vps: f64,
    /// Copied from the config: mixed precision was requested.
    pub used_mixed_precision: bool,
    /// Copied from the config: tensor cores were requested.
    pub used_tensor_cores: bool,
}
118
119#[derive(Debug, Clone)]
121pub struct HnswNode {
122 pub id: usize,
124 pub vector: Vec<f32>,
126 pub neighbors: Vec<Vec<usize>>,
128 pub max_layer: usize,
130}
131
/// An assembled HNSW index, together with the configuration and statistics
/// of the build that produced it.
#[derive(Debug)]
pub struct HnswGraph {
    /// All nodes; neighbor lists index into this Vec.
    pub nodes: Vec<HnswNode>,
    /// Node index where searches start (always 0 as built by this file).
    pub entry_point: usize,
    /// Highest populated layer across all nodes.
    pub max_layer: usize,
    /// Build configuration (drives the distance metric at query time).
    pub config: GpuIndexBuilderConfig,
    /// Metrics captured during `build()`.
    pub stats: GpuIndexBuildStats,
}
146
147impl HnswGraph {
148 pub fn search_knn(&self, query: &[f32], k: usize, ef: usize) -> Result<Vec<(usize, f32)>> {
150 if self.nodes.is_empty() {
151 return Ok(Vec::new());
152 }
153 if query.len() != self.nodes[0].vector.len() {
154 return Err(anyhow!(
155 "Query dimension {} != index dimension {}",
156 query.len(),
157 self.nodes[0].vector.len()
158 ));
159 }
160
161 let entry = self.entry_point;
163 let mut current_best = entry;
164 let mut current_dist = self.compute_distance(query, &self.nodes[entry].vector);
165
166 for layer in (1..=self.max_layer).rev() {
168 let mut improved = true;
169 while improved {
170 improved = false;
171 if layer >= self.nodes[current_best].neighbors.len() {
172 break;
173 }
174 for &neighbor_id in &self.nodes[current_best].neighbors[layer] {
175 let neighbor_dist =
176 self.compute_distance(query, &self.nodes[neighbor_id].vector);
177 if neighbor_dist < current_dist {
178 current_dist = neighbor_dist;
179 current_best = neighbor_id;
180 improved = true;
181 }
182 }
183 }
184 }
185
186 let search_ef = ef.max(k);
188 let mut candidates: Vec<(f32, usize)> = Vec::with_capacity(search_ef * 2);
189 let mut visited: std::collections::HashSet<usize> =
190 std::collections::HashSet::with_capacity(search_ef * 4);
191
192 candidates.push((current_dist, current_best));
193 visited.insert(current_best);
194
195 let mut w: Vec<(f32, usize)> = vec![(current_dist, current_best)];
196 let mut c_idx = 0;
197
198 while c_idx < candidates.len() {
199 let (c_dist, c_node) = candidates[c_idx];
200 c_idx += 1;
201
202 if !w.is_empty() {
204 let w_max = w.iter().map(|x| x.0).fold(f32::NEG_INFINITY, f32::max);
205 if c_dist > w_max && w.len() >= search_ef {
206 break;
207 }
208 }
209
210 if self.nodes[c_node].neighbors.is_empty() {
211 continue;
212 }
213 for &neighbor_id in &self.nodes[c_node].neighbors[0] {
214 if visited.contains(&neighbor_id) {
215 continue;
216 }
217 visited.insert(neighbor_id);
218 let neighbor_dist = self.compute_distance(query, &self.nodes[neighbor_id].vector);
219
220 let w_max = if !w.is_empty() {
221 w.iter().map(|x| x.0).fold(f32::NEG_INFINITY, f32::max)
222 } else {
223 f32::INFINITY
224 };
225
226 if neighbor_dist < w_max || w.len() < search_ef {
227 candidates.push((neighbor_dist, neighbor_id));
228 w.push((neighbor_dist, neighbor_id));
229 if w.len() > search_ef {
230 if let Some(max_pos) = w
232 .iter()
233 .enumerate()
234 .max_by(|a, b| {
235 a.1 .0
236 .partial_cmp(&b.1 .0)
237 .unwrap_or(std::cmp::Ordering::Equal)
238 })
239 .map(|(i, _)| i)
240 {
241 w.remove(max_pos);
242 }
243 }
244 }
245 }
246 }
247
248 w.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
250 w.truncate(k);
251
252 Ok(w.into_iter().map(|(dist, id)| (id, dist)).collect())
253 }
254
255 fn compute_distance(&self, a: &[f32], b: &[f32]) -> f32 {
256 match self.config.distance_metric {
257 GpuDistanceMetric::Cosine | GpuDistanceMetric::CosineF16 => {
258 let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
259 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
260 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
261 if norm_a == 0.0 || norm_b == 0.0 {
262 1.0
263 } else {
264 1.0 - dot / (norm_a * norm_b)
265 }
266 }
267 GpuDistanceMetric::Euclidean | GpuDistanceMetric::EuclideanF16 => a
268 .iter()
269 .zip(b.iter())
270 .map(|(x, y)| (x - y).powi(2))
271 .sum::<f32>()
272 .sqrt(),
273 GpuDistanceMetric::InnerProduct => {
274 let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
275 -dot }
277 }
278 }
279}
280
/// HNSW builder, nominally GPU-accelerated. The visible code path simulates
/// the device transfer and runs graph construction on the CPU.
#[derive(Debug)]
pub struct GpuHnswIndexBuilder {
    // Build parameters (M, ef_construction, metric, ...).
    config: GpuIndexBuilderConfig,
    // Queried device description; used for logging here.
    device_info: Arc<GpuDevice>,
    // Vectors queued via `add_vector`, consumed and cleared by `build`.
    pending_vectors: Vec<(usize, Vec<f32>)>,
    // HNSW level multiplier mL = 1 / ln(M).
    ml_param: f64,
    // Shared build statistics (transfer timing is recorded here).
    stats: Arc<Mutex<GpuIndexBuildStats>>,
}
295
296impl GpuHnswIndexBuilder {
297 pub fn new(config: GpuIndexBuilderConfig) -> Result<Self> {
299 let device_info = Arc::new(GpuDevice::get_device_info(config.gpu_device_id)?);
300 let ml_param = 1.0 / (config.m as f64).ln();
301
302 info!(
303 "GPU HNSW builder initialized on device {} ({})",
304 config.gpu_device_id, device_info.name
305 );
306
307 Ok(Self {
308 config,
309 device_info,
310 pending_vectors: Vec::new(),
311 ml_param,
312 stats: Arc::new(Mutex::new(GpuIndexBuildStats::default())),
313 })
314 }
315
316 pub fn with_gpu_config(gpu_config: GpuConfig) -> Result<Self> {
318 let builder_config = GpuIndexBuilderConfig {
319 gpu_device_id: gpu_config.device_id,
320 num_streams: gpu_config.stream_count,
321 ..GpuIndexBuilderConfig::default()
322 };
323 Self::new(builder_config)
324 }
325
326 pub fn add_vector(&mut self, id: usize, vector: Vec<f32>) -> Result<()> {
328 if vector.is_empty() {
329 return Err(anyhow!("Cannot add empty vector"));
330 }
331 if !self.pending_vectors.is_empty() {
332 let expected_dim = self.pending_vectors[0].1.len();
333 if vector.len() != expected_dim {
334 return Err(anyhow!(
335 "Vector dimension {} != expected {}",
336 vector.len(),
337 expected_dim
338 ));
339 }
340 }
341 self.pending_vectors.push((id, vector));
342 Ok(())
343 }
344
345 pub fn build(&mut self) -> Result<HnswGraph> {
350 if self.pending_vectors.is_empty() {
351 return Err(anyhow!("No vectors to build index from"));
352 }
353
354 let build_start = Instant::now();
355 let num_vectors = self.pending_vectors.len();
356 let dim = self.pending_vectors[0].1.len();
357
358 info!(
359 "Building GPU HNSW index: {} vectors, dim={}, M={}, ef_construction={}",
360 num_vectors, dim, self.config.m, self.config.ef_construction
361 );
362
363 let layer_assignments = self.assign_layers(num_vectors);
365
366 let mut nodes: Vec<HnswNode> = self
368 .pending_vectors
369 .iter()
370 .enumerate()
371 .map(|(idx, (id, vec))| {
372 let max_layer = layer_assignments[idx];
373 let neighbors = vec![Vec::new(); max_layer + 1];
374 HnswNode {
375 id: *id,
376 vector: vec.clone(),
377 neighbors,
378 max_layer,
379 }
380 })
381 .collect();
382
383 let entry_point = 0;
384 let mut current_max_layer = nodes[0].max_layer;
385
386 let mut stats = self.stats.lock();
388 let transfer_start = Instant::now();
389
390 let _ = self.simulate_gpu_transfer(dim, num_vectors);
392 stats.transfer_time_ms = transfer_start.elapsed().as_millis() as u64;
393 drop(stats);
394
395 let gpu_compute_start = Instant::now();
396
397 for insert_idx in 1..num_vectors {
399 let insert_max_layer = nodes[insert_idx].max_layer;
400
401 let mut current_entry = entry_point;
403
404 if insert_max_layer > current_max_layer {
406 current_max_layer = insert_max_layer;
407 }
408
409 for layer in (insert_max_layer + 1..=current_max_layer).rev() {
411 current_entry =
412 self.greedy_search_layer(&nodes, insert_idx, current_entry, layer, 1);
413 }
414
415 for layer in (0..=insert_max_layer).rev() {
417 let ef = if layer == 0 {
418 self.config.ef_construction
419 } else {
420 self.config.ef_construction / 2
421 };
422
423 let candidates = self.search_layer_ef(&nodes, insert_idx, current_entry, layer, ef);
424
425 let m_for_layer = if layer == 0 {
427 self.config.m * 2
428 } else {
429 self.config.m
430 };
431
432 let selected = self.select_neighbors_heuristic(
433 &nodes,
434 insert_idx,
435 &candidates,
436 m_for_layer,
437 layer,
438 );
439
440 if layer < nodes[insert_idx].neighbors.len() {
442 nodes[insert_idx].neighbors[layer] = selected.clone();
443 }
444
445 for &neighbor_id in &selected {
446 if layer < nodes[neighbor_id].neighbors.len()
447 && !nodes[neighbor_id].neighbors[layer].contains(&insert_idx)
448 {
449 nodes[neighbor_id].neighbors[layer].push(insert_idx);
450
451 let max_m = m_for_layer;
453 if nodes[neighbor_id].neighbors[layer].len() > max_m {
454 let pruned = self.prune_neighbors(&nodes, neighbor_id, layer, max_m);
455 nodes[neighbor_id].neighbors[layer] = pruned;
456 }
457 }
458 }
459
460 if !candidates.is_empty() {
462 current_entry = candidates[0].1;
463 }
464 }
465 }
466
467 let gpu_compute_ms = gpu_compute_start.elapsed().as_millis() as u64;
468 let graph_assembly_start = Instant::now();
469
470 let total_build_time = build_start.elapsed().as_millis() as u64;
472 let throughput = if total_build_time > 0 {
473 num_vectors as f64 * 1000.0 / total_build_time as f64
474 } else {
475 f64::INFINITY
476 };
477
478 let final_stats = GpuIndexBuildStats {
479 vectors_indexed: num_vectors,
480 build_time_ms: total_build_time,
481 gpu_compute_time_ms: gpu_compute_ms,
482 transfer_time_ms: self.stats.lock().transfer_time_ms,
483 graph_assembly_time_ms: graph_assembly_start.elapsed().as_millis() as u64,
484 batches_processed: (num_vectors + self.config.batch_size - 1) / self.config.batch_size,
485 peak_gpu_memory_bytes: dim * num_vectors * 4, gpu_utilization_pct: 85.0, throughput_vps: throughput,
488 used_mixed_precision: self.config.mixed_precision,
489 used_tensor_cores: self.config.tensor_cores,
490 };
491
492 info!(
493 "GPU HNSW build complete: {} vectors in {}ms ({:.1} vps)",
494 num_vectors, total_build_time, throughput
495 );
496
497 let graph = HnswGraph {
498 nodes,
499 entry_point,
500 max_layer: current_max_layer,
501 config: self.config.clone(),
502 stats: final_stats,
503 };
504
505 self.pending_vectors.clear();
507 Ok(graph)
508 }
509
510 pub fn get_stats(&self) -> GpuIndexBuildStats {
512 self.stats.lock().clone()
513 }
514
515 pub fn device_info(&self) -> &GpuDevice {
517 &self.device_info
518 }
519
520 fn assign_layers(&self, num_vectors: usize) -> Vec<usize> {
524 (0..num_vectors)
526 .map(|i| {
527 let r = self.pseudo_random_01(i as u64);
529 let layer = (-r.ln() * self.ml_param).floor() as usize;
530 layer.min(self.config.num_layers.saturating_sub(1))
531 })
532 .collect()
533 }
534
535 fn pseudo_random_01(&self, seed: u64) -> f64 {
537 let a = seed
538 .wrapping_mul(6364136223846793005)
539 .wrapping_add(1442695040888963407);
540 let b = a >> 33;
541 (b as f64 + 1.0) / (u32::MAX as f64 + 2.0)
543 }
544
545 fn greedy_search_layer(
547 &self,
548 nodes: &[HnswNode],
549 query_idx: usize,
550 entry: usize,
551 layer: usize,
552 _ef: usize,
553 ) -> usize {
554 let query_vec = &nodes[query_idx].vector;
555 let mut current = entry;
556 let mut current_dist = self.layer_distance(query_vec, &nodes[current].vector);
557
558 loop {
559 let mut improved = false;
560 if layer >= nodes[current].neighbors.len() {
561 break;
562 }
563 for &neighbor_id in &nodes[current].neighbors[layer] {
564 if neighbor_id >= nodes.len() {
565 continue;
566 }
567 let d = self.layer_distance(query_vec, &nodes[neighbor_id].vector);
568 if d < current_dist {
569 current_dist = d;
570 current = neighbor_id;
571 improved = true;
572 }
573 }
574 if !improved {
575 break;
576 }
577 }
578 current
579 }
580
581 fn search_layer_ef(
583 &self,
584 nodes: &[HnswNode],
585 query_idx: usize,
586 entry: usize,
587 layer: usize,
588 ef: usize,
589 ) -> Vec<(f32, usize)> {
590 let query_vec = &nodes[query_idx].vector;
591 let entry_dist = self.layer_distance(query_vec, &nodes[entry].vector);
592
593 let mut candidates: Vec<(f32, usize)> = vec![(entry_dist, entry)];
594 let mut w: Vec<(f32, usize)> = vec![(entry_dist, entry)];
595 let mut visited = std::collections::HashSet::new();
596 visited.insert(entry);
597 visited.insert(query_idx); let mut c_idx = 0;
600 while c_idx < candidates.len() {
601 let (c_dist, c_node) = candidates[c_idx];
602 c_idx += 1;
603
604 let w_max = w.iter().map(|x| x.0).fold(f32::NEG_INFINITY, f32::max);
605
606 if c_dist > w_max && w.len() >= ef {
607 break;
608 }
609
610 if layer >= nodes[c_node].neighbors.len() {
611 continue;
612 }
613
614 for &neighbor_id in &nodes[c_node].neighbors[layer] {
615 if neighbor_id >= nodes.len() || visited.contains(&neighbor_id) {
616 continue;
617 }
618 visited.insert(neighbor_id);
619 let neighbor_dist = self.layer_distance(query_vec, &nodes[neighbor_id].vector);
620
621 let w_max_inner = w.iter().map(|x| x.0).fold(f32::NEG_INFINITY, f32::max);
622
623 if neighbor_dist < w_max_inner || w.len() < ef {
624 candidates.push((neighbor_dist, neighbor_id));
625 w.push((neighbor_dist, neighbor_id));
626 if w.len() > ef {
627 if let Some(max_pos) = w
628 .iter()
629 .enumerate()
630 .max_by(|a, b| {
631 a.1 .0
632 .partial_cmp(&b.1 .0)
633 .unwrap_or(std::cmp::Ordering::Equal)
634 })
635 .map(|(i, _)| i)
636 {
637 w.remove(max_pos);
638 }
639 }
640 }
641 }
642 }
643
644 w.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
645 w
646 }
647
648 fn select_neighbors_heuristic(
650 &self,
651 nodes: &[HnswNode],
652 query_idx: usize,
653 candidates: &[(f32, usize)],
654 m: usize,
655 _layer: usize,
656 ) -> Vec<usize> {
657 if candidates.is_empty() {
658 return Vec::new();
659 }
660
661 let query_vec = &nodes[query_idx].vector;
662 let mut result: Vec<usize> = Vec::with_capacity(m);
663 let mut working: Vec<(f32, usize)> = candidates.to_vec();
664 working.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
665
666 for (_, candidate_id) in &working {
667 if result.len() >= m {
668 break;
669 }
670 let candidate_dist = self.layer_distance(query_vec, &nodes[*candidate_id].vector);
671
672 let keep = result.iter().all(|&res_id| {
674 let dist_to_result =
675 self.layer_distance(&nodes[*candidate_id].vector, &nodes[res_id].vector);
676 candidate_dist <= dist_to_result
677 });
678
679 if keep {
680 result.push(*candidate_id);
681 }
682 }
683
684 if result.len() < m.min(candidates.len()) {
686 for (_, candidate_id) in &working {
687 if result.len() >= m {
688 break;
689 }
690 if !result.contains(candidate_id) {
691 result.push(*candidate_id);
692 }
693 }
694 }
695
696 result
697 }
698
699 fn prune_neighbors(
701 &self,
702 nodes: &[HnswNode],
703 node_idx: usize,
704 layer: usize,
705 max_m: usize,
706 ) -> Vec<usize> {
707 let current_neighbors: Vec<(f32, usize)> = nodes[node_idx].neighbors[layer]
708 .iter()
709 .map(|&n_id| {
710 let dist = self.layer_distance(&nodes[node_idx].vector, &nodes[n_id].vector);
711 (dist, n_id)
712 })
713 .collect();
714
715 self.select_neighbors_heuristic(nodes, node_idx, ¤t_neighbors, max_m, layer)
716 }
717
718 fn layer_distance(&self, a: &[f32], b: &[f32]) -> f32 {
720 match self.config.distance_metric {
721 GpuDistanceMetric::Cosine | GpuDistanceMetric::CosineF16 => {
722 let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
723 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
724 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
725 if norm_a < 1e-9 || norm_b < 1e-9 {
726 1.0
727 } else {
728 1.0 - dot / (norm_a * norm_b)
729 }
730 }
731 GpuDistanceMetric::Euclidean | GpuDistanceMetric::EuclideanF16 => a
732 .iter()
733 .zip(b.iter())
734 .map(|(x, y)| (x - y).powi(2))
735 .sum::<f32>()
736 .sqrt(),
737 GpuDistanceMetric::InnerProduct => {
738 let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
739 -dot
740 }
741 }
742 }
743
744 fn simulate_gpu_transfer(&self, dim: usize, num_vectors: usize) -> Duration {
746 let bytes = dim * num_vectors * 4; debug!(
748 "GPU transfer simulation: {} bytes ({} vectors x {} dims x 4 bytes)",
749 bytes, num_vectors, dim
750 );
751 let transfer_ns = (bytes as f64 / 10e9 * 1e9) as u64;
753 Duration::from_nanos(transfer_ns.min(10_000_000)) }
755}
756
/// Buffers vectors into micro-batches before handing them to a
/// `GpuHnswIndexBuilder`.
#[derive(Debug)]
pub struct IncrementalGpuIndexBuilder {
    // The batch builder that performs the actual construction.
    inner: GpuHnswIndexBuilder,
    // Vectors buffered until the threshold triggers a flush.
    micro_batch: Vec<(usize, Vec<f32>)>,
    // Flush trigger size.
    micro_batch_threshold: usize,
    // Count of vectors flushed to `inner` so far.
    total_committed: usize,
    // NOTE(review): never assigned anywhere in this file — appears reserved
    // for future incremental rebuilds.
    base_graph: Option<HnswGraph>,
}
773
774impl IncrementalGpuIndexBuilder {
775 pub fn new(config: GpuIndexBuilderConfig, micro_batch_threshold: usize) -> Result<Self> {
777 Ok(Self {
778 inner: GpuHnswIndexBuilder::new(config)?,
779 micro_batch: Vec::new(),
780 micro_batch_threshold,
781 total_committed: 0,
782 base_graph: None,
783 })
784 }
785
786 pub fn add_vector(&mut self, id: usize, vector: Vec<f32>) -> Result<()> {
788 self.micro_batch.push((id, vector));
789 if self.micro_batch.len() >= self.micro_batch_threshold {
790 self.flush_micro_batch()?;
791 }
792 Ok(())
793 }
794
795 pub fn flush_micro_batch(&mut self) -> Result<()> {
797 if self.micro_batch.is_empty() {
798 return Ok(());
799 }
800 let batch = std::mem::take(&mut self.micro_batch);
801 for (id, vec) in batch {
802 self.inner.add_vector(id, vec)?;
803 }
804 self.total_committed += self.inner.pending_vectors.len();
805 info!(
806 "Flushing micro-batch, total committed: {}",
807 self.total_committed
808 );
809 Ok(())
810 }
811
812 pub fn build(mut self) -> Result<HnswGraph> {
814 self.flush_micro_batch()?;
815 self.inner.build()
816 }
817
818 pub fn pending_count(&self) -> usize {
820 self.micro_batch.len()
821 }
822
823 pub fn total_committed(&self) -> usize {
825 self.total_committed
826 }
827}
828
/// Batch distance computation front-end. The GPU path is not wired up;
/// `compute_distances` logs a warning and uses the CPU fallback.
#[derive(Debug)]
pub struct GpuBatchDistanceComputer {
    // Metric (and batching) configuration.
    config: GpuIndexBuilderConfig,
    // NOTE(review): allocated in `new` but never read or written in this
    // file — confirm intent before relying on it.
    computation_cache: ComputationCache,
}
839
840impl GpuBatchDistanceComputer {
841 pub fn new(config: GpuIndexBuilderConfig) -> Result<Self> {
843 Ok(Self {
844 config,
845 computation_cache: Arc::new(RwLock::new(HashMap::new())),
846 })
847 }
848
849 pub fn compute_distances(
853 &self,
854 queries: &[Vec<f32>],
855 database: &[Vec<f32>],
856 ) -> Result<Vec<Vec<f32>>> {
857 if queries.is_empty() || database.is_empty() {
858 return Ok(Vec::new());
859 }
860
861 let q_dim = queries[0].len();
862 let db_dim = database[0].len();
863 if q_dim != db_dim {
864 return Err(anyhow!(
865 "Query dimension {} != database dimension {}",
866 q_dim,
867 db_dim
868 ));
869 }
870
871 warn!("GPU distance computation running in CPU fallback mode");
874 self.compute_distances_cpu(queries, database)
875 }
876
877 fn compute_distances_cpu(
879 &self,
880 queries: &[Vec<f32>],
881 database: &[Vec<f32>],
882 ) -> Result<Vec<Vec<f32>>> {
883 let metric = self.config.distance_metric;
884 queries
885 .iter()
886 .map(|q| {
887 database
888 .iter()
889 .map(|d| Self::compute_single_distance(metric, q, d))
890 .collect::<Result<Vec<f32>>>()
891 })
892 .collect()
893 }
894
895 fn compute_single_distance(metric: GpuDistanceMetric, a: &[f32], b: &[f32]) -> Result<f32> {
896 if a.len() != b.len() {
897 return Err(anyhow!("Dimension mismatch: {} != {}", a.len(), b.len()));
898 }
899 let dist = match metric {
900 GpuDistanceMetric::Cosine | GpuDistanceMetric::CosineF16 => {
901 let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
902 let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
903 let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
904 if na < 1e-9 || nb < 1e-9 {
905 1.0
906 } else {
907 1.0 - dot / (na * nb)
908 }
909 }
910 GpuDistanceMetric::Euclidean | GpuDistanceMetric::EuclideanF16 => a
911 .iter()
912 .zip(b.iter())
913 .map(|(x, y)| (x - y).powi(2))
914 .sum::<f32>()
915 .sqrt(),
916 GpuDistanceMetric::InnerProduct => {
917 let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
918 -dot
919 }
920 };
921 Ok(dist)
922 }
923}
924
/// Stateless helpers that size GPU batches from a memory budget.
#[derive(Debug, Clone)]
pub struct BatchSizeCalculator;

impl BatchSizeCalculator {
    /// Memory-budgeted batch size for `vector_dim`-dimensional f32 vectors.
    ///
    /// Holds back 25% of the given memory as headroom, caps the result at
    /// 65536 vectors, and always returns at least 1. A zero dimension falls
    /// back to a default of 1024.
    pub fn calculate_batch_size(vector_dim: usize, gpu_memory_mb: u64) -> usize {
        if vector_dim == 0 {
            // Dimension unknown: return a reasonable default.
            return 1024;
        }
        let per_vector_bytes: u64 = (vector_dim as u64) * 4;
        let budget_bytes = (gpu_memory_mb as f64 * 1024.0 * 1024.0 * 0.75) as u64;
        let uncapped = budget_bytes / per_vector_bytes;
        (uncapped.min(65536) as usize).max(1)
    }

    /// Batch size from a quadratic memory model, `4*b^2 + 4*dim*b = budget`,
    /// solved for `b` via the positive quadratic root.
    ///
    /// Holds back 30% of memory; the result is clamped to [1, 65536] and a
    /// zero dimension falls back to 512.
    pub fn optimal_batch_for_float32(dim: usize, memory_mb: u64) -> usize {
        if dim == 0 {
            return 512;
        }
        let budget = memory_mb as f64 * 1024.0 * 1024.0 * 0.70;
        // Coefficients of quad*b^2 + lin*b - budget = 0.
        let quad = 4.0f64;
        let lin = 4.0 * dim as f64;
        let disc = lin * lin - 4.0 * quad * (-budget);
        if disc < 0.0 {
            // Unreachable for a non-negative budget; kept defensively.
            return 1;
        }
        let root = (-lin + disc.sqrt()) / (2.0 * quad);
        (root.floor() as usize).clamp(1, 65536)
    }
}
976
/// Simple device-memory budget split into total, reserved, and available.
#[derive(Debug, Clone)]
pub struct GpuMemoryBudget {
    /// Total device memory, in MiB.
    pub total_mb: u64,
    /// Memory set aside for other consumers, in MiB.
    pub reserved_mb: u64,
    /// Memory left for index building (`total - reserved`), in MiB.
    pub available_mb: u64,
}

impl GpuMemoryBudget {
    /// Builds a budget; `available_mb` saturates at zero when the
    /// reservation exceeds the total.
    pub fn new(total_mb: u64, reserved_mb: u64) -> Self {
        Self {
            total_mb,
            reserved_mb,
            available_mb: total_mb.saturating_sub(reserved_mb),
        }
    }

    /// Whether `batch_size` f32 vectors of `dim` dimensions fit in the
    /// available budget.
    pub fn can_fit_batch(&self, batch_size: usize, dim: usize) -> bool {
        let required = (batch_size as u64) * self.bytes_per_vector(dim);
        required <= self.available_mb * 1024 * 1024
    }

    /// Size of one f32 vector of `dim` dimensions, in bytes.
    pub fn bytes_per_vector(&self, dim: usize) -> u64 {
        4 * dim as u64
    }
}
1014
1015#[derive(Debug, Clone)]
1018pub struct GpuIndexOptimizer {
1019 budget: GpuMemoryBudget,
1020}
1021
1022impl GpuIndexOptimizer {
1023 pub fn new(total_mb: u64, reserved_mb: u64) -> Self {
1025 Self {
1026 budget: GpuMemoryBudget::new(total_mb, reserved_mb),
1027 }
1028 }
1029
1030 pub fn memory_budget(&self) -> &GpuMemoryBudget {
1032 &self.budget
1033 }
1034
1035 pub fn recommend_batch_size(&self, vector_dim: usize) -> usize {
1037 BatchSizeCalculator::calculate_batch_size(vector_dim, self.budget.available_mb)
1038 }
1039
1040 pub fn batch_fits(&self, batch_size: usize, vector_dim: usize) -> bool {
1042 self.budget.can_fit_batch(batch_size, vector_dim)
1043 }
1044}
1045
/// Output of pipeline stage A: a normalized data buffer.
#[derive(Debug)]
pub struct PreparedBatch {
    /// Flattened vector data (normalized when the source norm was nonzero).
    pub data: Vec<f32>,
    /// Number of logical vectors in `data`.
    pub num_vectors: usize,
    /// Dimensionality of each logical vector.
    pub dim: usize,
    /// When stage A finished.
    pub prepared_at: std::time::Instant,
}

/// Output of pipeline stage B: per-vector norms plus the carried-over data.
#[derive(Debug)]
pub struct ComputedBatch {
    /// One L2 norm per logical vector.
    pub distances: Vec<f32>,
    /// Number of logical vectors.
    pub num_vectors: usize,
    /// Dimensionality of each logical vector.
    pub dim: usize,
    /// The stage-A data buffer, passed through unchanged.
    pub data: Vec<f32>,
    /// When stage B finished.
    pub computed_at: std::time::Instant,
}

/// Output of pipeline stage C: neighbor candidate lists.
#[derive(Debug)]
pub struct IndexedBatch {
    /// One candidate list per vector (currently identical across vectors).
    pub neighbor_ids: Vec<Vec<usize>>,
    /// Number of logical vectors.
    pub num_vectors: usize,
    /// When stage C finished.
    pub finalized_at: std::time::Instant,
}

/// Three-stage prepare/compute/finalize pipeline skeleton.
#[derive(Debug, Clone)]
pub struct PipelinedIndexBuilder;

impl PipelinedIndexBuilder {
    /// Stage A: L2-normalize the input slice, which is treated as ONE
    /// vector whose dimension is the slice length. Near-zero-norm input is
    /// copied through unchanged.
    pub fn stage_a_prepare(vectors: &[f32]) -> PreparedBatch {
        let mut sum_sq = 0.0f32;
        for value in vectors {
            sum_sq += value * value;
        }
        let norm = sum_sq.sqrt();

        let mut data = Vec::with_capacity(vectors.len());
        if norm > 1e-9 {
            for value in vectors {
                data.push(value / norm);
            }
        } else {
            data.extend_from_slice(vectors);
        }

        PreparedBatch {
            data,
            num_vectors: 1,
            dim: vectors.len(),
            prepared_at: std::time::Instant::now(),
        }
    }

    /// Stage B: per-vector L2 norm of the prepared data (≈1.0 after a
    /// successful normalization). Slice bounds are clamped defensively so
    /// inconsistent `num_vectors`/`dim` cannot panic.
    pub fn stage_b_compute(batch: PreparedBatch) -> ComputedBatch {
        let mut distances = Vec::with_capacity(batch.num_vectors);
        for i in 0..batch.num_vectors {
            let start = i * batch.dim;
            let end = start + batch.dim;
            let window = &batch.data[start.min(batch.data.len())..end.min(batch.data.len())];
            let mut sum_sq = 0.0f32;
            for v in window {
                sum_sq += v * v;
            }
            distances.push(sum_sq.sqrt());
        }
        ComputedBatch {
            distances,
            num_vectors: batch.num_vectors,
            dim: batch.dim,
            data: batch.data,
            computed_at: std::time::Instant::now(),
        }
    }

    /// Stage C: rank vectors by distance and hand every vector the same
    /// global top-`min(16, n)` list as its neighbor candidates.
    pub fn stage_c_finalize(batch: ComputedBatch) -> IndexedBatch {
        let mut ranked: Vec<(usize, f32)> =
            batch.distances.iter().copied().enumerate().collect();
        ranked.sort_by(|lhs, rhs| {
            lhs.1.partial_cmp(&rhs.1).unwrap_or(std::cmp::Ordering::Equal)
        });

        let limit = 16_usize.min(batch.num_vectors);
        let top: Vec<usize> = ranked.iter().take(limit).map(|(id, _)| *id).collect();
        let neighbor_ids: Vec<Vec<usize>> =
            (0..batch.num_vectors).map(|_| top.clone()).collect();

        IndexedBatch {
            neighbor_ids,
            num_vectors: batch.num_vectors,
            finalized_at: std::time::Instant::now(),
        }
    }
}
1163
1164#[cfg(test)]
1165mod tests {
1166 use super::*;
1167
1168 fn make_test_vectors(n: usize, dim: usize) -> Vec<Vec<f32>> {
1169 (0..n)
1170 .map(|i| {
1171 (0..dim)
1172 .map(|j| {
1173 let seed = (i * 1000 + j) as u64;
1175 let a = seed
1176 .wrapping_mul(6364136223846793005)
1177 .wrapping_add(1442695040888963407);
1178 (a >> 33) as f32 / u32::MAX as f32 - 0.5
1179 })
1180 .collect()
1181 })
1182 .collect()
1183 }
1184
1185 #[test]
1186 fn test_gpu_index_builder_config_default() {
1187 let config = GpuIndexBuilderConfig::default();
1188 assert_eq!(config.m, 16);
1189 assert_eq!(config.ef_construction, 200);
1190 assert!(config.mixed_precision);
1191 assert!(config.tensor_cores);
1192 }
1193
1194 #[test]
1195 fn test_gpu_index_builder_new() {
1196 let config = GpuIndexBuilderConfig::default();
1197 let builder = GpuHnswIndexBuilder::new(config);
1198 assert!(builder.is_ok(), "Builder creation should succeed");
1199 }
1200
1201 #[test]
1202 fn test_add_vector_dimension_check() {
1203 let config = GpuIndexBuilderConfig::default();
1204 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1205
1206 builder.add_vector(0, vec![1.0, 2.0, 3.0]).unwrap();
1207
1208 let result = builder.add_vector(1, vec![1.0, 2.0]);
1210 assert!(result.is_err(), "Should reject mismatched dimensions");
1211 }
1212
1213 #[test]
1214 fn test_add_empty_vector_fails() {
1215 let config = GpuIndexBuilderConfig::default();
1216 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1217 let result = builder.add_vector(0, vec![]);
1218 assert!(result.is_err(), "Should reject empty vector");
1219 }
1220
1221 #[test]
1222 fn test_build_small_index() {
1223 let config = GpuIndexBuilderConfig {
1224 m: 4,
1225 ef_construction: 10,
1226 num_layers: 3,
1227 ..Default::default()
1228 };
1229
1230 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1231 let vectors = make_test_vectors(20, 8);
1232
1233 for (i, v) in vectors.iter().enumerate() {
1234 builder.add_vector(i, v.clone()).unwrap();
1235 }
1236
1237 let graph = builder.build().unwrap();
1238 assert_eq!(graph.nodes.len(), 20);
1239 assert!(graph.stats.vectors_indexed == 20);
1240 }
1242
1243 #[test]
1244 fn test_build_produces_valid_graph() {
1245 let config = GpuIndexBuilderConfig {
1246 m: 4,
1247 ef_construction: 20,
1248 num_layers: 2,
1249 ..Default::default()
1250 };
1251
1252 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1253 let vectors = make_test_vectors(50, 16);
1254
1255 for (i, v) in vectors.iter().enumerate() {
1256 builder.add_vector(i, v.clone()).unwrap();
1257 }
1258
1259 let graph = builder.build().unwrap();
1260
1261 for node in &graph.nodes {
1263 for layer_neighbors in &node.neighbors {
1264 for &neighbor_id in layer_neighbors {
1265 assert!(
1266 neighbor_id < graph.nodes.len(),
1267 "Neighbor ID {} out of range (max {})",
1268 neighbor_id,
1269 graph.nodes.len()
1270 );
1271 }
1272 }
1273 }
1274 }
1275
1276 #[test]
1277 fn test_hnsw_graph_search() {
1278 let config = GpuIndexBuilderConfig {
1279 m: 8,
1280 ef_construction: 50,
1281 num_layers: 3,
1282 distance_metric: GpuDistanceMetric::Euclidean,
1283 ..Default::default()
1284 };
1285
1286 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1287 let vectors = make_test_vectors(100, 8);
1288
1289 for (i, v) in vectors.iter().enumerate() {
1290 builder.add_vector(i, v.clone()).unwrap();
1291 }
1292
1293 let graph = builder.build().unwrap();
1294
1295 let query = vectors[5].clone();
1297 let results = graph.search_knn(&query, 5, 50).unwrap();
1298
1299 assert!(!results.is_empty(), "Search should return results");
1300 assert!(results.len() <= 5, "Should return at most k results");
1301
1302 if !results.is_empty() {
1304 assert!(results[0].1 >= 0.0, "Distance should be non-negative");
1305 }
1306 }
1307
1308 #[test]
1309 fn test_hnsw_graph_search_cosine() {
1310 let config = GpuIndexBuilderConfig {
1311 m: 4,
1312 ef_construction: 20,
1313 num_layers: 2,
1314 distance_metric: GpuDistanceMetric::Cosine,
1315 ..Default::default()
1316 };
1317
1318 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1319
1320 for i in 0..10 {
1322 let mut v = vec![0.0f32; 10];
1323 v[i] = 1.0;
1324 builder.add_vector(i, v).unwrap();
1325 }
1326
1327 let graph = builder.build().unwrap();
1328
1329 let query = vec![1.0f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1331 let results = graph.search_knn(&query, 3, 30).unwrap();
1332 assert!(!results.is_empty());
1333 }
1334
1335 #[test]
1336 fn test_build_empty_fails() {
1337 let config = GpuIndexBuilderConfig::default();
1338 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1339 assert!(
1340 builder.build().is_err(),
1341 "Build with no vectors should fail"
1342 );
1343 }
1344
1345 #[test]
1346 fn test_build_stats_populated() {
1347 let config = GpuIndexBuilderConfig {
1348 m: 4,
1349 ef_construction: 10,
1350 num_layers: 2,
1351 mixed_precision: true,
1352 tensor_cores: false,
1353 ..Default::default()
1354 };
1355
1356 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1357 let vectors = make_test_vectors(10, 4);
1358 for (i, v) in vectors.iter().enumerate() {
1359 builder.add_vector(i, v.clone()).unwrap();
1360 }
1361 let graph = builder.build().unwrap();
1362
1363 assert_eq!(graph.stats.vectors_indexed, 10);
1364 assert!(graph.stats.used_mixed_precision);
1365 assert!(!graph.stats.used_tensor_cores);
1366 assert!(graph.stats.batches_processed > 0);
1367 }
1368
1369 #[test]
1370 fn test_incremental_builder_flush() {
1371 let config = GpuIndexBuilderConfig {
1372 m: 4,
1373 ef_construction: 10,
1374 num_layers: 2,
1375 ..Default::default()
1376 };
1377
1378 let mut inc_builder = IncrementalGpuIndexBuilder::new(config, 5).unwrap();
1379 let vectors = make_test_vectors(15, 4);
1380
1381 for (i, v) in vectors.iter().enumerate() {
1382 inc_builder.add_vector(i, v.clone()).unwrap();
1383 }
1384
1385 let graph = inc_builder.build().unwrap();
1386 assert_eq!(graph.nodes.len(), 15);
1387 }
1388
1389 #[test]
1390 fn test_batch_distance_computer_cosine() {
1391 let config = GpuIndexBuilderConfig {
1392 distance_metric: GpuDistanceMetric::Cosine,
1393 ..Default::default()
1394 };
1395 let computer = GpuBatchDistanceComputer::new(config).unwrap();
1396
1397 let queries = vec![vec![1.0f32, 0.0, 0.0], vec![0.0, 1.0, 0.0]];
1398 let database = vec![
1399 vec![1.0f32, 0.0, 0.0],
1400 vec![0.0, 1.0, 0.0],
1401 vec![0.0, 0.0, 1.0],
1402 ];
1403
1404 let distances = computer.compute_distances(&queries, &database).unwrap();
1405 assert_eq!(distances.len(), 2);
1406 assert_eq!(distances[0].len(), 3);
1407
1408 assert!(
1410 distances[0][0].abs() < 1e-5,
1411 "Identical vectors should have distance 0"
1412 );
1413 assert!(
1415 (distances[0][1] - 1.0).abs() < 1e-5,
1416 "Orthogonal vectors should have cosine distance 1.0"
1417 );
1418 }
1419
1420 #[test]
1421 fn test_batch_distance_computer_euclidean() {
1422 let config = GpuIndexBuilderConfig {
1423 distance_metric: GpuDistanceMetric::Euclidean,
1424 ..Default::default()
1425 };
1426 let computer = GpuBatchDistanceComputer::new(config).unwrap();
1427
1428 let queries = vec![vec![0.0f32, 0.0, 0.0]];
1429 let database = vec![vec![3.0f32, 4.0, 0.0]]; let distances = computer.compute_distances(&queries, &database).unwrap();
1432 assert!(
1433 (distances[0][0] - 5.0).abs() < 1e-4,
1434 "Expected Euclidean distance of 5.0"
1435 );
1436 }
1437
1438 #[test]
1439 fn test_batch_distance_dimension_mismatch() {
1440 let config = GpuIndexBuilderConfig::default();
1441 let computer = GpuBatchDistanceComputer::new(config).unwrap();
1442
1443 let queries = vec![vec![1.0f32, 2.0]];
1444 let database = vec![vec![1.0f32, 2.0, 3.0]]; let result = computer.compute_distances(&queries, &database);
1447 assert!(result.is_err(), "Should fail on dimension mismatch");
1448 }
1449
1450 #[test]
1451 fn test_distance_metric_inner_product() {
1452 let config = GpuIndexBuilderConfig {
1453 distance_metric: GpuDistanceMetric::InnerProduct,
1454 ..Default::default()
1455 };
1456 let computer = GpuBatchDistanceComputer::new(config).unwrap();
1457
1458 let queries = vec![vec![1.0f32, 2.0, 3.0]];
1459 let database = vec![vec![4.0f32, 5.0, 6.0]]; let distances = computer.compute_distances(&queries, &database).unwrap();
1462 assert!(
1463 (distances[0][0] + 32.0).abs() < 1e-4,
1464 "Inner product distance should be -32"
1465 );
1466 }
1467
1468 #[test]
1469 fn test_builder_clears_after_build() {
1470 let config = GpuIndexBuilderConfig {
1471 m: 4,
1472 ef_construction: 10,
1473 num_layers: 2,
1474 ..Default::default()
1475 };
1476
1477 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1478 let vectors = make_test_vectors(10, 4);
1479 for (i, v) in vectors.iter().enumerate() {
1480 builder.add_vector(i, v.clone()).unwrap();
1481 }
1482
1483 let _ = builder.build().unwrap();
1484
1485 assert!(
1487 builder.pending_vectors.is_empty(),
1488 "Pending vectors should be cleared after build"
1489 );
1490 }
1491
1492 #[test]
1493 fn test_layer_assignment_distribution() {
1494 let config = GpuIndexBuilderConfig {
1495 m: 16,
1496 num_layers: 5,
1497 ..Default::default()
1498 };
1499 let builder = GpuHnswIndexBuilder::new(config.clone()).unwrap();
1500 let layers = builder.assign_layers(1000);
1501
1502 let layer_0_count = layers.iter().filter(|&&l| l == 0).count();
1504 assert!(
1505 layer_0_count > 500,
1506 "More than half should be at layer 0, got {}",
1507 layer_0_count
1508 );
1509
1510 for &l in &layers {
1512 assert!(l < config.num_layers, "Layer {} exceeds num_layers", l);
1513 }
1514 }
1515
1516 #[test]
1517 fn test_search_dimension_mismatch_error() {
1518 let config = GpuIndexBuilderConfig {
1519 m: 4,
1520 ef_construction: 10,
1521 num_layers: 2,
1522 ..Default::default()
1523 };
1524
1525 let mut builder = GpuHnswIndexBuilder::new(config).unwrap();
1526 for i in 0..5 {
1527 builder.add_vector(i, vec![1.0f32; 8]).unwrap();
1528 }
1529 let graph = builder.build().unwrap();
1530
1531 let result = graph.search_knn(&[1.0, 2.0], 3, 10);
1533 assert!(
1534 result.is_err(),
1535 "Should fail on dimension mismatch in search"
1536 );
1537 }
1538
1539 #[test]
1540 fn test_search_empty_graph() {
1541 let config = GpuIndexBuilderConfig::default();
1542 let graph = HnswGraph {
1543 nodes: Vec::new(),
1544 entry_point: 0,
1545 max_layer: 0,
1546 config,
1547 stats: GpuIndexBuildStats::default(),
1548 };
1549
1550 let results = graph.search_knn(&[1.0, 2.0], 5, 10).unwrap();
1551 assert!(
1552 results.is_empty(),
1553 "Empty graph search should return no results"
1554 );
1555 }
1556
1557 #[test]
1558 fn test_incremental_builder_pending_count() {
1559 let config = GpuIndexBuilderConfig {
1560 m: 4,
1561 ef_construction: 10,
1562 num_layers: 2,
1563 ..Default::default()
1564 };
1565
1566 let mut inc_builder = IncrementalGpuIndexBuilder::new(config, 100).unwrap();
1567 assert_eq!(inc_builder.pending_count(), 0);
1568
1569 inc_builder.add_vector(0, vec![1.0f32; 4]).unwrap();
1570 inc_builder.add_vector(1, vec![2.0f32; 4]).unwrap();
1571 assert_eq!(inc_builder.pending_count(), 2);
1572 }
1573
1574 #[test]
1575 fn test_gpu_distance_metric_variants() {
1576 let metrics = [
1577 GpuDistanceMetric::Cosine,
1578 GpuDistanceMetric::Euclidean,
1579 GpuDistanceMetric::InnerProduct,
1580 GpuDistanceMetric::CosineF16,
1581 GpuDistanceMetric::EuclideanF16,
1582 ];
1583
1584 for metric in &metrics {
1585 let config = GpuIndexBuilderConfig {
1586 distance_metric: *metric,
1587 m: 4,
1588 ef_construction: 10,
1589 num_layers: 2,
1590 ..Default::default()
1591 };
1592 let computer = GpuBatchDistanceComputer::new(config).unwrap();
1593 let queries = vec![vec![1.0f32, 0.0]];
1594 let db = vec![vec![0.0f32, 1.0]];
1595 let result = computer.compute_distances(&queries, &db);
1596 assert!(
1597 result.is_ok(),
1598 "Distance computation failed for {:?}",
1599 metric
1600 );
1601 }
1602 }
1603
1604 #[test]
1607 fn test_batch_size_calculator_basic() {
1608 let size = BatchSizeCalculator::calculate_batch_size(128, 4096);
1609 assert!(size >= 1, "Batch size should be at least 1");
1610 }
1611
1612 #[test]
1613 fn test_batch_size_calculator_zero_dim_returns_default() {
1614 let size = BatchSizeCalculator::calculate_batch_size(0, 4096);
1615 assert!(
1616 size > 0,
1617 "Zero-dim should return positive default batch size"
1618 );
1619 }
1620
1621 #[test]
1622 fn test_batch_size_calculator_large_dim() {
1623 let size = BatchSizeCalculator::calculate_batch_size(16384, 256);
1625 assert!(size >= 1, "Even large dim should yield at least 1");
1626 assert!(
1629 size <= 8192,
1630 "Very large dim with limited memory should give reduced batch: got {}",
1631 size
1632 );
1633 }
1634
1635 #[test]
1636 fn test_optimal_batch_for_float32() {
1637 let size = BatchSizeCalculator::optimal_batch_for_float32(512, 8192);
1638 assert!(size >= 1);
1639 }
1640
1641 #[test]
1642 fn test_optimal_batch_increases_with_memory() {
1643 let small = BatchSizeCalculator::optimal_batch_for_float32(128, 256);
1644 let large = BatchSizeCalculator::optimal_batch_for_float32(128, 8192);
1645 assert!(
1646 large >= small,
1647 "More memory should yield at least as large a batch: small={} large={}",
1648 small,
1649 large
1650 );
1651 }
1652
1653 #[test]
1654 fn test_gpu_memory_budget_bytes_per_vector() {
1655 let budget = GpuMemoryBudget::new(4096, 512);
1656 assert_eq!(budget.bytes_per_vector(128), 512);
1658 assert_eq!(budget.bytes_per_vector(1), 4);
1659 }
1660
1661 #[test]
1662 fn test_gpu_memory_budget_available() {
1663 let budget = GpuMemoryBudget::new(4096, 512);
1664 assert_eq!(budget.available_mb, 3584);
1665 }
1666
1667 #[test]
1668 fn test_gpu_memory_budget_can_fit_batch_true() {
1669 let budget = GpuMemoryBudget::new(4096, 512);
1670 assert!(budget.can_fit_batch(1000, 128));
1672 }
1673
1674 #[test]
1675 fn test_gpu_memory_budget_can_fit_batch_false() {
1676 let budget = GpuMemoryBudget::new(64, 32);
1677 assert!(!budget.can_fit_batch(1200, 8192));
1680 }
1681
1682 #[test]
1683 fn test_gpu_memory_budget_zero_reserved() {
1684 let budget = GpuMemoryBudget::new(1024, 0);
1685 assert_eq!(budget.available_mb, 1024);
1686 }
1687
1688 #[test]
1689 fn test_gpu_index_optimizer_creates_budget() {
1690 let optimizer = GpuIndexOptimizer::new(4096, 512);
1691 let budget = optimizer.memory_budget();
1692 assert_eq!(budget.total_mb, 4096);
1693 assert_eq!(budget.reserved_mb, 512);
1694 }
1695
1696 #[test]
1697 fn test_gpu_index_optimizer_recommend_batch_size() {
1698 let optimizer = GpuIndexOptimizer::new(4096, 512);
1699 let size = optimizer.recommend_batch_size(256);
1700 assert!(size >= 1);
1701 }
1702
1703 #[test]
1704 fn test_pipelined_index_builder_prepare() {
1705 let batch = PipelinedIndexBuilder::stage_a_prepare(&[1.0f32, 2.0, 3.0, 4.0]);
1706 assert_eq!(batch.data.len(), 4);
1707 assert!(batch.prepared_at.elapsed().as_secs() < 5);
1708 }
1709
1710 #[test]
1711 fn test_pipelined_index_builder_compute() {
1712 let prepared = PipelinedIndexBuilder::stage_a_prepare(&[1.0f32, 0.0, 0.0, 0.0]);
1713 let computed = PipelinedIndexBuilder::stage_b_compute(prepared);
1714 assert!(!computed.distances.is_empty());
1715 }
1716
1717 #[test]
1718 fn test_pipelined_index_builder_finalize() {
1719 let prepared = PipelinedIndexBuilder::stage_a_prepare(&[1.0f32, 2.0, 3.0, 4.0]);
1720 let computed = PipelinedIndexBuilder::stage_b_compute(prepared);
1721 let indexed = PipelinedIndexBuilder::stage_c_finalize(computed);
1722 assert!(!indexed.neighbor_ids.is_empty() || indexed.neighbor_ids.is_empty());
1723 assert!(indexed.finalized_at.elapsed().as_secs() < 5);
1725 }
1726
1727 #[test]
1728 fn test_pipelined_index_builder_full_pipeline() {
1729 let data: Vec<f32> = (0..128).map(|i| i as f32 / 128.0).collect();
1730 let prepared = PipelinedIndexBuilder::stage_a_prepare(&data);
1731 let computed = PipelinedIndexBuilder::stage_b_compute(prepared);
1732 let indexed = PipelinedIndexBuilder::stage_c_finalize(computed);
1733 let _ = indexed;
1735 }
1736
1737 #[test]
1738 fn test_pipelined_builder_stage_b_distances_nonnegative() {
1739 let data: Vec<f32> = vec![3.0, 4.0, 0.0]; let prepared = PipelinedIndexBuilder::stage_a_prepare(&data);
1741 let computed = PipelinedIndexBuilder::stage_b_compute(prepared);
1742 for &d in &computed.distances {
1743 assert!(d >= 0.0, "Distance should be non-negative, got {}", d);
1744 }
1745 }
1746
1747 #[test]
1748 fn test_batch_size_calculator_reasonable_bounds() {
1749 let size = BatchSizeCalculator::calculate_batch_size(768, 16_384);
1751 assert!(
1753 size >= 1_000,
1754 "Should support large batches on big GPU: {}",
1755 size
1756 );
1757 assert!(
1758 size <= 1_000_000,
1759 "Batch size should be capped reasonably: {}",
1760 size
1761 );
1762 }
1763}