oxirs_vec/hnsw/
types.rs

1//! Basic types for HNSW implementation
2
3use crate::Vector;
4use std::cmp::Ordering;
5use std::collections::HashSet;
6
7/// A candidate for nearest neighbor search
8#[derive(Debug, Clone, Copy)]
9pub struct Candidate {
10    pub distance: f32,
11    pub id: usize,
12}
13
14impl PartialEq for Candidate {
15    fn eq(&self, other: &Self) -> bool {
16        self.distance == other.distance && self.id == other.id
17    }
18}
19
20impl Eq for Candidate {}
21
22impl PartialOrd for Candidate {
23    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
24        Some(self.cmp(other))
25    }
26}
27
28impl Ord for Candidate {
29    fn cmp(&self, other: &Self) -> Ordering {
30        // Reverse ordering for min-heap behavior
31        other
32            .distance
33            .partial_cmp(&self.distance)
34            .unwrap_or(Ordering::Equal)
35            .then_with(|| self.id.cmp(&other.id))
36    }
37}
38
39impl Candidate {
40    /// Create a new candidate
41    pub fn new(id: usize, distance: f32) -> Self {
42        Self { id, distance }
43    }
44}
45
46/// Node in the HNSW graph with cache-friendly layout
47#[derive(Debug, Clone)]
48pub struct Node {
49    /// Vector data (stored first for cache efficiency)
50    pub vector: Vector,
51    /// URI identifier
52    pub uri: String,
53    /// Connections for each layer (layer -> set of connected node IDs)
54    pub connections: Vec<HashSet<usize>>,
55    /// Cache-friendly vector data for SIMD operations
56    pub vector_data_f32: Vec<f32>,
57    /// Node access frequency (for cache optimization)
58    pub access_count: u64,
59    /// Metadata for filtered search
60    pub metadata: std::collections::HashMap<String, String>,
61}
62
63impl Node {
64    pub fn new(uri: String, vector: Vector, max_level: usize) -> Self {
65        let vector_data_f32 = vector.as_f32();
66        Self {
67            vector,
68            uri,
69            connections: vec![HashSet::new(); max_level + 1],
70            vector_data_f32,
71            access_count: 0,
72            metadata: std::collections::HashMap::new(),
73        }
74    }
75
76    /// Create a new node with metadata
77    pub fn with_metadata(
78        uri: String,
79        vector: Vector,
80        max_level: usize,
81        metadata: std::collections::HashMap<String, String>,
82    ) -> Self {
83        let vector_data_f32 = vector.as_f32();
84        Self {
85            vector,
86            uri,
87            connections: vec![HashSet::new(); max_level + 1],
88            vector_data_f32,
89            access_count: 0,
90            metadata,
91        }
92    }
93
94    /// Increment access count for cache optimization
95    pub fn record_access(&mut self) {
96        self.access_count = self.access_count.saturating_add(1);
97    }
98
99    /// Get the maximum level for this node
100    pub fn level(&self) -> usize {
101        self.connections.len().saturating_sub(1)
102    }
103
104    /// Get connections at a specific level
105    pub fn get_connections(&self, level: usize) -> Option<&HashSet<usize>> {
106        self.connections.get(level)
107    }
108
109    /// Get mutable connections at a specific level
110    pub fn get_connections_mut(&mut self, level: usize) -> Option<&mut HashSet<usize>> {
111        self.connections.get_mut(level)
112    }
113
114    /// Add a connection at a specific level
115    pub fn add_connection(&mut self, level: usize, node_id: usize) {
116        if let Some(connections) = self.connections.get_mut(level) {
117            connections.insert(node_id);
118        }
119    }
120
121    /// Remove a connection at a specific level
122    pub fn remove_connection(&mut self, level: usize, node_id: usize) {
123        if let Some(connections) = self.connections.get_mut(level) {
124            connections.remove(&node_id);
125        }
126    }
127}
128
129/// Connectivity statistics for HNSW graph analysis
130#[derive(Debug, Clone)]
131pub struct ConnectivityStats {
132    pub total_nodes: usize,
133    pub total_connections: usize,
134    pub avg_connections: f64,
135    pub max_connections: usize,
136    pub isolated_nodes: usize,
137    pub connectivity_ratio: f64,
138}