grafeo_engine/database/index.rs
1//! Index management for GrafeoDB (property, vector, and text indexes).
2
3#[cfg(any(feature = "vector-index", feature = "text-index"))]
4use std::sync::Arc;
5
6#[cfg(feature = "text-index")]
7use parking_lot::RwLock;
8
9use grafeo_common::utils::error::Result;
10
11impl super::GrafeoDB {
12 // =========================================================================
13 // PROPERTY INDEX API
14 // =========================================================================
15
16 /// Creates an index on a node property for O(1) lookups by value.
17 ///
18 /// After creating an index, calls to [`Self::find_nodes_by_property`] will be
19 /// O(1) instead of O(n) for this property. The index is automatically
20 /// maintained when properties are set or removed.
21 ///
22 /// # Example
23 ///
24 /// ```no_run
25 /// # use grafeo_engine::GrafeoDB;
26 /// # use grafeo_common::types::Value;
27 /// # let db = GrafeoDB::new_in_memory();
28 /// // Create an index on the 'email' property
29 /// db.create_property_index("email");
30 ///
31 /// // Now lookups by email are O(1)
32 /// let nodes = db.find_nodes_by_property("email", &Value::from("alice@example.com"));
33 /// ```
34 pub fn create_property_index(&self, property: &str) {
35 self.store.create_property_index(property);
36 }
37
38 /// Drops an index on a node property.
39 ///
40 /// Returns `true` if the index existed and was removed.
41 pub fn drop_property_index(&self, property: &str) -> bool {
42 self.store.drop_property_index(property)
43 }
44
45 /// Returns `true` if the property has an index.
46 #[must_use]
47 pub fn has_property_index(&self, property: &str) -> bool {
48 self.store.has_property_index(property)
49 }
50
51 /// Finds all nodes that have a specific property value.
52 ///
53 /// If the property is indexed, this is O(1). Otherwise, it scans all nodes
54 /// which is O(n). Use [`Self::create_property_index`] for frequently queried properties.
55 ///
56 /// # Example
57 ///
58 /// ```no_run
59 /// # use grafeo_engine::GrafeoDB;
60 /// # use grafeo_common::types::Value;
61 /// # let db = GrafeoDB::new_in_memory();
62 /// // Create index for fast lookups (optional but recommended)
63 /// db.create_property_index("city");
64 ///
65 /// // Find all nodes where city = "NYC"
66 /// let nyc_nodes = db.find_nodes_by_property("city", &Value::from("NYC"));
67 /// ```
68 #[must_use]
69 pub fn find_nodes_by_property(
70 &self,
71 property: &str,
72 value: &grafeo_common::types::Value,
73 ) -> Vec<grafeo_common::types::NodeId> {
74 self.store.find_nodes_by_property(property, value)
75 }
76
77 // =========================================================================
78 // VECTOR INDEX API
79 // =========================================================================
80
81 /// Creates a vector similarity index on a node property.
82 ///
83 /// This enables efficient approximate nearest-neighbor search on vector
84 /// properties. Currently validates the index parameters and scans existing
85 /// nodes to verify the property contains vectors of the expected dimensions.
86 ///
87 /// # Arguments
88 ///
89 /// * `label` - Node label to index (e.g., `"Doc"`)
90 /// * `property` - Property containing vector embeddings (e.g., `"embedding"`)
91 /// * `dimensions` - Expected vector dimensions (inferred from data if `None`)
92 /// * `metric` - Distance metric: `"cosine"` (default), `"euclidean"`, `"dot_product"`, `"manhattan"`
93 /// * `m` - HNSW links per node (default: 16). Higher = better recall, more memory.
94 /// * `ef_construction` - Construction beam width (default: 128). Higher = better index quality, slower build.
95 ///
96 /// # Errors
97 ///
98 /// Returns an error if the metric is invalid, no vectors are found, or
99 /// dimensions don't match.
100 pub fn create_vector_index(
101 &self,
102 label: &str,
103 property: &str,
104 dimensions: Option<usize>,
105 metric: Option<&str>,
106 m: Option<usize>,
107 ef_construction: Option<usize>,
108 ) -> Result<()> {
109 use grafeo_common::types::{PropertyKey, Value};
110 use grafeo_core::index::vector::DistanceMetric;
111
112 let metric = match metric {
113 Some(m) => DistanceMetric::from_str(m).ok_or_else(|| {
114 grafeo_common::utils::error::Error::Internal(format!(
115 "Unknown distance metric '{}'. Use: cosine, euclidean, dot_product, manhattan",
116 m
117 ))
118 })?,
119 None => DistanceMetric::Cosine,
120 };
121
122 // Scan nodes to validate vectors exist and check dimensions
123 let prop_key = PropertyKey::new(property);
124 let mut found_dims: Option<usize> = dimensions;
125 let mut vector_count = 0usize;
126
127 #[cfg(feature = "vector-index")]
128 let mut vectors: Vec<(grafeo_common::types::NodeId, Vec<f32>)> = Vec::new();
129
130 for node in self.store.nodes_with_label(label) {
131 if let Some(Value::Vector(v)) = node.properties.get(&prop_key) {
132 if let Some(expected) = found_dims {
133 if v.len() != expected {
134 return Err(grafeo_common::utils::error::Error::Internal(format!(
135 "Vector dimension mismatch: expected {}, found {} on node {}",
136 expected,
137 v.len(),
138 node.id.0
139 )));
140 }
141 } else {
142 found_dims = Some(v.len());
143 }
144 vector_count += 1;
145 #[cfg(feature = "vector-index")]
146 vectors.push((node.id, v.to_vec()));
147 }
148 }
149
150 let Some(dims) = found_dims else {
151 // No vectors found yet — caller must have supplied explicit dimensions
152 // so we can create an empty index that auto-populates via set_node_property.
153 return if let Some(d) = dimensions {
154 #[cfg(feature = "vector-index")]
155 {
156 use grafeo_core::index::vector::{HnswConfig, HnswIndex};
157
158 let mut config = HnswConfig::new(d, metric);
159 if let Some(m_val) = m {
160 config = config.with_m(m_val);
161 }
162 if let Some(ef_c) = ef_construction {
163 config = config.with_ef_construction(ef_c);
164 }
165
166 let index = HnswIndex::new(config);
167 self.store
168 .add_vector_index(label, property, Arc::new(index));
169 }
170
171 let _ = (m, ef_construction);
172 tracing::info!(
173 "Empty vector index created: :{label}({property}) - 0 vectors, {d} dimensions, metric={metric_name}",
174 metric_name = metric.name()
175 );
176 Ok(())
177 } else {
178 Err(grafeo_common::utils::error::Error::Internal(format!(
179 "No vector properties found on :{label}({property}) and no dimensions specified"
180 )))
181 };
182 };
183
184 // Build and populate the HNSW index
185 #[cfg(feature = "vector-index")]
186 {
187 use grafeo_core::index::vector::{HnswConfig, HnswIndex};
188
189 let mut config = HnswConfig::new(dims, metric);
190 if let Some(m_val) = m {
191 config = config.with_m(m_val);
192 }
193 if let Some(ef_c) = ef_construction {
194 config = config.with_ef_construction(ef_c);
195 }
196
197 let index = HnswIndex::with_capacity(config, vectors.len());
198 let accessor =
199 grafeo_core::index::vector::PropertyVectorAccessor::new(&*self.store, property);
200 for (node_id, vec) in &vectors {
201 index.insert(*node_id, vec, &accessor);
202 }
203
204 self.store
205 .add_vector_index(label, property, Arc::new(index));
206 }
207
208 // Suppress unused variable warnings when vector-index is off
209 let _ = (m, ef_construction);
210
211 tracing::info!(
212 "Vector index created: :{label}({property}) - {vector_count} vectors, {dims} dimensions, metric={metric_name}",
213 metric_name = metric.name()
214 );
215
216 Ok(())
217 }
218
/// Removes the vector index registered for a label+property pair.
///
/// Returns `true` if an index existed and was removed, `false` if none was
/// found. A successful removal is logged at `info` level.
///
/// After dropping, [`vector_search`](Self::vector_search) for this
/// label+property pair will return an error.
#[cfg(feature = "vector-index")]
pub fn drop_vector_index(&self, label: &str, property: &str) -> bool {
    // Nothing to report when no index was registered.
    if !self.store.remove_vector_index(label, property) {
        return false;
    }
    tracing::info!("Vector index dropped: :{label}({property})");
    true
}
234
/// Drops a vector index and builds it again from the current node data.
///
/// This is useful after bulk inserts or when the index may be out of sync.
/// If an index is still registered, its configuration (dimensions, metric,
/// `m`, `ef_construction`) is captured first and reused for the new index.
/// If it has already been dropped, dimensions are inferred from existing
/// data and default parameters are used.
///
/// The old index is removed before the rebuild is attempted, so when the
/// rebuild fails no index remains registered for this label+property pair.
///
/// # Errors
///
/// Returns whatever error [`Self::create_vector_index`] reports (e.g., no
/// matching vectors found and no dimensions can be inferred).
#[cfg(feature = "vector-index")]
pub fn rebuild_vector_index(&self, label: &str, property: &str) -> Result<()> {
    // Snapshot the settings before the index is removed from the store.
    let saved = self
        .store
        .get_vector_index(label, property)
        .map(|existing| existing.config().clone());

    self.store.remove_vector_index(label, property);

    match saved {
        Some(previous) => self.create_vector_index(
            label,
            property,
            Some(previous.dimensions),
            Some(previous.metric.name()),
            Some(previous.m),
            Some(previous.ef_construction),
        ),
        // Index was already dropped: infer dimensions from data, use defaults.
        None => self.create_vector_index(label, property, None, None, None, None),
    }
}
271
272 // =========================================================================
273 // TEXT INDEX API
274 // =========================================================================
275
/// Creates a BM25 text index on a node property for full-text search.
///
/// A fresh inverted index with the default BM25 configuration is filled
/// from every existing node that has `label` and a string value under
/// `property`; nodes where the property is absent or not a string are
/// skipped. The index is then registered with the store.
///
/// The index stays in sync automatically as nodes are created, updated,
/// or deleted. Use [`rebuild_text_index`](Self::rebuild_text_index) only
/// if the index was created before existing data was loaded.
///
/// # Errors
///
/// The current implementation always returns `Ok(())`. A label with no
/// nodes, or a property with no text values, simply yields an empty index.
#[cfg(feature = "text-index")]
pub fn create_text_index(&self, label: &str, property: &str) -> Result<()> {
    use grafeo_common::types::{PropertyKey, Value};
    use grafeo_core::index::text::{BM25Config, InvertedIndex};

    let prop_key = PropertyKey::new(property);
    let mut index = InvertedIndex::new(BM25Config::default());

    // Feed every existing string value for this label + property into the index.
    let labelled = self.store.nodes_by_label(label);
    for node_id in labelled {
        let Some(Value::String(text)) = self.store.get_node_property(node_id, &prop_key) else {
            continue;
        };
        index.insert(node_id, text.as_str());
    }

    let shared = Arc::new(RwLock::new(index));
    self.store.add_text_index(label, property, shared);
    Ok(())
}
306
/// Removes the text index registered for a label+property pair.
///
/// Returns `true` if the index existed and was removed.
#[cfg(feature = "text-index")]
pub fn drop_text_index(&self, label: &str, property: &str) -> bool {
    let store = &self.store;
    store.remove_text_index(label, property)
}
314
/// Rebuilds a text index by dropping it and re-scanning all matching nodes.
///
/// Use after bulk property updates to keep the index current.
///
/// # Errors
///
/// Propagates any error from [`Self::create_text_index`]. A missing index
/// is not an error: the result of the removal is ignored, so the index is
/// created even if none was registered for this label+property.
#[cfg(feature = "text-index")]
pub fn rebuild_text_index(&self, label: &str, property: &str) -> Result<()> {
    // Whether an index existed does not matter; a fresh one is built either way.
    let _ = self.store.remove_text_index(label, property);
    self.create_text_index(label, property)
}
327}