grafeo_engine/database/index.rs
1//! Index management for GrafeoDB (property, vector, and text indexes).
2
3use grafeo_common::grafeo_info;
4#[cfg(any(feature = "vector-index", feature = "text-index"))]
5use std::sync::Arc;
6
7#[cfg(feature = "text-index")]
8use parking_lot::RwLock;
9
10use grafeo_common::utils::error::Result;
11
12impl super::GrafeoDB {
13 // =========================================================================
14 // PROPERTY INDEX API
15 // =========================================================================
16
17 /// Creates an index on a node property for O(1) lookups by value.
18 ///
19 /// After creating an index, calls to [`Self::find_nodes_by_property`] will be
20 /// O(1) instead of O(n) for this property. The index is automatically
21 /// maintained when properties are set or removed.
22 ///
23 /// # Example
24 ///
25 /// ```no_run
26 /// # use grafeo_engine::GrafeoDB;
27 /// # use grafeo_common::types::Value;
28 /// # let db = GrafeoDB::new_in_memory();
29 /// // Create an index on the 'email' property
30 /// db.create_property_index("email");
31 ///
32 /// // Now lookups by email are O(1)
33 /// let nodes = db.find_nodes_by_property("email", &Value::from("alix@example.com"));
34 /// ```
35 pub fn create_property_index(&self, property: &str) {
36 self.store.create_property_index(property);
37 }
38
39 /// Drops an index on a node property.
40 ///
41 /// Returns `true` if the index existed and was removed.
42 pub fn drop_property_index(&self, property: &str) -> bool {
43 self.store.drop_property_index(property)
44 }
45
46 /// Returns `true` if the property has an index.
47 #[must_use]
48 pub fn has_property_index(&self, property: &str) -> bool {
49 self.store.has_property_index(property)
50 }
51
52 /// Finds all nodes that have a specific property value.
53 ///
54 /// If the property is indexed, this is O(1). Otherwise, it scans all nodes
55 /// which is O(n). Use [`Self::create_property_index`] for frequently queried properties.
56 ///
57 /// # Example
58 ///
59 /// ```no_run
60 /// # use grafeo_engine::GrafeoDB;
61 /// # use grafeo_common::types::Value;
62 /// # let db = GrafeoDB::new_in_memory();
63 /// // Create index for fast lookups (optional but recommended)
64 /// db.create_property_index("city");
65 ///
66 /// // Find all nodes where city = "NYC"
67 /// let nyc_nodes = db.find_nodes_by_property("city", &Value::from("NYC"));
68 /// ```
69 #[must_use]
70 pub fn find_nodes_by_property(
71 &self,
72 property: &str,
73 value: &grafeo_common::types::Value,
74 ) -> Vec<grafeo_common::types::NodeId> {
75 self.store.find_nodes_by_property(property, value)
76 }
77
78 // =========================================================================
79 // VECTOR INDEX API
80 // =========================================================================
81
82 /// Creates a vector similarity index on a node property.
83 ///
84 /// This enables efficient approximate nearest-neighbor search on vector
85 /// properties. Currently validates the index parameters and scans existing
86 /// nodes to verify the property contains vectors of the expected dimensions.
87 ///
88 /// # Arguments
89 ///
90 /// * `label` - Node label to index (e.g., `"Doc"`)
91 /// * `property` - Property containing vector embeddings (e.g., `"embedding"`)
92 /// * `dimensions` - Expected vector dimensions (inferred from data if `None`)
93 /// * `metric` - Distance metric: `"cosine"` (default), `"euclidean"`, `"dot_product"`, `"manhattan"`
94 /// * `m` - HNSW links per node (default: 16). Higher = better recall, more memory.
95 /// * `ef_construction` - Construction beam width (default: 128). Higher = better index quality, slower build.
96 ///
97 /// # Errors
98 ///
99 /// Returns an error if the metric is invalid, no vectors are found, or
100 /// dimensions don't match.
101 pub fn create_vector_index(
102 &self,
103 label: &str,
104 property: &str,
105 dimensions: Option<usize>,
106 metric: Option<&str>,
107 m: Option<usize>,
108 ef_construction: Option<usize>,
109 ) -> Result<()> {
110 use grafeo_common::types::{PropertyKey, Value};
111 use grafeo_core::index::vector::DistanceMetric;
112
113 let metric = match metric {
114 Some(m) => DistanceMetric::from_str(m).ok_or_else(|| {
115 grafeo_common::utils::error::Error::Internal(format!(
116 "Unknown distance metric '{}'. Use: cosine, euclidean, dot_product, manhattan",
117 m
118 ))
119 })?,
120 None => DistanceMetric::Cosine,
121 };
122
123 // Scan nodes to validate vectors exist and check dimensions
124 let prop_key = PropertyKey::new(property);
125 let mut found_dims: Option<usize> = dimensions;
126 let mut vector_count = 0usize;
127
128 #[cfg(feature = "vector-index")]
129 let mut vectors: Vec<(grafeo_common::types::NodeId, Vec<f32>)> = Vec::new();
130
131 for node in self.store.nodes_with_label(label) {
132 if let Some(Value::Vector(v)) = node.properties.get(&prop_key) {
133 if let Some(expected) = found_dims {
134 if v.len() != expected {
135 return Err(grafeo_common::utils::error::Error::Internal(format!(
136 "Vector dimension mismatch: expected {}, found {} on node {}",
137 expected,
138 v.len(),
139 node.id.0
140 )));
141 }
142 } else {
143 found_dims = Some(v.len());
144 }
145 vector_count += 1;
146 #[cfg(feature = "vector-index")]
147 vectors.push((node.id, v.to_vec()));
148 }
149 }
150
151 let Some(dims) = found_dims else {
152 // No vectors found yet: caller must have supplied explicit dimensions
153 // so we can create an empty index that auto-populates via set_node_property.
154 return if let Some(d) = dimensions {
155 #[cfg(feature = "vector-index")]
156 {
157 use grafeo_core::index::vector::{HnswConfig, HnswIndex};
158
159 let mut config = HnswConfig::new(d, metric);
160 if let Some(m_val) = m {
161 config = config.with_m(m_val);
162 }
163 if let Some(ef_c) = ef_construction {
164 config = config.with_ef_construction(ef_c);
165 }
166
167 let index = HnswIndex::new(config);
168 self.store
169 .add_vector_index(label, property, Arc::new(index));
170 }
171
172 let _ = (m, ef_construction);
173 grafeo_info!(
174 "Empty vector index created: :{label}({property}) - 0 vectors, {d} dimensions, metric={metric_name}",
175 metric_name = metric.name()
176 );
177 Ok(())
178 } else {
179 Err(grafeo_common::utils::error::Error::Internal(format!(
180 "No vector properties found on :{label}({property}) and no dimensions specified"
181 )))
182 };
183 };
184
185 // Build and populate the HNSW index
186 #[cfg(feature = "vector-index")]
187 {
188 use grafeo_core::index::vector::{HnswConfig, HnswIndex};
189
190 let mut config = HnswConfig::new(dims, metric);
191 if let Some(m_val) = m {
192 config = config.with_m(m_val);
193 }
194 if let Some(ef_c) = ef_construction {
195 config = config.with_ef_construction(ef_c);
196 }
197
198 let index = HnswIndex::with_capacity(config, vectors.len());
199 let accessor =
200 grafeo_core::index::vector::PropertyVectorAccessor::new(&*self.store, property);
201 for (node_id, vec) in &vectors {
202 index.insert(*node_id, vec, &accessor);
203 }
204
205 self.store
206 .add_vector_index(label, property, Arc::new(index));
207 }
208
209 // Suppress unused variable warnings when vector-index is off
210 let _ = (m, ef_construction);
211
212 grafeo_info!(
213 "Vector index created: :{label}({property}) - {vector_count} vectors, {dims} dimensions, metric={metric_name}",
214 metric_name = metric.name()
215 );
216
217 Ok(())
218 }
219
220 /// Drops a vector index for the given label and property.
221 ///
222 /// Returns `true` if the index existed and was removed, `false` if no
223 /// index was found.
224 ///
225 /// After dropping, [`vector_search`](Self::vector_search) for this
226 /// label+property pair will return an error.
227 #[cfg(feature = "vector-index")]
228 pub fn drop_vector_index(&self, label: &str, property: &str) -> bool {
229 let removed = self.store.remove_vector_index(label, property);
230 if removed {
231 grafeo_info!("Vector index dropped: :{label}({property})");
232 }
233 removed
234 }
235
236 /// Drops and recreates a vector index, rescanning all matching nodes.
237 ///
238 /// This is useful after bulk inserts or when the index may be out of sync.
239 /// When the index still exists, the previous configuration (dimensions,
240 /// metric, M, ef\_construction) is preserved. When it has already been
241 /// dropped, dimensions are inferred from existing data and default
242 /// parameters are used.
243 ///
244 /// # Errors
245 ///
246 /// Returns an error if the rebuild fails (e.g., no matching vectors found
247 /// and no dimensions can be inferred).
248 #[cfg(feature = "vector-index")]
249 pub fn rebuild_vector_index(&self, label: &str, property: &str) -> Result<()> {
250 // Preserve config from existing index if available
251 let config = self
252 .store
253 .get_vector_index(label, property)
254 .map(|idx| idx.config().clone());
255
256 self.store.remove_vector_index(label, property);
257
258 if let Some(config) = config {
259 self.create_vector_index(
260 label,
261 property,
262 Some(config.dimensions),
263 Some(config.metric.name()),
264 Some(config.m),
265 Some(config.ef_construction),
266 )
267 } else {
268 // Index was already dropped: infer dimensions from data
269 self.create_vector_index(label, property, None, None, None, None)
270 }
271 }
272
273 // =========================================================================
274 // TEXT INDEX API
275 // =========================================================================
276
277 /// Creates a BM25 text index on a node property for full-text search.
278 ///
279 /// Indexes all existing nodes with the given label and property.
280 /// The index stays in sync automatically as nodes are created, updated,
281 /// or deleted. Use [`rebuild_text_index`](Self::rebuild_text_index) only
282 /// if the index was created before existing data was loaded.
283 ///
284 /// # Errors
285 ///
286 /// Returns an error if the label has no nodes or the property contains no text values.
287 #[cfg(feature = "text-index")]
288 pub fn create_text_index(&self, label: &str, property: &str) -> Result<()> {
289 use grafeo_common::types::{PropertyKey, Value};
290 use grafeo_core::index::text::{BM25Config, InvertedIndex};
291
292 let mut index = InvertedIndex::new(BM25Config::default());
293 let prop_key = PropertyKey::new(property);
294
295 // Index all existing nodes with this label + property
296 let nodes = self.store.nodes_by_label(label);
297 for node_id in nodes {
298 if let Some(Value::String(text)) = self.store.get_node_property(node_id, &prop_key) {
299 index.insert(node_id, text.as_str());
300 }
301 }
302
303 self.store
304 .add_text_index(label, property, Arc::new(RwLock::new(index)));
305 Ok(())
306 }
307
308 /// Drops a text index on a label+property pair.
309 ///
310 /// Returns `true` if the index existed and was removed.
311 #[cfg(feature = "text-index")]
312 pub fn drop_text_index(&self, label: &str, property: &str) -> bool {
313 self.store.remove_text_index(label, property)
314 }
315
316 /// Rebuilds a text index by re-scanning all matching nodes.
317 ///
318 /// Use after bulk property updates to keep the index current.
319 ///
320 /// # Errors
321 ///
322 /// Returns an error if no text index exists for this label+property.
323 #[cfg(feature = "text-index")]
324 pub fn rebuild_text_index(&self, label: &str, property: &str) -> Result<()> {
325 self.store.remove_text_index(label, property);
326 self.create_text_index(label, property)
327 }
328}