Skip to main content

grafeo_engine/database/
index.rs

1//! Index management for GrafeoDB (property, vector, and text indexes).
2
3#[cfg(any(feature = "vector-index", feature = "text-index"))]
4use std::sync::Arc;
5
6#[cfg(feature = "text-index")]
7use parking_lot::RwLock;
8
9use grafeo_common::utils::error::Result;
10
11impl super::GrafeoDB {
12    // =========================================================================
13    // PROPERTY INDEX API
14    // =========================================================================
15
    /// Creates an index on a node property for O(1) lookups by value.
    ///
    /// After creating an index, calls to [`Self::find_nodes_by_property`] will be
    /// O(1) instead of O(n) for this property. The index is automatically
    /// maintained when properties are set or removed.
    ///
    /// This method is a thin wrapper: the request is forwarded unchanged to the
    /// underlying store's `create_property_index`.
    ///
    /// # Arguments
    ///
    /// * `property` - Name of the node property to index (e.g., `"email"`)
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use grafeo_engine::GrafeoDB;
    /// # use grafeo_common::types::Value;
    /// # let db = GrafeoDB::new_in_memory();
    /// // Create an index on the 'email' property
    /// db.create_property_index("email");
    ///
    /// // Now lookups by email are O(1)
    /// let nodes = db.find_nodes_by_property("email", &Value::from("alice@example.com"));
    /// ```
    pub fn create_property_index(&self, property: &str) {
        // All index construction is done by the store; nothing is returned to the caller.
        self.store.create_property_index(property);
    }
37
    /// Drops an index on a node property.
    ///
    /// Forwards to the underlying store's `drop_property_index` and hands its
    /// result straight back to the caller.
    ///
    /// # Arguments
    ///
    /// * `property` - Name of the node property whose index should be removed
    ///
    /// # Returns
    ///
    /// `true` if the index existed and was removed.
    pub fn drop_property_index(&self, property: &str) -> bool {
        self.store.drop_property_index(property)
    }
44
    /// Returns `true` if the property has an index.
    ///
    /// The answer comes directly from the underlying store's
    /// `has_property_index`; this method adds no logic of its own.
    ///
    /// # Arguments
    ///
    /// * `property` - Name of the node property to check
    #[must_use]
    pub fn has_property_index(&self, property: &str) -> bool {
        self.store.has_property_index(property)
    }
50
    /// Finds all nodes that have a specific property value.
    ///
    /// If the property is indexed, this is O(1). Otherwise, it scans all nodes
    /// which is O(n). Use [`Self::create_property_index`] for frequently queried properties.
    ///
    /// The lookup itself is performed by the underlying store's
    /// `find_nodes_by_property`; this method only forwards the arguments.
    ///
    /// # Arguments
    ///
    /// * `property` - Name of the node property to match on
    /// * `value` - Value the property must hold
    ///
    /// # Returns
    ///
    /// The IDs of the matching nodes, exactly as reported by the store.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use grafeo_engine::GrafeoDB;
    /// # use grafeo_common::types::Value;
    /// # let db = GrafeoDB::new_in_memory();
    /// // Create index for fast lookups (optional but recommended)
    /// db.create_property_index("city");
    ///
    /// // Find all nodes where city = "NYC"
    /// let nyc_nodes = db.find_nodes_by_property("city", &Value::from("NYC"));
    /// ```
    #[must_use]
    pub fn find_nodes_by_property(
        &self,
        property: &str,
        value: &grafeo_common::types::Value,
    ) -> Vec<grafeo_common::types::NodeId> {
        self.store.find_nodes_by_property(property, value)
    }
76
77    // =========================================================================
78    // VECTOR INDEX API
79    // =========================================================================
80
81    /// Creates a vector similarity index on a node property.
82    ///
83    /// This enables efficient approximate nearest-neighbor search on vector
84    /// properties. Currently validates the index parameters and scans existing
85    /// nodes to verify the property contains vectors of the expected dimensions.
86    ///
87    /// # Arguments
88    ///
89    /// * `label` - Node label to index (e.g., `"Doc"`)
90    /// * `property` - Property containing vector embeddings (e.g., `"embedding"`)
91    /// * `dimensions` - Expected vector dimensions (inferred from data if `None`)
92    /// * `metric` - Distance metric: `"cosine"` (default), `"euclidean"`, `"dot_product"`, `"manhattan"`
93    /// * `m` - HNSW links per node (default: 16). Higher = better recall, more memory.
94    /// * `ef_construction` - Construction beam width (default: 128). Higher = better index quality, slower build.
95    ///
96    /// # Errors
97    ///
98    /// Returns an error if the metric is invalid, no vectors are found, or
99    /// dimensions don't match.
100    pub fn create_vector_index(
101        &self,
102        label: &str,
103        property: &str,
104        dimensions: Option<usize>,
105        metric: Option<&str>,
106        m: Option<usize>,
107        ef_construction: Option<usize>,
108    ) -> Result<()> {
109        use grafeo_common::types::{PropertyKey, Value};
110        use grafeo_core::index::vector::DistanceMetric;
111
112        let metric = match metric {
113            Some(m) => DistanceMetric::from_str(m).ok_or_else(|| {
114                grafeo_common::utils::error::Error::Internal(format!(
115                    "Unknown distance metric '{}'. Use: cosine, euclidean, dot_product, manhattan",
116                    m
117                ))
118            })?,
119            None => DistanceMetric::Cosine,
120        };
121
122        // Scan nodes to validate vectors exist and check dimensions
123        let prop_key = PropertyKey::new(property);
124        let mut found_dims: Option<usize> = dimensions;
125        let mut vector_count = 0usize;
126
127        #[cfg(feature = "vector-index")]
128        let mut vectors: Vec<(grafeo_common::types::NodeId, Vec<f32>)> = Vec::new();
129
130        for node in self.store.nodes_with_label(label) {
131            if let Some(Value::Vector(v)) = node.properties.get(&prop_key) {
132                if let Some(expected) = found_dims {
133                    if v.len() != expected {
134                        return Err(grafeo_common::utils::error::Error::Internal(format!(
135                            "Vector dimension mismatch: expected {}, found {} on node {}",
136                            expected,
137                            v.len(),
138                            node.id.0
139                        )));
140                    }
141                } else {
142                    found_dims = Some(v.len());
143                }
144                vector_count += 1;
145                #[cfg(feature = "vector-index")]
146                vectors.push((node.id, v.to_vec()));
147            }
148        }
149
150        let Some(dims) = found_dims else {
151            // No vectors found yet — caller must have supplied explicit dimensions
152            // so we can create an empty index that auto-populates via set_node_property.
153            return if let Some(d) = dimensions {
154                #[cfg(feature = "vector-index")]
155                {
156                    use grafeo_core::index::vector::{HnswConfig, HnswIndex};
157
158                    let mut config = HnswConfig::new(d, metric);
159                    if let Some(m_val) = m {
160                        config = config.with_m(m_val);
161                    }
162                    if let Some(ef_c) = ef_construction {
163                        config = config.with_ef_construction(ef_c);
164                    }
165
166                    let index = HnswIndex::new(config);
167                    self.store
168                        .add_vector_index(label, property, Arc::new(index));
169                }
170
171                let _ = (m, ef_construction);
172                tracing::info!(
173                    "Empty vector index created: :{label}({property}) - 0 vectors, {d} dimensions, metric={metric_name}",
174                    metric_name = metric.name()
175                );
176                Ok(())
177            } else {
178                Err(grafeo_common::utils::error::Error::Internal(format!(
179                    "No vector properties found on :{label}({property}) and no dimensions specified"
180                )))
181            };
182        };
183
184        // Build and populate the HNSW index
185        #[cfg(feature = "vector-index")]
186        {
187            use grafeo_core::index::vector::{HnswConfig, HnswIndex};
188
189            let mut config = HnswConfig::new(dims, metric);
190            if let Some(m_val) = m {
191                config = config.with_m(m_val);
192            }
193            if let Some(ef_c) = ef_construction {
194                config = config.with_ef_construction(ef_c);
195            }
196
197            let index = HnswIndex::with_capacity(config, vectors.len());
198            let accessor =
199                grafeo_core::index::vector::PropertyVectorAccessor::new(&*self.store, property);
200            for (node_id, vec) in &vectors {
201                index.insert(*node_id, vec, &accessor);
202            }
203
204            self.store
205                .add_vector_index(label, property, Arc::new(index));
206        }
207
208        // Suppress unused variable warnings when vector-index is off
209        let _ = (m, ef_construction);
210
211        tracing::info!(
212            "Vector index created: :{label}({property}) - {vector_count} vectors, {dims} dimensions, metric={metric_name}",
213            metric_name = metric.name()
214        );
215
216        Ok(())
217    }
218
219    /// Drops a vector index for the given label and property.
220    ///
221    /// Returns `true` if the index existed and was removed, `false` if no
222    /// index was found.
223    ///
224    /// After dropping, [`vector_search`](Self::vector_search) for this
225    /// label+property pair will return an error.
226    #[cfg(feature = "vector-index")]
227    pub fn drop_vector_index(&self, label: &str, property: &str) -> bool {
228        let removed = self.store.remove_vector_index(label, property);
229        if removed {
230            tracing::info!("Vector index dropped: :{label}({property})");
231        }
232        removed
233    }
234
235    /// Drops and recreates a vector index, rescanning all matching nodes.
236    ///
237    /// This is useful after bulk inserts or when the index may be out of sync.
238    /// When the index still exists, the previous configuration (dimensions,
239    /// metric, M, ef\_construction) is preserved. When it has already been
240    /// dropped, dimensions are inferred from existing data and default
241    /// parameters are used.
242    ///
243    /// # Errors
244    ///
245    /// Returns an error if the rebuild fails (e.g., no matching vectors found
246    /// and no dimensions can be inferred).
247    #[cfg(feature = "vector-index")]
248    pub fn rebuild_vector_index(&self, label: &str, property: &str) -> Result<()> {
249        // Preserve config from existing index if available
250        let config = self
251            .store
252            .get_vector_index(label, property)
253            .map(|idx| idx.config().clone());
254
255        self.store.remove_vector_index(label, property);
256
257        if let Some(config) = config {
258            self.create_vector_index(
259                label,
260                property,
261                Some(config.dimensions),
262                Some(config.metric.name()),
263                Some(config.m),
264                Some(config.ef_construction),
265            )
266        } else {
267            // Index was already dropped – infer dimensions from data
268            self.create_vector_index(label, property, None, None, None, None)
269        }
270    }
271
272    // =========================================================================
273    // TEXT INDEX API
274    // =========================================================================
275
276    /// Creates a BM25 text index on a node property for full-text search.
277    ///
278    /// Indexes all existing nodes with the given label and property.
279    /// The index stays in sync automatically as nodes are created, updated,
280    /// or deleted. Use [`rebuild_text_index`](Self::rebuild_text_index) only
281    /// if the index was created before existing data was loaded.
282    ///
283    /// # Errors
284    ///
285    /// Returns an error if the label has no nodes or the property contains no text values.
286    #[cfg(feature = "text-index")]
287    pub fn create_text_index(&self, label: &str, property: &str) -> Result<()> {
288        use grafeo_common::types::{PropertyKey, Value};
289        use grafeo_core::index::text::{BM25Config, InvertedIndex};
290
291        let mut index = InvertedIndex::new(BM25Config::default());
292        let prop_key = PropertyKey::new(property);
293
294        // Index all existing nodes with this label + property
295        let nodes = self.store.nodes_by_label(label);
296        for node_id in nodes {
297            if let Some(Value::String(text)) = self.store.get_node_property(node_id, &prop_key) {
298                index.insert(node_id, text.as_str());
299            }
300        }
301
302        self.store
303            .add_text_index(label, property, Arc::new(RwLock::new(index)));
304        Ok(())
305    }
306
    /// Drops a text index on a label+property pair.
    ///
    /// Forwards to the underlying store's `remove_text_index` and returns its
    /// result unchanged.
    ///
    /// # Arguments
    ///
    /// * `label` - Node label the index was created for
    /// * `property` - Property the index was created on
    ///
    /// # Returns
    ///
    /// `true` if the index existed and was removed.
    #[cfg(feature = "text-index")]
    pub fn drop_text_index(&self, label: &str, property: &str) -> bool {
        self.store.remove_text_index(label, property)
    }
314
    /// Rebuilds a text index by re-scanning all matching nodes.
    ///
    /// Any index currently registered for `label` + `property` is removed, then
    /// [`create_text_index`](Self::create_text_index) builds a fresh one. The
    /// outcome of the removal is not inspected, so calling this when no index
    /// exists simply creates one.
    ///
    /// Use after bulk property updates to keep the index current.
    ///
    /// # Errors
    ///
    /// Propagates whatever [`create_text_index`](Self::create_text_index)
    /// returns. A missing index is not treated as an error.
    #[cfg(feature = "text-index")]
    pub fn rebuild_text_index(&self, label: &str, property: &str) -> Result<()> {
        // The returned flag (whether an index existed) is intentionally unused here.
        self.store.remove_text_index(label, property);
        self.create_text_index(label, property)
    }
327}