Skip to main content

grafeo_engine/database/
index.rs

//! Index management for GrafeoDB (property, vector, and text indexes).
2
3use grafeo_common::grafeo_info;
4#[cfg(any(feature = "vector-index", feature = "text-index"))]
5use std::sync::Arc;
6
7#[cfg(feature = "text-index")]
8use parking_lot::RwLock;
9
10use grafeo_common::utils::error::Result;
11
12impl super::GrafeoDB {
    // =========================================================================
    // PROPERTY INDEX API
    // =========================================================================
16
17    /// Creates an index on a node property for O(1) lookups by value.
18    ///
19    /// After creating an index, calls to [`Self::find_nodes_by_property`] will be
20    /// O(1) instead of O(n) for this property. The index is automatically
21    /// maintained when properties are set or removed.
22    ///
23    /// # Example
24    ///
25    /// ```no_run
26    /// # use grafeo_engine::GrafeoDB;
27    /// # use grafeo_common::types::Value;
28    /// # let db = GrafeoDB::new_in_memory();
29    /// // Create an index on the 'email' property
30    /// db.create_property_index("email");
31    ///
32    /// // Now lookups by email are O(1)
33    /// let nodes = db.find_nodes_by_property("email", &Value::from("alix@example.com"));
34    /// ```
35    pub fn create_property_index(&self, property: &str) {
36        self.store.create_property_index(property);
37    }
38
39    /// Drops an index on a node property.
40    ///
41    /// Returns `true` if the index existed and was removed.
42    pub fn drop_property_index(&self, property: &str) -> bool {
43        self.store.drop_property_index(property)
44    }
45
46    /// Returns `true` if the property has an index.
47    #[must_use]
48    pub fn has_property_index(&self, property: &str) -> bool {
49        self.store.has_property_index(property)
50    }
51
52    /// Finds all nodes that have a specific property value.
53    ///
54    /// If the property is indexed, this is O(1). Otherwise, it scans all nodes
55    /// which is O(n). Use [`Self::create_property_index`] for frequently queried properties.
56    ///
57    /// # Example
58    ///
59    /// ```no_run
60    /// # use grafeo_engine::GrafeoDB;
61    /// # use grafeo_common::types::Value;
62    /// # let db = GrafeoDB::new_in_memory();
63    /// // Create index for fast lookups (optional but recommended)
64    /// db.create_property_index("city");
65    ///
66    /// // Find all nodes where city = "NYC"
67    /// let nyc_nodes = db.find_nodes_by_property("city", &Value::from("NYC"));
68    /// ```
69    #[must_use]
70    pub fn find_nodes_by_property(
71        &self,
72        property: &str,
73        value: &grafeo_common::types::Value,
74    ) -> Vec<grafeo_common::types::NodeId> {
75        self.store.find_nodes_by_property(property, value)
76    }
77
    // =========================================================================
    // VECTOR INDEX API
    // =========================================================================
81
82    /// Creates a vector similarity index on a node property.
83    ///
84    /// This enables efficient approximate nearest-neighbor search on vector
85    /// properties. Currently validates the index parameters and scans existing
86    /// nodes to verify the property contains vectors of the expected dimensions.
87    ///
88    /// # Arguments
89    ///
90    /// * `label` - Node label to index (e.g., `"Doc"`)
91    /// * `property` - Property containing vector embeddings (e.g., `"embedding"`)
92    /// * `dimensions` - Expected vector dimensions (inferred from data if `None`)
93    /// * `metric` - Distance metric: `"cosine"` (default), `"euclidean"`, `"dot_product"`, `"manhattan"`
94    /// * `m` - HNSW links per node (default: 16). Higher = better recall, more memory.
95    /// * `ef_construction` - Construction beam width (default: 128). Higher = better index quality, slower build.
96    ///
97    /// # Errors
98    ///
99    /// Returns an error if the metric is invalid, no vectors are found, or
100    /// dimensions don't match.
101    pub fn create_vector_index(
102        &self,
103        label: &str,
104        property: &str,
105        dimensions: Option<usize>,
106        metric: Option<&str>,
107        m: Option<usize>,
108        ef_construction: Option<usize>,
109    ) -> Result<()> {
110        use grafeo_common::types::{PropertyKey, Value};
111        use grafeo_core::index::vector::DistanceMetric;
112
113        let metric = match metric {
114            Some(m) => DistanceMetric::from_str(m).ok_or_else(|| {
115                grafeo_common::utils::error::Error::Internal(format!(
116                    "Unknown distance metric '{}'. Use: cosine, euclidean, dot_product, manhattan",
117                    m
118                ))
119            })?,
120            None => DistanceMetric::Cosine,
121        };
122
123        // Scan nodes to validate vectors exist and check dimensions
124        let prop_key = PropertyKey::new(property);
125        let mut found_dims: Option<usize> = dimensions;
126        let mut vector_count = 0usize;
127
128        #[cfg(feature = "vector-index")]
129        let mut vectors: Vec<(grafeo_common::types::NodeId, Vec<f32>)> = Vec::new();
130
131        for node in self.store.nodes_with_label(label) {
132            if let Some(Value::Vector(v)) = node.properties.get(&prop_key) {
133                if let Some(expected) = found_dims {
134                    if v.len() != expected {
135                        return Err(grafeo_common::utils::error::Error::Internal(format!(
136                            "Vector dimension mismatch: expected {}, found {} on node {}",
137                            expected,
138                            v.len(),
139                            node.id.0
140                        )));
141                    }
142                } else {
143                    found_dims = Some(v.len());
144                }
145                vector_count += 1;
146                #[cfg(feature = "vector-index")]
147                vectors.push((node.id, v.to_vec()));
148            }
149        }
150
151        let Some(dims) = found_dims else {
152            // No vectors found yet: caller must have supplied explicit dimensions
153            // so we can create an empty index that auto-populates via set_node_property.
154            return if let Some(d) = dimensions {
155                #[cfg(feature = "vector-index")]
156                {
157                    use grafeo_core::index::vector::{HnswConfig, HnswIndex};
158
159                    let mut config = HnswConfig::new(d, metric);
160                    if let Some(m_val) = m {
161                        config = config.with_m(m_val);
162                    }
163                    if let Some(ef_c) = ef_construction {
164                        config = config.with_ef_construction(ef_c);
165                    }
166
167                    let index = HnswIndex::new(config);
168                    self.store
169                        .add_vector_index(label, property, Arc::new(index));
170                }
171
172                let _ = (m, ef_construction);
173                grafeo_info!(
174                    "Empty vector index created: :{label}({property}) - 0 vectors, {d} dimensions, metric={metric_name}",
175                    metric_name = metric.name()
176                );
177                Ok(())
178            } else {
179                Err(grafeo_common::utils::error::Error::Internal(format!(
180                    "No vector properties found on :{label}({property}) and no dimensions specified"
181                )))
182            };
183        };
184
185        // Build and populate the HNSW index
186        #[cfg(feature = "vector-index")]
187        {
188            use grafeo_core::index::vector::{HnswConfig, HnswIndex};
189
190            let mut config = HnswConfig::new(dims, metric);
191            if let Some(m_val) = m {
192                config = config.with_m(m_val);
193            }
194            if let Some(ef_c) = ef_construction {
195                config = config.with_ef_construction(ef_c);
196            }
197
198            let index = HnswIndex::with_capacity(config, vectors.len());
199            let accessor =
200                grafeo_core::index::vector::PropertyVectorAccessor::new(&*self.store, property);
201            for (node_id, vec) in &vectors {
202                index.insert(*node_id, vec, &accessor);
203            }
204
205            self.store
206                .add_vector_index(label, property, Arc::new(index));
207        }
208
209        // Suppress unused variable warnings when vector-index is off
210        let _ = (m, ef_construction);
211
212        grafeo_info!(
213            "Vector index created: :{label}({property}) - {vector_count} vectors, {dims} dimensions, metric={metric_name}",
214            metric_name = metric.name()
215        );
216
217        Ok(())
218    }
219
220    /// Drops a vector index for the given label and property.
221    ///
222    /// Returns `true` if the index existed and was removed, `false` if no
223    /// index was found.
224    ///
225    /// After dropping, [`vector_search`](Self::vector_search) for this
226    /// label+property pair will return an error.
227    #[cfg(feature = "vector-index")]
228    pub fn drop_vector_index(&self, label: &str, property: &str) -> bool {
229        let removed = self.store.remove_vector_index(label, property);
230        if removed {
231            grafeo_info!("Vector index dropped: :{label}({property})");
232        }
233        removed
234    }
235
236    /// Drops and recreates a vector index, rescanning all matching nodes.
237    ///
238    /// This is useful after bulk inserts or when the index may be out of sync.
239    /// When the index still exists, the previous configuration (dimensions,
240    /// metric, M, ef\_construction) is preserved. When it has already been
241    /// dropped, dimensions are inferred from existing data and default
242    /// parameters are used.
243    ///
244    /// # Errors
245    ///
246    /// Returns an error if the rebuild fails (e.g., no matching vectors found
247    /// and no dimensions can be inferred).
248    #[cfg(feature = "vector-index")]
249    pub fn rebuild_vector_index(&self, label: &str, property: &str) -> Result<()> {
250        // Preserve config from existing index if available
251        let config = self
252            .store
253            .get_vector_index(label, property)
254            .map(|idx| idx.config().clone());
255
256        self.store.remove_vector_index(label, property);
257
258        if let Some(config) = config {
259            self.create_vector_index(
260                label,
261                property,
262                Some(config.dimensions),
263                Some(config.metric.name()),
264                Some(config.m),
265                Some(config.ef_construction),
266            )
267        } else {
268            // Index was already dropped: infer dimensions from data
269            self.create_vector_index(label, property, None, None, None, None)
270        }
271    }
272
    // =========================================================================
    // TEXT INDEX API
    // =========================================================================
276
277    /// Creates a BM25 text index on a node property for full-text search.
278    ///
279    /// Indexes all existing nodes with the given label and property.
280    /// The index stays in sync automatically as nodes are created, updated,
281    /// or deleted. Use [`rebuild_text_index`](Self::rebuild_text_index) only
282    /// if the index was created before existing data was loaded.
283    ///
284    /// # Errors
285    ///
286    /// Returns an error if the label has no nodes or the property contains no text values.
287    #[cfg(feature = "text-index")]
288    pub fn create_text_index(&self, label: &str, property: &str) -> Result<()> {
289        use grafeo_common::types::{PropertyKey, Value};
290        use grafeo_core::index::text::{BM25Config, InvertedIndex};
291
292        let mut index = InvertedIndex::new(BM25Config::default());
293        let prop_key = PropertyKey::new(property);
294
295        // Index all existing nodes with this label + property
296        let nodes = self.store.nodes_by_label(label);
297        for node_id in nodes {
298            if let Some(Value::String(text)) = self.store.get_node_property(node_id, &prop_key) {
299                index.insert(node_id, text.as_str());
300            }
301        }
302
303        self.store
304            .add_text_index(label, property, Arc::new(RwLock::new(index)));
305        Ok(())
306    }
307
308    /// Drops a text index on a label+property pair.
309    ///
310    /// Returns `true` if the index existed and was removed.
311    #[cfg(feature = "text-index")]
312    pub fn drop_text_index(&self, label: &str, property: &str) -> bool {
313        self.store.remove_text_index(label, property)
314    }
315
316    /// Rebuilds a text index by re-scanning all matching nodes.
317    ///
318    /// Use after bulk property updates to keep the index current.
319    ///
320    /// # Errors
321    ///
322    /// Returns an error if no text index exists for this label+property.
323    #[cfg(feature = "text-index")]
324    pub fn rebuild_text_index(&self, label: &str, property: &str) -> Result<()> {
325        self.store.remove_text_index(label, property);
326        self.create_text_index(label, property)
327    }
328}