Skip to main content

grafeo_engine/database/
index.rs

1//! Index management for GrafeoDB (property, vector, and text indexes).
2
3use grafeo_common::grafeo_info;
4#[cfg(any(feature = "vector-index", feature = "text-index"))]
5use std::sync::Arc;
6
7#[cfg(feature = "text-index")]
8use parking_lot::RwLock;
9
10use grafeo_common::utils::error::Result;
11
12impl super::GrafeoDB {
13    // =========================================================================
14    // PROPERTY INDEX API
15    // =========================================================================
16
17    /// Creates an index on a node property for O(1) lookups by value.
18    ///
19    /// After creating an index, calls to [`Self::find_nodes_by_property`] will be
20    /// O(1) instead of O(n) for this property. The index is automatically
21    /// maintained when properties are set or removed.
22    ///
23    /// # Example
24    ///
25    /// ```no_run
26    /// # use grafeo_engine::GrafeoDB;
27    /// # use grafeo_common::types::Value;
28    /// # let db = GrafeoDB::new_in_memory();
29    /// // Create an index on the 'email' property
30    /// db.create_property_index("email");
31    ///
32    /// // Now lookups by email are O(1)
33    /// let nodes = db.find_nodes_by_property("email", &Value::from("alix@example.com"));
34    /// ```
35    pub fn create_property_index(&self, property: &str) {
36        self.lpg_store().create_property_index(property);
37    }
38
39    /// Drops an index on a node property.
40    ///
41    /// Returns `true` if the index existed and was removed.
42    pub fn drop_property_index(&self, property: &str) -> bool {
43        self.lpg_store().drop_property_index(property)
44    }
45
46    /// Returns `true` if the property has an index.
47    #[must_use]
48    pub fn has_property_index(&self, property: &str) -> bool {
49        self.lpg_store().has_property_index(property)
50    }
51
52    /// Finds all nodes that have a specific property value.
53    ///
54    /// If the property is indexed, this is O(1). Otherwise, it scans all nodes
55    /// which is O(n). Use [`Self::create_property_index`] for frequently queried properties.
56    ///
57    /// # Example
58    ///
59    /// ```no_run
60    /// # use grafeo_engine::GrafeoDB;
61    /// # use grafeo_common::types::Value;
62    /// # let db = GrafeoDB::new_in_memory();
63    /// // Create index for fast lookups (optional but recommended)
64    /// db.create_property_index("city");
65    ///
66    /// // Find all nodes where city = "NYC"
67    /// let nyc_nodes = db.find_nodes_by_property("city", &Value::from("NYC"));
68    /// ```
69    #[must_use]
70    pub fn find_nodes_by_property(
71        &self,
72        property: &str,
73        value: &grafeo_common::types::Value,
74    ) -> Vec<grafeo_common::types::NodeId> {
75        self.lpg_store().find_nodes_by_property(property, value)
76    }
77
78    // =========================================================================
79    // VECTOR INDEX API
80    // =========================================================================
81
82    /// Creates a vector similarity index on a node property.
83    ///
84    /// This enables efficient approximate nearest-neighbor search on vector
85    /// properties. Currently validates the index parameters and scans existing
86    /// nodes to verify the property contains vectors of the expected dimensions.
87    ///
88    /// # Arguments
89    ///
90    /// * `label` - Node label to index (e.g., `"Doc"`)
91    /// * `property` - Property containing vector embeddings (e.g., `"embedding"`)
92    /// * `dimensions` - Expected vector dimensions (inferred from data if `None`)
93    /// * `metric` - Distance metric: `"cosine"` (default), `"euclidean"`, `"dot_product"`, `"manhattan"`
94    /// * `m` - HNSW links per node (default: 16). Higher = better recall, more memory.
95    /// * `ef_construction` - Construction beam width (default: 128). Higher = better index quality, slower build.
96    ///
97    /// # Errors
98    ///
99    /// Returns an error if the metric is invalid, no vectors are found, or
100    /// dimensions don't match.
101    pub fn create_vector_index(
102        &self,
103        label: &str,
104        property: &str,
105        dimensions: Option<usize>,
106        metric: Option<&str>,
107        m: Option<usize>,
108        ef_construction: Option<usize>,
109    ) -> Result<()> {
110        use grafeo_common::types::{PropertyKey, Value};
111        use grafeo_core::index::vector::DistanceMetric;
112
113        let metric = match metric {
114            Some(m) => DistanceMetric::from_str(m).ok_or_else(|| {
115                grafeo_common::utils::error::Error::Internal(format!(
116                    "Unknown distance metric '{}'. Use: cosine, euclidean, dot_product, manhattan",
117                    m
118                ))
119            })?,
120            None => DistanceMetric::Cosine,
121        };
122
123        // Scan nodes to validate vectors exist and check dimensions
124        let prop_key = PropertyKey::new(property);
125        let mut found_dims: Option<usize> = dimensions;
126        let mut vector_count = 0usize;
127
128        #[cfg(feature = "vector-index")]
129        let mut vectors: Vec<(grafeo_common::types::NodeId, Vec<f32>)> = Vec::new();
130
131        for node in self.lpg_store().nodes_with_label(label) {
132            if let Some(Value::Vector(v)) = node.properties.get(&prop_key) {
133                if let Some(expected) = found_dims {
134                    if v.len() != expected {
135                        return Err(grafeo_common::utils::error::Error::Internal(format!(
136                            "Vector dimension mismatch: expected {}, found {} on node {}",
137                            expected,
138                            v.len(),
139                            node.id.0
140                        )));
141                    }
142                } else {
143                    found_dims = Some(v.len());
144                }
145                vector_count += 1;
146                #[cfg(feature = "vector-index")]
147                vectors.push((node.id, v.to_vec()));
148            }
149        }
150
151        let Some(dims) = found_dims else {
152            // No vectors found yet: caller must have supplied explicit dimensions
153            // so we can create an empty index that auto-populates via set_node_property.
154            return if let Some(d) = dimensions {
155                #[cfg(feature = "vector-index")]
156                {
157                    use grafeo_core::index::vector::{HnswConfig, HnswIndex};
158
159                    let mut config = HnswConfig::new(d, metric);
160                    if let Some(m_val) = m {
161                        config = config.with_m(m_val);
162                    }
163                    if let Some(ef_c) = ef_construction {
164                        config = config.with_ef_construction(ef_c);
165                    }
166
167                    let index = HnswIndex::new(config);
168                    self.lpg_store()
169                        .add_vector_index(label, property, Arc::new(index));
170                }
171
172                let _ = (m, ef_construction);
173                grafeo_info!(
174                    "Empty vector index created: :{label}({property}) - 0 vectors, {d} dimensions, metric={metric_name}",
175                    metric_name = metric.name()
176                );
177                Ok(())
178            } else {
179                Err(grafeo_common::utils::error::Error::Internal(format!(
180                    "No vector properties found on :{label}({property}) and no dimensions specified"
181                )))
182            };
183        };
184
185        // Build and populate the HNSW index
186        #[cfg(feature = "vector-index")]
187        {
188            use grafeo_core::index::vector::{HnswConfig, HnswIndex};
189
190            let mut config = HnswConfig::new(dims, metric);
191            if let Some(m_val) = m {
192                config = config.with_m(m_val);
193            }
194            if let Some(ef_c) = ef_construction {
195                config = config.with_ef_construction(ef_c);
196            }
197
198            let index = HnswIndex::with_capacity(config, vectors.len());
199            let accessor = grafeo_core::index::vector::PropertyVectorAccessor::new(
200                &**self.lpg_store(),
201                property,
202            );
203            for (node_id, vec) in &vectors {
204                index.insert(*node_id, vec, &accessor);
205            }
206
207            self.lpg_store()
208                .add_vector_index(label, property, Arc::new(index));
209        }
210
211        // Suppress unused variable warnings when vector-index is off
212        let _ = (m, ef_construction);
213
214        grafeo_info!(
215            "Vector index created: :{label}({property}) - {vector_count} vectors, {dims} dimensions, metric={metric_name}",
216            metric_name = metric.name()
217        );
218
219        Ok(())
220    }
221
222    /// Drops a vector index for the given label and property.
223    ///
224    /// Returns `true` if the index existed and was removed, `false` if no
225    /// index was found.
226    ///
227    /// After dropping, [`vector_search`](Self::vector_search) for this
228    /// label+property pair will return an error.
229    #[cfg(feature = "vector-index")]
230    pub fn drop_vector_index(&self, label: &str, property: &str) -> bool {
231        let removed = self.lpg_store().remove_vector_index(label, property);
232        if removed {
233            grafeo_info!("Vector index dropped: :{label}({property})");
234        }
235        removed
236    }
237
238    /// Drops and recreates a vector index, rescanning all matching nodes.
239    ///
240    /// This is useful after bulk inserts or when the index may be out of sync.
241    /// When the index still exists, the previous configuration (dimensions,
242    /// metric, M, ef\_construction) is preserved. When it has already been
243    /// dropped, dimensions are inferred from existing data and default
244    /// parameters are used.
245    ///
246    /// # Errors
247    ///
248    /// Returns an error if the rebuild fails (e.g., no matching vectors found
249    /// and no dimensions can be inferred).
250    #[cfg(feature = "vector-index")]
251    pub fn rebuild_vector_index(&self, label: &str, property: &str) -> Result<()> {
252        // Preserve config from existing index if available
253        let config = self
254            .lpg_store()
255            .get_vector_index(label, property)
256            .map(|idx| idx.config().clone());
257
258        self.lpg_store().remove_vector_index(label, property);
259
260        if let Some(config) = config {
261            self.create_vector_index(
262                label,
263                property,
264                Some(config.dimensions),
265                Some(config.metric.name()),
266                Some(config.m),
267                Some(config.ef_construction),
268            )
269        } else {
270            // Index was already dropped: infer dimensions from data
271            self.create_vector_index(label, property, None, None, None, None)
272        }
273    }
274
275    // =========================================================================
276    // TEXT INDEX API
277    // =========================================================================
278
279    /// Creates a BM25 text index on a node property for full-text search.
280    ///
281    /// Indexes all existing nodes with the given label and property.
282    /// The index stays in sync automatically as nodes are created, updated,
283    /// or deleted. Use [`rebuild_text_index`](Self::rebuild_text_index) only
284    /// if the index was created before existing data was loaded.
285    ///
286    /// # Errors
287    ///
288    /// Returns an error if the label has no nodes or the property contains no text values.
289    #[cfg(feature = "text-index")]
290    pub fn create_text_index(&self, label: &str, property: &str) -> Result<()> {
291        use grafeo_common::types::{PropertyKey, Value};
292        use grafeo_core::index::text::{BM25Config, InvertedIndex};
293
294        let mut index = InvertedIndex::new(BM25Config::default());
295        let prop_key = PropertyKey::new(property);
296
297        // Index all existing nodes with this label + property
298        let nodes = self.lpg_store().nodes_by_label(label);
299        for node_id in nodes {
300            if let Some(Value::String(text)) =
301                self.lpg_store().get_node_property(node_id, &prop_key)
302            {
303                index.insert(node_id, text.as_str());
304            }
305        }
306
307        self.lpg_store()
308            .add_text_index(label, property, Arc::new(RwLock::new(index)));
309        Ok(())
310    }
311
312    /// Drops a text index on a label+property pair.
313    ///
314    /// Returns `true` if the index existed and was removed.
315    #[cfg(feature = "text-index")]
316    pub fn drop_text_index(&self, label: &str, property: &str) -> bool {
317        self.lpg_store().remove_text_index(label, property)
318    }
319
320    /// Rebuilds a text index by re-scanning all matching nodes.
321    ///
322    /// Use after bulk property updates to keep the index current.
323    ///
324    /// # Errors
325    ///
326    /// Returns an error if no text index exists for this label+property.
327    #[cfg(feature = "text-index")]
328    pub fn rebuild_text_index(&self, label: &str, property: &str) -> Result<()> {
329        self.lpg_store().remove_text_index(label, property);
330        self.create_text_index(label, property)
331    }
332}