Skip to main content

velesdb_core/collection/
graph_collection.rs

//! `GraphCollection`: knowledge graph with optional node embeddings.
//!
//! # Design
//!
//! `GraphCollection` is a pure newtype over `Collection` (C-02).
//! All graph state (edge store, property/range indexes, node payloads, optional
//! HNSW for node embeddings) lives inside the single `inner: Collection`.
//! The graph schema and embedding dimension are persisted in `config.json`.
//! There are no separate engine fields — no dual-storage risk.
10
11use std::collections::HashMap;
12use std::path::PathBuf;
13
14use crate::collection::graph::{GraphEdge, GraphSchema, TraversalConfig, TraversalResult};
15use crate::collection::types::Collection;
16use crate::distance::DistanceMetric;
17use crate::error::Result;
18use crate::point::{Point, SearchResult};
19
20/// A graph collection storing typed relationships between nodes.
21///
22/// Node embeddings are optional: if `dimension` is `None`, no vector index is created.
23///
24/// # Examples
25///
26/// ```rust,no_run
27/// use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
28///
29/// let coll = GraphCollection::create(
30///     "./data/kg".into(),
31///     "knowledge",
32///     None,                    // no embeddings
33///     DistanceMetric::Cosine,  // unused when no embeddings
34///     GraphSchema::schemaless(),
35/// )?;
36///
37/// let edge = GraphEdge::new(1, 100, 200, "KNOWS")?;
38/// coll.add_edge(edge)?;
39/// # Ok::<(), velesdb_core::Error>(())
40/// ```
41#[derive(Clone)]
42pub struct GraphCollection {
43    /// Single source of truth — all graph state lives here (C-02 pure newtype).
44    pub(crate) inner: Collection,
45}
46
47impl GraphCollection {
48    // -------------------------------------------------------------------------
49    // Lifecycle
50    // -------------------------------------------------------------------------
51
52    /// Creates a new `GraphCollection`.
53    ///
54    /// # Errors
55    ///
56    /// Returns an error if the directory cannot be created or storage fails.
57    pub fn create(
58        path: PathBuf,
59        name: &str,
60        dimension: Option<usize>,
61        metric: DistanceMetric,
62        schema: GraphSchema,
63    ) -> Result<Self> {
64        Ok(Self {
65            inner: Collection::create_graph_collection(path, name, schema, dimension, metric)?,
66        })
67    }
68
69    /// Opens an existing `GraphCollection` from disk.
70    ///
71    /// # Errors
72    ///
73    /// Returns an error if config or storage cannot be opened.
74    pub fn open(path: PathBuf) -> Result<Self> {
75        Ok(Self {
76            inner: Collection::open(path)?,
77        })
78    }
79
80    /// Flushes all state to disk.
81    ///
82    /// # Errors
83    ///
84    /// Returns an error if any flush operation fails.
85    pub fn flush(&self) -> Result<()> {
86        self.inner.flush()
87    }
88
89    // -------------------------------------------------------------------------
90    // Metadata
91    // -------------------------------------------------------------------------
92
93    /// Returns the collection name.
94    #[must_use]
95    pub fn name(&self) -> String {
96        self.inner.config().name
97    }
98
99    /// Returns the graph schema stored in config.
100    ///
101    /// Returns `GraphSchema::schemaless()` for collections that have no schema set.
102    #[must_use]
103    pub fn schema(&self) -> GraphSchema {
104        self.inner
105            .graph_schema()
106            .unwrap_or_else(GraphSchema::schemaless)
107    }
108
109    /// Returns `true` if this collection stores node embeddings.
110    #[must_use]
111    pub fn has_embeddings(&self) -> bool {
112        self.inner.has_embeddings()
113    }
114
115    // -------------------------------------------------------------------------
116    // Graph operations — delegate to Collection graph API
117    // -------------------------------------------------------------------------
118
119    /// Adds an edge between two nodes.
120    ///
121    /// # Errors
122    ///
123    /// - Returns `Error::EdgeExists` if an edge with the same ID already exists.
124    ///
125    /// # Examples
126    ///
127    /// ```rust,no_run
128    /// # use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
129    /// # let coll = GraphCollection::create("./data/kg".into(), "kg", None, DistanceMetric::Cosine, GraphSchema::schemaless())?;
130    /// let edge = GraphEdge::new(1, 100, 200, "KNOWS")?;
131    /// coll.add_edge(edge)?;
132    /// # Ok::<(), velesdb_core::Error>(())
133    /// ```
134    pub fn add_edge(&self, edge: GraphEdge) -> Result<()> {
135        self.inner.add_edge(edge)
136    }
137
138    /// Returns edges, optionally filtered by label.
139    #[must_use]
140    pub fn get_edges(&self, label: Option<&str>) -> Vec<GraphEdge> {
141        match label {
142            Some(lbl) => self.inner.get_edges_by_label(lbl),
143            None => self.inner.get_all_edges(),
144        }
145    }
146
147    /// Returns all outgoing edges from a node.
148    #[must_use]
149    pub fn get_outgoing(&self, node_id: u64) -> Vec<GraphEdge> {
150        self.inner.get_outgoing_edges(node_id)
151    }
152
153    /// Returns all incoming edges to a node.
154    #[must_use]
155    pub fn get_incoming(&self, node_id: u64) -> Vec<GraphEdge> {
156        self.inner.get_incoming_edges(node_id)
157    }
158
159    /// Returns the total number of edges in the graph without materializing them.
160    #[must_use]
161    pub fn edge_count(&self) -> usize {
162        self.inner.edge_count()
163    }
164
165    /// Returns `(in_degree, out_degree)` for a node.
166    #[must_use]
167    pub fn node_degree(&self, node_id: u64) -> (usize, usize) {
168        self.inner.get_node_degree(node_id)
169    }
170
171    /// Returns the IDs of all nodes that have a stored payload.
172    ///
173    /// Nodes that appear only as edge endpoints without a stored payload
174    /// are not included. Use [`GraphCollection::get_edges`] to discover
175    /// all referenced node IDs.
176    #[must_use]
177    pub fn all_node_ids(&self) -> Vec<u64> {
178        self.inner.all_ids()
179    }
180
181    /// Returns the number of nodes (points) stored in this collection.
182    #[must_use]
183    pub fn len(&self) -> usize {
184        self.inner.len()
185    }
186
187    /// Returns `true` if the collection contains no nodes.
188    #[must_use]
189    pub fn is_empty(&self) -> bool {
190        self.inner.is_empty()
191    }
192
193    /// Retrieves nodes by IDs, returning `None` for missing entries.
194    #[must_use]
195    pub fn get(&self, ids: &[u64]) -> Vec<Option<Point>> {
196        self.inner.get(ids)
197    }
198
199    /// Deletes nodes by IDs.
200    ///
201    /// Missing IDs are silently ignored.
202    ///
203    /// # Errors
204    ///
205    /// Returns an error if storage operations fail.
206    pub fn delete(&self, ids: &[u64]) -> Result<()> {
207        self.inner.delete(ids)
208    }
209
210    /// Removes an edge from the graph by ID.
211    ///
212    /// Returns `true` if the edge existed and was removed, `false` otherwise.
213    #[must_use]
214    pub fn remove_edge(&self, edge_id: u64) -> bool {
215        self.inner.remove_edge(edge_id)
216    }
217
218    /// Performs BFS traversal from a source node.
219    ///
220    /// # Examples
221    ///
222    /// ```rust,no_run
223    /// # use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
224    /// # use velesdb_core::collection::graph::TraversalConfig;
225    /// # let coll = GraphCollection::create("./data/kg".into(), "kg", None, DistanceMetric::Cosine, GraphSchema::schemaless())?;
226    /// let config = TraversalConfig { max_depth: 3, ..TraversalConfig::default() };
227    /// let results = coll.traverse_bfs(100, &config);
228    /// for r in &results {
229    ///     println!("node={} depth={}", r.target_id, r.depth);
230    /// }
231    /// # Ok::<(), velesdb_core::Error>(())
232    /// ```
233    #[must_use]
234    pub fn traverse_bfs(&self, source_id: u64, config: &TraversalConfig) -> Vec<TraversalResult> {
235        self.inner.traverse_bfs_config(source_id, config)
236    }
237
238    /// Performs DFS traversal from a source node.
239    #[must_use]
240    pub fn traverse_dfs(&self, source_id: u64, config: &TraversalConfig) -> Vec<TraversalResult> {
241        self.inner.traverse_dfs_config(source_id, config)
242    }
243
244    // -------------------------------------------------------------------------
245    // Payload / node properties
246    // -------------------------------------------------------------------------
247
248    /// Inserts or updates node payload (properties).
249    ///
250    /// # Errors
251    ///
252    /// Returns an error if storage fails.
253    pub fn upsert_node_payload(&self, node_id: u64, payload: &serde_json::Value) -> Result<()> {
254        self.inner.store_node_payload(node_id, payload)
255    }
256
257    /// Inserts or updates node payload (properties).
258    ///
259    /// # Errors
260    ///
261    /// Returns an error if storage fails.
262    #[deprecated(since = "1.6.0", note = "Use upsert_node_payload() instead")]
263    pub fn store_node_payload(&self, node_id: u64, payload: &serde_json::Value) -> Result<()> {
264        self.upsert_node_payload(node_id, payload)
265    }
266
267    /// Retrieves node payload.
268    ///
269    /// # Errors
270    ///
271    /// Returns an error if retrieval fails.
272    pub fn get_node_payload(&self, node_id: u64) -> Result<Option<serde_json::Value>> {
273        self.inner.get_node_payload(node_id)
274    }
275
276    // -------------------------------------------------------------------------
277    // Optional embedding search
278    // -------------------------------------------------------------------------
279
280    /// Searches for similar nodes by embedding (only available if `has_embeddings()`).
281    ///
282    /// # Errors
283    ///
284    /// Returns `Error::VectorNotAllowed` if this collection has no embeddings,
285    /// or `Error::DimensionMismatch` if the query dimension is wrong.
286    pub fn search_by_embedding(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
287        self.inner.search_by_embedding(query, k)
288    }
289
290    /// Alias for [`search_by_embedding`](Self::search_by_embedding).
291    ///
292    /// Provided for API parity with [`VectorCollection::search`].
293    ///
294    /// # Errors
295    ///
296    /// Returns `Error::VectorNotAllowed` if this collection has no embeddings,
297    /// or `Error::DimensionMismatch` if the query dimension is wrong.
298    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
299        self.search_by_embedding(query, k)
300    }
301
302    // -------------------------------------------------------------------------
303    // VelesQL
304    // -------------------------------------------------------------------------
305
306    /// Executes a parsed `VelesQL` query.
307    ///
308    /// # Errors
309    ///
310    /// Returns an error if the query is invalid or execution fails.
311    pub fn execute_query(
312        &self,
313        query: &crate::velesql::Query,
314        params: &HashMap<String, serde_json::Value>,
315    ) -> Result<Vec<SearchResult>> {
316        self.inner.execute_query(query, params)
317    }
318
319    /// Executes a raw VelesQL string, parsing it before execution.
320    ///
321    /// # Errors
322    ///
323    /// - Returns an error if the SQL string cannot be parsed.
324    /// - Returns an error if query execution fails.
325    pub fn execute_query_str(
326        &self,
327        sql: &str,
328        params: &HashMap<String, serde_json::Value>,
329    ) -> Result<Vec<SearchResult>> {
330        self.inner.execute_query_str(sql, params)
331    }
332}
333
#[cfg(test)]
mod tests {
    use super::*;
    use crate::collection::graph::{GraphEdge, GraphSchema};
    use crate::distance::DistanceMetric;
    use tempfile::tempdir;

    /// Creates an embedding-less, schemaless graph named "kg" in a fresh
    /// temporary directory.
    ///
    /// The directory guard is returned alongside the collection so that the
    /// backing files outlive the test body.
    fn schemaless_graph() -> (tempfile::TempDir, GraphCollection) {
        let dir = tempdir().unwrap();
        let graph = GraphCollection::create(
            dir.path().to_path_buf(),
            "kg",
            None,
            DistanceMetric::Cosine,
            GraphSchema::schemaless(),
        )
        .unwrap();
        (dir, graph)
    }

    #[test]
    fn test_all_node_ids_returns_ids_with_payload() {
        let (_dir, graph) = schemaless_graph();

        // Attach a payload to each of the two nodes.
        for (node_id, name) in [(10, "Alice"), (20, "Bob")].iter().copied() {
            graph
                .upsert_node_payload(node_id, &serde_json::json!({ "name": name }))
                .unwrap();
        }

        let ids = graph.all_node_ids();
        assert!(ids.contains(&10), "node 10 should be present");
        assert!(ids.contains(&20), "node 20 should be present");
        assert_eq!(ids.len(), 2);
    }

    #[test]
    fn test_edge_count_returns_correct_count() {
        let (_dir, graph) = schemaless_graph();

        assert_eq!(graph.edge_count(), 0);

        // The count must grow by exactly one after each insertion.
        let edges = [(1, 10, 20, "knows"), (2, 20, 30, "likes")];
        for (already_added, &(edge_id, source, target, label)) in edges.iter().enumerate() {
            let edge = GraphEdge::new(edge_id, source, target, label).unwrap();
            graph.add_edge(edge).unwrap();
            assert_eq!(graph.edge_count(), already_added + 1);
        }
    }
}
387}