velesdb_core/collection/graph_collection.rs
1//! `GraphCollection`: knowledge graph with optional node embeddings.
2//!
3//! # Design
4//!
5//! `GraphCollection` is a pure newtype over `Collection` (C-02).
6//! All graph state (edge store, property/range indexes, node payloads, optional
7//! HNSW for node embeddings) lives inside the single `inner: Collection`.
8//! The graph schema and embedding dimension are persisted in `config.json`.
9//! There are no separate engine fields — no dual-storage risk.
10
11use std::collections::HashMap;
12use std::path::PathBuf;
13
14use crate::collection::graph::{GraphEdge, GraphSchema, TraversalConfig, TraversalResult};
15use crate::collection::types::Collection;
16use crate::distance::DistanceMetric;
17use crate::error::Result;
18use crate::point::{Point, SearchResult};
19
20/// A graph collection storing typed relationships between nodes.
21///
22/// Node embeddings are optional: if `dimension` is `None`, no vector index is created.
23///
24/// # Examples
25///
26/// ```rust,no_run
27/// use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
28///
29/// let coll = GraphCollection::create(
30/// "./data/kg".into(),
31/// "knowledge",
32/// None, // no embeddings
33/// DistanceMetric::Cosine, // unused when no embeddings
34/// GraphSchema::schemaless(),
35/// )?;
36///
37/// let edge = GraphEdge::new(1, 100, 200, "KNOWS")?;
38/// coll.add_edge(edge)?;
39/// # Ok::<(), velesdb_core::Error>(())
40/// ```
41#[derive(Clone)]
42pub struct GraphCollection {
43 /// Single source of truth — all graph state lives here (C-02 pure newtype).
44 pub(crate) inner: Collection,
45}
46
47impl GraphCollection {
48 // -------------------------------------------------------------------------
49 // Lifecycle
50 // -------------------------------------------------------------------------
51
52 /// Creates a new `GraphCollection`.
53 ///
54 /// # Errors
55 ///
56 /// Returns an error if the directory cannot be created or storage fails.
57 pub fn create(
58 path: PathBuf,
59 name: &str,
60 dimension: Option<usize>,
61 metric: DistanceMetric,
62 schema: GraphSchema,
63 ) -> Result<Self> {
64 Ok(Self {
65 inner: Collection::create_graph_collection(path, name, schema, dimension, metric)?,
66 })
67 }
68
69 /// Opens an existing `GraphCollection` from disk.
70 ///
71 /// # Errors
72 ///
73 /// Returns an error if config or storage cannot be opened.
74 pub fn open(path: PathBuf) -> Result<Self> {
75 Ok(Self {
76 inner: Collection::open(path)?,
77 })
78 }
79
80 /// Flushes all state to disk.
81 ///
82 /// # Errors
83 ///
84 /// Returns an error if any flush operation fails.
85 pub fn flush(&self) -> Result<()> {
86 self.inner.flush()
87 }
88
89 // -------------------------------------------------------------------------
90 // Metadata
91 // -------------------------------------------------------------------------
92
93 /// Returns the collection name.
94 #[must_use]
95 pub fn name(&self) -> String {
96 self.inner.config().name
97 }
98
99 /// Returns the graph schema stored in config.
100 ///
101 /// Returns `GraphSchema::schemaless()` for collections that have no schema set.
102 #[must_use]
103 pub fn schema(&self) -> GraphSchema {
104 self.inner
105 .graph_schema()
106 .unwrap_or_else(GraphSchema::schemaless)
107 }
108
109 /// Returns `true` if this collection stores node embeddings.
110 #[must_use]
111 pub fn has_embeddings(&self) -> bool {
112 self.inner.has_embeddings()
113 }
114
115 // -------------------------------------------------------------------------
116 // Graph operations — delegate to Collection graph API
117 // -------------------------------------------------------------------------
118
119 /// Adds an edge between two nodes.
120 ///
121 /// # Errors
122 ///
123 /// - Returns `Error::EdgeExists` if an edge with the same ID already exists.
124 ///
125 /// # Examples
126 ///
127 /// ```rust,no_run
128 /// # use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
129 /// # let coll = GraphCollection::create("./data/kg".into(), "kg", None, DistanceMetric::Cosine, GraphSchema::schemaless())?;
130 /// let edge = GraphEdge::new(1, 100, 200, "KNOWS")?;
131 /// coll.add_edge(edge)?;
132 /// # Ok::<(), velesdb_core::Error>(())
133 /// ```
134 pub fn add_edge(&self, edge: GraphEdge) -> Result<()> {
135 self.inner.add_edge(edge)
136 }
137
138 /// Returns edges, optionally filtered by label.
139 #[must_use]
140 pub fn get_edges(&self, label: Option<&str>) -> Vec<GraphEdge> {
141 match label {
142 Some(lbl) => self.inner.get_edges_by_label(lbl),
143 None => self.inner.get_all_edges(),
144 }
145 }
146
147 /// Returns all outgoing edges from a node.
148 #[must_use]
149 pub fn get_outgoing(&self, node_id: u64) -> Vec<GraphEdge> {
150 self.inner.get_outgoing_edges(node_id)
151 }
152
153 /// Returns all incoming edges to a node.
154 #[must_use]
155 pub fn get_incoming(&self, node_id: u64) -> Vec<GraphEdge> {
156 self.inner.get_incoming_edges(node_id)
157 }
158
159 /// Returns the total number of edges in the graph without materializing them.
160 #[must_use]
161 pub fn edge_count(&self) -> usize {
162 self.inner.edge_count()
163 }
164
165 /// Returns `(in_degree, out_degree)` for a node.
166 #[must_use]
167 pub fn node_degree(&self, node_id: u64) -> (usize, usize) {
168 self.inner.get_node_degree(node_id)
169 }
170
171 /// Returns the IDs of all nodes that have a stored payload.
172 ///
173 /// Nodes that appear only as edge endpoints without a stored payload
174 /// are not included. Use [`GraphCollection::get_edges`] to discover
175 /// all referenced node IDs.
176 #[must_use]
177 pub fn all_node_ids(&self) -> Vec<u64> {
178 self.inner.all_ids()
179 }
180
181 /// Returns the number of nodes (points) stored in this collection.
182 #[must_use]
183 pub fn len(&self) -> usize {
184 self.inner.len()
185 }
186
187 /// Returns `true` if the collection contains no nodes.
188 #[must_use]
189 pub fn is_empty(&self) -> bool {
190 self.inner.is_empty()
191 }
192
193 /// Retrieves nodes by IDs, returning `None` for missing entries.
194 #[must_use]
195 pub fn get(&self, ids: &[u64]) -> Vec<Option<Point>> {
196 self.inner.get(ids)
197 }
198
199 /// Deletes nodes by IDs.
200 ///
201 /// Missing IDs are silently ignored.
202 ///
203 /// # Errors
204 ///
205 /// Returns an error if storage operations fail.
206 pub fn delete(&self, ids: &[u64]) -> Result<()> {
207 self.inner.delete(ids)
208 }
209
210 /// Removes an edge from the graph by ID.
211 ///
212 /// Returns `true` if the edge existed and was removed, `false` otherwise.
213 #[must_use]
214 pub fn remove_edge(&self, edge_id: u64) -> bool {
215 self.inner.remove_edge(edge_id)
216 }
217
218 /// Performs BFS traversal from a source node.
219 ///
220 /// # Examples
221 ///
222 /// ```rust,no_run
223 /// # use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
224 /// # use velesdb_core::collection::graph::TraversalConfig;
225 /// # let coll = GraphCollection::create("./data/kg".into(), "kg", None, DistanceMetric::Cosine, GraphSchema::schemaless())?;
226 /// let config = TraversalConfig { max_depth: 3, ..TraversalConfig::default() };
227 /// let results = coll.traverse_bfs(100, &config);
228 /// for r in &results {
229 /// println!("node={} depth={}", r.target_id, r.depth);
230 /// }
231 /// # Ok::<(), velesdb_core::Error>(())
232 /// ```
233 #[must_use]
234 pub fn traverse_bfs(&self, source_id: u64, config: &TraversalConfig) -> Vec<TraversalResult> {
235 self.inner.traverse_bfs_config(source_id, config)
236 }
237
238 /// Performs DFS traversal from a source node.
239 #[must_use]
240 pub fn traverse_dfs(&self, source_id: u64, config: &TraversalConfig) -> Vec<TraversalResult> {
241 self.inner.traverse_dfs_config(source_id, config)
242 }
243
244 // -------------------------------------------------------------------------
245 // Payload / node properties
246 // -------------------------------------------------------------------------
247
248 /// Inserts or updates node payload (properties).
249 ///
250 /// # Errors
251 ///
252 /// Returns an error if storage fails.
253 pub fn upsert_node_payload(&self, node_id: u64, payload: &serde_json::Value) -> Result<()> {
254 self.inner.store_node_payload(node_id, payload)
255 }
256
257 /// Inserts or updates node payload (properties).
258 ///
259 /// # Errors
260 ///
261 /// Returns an error if storage fails.
262 #[deprecated(since = "1.6.0", note = "Use upsert_node_payload() instead")]
263 pub fn store_node_payload(&self, node_id: u64, payload: &serde_json::Value) -> Result<()> {
264 self.upsert_node_payload(node_id, payload)
265 }
266
267 /// Retrieves node payload.
268 ///
269 /// # Errors
270 ///
271 /// Returns an error if retrieval fails.
272 pub fn get_node_payload(&self, node_id: u64) -> Result<Option<serde_json::Value>> {
273 self.inner.get_node_payload(node_id)
274 }
275
276 // -------------------------------------------------------------------------
277 // Optional embedding search
278 // -------------------------------------------------------------------------
279
280 /// Searches for similar nodes by embedding (only available if `has_embeddings()`).
281 ///
282 /// # Errors
283 ///
284 /// Returns `Error::VectorNotAllowed` if this collection has no embeddings,
285 /// or `Error::DimensionMismatch` if the query dimension is wrong.
286 pub fn search_by_embedding(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
287 self.inner.search_by_embedding(query, k)
288 }
289
290 /// Alias for [`search_by_embedding`](Self::search_by_embedding).
291 ///
292 /// Provided for API parity with [`VectorCollection::search`].
293 ///
294 /// # Errors
295 ///
296 /// Returns `Error::VectorNotAllowed` if this collection has no embeddings,
297 /// or `Error::DimensionMismatch` if the query dimension is wrong.
298 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
299 self.search_by_embedding(query, k)
300 }
301
302 // -------------------------------------------------------------------------
303 // VelesQL
304 // -------------------------------------------------------------------------
305
306 /// Executes a parsed `VelesQL` query.
307 ///
308 /// # Errors
309 ///
310 /// Returns an error if the query is invalid or execution fails.
311 pub fn execute_query(
312 &self,
313 query: &crate::velesql::Query,
314 params: &HashMap<String, serde_json::Value>,
315 ) -> Result<Vec<SearchResult>> {
316 self.inner.execute_query(query, params)
317 }
318
319 /// Executes a raw VelesQL string, parsing it before execution.
320 ///
321 /// # Errors
322 ///
323 /// - Returns an error if the SQL string cannot be parsed.
324 /// - Returns an error if query execution fails.
325 pub fn execute_query_str(
326 &self,
327 sql: &str,
328 params: &HashMap<String, serde_json::Value>,
329 ) -> Result<Vec<SearchResult>> {
330 self.inner.execute_query_str(sql, params)
331 }
332}
333
#[cfg(test)]
mod tests {
    use super::*;
    use crate::collection::graph::GraphSchema;
    use crate::distance::DistanceMetric;
    use tempfile::tempdir;

    /// Creates the schemaless, embedding-free collection named "kg" that every
    /// test in this module starts from, rooted at `root`.
    fn schemaless_graph(root: &std::path::Path) -> GraphCollection {
        GraphCollection::create(
            root.to_path_buf(),
            "kg",
            None,
            DistanceMetric::Cosine,
            GraphSchema::schemaless(),
        )
        .unwrap()
    }

    /// `all_node_ids` must list exactly the nodes that received a payload.
    #[test]
    fn test_all_node_ids_returns_ids_with_payload() {
        // Keep the temp dir guard alive for the whole test.
        let dir = tempdir().unwrap();
        let col = schemaless_graph(dir.path());

        // Give two nodes a payload each.
        for (node_id, person) in [(10, "Alice"), (20, "Bob")] {
            col.upsert_node_payload(node_id, &serde_json::json!({"name": person}))
                .unwrap();
        }

        let ids = col.all_node_ids();
        assert!(ids.contains(&10), "node 10 should be present");
        assert!(ids.contains(&20), "node 20 should be present");
        assert_eq!(ids.len(), 2);
    }

    /// `edge_count` must start at zero and grow by one per inserted edge.
    #[test]
    fn test_edge_count_returns_correct_count() {
        // Keep the temp dir guard alive for the whole test.
        let dir = tempdir().unwrap();
        let col = schemaless_graph(dir.path());

        assert_eq!(col.edge_count(), 0);

        // (edge id, source node, target node, label), inserted in order.
        let edges = [(1, 10, 20, "knows"), (2, 20, 30, "likes")];
        for (expected_count, (edge_id, source, target, label)) in (1_usize..).zip(edges) {
            let edge = GraphEdge::new(edge_id, source, target, label).unwrap();
            col.add_edge(edge).unwrap();
            assert_eq!(col.edge_count(), expected_count);
        }
    }
}
387}