velesdb-core 1.9.0

High-performance vector database engine written in Rust
Documentation
//! `GraphCollection`: knowledge graph with optional node embeddings.
//!
//! # Design
//!
//! `GraphCollection` is a pure newtype over `Collection` (C-02).
//! All graph state (edge store, property/range indexes, node payloads, optional
//! HNSW for node embeddings) lives inside the single `inner: Collection`.
//! The graph schema and embedding dimension are persisted in `config.json`.
//! There are no separate engine fields — no dual-storage risk.

use std::collections::HashMap;
use std::path::PathBuf;

use crate::collection::graph::{GraphEdge, GraphSchema, TraversalConfig, TraversalResult};
use crate::collection::types::Collection;
use crate::distance::DistanceMetric;
use crate::error::Result;
use crate::point::{Point, SearchResult};

/// A graph collection storing typed relationships between nodes.
///
/// Node embeddings are optional: if `dimension` is `None`, no vector index is created.
///
/// # Examples
///
/// ```rust,no_run
/// use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
///
/// let coll = GraphCollection::create(
///     "./data/kg".into(),
///     "knowledge",
///     None,                    // no embeddings
///     DistanceMetric::Cosine,  // unused when no embeddings
///     GraphSchema::schemaless(),
/// )?;
///
/// let edge = GraphEdge::new(1, 100, 200, "KNOWS")?;
/// coll.add_edge(edge)?;
/// # Ok::<(), velesdb_core::Error>(())
/// ```
#[derive(Clone)]
pub struct GraphCollection {
    /// Single source of truth — all graph state lives here (C-02 pure newtype).
    pub(crate) inner: Collection,
}

impl GraphCollection {
    // -------------------------------------------------------------------------
    // Lifecycle
    // -------------------------------------------------------------------------

    /// Creates a new `GraphCollection`.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory cannot be created or storage fails.
    pub fn create(
        path: PathBuf,
        name: &str,
        dimension: Option<usize>,
        metric: DistanceMetric,
        schema: GraphSchema,
    ) -> Result<Self> {
        Ok(Self {
            inner: Collection::create_graph_collection(path, name, schema, dimension, metric)?,
        })
    }

    /// Opens an existing `GraphCollection` from disk.
    ///
    /// # Errors
    ///
    /// Returns an error if config or storage cannot be opened.
    pub fn open(path: PathBuf) -> Result<Self> {
        Ok(Self {
            inner: Collection::open(path)?,
        })
    }

    /// Flushes all state to disk.
    ///
    /// Issue #423: This fast-path flush skips `vectors.idx` serialization.
    /// The WAL provides crash recovery for the vector index.
    ///
    /// # Errors
    ///
    /// Returns an error if any flush operation fails.
    pub fn flush(&self) -> Result<()> {
        self.inner.flush()
    }

    /// Full durability flush including `vectors.idx` serialization.
    ///
    /// Issue #423: Use on graceful shutdown to avoid a full WAL replay
    /// on the next startup.
    ///
    /// # Errors
    ///
    /// Returns an error if any flush operation fails.
    pub fn flush_full(&self) -> Result<()> {
        self.inner.flush_full()
    }

    // -------------------------------------------------------------------------
    // Metadata
    // -------------------------------------------------------------------------

    /// Returns the collection name.
    #[must_use]
    pub fn name(&self) -> String {
        self.inner.config().name
    }

    /// Returns the graph schema stored in config.
    ///
    /// Returns `GraphSchema::schemaless()` for collections that have no schema set.
    #[must_use]
    pub fn schema(&self) -> GraphSchema {
        self.inner
            .graph_schema()
            .unwrap_or_else(GraphSchema::schemaless)
    }

    /// Returns `true` if this collection stores node embeddings.
    #[must_use]
    pub fn has_embeddings(&self) -> bool {
        self.inner.has_embeddings()
    }

    // -------------------------------------------------------------------------
    // Graph operations — delegate to Collection graph API
    // -------------------------------------------------------------------------

    /// Adds an edge between two nodes.
    ///
    /// # Errors
    ///
    /// - Returns `Error::EdgeExists` if an edge with the same ID already exists.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
    /// # let coll = GraphCollection::create("./data/kg".into(), "kg", None, DistanceMetric::Cosine, GraphSchema::schemaless())?;
    /// let edge = GraphEdge::new(1, 100, 200, "KNOWS")?;
    /// coll.add_edge(edge)?;
    /// # Ok::<(), velesdb_core::Error>(())
    /// ```
    pub fn add_edge(&self, edge: GraphEdge) -> Result<()> {
        self.inner.add_edge(edge)
    }

    /// Returns edges, optionally filtered by label.
    #[must_use]
    pub fn get_edges(&self, label: Option<&str>) -> Vec<GraphEdge> {
        match label {
            Some(lbl) => self.inner.get_edges_by_label(lbl),
            None => self.inner.get_all_edges(),
        }
    }

    /// Returns all outgoing edges from a node.
    #[must_use]
    pub fn get_outgoing(&self, node_id: u64) -> Vec<GraphEdge> {
        self.inner.get_outgoing_edges(node_id)
    }

    /// Returns all incoming edges to a node.
    #[must_use]
    pub fn get_incoming(&self, node_id: u64) -> Vec<GraphEdge> {
        self.inner.get_incoming_edges(node_id)
    }

    /// Returns the total number of edges in the graph without materializing them.
    #[must_use]
    pub fn edge_count(&self) -> usize {
        self.inner.edge_count()
    }

    /// Returns `(in_degree, out_degree)` for a node.
    #[must_use]
    pub fn node_degree(&self, node_id: u64) -> (usize, usize) {
        self.inner.get_node_degree(node_id)
    }

    /// Returns the IDs of all nodes that have a stored payload.
    ///
    /// Nodes that appear only as edge endpoints without a stored payload
    /// are not included. Use [`GraphCollection::get_edges`] to discover
    /// all referenced node IDs.
    #[must_use]
    pub fn all_node_ids(&self) -> Vec<u64> {
        self.inner.all_ids()
    }

    /// Returns the number of nodes (points) stored in this collection.
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` if the collection contains no nodes.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Retrieves nodes by IDs, returning `None` for missing entries.
    #[must_use]
    pub fn get(&self, ids: &[u64]) -> Vec<Option<Point>> {
        self.inner.get(ids)
    }

    /// Deletes nodes by IDs.
    ///
    /// Missing IDs are silently ignored.
    ///
    /// # Errors
    ///
    /// Returns an error if storage operations fail.
    pub fn delete(&self, ids: &[u64]) -> Result<()> {
        self.inner.delete(ids)
    }

    /// Removes an edge from the graph by ID.
    ///
    /// Returns `true` if the edge existed and was removed, `false` otherwise.
    #[must_use]
    pub fn remove_edge(&self, edge_id: u64) -> bool {
        self.inner.remove_edge(edge_id)
    }

    /// Performs BFS traversal from a source node.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use velesdb_core::{GraphCollection, GraphSchema, GraphEdge, DistanceMetric};
    /// # use velesdb_core::collection::graph::TraversalConfig;
    /// # let coll = GraphCollection::create("./data/kg".into(), "kg", None, DistanceMetric::Cosine, GraphSchema::schemaless())?;
    /// let config = TraversalConfig { max_depth: 3, ..TraversalConfig::default() };
    /// let results = coll.traverse_bfs(100, &config);
    /// for r in &results {
    ///     println!("node={} depth={}", r.target_id, r.depth);
    /// }
    /// # Ok::<(), velesdb_core::Error>(())
    /// ```
    #[must_use]
    pub fn traverse_bfs(&self, source_id: u64, config: &TraversalConfig) -> Vec<TraversalResult> {
        self.inner.traverse_bfs_config(source_id, config)
    }

    /// Performs DFS traversal from a source node.
    #[must_use]
    pub fn traverse_dfs(&self, source_id: u64, config: &TraversalConfig) -> Vec<TraversalResult> {
        self.inner.traverse_dfs_config(source_id, config)
    }

    // -------------------------------------------------------------------------
    // Payload / node properties
    // -------------------------------------------------------------------------

    /// Inserts or updates node payload (properties).
    ///
    /// # Errors
    ///
    /// Returns an error if storage fails.
    pub fn upsert_node_payload(&self, node_id: u64, payload: &serde_json::Value) -> Result<()> {
        self.inner.store_node_payload(node_id, payload)
    }

    /// Inserts or updates node payload (properties).
    ///
    /// # Errors
    ///
    /// Returns an error if storage fails.
    #[deprecated(since = "1.6.0", note = "Use upsert_node_payload() instead")]
    pub fn store_node_payload(&self, node_id: u64, payload: &serde_json::Value) -> Result<()> {
        self.upsert_node_payload(node_id, payload)
    }

    /// Retrieves node payload.
    ///
    /// # Errors
    ///
    /// Returns an error if retrieval fails.
    pub fn get_node_payload(&self, node_id: u64) -> Result<Option<serde_json::Value>> {
        self.inner.get_node_payload(node_id)
    }

    // -------------------------------------------------------------------------
    // Optional embedding search
    // -------------------------------------------------------------------------

    /// Searches for similar nodes by embedding (only available if `has_embeddings()`).
    ///
    /// # Errors
    ///
    /// Returns `Error::VectorNotAllowed` if this collection has no embeddings,
    /// or `Error::DimensionMismatch` if the query dimension is wrong.
    pub fn search_by_embedding(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
        self.inner.search_by_embedding(query, k)
    }

    /// Alias for [`search_by_embedding`](Self::search_by_embedding).
    ///
    /// Provided for API parity with [`VectorCollection::search`].
    ///
    /// # Errors
    ///
    /// Returns `Error::VectorNotAllowed` if this collection has no embeddings,
    /// or `Error::DimensionMismatch` if the query dimension is wrong.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
        self.search_by_embedding(query, k)
    }

    // -------------------------------------------------------------------------
    // VelesQL
    // -------------------------------------------------------------------------

    /// Executes a parsed `VelesQL` query.
    ///
    /// # Errors
    ///
    /// Returns an error if the query is invalid or execution fails.
    pub fn execute_query(
        &self,
        query: &crate::velesql::Query,
        params: &HashMap<String, serde_json::Value>,
    ) -> Result<Vec<SearchResult>> {
        self.inner.execute_query(query, params)
    }

    /// Executes a raw VelesQL string, parsing it before execution.
    ///
    /// # Errors
    ///
    /// - Returns an error if the SQL string cannot be parsed.
    /// - Returns an error if query execution fails.
    pub fn execute_query_str(
        &self,
        sql: &str,
        params: &HashMap<String, serde_json::Value>,
    ) -> Result<Vec<SearchResult>> {
        self.inner.execute_query_str(sql, params)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::collection::graph::GraphSchema;
    use crate::distance::DistanceMetric;
    use tempfile::tempdir;

    #[test]
    fn test_all_node_ids_returns_ids_with_payload() {
        let dir = tempdir().unwrap();
        let col = GraphCollection::create(
            dir.path().to_path_buf(),
            "kg",
            None,
            DistanceMetric::Cosine,
            GraphSchema::schemaless(),
        )
        .unwrap();

        // Store payloads on two nodes
        col.upsert_node_payload(10, &serde_json::json!({"name": "Alice"}))
            .unwrap();
        col.upsert_node_payload(20, &serde_json::json!({"name": "Bob"}))
            .unwrap();

        let ids = col.all_node_ids();
        assert!(ids.contains(&10), "node 10 should be present");
        assert!(ids.contains(&20), "node 20 should be present");
        assert_eq!(ids.len(), 2);
    }

    #[test]
    fn test_edge_count_returns_correct_count() {
        let dir = tempdir().unwrap();
        let col = GraphCollection::create(
            dir.path().to_path_buf(),
            "kg",
            None,
            DistanceMetric::Cosine,
            GraphSchema::schemaless(),
        )
        .unwrap();

        assert_eq!(col.edge_count(), 0);

        let edge1 = crate::collection::graph::GraphEdge::new(1, 10, 20, "knows").unwrap();
        col.add_edge(edge1).unwrap();
        assert_eq!(col.edge_count(), 1);

        let edge2 = crate::collection::graph::GraphEdge::new(2, 20, 30, "likes").unwrap();
        col.add_edge(edge2).unwrap();
        assert_eq!(col.edge_count(), 2);
    }
}