cerebro 0.1.0

Blazing-fast, storage-agnostic semantic memory engine for AI Agents — written in pure Rust
Documentation
use neo4rs::{query, Graph};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use crate::models::{Chunk};
use crate::traits::{CerebroError, Result};

/// One factual memory expressed as a subject–predicate–object triple.
///
/// The type is plain data: three owned strings that can be cloned, debug-printed,
/// and (de)serialized through serde.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct EntityTriple {
    /// The entity the fact is about (the source side of the relationship).
    pub subject: String,
    /// The relationship that links `subject` to `object`.
    pub predicate: String,
    /// The entity the relationship points at (the target side).
    pub object: String,
}

/// Graph-backed memory layer for structural facts.
///
/// Responsible for pulling Subject-Predicate-Object triples out of semantic chunks
/// and persisting them in a Neo4j / Memgraph store.
pub struct GraphMemoryLayer {
    /// Reference-counted handle to the graph database client.
    graph: Arc<Graph>,
}

impl GraphMemoryLayer {
    /// Connects to a Neo4j or Memgraph instance.
    ///
    /// # Arguments
    ///
    /// * `uri` - Address of the graph database, passed straight to `Graph::new`.
    /// * `user` - User name used to authenticate.
    /// * `pass` - Password used to authenticate.
    ///
    /// # Errors
    ///
    /// Returns [`CerebroError::StorageError`] carrying the driver's message when the
    /// connection cannot be established.
    pub async fn new(uri: &str, user: &str, pass: &str) -> Result<Self> {
        let graph = Graph::new(uri, user, pass)
            .await
            .map_err(|e| CerebroError::StorageError(format!("Neo4j connection error: {}", e)))?;

        Ok(Self { graph: Arc::new(graph) })
    }

    /// Extracts entity triples from a chunk.
    ///
    /// This is currently a placeholder: it logs the chunk index to stdout and returns
    /// one fixed sample triple (`Cerebro` / `stores` / `Memory Nodes`) regardless of the
    /// chunk's content. A production implementation is expected to ask an LLM provider
    /// for the triples as JSON instead.
    ///
    /// # Errors
    ///
    /// The placeholder never fails; the `Result` return type is kept so a real
    /// extractor can report errors without changing the signature.
    pub async fn extract_knowledge(&self, chunk: &Chunk) -> Result<Vec<EntityTriple>> {
        // [NOTE] Production system connects to an LLM provider and asks for JSON back.
        // For demonstration logic without blocking on network:
        println!("Extracting knowledge from chunk: {}", chunk.index);
        let sample = EntityTriple {
            subject: "Cerebro".to_string(),
            predicate: "stores".to_string(),
            object: "Memory Nodes".to_string(),
        };
        Ok(vec![sample])
    }

    /// Stores the triples in the graph so they are queryable via Cypher.
    ///
    /// Each triple is written with `MERGE`: the subject and object become `Entity`
    /// nodes keyed by `name`, joined by a `RELATION` edge whose `type` property holds
    /// the predicate and whose `doc_id` property holds `doc_id`. All triples are sent
    /// inside a single transaction that is committed after the last one.
    ///
    /// An empty `triplets` slice is a no-op and does not touch the database.
    ///
    /// # Errors
    ///
    /// Returns [`CerebroError::StorageError`] if the transaction cannot be started,
    /// if any statement fails, or if the commit fails. On a statement failure the
    /// function returns immediately without committing.
    pub async fn upsert_triplets(&self, triplets: &[EntityTriple], doc_id: &str) -> Result<()> {
        // The statement is constant: all values travel as bound parameters, so there
        // is nothing to format and no reason to rebuild the string per triple.
        const UPSERT_CYPHER: &str = "MERGE (s:Entity {name: $subject}) \
             MERGE (o:Entity {name: $object}) \
             MERGE (s)-[r:RELATION {type: $predicate, doc_id: $doc_id}]->(o)";

        // Nothing to write: skip the round trip of opening and committing an empty
        // transaction.
        if triplets.is_empty() {
            return Ok(());
        }

        let mut txn = self.graph.start_txn()
            .await
            .map_err(|e| CerebroError::StorageError(e.to_string()))?;

        for t in triplets {
            // Very naive push logic: one statement per triple.
            // Production uses UNWIND on arrays.
            txn.run(query(UPSERT_CYPHER)
                .param("subject", t.subject.clone())
                .param("object", t.object.clone())
                .param("predicate", t.predicate.clone())
                .param("doc_id", doc_id.to_string())
            )
            .await
            .map_err(|e| CerebroError::StorageError(e.to_string()))?;
        }

        txn.commit().await.map_err(|e| CerebroError::StorageError(e.to_string()))?;
        Ok(())
    }
}