Skip to main content

memoir_core/graph/
mod.rs

1//! Property-graph abstraction for entity/relationship storage.
2//!
3//! Defines [`GraphStore`], memoir's optional secondary index for the knowledge
4//! graph derived from extracted memories (epic 0012). Two implementations ship:
5//! [`InMemoryGraphStore`] (always available — the test/benchmark boundary) and
6//! [`FalkorGraphStore`] (behind the `knowledge-graph` feature — production,
7//! backed by FalkorDB).
8//!
9//! This trait is deliberately thin: it covers *connectivity* (ensuring the
10//! named graph is reachable) and a raw Cypher [`GraphStore::query`] escape
11//! hatch. Typed upsert and traversal methods are layered on top of it.
12//!
13//! Like [`crate::vector::VectorIndex`], the graph is a *derived* index: Postgres
14//! remains the source of truth, and the graph can be rebuilt from the episodic
15//! memories. Absence of a graph store is a first-class, non-degraded state —
16//! recall simply returns vector hits with no graph enrichment.
17
18mod commit;
19mod cosine;
20mod edge;
21mod enrich;
22mod error;
23mod extraction;
24mod forget;
25mod inspect;
26mod memory;
27mod resolve;
28mod synthesis;
29
30pub use commit::{CommitContext, CommitError};
31pub use enrich::{
32    DEFAULT_ENRICHMENT_DEPTH, GraphContext, GraphEntity, GraphRelationship, MAX_ENRICHMENT_DEPTH,
33};
34pub use edge::{
35    CardinalityPolicy, Edge, EdgeCatalog, EdgeError, EdgeResolution, EdgeResolver, ExistingEdge, NaiveAppendResolver,
36    RelationCardinality, TemporalEdgeResolver,
37};
38pub use error::GraphError;
39pub use extraction::{
40    DEFAULT_TRIPLE_PROMPT, LlmExtractor, TRIPLE_REPLY_MAX_CHARS, Triple, TripleExtractor, TripleSet,
41};
42pub use inspect::{
43    DEFAULT_INSPECTION_LIMIT, GraphEdge, GraphNode, GraphSnapshot, MAX_INSPECTION_LIMIT,
44};
45pub use memory::InMemoryGraphStore;
46pub use resolve::{
47    EmbeddingEntityResolver, EntityCatalog, EntityResolver, EntityVector, ExactStringResolver, InMemoryEntityCatalog,
48    Resolution, ResolveError, MIN_ENTITY_SIMILARITY,
49};
50pub use synthesis::{
51    EmbeddingSynthesizer, MIN_CORROBORATION_SIMILARITY, PassthroughSynthesizer, SemanticFact, SynthesisError,
52    Synthesizer,
53};
54
55#[cfg(feature = "knowledge-graph")]
56mod falkor;
57
58#[cfg(feature = "knowledge-graph")]
59mod falkor_catalog;
60
61#[cfg(feature = "knowledge-graph")]
62mod staging;
63
64#[cfg(feature = "knowledge-graph")]
65pub use falkor::FalkorGraphStore;
66
67#[cfg(feature = "knowledge-graph")]
68pub use falkor_catalog::{FalkorEdgeCatalog, FalkorEntityCatalog};
69
70#[cfg(feature = "knowledge-graph")]
71pub use staging::TripleStaging;
72
73use std::collections::HashMap;
74use std::future::Future;
75
/// Default graph name memoir writes to within a shared FalkorDB instance.
///
/// FalkorDB hosts many named graphs in one process; memoir confines its writes
/// to this graph so it coexists with a host application's own graphs. Override
/// the name per deployment so two memoir instances never collide on one engine.
pub const DEFAULT_GRAPH_NAME: &str = "memoir";

/// One row of a Cypher result: an ordered list of column/value pairs.
///
/// Scalars are rendered to `String` so the public surface never leaks a
/// backend-specific value type. The row is a `Vec` rather than a map so the
/// columns keep the order of the `RETURN` clause.
///
/// NOTE(review): each pair is presumably `(column name, rendered value)` —
/// confirm against the backend implementations.
pub type GraphRow = Vec<(String, String)>;

/// The rows produced by a Cypher [`GraphStore::query`], in result order.
///
/// Each element is one [`GraphRow`].
pub type GraphRows = Vec<GraphRow>;
92
/// A typed value bound to a Cypher query parameter.
///
/// Carries the value *and* its kind so the backend can render each parameter as
/// the correct Cypher literal: strings are quoted and escaped, numbers stay
/// bare. The kind cannot be recovered from a plain string once erased — a
/// `LIMIT` needs the bare integer `500`, while a name needs the quoted literal
/// `'Alice'` — so callers state the kind at the bind site rather than letting
/// the backend guess.
///
/// FalkorDB's parameter mechanism textually substitutes `CYPHER key=value` into
/// the query, so the rendered form must be a valid Cypher literal; this enum is
/// what makes that rendering type-directed instead of a fragile heuristic.
#[derive(Clone, Debug, PartialEq)]
pub enum GraphParam {
    /// A string value, rendered as a quoted, escaped Cypher string literal.
    Str(String),
    /// An integer value, rendered bare (e.g. for `LIMIT`).
    Int(i64),
    /// A floating-point value, rendered bare and always with a fractional part
    /// so it is read back as a float rather than an integer.
    Float(f64),
}

impl GraphParam {
    /// Renders the value as a Cypher literal safe to substitute into a query.
    ///
    /// * [`GraphParam::Str`] is wrapped in single quotes, with every embedded
    ///   backslash and single quote prefixed by a backslash, so a value drawn
    ///   from user content (an entity name, a memory id) cannot break out of
    ///   the literal.
    /// * [`GraphParam::Int`] renders to its bare decimal form.
    /// * [`GraphParam::Float`] renders to its bare decimal form. An integral
    ///   value keeps an explicit `.0` (`1.0` renders as `1.0`, not `1`), since
    ///   a literal without a fraction would be parsed as an integer and the
    ///   float kind stated at the bind site would be lost.
    ///
    /// NOTE(review): non-finite floats render exactly as Rust prints them
    /// (`NaN`, `inf`, `-inf`); confirm the backend accepts those spellings
    /// before binding one.
    #[must_use]
    pub fn to_cypher_literal(&self) -> String {
        match self {
            Self::Str(s) => {
                // Single pass: escaping backslashes and quotes together means
                // there is no ordering between two replacements to get wrong.
                let mut literal = String::with_capacity(s.len() + 2);
                literal.push('\'');
                for ch in s.chars() {
                    if matches!(ch, '\\' | '\'') {
                        literal.push('\\');
                    }
                    literal.push(ch);
                }
                literal.push('\'');
                literal
            }
            Self::Int(n) => n.to_string(),
            Self::Float(f) => {
                let mut literal = f.to_string();
                // `Display` drops the fraction of an integral float, which
                // would turn the parameter into an integer literal.
                if f.is_finite() && !literal.contains('.') {
                    literal.push_str(".0");
                }
                literal
            }
        }
    }
}
131
132impl From<String> for GraphParam {
133    fn from(value: String) -> Self {
134        Self::Str(value)
135    }
136}
137
138impl From<&str> for GraphParam {
139    fn from(value: &str) -> Self {
140        Self::Str(value.to_string())
141    }
142}
143
/// Stores and queries an entity/relationship property graph.
///
/// Implementations own the graph-backend connection and confine their writes to
/// a single named graph (see [`DEFAULT_GRAPH_NAME`]). The trait methods are
/// async and `Send`-bound so callers can drive them from any tokio runtime,
/// mirroring [`crate::vector::VectorIndex`].
///
/// Only [`ensure_graph`](Self::ensure_graph) and [`query`](Self::query) are
/// required. Every other method has a provided body that forwards to a free
/// function in a sibling module (`forget`, `commit`, `enrich`, `inspect`),
/// passing `self` as the store.
pub trait GraphStore: Send + Sync + 'static {
    /// Ensures the configured named graph is reachable.
    ///
    /// Idempotent: callers invoke this on startup to fail fast when the backend
    /// is unreachable or misconfigured, rather than on first write. FalkorDB
    /// creates a graph lazily on first write, so this is a connectivity probe,
    /// not a schema-creation step.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError::Connection`] if the backend is unreachable.
    fn ensure_graph(&self) -> impl Future<Output = Result<(), GraphError>> + Send;

    /// Runs a parameterized Cypher query against the graph, returning its rows.
    ///
    /// The raw escape hatch the write-path and read-path build their operations
    /// on. `params` binds query parameters by name, referenced as `$name` in the
    /// `cypher` body — the injection-safe way to embed values drawn from user
    /// content (entity names, memory ids). Each [`GraphParam`] renders to a
    /// correctly-quoted Cypher literal ([`GraphParam::to_cypher_literal`]), so a
    /// value cannot break out of its literal regardless of backend parameter
    /// mechanics. Relationship *types* and labels cannot be parameterized by
    /// Cypher and must be sanitized by the caller. Pass an empty map for a query
    /// with no parameters. Scalar result values are rendered to `String`;
    /// node/edge/path projections are out of scope until a consumer needs them.
    ///
    /// * `cypher` — the query text, with parameters referenced as `$name`.
    /// * `params` — parameter values keyed by name (without the `$`).
    ///
    /// # Errors
    ///
    /// Returns [`GraphError::Query`] when the backend rejects or fails the
    /// query, and [`GraphError::Connection`] when the backend is unreachable.
    fn query(
        &self,
        cypher: &str,
        params: &HashMap<String, GraphParam>,
    ) -> impl Future<Output = Result<GraphRows, GraphError>> + Send;

    /// Removes each forgotten pid from the graph, reference-counted.
    ///
    /// For each pid: strips it from every edge's and node's `memory_pids`,
    /// deletes edges whose array empties, then deletes nodes whose array empties
    /// *and* that have no surviving edges (a node still joined by an other-pid
    /// edge is kept). Edges are processed before nodes so a node is never deleted
    /// out from under a surviving edge. A pid is a globally-unique memory id, so
    /// matching needs no scope guard; the pid binds as a parameter. Idempotent —
    /// re-forgetting an absent pid changes nothing.
    ///
    /// * `pids` — the memory ids to forget.
    ///
    /// Provided method: forwards to `forget::forget_pids`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects a statement.
    fn forget_pids(&self, pids: &[&str]) -> impl Future<Output = Result<(), GraphError>> + Send {
        forget::forget_pids(self, pids)
    }

    /// Deletes every node and edge in `scope` — a whole-tenant forget.
    ///
    /// The entire scoped subgraph is removed regardless of `memory_pids`, so
    /// this needs no pid list. `DETACH DELETE` removes each node together with
    /// its edges.
    ///
    /// * `scope` — the scope whose subgraph is removed.
    ///
    /// Provided method: forwards to `forget::forget_scope`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects the statement.
    fn forget_scope(&self, scope: &crate::memory::Scope) -> impl Future<Output = Result<(), GraphError>> + Send {
        forget::forget_scope(self, scope)
    }

    /// Commits a source's resolved triples to the graph, returning the count.
    ///
    /// Resolves each triple's entities ([`EntityResolver`]) and edge
    /// ([`EdgeResolver`]), embeds new nodes
    /// ([`EmbeddingModel`](crate::embedding::EmbeddingModel)), then `MERGE`s
    /// the nodes and the (possibly supersession-closing) edge — tagging every
    /// element with the source's pid and scope from `ctx`. Writes are idempotent,
    /// so retrying a partially-failed batch does not double-write. Triples whose
    /// subject and object resolve to the same node are skipped.
    ///
    /// * `embedder` — embedding model used for new nodes.
    /// * `entities` — resolver for each triple's subject and object.
    /// * `edges` — resolver for each triple's relationship.
    /// * `ctx` — the source's pid and scope.
    /// * `triples` — the extracted triples to commit.
    ///
    /// Provided method: forwards to `commit::commit_triples`.
    ///
    /// # Errors
    ///
    /// Returns [`CommitError`] on the first resolution or write failure.
    fn commit_triples<EM, ER, EdgeR>(
        &self,
        embedder: &EM,
        entities: &ER,
        edges: &EdgeR,
        ctx: &CommitContext,
        triples: &TripleSet,
    ) -> impl Future<Output = Result<usize, CommitError>> + Send
    where
        EM: crate::embedding::EmbeddingModel,
        ER: EntityResolver,
        EdgeR: EdgeResolver,
    {
        commit::commit_triples(self, embedder, entities, edges, ctx, triples)
    }

    /// Returns the graph neighborhood around a set of seed memories.
    ///
    /// Seeds from the entities whose `memory_pids` contains any of `seed_pids`,
    /// then walks current edges (`valid_to = null`) out to `depth` hops
    /// (clamped to [`MAX_ENRICHMENT_DEPTH`]), scope-confined. Returns a flat,
    /// deduplicated [`GraphContext`]; an empty `seed_pids` yields an empty
    /// context with no query. The read-path enrichment behind `.with_graph()`.
    ///
    /// * `seed_pids` — memory ids whose entities seed the traversal.
    /// * `scope` — the scope the traversal is confined to.
    /// * `depth` — maximum number of hops to walk from the seeds.
    ///
    /// Provided method: forwards to `enrich::neighbors`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects the traversal.
    fn neighbors(
        &self,
        seed_pids: &[&str],
        scope: &crate::memory::Scope,
        depth: usize,
    ) -> impl Future<Output = Result<GraphContext, GraphError>> + Send {
        enrich::neighbors(self, seed_pids, scope, depth)
    }

    /// Returns a whole-scope snapshot of the graph for admin inspection.
    ///
    /// Reads every entity and relationship matching the *partial* scope — any of
    /// `agent_id` / `org_id` / `user_id` may be `None`, and an absent dimension
    /// imposes no filter, so a fully-`None` scope dumps the whole graph. This is
    /// the one cross-scope read in memoir (an admin views across agents/users/
    /// orgs); the caller's auth layer gates it. Nodes and edges are each capped
    /// at `limit` (clamped to [`MAX_INSPECTION_LIMIT`]); the snapshot's
    /// `truncated` flag marks when a cap was hit. Both current and superseded
    /// edges are returned, each flagged by `valid_to`, for a temporal view —
    /// unlike [`neighbors`](Self::neighbors), which reads current edges only.
    /// Scope values bind as parameters, never interpolated.
    ///
    /// * `agent_id`, `org_id`, `user_id` — optional scope filters; `None`
    ///   leaves that dimension unfiltered.
    /// * `limit` — cap applied separately to nodes and to edges.
    ///
    /// Provided method: forwards to `inspect::inspect_scope`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects either read.
    fn inspect_scope(
        &self,
        agent_id: Option<&str>,
        org_id: Option<&str>,
        user_id: Option<&str>,
        limit: usize,
    ) -> impl Future<Output = Result<GraphSnapshot, GraphError>> + Send {
        inspect::inspect_scope(self, agent_id, org_id, user_id, limit)
    }
}
290
#[cfg(test)]
mod tests {
    use super::GraphParam;

    /// Renders `raw` as a string parameter.
    fn render_str(raw: &str) -> String {
        GraphParam::Str(raw.to_string()).to_cypher_literal()
    }

    /// Reports whether the text between a literal's outer quotes would end the
    /// literal early: either it holds a single quote that is not preceded by an
    /// escaping backslash, or it ends on a dangling backslash that would
    /// escape the closing quote.
    fn breaks_out(interior: &str) -> bool {
        let mut escaped = false;
        for ch in interior.chars() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '\'' {
                return true;
            }
        }
        escaped
    }

    #[test]
    fn should_quote_string_param_as_cypher_literal() {
        assert_eq!(render_str("Alice"), "'Alice'");
    }

    #[test]
    fn should_render_int_param_bare() {
        assert_eq!(GraphParam::Int(500).to_cypher_literal(), "500");
    }

    #[test]
    fn should_render_float_param_bare() {
        assert_eq!(GraphParam::Float(0.85).to_cypher_literal(), "0.85");
    }

    #[test]
    fn should_escape_embedded_single_quote_in_string_param() {
        assert_eq!(render_str("O'Brien"), r"'O\'Brien'");
    }

    #[test]
    fn should_escape_backslash_before_quote_in_string_param() {
        assert_eq!(render_str(r"a\b"), r"'a\\b'");
        // A backslash directly followed by a quote is the case where escape
        // ordering matters: both characters must come out escaped.
        assert_eq!(render_str(r"\'"), r"'\\\''");
    }

    #[test]
    fn should_not_let_injection_break_out_of_string_literal() {
        let attacks = [
            r#"x"}) DETACH DELETE n //"#,
            "x'}) DETACH DELETE n //",
            r"x\",
            r"x\'}) DETACH DELETE n //",
        ];
        for attack in attacks {
            let rendered = render_str(attack);
            let interior = rendered
                .strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
                .expect("literal must be wrapped in single quotes");
            assert!(!breaks_out(interior), "{attack:?} escaped its literal: {rendered}");
        }
    }
}