memoir_core/graph/mod.rs
1//! Property-graph abstraction for entity/relationship storage.
2//!
3//! Defines [`GraphStore`], memoir's optional secondary index for the knowledge
4//! graph derived from extracted memories (epic 0012). Two implementations ship:
5//! [`InMemoryGraphStore`] (always available — the test/benchmark boundary) and
6//! [`FalkorGraphStore`] (behind the `knowledge-graph` feature — production,
7//! backed by FalkorDB).
8//!
9//! This trait is deliberately thin: it covers *connectivity* (ensuring the
10//! named graph is reachable) and a raw Cypher [`GraphStore::query`] escape
11//! hatch. Typed upsert and traversal methods are layered on top of it.
12//!
13//! Like [`crate::vector::VectorIndex`], the graph is a *derived* index: Postgres
14//! remains the source of truth, and the graph can be rebuilt from the episodic
15//! memories. Absence of a graph store is a first-class, non-degraded state —
16//! recall simply returns vector hits with no graph enrichment.
17
18mod commit;
19mod cosine;
20mod edge;
21mod enrich;
22mod error;
23mod extraction;
24mod forget;
25mod inspect;
26mod memory;
27mod resolve;
28mod synthesis;
29
30pub use commit::{CommitContext, CommitError};
31pub use enrich::{
32 DEFAULT_ENRICHMENT_DEPTH, GraphContext, GraphEntity, GraphRelationship, MAX_ENRICHMENT_DEPTH,
33};
34pub use edge::{
35 CardinalityPolicy, Edge, EdgeCatalog, EdgeError, EdgeResolution, EdgeResolver, ExistingEdge, NaiveAppendResolver,
36 RelationCardinality, TemporalEdgeResolver,
37};
38pub use error::GraphError;
39pub use extraction::{
40 DEFAULT_TRIPLE_PROMPT, LlmExtractor, TRIPLE_REPLY_MAX_CHARS, Triple, TripleExtractor, TripleSet,
41};
42pub use inspect::{
43 DEFAULT_INSPECTION_LIMIT, GraphEdge, GraphNode, GraphSnapshot, MAX_INSPECTION_LIMIT,
44};
45pub use memory::InMemoryGraphStore;
46pub use resolve::{
47 EmbeddingEntityResolver, EntityCatalog, EntityResolver, EntityVector, ExactStringResolver, InMemoryEntityCatalog,
48 Resolution, ResolveError, MIN_ENTITY_SIMILARITY,
49};
50pub use synthesis::{
51 EmbeddingSynthesizer, MIN_CORROBORATION_SIMILARITY, PassthroughSynthesizer, SemanticFact, SynthesisError,
52 Synthesizer,
53};
54
55#[cfg(feature = "knowledge-graph")]
56mod falkor;
57
58#[cfg(feature = "knowledge-graph")]
59mod falkor_catalog;
60
61#[cfg(feature = "knowledge-graph")]
62mod staging;
63
64#[cfg(feature = "knowledge-graph")]
65pub use falkor::FalkorGraphStore;
66
67#[cfg(feature = "knowledge-graph")]
68pub use falkor_catalog::{FalkorEdgeCatalog, FalkorEntityCatalog};
69
70#[cfg(feature = "knowledge-graph")]
71pub use staging::TripleStaging;
72
73use std::collections::HashMap;
74use std::future::Future;
75
/// Default graph name memoir writes to within a shared FalkorDB instance.
///
/// FalkorDB hosts many named graphs in one process; memoir confines its writes
/// to this graph so it coexists with a host application's own graphs. Override
/// per deployment so two memoir instances never collide on one engine.
///
/// [`GraphStore`] implementations are documented as writing to a single named
/// graph; this constant is the name they fall back to.
// NOTE(review): the override mechanism is not defined in this module —
// presumably a backend configuration option; confirm against the
// implementations.
pub const DEFAULT_GRAPH_NAME: &str = "memoir";
82
/// One row of a Cypher result, mapping each returned column to a scalar value.
///
/// Each entry pairs a column with its value. Scalars are rendered to `String`
/// so the public surface never leaks a backend-specific value type. The row is
/// an ordered list of pairs rather than a map, so columns preserve the order of
/// the `RETURN` clause.
pub type GraphRow = Vec<(String, String)>;
89
/// The rows produced by a Cypher [`GraphStore::query`], in result order.
///
/// One [`GraphRow`] per result row.
pub type GraphRows = Vec<GraphRow>;
92
/// A typed value bound to a Cypher query parameter.
///
/// Carries the value *and* its kind so the backend can render each parameter as
/// the correct Cypher literal: strings are quoted and escaped, numbers stay
/// bare. The kind cannot be recovered from a plain string once erased — a
/// `LIMIT` needs the bare integer `500`, while a name needs the quoted literal
/// `'Alice'` — so callers state the kind at the bind site rather than letting
/// the backend guess.
///
/// FalkorDB's parameter mechanism textually substitutes `CYPHER key=value` into
/// the query, so the rendered form must be a valid Cypher literal; this enum is
/// what makes that rendering type-directed instead of a fragile heuristic.
///
/// String-like values convert with `From<String>` / `From<&str>`; numeric
/// values are constructed through their variant directly.
// `Eq` and `Hash` are intentionally absent from the derive list: the `Float`
// variant holds an `f64`, which implements neither.
#[derive(Clone, Debug, PartialEq)]
pub enum GraphParam {
    /// A string value, rendered as a quoted, escaped Cypher string literal.
    Str(String),
    /// An integer value, rendered bare (e.g. for `LIMIT`).
    Int(i64),
    /// A floating-point value, rendered bare.
    Float(f64),
}
114
115impl GraphParam {
116 /// Renders the value as a Cypher literal safe to substitute into a query.
117 ///
118 /// [`GraphParam::Str`] is wrapped in single quotes with embedded backslashes
119 /// and single quotes escaped, so a value drawn from user content (an entity
120 /// name, a memory id) cannot break out of the literal. Numeric variants
121 /// render to their bare textual form.
122 #[must_use]
123 pub fn to_cypher_literal(&self) -> String {
124 match self {
125 Self::Str(s) => format!("'{}'", s.replace('\\', "\\\\").replace('\'', "\\'")),
126 Self::Int(n) => n.to_string(),
127 Self::Float(f) => f.to_string(),
128 }
129 }
130}
131
132impl From<String> for GraphParam {
133 fn from(value: String) -> Self {
134 Self::Str(value)
135 }
136}
137
138impl From<&str> for GraphParam {
139 fn from(value: &str) -> Self {
140 Self::Str(value.to_string())
141 }
142}
143
/// Stores and queries an entity/relationship property graph.
///
/// Implementations own the graph-backend connection and confine their writes to
/// a single named graph (see [`DEFAULT_GRAPH_NAME`]). The trait methods are
/// async and `Send`-bound so callers can drive them from any tokio runtime,
/// mirroring [`crate::vector::VectorIndex`].
///
/// Only [`ensure_graph`](Self::ensure_graph) and [`query`](Self::query) are
/// required. Every other method is a provided method whose default body
/// forwards to a sibling module of this file (`forget`, `commit`, `enrich`,
/// `inspect`), passing `self` along.
pub trait GraphStore: Send + Sync + 'static {
    /// Ensures the configured named graph is reachable.
    ///
    /// Idempotent: callers invoke this on startup to fail fast when the backend
    /// is unreachable or misconfigured, rather than on first write. FalkorDB
    /// creates a graph lazily on first write, so this is a connectivity probe,
    /// not a schema-creation step.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError::Connection`] if the backend is unreachable.
    fn ensure_graph(&self) -> impl Future<Output = Result<(), GraphError>> + Send;

    /// Runs a parameterized Cypher query against the graph, returning its rows.
    ///
    /// The raw escape hatch the write-path and read-path build their operations
    /// on. `params` binds query parameters by name, referenced as `$name` in the
    /// `cypher` body — the injection-safe way to embed values drawn from user
    /// content (entity names, memory ids). Each [`GraphParam`] renders to a
    /// correctly-quoted Cypher literal ([`GraphParam::to_cypher_literal`]), so a
    /// value cannot break out of its literal regardless of backend parameter
    /// mechanics. Relationship *types* and labels cannot be parameterized by
    /// Cypher and must be sanitized by the caller. Pass an empty map for a query
    /// with no parameters. Scalar result values are rendered to `String`;
    /// node/edge/path projections are out of scope until a consumer needs them.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError::Query`] when the backend rejects or fails the
    /// query, and [`GraphError::Connection`] when the backend is unreachable.
    fn query(
        &self,
        cypher: &str,
        params: &HashMap<String, GraphParam>,
    ) -> impl Future<Output = Result<GraphRows, GraphError>> + Send;

    /// Removes each forgotten pid from the graph, reference-counted.
    ///
    /// For each pid: strips it from every edge's and node's `memory_pids`,
    /// deletes edges whose array empties, then deletes nodes whose array empties
    /// *and* that have no surviving edges (a node still joined by an other-pid
    /// edge is kept). Edges are processed before nodes so a node is never deleted
    /// out from under a surviving edge. A pid is a globally-unique memory id, so
    /// matching needs no scope guard; the pid binds as a parameter. Idempotent —
    /// re-forgetting an absent pid changes nothing.
    ///
    /// Provided method: the default body forwards to `forget::forget_pids`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects a statement.
    fn forget_pids(&self, pids: &[&str]) -> impl Future<Output = Result<(), GraphError>> + Send {
        forget::forget_pids(self, pids)
    }

    /// Deletes every node and edge in `scope` — a whole-tenant forget.
    ///
    /// The entire scoped subgraph is removed regardless of `memory_pids`, so
    /// this needs no pid list. `DETACH DELETE` removes each node together with
    /// its edges.
    ///
    /// Provided method: the default body forwards to `forget::forget_scope`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects the statement.
    fn forget_scope(&self, scope: &crate::memory::Scope) -> impl Future<Output = Result<(), GraphError>> + Send {
        forget::forget_scope(self, scope)
    }

    /// Commits a source's resolved triples to the graph, returning the count.
    ///
    /// Resolves each triple's entities ([`EntityResolver`]) and edge
    /// ([`EdgeResolver`]), embeds new nodes
    /// ([`EmbeddingModel`](crate::embedding::EmbeddingModel)), then `MERGE`s
    /// the nodes and the (possibly supersession-closing) edge — tagging every
    /// element with the source's pid and scope from `ctx`. Writes are idempotent,
    /// so retrying a partially-failed batch does not double-write. Triples whose
    /// subject and object resolve to the same node are skipped.
    ///
    /// Provided method: the default body forwards to `commit::commit_triples`.
    ///
    /// # Errors
    ///
    /// Returns [`CommitError`] on the first resolution or write failure.
    fn commit_triples<EM, ER, EdgeR>(
        &self,
        embedder: &EM,
        entities: &ER,
        edges: &EdgeR,
        ctx: &CommitContext,
        triples: &TripleSet,
    ) -> impl Future<Output = Result<usize, CommitError>> + Send
    where
        EM: crate::embedding::EmbeddingModel,
        ER: EntityResolver,
        EdgeR: EdgeResolver,
    {
        commit::commit_triples(self, embedder, entities, edges, ctx, triples)
    }

    /// Returns the graph neighborhood around a set of seed memories.
    ///
    /// Seeds from the entities whose `memory_pids` contains any of `seed_pids`,
    /// then walks current edges (`valid_to = null`) out to `depth` hops
    /// (clamped to [`MAX_ENRICHMENT_DEPTH`]), scope-confined. Returns a flat,
    /// deduplicated [`GraphContext`]; an empty `seed_pids` yields an empty
    /// context with no query. The read-path enrichment behind `.with_graph()`.
    ///
    /// Provided method: the default body forwards to `enrich::neighbors`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects the traversal.
    fn neighbors(
        &self,
        seed_pids: &[&str],
        scope: &crate::memory::Scope,
        depth: usize,
    ) -> impl Future<Output = Result<GraphContext, GraphError>> + Send {
        enrich::neighbors(self, seed_pids, scope, depth)
    }

    /// Returns a whole-scope snapshot of the graph for admin inspection.
    ///
    /// Reads every entity and relationship matching the *partial* scope — any of
    /// `agent_id` / `org_id` / `user_id` may be `None`, and an absent dimension
    /// imposes no filter, so a fully-`None` scope dumps the whole graph. This is
    /// the one cross-scope read in memoir (an admin views across agents/users/
    /// orgs); the caller's auth layer gates it. Nodes and edges are each capped
    /// at `limit` (clamped to [`MAX_INSPECTION_LIMIT`]); the snapshot's
    /// `truncated` flag marks when a cap was hit. Both current and superseded
    /// edges are returned, each flagged by `valid_to`, for a temporal view —
    /// unlike [`neighbors`](Self::neighbors), which reads current edges only.
    /// Scope values bind as parameters, never interpolated.
    ///
    /// Provided method: the default body forwards to `inspect::inspect_scope`.
    ///
    /// # Errors
    ///
    /// Returns [`GraphError`] if the backend rejects either read.
    fn inspect_scope(
        &self,
        agent_id: Option<&str>,
        org_id: Option<&str>,
        user_id: Option<&str>,
        limit: usize,
    ) -> impl Future<Output = Result<GraphSnapshot, GraphError>> + Send {
        inspect::inspect_scope(self, agent_id, org_id, user_id, limit)
    }
}
290
#[cfg(test)]
mod tests {
    //! Unit tests for [`GraphParam::to_cypher_literal`] rendering and escaping.

    use super::GraphParam;

    /// Reports whether the interior of a rendered string literal could end the
    /// literal early: either it holds a single quote that is not preceded by an
    /// escaping backslash, or it ends in a dangling backslash that would escape
    /// the closing quote.
    fn breaks_out_of_literal(interior: &str) -> bool {
        let mut escaped = false;
        for ch in interior.chars() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '\'' {
                return true;
            }
        }
        escaped
    }

    /// Renders `payload` as a string parameter and returns the text between the
    /// opening and closing quotes, failing the test if either quote is missing.
    fn rendered_interior(payload: &str) -> String {
        let rendered = GraphParam::Str(payload.to_string()).to_cypher_literal();
        rendered
            .strip_prefix('\'')
            .and_then(|rest| rest.strip_suffix('\''))
            .expect("string literal must be wrapped in single quotes")
            .to_string()
    }

    #[test]
    fn should_quote_string_param_as_cypher_literal() {
        assert_eq!(GraphParam::Str("Alice".to_string()).to_cypher_literal(), "'Alice'");
    }

    #[test]
    fn should_render_int_param_bare() {
        assert_eq!(GraphParam::Int(500).to_cypher_literal(), "500");
    }

    #[test]
    fn should_render_float_param_bare() {
        assert_eq!(GraphParam::Float(0.85).to_cypher_literal(), "0.85");
    }

    #[test]
    fn should_escape_embedded_single_quote_in_string_param() {
        assert_eq!(GraphParam::Str("O'Brien".to_string()).to_cypher_literal(), r"'O\'Brien'");
    }

    #[test]
    fn should_escape_backslash_before_quote_in_string_param() {
        assert_eq!(GraphParam::Str(r"a\b".to_string()).to_cypher_literal(), r"'a\\b'");
        // A backslash directly followed by a quote is the case the escaping
        // order exists for: each must be escaped independently, yielding an
        // escaped backslash followed by an escaped quote.
        assert_eq!(GraphParam::Str(r"\'".to_string()).to_cypher_literal(), r"'\\\''");
    }

    #[test]
    fn should_not_let_injection_break_out_of_string_literal() {
        // A double-quote payload is inert inside a single-quoted literal and
        // passes through unchanged.
        let double_quoted = r#"x"}) DETACH DELETE n //"#;
        assert_eq!(
            GraphParam::Str(double_quoted.to_string()).to_cypher_literal(),
            r#"'x"}) DETACH DELETE n //'"#
        );

        // Payloads that actually attempt to close the single-quoted literal.
        let attempts = [
            r"x'}) DETACH DELETE n //",
            r"\' OR 1=1 //",
            r"trailing backslash \",
            r"\\' DETACH DELETE n //",
        ];
        for payload in attempts {
            let interior = rendered_interior(payload);
            assert!(
                !breaks_out_of_literal(&interior),
                "payload {payload:?} escaped its literal: {interior:?}"
            );
        }

        assert_eq!(
            GraphParam::Str(r"x'}) DETACH DELETE n //".to_string()).to_cypher_literal(),
            r"'x\'}) DETACH DELETE n //'"
        );
    }
}
327}