Skip to main content

zeph_memory/
vector_store.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
//! Abstract vector-store trait and associated types.
//!
//! The [`VectorStore`] trait decouples the rest of `zeph-memory` from any specific
//! vector database. Three implementations ship in this crate:
//!
//! - [`crate::qdrant_ops::QdrantOps`] / [`crate::embedding_store::EmbeddingStore`] —
//!   production Qdrant-backed store.
//! - [`crate::db_vector_store::DbVectorStore`] — `SQLite` BLOB store for testing and offline use.
//! - [`crate::in_memory_store::InMemoryVectorStore`] — purely in-memory store for unit tests.
13
14use std::collections::HashMap;
15use std::future::Future;
16use std::pin::Pin;
17
18/// Error type for [`VectorStore`] operations.
19#[derive(Debug, thiserror::Error)]
20#[non_exhaustive]
21pub enum VectorStoreError {
22    #[error("connection error: {0}")]
23    Connection(String),
24    #[error("collection error: {0}")]
25    Collection(String),
26    #[error("upsert error: {0}")]
27    Upsert(String),
28    #[error("search error: {0}")]
29    Search(String),
30    #[error("delete error: {0}")]
31    Delete(String),
32    #[error("scroll error: {0}")]
33    Scroll(String),
34    #[error("serialization error: {0}")]
35    Serialization(String),
36    /// Operation is not supported by this backend (e.g. `get_points` on `DbVectorStore`).
37    #[error("operation unsupported: {0}")]
38    Unsupported(String),
39}
40
41/// A vector point to be stored in or retrieved from a [`VectorStore`].
42#[derive(Debug, Clone)]
43pub struct VectorPoint {
44    /// Unique string identifier for the point (e.g. a UUID).
45    pub id: String,
46    /// Dense embedding vector.
47    pub vector: Vec<f32>,
48    /// Arbitrary JSON metadata stored alongside the vector.
49    pub payload: HashMap<String, serde_json::Value>,
50}
51
52/// Filter applied to [`VectorStore::search`] and [`VectorStore::scroll_all`].
53///
54/// All `must` conditions are `ANDed`; all `must_not` conditions are `ANDed`.
55#[derive(Debug, Clone, Default)]
56pub struct VectorFilter {
57    /// All of these conditions must match.
58    pub must: Vec<FieldCondition>,
59    /// None of these conditions must match.
60    pub must_not: Vec<FieldCondition>,
61}
62
63/// A single payload field condition in a [`VectorFilter`].
64#[derive(Debug, Clone)]
65pub struct FieldCondition {
66    /// Payload field name.
67    pub field: String,
68    /// Expected value for the field.
69    pub value: FieldValue,
70}
71
/// The comparison value carried by a [`FieldCondition`].
///
/// Marked `#[non_exhaustive]`: code outside this crate must include a wildcard
/// arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Debug)]
pub enum FieldValue {
    /// Match the field against this exact integer.
    Integer(i64),
    /// Match the field against this exact string.
    Text(String),
}
81
82/// A vector point returned by [`VectorStore::search`] with an attached similarity score.
83#[derive(Debug, Clone)]
84pub struct ScoredVectorPoint {
85    /// Point identifier (matches [`VectorPoint::id`]).
86    pub id: String,
87    /// Cosine similarity score in `[0, 1]`.
88    pub score: f32,
89    /// Payload stored alongside the vector.
90    pub payload: HashMap<String, serde_json::Value>,
91}
92
/// Boxed, pinned, `Send` future — the common return type of every [`VectorStore`] method.
///
/// Kept `pub(crate)` on purpose: all [`VectorStore`] implementations live inside this
/// crate. Should the trait ever be opened to external implementors, promote this alias
/// to `pub`.
pub(crate) type BoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
98
/// Output of [`VectorStore::scroll_all`]: an outer map keyed by point ID whose values
/// are inner maps from payload key to payload value, all as strings.
pub type ScrollResult = HashMap<String, HashMap<String, String>>;
101
/// Output of [`VectorStore::scroll_all_with_point_ids`]: a list of
/// `(point_id, string_fields)` tuples.
///
/// A point appears in the list only if its payload holds `key_field` as a `StringValue`.
pub type ScrollWithIdsResult = Vec<(String, HashMap<String, String>)>;
106
107/// Abstraction over a vector database backend.
108///
109/// Implementations must be `Send + Sync` so they can be wrapped in `Arc` and shared
110/// across async tasks. All methods return boxed futures via `BoxFuture` to remain
111/// object-safe.
112///
113/// # Implementations
114///
115/// | Type | Notes |
116/// |------|-------|
117/// | [`crate::embedding_store::EmbeddingStore`] | Qdrant-backed; production default. |
118/// | [`crate::db_vector_store::DbVectorStore`] | SQLite BLOB; offline / CI use. |
119/// | [`crate::in_memory_store::InMemoryVectorStore`] | Fully in-process; unit tests. |
120pub trait VectorStore: Send + Sync {
121    /// Create a collection with cosine-distance vectors of `vector_size` dimensions.
122    ///
123    /// Idempotent — no error if the collection already exists with the same dimension.
124    fn ensure_collection(
125        &self,
126        collection: &str,
127        vector_size: u64,
128    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
129
130    /// Returns `true` if `collection` exists in the backend.
131    fn collection_exists(&self, collection: &str) -> BoxFuture<'_, Result<bool, VectorStoreError>>;
132
133    /// Delete a collection and all its points.
134    fn delete_collection(&self, collection: &str) -> BoxFuture<'_, Result<(), VectorStoreError>>;
135
136    /// Upsert `points` into `collection`.
137    ///
138    /// Points with existing IDs are overwritten; new IDs are inserted.
139    fn upsert(
140        &self,
141        collection: &str,
142        points: Vec<VectorPoint>,
143    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
144
145    /// Search `collection` for the `limit` nearest neighbours of `vector`.
146    ///
147    /// Returns results in descending similarity order.  An optional [`VectorFilter`]
148    /// restricts the search space to points matching the payload conditions.
149    fn search(
150        &self,
151        collection: &str,
152        vector: Vec<f32>,
153        limit: u64,
154        filter: Option<VectorFilter>,
155    ) -> BoxFuture<'_, Result<Vec<ScoredVectorPoint>, VectorStoreError>>;
156
157    /// Delete specific points from `collection` by their string IDs.
158    fn delete_by_ids(
159        &self,
160        collection: &str,
161        ids: Vec<String>,
162    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
163
164    /// Scroll (paginate) all points in `collection` and return a map of
165    /// `point_id → { key_field → value }` payload entries.
166    fn scroll_all(
167        &self,
168        collection: &str,
169        key_field: &str,
170    ) -> BoxFuture<'_, Result<ScrollResult, VectorStoreError>>;
171
172    /// Scroll all points in `collection`, returning `(point_id, string_payload_fields)` pairs.
173    ///
174    /// Only points whose payload contains `key_field` as a string value are included.
175    /// Unlike [`Self::scroll_all`], the Qdrant point ID is preserved as the first tuple element
176    /// rather than being used as the map key — this is required when consumers need to delete
177    /// points by their IDs (e.g. stale-embedding cleanup).
178    ///
179    /// # Errors
180    ///
181    /// Returns an error if the underlying scroll operation fails.
182    fn scroll_all_with_point_ids(
183        &self,
184        collection: &str,
185        key_field: &str,
186    ) -> BoxFuture<'_, Result<ScrollWithIdsResult, VectorStoreError>>;
187
188    /// Return `true` if the backend is reachable and operational.
189    fn health_check(&self) -> BoxFuture<'_, Result<bool, VectorStoreError>>;
190
191    /// Create keyword payload indexes for the given field names.
192    ///
193    /// Default implementation is a no-op (for non-Qdrant backends).
194    fn create_keyword_indexes(
195        &self,
196        _collection: &str,
197        _fields: &[&str],
198    ) -> BoxFuture<'_, Result<(), VectorStoreError>> {
199        Box::pin(async { Ok(()) })
200    }
201
202    /// Batched vector + payload retrieval by point IDs.
203    ///
204    /// Returns one [`VectorPoint`] per matched id (missing ids are silently dropped).
205    /// Backends that cannot return vectors return `Err(VectorStoreError::Unsupported)`.
206    ///
207    /// # Errors
208    ///
209    /// Returns [`VectorStoreError::Unsupported`] when the backend does not support
210    /// direct point retrieval with vectors (e.g. `DbVectorStore`, `InMemoryVectorStore`
211    /// unless overridden in tests).
212    fn get_points(
213        &self,
214        _collection: &str,
215        _ids: Vec<String>,
216    ) -> BoxFuture<'_, Result<Vec<VectorPoint>, VectorStoreError>> {
217        Box::pin(async {
218            Err(VectorStoreError::Unsupported(
219                "get_points not implemented for this backend".into(),
220            ))
221        })
222    }
223}