Skip to main content

zeph_memory/
vector_store.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
//! Abstract vector-store trait and associated types.
//!
//! The [`VectorStore`] trait decouples the rest of `zeph-memory` from any specific
//! vector database. Three implementations ship in this crate:
//!
//! - [`crate::qdrant_ops::QdrantOps`] / [`crate::embedding_store::EmbeddingStore`] —
//!   production Qdrant-backed store.
//! - [`crate::db_vector_store::DbVectorStore`] — `SQLite` BLOB store for testing and offline use.
//! - [`crate::in_memory_store::InMemoryVectorStore`] — purely in-memory store for unit tests.
13
14use std::collections::HashMap;
15use std::future::Future;
16use std::pin::Pin;
17
18/// Error type for [`VectorStore`] operations.
19#[derive(Debug, thiserror::Error)]
20pub enum VectorStoreError {
21    #[error("connection error: {0}")]
22    Connection(String),
23    #[error("collection error: {0}")]
24    Collection(String),
25    #[error("upsert error: {0}")]
26    Upsert(String),
27    #[error("search error: {0}")]
28    Search(String),
29    #[error("delete error: {0}")]
30    Delete(String),
31    #[error("scroll error: {0}")]
32    Scroll(String),
33    #[error("serialization error: {0}")]
34    Serialization(String),
35    /// Operation is not supported by this backend (e.g. `get_points` on `DbVectorStore`).
36    #[error("operation unsupported: {0}")]
37    Unsupported(String),
38}
39
40/// A vector point to be stored in or retrieved from a [`VectorStore`].
41#[derive(Debug, Clone)]
42pub struct VectorPoint {
43    /// Unique string identifier for the point (e.g. a UUID).
44    pub id: String,
45    /// Dense embedding vector.
46    pub vector: Vec<f32>,
47    /// Arbitrary JSON metadata stored alongside the vector.
48    pub payload: HashMap<String, serde_json::Value>,
49}
50
51/// Filter applied to [`VectorStore::search`] and [`VectorStore::scroll_all`].
52///
53/// All `must` conditions are `ANDed`; all `must_not` conditions are `ANDed`.
54#[derive(Debug, Clone, Default)]
55pub struct VectorFilter {
56    /// All of these conditions must match.
57    pub must: Vec<FieldCondition>,
58    /// None of these conditions must match.
59    pub must_not: Vec<FieldCondition>,
60}
61
62/// A single payload field condition in a [`VectorFilter`].
63#[derive(Debug, Clone)]
64pub struct FieldCondition {
65    /// Payload field name.
66    pub field: String,
67    /// Expected value for the field.
68    pub value: FieldValue,
69}
70
/// The value a [`FieldCondition`] compares a payload field against.
#[derive(Debug, Clone)]
pub enum FieldValue {
    /// Match an integer exactly.
    Integer(i64),
    /// Match a string exactly.
    Text(String),
}
79
80/// A vector point returned by [`VectorStore::search`] with an attached similarity score.
81#[derive(Debug, Clone)]
82pub struct ScoredVectorPoint {
83    /// Point identifier (matches [`VectorPoint::id`]).
84    pub id: String,
85    /// Cosine similarity score in `[0, 1]`.
86    pub score: f32,
87    /// Payload stored alongside the vector.
88    pub payload: HashMap<String, serde_json::Value>,
89}
90
/// Boxed, pinned, `Send` future — the common return type of every [`VectorStore`] method.
///
/// Kept `pub(crate)` on purpose: all [`VectorStore`] implementations live inside this
/// crate. Promote the alias to `pub` if the trait is ever opened to external implementors.
pub(crate) type BoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
96
/// What [`VectorStore::scroll_all`] yields: point ID → (payload key → string value).
pub type ScrollResult = HashMap<String, HashMap<String, String>>;
99
/// What [`VectorStore::scroll_all_with_point_ids`] yields: `(point_id, string_fields)` pairs.
///
/// A point appears in the list only if its payload holds `key_field` as a `StringValue`.
pub type ScrollWithIdsResult = Vec<(String, HashMap<String, String>)>;
104
105/// Abstraction over a vector database backend.
106///
107/// Implementations must be `Send + Sync` so they can be wrapped in `Arc` and shared
108/// across async tasks. All methods return boxed futures via `BoxFuture` to remain
109/// object-safe.
110///
111/// # Implementations
112///
113/// | Type | Notes |
114/// |------|-------|
115/// | [`crate::embedding_store::EmbeddingStore`] | Qdrant-backed; production default. |
116/// | [`crate::db_vector_store::DbVectorStore`] | SQLite BLOB; offline / CI use. |
117/// | [`crate::in_memory_store::InMemoryVectorStore`] | Fully in-process; unit tests. |
118pub trait VectorStore: Send + Sync {
119    /// Create a collection with cosine-distance vectors of `vector_size` dimensions.
120    ///
121    /// Idempotent — no error if the collection already exists with the same dimension.
122    fn ensure_collection(
123        &self,
124        collection: &str,
125        vector_size: u64,
126    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
127
128    /// Returns `true` if `collection` exists in the backend.
129    fn collection_exists(&self, collection: &str) -> BoxFuture<'_, Result<bool, VectorStoreError>>;
130
131    /// Delete a collection and all its points.
132    fn delete_collection(&self, collection: &str) -> BoxFuture<'_, Result<(), VectorStoreError>>;
133
134    /// Upsert `points` into `collection`.
135    ///
136    /// Points with existing IDs are overwritten; new IDs are inserted.
137    fn upsert(
138        &self,
139        collection: &str,
140        points: Vec<VectorPoint>,
141    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
142
143    /// Search `collection` for the `limit` nearest neighbours of `vector`.
144    ///
145    /// Returns results in descending similarity order.  An optional [`VectorFilter`]
146    /// restricts the search space to points matching the payload conditions.
147    fn search(
148        &self,
149        collection: &str,
150        vector: Vec<f32>,
151        limit: u64,
152        filter: Option<VectorFilter>,
153    ) -> BoxFuture<'_, Result<Vec<ScoredVectorPoint>, VectorStoreError>>;
154
155    /// Delete specific points from `collection` by their string IDs.
156    fn delete_by_ids(
157        &self,
158        collection: &str,
159        ids: Vec<String>,
160    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
161
162    /// Scroll (paginate) all points in `collection` and return a map of
163    /// `point_id → { key_field → value }` payload entries.
164    fn scroll_all(
165        &self,
166        collection: &str,
167        key_field: &str,
168    ) -> BoxFuture<'_, Result<ScrollResult, VectorStoreError>>;
169
170    /// Scroll all points in `collection`, returning `(point_id, string_payload_fields)` pairs.
171    ///
172    /// Only points whose payload contains `key_field` as a string value are included.
173    /// Unlike [`Self::scroll_all`], the Qdrant point ID is preserved as the first tuple element
174    /// rather than being used as the map key — this is required when consumers need to delete
175    /// points by their IDs (e.g. stale-embedding cleanup).
176    ///
177    /// # Errors
178    ///
179    /// Returns an error if the underlying scroll operation fails.
180    fn scroll_all_with_point_ids(
181        &self,
182        collection: &str,
183        key_field: &str,
184    ) -> BoxFuture<'_, Result<ScrollWithIdsResult, VectorStoreError>>;
185
186    /// Return `true` if the backend is reachable and operational.
187    fn health_check(&self) -> BoxFuture<'_, Result<bool, VectorStoreError>>;
188
189    /// Create keyword payload indexes for the given field names.
190    ///
191    /// Default implementation is a no-op (for non-Qdrant backends).
192    fn create_keyword_indexes(
193        &self,
194        _collection: &str,
195        _fields: &[&str],
196    ) -> BoxFuture<'_, Result<(), VectorStoreError>> {
197        Box::pin(async { Ok(()) })
198    }
199
200    /// Batched vector + payload retrieval by point IDs.
201    ///
202    /// Returns one [`VectorPoint`] per matched id (missing ids are silently dropped).
203    /// Backends that cannot return vectors return `Err(VectorStoreError::Unsupported)`.
204    ///
205    /// # Errors
206    ///
207    /// Returns [`VectorStoreError::Unsupported`] when the backend does not support
208    /// direct point retrieval with vectors (e.g. `DbVectorStore`, `InMemoryVectorStore`
209    /// unless overridden in tests).
210    fn get_points(
211        &self,
212        _collection: &str,
213        _ids: Vec<String>,
214    ) -> BoxFuture<'_, Result<Vec<VectorPoint>, VectorStoreError>> {
215        Box::pin(async {
216            Err(VectorStoreError::Unsupported(
217                "get_points not implemented for this backend".into(),
218            ))
219        })
220    }
221}