Skip to main content

zeph_memory/
vector_store.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Abstract vector-store trait and associated types.
5//!
//! The [`VectorStore`] trait decouples the rest of `zeph-memory` from any specific
//! vector database. Three implementations ship in this crate:
8//!
9//! - [`crate::qdrant_ops::QdrantOps`] / [`crate::embedding_store::EmbeddingStore`] —
10//!   production Qdrant-backed store.
11//! - [`crate::db_vector_store::DbVectorStore`] — `SQLite` BLOB store for testing and offline use.
12//! - [`crate::in_memory_store::InMemoryVectorStore`] — purely in-memory store for unit tests.
13
14use std::collections::HashMap;
15use std::future::Future;
16use std::pin::Pin;
17
18/// Error type for [`VectorStore`] operations.
19#[derive(Debug, thiserror::Error)]
20pub enum VectorStoreError {
21    #[error("connection error: {0}")]
22    Connection(String),
23    #[error("collection error: {0}")]
24    Collection(String),
25    #[error("upsert error: {0}")]
26    Upsert(String),
27    #[error("search error: {0}")]
28    Search(String),
29    #[error("delete error: {0}")]
30    Delete(String),
31    #[error("scroll error: {0}")]
32    Scroll(String),
33    #[error("serialization error: {0}")]
34    Serialization(String),
35}
36
37/// A vector point to be stored in or retrieved from a [`VectorStore`].
38#[derive(Debug, Clone)]
39pub struct VectorPoint {
40    /// Unique string identifier for the point (e.g. a UUID).
41    pub id: String,
42    /// Dense embedding vector.
43    pub vector: Vec<f32>,
44    /// Arbitrary JSON metadata stored alongside the vector.
45    pub payload: HashMap<String, serde_json::Value>,
46}
47
/// Payload filter accepted by [`VectorStore::search`].
///
/// A point passes the filter when it satisfies **every** condition in `must` and
/// **none** of the conditions in `must_not`. The [`Default`] value has both lists empty.
///
/// Filters compare by value (`PartialEq`/`Eq`), which makes them easy to assert on in tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct VectorFilter {
    /// Conditions that must all match.
    pub must: Vec<FieldCondition>,
    /// Conditions of which none may match.
    pub must_not: Vec<FieldCondition>,
}

/// A single payload field condition in a [`VectorFilter`].
///
/// Pairs a payload field name with the value that field is compared against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FieldCondition {
    /// Payload field name.
    pub field: String,
    /// Expected value for the field.
    pub value: FieldValue,
}

/// Value type in a [`FieldCondition`].
///
/// Two values are equal only when both the variant and the wrapped value agree, so
/// `Integer(1)` never equals `Text("1")`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldValue {
    /// Exact integer match.
    Integer(i64),
    /// Exact string match.
    Text(String),
}
76
77/// A vector point returned by [`VectorStore::search`] with an attached similarity score.
78#[derive(Debug, Clone)]
79pub struct ScoredVectorPoint {
80    /// Point identifier (matches [`VectorPoint::id`]).
81    pub id: String,
82    /// Cosine similarity score in `[0, 1]`.
83    pub score: f32,
84    /// Payload stored alongside the vector.
85    pub payload: HashMap<String, serde_json::Value>,
86}
87
/// Boxed, pinned, `Send` future used as the return type of every [`VectorStore`] method.
///
/// Kept `pub(crate)` on purpose: all [`VectorStore`] implementations live inside this crate.
/// Promote the alias to `pub` if the trait is ever opened up to external implementors.
pub(crate) type BoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
93
/// Output of [`VectorStore::scroll_all`]: an outer map keyed by point ID whose values are
/// inner maps from payload key to payload value, both as strings.
pub type ScrollResult = HashMap<String, HashMap<String, String>>;
96
97/// Abstraction over a vector database backend.
98///
99/// Implementations must be `Send + Sync` so they can be wrapped in `Arc` and shared
100/// across async tasks. All methods return boxed futures via `BoxFuture` to remain
101/// object-safe.
102///
103/// # Implementations
104///
105/// | Type | Notes |
106/// |------|-------|
107/// | [`crate::embedding_store::EmbeddingStore`] | Qdrant-backed; production default. |
108/// | [`crate::db_vector_store::DbVectorStore`] | SQLite BLOB; offline / CI use. |
109/// | [`crate::in_memory_store::InMemoryVectorStore`] | Fully in-process; unit tests. |
110pub trait VectorStore: Send + Sync {
111    /// Create a collection with cosine-distance vectors of `vector_size` dimensions.
112    ///
113    /// Idempotent — no error if the collection already exists with the same dimension.
114    fn ensure_collection(
115        &self,
116        collection: &str,
117        vector_size: u64,
118    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
119
120    /// Returns `true` if `collection` exists in the backend.
121    fn collection_exists(&self, collection: &str) -> BoxFuture<'_, Result<bool, VectorStoreError>>;
122
123    /// Delete a collection and all its points.
124    fn delete_collection(&self, collection: &str) -> BoxFuture<'_, Result<(), VectorStoreError>>;
125
126    /// Upsert `points` into `collection`.
127    ///
128    /// Points with existing IDs are overwritten; new IDs are inserted.
129    fn upsert(
130        &self,
131        collection: &str,
132        points: Vec<VectorPoint>,
133    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
134
135    /// Search `collection` for the `limit` nearest neighbours of `vector`.
136    ///
137    /// Returns results in descending similarity order.  An optional [`VectorFilter`]
138    /// restricts the search space to points matching the payload conditions.
139    fn search(
140        &self,
141        collection: &str,
142        vector: Vec<f32>,
143        limit: u64,
144        filter: Option<VectorFilter>,
145    ) -> BoxFuture<'_, Result<Vec<ScoredVectorPoint>, VectorStoreError>>;
146
147    /// Delete specific points from `collection` by their string IDs.
148    fn delete_by_ids(
149        &self,
150        collection: &str,
151        ids: Vec<String>,
152    ) -> BoxFuture<'_, Result<(), VectorStoreError>>;
153
154    /// Scroll (paginate) all points in `collection` and return a map of
155    /// `point_id → { key_field → value }` payload entries.
156    fn scroll_all(
157        &self,
158        collection: &str,
159        key_field: &str,
160    ) -> BoxFuture<'_, Result<ScrollResult, VectorStoreError>>;
161
162    /// Return `true` if the backend is reachable and operational.
163    fn health_check(&self) -> BoxFuture<'_, Result<bool, VectorStoreError>>;
164
165    /// Create keyword payload indexes for the given field names.
166    ///
167    /// Default implementation is a no-op (for non-Qdrant backends).
168    fn create_keyword_indexes(
169        &self,
170        _collection: &str,
171        _fields: &[&str],
172    ) -> BoxFuture<'_, Result<(), VectorStoreError>> {
173        Box::pin(async { Ok(()) })
174    }
175}