ringdb/engine.rs
1use std::time::Instant;
2
3use serde::{Serialize, de::DeserializeOwned};
4
5use crate::backend::{CpuBackend, RingComputeBackend};
6use crate::config::RingDbConfig;
7use crate::error::Result;
8use crate::payload::{PayloadStore, PayloadStoreBuilder};
9use crate::query::{QueryResult, RingQuery};
10
11/// Builder for a ring-query vector database.
12///
13/// Insert vectors (and their associated payloads) with
14/// [`add_vector()`](Self::add_vector), then call [`build()`](Self::build) to
15/// transfer ownership to the compute backend and obtain a [`SealedRingDb`]
16/// that can be queried.
17///
18/// `T` is the payload type stored alongside each vector. Use `T = ()` when
19/// no payload is needed.
20///
21/// # Example — no payload
22///
23/// ```
24/// use ringdb::{RingDb, RingDbConfig, RingQuery};
25///
26/// let config = RingDbConfig::new(4);
27/// let mut db = RingDb::new(config).unwrap();
28///
29/// db.add_vector(&[1.0, 0.0, 0.0, 0.0], ()).unwrap();
30/// db.add_vector(&[0.0, 1.0, 0.0, 0.0], ()).unwrap();
31///
32/// let db = db.build().unwrap();
33/// let result = db.query(&RingQuery { query: &[1.0f32, 0.0, 0.0, 0.0], d: 1.0, lambda: 0.1 }).unwrap();
34/// println!("hits: {:?}", result.ids);
35/// ```
36///
37/// # Example — with payload
38///
39/// ```
40/// use ringdb::{RingDb, RingDbConfig, RingQuery};
41/// use serde::{Serialize, Deserialize};
42///
43/// #[derive(Serialize, Deserialize)]
44/// struct Meta { label: String }
45///
46/// let mut db: RingDb<Meta> = RingDb::new(RingDbConfig::new(2)).unwrap();
47/// db.add_vector(&[1.0, 0.0], Meta { label: "dog".into() }).unwrap();
48/// db.add_vector(&[0.0, 1.0], Meta { label: "cat".into() }).unwrap();
49///
50/// let db = db.build().unwrap();
51/// let result = db.query(&RingQuery { query: &[1.0f32, 0.0], d: 1.0, lambda: 0.1 }).unwrap();
52/// let payloads = db.fetch_payloads(&result.ids).unwrap();
53/// ```
54pub struct RingDb<T = ()> {
55 config: RingDbConfig,
56 backend: Box<dyn RingComputeBackend>,
57 n_vectors: usize,
58
59 /// Staging buffer: f32 vectors, row-major, `n_vectors × dims`.
60 vectors: Vec<f32>,
61
62 /// Staging buffer: per-vector squared L2 norm.
63 norms_sq: Vec<f32>,
64
65 /// Streams payloads to a temp file as they arrive; never accumulates in RAM.
66 payload_builder: PayloadStoreBuilder<T>,
67}
68
69impl<T: Serialize + DeserializeOwned> RingDb<T> {
70 /// Create a new empty `RingDb` with the given configuration.
71 pub fn new(config: RingDbConfig) -> Result<Self> {
72 Ok(Self {
73 config,
74 backend: Box::new(CpuBackend::new()),
75 n_vectors: 0,
76 vectors: Vec::new(),
77 norms_sq: Vec::new(),
78 payload_builder: PayloadStoreBuilder::new()?,
79 })
80 }
81
82 /// Insert a single vector and its associated payload.
83 ///
84 /// Vectors are assigned sequential IDs starting from 0.
85 /// The slice length must equal `dims`.
86 pub fn add_vector(&mut self, vector: &[f32], payload: T) -> Result<()> {
87 let dims = self.config.dims;
88 if vector.len() != dims {
89 return Err(crate::error::RingDbError::DimensionMismatch {
90 expected: dims,
91 got: vector.len(),
92 });
93 }
94
95 let norm_sq: f32 = vector.iter().map(|x| x * x).sum();
96 self.norms_sq.push(norm_sq);
97 self.vectors.extend_from_slice(vector);
98 self.payload_builder.push(payload)?;
99 self.n_vectors += 1;
100 Ok(())
101 }
102
103 /// Transfer ownership of the accumulated data to the compute backend and
104 /// seal the database.
105 ///
106 /// Vector data is moved into the backend (zero-cost for the CPU backend).
107 /// Payloads are serialized and moved into a cold anonymous mmap — the
108 /// staging `Vec<T>` is dropped immediately after.
109 pub fn build(mut self) -> Result<SealedRingDb<T>> {
110 let dims = self.config.dims;
111 let n_vectors = self.n_vectors;
112 self.backend
113 .upload_f32_dataset(dims, self.vectors, self.norms_sq)?;
114 let payload_store = self.payload_builder.finish()?;
115 Ok(SealedRingDb {
116 config: self.config,
117 backend: self.backend,
118 n_vectors,
119 payload_store,
120 })
121 }
122
123 /// Number of vectors currently staged.
124 pub fn len(&self) -> usize {
125 self.n_vectors
126 }
127
128 /// Returns `true` if no vectors have been inserted.
129 pub fn is_empty(&self) -> bool {
130 self.n_vectors == 0
131 }
132
133 /// Number of dimensions per vector.
134 pub fn dims(&self) -> usize {
135 self.config.dims
136 }
137
138 /// Name of the backend currently in use.
139 pub fn backend_name(&self) -> &str {
140 self.backend.name()
141 }
142}
143
144/// Sealed (immutable) ring-query database.
145///
146/// Obtained by calling [`RingDb::build()`]. Vectors can no longer be
147/// inserted — only queries and payload fetches are allowed.
148///
149/// The hot side (vectors + norms) is owned by the compute backend.
150/// The cold side (payloads) lives in an anonymous mmap managed by
151/// [`PayloadStore`].
152pub struct SealedRingDb<T = ()> {
153 config: RingDbConfig,
154 backend: Box<dyn RingComputeBackend>,
155 n_vectors: usize,
156 payload_store: PayloadStore<T>,
157}
158
159impl<T: Serialize + DeserializeOwned> SealedRingDb<T> {
160 /// Execute a ring query and return matching vector IDs.
161 pub fn query(&self, q: &RingQuery<'_>) -> Result<QueryResult> {
162 let dims = self.config.dims;
163 if q.query.len() != dims {
164 return Err(crate::error::RingDbError::DimensionMismatch {
165 expected: dims,
166 got: q.query.len(),
167 });
168 }
169
170 let t = Instant::now();
171 let ids = self.backend.ring_query_f32(dims, q.query, q.d, q.lambda)?;
172 let elapsed = t.elapsed();
173
174 Ok(QueryResult {
175 ids,
176 backend_used: self.backend.name(),
177 elapsed,
178 })
179 }
180
181 /// Fetch the payload for a single vector ID.
182 ///
183 /// Reads and deserializes from the cold mmap. Call this after
184 /// [`query`](Self::query) to retrieve metadata for the matching vectors.
185 pub fn fetch_payload(&self, id: u32) -> Result<T> {
186 self.payload_store.fetch(id)
187 }
188
189 /// Fetch payloads for a slice of vector IDs, in order.
190 pub fn fetch_payloads(&self, ids: &[u32]) -> Result<Vec<T>> {
191 self.payload_store.fetch_many(ids)
192 }
193
194 /// Number of vectors stored.
195 pub fn len(&self) -> usize {
196 self.n_vectors
197 }
198
199 /// Returns `true` if the database contains no vectors.
200 pub fn is_empty(&self) -> bool {
201 self.n_vectors == 0
202 }
203
204 /// Number of dimensions per vector.
205 pub fn dims(&self) -> usize {
206 self.config.dims
207 }
208
209 /// Name of the backend currently in use.
210 pub fn backend_name(&self) -> &str {
211 self.backend.name()
212 }
213}