1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
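//! Accessors for `VectorCollection` metadata and state, plus the
//! administrative operations (index rebuild, storage compaction,
//! advanced-config overrides, auto-reindex hooks) exposed alongside them.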
use crate::collection::types::CollectionConfig;
use crate::distance::DistanceMetric;
use crate::quantization::StorageMode;
use super::VectorCollection;
impl VectorCollection {
    /// Gives access to the guard rails of this collection.
    ///
    /// The reference is obtained from the inner collection.
    #[must_use]
    pub fn guard_rails(&self) -> &std::sync::Arc<crate::guardrails::GuardRails> {
        self.inner.guard_rails()
    }

    /// Name of the collection, read from its current config.
    #[must_use]
    pub fn name(&self) -> String {
        let config = self.inner.config();
        config.name
    }

    /// Vector dimension, read from the current config.
    #[must_use]
    pub fn dimension(&self) -> usize {
        let config = self.inner.config();
        config.dimension
    }

    /// Distance metric, read from the current config.
    #[must_use]
    pub fn metric(&self) -> DistanceMetric {
        let config = self.inner.config();
        config.metric
    }

    /// Storage mode, read from the current config.
    #[must_use]
    pub fn storage_mode(&self) -> StorageMode {
        let config = self.inner.config();
        config.storage_mode
    }

    /// Number of points in the collection.
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Whether the collection is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// IDs of all points in the collection.
    #[must_use]
    pub fn all_ids(&self) -> Vec<u64> {
        self.inner.all_ids()
    }

    /// Fetches the next batch of points for scroll iteration.
    ///
    /// `cursor`, `batch_size` and `filter` are forwarded unchanged to the
    /// inner collection's scroll implementation. The same operation is
    /// exposed as `scroll_batch` on [`crate::GraphCollection`] and
    /// [`crate::MetadataCollection`].
    ///
    /// # Errors
    ///
    /// Fails when `batch_size` is 0.
    pub fn scroll_batch(
        &self,
        cursor: Option<u64>,
        batch_size: usize,
        filter: Option<&crate::filter::Filter>,
    ) -> crate::error::Result<crate::collection::ScrollBatch> {
        self.inner.scroll_batch(cursor, batch_size, filter)
    }

    /// Current collection config, returned by value.
    #[must_use]
    pub fn config(&self) -> CollectionConfig {
        self.inner.config()
    }

    /// Rebuilds this collection's HNSW index from the vector storage so
    /// that memory held by tombstoned entries is reclaimed.
    ///
    /// The returned value is the number of entries compacted.
    ///
    /// Backs the server admin endpoint
    /// `POST /collections/{name}/index/rebuild` (finding F-21).
    ///
    /// # Errors
    ///
    /// Fails when the vacuum fails, for example because vector storage
    /// is disabled on the HNSW index.
    pub fn rebuild_index(&self) -> crate::error::Result<usize> {
        self.inner.vacuum_hnsw_index()
    }

    /// Compacts the underlying vector storage: active vectors are
    /// rewritten into a contiguous layout and the disk space taken by
    /// deleted entries is reclaimed.
    ///
    /// The returned value is the number of bytes reclaimed.
    ///
    /// Backs the server admin endpoint
    /// `POST /collections/{name}/compact`.
    ///
    /// # Errors
    ///
    /// Fails when the compaction I/O fails (e.g. disk full, file lock
    /// contention).
    pub fn compact_storage(&self) -> crate::error::Result<usize> {
        self.inner.compact_vector_storage()
    }

    /// Overrides the advanced configuration fields after creation
    /// (`pq_rescore_oversampling`, `deferred_indexing`,
    /// `async_index_builder`) and persists the updated `config.json`.
    ///
    /// Every parameter carries three states through a nested `Option`:
    ///
    /// - `None` — leave the field unchanged;
    /// - `Some(Some(value))` — set the field to `value`;
    /// - `Some(None)` — explicitly clear the field.
    ///
    /// This three-state contract is intentional, which is why
    /// `clippy::option_option` is allowed locally.
    ///
    /// The `deferred_indexing` parameter is only part of the signature
    /// when the `persistence` feature is enabled.
    ///
    /// # Errors
    ///
    /// Fails when the updated config cannot be written to disk.
    #[allow(clippy::option_option)]
    pub fn apply_advanced_config(
        &self,
        pq_rescore_oversampling: Option<Option<u32>>,
        #[cfg(feature = "persistence")] deferred_indexing: Option<
            Option<crate::collection::streaming::DeferredIndexerConfig>,
        >,
        async_index_builder: Option<Option<crate::collection::streaming::AsyncIndexBuilderConfig>>,
    ) -> crate::error::Result<()> {
        self.inner.apply_advanced_config(
            pq_rescore_oversampling,
            #[cfg(feature = "persistence")]
            deferred_indexing,
            async_index_builder,
        )
    }

    /// CBO statistics for this collection.
    #[must_use]
    pub fn get_stats(&self) -> crate::collection::stats::CollectionStats {
        self.inner.get_stats()
    }

    /// Whether this is a metadata-only collection.
    #[must_use]
    pub fn is_metadata_only(&self) -> bool {
        self.inner.is_metadata_only()
    }

    /// Analyzes the collection and yields fresh statistics.
    ///
    /// # Errors
    ///
    /// Fails when the statistics computation fails.
    pub fn analyze(&self) -> crate::error::Result<crate::collection::stats::CollectionStats> {
        self.inner.analyze()
    }

    /// Whether a secondary index exists on `field`.
    #[must_use]
    pub fn has_secondary_index(&self, field: &str) -> bool {
        self.inner.has_secondary_index(field)
    }

    /// Drops the secondary index on `field_name`.
    ///
    /// The result is `true` when the index existed.
    #[must_use]
    pub fn drop_secondary_index(&self, field_name: &str) -> bool {
        self.inner.drop_secondary_index(field_name)
    }

    /// Whether a property index exists for the given `label` and
    /// `property`.
    #[must_use]
    pub fn has_property_index(&self, label: &str, property: &str) -> bool {
        self.inner.has_property_index(label, property)
    }

    /// Whether a range index exists for the given `label` and
    /// `property`.
    #[must_use]
    pub fn has_range_index(&self, label: &str, property: &str) -> bool {
        self.inner.has_range_index(label, property)
    }

    /// All index definitions on this collection.
    #[must_use]
    pub fn list_indexes(&self) -> Vec<crate::collection::IndexInfo> {
        self.inner.list_indexes()
    }

    /// Total memory used by all indexes, in bytes.
    #[must_use]
    pub fn indexes_memory_usage(&self) -> usize {
        self.inner.indexes_memory_usage()
    }

    /// Registers an [`AutoReindexManager`](crate::collection::auto_reindex::AutoReindexManager)
    /// on this collection as a runtime-only hook.
    ///
    /// The attachment is **not persisted** to `config.json`, so it has to
    /// be repeated after every [`Database::open`](crate::Database::open).
    /// This is deliberate: it avoids the `Duration` serde round-trip and
    /// keeps the collection schema version stable.
    ///
    /// While a manager is attached, the bulk upsert hot path consults it
    /// after every successful `upsert_bulk` call. If the manager reports
    /// that the index parameters have diverged from the optimal
    /// configuration for the current dataset size, a `tracing::info!`
    /// event is emitted. The index is NOT rebuilt automatically — that
    /// decision stays with the caller.
    ///
    /// External consumers can plug in their own reindex pipeline through
    /// the manager's event callback
    /// ([`crate::collection::auto_reindex::AutoReindexManager::on_event`])
    /// or poll the divergence state with
    /// [`Self::check_auto_reindex_divergence`].
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use std::sync::Arc;
    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
    /// # use velesdb_core::collection::auto_reindex::{AutoReindexConfig, AutoReindexManager};
    /// # let coll = VectorCollection::create("./data/docs".into(), "docs", 768, DistanceMetric::Cosine, StorageMode::Full)?;
    /// let manager = Arc::new(AutoReindexManager::new(AutoReindexConfig::default()));
    /// coll.attach_auto_reindex(manager);
    /// # Ok::<(), velesdb_core::Error>(())
    /// ```
    pub fn attach_auto_reindex(
        &self,
        manager: std::sync::Arc<crate::collection::auto_reindex::AutoReindexManager>,
    ) {
        self.inner.attach_auto_reindex(manager);
    }

    /// Removes the attached auto-reindex manager and hands it back, so
    /// the caller can drop or reuse it.
    ///
    /// The result is `None` when no manager was attached.
    #[must_use = "detach_auto_reindex returns the previously attached manager — ignore only when you intend to drop it"]
    pub fn detach_auto_reindex(
        &self,
    ) -> Option<std::sync::Arc<crate::collection::auto_reindex::AutoReindexManager>> {
        self.inner.detach_auto_reindex()
    }

    /// Clone of the currently attached auto-reindex manager, or `None`
    /// when none is attached.
    #[must_use]
    pub fn auto_reindex_manager(
        &self,
    ) -> Option<std::sync::Arc<crate::collection::auto_reindex::AutoReindexManager>> {
        self.inner.auto_reindex_manager()
    }

    /// Computes a [`DivergenceCheck`](crate::collection::auto_reindex::DivergenceCheck)
    /// from the attached manager against the collection's current state.
    ///
    /// The result is `None` when no manager is attached. The call is
    /// read-only and does not mutate the manager state.
    #[must_use]
    pub fn check_auto_reindex_divergence(
        &self,
    ) -> Option<crate::collection::auto_reindex::DivergenceCheck> {
        self.inner.check_auto_reindex_divergence()
    }
}