pulsedb/config.rs
//! Configuration types for PulseDB.
//!
//! The [`Config`] struct controls database behavior including:
//! - Embedding provider (builtin ONNX or external)
//! - Embedding dimension (384, 768, or custom)
//! - Cache size and durability settings
//! - HNSW index, agent activity tracking, and watch system parameters
//!
//! # Example
//! ```rust
//! use pulsedb::{Config, EmbeddingProvider, EmbeddingDimension, SyncMode};
//!
//! // Use defaults (External provider, 384 dimensions)
//! let config = Config::default();
//!
//! // Customize for production
//! let config = Config {
//!     embedding_dimension: EmbeddingDimension::D768,
//!     cache_size_mb: 128,
//!     sync_mode: SyncMode::Normal,
//!     ..Default::default()
//! };
//! ```
23
24use std::path::PathBuf;
25use std::time::Duration;
26
27use serde::{Deserialize, Serialize};
28
29use crate::error::ValidationError;
30use crate::types::CollectiveId;
31
32/// Database configuration options.
33///
34/// All fields have sensible defaults. Use struct update syntax to override
35/// specific settings:
36///
37/// ```rust
38/// use pulsedb::Config;
39///
40/// let config = Config {
41/// cache_size_mb: 256,
42/// ..Default::default()
43/// };
44/// ```
45#[derive(Clone, Debug)]
46pub struct Config {
47 /// How embeddings are generated or provided.
48 pub embedding_provider: EmbeddingProvider,
49
50 /// Embedding vector dimension (must match provider output).
51 pub embedding_dimension: EmbeddingDimension,
52
53 /// Default collective for operations when none specified.
54 pub default_collective: Option<CollectiveId>,
55
56 /// Cache size in megabytes for the storage engine.
57 ///
58 /// Higher values improve read performance but use more memory.
59 /// Default: 64 MB
60 pub cache_size_mb: usize,
61
62 /// Durability mode for write operations.
63 pub sync_mode: SyncMode,
64
65 /// HNSW vector index parameters.
66 ///
67 /// Controls the quality and performance of semantic search.
68 /// See [`HnswConfig`] for tuning guidelines.
69 pub hnsw: HnswConfig,
70
71 /// Agent activity tracking parameters.
72 ///
73 /// Controls staleness detection for agent heartbeats.
74 /// See [`ActivityConfig`] for details.
75 pub activity: ActivityConfig,
76
77 /// Watch system parameters.
78 ///
79 /// Controls the in-process event notification channel.
80 /// See [`WatchConfig`] for details.
81 pub watch: WatchConfig,
82}
83
84impl Default for Config {
85 fn default() -> Self {
86 Self {
87 // External is the safe default - no ONNX dependency required
88 embedding_provider: EmbeddingProvider::External,
89 // 384 matches all-MiniLM-L6-v2, the default builtin model
90 embedding_dimension: EmbeddingDimension::D384,
91 default_collective: None,
92 cache_size_mb: 64,
93 sync_mode: SyncMode::Normal,
94 hnsw: HnswConfig::default(),
95 activity: ActivityConfig::default(),
96 watch: WatchConfig::default(),
97 }
98 }
99}
100
101impl Config {
102 /// Creates a new Config with default settings.
103 pub fn new() -> Self {
104 Self::default()
105 }
106
107 /// Creates a Config for builtin embedding generation.
108 ///
109 /// This requires the `builtin-embeddings` feature to be enabled.
110 ///
111 /// # Example
112 /// ```rust
113 /// use pulsedb::Config;
114 ///
115 /// let config = Config::with_builtin_embeddings();
116 /// ```
117 pub fn with_builtin_embeddings() -> Self {
118 Self {
119 embedding_provider: EmbeddingProvider::Builtin { model_path: None },
120 ..Default::default()
121 }
122 }
123
124 /// Creates a Config for external embedding provider.
125 ///
126 /// When using external embeddings, you must provide pre-computed
127 /// embedding vectors when recording experiences.
128 ///
129 /// # Example
130 /// ```rust
131 /// use pulsedb::{Config, EmbeddingDimension};
132 ///
133 /// // OpenAI ada-002 uses 1536 dimensions
134 /// let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
135 /// ```
136 pub fn with_external_embeddings(dimension: EmbeddingDimension) -> Self {
137 Self {
138 embedding_provider: EmbeddingProvider::External,
139 embedding_dimension: dimension,
140 ..Default::default()
141 }
142 }
143
144 /// Validates the configuration.
145 ///
146 /// Called automatically by `PulseDB::open()`. You can also call this
147 /// explicitly to check configuration before attempting to open.
148 ///
149 /// # Errors
150 /// Returns `ValidationError` if:
151 /// - `cache_size_mb` is 0
152 /// - Custom dimension is 0 or > 4096
153 pub fn validate(&self) -> Result<(), ValidationError> {
154 // Cache size must be positive
155 if self.cache_size_mb == 0 {
156 return Err(ValidationError::invalid_field(
157 "cache_size_mb",
158 "must be greater than 0",
159 ));
160 }
161
162 // Validate HNSW parameters
163 if self.hnsw.max_nb_connection == 0 {
164 return Err(ValidationError::invalid_field(
165 "hnsw.max_nb_connection",
166 "must be greater than 0",
167 ));
168 }
169 if self.hnsw.ef_construction == 0 {
170 return Err(ValidationError::invalid_field(
171 "hnsw.ef_construction",
172 "must be greater than 0",
173 ));
174 }
175 if self.hnsw.ef_search == 0 {
176 return Err(ValidationError::invalid_field(
177 "hnsw.ef_search",
178 "must be greater than 0",
179 ));
180 }
181
182 // Validate watch buffer size
183 if self.watch.buffer_size == 0 {
184 return Err(ValidationError::invalid_field(
185 "watch.buffer_size",
186 "must be greater than 0",
187 ));
188 }
189 if self.watch.poll_interval_ms == 0 {
190 return Err(ValidationError::invalid_field(
191 "watch.poll_interval_ms",
192 "must be greater than 0",
193 ));
194 }
195
196 // Validate custom dimension bounds
197 if let EmbeddingDimension::Custom(dim) = self.embedding_dimension {
198 if dim == 0 {
199 return Err(ValidationError::invalid_field(
200 "embedding_dimension",
201 "custom dimension must be greater than 0",
202 ));
203 }
204 if dim > 4096 {
205 return Err(ValidationError::invalid_field(
206 "embedding_dimension",
207 "custom dimension must not exceed 4096",
208 ));
209 }
210 }
211
212 Ok(())
213 }
214
215 /// Returns the embedding dimension as a numeric value.
216 pub fn dimension(&self) -> usize {
217 self.embedding_dimension.size()
218 }
219}
220
/// Selects how embedding vectors for experiences are obtained.
#[derive(Debug, Clone)]
pub enum EmbeddingProvider {
    /// Embeddings are generated by PulseDB with a built-in ONNX model.
    ///
    /// Requires the `builtin-embeddings` feature. The default model is
    /// all-MiniLM-L6-v2 (384 dimensions).
    Builtin {
        /// Path to a custom ONNX model. `None` selects the bundled model.
        model_path: Option<PathBuf>,
    },

    /// Embeddings are pre-computed and supplied by the caller.
    ///
    /// Choose this when you run your own embedding service (OpenAI, Cohere,
    /// etc.) or want a model that is not bundled with PulseDB.
    External,
}
241
242impl EmbeddingProvider {
243 /// Returns true if this is the builtin provider.
244 pub fn is_builtin(&self) -> bool {
245 matches!(self, Self::Builtin { .. })
246 }
247
248 /// Returns true if this is the external provider.
249 pub fn is_external(&self) -> bool {
250 matches!(self, Self::External)
251 }
252}
253
254/// Embedding vector dimensions.
255///
256/// Standard dimensions are provided for common models. Use `Custom` for
257/// other embedding services.
258#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
259pub enum EmbeddingDimension {
260 /// 384 dimensions (all-MiniLM-L6-v2, default builtin model).
261 #[default]
262 D384,
263
264 /// 768 dimensions (bge-base-en-v1.5, BERT-base).
265 D768,
266
267 /// Custom dimension for other embedding models.
268 ///
269 /// Must be between 1 and 4096.
270 Custom(usize),
271}
272
273impl EmbeddingDimension {
274 /// Returns the numeric size of this dimension.
275 ///
276 /// # Example
277 /// ```rust
278 /// use pulsedb::EmbeddingDimension;
279 ///
280 /// assert_eq!(EmbeddingDimension::D384.size(), 384);
281 /// assert_eq!(EmbeddingDimension::D768.size(), 768);
282 /// assert_eq!(EmbeddingDimension::Custom(1536).size(), 1536);
283 /// ```
284 #[inline]
285 pub const fn size(&self) -> usize {
286 match self {
287 Self::D384 => 384,
288 Self::D768 => 768,
289 Self::Custom(n) => *n,
290 }
291 }
292}
293
294/// Durability mode for write operations.
295///
296/// Controls the trade-off between write performance and crash safety.
297#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
298pub enum SyncMode {
299 /// Sync to disk on transaction commit.
300 ///
301 /// This is the default and recommended setting. Provides good performance
302 /// while ensuring committed data survives crashes.
303 #[default]
304 Normal,
305
306 /// Async sync (faster writes, may lose recent data on crash).
307 ///
308 /// Use for development or when you can tolerate losing the last few
309 /// seconds of writes. Significantly faster than `Normal`.
310 Fast,
311
312 /// Sync every write operation (slowest, maximum durability).
313 ///
314 /// Use when data loss is absolutely unacceptable. Very slow for
315 /// high write volumes.
316 Paranoid,
317}
318
319impl SyncMode {
320 /// Returns true if this mode syncs on every write.
321 pub fn is_paranoid(&self) -> bool {
322 matches!(self, Self::Paranoid)
323 }
324
325 /// Returns true if this mode is async (may lose data on crash).
326 pub fn is_fast(&self) -> bool {
327 matches!(self, Self::Fast)
328 }
329}
330
/// Tuning parameters for the HNSW vector index.
///
/// These balance index build time, memory usage, and search accuracy.
/// The defaults are tuned for PulseDB's target scale (10K-500K experiences
/// per collective).
///
/// # Tuning Guide
///
/// | Use Case     | M  | ef_construction | ef_search |
/// |--------------|----|-----------------|-----------|
/// | Low memory   | 8  | 100             | 30        |
/// | Balanced     | 16 | 200             | 50        |
/// | High recall  | 32 | 400             | 100       |
#[derive(Debug, Clone)]
pub struct HnswConfig {
    /// Maximum bidirectional connections per node (the M parameter).
    ///
    /// Raising it improves recall but costs memory and build time: each
    /// node stores up to M links, so memory per node is O(M).
    /// Default: 16
    pub max_nb_connection: usize,

    /// Number of candidates tracked while the index is being built.
    ///
    /// Raising it yields a better quality graph but slows down insertion.
    /// Rule of thumb: ef_construction >= 2 * max_nb_connection.
    /// Default: 200
    pub ef_construction: usize,

    /// Number of candidates tracked while searching.
    ///
    /// Raising it improves recall but increases search latency.
    /// Must be >= k (the number of results requested).
    /// Default: 50
    pub ef_search: usize,

    /// Maximum number of layers in the skip-list structure.
    ///
    /// Lower layers are dense, upper layers are sparse "express lanes."
    /// Default 16 handles datasets up to ~1M vectors with M=16.
    /// Default: 16
    pub max_layer: usize,

    /// Initial pre-allocated capacity, counted in vectors.
    ///
    /// The index grows beyond this automatically, but pre-allocating avoids
    /// reallocations for known workloads.
    /// Default: 10_000
    pub max_elements: usize,
}
381
382impl Default for HnswConfig {
383 fn default() -> Self {
384 Self {
385 max_nb_connection: 16,
386 ef_construction: 200,
387 ef_search: 50,
388 max_layer: 16,
389 max_elements: 10_000,
390 }
391 }
392}
393
/// Settings for agent activity tracking.
///
/// Determines how stale activities are detected and filtered.
///
/// # Example
/// ```rust
/// use std::time::Duration;
/// use pulsedb::Config;
///
/// let config = Config {
///     activity: pulsedb::ActivityConfig {
///         stale_threshold: Duration::from_secs(120), // 2 minutes
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone)]
pub struct ActivityConfig {
    /// How long an activity may go without a heartbeat before it counts as
    /// stale.
    ///
    /// Activities whose `last_heartbeat` is older than `now - stale_threshold`
    /// are left out of `get_active_agents()` results. They stay in storage
    /// until explicitly ended or the collective is deleted.
    ///
    /// Default: 5 minutes (300 seconds)
    pub stale_threshold: Duration,
}
421
422impl Default for ActivityConfig {
423 fn default() -> Self {
424 Self {
425 stale_threshold: Duration::from_secs(300),
426 }
427 }
428}
429
/// Settings for the watch system (in-process and cross-process).
///
/// Covers three things: whether in-process channel subscriptions are
/// enabled, how many real-time experience notifications each channel can
/// buffer, and how often cross-process change detection polls.
///
/// # Example
/// ```rust
/// use pulsedb::Config;
///
/// let config = Config {
///     watch: pulsedb::WatchConfig {
///         in_process: true,
///         buffer_size: 500,
///         poll_interval_ms: 200,
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone)]
pub struct WatchConfig {
    /// Turns in-process watch subscriptions (via crossbeam channels) on or off.
    ///
    /// When `true` (default), [`watch_experiences()`](crate::PulseDB::watch_experiences)
    /// streams receive real-time events. When `false`, in-process event
    /// dispatch is skipped entirely — only cross-process
    /// [`poll_changes()`](crate::PulseDB::poll_changes) remains available.
    ///
    /// Default: true
    pub in_process: bool,

    /// Maximum number of events buffered per in-process subscriber.
    ///
    /// Once a subscriber's channel is full, new events are dropped for that
    /// subscriber (with a warning log). The publisher never blocks.
    ///
    /// Default: 1000
    pub buffer_size: usize,

    /// Interval, in milliseconds, between cross-process change-detection polls.
    ///
    /// Reader processes call `poll_changes()` at this interval to check for
    /// new experiences written by the writer process.
    ///
    /// Default: 100
    pub poll_interval_ms: u64,
}
477
478impl Default for WatchConfig {
479 fn default() -> Self {
480 Self {
481 in_process: true,
482 buffer_size: 1000,
483 poll_interval_ms: 100,
484 }
485 }
486}
487
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `config` fails validation with an `InvalidField` error
    /// whose field name equals `expected`.
    fn rejects_field(config: &Config, expected: &str) -> bool {
        matches!(
            config.validate(),
            Err(ValidationError::InvalidField { field, .. }) if field == expected
        )
    }

    /// Default config with only the embedding dimension replaced.
    fn config_with_dimension(embedding_dimension: EmbeddingDimension) -> Config {
        Config {
            embedding_dimension,
            ..Config::default()
        }
    }

    /// Default config with only the HNSW section replaced.
    fn config_with_hnsw(hnsw: HnswConfig) -> Config {
        Config {
            hnsw,
            ..Config::default()
        }
    }

    /// Default config with only the watch section replaced.
    fn config_with_watch(watch: WatchConfig) -> Config {
        Config {
            watch,
            ..Config::default()
        }
    }

    // --- Construction -----------------------------------------------------

    #[test]
    fn test_default_config() {
        let defaults = Config::default();
        assert!(defaults.embedding_provider.is_external());
        assert_eq!(defaults.embedding_dimension, EmbeddingDimension::D384);
        assert_eq!(defaults.cache_size_mb, 64);
        assert_eq!(defaults.sync_mode, SyncMode::Normal);
        assert!(defaults.default_collective.is_none());
    }

    #[test]
    fn test_with_builtin_embeddings() {
        assert!(Config::with_builtin_embeddings()
            .embedding_provider
            .is_builtin());
    }

    #[test]
    fn test_with_external_embeddings() {
        let external = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
        assert!(external.embedding_provider.is_external());
        assert_eq!(external.dimension(), 1536);
    }

    // --- Validation: cache size and embedding dimension --------------------

    #[test]
    fn test_validate_success() {
        assert!(Config::default().validate().is_ok());
    }

    #[test]
    fn test_validate_cache_size_zero() {
        let zero_cache = Config {
            cache_size_mb: 0,
            ..Default::default()
        };
        assert!(rejects_field(&zero_cache, "cache_size_mb"));
    }

    #[test]
    fn test_validate_custom_dimension_zero() {
        let zero_dim = config_with_dimension(EmbeddingDimension::Custom(0));
        assert!(zero_dim.validate().is_err());
    }

    #[test]
    fn test_validate_custom_dimension_too_large() {
        let oversized = config_with_dimension(EmbeddingDimension::Custom(5000));
        assert!(oversized.validate().is_err());
    }

    #[test]
    fn test_validate_custom_dimension_valid() {
        let in_range = config_with_dimension(EmbeddingDimension::Custom(1536));
        assert!(in_range.validate().is_ok());
    }

    // --- Enum helpers -------------------------------------------------------

    #[test]
    fn test_embedding_dimension_sizes() {
        let expectations = [
            (EmbeddingDimension::D384, 384),
            (EmbeddingDimension::D768, 768),
            (EmbeddingDimension::Custom(512), 512),
        ];
        for (dimension, expected) in expectations {
            assert_eq!(dimension.size(), expected);
        }
    }

    #[test]
    fn test_sync_mode_checks() {
        assert!(!SyncMode::Normal.is_fast());
        assert!(!SyncMode::Normal.is_paranoid());
        assert!(SyncMode::Fast.is_fast());
        assert!(SyncMode::Paranoid.is_paranoid());
    }

    // --- HNSW ---------------------------------------------------------------

    #[test]
    fn test_hnsw_config_defaults() {
        let hnsw = HnswConfig::default();
        assert_eq!(hnsw.max_nb_connection, 16);
        assert_eq!(hnsw.ef_construction, 200);
        assert_eq!(hnsw.ef_search, 50);
        assert_eq!(hnsw.max_layer, 16);
        assert_eq!(hnsw.max_elements, 10_000);
    }

    #[test]
    fn test_config_includes_hnsw() {
        assert_eq!(Config::default().hnsw.max_nb_connection, 16);
    }

    #[test]
    fn test_validate_hnsw_zero_max_nb_connection() {
        let config = config_with_hnsw(HnswConfig {
            max_nb_connection: 0,
            ..Default::default()
        });
        assert!(rejects_field(&config, "hnsw.max_nb_connection"));
    }

    #[test]
    fn test_validate_hnsw_zero_ef_construction() {
        let config = config_with_hnsw(HnswConfig {
            ef_construction: 0,
            ..Default::default()
        });
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_validate_hnsw_zero_ef_search() {
        let config = config_with_hnsw(HnswConfig {
            ef_search: 0,
            ..Default::default()
        });
        assert!(config.validate().is_err());
    }

    // --- Serialization ------------------------------------------------------

    #[test]
    fn test_embedding_dimension_serialization() {
        let original = EmbeddingDimension::D768;
        let encoded = bincode::serialize(&original).unwrap();
        let decoded: EmbeddingDimension = bincode::deserialize(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    // --- Watch --------------------------------------------------------------

    #[test]
    fn test_watch_config_defaults() {
        let watch = WatchConfig::default();
        assert!(watch.in_process);
        assert_eq!(watch.buffer_size, 1000);
        assert_eq!(watch.poll_interval_ms, 100);
    }

    #[test]
    fn test_validate_watch_zero_buffer_size() {
        let config = config_with_watch(WatchConfig {
            buffer_size: 0,
            ..Default::default()
        });
        assert!(rejects_field(&config, "watch.buffer_size"));
    }

    #[test]
    fn test_validate_watch_zero_poll_interval() {
        let config = config_with_watch(WatchConfig {
            poll_interval_ms: 0,
            ..Default::default()
        });
        assert!(rejects_field(&config, "watch.poll_interval_ms"));
    }
}