pulsedb/config.rs
1//! Configuration types for PulseDB.
2//!
3//! The [`Config`] struct controls database behavior including:
4//! - Embedding provider (builtin ONNX or external)
5//! - Embedding dimension (384, 768, or custom)
6//! - Cache size and durability settings
7//!
8//! # Example
9//! ```rust
10//! use pulsedb::{Config, EmbeddingProvider, EmbeddingDimension, SyncMode};
11//!
12//! // Use defaults (External provider, 384 dimensions)
13//! let config = Config::default();
14//!
15//! // Customize for production
16//! let config = Config {
17//! embedding_dimension: EmbeddingDimension::D768,
18//! cache_size_mb: 128,
19//! sync_mode: SyncMode::Normal,
20//! ..Default::default()
21//! };
22//! ```
23
24use std::path::PathBuf;
25use std::time::Duration;
26
27use serde::{Deserialize, Serialize};
28
29use crate::error::ValidationError;
30use crate::types::CollectiveId;
31
32/// Database configuration options.
33///
34/// All fields have sensible defaults. Use struct update syntax to override
35/// specific settings:
36///
37/// ```rust
38/// use pulsedb::Config;
39///
40/// let config = Config {
41/// cache_size_mb: 256,
42/// ..Default::default()
43/// };
44/// ```
45#[derive(Clone, Debug)]
46pub struct Config {
47 /// How embeddings are generated or provided.
48 pub embedding_provider: EmbeddingProvider,
49
50 /// Embedding vector dimension (must match provider output).
51 pub embedding_dimension: EmbeddingDimension,
52
53 /// Default collective for operations when none specified.
54 pub default_collective: Option<CollectiveId>,
55
56 /// Cache size in megabytes for the storage engine.
57 ///
58 /// Higher values improve read performance but use more memory.
59 /// Default: 64 MB
60 pub cache_size_mb: usize,
61
62 /// Durability mode for write operations.
63 pub sync_mode: SyncMode,
64
65 /// HNSW vector index parameters.
66 ///
67 /// Controls the quality and performance of semantic search.
68 /// See [`HnswConfig`] for tuning guidelines.
69 pub hnsw: HnswConfig,
70
71 /// Agent activity tracking parameters.
72 ///
73 /// Controls staleness detection for agent heartbeats.
74 /// See [`ActivityConfig`] for details.
75 pub activity: ActivityConfig,
76
77 /// Watch system parameters.
78 ///
79 /// Controls the in-process event notification channel.
80 /// See [`WatchConfig`] for details.
81 pub watch: WatchConfig,
82
83 /// Read-only mode.
84 ///
85 /// When `true`, all mutation methods (`record_experience`, `store_relation`,
86 /// etc.) return `PulseDBError::ReadOnly`. Read operations work normally.
87 ///
88 /// Use this for read-only consumers like PulseVision that open the same
89 /// database file a writer is using.
90 ///
91 /// Default: false
92 pub read_only: bool,
93}
94
95impl Default for Config {
96 fn default() -> Self {
97 Self {
98 // External is the safe default - no ONNX dependency required
99 embedding_provider: EmbeddingProvider::External,
100 // 384 matches all-MiniLM-L6-v2, the default builtin model
101 embedding_dimension: EmbeddingDimension::D384,
102 default_collective: None,
103 cache_size_mb: 64,
104 sync_mode: SyncMode::Normal,
105 hnsw: HnswConfig::default(),
106 activity: ActivityConfig::default(),
107 watch: WatchConfig::default(),
108 read_only: false,
109 }
110 }
111}
112
113impl Config {
114 /// Creates a new Config with default settings.
115 pub fn new() -> Self {
116 Self::default()
117 }
118
119 /// Creates a Config for read-only access.
120 ///
121 /// All mutation methods will return `PulseDBError::ReadOnly`.
122 /// Use this for read-only consumers like visualization tools that
123 /// open the same database file a writer is using.
124 ///
125 /// # Example
126 /// ```rust
127 /// use pulsedb::Config;
128 ///
129 /// let config = Config::read_only();
130 /// assert!(config.read_only);
131 /// ```
132 pub fn read_only() -> Self {
133 Self {
134 read_only: true,
135 ..Default::default()
136 }
137 }
138
139 /// Creates a Config for builtin embedding generation.
140 ///
141 /// This requires the `builtin-embeddings` feature to be enabled.
142 ///
143 /// # Example
144 /// ```rust
145 /// use pulsedb::Config;
146 ///
147 /// let config = Config::with_builtin_embeddings();
148 /// ```
149 pub fn with_builtin_embeddings() -> Self {
150 Self {
151 embedding_provider: EmbeddingProvider::Builtin { model_path: None },
152 ..Default::default()
153 }
154 }
155
156 /// Creates a Config for external embedding provider.
157 ///
158 /// When using external embeddings, you must provide pre-computed
159 /// embedding vectors when recording experiences.
160 ///
161 /// # Example
162 /// ```rust
163 /// use pulsedb::{Config, EmbeddingDimension};
164 ///
165 /// // OpenAI ada-002 uses 1536 dimensions
166 /// let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
167 /// ```
168 pub fn with_external_embeddings(dimension: EmbeddingDimension) -> Self {
169 Self {
170 embedding_provider: EmbeddingProvider::External,
171 embedding_dimension: dimension,
172 ..Default::default()
173 }
174 }
175
176 /// Validates the configuration.
177 ///
178 /// Called automatically by `PulseDB::open()`. You can also call this
179 /// explicitly to check configuration before attempting to open.
180 ///
181 /// # Errors
182 /// Returns `ValidationError` if:
183 /// - `cache_size_mb` is 0
184 /// - Custom dimension is 0 or > 4096
185 pub fn validate(&self) -> Result<(), ValidationError> {
186 // Cache size must be positive
187 if self.cache_size_mb == 0 {
188 return Err(ValidationError::invalid_field(
189 "cache_size_mb",
190 "must be greater than 0",
191 ));
192 }
193
194 // Validate HNSW parameters
195 if self.hnsw.max_nb_connection == 0 {
196 return Err(ValidationError::invalid_field(
197 "hnsw.max_nb_connection",
198 "must be greater than 0",
199 ));
200 }
201 if self.hnsw.ef_construction == 0 {
202 return Err(ValidationError::invalid_field(
203 "hnsw.ef_construction",
204 "must be greater than 0",
205 ));
206 }
207 if self.hnsw.ef_search == 0 {
208 return Err(ValidationError::invalid_field(
209 "hnsw.ef_search",
210 "must be greater than 0",
211 ));
212 }
213
214 // Validate watch buffer size
215 if self.watch.buffer_size == 0 {
216 return Err(ValidationError::invalid_field(
217 "watch.buffer_size",
218 "must be greater than 0",
219 ));
220 }
221 if self.watch.poll_interval_ms == 0 {
222 return Err(ValidationError::invalid_field(
223 "watch.poll_interval_ms",
224 "must be greater than 0",
225 ));
226 }
227
228 // Validate custom dimension bounds
229 if let EmbeddingDimension::Custom(dim) = self.embedding_dimension {
230 if dim == 0 {
231 return Err(ValidationError::invalid_field(
232 "embedding_dimension",
233 "custom dimension must be greater than 0",
234 ));
235 }
236 if dim > 4096 {
237 return Err(ValidationError::invalid_field(
238 "embedding_dimension",
239 "custom dimension must not exceed 4096",
240 ));
241 }
242 }
243
244 Ok(())
245 }
246
247 /// Returns the embedding dimension as a numeric value.
248 pub fn dimension(&self) -> usize {
249 self.embedding_dimension.size()
250 }
251}
252
/// Selects where embedding vectors for experiences come from.
#[derive(Debug, Clone)]
pub enum EmbeddingProvider {
    /// PulseDB computes embeddings itself with a built-in ONNX model.
    ///
    /// Requires the `builtin-embeddings` feature. The default model is
    /// all-MiniLM-L6-v2 (384 dimensions).
    Builtin {
        /// Path to a custom ONNX model. `None` means the bundled model is used.
        model_path: Option<PathBuf>,
    },

    /// The caller hands in pre-computed embedding vectors.
    ///
    /// Pick this when you run your own embedding service (OpenAI, Cohere,
    /// etc.) or want a model that is not bundled with PulseDB.
    External,
}

impl EmbeddingProvider {
    /// Reports whether this is the [`Builtin`](Self::Builtin) provider,
    /// regardless of its model path.
    pub fn is_builtin(&self) -> bool {
        match self {
            Self::Builtin { .. } => true,
            Self::External => false,
        }
    }

    /// Reports whether this is the [`External`](Self::External) provider.
    pub fn is_external(&self) -> bool {
        match self {
            Self::External => true,
            Self::Builtin { .. } => false,
        }
    }
}
285
286/// Embedding vector dimensions.
287///
288/// Standard dimensions are provided for common models. Use `Custom` for
289/// other embedding services.
290#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
291pub enum EmbeddingDimension {
292 /// 384 dimensions (all-MiniLM-L6-v2, default builtin model).
293 #[default]
294 D384,
295
296 /// 768 dimensions (bge-base-en-v1.5, BERT-base).
297 D768,
298
299 /// Custom dimension for other embedding models.
300 ///
301 /// Must be between 1 and 4096.
302 Custom(usize),
303}
304
305impl EmbeddingDimension {
306 /// Returns the numeric size of this dimension.
307 ///
308 /// # Example
309 /// ```rust
310 /// use pulsedb::EmbeddingDimension;
311 ///
312 /// assert_eq!(EmbeddingDimension::D384.size(), 384);
313 /// assert_eq!(EmbeddingDimension::D768.size(), 768);
314 /// assert_eq!(EmbeddingDimension::Custom(1536).size(), 1536);
315 /// ```
316 #[inline]
317 pub const fn size(&self) -> usize {
318 match self {
319 Self::D384 => 384,
320 Self::D768 => 768,
321 Self::Custom(n) => *n,
322 }
323 }
324}
325
326/// Durability mode for write operations.
327///
328/// Controls the trade-off between write performance and crash safety.
329#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
330pub enum SyncMode {
331 /// Sync to disk on transaction commit.
332 ///
333 /// This is the default and recommended setting. Provides good performance
334 /// while ensuring committed data survives crashes.
335 #[default]
336 Normal,
337
338 /// Async sync (faster writes, may lose recent data on crash).
339 ///
340 /// Use for development or when you can tolerate losing the last few
341 /// seconds of writes. Significantly faster than `Normal`.
342 Fast,
343
344 /// Sync every write operation (slowest, maximum durability).
345 ///
346 /// Use when data loss is absolutely unacceptable. Very slow for
347 /// high write volumes.
348 Paranoid,
349}
350
351impl SyncMode {
352 /// Returns true if this mode syncs on every write.
353 pub fn is_paranoid(&self) -> bool {
354 matches!(self, Self::Paranoid)
355 }
356
357 /// Returns true if this mode is async (may lose data on crash).
358 pub fn is_fast(&self) -> bool {
359 matches!(self, Self::Fast)
360 }
361}
362
/// Parameters for the HNSW vector index.
///
/// These balance index build time, memory usage, and search accuracy. The
/// defaults are tuned for PulseDB's target scale (10K-500K experiences per
/// collective).
///
/// # Tuning Guide
///
/// | Use Case     | M  | ef_construction | ef_search |
/// |--------------|----|-----------------|-----------|
/// | Low memory   | 8  | 100             | 30        |
/// | Balanced     | 16 | 200             | 50        |
/// | High recall  | 32 | 400             | 100       |
#[derive(Debug, Clone)]
pub struct HnswConfig {
    /// The M parameter: maximum bidirectional connections per node.
    ///
    /// Raising it improves recall but costs memory and build time. Each node
    /// stores up to M links, so memory per node is O(M).
    ///
    /// Default: 16
    pub max_nb_connection: usize,

    /// Candidate-list size while building the index.
    ///
    /// Larger values yield a better quality graph but slow down insertion.
    /// Rule of thumb: ef_construction >= 2 * max_nb_connection.
    ///
    /// Default: 200
    pub ef_construction: usize,

    /// Candidate-list size while searching.
    ///
    /// Larger values improve recall but increase search latency.
    /// Must be >= k (the number of results requested).
    ///
    /// Default: 50
    pub ef_search: usize,

    /// Upper bound on the number of layers in the skip-list structure.
    ///
    /// Lower layers are dense, upper layers are sparse "express lanes."
    /// The default of 16 handles datasets up to ~1M vectors with M=16.
    ///
    /// Default: 16
    pub max_layer: usize,

    /// Number of vectors to pre-allocate room for.
    ///
    /// The index grows beyond this automatically, but pre-allocation avoids
    /// reallocations for known workloads.
    ///
    /// Default: 10_000
    pub max_elements: usize,
}

impl Default for HnswConfig {
    /// Returns the "Balanced" row of the tuning guide (M = 16,
    /// ef_construction = 200, ef_search = 50) with 16 layers and room for
    /// 10,000 vectors.
    fn default() -> Self {
        HnswConfig {
            // Graph shape.
            max_nb_connection: 16,
            max_layer: 16,
            // Candidate-list sizes.
            ef_construction: 200,
            ef_search: 50,
            // Initial capacity.
            max_elements: 10_000,
        }
    }
}
425
/// Settings for agent activity tracking.
///
/// Determines when an activity counts as stale and is filtered out.
///
/// # Example
/// ```rust
/// use std::time::Duration;
/// use pulsedb::Config;
///
/// let config = Config {
///     activity: pulsedb::ActivityConfig {
///         stale_threshold: Duration::from_secs(120), // 2 minutes
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone)]
pub struct ActivityConfig {
    /// How long an activity may go without a heartbeat before it is
    /// considered stale.
    ///
    /// Activities whose `last_heartbeat` is older than `now - stale_threshold`
    /// are excluded from `get_active_agents()` results. They remain in
    /// storage until explicitly ended or the collective is deleted.
    ///
    /// Default: 5 minutes (300 seconds)
    pub stale_threshold: Duration,
}

impl Default for ActivityConfig {
    /// Returns a configuration with a five-minute staleness threshold.
    fn default() -> Self {
        let five_minutes = Duration::from_secs(5 * 60);
        ActivityConfig {
            stale_threshold: five_minutes,
        }
    }
}
461
/// Settings for the watch system (in-process and cross-process).
///
/// Covers three things: whether in-process channel subscriptions are
/// enabled, how many real-time experience notifications each subscriber
/// channel can buffer, and how often cross-process change detection polls.
///
/// # Example
/// ```rust
/// use pulsedb::Config;
///
/// let config = Config {
///     watch: pulsedb::WatchConfig {
///         in_process: true,
///         buffer_size: 500,
///         poll_interval_ms: 200,
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone)]
pub struct WatchConfig {
    /// Turns on in-process watch subscriptions via crossbeam channels.
    ///
    /// When `true` (default), [`watch_experiences()`](crate::PulseDB::watch_experiences)
    /// streams receive real-time events. When `false`, in-process event
    /// dispatch is skipped entirely — only cross-process
    /// [`poll_changes()`](crate::PulseDB::poll_changes) remains available.
    ///
    /// Default: true
    pub in_process: bool,

    /// Per-subscriber cap on buffered events (in-process).
    ///
    /// When a subscriber's channel is full, new events are dropped for that
    /// subscriber (with a warning log). The publisher never blocks.
    ///
    /// Default: 1000
    pub buffer_size: usize,

    /// Interval, in milliseconds, between cross-process change checks.
    ///
    /// Reader processes call `poll_changes()` at this interval to check for
    /// new experiences written by the writer process.
    ///
    /// Default: 100
    pub poll_interval_ms: u64,
}

impl Default for WatchConfig {
    /// Returns a configuration with in-process watching enabled, a
    /// 1000-event buffer per subscriber, and a 100 ms poll interval.
    fn default() -> Self {
        WatchConfig {
            in_process: true,
            buffer_size: 1_000,
            poll_interval_ms: 100,
        }
    }
}
519
#[cfg(test)]
mod tests {
    //! Unit tests for the configuration types: default values, the
    //! convenience constructors, and every rejection path of
    //! `Config::validate`.

    use super::*;

    /// Asserts that `config` fails validation and that the reported field is
    /// exactly `expected_field`.
    ///
    /// Checking the field (rather than just `is_err()`) keeps a test from
    /// passing because some *other* validation rule happened to trip.
    fn assert_rejects_field(config: &Config, expected_field: &str) {
        let err = config.validate().unwrap_err();
        assert!(
            matches!(err, ValidationError::InvalidField { field, .. } if field == expected_field),
            "validation did not fail on `{}`",
            expected_field
        );
    }

    #[test]
    fn test_default_config() {
        let config = Config::default();
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.embedding_dimension, EmbeddingDimension::D384);
        assert_eq!(config.cache_size_mb, 64);
        assert_eq!(config.sync_mode, SyncMode::Normal);
        assert!(config.default_collective.is_none());
        assert!(!config.read_only);
    }

    #[test]
    fn test_read_only_config() {
        let config = Config::read_only();
        assert!(config.read_only);
        // Everything else stays at its default, so the config is still valid.
        assert_eq!(config.cache_size_mb, 64);
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_with_builtin_embeddings() {
        let config = Config::with_builtin_embeddings();
        assert!(config.embedding_provider.is_builtin());
    }

    #[test]
    fn test_with_external_embeddings() {
        let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.dimension(), 1536);
    }

    #[test]
    fn test_validate_success() {
        let config = Config::default();
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_validate_cache_size_zero() {
        let config = Config {
            cache_size_mb: 0,
            ..Default::default()
        };
        assert_rejects_field(&config, "cache_size_mb");
    }

    #[test]
    fn test_validate_custom_dimension_zero() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(0),
            ..Default::default()
        };
        assert_rejects_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_validate_custom_dimension_too_large() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(5000),
            ..Default::default()
        };
        assert_rejects_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_validate_custom_dimension_valid() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(1536),
            ..Default::default()
        };
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_validate_custom_dimension_bounds() {
        // Both ends of the documented 1..=4096 range are accepted...
        for dim in [1, 4096] {
            let config = Config {
                embedding_dimension: EmbeddingDimension::Custom(dim),
                ..Default::default()
            };
            assert!(config.validate().is_ok(), "dimension {} should be valid", dim);
        }

        // ...and the first value past the upper bound is not.
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(4097),
            ..Default::default()
        };
        assert_rejects_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_embedding_dimension_sizes() {
        assert_eq!(EmbeddingDimension::D384.size(), 384);
        assert_eq!(EmbeddingDimension::D768.size(), 768);
        assert_eq!(EmbeddingDimension::Custom(512).size(), 512);
    }

    #[test]
    fn test_sync_mode_checks() {
        assert!(!SyncMode::Normal.is_fast());
        assert!(!SyncMode::Normal.is_paranoid());
        assert!(SyncMode::Fast.is_fast());
        assert!(!SyncMode::Fast.is_paranoid());
        assert!(SyncMode::Paranoid.is_paranoid());
        assert!(!SyncMode::Paranoid.is_fast());
    }

    #[test]
    fn test_hnsw_config_defaults() {
        let config = HnswConfig::default();
        assert_eq!(config.max_nb_connection, 16);
        assert_eq!(config.ef_construction, 200);
        assert_eq!(config.ef_search, 50);
        assert_eq!(config.max_layer, 16);
        assert_eq!(config.max_elements, 10_000);
    }

    #[test]
    fn test_config_includes_hnsw() {
        let config = Config::default();
        assert_eq!(config.hnsw.max_nb_connection, 16);
    }

    #[test]
    fn test_validate_hnsw_zero_max_nb_connection() {
        let config = Config {
            hnsw: HnswConfig {
                max_nb_connection: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_rejects_field(&config, "hnsw.max_nb_connection");
    }

    #[test]
    fn test_validate_hnsw_zero_ef_construction() {
        let config = Config {
            hnsw: HnswConfig {
                ef_construction: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_rejects_field(&config, "hnsw.ef_construction");
    }

    #[test]
    fn test_validate_hnsw_zero_ef_search() {
        let config = Config {
            hnsw: HnswConfig {
                ef_search: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_rejects_field(&config, "hnsw.ef_search");
    }

    #[test]
    fn test_embedding_dimension_serialization() {
        let dim = EmbeddingDimension::D768;
        let bytes = bincode::serialize(&dim).unwrap();
        let restored: EmbeddingDimension = bincode::deserialize(&bytes).unwrap();
        assert_eq!(dim, restored);
    }

    #[test]
    fn test_watch_config_defaults() {
        let config = WatchConfig::default();
        assert!(config.in_process);
        assert_eq!(config.buffer_size, 1000);
        assert_eq!(config.poll_interval_ms, 100);
    }

    #[test]
    fn test_validate_watch_zero_buffer_size() {
        let config = Config {
            watch: WatchConfig {
                buffer_size: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_rejects_field(&config, "watch.buffer_size");
    }

    #[test]
    fn test_validate_watch_zero_poll_interval() {
        let config = Config {
            watch: WatchConfig {
                poll_interval_ms: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_rejects_field(&config, "watch.poll_interval_ms");
    }
}