// pulsedb/config.rs

//! Configuration types for PulseDB.
//!
//! The [`Config`] struct controls database behavior including:
//! - Embedding provider (builtin ONNX or external)
//! - Embedding dimension (384, 768, or custom)
//! - Cache size and durability settings
//!
//! # Example
//! ```rust
//! use pulsedb::{Config, EmbeddingProvider, EmbeddingDimension, SyncMode};
//!
//! // Use defaults (External provider, 384 dimensions)
//! let config = Config::default();
//!
//! // Customize for production
//! let config = Config {
//!     embedding_dimension: EmbeddingDimension::D768,
//!     cache_size_mb: 128,
//!     sync_mode: SyncMode::Normal,
//!     ..Default::default()
//! };
//! ```

use std::path::PathBuf;
use std::time::Duration;

use serde::{Deserialize, Serialize};

use crate::error::ValidationError;
use crate::types::CollectiveId;

32/// Database configuration options.
33///
34/// All fields have sensible defaults. Use struct update syntax to override
35/// specific settings:
36///
37/// ```rust
38/// use pulsedb::Config;
39///
40/// let config = Config {
41///     cache_size_mb: 256,
42///     ..Default::default()
43/// };
44/// ```
45#[derive(Clone, Debug)]
46pub struct Config {
47    /// How embeddings are generated or provided.
48    pub embedding_provider: EmbeddingProvider,
49
50    /// Embedding vector dimension (must match provider output).
51    pub embedding_dimension: EmbeddingDimension,
52
53    /// Default collective for operations when none specified.
54    pub default_collective: Option<CollectiveId>,
55
56    /// Cache size in megabytes for the storage engine.
57    ///
58    /// Higher values improve read performance but use more memory.
59    /// Default: 64 MB
60    pub cache_size_mb: usize,
61
62    /// Durability mode for write operations.
63    pub sync_mode: SyncMode,
64
65    /// HNSW vector index parameters.
66    ///
67    /// Controls the quality and performance of semantic search.
68    /// See [`HnswConfig`] for tuning guidelines.
69    pub hnsw: HnswConfig,
70
71    /// Agent activity tracking parameters.
72    ///
73    /// Controls staleness detection for agent heartbeats.
74    /// See [`ActivityConfig`] for details.
75    pub activity: ActivityConfig,
76
77    /// Watch system parameters.
78    ///
79    /// Controls the in-process event notification channel.
80    /// See [`WatchConfig`] for details.
81    pub watch: WatchConfig,
82}
83
84impl Default for Config {
85    fn default() -> Self {
86        Self {
87            // External is the safe default - no ONNX dependency required
88            embedding_provider: EmbeddingProvider::External,
89            // 384 matches all-MiniLM-L6-v2, the default builtin model
90            embedding_dimension: EmbeddingDimension::D384,
91            default_collective: None,
92            cache_size_mb: 64,
93            sync_mode: SyncMode::Normal,
94            hnsw: HnswConfig::default(),
95            activity: ActivityConfig::default(),
96            watch: WatchConfig::default(),
97        }
98    }
99}
100
101impl Config {
102    /// Creates a new Config with default settings.
103    pub fn new() -> Self {
104        Self::default()
105    }
106
107    /// Creates a Config for builtin embedding generation.
108    ///
109    /// This requires the `builtin-embeddings` feature to be enabled.
110    ///
111    /// # Example
112    /// ```rust
113    /// use pulsedb::Config;
114    ///
115    /// let config = Config::with_builtin_embeddings();
116    /// ```
117    pub fn with_builtin_embeddings() -> Self {
118        Self {
119            embedding_provider: EmbeddingProvider::Builtin { model_path: None },
120            ..Default::default()
121        }
122    }
123
124    /// Creates a Config for external embedding provider.
125    ///
126    /// When using external embeddings, you must provide pre-computed
127    /// embedding vectors when recording experiences.
128    ///
129    /// # Example
130    /// ```rust
131    /// use pulsedb::{Config, EmbeddingDimension};
132    ///
133    /// // OpenAI ada-002 uses 1536 dimensions
134    /// let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
135    /// ```
136    pub fn with_external_embeddings(dimension: EmbeddingDimension) -> Self {
137        Self {
138            embedding_provider: EmbeddingProvider::External,
139            embedding_dimension: dimension,
140            ..Default::default()
141        }
142    }
143
144    /// Validates the configuration.
145    ///
146    /// Called automatically by `PulseDB::open()`. You can also call this
147    /// explicitly to check configuration before attempting to open.
148    ///
149    /// # Errors
150    /// Returns `ValidationError` if:
151    /// - `cache_size_mb` is 0
152    /// - Custom dimension is 0 or > 4096
153    pub fn validate(&self) -> Result<(), ValidationError> {
154        // Cache size must be positive
155        if self.cache_size_mb == 0 {
156            return Err(ValidationError::invalid_field(
157                "cache_size_mb",
158                "must be greater than 0",
159            ));
160        }
161
162        // Validate HNSW parameters
163        if self.hnsw.max_nb_connection == 0 {
164            return Err(ValidationError::invalid_field(
165                "hnsw.max_nb_connection",
166                "must be greater than 0",
167            ));
168        }
169        if self.hnsw.ef_construction == 0 {
170            return Err(ValidationError::invalid_field(
171                "hnsw.ef_construction",
172                "must be greater than 0",
173            ));
174        }
175        if self.hnsw.ef_search == 0 {
176            return Err(ValidationError::invalid_field(
177                "hnsw.ef_search",
178                "must be greater than 0",
179            ));
180        }
181
182        // Validate watch buffer size
183        if self.watch.buffer_size == 0 {
184            return Err(ValidationError::invalid_field(
185                "watch.buffer_size",
186                "must be greater than 0",
187            ));
188        }
189        if self.watch.poll_interval_ms == 0 {
190            return Err(ValidationError::invalid_field(
191                "watch.poll_interval_ms",
192                "must be greater than 0",
193            ));
194        }
195
196        // Validate custom dimension bounds
197        if let EmbeddingDimension::Custom(dim) = self.embedding_dimension {
198            if dim == 0 {
199                return Err(ValidationError::invalid_field(
200                    "embedding_dimension",
201                    "custom dimension must be greater than 0",
202                ));
203            }
204            if dim > 4096 {
205                return Err(ValidationError::invalid_field(
206                    "embedding_dimension",
207                    "custom dimension must not exceed 4096",
208                ));
209            }
210        }
211
212        Ok(())
213    }
214
215    /// Returns the embedding dimension as a numeric value.
216    pub fn dimension(&self) -> usize {
217        self.embedding_dimension.size()
218    }
219}
220
/// Embedding provider configuration.
///
/// Determines how embedding vectors are generated for experiences.
#[derive(Clone, Debug)]
pub enum EmbeddingProvider {
    /// PulseDB generates embeddings using a built-in ONNX model.
    ///
    /// Requires the `builtin-embeddings` feature. The default model is
    /// all-MiniLM-L6-v2 (384 dimensions).
    Builtin {
        /// Custom ONNX model path. If `None`, uses the bundled model.
        model_path: Option<PathBuf>,
    },

    /// Caller provides pre-computed embedding vectors.
    ///
    /// Use this when you have your own embedding service (OpenAI, Cohere, etc.)
    /// or want to use a model not bundled with PulseDB.
    External,
}

impl EmbeddingProvider {
    /// Returns true if this is the builtin provider, with or without a
    /// custom model path.
    pub fn is_builtin(&self) -> bool {
        matches!(self, Self::Builtin { .. })
    }

    /// Returns true if this is the external provider.
    pub fn is_external(&self) -> bool {
        matches!(self, Self::External)
    }
}
254/// Embedding vector dimensions.
255///
256/// Standard dimensions are provided for common models. Use `Custom` for
257/// other embedding services.
258#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
259pub enum EmbeddingDimension {
260    /// 384 dimensions (all-MiniLM-L6-v2, default builtin model).
261    #[default]
262    D384,
263
264    /// 768 dimensions (bge-base-en-v1.5, BERT-base).
265    D768,
266
267    /// Custom dimension for other embedding models.
268    ///
269    /// Must be between 1 and 4096.
270    Custom(usize),
271}
272
273impl EmbeddingDimension {
274    /// Returns the numeric size of this dimension.
275    ///
276    /// # Example
277    /// ```rust
278    /// use pulsedb::EmbeddingDimension;
279    ///
280    /// assert_eq!(EmbeddingDimension::D384.size(), 384);
281    /// assert_eq!(EmbeddingDimension::D768.size(), 768);
282    /// assert_eq!(EmbeddingDimension::Custom(1536).size(), 1536);
283    /// ```
284    #[inline]
285    pub const fn size(&self) -> usize {
286        match self {
287            Self::D384 => 384,
288            Self::D768 => 768,
289            Self::Custom(n) => *n,
290        }
291    }
292}
293
294/// Durability mode for write operations.
295///
296/// Controls the trade-off between write performance and crash safety.
297#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
298pub enum SyncMode {
299    /// Sync to disk on transaction commit.
300    ///
301    /// This is the default and recommended setting. Provides good performance
302    /// while ensuring committed data survives crashes.
303    #[default]
304    Normal,
305
306    /// Async sync (faster writes, may lose recent data on crash).
307    ///
308    /// Use for development or when you can tolerate losing the last few
309    /// seconds of writes. Significantly faster than `Normal`.
310    Fast,
311
312    /// Sync every write operation (slowest, maximum durability).
313    ///
314    /// Use when data loss is absolutely unacceptable. Very slow for
315    /// high write volumes.
316    Paranoid,
317}
318
319impl SyncMode {
320    /// Returns true if this mode syncs on every write.
321    pub fn is_paranoid(&self) -> bool {
322        matches!(self, Self::Paranoid)
323    }
324
325    /// Returns true if this mode is async (may lose data on crash).
326    pub fn is_fast(&self) -> bool {
327        matches!(self, Self::Fast)
328    }
329}
330
/// Configuration for the HNSW vector index.
///
/// Controls the trade-off between index build time, memory usage,
/// and search accuracy. The defaults are tuned for PulseDB's target
/// scale (10K-500K experiences per collective).
///
/// `Config::validate` rejects a zero `max_nb_connection`, `ef_construction`,
/// or `ef_search`; `max_layer` and `max_elements` are not checked there.
///
/// # Tuning Guide
///
/// | Use Case     | M  | ef_construction | ef_search |
/// |--------------|----|-----------------|-----------|
/// | Low memory   |  8 |             100 |        30 |
/// | Balanced     | 16 |             200 |        50 |
/// | High recall  | 32 |             400 |       100 |
#[derive(Clone, Debug)]
pub struct HnswConfig {
    /// Maximum bidirectional connections per node (M parameter).
    ///
    /// Higher values improve recall but increase memory and build time.
    /// Each node stores up to M links, so memory per node is O(M).
    /// Default: 16
    pub max_nb_connection: usize,

    /// Number of candidates tracked during index construction.
    ///
    /// Higher values produce a better quality graph but slow down insertion.
    /// Rule of thumb: ef_construction >= 2 * max_nb_connection.
    /// Default: 200
    pub ef_construction: usize,

    /// Number of candidates tracked during search.
    ///
    /// Higher values improve recall but increase search latency.
    /// Must be >= k (the number of results requested).
    /// Default: 50
    pub ef_search: usize,

    /// Maximum number of layers in the skip-list structure.
    ///
    /// Lower layers are dense, upper layers are sparse "express lanes."
    /// Default 16 handles datasets up to ~1M vectors with M=16.
    /// Default: 16
    pub max_layer: usize,

    /// Initial pre-allocated capacity (number of vectors).
    ///
    /// The index grows beyond this automatically, but pre-allocation
    /// avoids reallocations for known workloads.
    /// Default: 10_000
    pub max_elements: usize,
}

impl Default for HnswConfig {
    /// Returns the "Balanced" row of the tuning guide (M = 16,
    /// ef_construction = 200, ef_search = 50) with 16 layers and room for
    /// 10,000 vectors.
    fn default() -> Self {
        Self {
            max_nb_connection: 16,
            ef_construction: 200,
            ef_search: 50,
            max_layer: 16,
            max_elements: 10_000,
        }
    }
}
/// Configuration for agent activity tracking.
///
/// Controls how stale activities are detected and filtered.
///
/// # Example
/// ```rust
/// use std::time::Duration;
/// use pulsedb::Config;
///
/// let config = Config {
///     activity: pulsedb::ActivityConfig {
///         stale_threshold: Duration::from_secs(120), // 2 minutes
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Clone, Debug)]
pub struct ActivityConfig {
    /// Duration after which an activity with no heartbeat is considered stale.
    ///
    /// Activities whose `last_heartbeat` is older than `now - stale_threshold`
    /// are excluded from `get_active_agents()` results. They remain in storage
    /// until explicitly ended or the collective is deleted.
    ///
    /// Default: 5 minutes (300 seconds)
    pub stale_threshold: Duration,
}

impl Default for ActivityConfig {
    /// Returns a config with a five-minute stale threshold.
    fn default() -> Self {
        Self {
            stale_threshold: Duration::from_secs(300),
        }
    }
}
/// Configuration for the watch system (in-process and cross-process).
///
/// Controls whether in-process channel subscriptions are enabled, the
/// channel buffer size for real-time experience notifications, and the
/// poll interval for cross-process change detection.
///
/// `Config::validate` rejects a zero `buffer_size` or `poll_interval_ms`.
///
/// # Example
/// ```rust
/// use pulsedb::Config;
///
/// let config = Config {
///     watch: pulsedb::WatchConfig {
///         in_process: true,
///         buffer_size: 500,
///         poll_interval_ms: 200,
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Clone, Debug)]
pub struct WatchConfig {
    /// Enable in-process watch subscriptions via crossbeam channels.
    ///
    /// When `true` (default), [`watch_experiences()`](crate::PulseDB::watch_experiences)
    /// streams receive real-time events. When `false`, in-process event
    /// dispatch is skipped entirely — only cross-process
    /// [`poll_changes()`](crate::PulseDB::poll_changes) remains available.
    ///
    /// Default: true
    pub in_process: bool,

    /// Maximum number of events buffered per subscriber (in-process).
    ///
    /// When a subscriber's channel is full, new events are dropped for
    /// that subscriber (with a warning log). The publisher never blocks.
    ///
    /// Default: 1000
    pub buffer_size: usize,

    /// Poll interval in milliseconds for cross-process change detection.
    ///
    /// Reader processes call `poll_changes()` at this interval to check
    /// for new experiences written by the writer process.
    ///
    /// Default: 100
    pub poll_interval_ms: u64,
}

impl Default for WatchConfig {
    /// Returns a config with in-process delivery enabled, a 1000-event
    /// buffer per subscriber, and a 100 ms poll interval.
    fn default() -> Self {
        Self {
            in_process: true,
            buffer_size: 1000,
            poll_interval_ms: 100,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `config` fails validation with an `InvalidField` error
    /// whose `field` equals `expected`.
    fn assert_invalid_field(config: &Config, expected: &str) {
        let err = config.validate().unwrap_err();
        assert!(matches!(
            err,
            ValidationError::InvalidField { field, .. } if field == expected
        ));
    }

    #[test]
    fn test_default_config() {
        let config = Config::default();
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.embedding_dimension, EmbeddingDimension::D384);
        assert_eq!(config.cache_size_mb, 64);
        assert_eq!(config.sync_mode, SyncMode::Normal);
        assert!(config.default_collective.is_none());
    }

    #[test]
    fn test_with_builtin_embeddings() {
        let config = Config::with_builtin_embeddings();
        assert!(config.embedding_provider.is_builtin());
        assert!(!config.embedding_provider.is_external());
        // Everything other than the provider keeps its default.
        assert_eq!(config.embedding_dimension, EmbeddingDimension::D384);
    }

    #[test]
    fn test_with_external_embeddings() {
        let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.dimension(), 1536);
    }

    #[test]
    fn test_validate_success() {
        let config = Config::default();
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_validate_cache_size_zero() {
        let config = Config {
            cache_size_mb: 0,
            ..Default::default()
        };
        assert_invalid_field(&config, "cache_size_mb");
    }

    #[test]
    fn test_validate_custom_dimension_zero() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(0),
            ..Default::default()
        };
        assert_invalid_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_validate_custom_dimension_too_large() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(5000),
            ..Default::default()
        };
        assert_invalid_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_validate_custom_dimension_valid() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(1536),
            ..Default::default()
        };
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_validate_custom_dimension_bounds() {
        // 1 and 4096 are the inclusive limits; 4097 is the first rejected size.
        for accepted in [1, 4096] {
            let config = Config {
                embedding_dimension: EmbeddingDimension::Custom(accepted),
                ..Default::default()
            };
            assert!(config.validate().is_ok());
        }

        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(4097),
            ..Default::default()
        };
        assert_invalid_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_embedding_dimension_sizes() {
        assert_eq!(EmbeddingDimension::D384.size(), 384);
        assert_eq!(EmbeddingDimension::D768.size(), 768);
        assert_eq!(EmbeddingDimension::Custom(512).size(), 512);
    }

    #[test]
    fn test_sync_mode_checks() {
        assert!(!SyncMode::Normal.is_fast());
        assert!(!SyncMode::Normal.is_paranoid());
        assert!(SyncMode::Fast.is_fast());
        assert!(SyncMode::Paranoid.is_paranoid());
    }

    #[test]
    fn test_hnsw_config_defaults() {
        let config = HnswConfig::default();
        assert_eq!(config.max_nb_connection, 16);
        assert_eq!(config.ef_construction, 200);
        assert_eq!(config.ef_search, 50);
        assert_eq!(config.max_layer, 16);
        assert_eq!(config.max_elements, 10_000);
    }

    #[test]
    fn test_config_includes_hnsw() {
        let config = Config::default();
        assert_eq!(config.hnsw.max_nb_connection, 16);
    }

    #[test]
    fn test_validate_hnsw_zero_max_nb_connection() {
        let config = Config {
            hnsw: HnswConfig {
                max_nb_connection: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "hnsw.max_nb_connection");
    }

    #[test]
    fn test_validate_hnsw_zero_ef_construction() {
        let config = Config {
            hnsw: HnswConfig {
                ef_construction: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "hnsw.ef_construction");
    }

    #[test]
    fn test_validate_hnsw_zero_ef_search() {
        let config = Config {
            hnsw: HnswConfig {
                ef_search: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "hnsw.ef_search");
    }

    #[test]
    fn test_embedding_dimension_serialization() {
        let dim = EmbeddingDimension::D768;
        let bytes = bincode::serialize(&dim).unwrap();
        let restored: EmbeddingDimension = bincode::deserialize(&bytes).unwrap();
        assert_eq!(dim, restored);
    }

    #[test]
    fn test_activity_config_defaults() {
        let config = ActivityConfig::default();
        assert_eq!(config.stale_threshold, Duration::from_secs(300));
    }

    #[test]
    fn test_watch_config_defaults() {
        let config = WatchConfig::default();
        assert!(config.in_process);
        assert_eq!(config.buffer_size, 1000);
        assert_eq!(config.poll_interval_ms, 100);
    }

    #[test]
    fn test_validate_watch_zero_buffer_size() {
        let config = Config {
            watch: WatchConfig {
                buffer_size: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "watch.buffer_size");
    }

    #[test]
    fn test_validate_watch_zero_poll_interval() {
        let config = Config {
            watch: WatchConfig {
                poll_interval_ms: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "watch.poll_interval_ms");
    }
}