Skip to main content

pulsedb/
config.rs

//! Configuration types for PulseDB.
//!
//! The [`Config`] struct controls database behavior including:
//! - Embedding provider (builtin ONNX or external)
//! - Embedding dimension (384, 768, or custom)
//! - Cache size and durability settings
//! - HNSW vector index parameters ([`HnswConfig`])
//! - Agent activity staleness detection ([`ActivityConfig`])
//! - Watch / change-notification settings ([`WatchConfig`])
//! - Read-only mode
//!
//! # Example
//! ```rust
//! use pulsedb::{Config, EmbeddingProvider, EmbeddingDimension, SyncMode};
//!
//! // Use defaults (External provider, 384 dimensions)
//! let config = Config::default();
//!
//! // Customize for production
//! let config = Config {
//!     embedding_dimension: EmbeddingDimension::D768,
//!     cache_size_mb: 128,
//!     sync_mode: SyncMode::Normal,
//!     ..Default::default()
//! };
//! ```
23
24use std::path::PathBuf;
25use std::time::Duration;
26
27use serde::{Deserialize, Serialize};
28
29use crate::error::ValidationError;
30use crate::types::CollectiveId;
31
32/// Database configuration options.
33///
34/// All fields have sensible defaults. Use struct update syntax to override
35/// specific settings:
36///
37/// ```rust
38/// use pulsedb::Config;
39///
40/// let config = Config {
41///     cache_size_mb: 256,
42///     ..Default::default()
43/// };
44/// ```
45#[derive(Clone, Debug)]
46pub struct Config {
47    /// How embeddings are generated or provided.
48    pub embedding_provider: EmbeddingProvider,
49
50    /// Embedding vector dimension (must match provider output).
51    pub embedding_dimension: EmbeddingDimension,
52
53    /// Default collective for operations when none specified.
54    pub default_collective: Option<CollectiveId>,
55
56    /// Cache size in megabytes for the storage engine.
57    ///
58    /// Higher values improve read performance but use more memory.
59    /// Default: 64 MB
60    pub cache_size_mb: usize,
61
62    /// Durability mode for write operations.
63    pub sync_mode: SyncMode,
64
65    /// HNSW vector index parameters.
66    ///
67    /// Controls the quality and performance of semantic search.
68    /// See [`HnswConfig`] for tuning guidelines.
69    pub hnsw: HnswConfig,
70
71    /// Agent activity tracking parameters.
72    ///
73    /// Controls staleness detection for agent heartbeats.
74    /// See [`ActivityConfig`] for details.
75    pub activity: ActivityConfig,
76
77    /// Watch system parameters.
78    ///
79    /// Controls the in-process event notification channel.
80    /// See [`WatchConfig`] for details.
81    pub watch: WatchConfig,
82
83    /// Read-only mode.
84    ///
85    /// When `true`, all mutation methods (`record_experience`, `store_relation`,
86    /// etc.) return `PulseDBError::ReadOnly`. Read operations work normally.
87    ///
88    /// Use this for read-only consumers like PulseVision that open the same
89    /// database file a writer is using.
90    ///
91    /// Default: false
92    pub read_only: bool,
93}
94
95impl Default for Config {
96    fn default() -> Self {
97        Self {
98            // External is the safe default - no ONNX dependency required
99            embedding_provider: EmbeddingProvider::External,
100            // 384 matches all-MiniLM-L6-v2, the default builtin model
101            embedding_dimension: EmbeddingDimension::D384,
102            default_collective: None,
103            cache_size_mb: 64,
104            sync_mode: SyncMode::Normal,
105            hnsw: HnswConfig::default(),
106            activity: ActivityConfig::default(),
107            watch: WatchConfig::default(),
108            read_only: false,
109        }
110    }
111}
112
113impl Config {
114    /// Creates a new Config with default settings.
115    pub fn new() -> Self {
116        Self::default()
117    }
118
119    /// Creates a Config for read-only access.
120    ///
121    /// All mutation methods will return `PulseDBError::ReadOnly`.
122    /// Use this for read-only consumers like visualization tools that
123    /// open the same database file a writer is using.
124    ///
125    /// # Example
126    /// ```rust
127    /// use pulsedb::Config;
128    ///
129    /// let config = Config::read_only();
130    /// assert!(config.read_only);
131    /// ```
132    pub fn read_only() -> Self {
133        Self {
134            read_only: true,
135            ..Default::default()
136        }
137    }
138
139    /// Creates a Config for builtin embedding generation.
140    ///
141    /// This requires the `builtin-embeddings` feature to be enabled.
142    ///
143    /// # Example
144    /// ```rust
145    /// use pulsedb::Config;
146    ///
147    /// let config = Config::with_builtin_embeddings();
148    /// ```
149    pub fn with_builtin_embeddings() -> Self {
150        Self {
151            embedding_provider: EmbeddingProvider::Builtin { model_path: None },
152            ..Default::default()
153        }
154    }
155
156    /// Creates a Config for external embedding provider.
157    ///
158    /// When using external embeddings, you must provide pre-computed
159    /// embedding vectors when recording experiences.
160    ///
161    /// # Example
162    /// ```rust
163    /// use pulsedb::{Config, EmbeddingDimension};
164    ///
165    /// // OpenAI ada-002 uses 1536 dimensions
166    /// let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
167    /// ```
168    pub fn with_external_embeddings(dimension: EmbeddingDimension) -> Self {
169        Self {
170            embedding_provider: EmbeddingProvider::External,
171            embedding_dimension: dimension,
172            ..Default::default()
173        }
174    }
175
176    /// Validates the configuration.
177    ///
178    /// Called automatically by `PulseDB::open()`. You can also call this
179    /// explicitly to check configuration before attempting to open.
180    ///
181    /// # Errors
182    /// Returns `ValidationError` if:
183    /// - `cache_size_mb` is 0
184    /// - Custom dimension is 0 or > 4096
185    pub fn validate(&self) -> Result<(), ValidationError> {
186        // Cache size must be positive
187        if self.cache_size_mb == 0 {
188            return Err(ValidationError::invalid_field(
189                "cache_size_mb",
190                "must be greater than 0",
191            ));
192        }
193
194        // Validate HNSW parameters
195        if self.hnsw.max_nb_connection == 0 {
196            return Err(ValidationError::invalid_field(
197                "hnsw.max_nb_connection",
198                "must be greater than 0",
199            ));
200        }
201        if self.hnsw.ef_construction == 0 {
202            return Err(ValidationError::invalid_field(
203                "hnsw.ef_construction",
204                "must be greater than 0",
205            ));
206        }
207        if self.hnsw.ef_search == 0 {
208            return Err(ValidationError::invalid_field(
209                "hnsw.ef_search",
210                "must be greater than 0",
211            ));
212        }
213
214        // Validate watch buffer size
215        if self.watch.buffer_size == 0 {
216            return Err(ValidationError::invalid_field(
217                "watch.buffer_size",
218                "must be greater than 0",
219            ));
220        }
221        if self.watch.poll_interval_ms == 0 {
222            return Err(ValidationError::invalid_field(
223                "watch.poll_interval_ms",
224                "must be greater than 0",
225            ));
226        }
227
228        // Validate custom dimension bounds
229        if let EmbeddingDimension::Custom(dim) = self.embedding_dimension {
230            if dim == 0 {
231                return Err(ValidationError::invalid_field(
232                    "embedding_dimension",
233                    "custom dimension must be greater than 0",
234                ));
235            }
236            if dim > 4096 {
237                return Err(ValidationError::invalid_field(
238                    "embedding_dimension",
239                    "custom dimension must not exceed 4096",
240                ));
241            }
242        }
243
244        Ok(())
245    }
246
247    /// Returns the embedding dimension as a numeric value.
248    pub fn dimension(&self) -> usize {
249        self.embedding_dimension.size()
250    }
251}
252
/// Embedding provider configuration.
///
/// Determines how embedding vectors are generated for experiences.
///
/// Implements `PartialEq`/`Eq`, so two providers (including any custom model
/// path) can be compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum EmbeddingProvider {
    /// PulseDB generates embeddings using a built-in ONNX model.
    ///
    /// Requires the `builtin-embeddings` feature. The default model is
    /// all-MiniLM-L6-v2 (384 dimensions).
    Builtin {
        /// Custom ONNX model path. If `None`, uses the bundled model.
        model_path: Option<PathBuf>,
    },

    /// Caller provides pre-computed embedding vectors.
    ///
    /// Use this when you have your own embedding service (OpenAI, Cohere, etc.)
    /// or want to use a model not bundled with PulseDB.
    External,
}

impl EmbeddingProvider {
    /// Returns `true` for the `Builtin` variant, whatever its `model_path`.
    #[must_use]
    pub fn is_builtin(&self) -> bool {
        matches!(self, Self::Builtin { .. })
    }

    /// Returns `true` for the `External` variant.
    #[must_use]
    pub fn is_external(&self) -> bool {
        matches!(self, Self::External)
    }
}
285
286/// Embedding vector dimensions.
287///
288/// Standard dimensions are provided for common models. Use `Custom` for
289/// other embedding services.
290#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
291pub enum EmbeddingDimension {
292    /// 384 dimensions (all-MiniLM-L6-v2, default builtin model).
293    #[default]
294    D384,
295
296    /// 768 dimensions (bge-base-en-v1.5, BERT-base).
297    D768,
298
299    /// Custom dimension for other embedding models.
300    ///
301    /// Must be between 1 and 4096.
302    Custom(usize),
303}
304
305impl EmbeddingDimension {
306    /// Returns the numeric size of this dimension.
307    ///
308    /// # Example
309    /// ```rust
310    /// use pulsedb::EmbeddingDimension;
311    ///
312    /// assert_eq!(EmbeddingDimension::D384.size(), 384);
313    /// assert_eq!(EmbeddingDimension::D768.size(), 768);
314    /// assert_eq!(EmbeddingDimension::Custom(1536).size(), 1536);
315    /// ```
316    #[inline]
317    pub const fn size(&self) -> usize {
318        match self {
319            Self::D384 => 384,
320            Self::D768 => 768,
321            Self::Custom(n) => *n,
322        }
323    }
324}
325
326/// Durability mode for write operations.
327///
328/// Controls the trade-off between write performance and crash safety.
329#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
330pub enum SyncMode {
331    /// Sync to disk on transaction commit.
332    ///
333    /// This is the default and recommended setting. Provides good performance
334    /// while ensuring committed data survives crashes.
335    #[default]
336    Normal,
337
338    /// Async sync (faster writes, may lose recent data on crash).
339    ///
340    /// Use for development or when you can tolerate losing the last few
341    /// seconds of writes. Significantly faster than `Normal`.
342    Fast,
343
344    /// Sync every write operation (slowest, maximum durability).
345    ///
346    /// Use when data loss is absolutely unacceptable. Very slow for
347    /// high write volumes.
348    Paranoid,
349}
350
351impl SyncMode {
352    /// Returns true if this mode syncs on every write.
353    pub fn is_paranoid(&self) -> bool {
354        matches!(self, Self::Paranoid)
355    }
356
357    /// Returns true if this mode is async (may lose data on crash).
358    pub fn is_fast(&self) -> bool {
359        matches!(self, Self::Fast)
360    }
361}
362
/// Configuration for the HNSW vector index.
///
/// Controls the trade-off between index build time, memory usage,
/// and search accuracy. The defaults are tuned for PulseDB's target
/// scale (10K-500K experiences per collective).
///
/// Implements `PartialEq`/`Eq`, so two configurations can be compared
/// directly (for example against `HnswConfig::default()`).
///
/// # Tuning Guide
///
/// | Use Case     | M  | ef_construction | ef_search |
/// |--------------|----|-----------------|-----------|
/// | Low memory   |  8 |             100 |        30 |
/// | Balanced     | 16 |             200 |        50 |
/// | High recall  | 32 |             400 |       100 |
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HnswConfig {
    /// Maximum bidirectional connections per node (M parameter).
    ///
    /// Higher values improve recall but increase memory and build time.
    /// Each node stores up to M links, so memory per node is O(M).
    /// Default: 16
    pub max_nb_connection: usize,

    /// Number of candidates tracked during index construction.
    ///
    /// Higher values produce a better quality graph but slow down insertion.
    /// Rule of thumb: ef_construction >= 2 * max_nb_connection.
    /// Default: 200
    pub ef_construction: usize,

    /// Number of candidates tracked during search.
    ///
    /// Higher values improve recall but increase search latency.
    /// Must be >= k (the number of results requested).
    /// Default: 50
    pub ef_search: usize,

    /// Maximum number of layers in the skip-list structure.
    ///
    /// Lower layers are dense, upper layers are sparse "express lanes."
    /// Default 16 handles datasets up to ~1M vectors with M=16.
    /// Default: 16
    pub max_layer: usize,

    /// Initial pre-allocated capacity (number of vectors).
    ///
    /// The index grows beyond this automatically, but pre-allocation
    /// avoids reallocations for known workloads.
    /// Default: 10_000
    pub max_elements: usize,
}

impl Default for HnswConfig {
    /// Returns the "Balanced" row of the tuning guide (M = 16,
    /// ef_construction = 200, ef_search = 50) with 16 layers and room
    /// pre-allocated for 10,000 vectors.
    fn default() -> Self {
        Self {
            max_nb_connection: 16,
            ef_construction: 200,
            ef_search: 50,
            max_layer: 16,
            max_elements: 10_000,
        }
    }
}
425
/// Configuration for agent activity tracking.
///
/// Controls how stale activities are detected and filtered.
///
/// Implements `PartialEq`/`Eq`, so two configurations can be compared
/// directly.
///
/// # Example
/// ```rust
/// use std::time::Duration;
/// use pulsedb::Config;
///
/// let config = Config {
///     activity: pulsedb::ActivityConfig {
///         stale_threshold: Duration::from_secs(120), // 2 minutes
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ActivityConfig {
    /// Duration after which an activity with no heartbeat is considered stale.
    ///
    /// Activities whose `last_heartbeat` is older than `now - stale_threshold`
    /// are excluded from `get_active_agents()` results. They remain in storage
    /// until explicitly ended or the collective is deleted.
    ///
    /// Default: 5 minutes (300 seconds)
    pub stale_threshold: Duration,
}

impl Default for ActivityConfig {
    /// Returns a configuration with a 5-minute (300 second) stale threshold.
    fn default() -> Self {
        Self {
            stale_threshold: Duration::from_secs(300),
        }
    }
}
461
/// Configuration for the watch system (in-process and cross-process).
///
/// Controls whether in-process channel subscriptions are enabled, the
/// channel buffer size for real-time experience notifications, and the
/// poll interval for cross-process change detection.
///
/// Implements `PartialEq`/`Eq`, so two configurations can be compared
/// directly.
///
/// # Example
/// ```rust
/// use pulsedb::Config;
///
/// let config = Config {
///     watch: pulsedb::WatchConfig {
///         in_process: true,
///         buffer_size: 500,
///         poll_interval_ms: 200,
///     },
///     ..Default::default()
/// };
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WatchConfig {
    /// Enable in-process watch subscriptions via crossbeam channels.
    ///
    /// When `true` (default), [`watch_experiences()`](crate::PulseDB::watch_experiences)
    /// streams receive real-time events. When `false`, in-process event
    /// dispatch is skipped entirely — only cross-process
    /// [`poll_changes()`](crate::PulseDB::poll_changes) remains available.
    ///
    /// Default: true
    pub in_process: bool,

    /// Maximum number of events buffered per subscriber (in-process).
    ///
    /// When a subscriber's channel is full, new events are dropped for
    /// that subscriber (with a warning log). The publisher never blocks.
    ///
    /// Default: 1000
    pub buffer_size: usize,

    /// Poll interval in milliseconds for cross-process change detection.
    ///
    /// Reader processes call `poll_changes()` at this interval to check
    /// for new experiences written by the writer process.
    ///
    /// Default: 100
    pub poll_interval_ms: u64,
}

impl Default for WatchConfig {
    /// Returns a configuration with in-process subscriptions enabled, a
    /// 1000-event buffer per subscriber, and a 100 ms poll interval.
    fn default() -> Self {
        Self {
            in_process: true,
            buffer_size: 1000,
            poll_interval_ms: 100,
        }
    }
}
519
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `config` fails validation with an `InvalidField` error
    /// that names `expected_field`.
    ///
    /// Checking the field (not just `is_err()`) makes sure the test fails
    /// for the reason it is meant to exercise.
    fn assert_invalid_field(config: &Config, expected_field: &str) {
        let err = config.validate().unwrap_err();
        assert!(
            matches!(err, ValidationError::InvalidField { field, .. } if field == expected_field),
            "expected an InvalidField error for `{expected_field}`"
        );
    }

    #[test]
    fn test_default_config() {
        let config = Config::default();
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.embedding_dimension, EmbeddingDimension::D384);
        assert_eq!(config.cache_size_mb, 64);
        assert_eq!(config.sync_mode, SyncMode::Normal);
        assert!(config.default_collective.is_none());
        assert!(!config.read_only);
    }

    #[test]
    fn test_new_matches_default() {
        let config = Config::new();
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.embedding_dimension, EmbeddingDimension::D384);
        assert_eq!(config.cache_size_mb, 64);
        assert!(!config.read_only);
    }

    #[test]
    fn test_read_only_config() {
        let config = Config::read_only();
        assert!(config.read_only);
        // Everything else stays at its default and still validates.
        assert_eq!(config.cache_size_mb, 64);
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_with_builtin_embeddings() {
        let config = Config::with_builtin_embeddings();
        assert!(config.embedding_provider.is_builtin());
        assert!(!config.embedding_provider.is_external());
    }

    #[test]
    fn test_with_external_embeddings() {
        let config = Config::with_external_embeddings(EmbeddingDimension::Custom(1536));
        assert!(config.embedding_provider.is_external());
        assert_eq!(config.dimension(), 1536);
    }

    #[test]
    fn test_validate_success() {
        let config = Config::default();
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_validate_cache_size_zero() {
        let config = Config {
            cache_size_mb: 0,
            ..Default::default()
        };
        assert_invalid_field(&config, "cache_size_mb");
    }

    #[test]
    fn test_validate_custom_dimension_zero() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(0),
            ..Default::default()
        };
        assert_invalid_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_validate_custom_dimension_too_large() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(5000),
            ..Default::default()
        };
        assert_invalid_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_validate_custom_dimension_valid() {
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(1536),
            ..Default::default()
        };
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_validate_custom_dimension_bounds() {
        // Smallest and largest accepted custom sizes.
        for accepted in [1, 4096] {
            let config = Config {
                embedding_dimension: EmbeddingDimension::Custom(accepted),
                ..Default::default()
            };
            assert!(config.validate().is_ok());
        }

        // First size past the upper limit.
        let config = Config {
            embedding_dimension: EmbeddingDimension::Custom(4097),
            ..Default::default()
        };
        assert_invalid_field(&config, "embedding_dimension");
    }

    #[test]
    fn test_embedding_dimension_sizes() {
        assert_eq!(EmbeddingDimension::D384.size(), 384);
        assert_eq!(EmbeddingDimension::D768.size(), 768);
        assert_eq!(EmbeddingDimension::Custom(512).size(), 512);
    }

    #[test]
    fn test_sync_mode_checks() {
        assert!(!SyncMode::Normal.is_fast());
        assert!(!SyncMode::Normal.is_paranoid());
        assert!(SyncMode::Fast.is_fast());
        assert!(SyncMode::Paranoid.is_paranoid());
    }

    #[test]
    fn test_hnsw_config_defaults() {
        let config = HnswConfig::default();
        assert_eq!(config.max_nb_connection, 16);
        assert_eq!(config.ef_construction, 200);
        assert_eq!(config.ef_search, 50);
        assert_eq!(config.max_layer, 16);
        assert_eq!(config.max_elements, 10_000);
    }

    #[test]
    fn test_config_includes_hnsw() {
        let config = Config::default();
        assert_eq!(config.hnsw.max_nb_connection, 16);
    }

    #[test]
    fn test_validate_hnsw_zero_max_nb_connection() {
        let config = Config {
            hnsw: HnswConfig {
                max_nb_connection: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "hnsw.max_nb_connection");
    }

    #[test]
    fn test_validate_hnsw_zero_ef_construction() {
        let config = Config {
            hnsw: HnswConfig {
                ef_construction: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "hnsw.ef_construction");
    }

    #[test]
    fn test_validate_hnsw_zero_ef_search() {
        let config = Config {
            hnsw: HnswConfig {
                ef_search: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "hnsw.ef_search");
    }

    #[test]
    fn test_embedding_dimension_serialization() {
        let dim = EmbeddingDimension::D768;
        let bytes = bincode::serialize(&dim).unwrap();
        let restored: EmbeddingDimension = bincode::deserialize(&bytes).unwrap();
        assert_eq!(dim, restored);
    }

    #[test]
    fn test_activity_config_defaults() {
        let config = ActivityConfig::default();
        assert_eq!(config.stale_threshold, Duration::from_secs(300));
    }

    #[test]
    fn test_watch_config_defaults() {
        let config = WatchConfig::default();
        assert!(config.in_process);
        assert_eq!(config.buffer_size, 1000);
        assert_eq!(config.poll_interval_ms, 100);
    }

    #[test]
    fn test_validate_watch_zero_buffer_size() {
        let config = Config {
            watch: WatchConfig {
                buffer_size: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "watch.buffer_size");
    }

    #[test]
    fn test_validate_watch_zero_poll_interval() {
        let config = Config {
            watch: WatchConfig {
                poll_interval_ms: 0,
                ..Default::default()
            },
            ..Default::default()
        };
        assert_invalid_field(&config, "watch.poll_interval_ms");
    }
}