semantic-memory 0.5.0

Hybrid semantic search with SQLite, FTS5, and HNSW — built for AI agents
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
use crate::error::MemoryError;
use crate::tokenizer::TokenCounter;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

/// Top-level configuration for the memory system.
///
/// Groups the storage location with the embedding, search, chunking,
/// connection-pool and resource-limit settings. The struct derives `Clone`,
/// `Serialize` and `Deserialize`; `Debug` is implemented manually elsewhere
/// in this file. Fields annotated with `#[serde(skip)]` are excluded from the
/// serialized form.
#[derive(Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
    /// Base directory for all storage files (SQLite + HNSW sidecar files).
    /// Replaces the v0.1.0 `database_path` field.
    /// The `Default` impl uses the relative path `memory`.
    pub base_dir: PathBuf,

    /// Embedding provider configuration (endpoint URL, model, dimensions,
    /// batch size and request timeout).
    pub embedding: EmbeddingConfig,

    /// Search tuning parameters (fusion weights, candidate pool, thresholds).
    pub search: SearchConfig,

    /// Chunking parameters (target/min/max chunk size and overlap).
    pub chunking: ChunkingConfig,

    /// Connection pool configuration.
    pub pool: PoolConfig,

    /// Resource limits.
    pub limits: MemoryLimits,

    /// Custom token counter. None = use EstimateTokenCounter (chars / 4).
    /// Not serialized (`#[serde(skip)]`).
    #[serde(skip)]
    pub token_counter: Option<Arc<dyn TokenCounter>>,

    /// HNSW index configuration.
    /// Only present when the `hnsw` feature is enabled; not serialized.
    /// `normalize_and_validate` overwrites its `dimensions` with
    /// `embedding.dimensions`.
    #[cfg(feature = "hnsw")]
    #[serde(skip)]
    pub hnsw: crate::hnsw::HnswConfig,
}

impl std::fmt::Debug for MemoryConfig {
    /// Formats the configuration as a `MemoryConfig { .. }` debug struct.
    ///
    /// The token counter is rendered only as a marker: `Some("custom")` when
    /// one is set and `None` otherwise. The `hnsw` field is included only
    /// when the `hnsw` feature is enabled.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Print a placeholder instead of the counter's contents.
        let counter_marker: Option<&str> = if self.token_counter.is_some() {
            Some("custom")
        } else {
            None
        };

        let mut builder = f.debug_struct("MemoryConfig");
        builder.field("base_dir", &self.base_dir);
        builder.field("embedding", &self.embedding);
        builder.field("search", &self.search);
        builder.field("chunking", &self.chunking);
        builder.field("pool", &self.pool);
        builder.field("limits", &self.limits);
        builder.field("token_counter", &counter_marker);
        #[cfg(feature = "hnsw")]
        builder.field("hnsw", &self.hnsw);
        builder.finish()
    }
}

impl Default for MemoryConfig {
    fn default() -> Self {
        Self {
            base_dir: PathBuf::from("memory"),
            embedding: EmbeddingConfig::default(),
            search: SearchConfig::default(),
            chunking: ChunkingConfig::default(),
            pool: PoolConfig::default(),
            limits: MemoryLimits::default(),
            token_counter: None,
            #[cfg(feature = "hnsw")]
            hnsw: crate::hnsw::HnswConfig::default(),
        }
    }
}

impl MemoryConfig {
    /// Normalizes and validates every section of the configuration.
    ///
    /// Sections are processed in a fixed order: embedding, limits, search,
    /// chunking, pool. The first failing section determines the returned
    /// error. After the limits are normalized, `embedding.timeout_secs` is
    /// capped at the whole-second value of `limits.embedding_timeout`
    /// (treated as at least 1 second). With the `hnsw` feature enabled, the
    /// HNSW dimensions are overwritten with `embedding.dimensions`.
    ///
    /// This is the single canonical config entry point used by store creation.
    ///
    /// # Errors
    ///
    /// Returns the `MemoryError` produced by the first sub-configuration
    /// that fails validation.
    pub fn normalize_and_validate(mut self) -> Result<Self, MemoryError> {
        self.embedding.normalize_and_validate()?;

        let limits = self.limits.normalize_and_validate()?;
        // A sub-second limit truncates to 0 whole seconds; never cap below 1.
        let cap_secs = std::cmp::max(limits.embedding_timeout.as_secs(), 1);
        self.limits = limits;
        if self.embedding.timeout_secs > cap_secs {
            self.embedding.timeout_secs = cap_secs;
        }

        self.search.normalize_and_validate()?;
        self.chunking.normalize_and_validate()?;
        self.pool.normalize_and_validate()?;

        #[cfg(feature = "hnsw")]
        {
            // Keep the index dimensionality in lock-step with the embedder.
            self.hnsw.dimensions = self.embedding.dimensions;
        }

        Ok(self)
    }
}

/// Embedding provider configuration.
///
/// Describes how to reach the Ollama embedding endpoint and how requests are
/// shaped. The notes on each field about normalization refer to
/// `EmbeddingConfig::normalize_and_validate` and
/// `MemoryConfig::normalize_and_validate` in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingConfig {
    /// Ollama base URL.
    /// Validation requires it to parse as an `http` or `https` URL with a host.
    /// Default: `http://localhost:11434`.
    pub ollama_url: String,

    /// Embedding model name.
    /// Default: `nomic-embed-text`.
    pub model: String,

    /// Expected embedding dimensions.
    /// A value of 0 is rejected during validation.
    /// Default: 768.
    pub dimensions: usize,

    /// Maximum texts to embed in a single API call.
    /// A value of 0 is raised to 1 during normalization.
    /// Default: 32.
    pub batch_size: usize,

    /// Timeout for embedding requests in seconds.
    /// A value of 0 is raised to 1 during normalization, and the value is
    /// capped by `limits.embedding_timeout` when the whole `MemoryConfig` is
    /// normalized.
    /// Default: 30.
    pub timeout_secs: u64,
}

impl Default for EmbeddingConfig {
    fn default() -> Self {
        Self {
            ollama_url: "http://localhost:11434".to_string(),
            model: "nomic-embed-text".to_string(),
            dimensions: 768,
            batch_size: 32,
            timeout_secs: 30,
        }
    }
}

impl EmbeddingConfig {
    /// Normalizes soft settings in place and validates hard requirements.
    ///
    /// * `dimensions == 0` is rejected before anything is modified.
    /// * `batch_size` and `timeout_secs` are raised to at least 1.
    /// * `ollama_url` must parse as an `http`/`https` URL that has a host.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::InvalidConfig` naming `embedding.dimensions` or
    /// `embedding.ollama_url` when the corresponding check fails.
    fn normalize_and_validate(&mut self) -> Result<(), MemoryError> {
        if self.dimensions == 0 {
            return Err(MemoryError::InvalidConfig {
                field: "embedding.dimensions",
                reason: "dimensions must be at least 1".to_string(),
            });
        }

        self.batch_size = self.batch_size.max(1);
        self.timeout_secs = self.timeout_secs.max(1);

        // Parse failures and scheme/host failures share one error.
        let bad_url = || MemoryError::InvalidConfig {
            field: "embedding.ollama_url",
            reason: "must be an absolute http:// or https:// URL".to_string(),
        };

        let url = reqwest::Url::parse(&self.ollama_url).map_err(|_| bad_url())?;
        let scheme_ok = matches!(url.scheme(), "http" | "https");
        if scheme_ok && url.host_str().is_some() {
            Ok(())
        } else {
            Err(bad_url())
        }
    }
}

/// Search tuning parameters.
///
/// Controls how BM25 and vector candidates are gathered and fused with RRF,
/// plus optional recency boosting and HNSW reranking. Constraints mentioned
/// on the fields are enforced by `SearchConfig::normalize_and_validate` in
/// this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchConfig {
    /// Weight for BM25 score in RRF fusion.
    /// Must be finite and >= 0. Default: 1.0.
    pub bm25_weight: f64,

    /// Weight for vector similarity in RRF fusion.
    /// Must be finite and >= 0. Default: 1.0.
    pub vector_weight: f64,

    /// RRF constant (k). Controls rank importance decay.
    /// Must be finite and > 0. Default: 60.0.
    pub rrf_k: f64,

    /// Number of candidates from each search method before fusion.
    /// Normalization raises 0 to 1 and then raises the value to at least
    /// `default_top_k`. Default: 50.
    pub candidate_pool_size: usize,

    /// Default number of results to return.
    /// Normalization raises 0 to 1. Default: 5.
    pub default_top_k: usize,

    /// Minimum cosine similarity threshold for vector candidates.
    /// Must be finite and within [-1.0, 1.0]. Default: 0.3.
    pub min_similarity: f64,

    /// Optional recency boost. If enabled, results are boosted based on how
    /// recently they were created/updated. The value is the half-life in days:
    /// a fact that is `recency_half_life_days` old gets 50% of the recency boost.
    /// When set, the value must be finite and > 0.
    /// None = no recency weighting (current behavior, default).
    pub recency_half_life_days: Option<f64>,

    /// Weight of the recency boost relative to BM25 and vector scores in RRF.
    /// Only used when recency_half_life_days is Some.
    /// Must be finite and >= 0.
    /// Default: 0.5
    pub recency_weight: f64,

    /// When true, rerank top HNSW candidates using exact f32 cosine similarity
    /// from SQLite. Improves recall at the cost of one batched SQL query.
    /// Only applies when HNSW feature is enabled.
    /// Default: true
    pub rerank_from_f32: bool,
}

impl Default for SearchConfig {
    fn default() -> Self {
        Self {
            bm25_weight: 1.0,
            vector_weight: 1.0,
            rrf_k: 60.0,
            candidate_pool_size: 50,
            default_top_k: 5,
            min_similarity: 0.3,
            recency_half_life_days: None,
            recency_weight: 0.5,
            rerank_from_f32: true,
        }
    }
}

impl SearchConfig {
    /// Normalizes the result-count settings and validates the numeric tuning
    /// parameters.
    ///
    /// Normalization (always applied first):
    /// * `default_top_k` is raised to at least 1.
    /// * `candidate_pool_size` is raised to at least `default_top_k`.
    ///
    /// Validation, in this order: `rrf_k`, `bm25_weight`, `vector_weight`,
    /// `recency_weight`, `min_similarity`, `recency_half_life_days`.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::InvalidConfig` for the first parameter that is
    /// non-finite or outside its allowed range.
    fn normalize_and_validate(&mut self) -> Result<(), MemoryError> {
        let invalid = |field: &'static str, reason: &str| MemoryError::InvalidConfig {
            field,
            reason: reason.to_string(),
        };

        // With top_k >= 1, raising the pool to top_k also makes it non-zero.
        self.default_top_k = self.default_top_k.max(1);
        self.candidate_pool_size = self.candidate_pool_size.max(self.default_top_k);

        if !(self.rrf_k.is_finite() && self.rrf_k > 0.0) {
            return Err(invalid("search.rrf_k", "rrf_k must be finite and > 0"));
        }

        // All fusion weights share the same rule: finite and non-negative.
        let weights: [(&'static str, f64, &str); 3] = [
            (
                "search.bm25_weight",
                self.bm25_weight,
                "bm25_weight must be finite and >= 0",
            ),
            (
                "search.vector_weight",
                self.vector_weight,
                "vector_weight must be finite and >= 0",
            ),
            (
                "search.recency_weight",
                self.recency_weight,
                "recency_weight must be finite and >= 0",
            ),
        ];
        for &(field, weight, reason) in &weights {
            if !(weight.is_finite() && weight >= 0.0) {
                return Err(invalid(field, reason));
            }
        }

        let similarity = self.min_similarity;
        if !(similarity.is_finite() && (-1.0..=1.0).contains(&similarity)) {
            return Err(invalid(
                "search.min_similarity",
                "min_similarity must be finite and within [-1.0, 1.0]",
            ));
        }

        if let Some(half_life) = self.recency_half_life_days {
            if !half_life.is_finite() {
                return Err(invalid(
                    "search.recency_half_life_days",
                    "recency_half_life_days must be finite",
                ));
            }
            if half_life <= 0.0 {
                return Err(invalid(
                    "search.recency_half_life_days",
                    "recency_half_life_days must be > 0 when enabled",
                ));
            }
        }

        Ok(())
    }
}

/// Text chunking parameters.
///
/// All sizes are expressed in characters. The relationships between the
/// fields (`min_size <= target_size <= max_size`, `overlap < min_size`) are
/// established by `ChunkingConfig::normalize_and_validate` in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkingConfig {
    /// Target chunk size in characters.
    /// Normalization clamps it into `[min_size, max_size]`. Default: 1000.
    pub target_size: usize,

    /// Minimum chunk size. Chunks smaller than this are merged with neighbors.
    /// Normalization raises 0 to 1. Default: 100.
    pub min_size: usize,

    /// Maximum chunk size. Chunks larger than this are force-split.
    /// Must be at least 1 and not smaller than `min_size`. Default: 2000.
    pub max_size: usize,

    /// Overlap between adjacent chunks in characters.
    /// Normalization caps it at `min_size - 1`. Default: 200.
    /// NOTE(review): with the default `min_size` of 100 the default overlap is
    /// reduced to 99 by normalization; confirm this is intended.
    pub overlap: usize,
}

impl Default for ChunkingConfig {
    fn default() -> Self {
        Self {
            target_size: 1000,
            min_size: 100,
            max_size: 2000,
            overlap: 200,
        }
    }
}

impl ChunkingConfig {
    /// Normalizes the chunk sizes in place and validates `max_size`.
    ///
    /// * `min_size` is raised to at least 1.
    /// * `max_size` must be non-zero and at least `min_size`.
    /// * `target_size` is clamped into `[min_size, max_size]`.
    /// * `overlap` is capped at `min_size - 1`.
    ///
    /// NOTE(review): because the cap is derived from `min_size`, the default
    /// overlap of 200 (with the default `min_size` of 100) becomes 99 here.
    /// Confirm that capping against `min_size` rather than `target_size` is
    /// intended.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::InvalidConfig` for `chunking.max_size` when it is
    /// zero or smaller than `min_size`.
    fn normalize_and_validate(&mut self) -> Result<(), MemoryError> {
        self.min_size = self.min_size.max(1);

        if self.max_size == 0 {
            return Err(MemoryError::InvalidConfig {
                field: "chunking.max_size",
                reason: "max_size must be at least 1".to_string(),
            });
        }
        if self.min_size > self.max_size {
            return Err(MemoryError::InvalidConfig {
                field: "chunking.max_size",
                reason: "max_size must be >= min_size".to_string(),
            });
        }

        // min_size <= max_size is guaranteed above, so clamp cannot panic.
        self.target_size = self.target_size.clamp(self.min_size, self.max_size);

        let overlap_cap = self.min_size.saturating_sub(1);
        self.overlap = self.overlap.min(overlap_cap);

        Ok(())
    }
}

/// Connection pool configuration for SQLite.
///
/// Controls busy timeout and WAL checkpoint behavior. These defaults
/// are tuned for a single-process server on local SSD storage.
/// Adjustments mentioned on the fields are applied by
/// `PoolConfig::normalize_and_validate` in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PoolConfig {
    /// SQLite busy timeout in milliseconds.
    /// A value of 0 is raised to 1 during normalization.
    /// Default: 5000 (5 seconds).
    pub busy_timeout_ms: u32,

    /// WAL auto-checkpoint threshold in pages.
    /// A value of 0 is raised to 1 during normalization.
    /// Default: 1000 (~4 MB with 4KB pages).
    pub wal_autocheckpoint: u32,

    /// Enable WAL mode. Should almost always be true.
    /// Default: true.
    pub enable_wal: bool,

    /// Number of reader connections kept in the pool.
    /// Writes still flow through a single writer connection because SQLite
    /// allows only one concurrent writer, but readers can proceed concurrently
    /// under WAL semantics.
    /// A value of 0 is rejected during validation.
    /// Default: 4.
    pub max_read_connections: usize,

    /// Timeout in seconds for acquiring a reader connection from the pool.
    /// Normalization keeps the value within 1..=300.
    /// Default: 30 seconds.
    pub reader_timeout_secs: u64,
}

impl Default for PoolConfig {
    fn default() -> Self {
        Self {
            busy_timeout_ms: 5000,
            wal_autocheckpoint: 1000,
            enable_wal: true,
            max_read_connections: 4,
            reader_timeout_secs: 30,
        }
    }
}

impl PoolConfig {
    /// Normalizes the pool settings in place and validates the reader count.
    ///
    /// * `busy_timeout_ms` and `wal_autocheckpoint` are raised to at least 1.
    /// * `max_read_connections` must be non-zero.
    /// * `reader_timeout_secs` is kept within 1..=300.
    ///
    /// The timeout and checkpoint adjustments happen before the reader-count
    /// check, so they remain applied even when an error is returned.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::InvalidConfig` for `pool.max_read_connections`
    /// when it is zero.
    fn normalize_and_validate(&mut self) -> Result<(), MemoryError> {
        self.busy_timeout_ms = self.busy_timeout_ms.max(1);
        self.wal_autocheckpoint = self.wal_autocheckpoint.max(1);

        if self.max_read_connections < 1 {
            return Err(MemoryError::InvalidConfig {
                field: "pool.max_read_connections",
                reason: "set pool.max_read_connections to at least 1".to_string(),
            });
        }

        self.reader_timeout_secs = self.reader_timeout_secs.clamp(1, 300);
        Ok(())
    }
}

/// Resource limits for the memory system.
///
/// Prevents runaway resource usage. All limits have defaults tuned for
/// a laptop-class server (8GB RAM, SSD storage).
/// Constraints mentioned on the fields are enforced by
/// `MemoryLimits::normalize_and_validate` in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryLimits {
    /// Maximum number of facts per namespace.
    /// A value of 0 is rejected during validation.
    /// Default: 100_000.
    pub max_facts_per_namespace: usize,

    /// Maximum number of chunks per document.
    /// A value of 0 is rejected during validation.
    /// Default: 1_000.
    pub max_chunks_per_document: usize,

    /// Maximum content size in bytes for a single fact or message.
    /// A value of 0 is rejected during validation.
    /// Default: 1 MB (1_048_576 bytes).
    pub max_content_bytes: usize,

    /// Maximum number of concurrent embedding requests.
    /// Hard-capped at 32 regardless of config.
    /// A value of 0 is raised to 1 during normalization.
    /// Default: 8.
    pub max_embedding_concurrency: usize,

    /// Maximum total database size in bytes. 0 = unlimited.
    /// Default: 0 (unlimited).
    pub max_db_size_bytes: u64,

    /// Embedding request timeout.
    /// A zero duration is raised to 1 second during normalization.
    /// Serialized through the `duration_secs` module as a whole number of
    /// seconds, so any sub-second part is dropped on serialization.
    /// Default: 30 seconds.
    #[serde(with = "duration_secs")]
    pub embedding_timeout: Duration,
}

impl Default for MemoryLimits {
    fn default() -> Self {
        Self {
            max_facts_per_namespace: 100_000,
            max_chunks_per_document: 1_000,
            max_content_bytes: 1_048_576,
            max_embedding_concurrency: 8,
            max_db_size_bytes: 0,
            embedding_timeout: Duration::from_secs(30),
        }
    }
}

impl MemoryLimits {
    /// Validates the mandatory limits and clamps the soft ones.
    ///
    /// * `max_facts_per_namespace`, `max_chunks_per_document` and
    ///   `max_content_bytes` must each be at least 1 (checked in that order).
    /// * `max_embedding_concurrency` is clamped into 1..=32.
    /// * A zero `embedding_timeout` is raised to 1 second.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::InvalidConfig` naming the first mandatory limit
    /// that is zero.
    pub fn normalize_and_validate(mut self) -> Result<Self, MemoryError> {
        let must_be_nonzero: [(&'static str, usize); 3] = [
            (
                "limits.max_facts_per_namespace",
                self.max_facts_per_namespace,
            ),
            (
                "limits.max_chunks_per_document",
                self.max_chunks_per_document,
            ),
            ("limits.max_content_bytes", self.max_content_bytes),
        ];
        for &(field, value) in &must_be_nonzero {
            if value == 0 {
                return Err(MemoryError::InvalidConfig {
                    field,
                    reason: "must be at least 1".to_string(),
                });
            }
        }

        // Hard cap at 32 concurrent requests, and never fewer than one.
        self.max_embedding_concurrency = self.max_embedding_concurrency.clamp(1, 32);

        if self.embedding_timeout.is_zero() {
            self.embedding_timeout = Duration::from_secs(1);
        }

        Ok(self)
    }

    /// Backward-compatible, infallible variant of `normalize_and_validate`.
    ///
    /// Returns the normalized limits when they are valid. Otherwise it logs a
    /// warning and returns the default limits, with the same clamping applied
    /// so the fallback never depends on the defaults staying in range.
    pub fn validated(self) -> Self {
        match self.normalize_and_validate() {
            Ok(limits) => limits,
            Err(err) => {
                tracing::warn!(
                    error = %err,
                    "invalid MemoryLimits supplied to validated(); using defaults"
                );
                // Re-apply the soft clamps defensively to the defaults.
                let mut fallback = Self::default();
                fallback.max_embedding_concurrency =
                    fallback.max_embedding_concurrency.clamp(1, 32);
                if fallback.embedding_timeout.is_zero() {
                    fallback.embedding_timeout = Duration::from_secs(1);
                }
                fallback
            }
        }
    }
}

/// Serde adapter that represents a `Duration` as a whole number of seconds.
///
/// Intended for use with `#[serde(with = "duration_secs")]`. Any sub-second
/// part of the duration is dropped on serialization.
mod duration_secs {
    use serde::{Deserialize, Deserializer, Serializer};
    use std::time::Duration;

    /// Writes the duration as a `u64` count of whole seconds.
    pub fn serialize<S>(d: &Duration, s: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let whole_secs: u64 = d.as_secs();
        s.serialize_u64(whole_secs)
    }

    /// Reads a `u64` count of seconds and converts it into a `Duration`.
    pub fn deserialize<'de, D>(d: D) -> Result<Duration, D::Error>
    where
        D: Deserializer<'de>,
    {
        u64::deserialize(d).map(Duration::from_secs)
    }
}