Skip to main content

zeph_memory/
eviction.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Memory eviction subsystem.
5//!
6//! Provides a trait-based eviction policy framework with an Ebbinghaus
7//! forgetting curve implementation. The background sweep loop runs
8//! periodically, scoring entries and soft-deleting the lowest-scoring ones
9//! from `SQLite` before removing their `Qdrant` vectors in a second phase.
10//!
11//! Two-phase design ensures crash safety: soft-deleted `SQLite` rows are
12//! invisible to the application immediately, and `Qdrant` cleanup is retried
13//! on the next sweep if the agent crashes between phases.
14
15use std::sync::Arc;
16
17use tokio::task::JoinHandle;
18use tokio::time::{Duration, interval};
19use tokio_util::sync::CancellationToken;
20
21use crate::error::MemoryError;
22use crate::store::SqliteStore;
23use crate::types::MessageId;
24
25// ── Public types ──────────────────────────────────────────────────────────────
26
27/// Metadata for a single memory entry evaluated by [`EvictionPolicy::score`].
28#[derive(Debug, Clone)]
29pub struct EvictionEntry {
30    /// `SQLite` row ID of the message.
31    pub id: MessageId,
32    /// ISO 8601 creation timestamp (TEXT column from `SQLite`, UTC).
33    pub created_at: String,
34    /// ISO 8601 last-accessed timestamp, or `None` if never accessed after creation.
35    pub last_accessed: Option<String>,
36    /// Number of times this message has been retrieved via recall.
37    pub access_count: u32,
38}
39
40/// Trait for eviction scoring strategies.
41///
42/// Implementations must be `Send + Sync` so they can be shared across threads.
43pub trait EvictionPolicy: Send + Sync {
44    /// Compute a retention score for the given entry.
45    ///
46    /// Higher scores mean the entry is more likely to be retained.
47    /// Lower scores mean the entry is a candidate for eviction.
48    fn score(&self, entry: &EvictionEntry) -> f64;
49}
50
51/// Configuration for the eviction subsystem.
52#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
53#[serde(default)]
54pub struct EvictionConfig {
55    /// Policy name. Currently only `"ebbinghaus"` is supported.
56    pub policy: String,
57    /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
58    pub max_entries: usize,
59    /// How often to run the eviction sweep, in seconds.
60    pub sweep_interval_secs: u64,
61}
62
63impl Default for EvictionConfig {
64    fn default() -> Self {
65        Self {
66            policy: "ebbinghaus".to_owned(),
67            max_entries: 0,
68            sweep_interval_secs: 3600,
69        }
70    }
71}
72
73// ── Ebbinghaus policy ─────────────────────────────────────────────────────────
74
75/// Ebbinghaus forgetting curve eviction policy.
76///
77/// Score formula:
78///   `score = exp(-t / (S * ln(1 + n)))`
79///
80/// Where:
81/// - `t` = seconds since `last_accessed` (or `created_at` if never accessed)
82/// - `S` = `retention_strength` (higher = slower decay)
83/// - `n` = `access_count`
84///
85/// Entries with a high access count or recent access get higher scores
86/// and are less likely to be evicted.
87pub struct EbbinghausPolicy {
88    retention_strength: f64,
89}
90
91impl EbbinghausPolicy {
92    /// Create a new policy with the given retention strength.
93    ///
94    /// A good default is `86400.0` (one day in seconds).
95    #[must_use]
96    pub fn new(retention_strength: f64) -> Self {
97        Self { retention_strength }
98    }
99}
100
101impl Default for EbbinghausPolicy {
102    fn default() -> Self {
103        Self::new(86_400.0) // 1 day
104    }
105}
106
107impl EvictionPolicy for EbbinghausPolicy {
108    fn score(&self, entry: &EvictionEntry) -> f64 {
109        let now_secs = unix_now_secs();
110
111        let reference_secs = entry
112            .last_accessed
113            .as_deref()
114            .and_then(parse_sqlite_timestamp_secs)
115            .unwrap_or_else(|| parse_sqlite_timestamp_secs(&entry.created_at).unwrap_or(now_secs));
116
117        // Clamp t >= 0 to handle clock skew or future timestamps.
118        #[allow(clippy::cast_precision_loss)]
119        let t = now_secs.saturating_sub(reference_secs) as f64;
120        let n = f64::from(entry.access_count);
121
122        // ln(1 + 0) = 0 which would divide by zero — use 1.0 as minimum denominator.
123        let denominator = (self.retention_strength * (1.0_f64 + n).ln()).max(1.0);
124        (-t / denominator).exp()
125    }
126}
127
128fn unix_now_secs() -> u64 {
129    std::time::SystemTime::now()
130        .duration_since(std::time::UNIX_EPOCH)
131        .map(|d| d.as_secs())
132        .unwrap_or(0)
133}
134
135/// Parse a `SQLite` TEXT timestamp ("YYYY-MM-DD HH:MM:SS") into Unix seconds.
136///
137/// Does not use `chrono` to avoid adding a dependency to `zeph-memory`.
138fn parse_sqlite_timestamp_secs(s: &str) -> Option<u64> {
139    // Expected format: "YYYY-MM-DD HH:MM:SS"
140    let s = s.trim();
141    if s.len() < 19 {
142        return None;
143    }
144    let year: u64 = s[0..4].parse().ok()?;
145    let month: u64 = s[5..7].parse().ok()?;
146    let day: u64 = s[8..10].parse().ok()?;
147    let hour: u64 = s[11..13].parse().ok()?;
148    let min: u64 = s[14..16].parse().ok()?;
149    let sec: u64 = s[17..19].parse().ok()?;
150
151    // Days since Unix epoch (1970-01-01). Simple but accurate for years 1970-2099.
152    // Leap year calculation: divisible by 4 and not 100, or divisible by 400.
153    let is_leap = |y: u64| (y.is_multiple_of(4) && !y.is_multiple_of(100)) || y.is_multiple_of(400);
154    let days_in_month = |y: u64, m: u64| -> u64 {
155        match m {
156            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
157            4 | 6 | 9 | 11 => 30,
158            2 => {
159                if is_leap(y) {
160                    29
161                } else {
162                    28
163                }
164            }
165            _ => 0,
166        }
167    };
168
169    let mut days: u64 = 0;
170    for y in 1970..year {
171        days += if is_leap(y) { 366 } else { 365 };
172    }
173    for m in 1..month {
174        days += days_in_month(year, m);
175    }
176    days += day.saturating_sub(1);
177
178    Some(days * 86400 + hour * 3600 + min * 60 + sec)
179}
180
181// ── Sweep loop ────────────────────────────────────────────────────────────────
182
183/// Start the background eviction loop.
184///
185/// The loop runs every `config.sweep_interval_secs` seconds. Each iteration:
186/// 1. Queries `SQLite` for all non-deleted entries and their eviction metadata.
187/// 2. Scores each entry using `policy`.
188/// 3. If the count exceeds `config.max_entries`, soft-deletes the excess lowest-scoring rows.
189/// 4. Queries for all soft-deleted rows and attempts to remove their Qdrant vectors.
190///    If Qdrant removal fails, it is retried on the next sweep cycle.
191///
192/// If `config.max_entries == 0`, the loop exits immediately without doing anything.
193///
194/// # Errors (non-fatal)
195///
196/// Database and Qdrant errors are logged but do not stop the loop.
197pub fn start_eviction_loop(
198    store: Arc<SqliteStore>,
199    config: &EvictionConfig,
200    policy: Arc<dyn EvictionPolicy + 'static>,
201    cancel: CancellationToken,
202) -> JoinHandle<()> {
203    let config = config.clone();
204    tokio::spawn(async move {
205        if config.max_entries == 0 {
206            tracing::debug!("eviction disabled (max_entries = 0)");
207            return;
208        }
209
210        let mut ticker = interval(Duration::from_secs(config.sweep_interval_secs));
211        // Skip the first immediate tick so the loop doesn't run at startup.
212        ticker.tick().await;
213
214        loop {
215            tokio::select! {
216                () = cancel.cancelled() => {
217                    tracing::debug!("eviction loop shutting down");
218                    return;
219                }
220                _ = ticker.tick() => {}
221            }
222
223            tracing::debug!(max_entries = config.max_entries, "running eviction sweep");
224
225            // Phase 1: score and soft-delete excess entries.
226            match run_eviction_phase1(&store, &*policy, config.max_entries).await {
227                Ok(deleted) => {
228                    if deleted > 0 {
229                        tracing::info!(deleted, "eviction phase 1: soft-deleted entries");
230                    }
231                }
232                Err(e) => {
233                    tracing::warn!(error = %e, "eviction phase 1 failed, will retry next sweep");
234                }
235            }
236
237            // Phase 2: clean up soft-deleted entries from Qdrant.
238            // On startup or after a crash, this also cleans up any orphaned vectors.
239            match run_eviction_phase2(&store).await {
240                Ok(cleaned) => {
241                    if cleaned > 0 {
242                        tracing::info!(cleaned, "eviction phase 2: removed Qdrant vectors");
243                    }
244                }
245                Err(e) => {
246                    tracing::warn!(error = %e, "eviction phase 2 failed, will retry next sweep");
247                }
248            }
249        }
250    })
251}
252
253async fn run_eviction_phase1(
254    store: &SqliteStore,
255    policy: &dyn EvictionPolicy,
256    max_entries: usize,
257) -> Result<usize, MemoryError> {
258    let candidates = store.get_eviction_candidates().await?;
259    let total = candidates.len();
260
261    if total <= max_entries {
262        return Ok(0);
263    }
264
265    let excess = total - max_entries;
266    let mut scored: Vec<(f64, MessageId)> = candidates
267        .into_iter()
268        .map(|e| (policy.score(&e), e.id))
269        .collect();
270
271    // Sort ascending by score — lowest scores (most forgettable) first.
272    scored.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
273
274    let ids_to_delete: Vec<MessageId> = scored.into_iter().take(excess).map(|(_, id)| id).collect();
275    store.soft_delete_messages(&ids_to_delete).await?;
276
277    Ok(ids_to_delete.len())
278}
279
280async fn run_eviction_phase2(store: &SqliteStore) -> Result<usize, MemoryError> {
281    // Find all soft-deleted entries that haven't been cleaned from Qdrant yet.
282    let ids = store.get_soft_deleted_message_ids().await?;
283    if ids.is_empty() {
284        return Ok(0);
285    }
286
287    // TODO: call Qdrant delete-vectors API here before marking as cleaned.
288    // The embedding_store handles vector lifecycle separately; when that API
289    // is wired in, the call should happen here and mark_qdrant_cleaned should
290    // only be called on success. Tracked in issue: phase-2 Qdrant cleanup.
291    tracing::warn!(
292        count = ids.len(),
293        "eviction phase 2: Qdrant vector removal not yet wired — marking cleaned without actual deletion (MVP)"
294    );
295
296    // Mark as cleaned after the (future) Qdrant call succeeds. For now this
297    // prevents infinite retries on every sweep cycle.
298    store.mark_qdrant_cleaned(&ids).await?;
299    Ok(ids.len())
300}
301
302// ── Tests ─────────────────────────────────────────────────────────────────────
303
304#[cfg(test)]
305mod tests {
306    use super::*;
307
308    /// Build a timestamp string for a time N seconds ago from now.
309    ///
310    /// Returns a string parseable by `parse_sqlite_timestamp_secs`.
311    fn ts_ago(seconds_ago: u64) -> String {
312        let ts = unix_now_secs().saturating_sub(seconds_ago);
313        // Convert back to "YYYY-MM-DD HH:MM:SS" using the same logic as parse_sqlite_timestamp_secs
314        let sec = ts % 60;
315        let min = (ts / 60) % 60;
316        let hour = (ts / 3600) % 24;
317        let mut total_days = ts / 86400;
318        let is_leap =
319            |y: u64| (y.is_multiple_of(4) && !y.is_multiple_of(100)) || y.is_multiple_of(400);
320        let mut year = 1970u64;
321        loop {
322            let days_in_year = if is_leap(year) { 366 } else { 365 };
323            if total_days < days_in_year {
324                break;
325            }
326            total_days -= days_in_year;
327            year += 1;
328        }
329        let month_days = [
330            0u64,
331            31,
332            28 + u64::from(is_leap(year)),
333            31,
334            30,
335            31,
336            30,
337            31,
338            31,
339            30,
340            31,
341            30,
342            31,
343        ];
344        let mut month = 1u64;
345        while month <= 12 {
346            let month_idx = usize::try_from(month).unwrap_or_else(|_| unreachable!());
347            if total_days < month_days[month_idx] {
348                break;
349            }
350            total_days -= month_days[month_idx];
351            month += 1;
352        }
353        let day = total_days + 1;
354        format!("{year:04}-{month:02}-{day:02} {hour:02}:{min:02}:{sec:02}")
355    }
356
357    fn make_entry(access_count: u32, seconds_ago: u64) -> EvictionEntry {
358        let ts = ts_ago(seconds_ago);
359        EvictionEntry {
360            id: MessageId(1),
361            created_at: ts.clone(),
362            last_accessed: Some(ts),
363            access_count,
364        }
365    }
366
367    #[test]
368    fn ebbinghaus_recent_high_access_scores_near_one() {
369        let policy = EbbinghausPolicy::default();
370        // Use 1 second ago to ensure t is close to 0
371        let entry = make_entry(10, 1);
372        let score = policy.score(&entry);
373        // t = 1, n = 10, denominator = 86400 * ln(11) ≈ 207_946; exp(-1/207_946) ≈ 1.0
374        assert!(
375            score > 0.99,
376            "score should be near 1.0 for recently accessed entry, got {score}"
377        );
378    }
379
380    #[test]
381    fn ebbinghaus_old_zero_access_scores_lower() {
382        let policy = EbbinghausPolicy::default();
383        let old = make_entry(0, 7 * 24 * 3600); // 7 days ago, never accessed
384        let recent = make_entry(0, 60); // 1 minute ago
385        assert!(
386            policy.score(&old) < policy.score(&recent),
387            "old entry must score lower than recent"
388        );
389    }
390
391    #[test]
392    fn ebbinghaus_high_access_decays_slower() {
393        let policy = EbbinghausPolicy::default();
394        let low = make_entry(1, 3600); // accessed 1 hour ago, 1 time
395        let high = make_entry(20, 3600); // accessed 1 hour ago, 20 times
396        assert!(
397            policy.score(&high) > policy.score(&low),
398            "high access count should yield higher score"
399        );
400    }
401
402    #[test]
403    fn ebbinghaus_never_accessed_uses_created_at_as_reference() {
404        let policy = EbbinghausPolicy::default();
405        // An old entry (7 days ago) with last_accessed = None.
406        // Score should be the same as make_entry(0, 7 days) because both use created_at.
407        let old_with_no_last_accessed = EvictionEntry {
408            id: MessageId(2),
409            created_at: ts_ago(7 * 24 * 3600),
410            last_accessed: None,
411            access_count: 0,
412        };
413        let old_with_same_last_accessed = make_entry(0, 7 * 24 * 3600);
414        let score_no_access = policy.score(&old_with_no_last_accessed);
415        let score_same = policy.score(&old_with_same_last_accessed);
416        // Both reference the same time; scores should be approximately equal
417        let diff = (score_no_access - score_same).abs();
418        assert!(diff < 1e-6, "scores should match; diff = {diff}");
419    }
420
421    #[test]
422    fn eviction_config_default_is_disabled() {
423        let config = EvictionConfig::default();
424        assert_eq!(
425            config.max_entries, 0,
426            "eviction must be disabled by default"
427        );
428    }
429
430    #[test]
431    fn parse_sqlite_timestamp_known_value() {
432        // 2024-01-01 00:00:00 UTC
433        let ts = parse_sqlite_timestamp_secs("2024-01-01 00:00:00").unwrap();
434        // Days from 1970 to 2024: 54 years, roughly
435        // Reference: 2024-01-01 00:00:00 UTC = 1704067200
436        assert_eq!(
437            ts, 1_704_067_200,
438            "2024-01-01 must parse to known timestamp"
439        );
440    }
441}