zeph-memory 0.19.1

Semantic memory with SQLite and Qdrant for Zeph agent
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! D-MEM RPE-based tiered graph extraction routing.
//!
//! Computes a heuristic "reward prediction error" (RPE) signal for each incoming turn.
//! Low-RPE turns (predictable, topically continuous, no new entities) skip the expensive
//! MAGMA LLM extraction pipeline. High-RPE turns proceed to full extraction.
//!
//! ## RPE formula
//!
//! ```text
//! RPE = 0.5 * (1 - max_cosine_similarity) + 0.5 * entity_novelty_ratio
//! ```
//!
//! Where:
//! - `max_cosine_similarity` = max cosine similarity between the current turn embedding and the
//!   buffered recent turn embeddings (at most `RPE_EMBEDDING_BUFFER_SIZE`), floored at 0.0.
//!   High = topically predictable.
//! - `entity_novelty_ratio` = fraction of candidate entity names not seen in recent history.
//!   0.0 if no entities extracted.
//!
//! ## Safety valve
//!
//! To prevent unbounded skipping, `consecutive_skips` is tracked. When it reaches
//! `max_skip_turns`, extraction is forced regardless of RPE score.

use std::collections::VecDeque;

use zeph_common::math::cosine_similarity;

/// Maximum number of recent turn embeddings to keep for context similarity computation.
///
/// `RpeRouter::push_embedding` evicts the oldest embedding once the buffer holds this many.
pub const RPE_EMBEDDING_BUFFER_SIZE: usize = 10;

/// Number of recent entity names to keep in novelty history.
///
/// `RpeRouter::push_entities` evicts the oldest name once the history holds this many.
/// Names are stored as pushed (no de-duplication), so repeated names each occupy a slot.
const ENTITY_HISTORY_SIZE: usize = 200;

/// RPE computation result for a single turn.
///
/// Produced by `RpeRouter::compute`.
#[derive(Debug, Clone)]
pub struct RpeSignal {
    /// Combined surprise score: `0.5 * (1 - context_similarity) + 0.5 * entity_novelty`.
    /// Reported as `1.0` on cold start and when the skip safety valve forces extraction.
    pub rpe_score: f32,
    /// Highest cosine similarity between the current turn embedding and the buffered recent
    /// embeddings, floored at `0.0`. Reported as `0.0` on cold start / forced extraction.
    pub context_similarity: f32,
    /// Fraction of candidate entity names absent from the entity history; `0.0` when the turn
    /// has no candidates. Reported as `1.0` on cold start / forced extraction.
    pub entity_novelty: f32,
    /// `true` when graph extraction should run for this turn: either `rpe_score` reached the
    /// router's threshold, or extraction was forced (cold start / safety valve).
    pub should_extract: bool,
}

/// Stateful RPE router. Tracks recent embeddings and entity history.
///
/// Protected by the caller's synchronization (typically held behind `Arc<Mutex<...>>`
/// at the `SemanticMemory` layer).
pub struct RpeRouter {
    /// Most recent turn embeddings, oldest first; capped at `RPE_EMBEDDING_BUFFER_SIZE`.
    recent_embeddings: VecDeque<Vec<f32>>,
    /// Recently recorded entity names, oldest first; capped at `ENTITY_HISTORY_SIZE`.
    entity_history: VecDeque<String>,
    /// Number of turns in a row for which `compute` decided to skip extraction.
    /// Reset to zero whenever extraction is chosen or forced by the safety valve.
    consecutive_skips: u32,
    /// RPE below this value → skip extraction. Range: `[0.0, 1.0]` (not validated here).
    pub threshold: f32,
    /// Force extraction after this many consecutive skips. Default: 5.
    /// NOTE(review): the default is not applied in this module — `new` takes the value
    /// explicitly; confirm against the configuration layer.
    pub max_skip_turns: u32,
}

impl RpeRouter {
    /// Build a router with empty embedding and entity histories and a zeroed skip counter.
    ///
    /// `threshold` — RPE scores strictly below this value cause extraction to be skipped.
    /// `max_skip_turns` — number of consecutive skips after which extraction is forced.
    #[must_use]
    pub fn new(threshold: f32, max_skip_turns: u32) -> Self {
        let recent_embeddings = VecDeque::with_capacity(RPE_EMBEDDING_BUFFER_SIZE);
        let entity_history = VecDeque::with_capacity(ENTITY_HISTORY_SIZE);
        Self {
            recent_embeddings,
            entity_history,
            consecutive_skips: 0,
            threshold,
            max_skip_turns,
        }
    }

    /// Append a turn embedding to the rolling buffer, dropping the oldest one when full.
    ///
    /// Intended to be called for every turn, including skipped ones, so the similarity
    /// baseline used by the next `compute` call stays current.
    pub fn push_embedding(&mut self, embedding: Vec<f32>) {
        let buffer = &mut self.recent_embeddings;
        if buffer.len() >= RPE_EMBEDDING_BUFFER_SIZE {
            buffer.pop_front();
        }
        buffer.push_back(embedding);
    }

    /// Append entity names (extracted, or candidates taken from text) to the novelty history.
    ///
    /// The history is a bounded FIFO: each insertion beyond the cap evicts the oldest name.
    pub fn push_entities(&mut self, names: &[String]) {
        let history = &mut self.entity_history;
        for name in names.iter().cloned() {
            if history.len() >= ENTITY_HISTORY_SIZE {
                history.pop_front();
            }
            history.push_back(name);
        }
    }

    /// Score the current turn and decide whether graph extraction should run.
    ///
    /// `turn_embedding` — embedding of the current message.
    /// `candidate_entities` — entity names found in the current message text (may be empty).
    ///
    /// Extraction is unconditional (`rpe_score = 1.0`, `should_extract = true`) in two cases:
    /// the skip counter has reached `max_skip_turns` (which also resets the counter), or no
    /// embeddings have been recorded yet (cold start). Otherwise the score is
    /// `0.5 * (1 - context_similarity) + 0.5 * entity_novelty` and extraction runs when it is
    /// at least `threshold`.
    ///
    /// This method only updates the skip counter; it does not record the embedding or the
    /// entities — callers do that through `push_embedding` / `push_entities`.
    #[must_use]
    pub fn compute(&mut self, turn_embedding: &[f32], candidate_entities: &[String]) -> RpeSignal {
        // Shared result for both "always extract" paths below.
        let forced = || RpeSignal {
            rpe_score: 1.0,
            context_similarity: 0.0,
            entity_novelty: 1.0,
            should_extract: true,
        };

        // Safety valve: too many skips in a row, so extract no matter what the score would be.
        if self.consecutive_skips >= self.max_skip_turns {
            tracing::debug!(
                consecutive_skips = self.consecutive_skips,
                "D-MEM RPE: safety valve triggered, forcing extraction"
            );
            self.consecutive_skips = 0;
            return forced();
        }

        // Cold start: nothing to compare against yet.
        if self.recent_embeddings.is_empty() {
            return forced();
        }

        // Best match against the recent window; starting the fold at 0.0 floors the result.
        let context_similarity = self
            .recent_embeddings
            .iter()
            .fold(0.0f32, |best, past| best.max(cosine_similarity(turn_embedding, past)));

        // Share of candidate names that the history has not seen; 0.0 without candidates.
        #[allow(clippy::cast_precision_loss)]
        let entity_novelty = match candidate_entities.len() {
            0 => 0.0,
            total => {
                let unseen = candidate_entities
                    .iter()
                    .filter(|name| !self.entity_history.contains(name))
                    .count();
                unseen as f32 / total as f32
            }
        };

        let rpe_score = 0.5 * (1.0 - context_similarity) + 0.5 * entity_novelty;
        let should_extract = rpe_score >= self.threshold;

        if should_extract {
            self.consecutive_skips = 0;
        } else {
            self.consecutive_skips += 1;
            tracing::debug!(
                rpe_score,
                context_similarity,
                entity_novelty,
                consecutive_skips = self.consecutive_skips,
                "D-MEM RPE: low surprise, skipping graph extraction"
            );
        }

        RpeSignal {
            rpe_score,
            context_similarity,
            entity_novelty,
            should_extract,
        }
    }
}

/// Lowercased known tech-domain terms that would be missed by the capitalization heuristic.
const TECH_TERMS: &[&str] = &[
    "rust",
    "python",
    "go",
    "java",
    "kotlin",
    "swift",
    "ruby",
    "scala",
    "elixir",
    "haskell",
    "typescript",
    "javascript",
    "c",
    "c++",
    "cpp",
    "zig",
    "nim",
    "odin",
    "docker",
    "kubernetes",
    "k8s",
    "postgres",
    "sqlite",
    "redis",
    "kafka",
    "nginx",
    "linux",
    "macos",
    "windows",
    "android",
    "ios",
    "git",
    "cargo",
    "npm",
    "pip",
    "gradle",
    "cmake",
];

/// Extract candidate entity names from text using simple heuristics.
///
/// Two passes, both intentionally cheap (no LLM involved):
///
/// 1. Capitalized tokens of at least 3 characters that do NOT start a sentence. A token
///    starts a sentence when it is the first token or follows a token ending in `.`, `!`
///    or `?`. Punctuation other than `_` and `-` is stripped before the checks, and
///    all-uppercase tokens of up to 5 characters (API, HTTP, JSON) are skipped.
/// 2. Entries of `TECH_TERMS` found anywhere in the text, case-insensitively, when they
///    are not directly adjacent to a letter, digit or `_`.
///
/// Lengths and word boundaries are evaluated on Unicode characters rather than bytes, so
/// short non-ASCII tokens are not over-counted and a term embedded in a non-ASCII word
/// (e.g. `go` in `diégo`) is not reported.
///
/// Returns lowercased, de-duplicated names in first-occurrence order (capitalized tokens
/// first, then tech terms in table order) for comparison against stored canonical names.
#[must_use]
pub fn extract_candidate_entities(text: &str) -> Vec<String> {
    /// Characters that glue a tech term to a surrounding identifier or word.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    // `seen` mirrors `candidates` so membership checks are O(1) while order is preserved.
    let mut candidates: Vec<String> = Vec::new();
    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();

    // Pass 1: capitalized tokens that are not at a sentence start.
    let mut at_sentence_start = true; // the first token always starts a sentence
    for word in text.split_whitespace() {
        let starts_sentence = at_sentence_start;
        at_sentence_start = word.ends_with(['.', '!', '?']);
        if starts_sentence {
            // Avoids capturing "The", "This", etc.
            continue;
        }

        let clean: String = word
            .chars()
            .filter(|c| c.is_alphanumeric() || matches!(*c, '_' | '-'))
            .collect();
        // Count characters, not bytes, so multi-byte letters are not counted several times.
        let char_count = clean.chars().count();
        if char_count < 3 {
            continue;
        }
        // Skip pure-uppercase acronyms (API, HTTP, JSON).
        if char_count <= 5 && clean.chars().all(char::is_uppercase) {
            continue;
        }
        if clean.chars().next().is_some_and(char::is_uppercase) {
            let name = clean.to_lowercase();
            if seen.insert(name.clone()) {
                candidates.push(name);
            }
        }
    }

    // Pass 2: tech-domain terms (case-insensitive, word-boundary check).
    let text_lower = text.to_lowercase();
    for &term in TECH_TERMS {
        // An empty term would match everywhere; an already-seen term needs no scan.
        if term.is_empty() || seen.contains(term) {
            continue;
        }
        // Advance by the term's first character so `start` always stays on a char boundary.
        let step = term.chars().next().map_or(1, char::len_utf8);
        let mut start = 0;
        while let Some(offset) = text_lower[start..].find(term) {
            let begin = start + offset;
            let end = begin + term.len();
            let before_ok = !text_lower[..begin]
                .chars()
                .next_back()
                .is_some_and(is_word_char);
            let after_ok = !text_lower[end..].chars().next().is_some_and(is_word_char);
            if before_ok && after_ok {
                seen.insert(term.to_string());
                candidates.push(term.to_string());
                break; // one accepted occurrence is enough
            }
            start = begin + step;
        }
    }

    candidates
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a constant-valued embedding of the given dimensionality.
    fn make_embedding(val: f32, len: usize) -> Vec<f32> {
        vec![val; len]
    }

    /// With no recorded embeddings the router must always extract with maximal RPE.
    #[test]
    fn rpe_cold_start_returns_one() {
        let mut router = RpeRouter::new(0.3, 5);
        let emb = make_embedding(0.5, 4);
        let signal = router.compute(&emb, &[]);
        assert!(signal.should_extract);
        assert!((signal.rpe_score - 1.0).abs() < 1e-6);
    }

    /// Identical embedding plus an already-known entity yields RPE near zero → skip.
    #[test]
    fn rpe_high_similarity_low_novelty_skips() {
        let mut router = RpeRouter::new(0.3, 5);
        let emb = make_embedding(1.0, 4);
        // Seed history with identical embedding.
        router.push_embedding(emb.clone());
        router.push_entities(&["rust".to_string()]);

        // Turn with same embedding and known entity → RPE near 0.
        let signal = router.compute(&emb, &["rust".to_string()]);
        // context_similarity = 1.0, entity_novelty = 0.0 → RPE = 0.0
        assert!(!signal.should_extract, "low-RPE turn should be skipped");
        assert!(signal.rpe_score < 0.3);
    }

    /// Orthogonal embedding plus an unseen entity yields maximal RPE → extract.
    #[test]
    fn rpe_low_similarity_high_novelty_extracts() {
        let mut router = RpeRouter::new(0.3, 5);
        let prev = vec![1.0f32, 0.0, 0.0, 0.0];
        router.push_embedding(prev);

        // Orthogonal embedding + all-new entities.
        let curr = vec![0.0f32, 1.0, 0.0, 0.0];
        let signal = router.compute(&curr, &["NewFramework".to_string()]);
        // context_similarity = 0.0, entity_novelty = 1.0 → RPE = 1.0
        assert!(signal.should_extract);
        assert!((signal.rpe_score - 1.0).abs() < 1e-6);
    }

    /// Once the skip counter reaches `max_skip_turns`, extraction is forced and the counter
    /// resets, even for a turn that would otherwise be skipped.
    #[test]
    fn rpe_max_skip_turns_forces_extraction() {
        let mut router = RpeRouter::new(0.3, 3);
        let emb = make_embedding(1.0, 4);
        router.push_embedding(emb.clone());
        router.push_entities(&["rust".to_string()]);

        // Force 3 skips.
        router.consecutive_skips = 3;
        let signal = router.compute(&emb, &["rust".to_string()]);
        assert!(signal.should_extract, "safety valve must force extraction");
        assert_eq!(
            router.consecutive_skips, 0,
            "counter must reset after safety valve"
        );
    }

    /// A skipped turn must bump the consecutive-skip counter by exactly one.
    #[test]
    fn rpe_consecutive_skips_increments() {
        let mut router = RpeRouter::new(0.9, 10); // high threshold → easy to skip
        let emb = make_embedding(1.0, 4);
        router.push_embedding(emb.clone());
        router.push_entities(&["rust".to_string()]);

        let s = router.compute(&emb, &["rust".to_string()]);
        // Assert the skip itself so the counter check below can never pass vacuously.
        assert!(
            !s.should_extract,
            "identical turn with known entity must be skipped at threshold 0.9"
        );
        assert_eq!(router.consecutive_skips, 1);
    }

    /// Mid-sentence capitalized tokens are captured and lowercased.
    #[test]
    fn extract_candidate_entities_captures_capitalized() {
        let text = "I use Tokio and Axum for async web development.";
        let entities = extract_candidate_entities(text);
        // "Tokio" and "Axum" are mid-sentence capitalized.
        assert!(
            entities.contains(&"tokio".to_string()),
            "expected tokio, got {entities:?}"
        );
        assert!(
            entities.contains(&"axum".to_string()),
            "expected axum, got {entities:?}"
        );
    }

    /// Lowercase tech terms from the built-in table are captured.
    #[test]
    fn extract_candidate_entities_captures_tech_terms() {
        let text = "I write code in rust and use docker for deployment.";
        let entities = extract_candidate_entities(text);
        assert!(
            entities.contains(&"rust".to_string()),
            "expected rust, got {entities:?}"
        );
        assert!(
            entities.contains(&"docker".to_string()),
            "expected docker, got {entities:?}"
        );
    }

    /// Capitalized words at the start of a sentence are not treated as entities.
    #[test]
    fn extract_candidate_entities_ignores_sentence_start() {
        let text = "The project uses Rust. The team is growing.";
        let entities = extract_candidate_entities(text);
        // "The" appears at sentence start and should not be captured.
        assert!(!entities.contains(&"the".to_string()));
    }

    /// A term mentioned several times is reported once.
    #[test]
    fn extract_candidate_entities_no_duplicates() {
        let text = "I use rust and I love rust and rust is great.";
        let entities = extract_candidate_entities(text);
        let count = entities.iter().filter(|e| e.as_str() == "rust").count();
        assert_eq!(
            count, 1,
            "rust should appear exactly once, got {entities:?}"
        );
    }
}