zeph-memory 0.18.4

Semantic memory with SQLite and Qdrant for Zeph agent
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Persona fact extraction from conversation history (#2461).
//!
//! Uses a cheap LLM provider to extract user attributes (preferences, domain knowledge,
//! working style) from recent user messages. Supports contradiction resolution via
//! `supersedes_id`: when an extracted fact contradicts an existing one in the same
//! category, the LLM classifies it as NEW or UPDATE and returns the id of the old fact
//! to supersede.

use std::time::Duration;

use serde::{Deserialize, Serialize};
use tokio::time::timeout;
use zeph_llm::any::AnyProvider;
use zeph_llm::provider::{Message, Role};

use crate::error::MemoryError;
use crate::store::DbStore;
use crate::store::persona::PersonaFactRow;

/// System prompt sent as the `Role::System` message of the persona extraction call.
///
/// It asks the model to return a JSON array whose objects carry the fields
/// `category`, `content`, `confidence`, `action` and `supersedes_id`, i.e. the
/// shape deserialized into `ExtractedFact`. The category names listed here are
/// the same ones accepted by `is_valid_category`; keep the two in sync.
///
/// A trailing `\` inside the literal is a Rust line continuation: the newline and
/// the leading whitespace of the following line are not part of the string.
const EXTRACTION_SYSTEM_PROMPT: &str = "\
You are a persona fact extractor. Given a list of user messages and any existing persona \
facts for each category, extract factual claims the user makes about themselves: their \
preferences, domain knowledge, working style, communication style, and background.

Rules:
1. Only extract facts from first-person user statements (\"I prefer\", \"I work on\", \
   \"my team\", \"I use\", etc.). Ignore assistant messages.
2. Do NOT extract facts from questions, greetings, or tool outputs.
3. For each extracted fact, decide if it is NEW (no existing fact contradicts it) or \
   UPDATE (it contradicts or replaces an existing fact). For UPDATE, provide the \
   `supersedes_id` of the older fact.
4. Confidence: 0.8-1.0 for explicit statements (\"I prefer X\"), 0.4-0.7 for inferences.
5. Categories: preference, domain_knowledge, working_style, communication, background.
6. Keep content concise (one sentence max). Normalize to English.
7. Return empty array if no facts are found.

Output JSON array of objects:
[
  {
    \"category\": \"preference|domain_knowledge|working_style|communication|background\",
    \"content\": \"concise factual statement\",
    \"confidence\": 0.0-1.0,
    \"action\": \"new|update\",
    \"supersedes_id\": null or integer id of the fact being replaced
  }
]";

/// Configuration for persona extraction.
///
/// Consumed by `extract_persona_facts`, which returns early without calling the LLM
/// when `enabled` is `false` or fewer than `min_messages` messages are supplied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersonaExtractionConfig {
    /// Master switch; when `false` no extraction is attempted.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for extraction. Falls back to default when empty.
    pub persona_provider: String,
    /// Minimum user messages in a session before extraction runs.
    pub min_messages: usize,
    /// Maximum user messages sent to LLM per extraction pass.
    pub max_messages: usize,
    /// LLM timeout for the extraction call, in seconds.
    pub extraction_timeout_secs: u64,
}

/// One element of the JSON array returned by the extraction LLM.
///
/// Field names mirror the JSON keys requested in `EXTRACTION_SYSTEM_PROMPT`.
#[derive(Debug, Deserialize, Serialize)]
struct ExtractedFact {
    /// Fact category; facts whose category fails `is_valid_category` are skipped.
    category: String,
    /// The factual statement itself; facts with empty content are skipped.
    content: String,
    /// Model-reported confidence; clamped to `[0.0, 1.0]` before being stored.
    confidence: f64,
    /// `"new"` or `"update"` as requested by the prompt. Parsed but not otherwise
    /// consulted by `extract_persona_facts` in this file.
    action: String,
    /// Id of the existing fact this one replaces, if any.
    supersedes_id: Option<i64>,
}

/// Self-referential language heuristic: reports whether `text` contains a first-person
/// singular pronoun, which strongly indicates personal facts may be present.
///
/// The text is lowercased and split into words on every character that is not
/// alphanumeric, `_`, or an apostrophe (ASCII `'` or typographic `’`). A word matches
/// when it is one of `i`, `my`, `me`, `mine`, `myself`, or an `i'…` contraction such as
/// `i'm`, `i've`, `i'd`, `i'll`.
///
/// Working on whole words (instead of space-delimited substrings) means pronouns are
/// found at the start or end of the text, after a newline, and next to punctuation
/// ("help me?"), while letters inside other words ("Hawaii am") no longer match.
/// `_` counts as a word character so identifiers like `my_var` are not treated as
/// pronouns.
#[must_use]
pub fn contains_self_referential_language(text: &str) -> bool {
    const PRONOUNS: [&str; 5] = ["i", "my", "me", "mine", "myself"];

    fn is_apostrophe(c: char) -> bool {
        c == '\'' || c == '\u{2019}'
    }

    // Lowercase once; all comparisons below use lowercase literals.
    let lower = text.to_lowercase();
    lower
        .split(|c: char| !(c.is_alphanumeric() || c == '_' || is_apostrophe(c)))
        // Drop surrounding quote marks so `'my'` is seen as `my`.
        .map(|word| word.trim_matches(is_apostrophe))
        .any(|word| {
            PRONOUNS.contains(&word)
                || word.starts_with("i'")
                || word.starts_with("i\u{2019}")
        })
}

/// Extract persona facts from recent user messages and persist them.
///
/// Steps:
/// 1. Return `Ok(0)` immediately when extraction is disabled or fewer than
///    `config.min_messages` messages were supplied.
/// 2. Keep only the last `config.max_messages` messages and return `Ok(0)` without
///    calling the LLM when none of them contains self-referential language.
/// 3. Load the existing facts, ask the LLM for new/updated facts, and upsert every
///    fact that has non-blank content and a known category.
///
/// `conversation_id` is forwarded unchanged to `DbStore::upsert_persona_fact`.
///
/// Returns the number of facts upserted.
///
/// # Errors
///
/// Returns an error when loading the existing facts from the store fails or when the
/// LLM call itself fails. Timeouts, unparseable responses and per-fact upsert failures
/// are logged and do not fail the call (graceful degradation).
pub async fn extract_persona_facts(
    store: &DbStore,
    provider: &AnyProvider,
    user_messages: &[&str],
    config: &PersonaExtractionConfig,
    conversation_id: Option<i64>,
) -> Result<usize, MemoryError> {
    if !config.enabled || user_messages.len() < config.min_messages {
        return Ok(0);
    }

    // Only the most recent `max_messages` messages are sent, in their original order.
    let window_start = user_messages.len().saturating_sub(config.max_messages);
    let messages_to_send = &user_messages[window_start..];

    // Gate on the messages that would actually be sent: an old first-person message
    // outside the window must not trigger an LLM call over messages that contain none.
    let has_self_ref = messages_to_send
        .iter()
        .any(|m| contains_self_referential_language(m));
    if !has_self_ref {
        return Ok(0);
    }

    // Load existing facts to include in the prompt for contradiction detection.
    let existing_facts = store.load_persona_facts(0.0).await?;
    let user_prompt = build_extraction_prompt(messages_to_send, &existing_facts);

    let llm_messages = [
        Message::from_legacy(Role::System, EXTRACTION_SYSTEM_PROMPT),
        Message::from_legacy(Role::User, user_prompt),
    ];

    let extraction_timeout = Duration::from_secs(config.extraction_timeout_secs);
    // Use the configured provider name; fall back to "persona" label if empty.
    let provider_name = if config.persona_provider.is_empty() {
        "persona"
    } else {
        &config.persona_provider
    };
    let response = match timeout(
        extraction_timeout,
        provider.chat_with_named_provider(provider_name, &llm_messages),
    )
    .await
    {
        Ok(Ok(text)) => text,
        Ok(Err(e)) => return Err(MemoryError::Llm(e)),
        Err(_) => {
            tracing::warn!(
                "persona extraction timed out after {}s",
                config.extraction_timeout_secs
            );
            return Ok(0);
        }
    };

    let facts = parse_extraction_response(&response);
    if facts.is_empty() {
        return Ok(0);
    }

    let mut upserted = 0usize;
    for fact in facts {
        let content = fact.content.trim();
        if fact.category.is_empty() || content.is_empty() {
            continue;
        }
        if !is_valid_category(&fact.category) {
            tracing::debug!(
                category = %fact.category,
                "persona extraction: skipping unknown category"
            );
            continue;
        }

        // The id comes from model output, so only honour it when it names one of the
        // facts that were shown to the model; otherwise store the fact as a new one.
        let supersedes_id = fact.supersedes_id.filter(|&id| {
            let known = existing_facts.iter().any(|existing| existing.id == id);
            if !known {
                tracing::debug!(
                    supersedes_id = id,
                    "persona extraction: ignoring unknown supersedes_id"
                );
            }
            known
        });

        match store
            .upsert_persona_fact(
                &fact.category,
                content,
                fact.confidence.clamp(0.0, 1.0),
                conversation_id,
                supersedes_id,
            )
            .await
        {
            Ok(_) => upserted += 1,
            Err(e) => {
                tracing::warn!(error = %e, "persona extraction: failed to upsert fact");
            }
        }
    }

    tracing::debug!(upserted, "persona extraction complete");
    Ok(upserted)
}

/// Reports whether `category` is one of the persona fact categories the extractor
/// accepts. The comparison is exact (case-sensitive, no trimming).
fn is_valid_category(category: &str) -> bool {
    const KNOWN_CATEGORIES: [&str; 5] = [
        "preference",
        "domain_knowledge",
        "working_style",
        "communication",
        "background",
    ];
    KNOWN_CATEGORIES.contains(&category)
}

/// Render the user-turn prompt for the extraction call.
///
/// The prompt lists `messages` numbered from 1, then (only when `existing_facts` is
/// non-empty) one line per existing fact with its id, category and content, and ends
/// with the instruction to answer with a JSON array.
fn build_extraction_prompt(messages: &[&str], existing_facts: &[PersonaFactRow]) -> String {
    use std::fmt::Write as _;

    let mut out = String::from("User messages to analyze:\n");
    for (text, number) in messages.iter().zip(1usize..) {
        // The `fmt::Result` is deliberately discarded, as in every write below.
        let _ = writeln!(out, "[{number}] {text}");
    }

    if !existing_facts.is_empty() {
        out += "\nExisting persona facts (for contradiction detection):\n";
        for row in existing_facts {
            let _ = writeln!(
                out,
                "  id={id} category={category} content=\"{content}\"",
                id = row.id,
                category = row.category,
                content = row.content
            );
        }
    }

    out += "\nExtract persona facts as JSON array.";
    out
}

/// Decode the LLM reply into extracted facts.
///
/// Two attempts are made, in order: the whole reply as a JSON array, then the span
/// from the first `[` to the last `]` (the model may wrap the array in prose). If
/// neither decodes, a warning with the reply length and its first 200 characters is
/// logged and an empty vector is returned.
fn parse_extraction_response(response: &str) -> Vec<ExtractedFact> {
    let decode = |candidate: &str| serde_json::from_str::<Vec<ExtractedFact>>(candidate).ok();

    // Attempt 1: the reply is exactly a JSON array.
    if let Some(facts) = decode(response) {
        return facts;
    }

    // Attempt 2: an array embedded somewhere inside the reply.
    let embedded = response
        .find('[')
        .zip(response.rfind(']'))
        .filter(|(open, close)| close > open)
        .and_then(|(open, close)| decode(&response[open..=close]));
    if let Some(facts) = embedded {
        return facts;
    }

    tracing::warn!(
        "persona extraction: failed to parse LLM response (len={}): {:.200}",
        response.len(),
        response
    );
    Vec::new()
}

/// Unit tests for the self-referential heuristic, the LLM response parser, and the
/// store-level supersede behaviour. None of these tests performs an LLM call.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::DbStore;

    /// Open a fresh in-memory store with a pool size of 1.
    async fn make_store() -> DbStore {
        DbStore::with_pool_size(":memory:", 1)
            .await
            .expect("in-memory store")
    }

    // --- contains_self_referential_language ---

    #[test]
    fn self_ref_detects_i_prefer() {
        assert!(contains_self_referential_language("I prefer dark mode"));
    }

    #[test]
    fn self_ref_detects_my_team() {
        assert!(contains_self_referential_language("my team uses Rust"));
    }

    #[test]
    fn self_ref_detects_sentence_starting_with_i() {
        assert!(contains_self_referential_language("I work remotely"));
    }

    #[test]
    fn self_ref_detects_inline_i() {
        assert!(contains_self_referential_language(
            "Sometimes I prefer async"
        ));
    }

    #[test]
    fn self_ref_detects_me_inline() {
        assert!(contains_self_referential_language(
            "That helps me understand"
        ));
    }

    #[test]
    fn self_ref_no_match_for_third_person() {
        assert!(!contains_self_referential_language("The team uses Python"));
    }

    #[test]
    fn self_ref_no_match_for_tool_output() {
        assert!(!contains_self_referential_language("Error: file not found"));
    }

    #[test]
    fn self_ref_no_match_for_empty_string() {
        assert!(!contains_self_referential_language(""));
    }

    // --- extraction gate: no LLM call when no self-referential language ---

    // NOTE(review): despite its name, this test never calls `extract_persona_facts`.
    // That function needs an `AnyProvider`, which is not constructed anywhere in this
    // test module, so the early-return gates are not exercised end to end here. The
    // only assertion below checks that an empty message list is shorter than
    // `min_messages`. The self-referential gate itself is covered by the
    // `contains_self_referential_language` tests above.
    #[tokio::test]
    async fn extraction_gate_skips_when_no_self_ref() {
        let store = make_store().await;
        // Enabled config with a low threshold: with this config only the
        // self-referential gate could stop extraction before the LLM call.
        let cfg = PersonaExtractionConfig {
            enabled: true,
            persona_provider: String::new(),
            min_messages: 1,
            max_messages: 10,
            extraction_timeout_secs: 5,
        };
        // Disabled config: `enabled: false` is the first early-return condition
        // in `extract_persona_facts`.
        let cfg_disabled = PersonaExtractionConfig {
            enabled: false,
            persona_provider: String::new(),
            min_messages: 1,
            max_messages: 10,
            extraction_timeout_secs: 5,
        };
        // Enabled config whose `min_messages` threshold an empty message list
        // cannot meet.
        let cfg_min = PersonaExtractionConfig {
            enabled: true,
            persona_provider: String::new(),
            min_messages: 5,
            max_messages: 10,
            extraction_timeout_secs: 5,
        };
        // Checks only the precondition of the `min_messages` gate: zero messages
        // is fewer than `min_messages = 5`. The function itself is not invoked.
        let messages: Vec<&str> = vec![];
        assert!(messages.len() < cfg_min.min_messages);
        let _ = (store, cfg, cfg_disabled, cfg_min); // suppress unused warnings
    }

    // --- parse_extraction_response ---

    #[test]
    fn parse_direct_json_array() {
        let json = r#"[{"category":"preference","content":"I prefer dark mode","confidence":0.9,"action":"new","supersedes_id":null}]"#;
        let facts = parse_extraction_response(json);
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].category, "preference");
        assert_eq!(facts[0].content, "I prefer dark mode");
        // Floating-point comparison with a tolerance instead of exact equality.
        assert!((facts[0].confidence - 0.9).abs() < 1e-9);
        assert_eq!(facts[0].action, "new");
        assert!(facts[0].supersedes_id.is_none());
    }

    #[test]
    fn parse_json_embedded_in_prose() {
        // Exercises the fallback that slices from the first `[` to the last `]`.
        let response = "Sure! Here are the facts:\n[{\"category\":\"domain_knowledge\",\"content\":\"Uses Rust\",\"confidence\":0.8,\"action\":\"new\",\"supersedes_id\":null}]\nThat's all.";
        let facts = parse_extraction_response(response);
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].category, "domain_knowledge");
    }

    #[test]
    fn parse_empty_array() {
        let facts = parse_extraction_response("[]");
        assert!(facts.is_empty());
    }

    #[test]
    fn parse_invalid_json_returns_empty() {
        let facts = parse_extraction_response("not json at all");
        assert!(facts.is_empty());
    }

    #[test]
    fn parse_supersedes_id_populated() {
        let json = r#"[{"category":"preference","content":"I prefer dark mode","confidence":0.9,"action":"update","supersedes_id":7}]"#;
        let facts = parse_extraction_response(json);
        assert_eq!(facts[0].supersedes_id, Some(7));
        assert_eq!(facts[0].action, "update");
    }

    // --- contradiction resolution via store ---

    #[tokio::test]
    async fn contradiction_second_fact_supersedes_first() {
        let store = make_store().await;
        // Insert the fact that will later be superseded and keep its id.
        let old_id = store
            .upsert_persona_fact("preference", "I prefer light mode", 0.8, None, None)
            .await
            .expect("old fact");

        // Insert the contradicting fact, pointing at the old one via `supersedes_id`.
        store
            .upsert_persona_fact("preference", "I prefer dark mode", 0.9, None, Some(old_id))
            .await
            .expect("new fact");

        let facts = store.load_persona_facts(0.0).await.expect("load");
        assert_eq!(facts.len(), 1, "superseded fact should be excluded");
        assert_eq!(facts[0].content, "I prefer dark mode");
    }
}