zeph_memory/semantic.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

use zeph_llm::any::AnyProvider;
use zeph_llm::provider::{LlmProvider, Message, MessageMetadata, Role};

use std::sync::Arc;
#[cfg(feature = "graph-memory")]
use std::sync::atomic::{AtomicU64, Ordering};

use crate::embedding_store::{EmbeddingStore, MessageKind, SearchFilter};
use crate::error::MemoryError;
use crate::sqlite::SqliteStore;
use crate::token_counter::TokenCounter;
use crate::types::{ConversationId, MessageId};
use crate::vector_store::{FieldCondition, FieldValue, VectorFilter};

const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";
const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";
const CORRECTIONS_COLLECTION: &str = "zeph_corrections";

#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, schemars::JsonSchema)]
pub struct StructuredSummary {
    pub summary: String,
    pub key_facts: Vec<String>,
    pub entities: Vec<String>,
}

#[derive(Debug)]
pub struct RecalledMessage {
    pub message: Message,
    pub score: f32,
}

#[derive(Debug, Clone)]
pub struct Summary {
    pub id: i64,
    pub conversation_id: ConversationId,
    pub content: String,
    pub first_message_id: MessageId,
    pub last_message_id: MessageId,
    pub token_estimate: i64,
}

#[derive(Debug, Clone)]
pub struct SessionSummaryResult {
    pub summary_text: String,
    pub score: f32,
    pub conversation_id: ConversationId,
}

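/// Cosine similarity between two vectors.
///
/// Returns `0.0` for mismatched lengths, empty inputs, or zero-norm vectors,
/// e.g. `cosine_similarity(&[1.0, 0.0], &[0.0, 1.0])` is `0.0`, while identical
/// non-zero vectors score `1.0`.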
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }
    dot / (norm_a * norm_b)
}

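/// Apply exponential temporal decay in place: each score is multiplied by
/// `0.5^(age_days / half_life_days)`, so a message exactly one half-life old
/// keeps 50% of its score. A `half_life_days` of `0` disables decay, and
/// messages without a known timestamp are left unchanged.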
fn apply_temporal_decay(
    ranked: &mut [(MessageId, f64)],
    timestamps: &std::collections::HashMap<MessageId, i64>,
    half_life_days: u32,
) {
    if half_life_days == 0 {
        return;
    }
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs()
        .cast_signed();
    let lambda = std::f64::consts::LN_2 / f64::from(half_life_days);

    for (msg_id, score) in ranked.iter_mut() {
        if let Some(&ts) = timestamps.get(msg_id) {
            #[allow(clippy::cast_precision_loss)]
            let age_days = (now - ts).max(0) as f64 / 86400.0;
            *score *= (-lambda * age_days).exp();
        }
    }
}

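/// Re-rank with Maximal Marginal Relevance (MMR): greedily select the candidate
/// maximizing `lambda * relevance - (1.0 - lambda) * max_sim`, where `max_sim`
/// is the highest cosine similarity to any already-selected result. Candidates
/// with no stored vector are treated as having zero similarity.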
fn apply_mmr(
    ranked: &[(MessageId, f64)],
    vectors: &std::collections::HashMap<MessageId, Vec<f32>>,
    lambda: f32,
    limit: usize,
) -> Vec<(MessageId, f64)> {
    if ranked.is_empty() || limit == 0 {
        return Vec::new();
    }

    let lambda = f64::from(lambda);
    let mut selected: Vec<(MessageId, f64)> = Vec::with_capacity(limit);
    let mut remaining: Vec<(MessageId, f64)> = ranked.to_vec();

    while selected.len() < limit && !remaining.is_empty() {
        let best_idx = if selected.is_empty() {
            // Pick highest relevance first
            0
        } else {
            let mut best = 0usize;
            let mut best_score = f64::NEG_INFINITY;

            for (i, &(cand_id, relevance)) in remaining.iter().enumerate() {
                let max_sim = if let Some(cand_vec) = vectors.get(&cand_id) {
                    selected
                        .iter()
                        .filter_map(|(sel_id, _)| vectors.get(sel_id))
                        .map(|sel_vec| f64::from(cosine_similarity(cand_vec, sel_vec)))
                        .fold(f64::NEG_INFINITY, f64::max)
                } else {
                    0.0
                };
                let max_sim = if max_sim == f64::NEG_INFINITY {
                    0.0
                } else {
                    max_sim
                };
                let mmr_score = lambda * relevance - (1.0 - lambda) * max_sim;
                if mmr_score > best_score {
                    best_score = mmr_score;
                    best = i;
                }
            }
            best
        };

        selected.push(remaining.remove(best_idx));
    }

    selected
}

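/// Build the summarization prompt: instructions asking for JSON matching
/// `StructuredSummary`, followed by the conversation as `role: content` lines.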
fn build_summarization_prompt(messages: &[(MessageId, String, String)]) -> String {
    let mut prompt = String::from(
        "Summarize the following conversation. Extract key facts, decisions, entities, \
         and context needed to continue the conversation.\n\n\
         Respond in JSON with fields: summary (string), key_facts (list of strings), \
         entities (list of strings).\n\nConversation:\n",
    );

    for (_, role, content) in messages {
        prompt.push_str(role);
        prompt.push_str(": ");
        prompt.push_str(content);
        prompt.push('\n');
    }

    prompt
}

pub struct SemanticMemory {
    sqlite: SqliteStore,
    qdrant: Option<Arc<EmbeddingStore>>,
    provider: AnyProvider,
    embedding_model: String,
    vector_weight: f64,
    keyword_weight: f64,
    temporal_decay_enabled: bool,
    temporal_decay_half_life_days: u32,
    mmr_enabled: bool,
    mmr_lambda: f32,
    pub token_counter: Arc<TokenCounter>,
    #[cfg(feature = "graph-memory")]
    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
    #[cfg(feature = "graph-memory")]
    community_detection_failures: Arc<AtomicU64>,
    #[cfg(feature = "graph-memory")]
    graph_extraction_count: Arc<AtomicU64>,
    #[cfg(feature = "graph-memory")]
    graph_extraction_failures: Arc<AtomicU64>,
}

impl SemanticMemory {
    /// Create a new `SemanticMemory` instance with default hybrid search weights (0.7/0.3).
    ///
    /// Qdrant connection is best-effort: if unavailable, semantic search is disabled.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
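    ///
    /// # Example
    ///
    /// A minimal sketch, assuming an already-constructed `AnyProvider`; the
    /// database path, Qdrant URL, and model name below are illustrative only:
    ///
    /// ```ignore
    /// let memory = SemanticMemory::new(
    ///     "zeph.db",                // SQLite database path
    ///     "http://localhost:6334",  // Qdrant endpoint (connection is best-effort)
    ///     provider,                 // an `AnyProvider`
    ///     "nomic-embed-text",       // embedding model name
    /// )
    /// .await?;
    /// ```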
    pub async fn new(
        sqlite_path: &str,
        qdrant_url: &str,
        provider: AnyProvider,
        embedding_model: &str,
    ) -> Result<Self, MemoryError> {
        Self::with_weights(sqlite_path, qdrant_url, provider, embedding_model, 0.7, 0.3).await
    }

    /// Create a new `SemanticMemory` with custom vector/keyword weights for hybrid search.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_weights(
        sqlite_path: &str,
        qdrant_url: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
    ) -> Result<Self, MemoryError> {
        Self::with_weights_and_pool_size(
            sqlite_path,
            qdrant_url,
            provider,
            embedding_model,
            vector_weight,
            keyword_weight,
            5,
        )
        .await
    }

    /// Create a new `SemanticMemory` with custom weights and configurable pool size.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_weights_and_pool_size(
        sqlite_path: &str,
        qdrant_url: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
        pool_size: u32,
    ) -> Result<Self, MemoryError> {
        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
        let pool = sqlite.pool().clone();

        let qdrant = match EmbeddingStore::new(qdrant_url, pool) {
            Ok(store) => Some(Arc::new(store)),
            Err(e) => {
                tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
                None
            }
        };

        Ok(Self {
            sqlite,
            qdrant,
            provider,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay_enabled: false,
            temporal_decay_half_life_days: 30,
            mmr_enabled: false,
            mmr_lambda: 0.7,
            token_counter: Arc::new(TokenCounter::new()),
            #[cfg(feature = "graph-memory")]
            graph_store: None,
            #[cfg(feature = "graph-memory")]
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            #[cfg(feature = "graph-memory")]
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            #[cfg(feature = "graph-memory")]
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
        })
    }

    /// Attach a `GraphStore` for graph-aware retrieval.
    ///
    /// When set, `recall_graph` traverses the graph starting from entities
    /// matched by the query.
    #[cfg(feature = "graph-memory")]
    #[must_use]
    pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
        self.graph_store = Some(store);
        self
    }

    /// Returns the cumulative count of community detection failures since startup.
    #[cfg(feature = "graph-memory")]
    #[must_use]
    pub fn community_detection_failures(&self) -> u64 {
        self.community_detection_failures.load(Ordering::Relaxed)
    }

    /// Returns the cumulative count of successful graph extractions since startup.
    #[cfg(feature = "graph-memory")]
    #[must_use]
    pub fn graph_extraction_count(&self) -> u64 {
        self.graph_extraction_count.load(Ordering::Relaxed)
    }

    /// Returns the cumulative count of failed graph extractions since startup.
    #[cfg(feature = "graph-memory")]
    #[must_use]
    pub fn graph_extraction_failures(&self) -> u64 {
        self.graph_extraction_failures.load(Ordering::Relaxed)
    }

    /// Configure temporal decay and MMR re-ranking options.
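    ///
    /// A minimal sketch of the builder chain (values are illustrative):
    ///
    /// ```ignore
    /// let memory = memory.with_ranking_options(
    ///     true, // enable temporal decay
    ///     30,   // half-life in days
    ///     true, // enable MMR re-ranking
    ///     0.7,  // MMR lambda: 1.0 favors relevance, 0.0 favors diversity
    /// );
    /// ```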
    #[must_use]
    pub fn with_ranking_options(
        mut self,
        temporal_decay_enabled: bool,
        temporal_decay_half_life_days: u32,
        mmr_enabled: bool,
        mmr_lambda: f32,
    ) -> Self {
        self.temporal_decay_enabled = temporal_decay_enabled;
        self.temporal_decay_half_life_days = temporal_decay_half_life_days;
        self.mmr_enabled = mmr_enabled;
        self.mmr_lambda = mmr_lambda;
        self
    }

    /// Construct a `SemanticMemory` from pre-built parts.
    ///
    /// Intended for tests that need full control over the backing stores.
    #[cfg(any(test, feature = "mock"))]
    #[must_use]
    pub fn from_parts(
        sqlite: SqliteStore,
        qdrant: Option<Arc<EmbeddingStore>>,
        provider: AnyProvider,
        embedding_model: impl Into<String>,
        vector_weight: f64,
        keyword_weight: f64,
        token_counter: Arc<TokenCounter>,
    ) -> Self {
        Self {
            sqlite,
            qdrant,
            provider,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay_enabled: false,
            temporal_decay_half_life_days: 30,
            mmr_enabled: false,
            mmr_lambda: 0.7,
            token_counter,
            #[cfg(feature = "graph-memory")]
            graph_store: None,
            #[cfg(feature = "graph-memory")]
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            #[cfg(feature = "graph-memory")]
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            #[cfg(feature = "graph-memory")]
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
        }
    }

    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_sqlite_backend(
        sqlite_path: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
    ) -> Result<Self, MemoryError> {
        Self::with_sqlite_backend_and_pool_size(
            sqlite_path,
            provider,
            embedding_model,
            vector_weight,
            keyword_weight,
            5,
        )
        .await
    }

    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend with configurable pool size.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_sqlite_backend_and_pool_size(
        sqlite_path: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
        pool_size: u32,
    ) -> Result<Self, MemoryError> {
        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
        let pool = sqlite.pool().clone();
        let store = EmbeddingStore::new_sqlite(pool);

        Ok(Self {
            sqlite,
            qdrant: Some(Arc::new(store)),
            provider,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay_enabled: false,
            temporal_decay_half_life_days: 30,
            mmr_enabled: false,
            mmr_lambda: 0.7,
            token_counter: Arc::new(TokenCounter::new()),
            #[cfg(feature = "graph-memory")]
            graph_store: None,
            #[cfg(feature = "graph-memory")]
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            #[cfg(feature = "graph-memory")]
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            #[cfg(feature = "graph-memory")]
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
        })
    }

    /// Save a message to `SQLite` and optionally embed and store in Qdrant.
    ///
    /// Returns the message ID assigned by `SQLite`.
    ///
    /// # Errors
    ///
    /// Returns an error if the `SQLite` save fails. Embedding failures are logged but not
    /// propagated.
    pub async fn remember(
        &self,
        conversation_id: ConversationId,
        role: &str,
        content: &str,
    ) -> Result<MessageId, MemoryError> {
        let message_id = self
            .sqlite
            .save_message(conversation_id, role, content)
            .await?;

        if let Some(qdrant) = &self.qdrant
            && self.provider.supports_embeddings()
        {
            match self.provider.embed(content).await {
                Ok(vector) => {
                    // Ensure collection exists before storing
                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
                    } else if let Err(e) = qdrant
                        .store(
                            message_id,
                            conversation_id,
                            role,
                            vector,
                            MessageKind::Regular,
                            &self.embedding_model,
                        )
                        .await
                    {
                        tracing::warn!("Failed to store embedding: {e:#}");
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to generate embedding: {e:#}");
                }
            }
        }

        Ok(message_id)
    }

    /// Save a message with pre-serialized parts JSON to `SQLite` and optionally embed in Qdrant.
    ///
    /// Returns a `(message_id, embedding_stored)` tuple where `embedding_stored` is `true` if
    /// an embedding was successfully generated and stored in Qdrant.
    ///
    /// # Errors
    ///
    /// Returns an error if the `SQLite` save fails.
    pub async fn remember_with_parts(
        &self,
        conversation_id: ConversationId,
        role: &str,
        content: &str,
        parts_json: &str,
    ) -> Result<(MessageId, bool), MemoryError> {
        let message_id = self
            .sqlite
            .save_message_with_parts(conversation_id, role, content, parts_json)
            .await?;

        let mut embedding_stored = false;

        if let Some(qdrant) = &self.qdrant
            && self.provider.supports_embeddings()
        {
            match self.provider.embed(content).await {
                Ok(vector) => {
                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
                    } else if let Err(e) = qdrant
                        .store(
                            message_id,
                            conversation_id,
                            role,
                            vector,
                            MessageKind::Regular,
                            &self.embedding_model,
                        )
                        .await
                    {
                        tracing::warn!("Failed to store embedding: {e:#}");
                    } else {
                        embedding_stored = true;
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to generate embedding: {e:#}");
                }
            }
        }

        Ok((message_id, embedding_stored))
    }

    /// Save a message to `SQLite` without generating an embedding.
    ///
    /// Use this when embedding is intentionally skipped (e.g. autosave disabled for assistant messages).
    ///
    /// # Errors
    ///
    /// Returns an error if the `SQLite` save fails.
    pub async fn save_only(
        &self,
        conversation_id: ConversationId,
        role: &str,
        content: &str,
        parts_json: &str,
    ) -> Result<MessageId, MemoryError> {
        self.sqlite
            .save_message_with_parts(conversation_id, role, content, parts_json)
            .await
    }

    /// Recall relevant messages using hybrid search (vector + FTS5 keyword).
    ///
    /// When Qdrant is available, runs both vector and keyword searches, then merges
    /// results using weighted scoring. When Qdrant is unavailable, falls back to
    /// FTS5-only keyword search.
    ///
    /// # Errors
    ///
    /// Returns an error if embedding generation, Qdrant search, or FTS5 query fails.
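    ///
    /// A minimal usage sketch (illustrative only); pass a `SearchFilter` to
    /// scope the search to one conversation:
    ///
    /// ```ignore
    /// let hits = memory.recall("deployment checklist", 5, None).await?;
    /// for hit in &hits {
    ///     println!("{:.3} {}", hit.score, hit.message.content);
    /// }
    /// ```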
    pub async fn recall(
        &self,
        query: &str,
        limit: usize,
        filter: Option<SearchFilter>,
    ) -> Result<Vec<RecalledMessage>, MemoryError> {
        let conversation_id = filter.as_ref().and_then(|f| f.conversation_id);

        // FTS5 keyword search (always available)
        let keyword_results = match self
            .sqlite
            .keyword_search(query, limit * 2, conversation_id)
            .await
        {
            Ok(results) => results,
            Err(e) => {
                tracing::warn!("FTS5 keyword search failed: {e:#}");
                Vec::new()
            }
        };

        // Vector search (only when Qdrant available)
        let vector_results = if let Some(qdrant) = &self.qdrant
            && self.provider.supports_embeddings()
        {
            let query_vector = self.provider.embed(query).await?;
            let vector_size = u64::try_from(query_vector.len()).unwrap_or(896);
            qdrant.ensure_collection(vector_size).await?;
            qdrant.search(&query_vector, limit * 2, filter).await?
        } else {
            Vec::new()
        };

        self.recall_merge_and_rank(keyword_results, vector_results, limit)
            .await
    }

    /// Raw FTS5 keyword search results: returns `(MessageId, score)` pairs without ranking.
    ///
    /// # Errors
    ///
    /// Returns an error if the `SQLite` FTS5 query fails.
    async fn recall_fts5_raw(
        &self,
        query: &str,
        limit: usize,
        conversation_id: Option<ConversationId>,
    ) -> Result<Vec<(MessageId, f64)>, MemoryError> {
        self.sqlite
            .keyword_search(query, limit * 2, conversation_id)
            .await
    }

    /// Raw vector search results from Qdrant. Returns an empty `Vec` when Qdrant is unavailable
    /// or the provider does not support embeddings.
    ///
    /// # Errors
    ///
    /// Returns an error if embedding generation or Qdrant search fails.
    async fn recall_vectors_raw(
        &self,
        query: &str,
        limit: usize,
        filter: Option<SearchFilter>,
    ) -> Result<Vec<crate::embedding_store::SearchResult>, MemoryError> {
        let Some(qdrant) = &self.qdrant else {
            return Ok(Vec::new());
        };
        if !self.provider.supports_embeddings() {
            return Ok(Vec::new());
        }
        let query_vector = self.provider.embed(query).await?;
        let vector_size = u64::try_from(query_vector.len()).unwrap_or(896);
        qdrant.ensure_collection(vector_size).await?;
        qdrant.search(&query_vector, limit * 2, filter).await
    }

    /// Merge raw keyword and vector results, apply weighted scoring, temporal decay, and MMR
    /// re-ranking, then resolve to `RecalledMessage` objects.
    ///
    /// This is the shared post-processing step used by all recall paths.
    ///
    /// # Errors
    ///
    /// Returns an error if the `SQLite` `messages_by_ids` query fails.
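    ///
    /// Scores from each source are max-normalized, then combined as
    /// `vector_weight * (vs / max_vs) + keyword_weight * (ks / max_ks)`.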
    #[allow(clippy::cast_possible_truncation)]
    async fn recall_merge_and_rank(
        &self,
        keyword_results: Vec<(MessageId, f64)>,
        vector_results: Vec<crate::embedding_store::SearchResult>,
        limit: usize,
    ) -> Result<Vec<RecalledMessage>, MemoryError> {
        let mut scores: std::collections::HashMap<MessageId, f64> =
            std::collections::HashMap::new();

        if !vector_results.is_empty() {
            let max_vs = vector_results
                .iter()
                .map(|r| r.score)
                .fold(f32::NEG_INFINITY, f32::max);
            let norm = if max_vs > 0.0 { max_vs } else { 1.0 };
            for r in &vector_results {
                let normalized = f64::from(r.score / norm);
                *scores.entry(r.message_id).or_default() += normalized * self.vector_weight;
            }
        }

        if !keyword_results.is_empty() {
            let max_ks = keyword_results
                .iter()
                .map(|r| r.1)
                .fold(f64::NEG_INFINITY, f64::max);
            let norm = if max_ks > 0.0 { max_ks } else { 1.0 };
            for &(msg_id, score) in &keyword_results {
                let normalized = score / norm;
                *scores.entry(msg_id).or_default() += normalized * self.keyword_weight;
            }
        }

        if scores.is_empty() {
            return Ok(Vec::new());
        }

        let mut ranked: Vec<(MessageId, f64)> = scores.into_iter().collect();
        ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        if self.temporal_decay_enabled && self.temporal_decay_half_life_days > 0 {
            let ids: Vec<MessageId> = ranked.iter().map(|r| r.0).collect();
            match self.sqlite.message_timestamps(&ids).await {
                Ok(timestamps) => {
                    apply_temporal_decay(
                        &mut ranked,
                        &timestamps,
                        self.temporal_decay_half_life_days,
                    );
                    ranked
                        .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
                }
                Err(e) => {
                    tracing::warn!("temporal decay: failed to fetch timestamps: {e:#}");
                }
            }
        }

        if self.mmr_enabled && !vector_results.is_empty() {
            if let Some(qdrant) = &self.qdrant {
                let ids: Vec<MessageId> = ranked.iter().map(|r| r.0).collect();
                match qdrant.get_vectors(&ids).await {
                    Ok(vec_map) if !vec_map.is_empty() => {
                        ranked = apply_mmr(&ranked, &vec_map, self.mmr_lambda, limit);
                    }
                    Ok(_) => {
                        ranked.truncate(limit);
                    }
                    Err(e) => {
                        tracing::warn!("MMR: failed to fetch vectors: {e:#}");
                        ranked.truncate(limit);
                    }
                }
            } else {
                ranked.truncate(limit);
            }
        } else {
            ranked.truncate(limit);
        }

        let ids: Vec<MessageId> = ranked.iter().map(|r| r.0).collect();
        let messages = self.sqlite.messages_by_ids(&ids).await?;
        let msg_map: std::collections::HashMap<MessageId, _> = messages.into_iter().collect();

        let recalled = ranked
            .iter()
            .filter_map(|(msg_id, score)| {
                msg_map.get(msg_id).map(|msg| RecalledMessage {
                    message: msg.clone(),
                    #[expect(clippy::cast_possible_truncation)]
                    score: *score as f32,
                })
            })
            .collect();

        Ok(recalled)
    }

    /// Recall messages using query-aware routing.
    ///
    /// Delegates to FTS5-only, vector-only, or hybrid search based on the router decision,
    /// then runs the shared merge and ranking pipeline.
    ///
    /// # Errors
    ///
    /// Returns an error if any underlying search or database operation fails.
    pub async fn recall_routed(
        &self,
        query: &str,
        limit: usize,
        filter: Option<SearchFilter>,
        router: &dyn crate::router::MemoryRouter,
    ) -> Result<Vec<RecalledMessage>, MemoryError> {
        use crate::router::MemoryRoute;

        let route = router.route(query);
        tracing::debug!(?route, query_len = query.len(), "memory routing decision");

        let conversation_id = filter.as_ref().and_then(|f| f.conversation_id);

        let (keyword_results, vector_results): (
            Vec<(MessageId, f64)>,
            Vec<crate::embedding_store::SearchResult>,
        ) = match route {
            MemoryRoute::Keyword => {
                let kw = self.recall_fts5_raw(query, limit, conversation_id).await?;
                (kw, Vec::new())
            }
            MemoryRoute::Semantic => {
                let vr = self.recall_vectors_raw(query, limit, filter).await?;
                (Vec::new(), vr)
            }
            MemoryRoute::Hybrid => {
                // FTS5 errors are swallowed gracefully to allow vector-only fallback.
                let kw = match self.recall_fts5_raw(query, limit, conversation_id).await {
                    Ok(r) => r,
                    Err(e) => {
                        tracing::warn!("FTS5 keyword search failed: {e:#}");
                        Vec::new()
                    }
                };
                // Vector errors propagate — if Qdrant is unavailable, recall_vectors_raw
                // returns an empty Vec (not an error), so `?` only fires on embedding or
                // Qdrant search failures.
                let vr = self.recall_vectors_raw(query, limit, filter).await?;
                (kw, vr)
            }
            // Graph routing triggers graph_recall separately in agent/context.rs.
            // For the message-based recall, behave like Hybrid.
            MemoryRoute::Graph => {
                let kw = match self.recall_fts5_raw(query, limit, conversation_id).await {
                    Ok(r) => r,
                    Err(e) => {
                        tracing::warn!("FTS5 keyword search failed (graph→hybrid fallback): {e:#}");
                        Vec::new()
                    }
                };
                let vr = self.recall_vectors_raw(query, limit, filter).await?;
                (kw, vr)
            }
        };

        self.recall_merge_and_rank(keyword_results, vector_results, limit)
            .await
    }

    /// Retrieve graph facts relevant to `query` via BFS traversal.
    ///
    /// Returns an empty `Vec` if no `graph_store` is configured.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying graph query fails.
    #[cfg(feature = "graph-memory")]
    pub async fn recall_graph(
        &self,
        query: &str,
        limit: usize,
        max_hops: u32,
    ) -> Result<Vec<crate::graph::types::GraphFact>, MemoryError> {
        let Some(store) = &self.graph_store else {
            return Ok(Vec::new());
        };
        crate::graph::retrieval::graph_recall(
            store,
            self.qdrant.as_deref(),
            &self.provider,
            query,
            limit,
            max_hops,
        )
        .await
    }

    /// Check whether an embedding exists for a given message ID.
    ///
    /// # Errors
    ///
    /// Returns an error if the `SQLite` query fails.
    pub async fn has_embedding(&self, message_id: MessageId) -> Result<bool, MemoryError> {
        match &self.qdrant {
            Some(qdrant) => qdrant.has_embedding(message_id).await,
            None => Ok(false),
        }
    }

    /// Embed all messages that do not yet have embeddings.
    ///
    /// Returns the count of successfully embedded messages.
    ///
    /// # Errors
    ///
    /// Returns an error if collection initialization or database query fails.
    /// Individual embedding failures are logged but do not stop processing.
    pub async fn embed_missing(&self) -> Result<usize, MemoryError> {
        let Some(qdrant) = &self.qdrant else {
            return Ok(0);
        };
        if !self.provider.supports_embeddings() {
            return Ok(0);
        }

        let unembedded = self.sqlite.unembedded_message_ids(Some(1000)).await?;

        if unembedded.is_empty() {
            return Ok(0);
        }

        let probe = self.provider.embed("probe").await?;
        let vector_size = u64::try_from(probe.len())?;
        qdrant.ensure_collection(vector_size).await?;

        let mut count = 0;
        for (msg_id, conversation_id, role, content) in &unembedded {
            match self.provider.embed(content).await {
                Ok(vector) => {
                    if let Err(e) = qdrant
                        .store(
                            *msg_id,
                            *conversation_id,
                            role,
                            vector,
                            MessageKind::Regular,
                            &self.embedding_model,
                        )
                        .await
                    {
                        tracing::warn!("Failed to store embedding for msg {msg_id}: {e:#}");
                        continue;
                    }
                    count += 1;
                }
                Err(e) => {
                    tracing::warn!("Failed to embed msg {msg_id}: {e:#}");
                }
            }
        }

        tracing::info!("Embedded {count}/{} missing messages", unembedded.len());
        Ok(count)
    }

    /// Store a session summary into the dedicated `zeph_session_summaries` Qdrant collection.
    ///
    /// # Errors
    ///
    /// Returns an error if embedding or Qdrant storage fails.
    pub async fn store_session_summary(
        &self,
        conversation_id: ConversationId,
        summary_text: &str,
    ) -> Result<(), MemoryError> {
        let Some(qdrant) = &self.qdrant else {
            return Ok(());
        };
        if !self.provider.supports_embeddings() {
            return Ok(());
        }

        let vector = self.provider.embed(summary_text).await?;
        let vector_size = u64::try_from(vector.len()).unwrap_or(896);
        qdrant
            .ensure_named_collection(SESSION_SUMMARIES_COLLECTION, vector_size)
            .await?;

        let payload = serde_json::json!({
            "conversation_id": conversation_id.0,
            "summary_text": summary_text,
        });

        qdrant
            .store_to_collection(SESSION_SUMMARIES_COLLECTION, payload, vector)
            .await?;

        tracing::debug!(
            conversation_id = conversation_id.0,
            "stored session summary"
        );
        Ok(())
    }

    /// Search session summaries from other conversations.
    ///
    /// # Errors
    ///
    /// Returns an error if embedding or Qdrant search fails.
    pub async fn search_session_summaries(
        &self,
        query: &str,
        limit: usize,
        exclude_conversation_id: Option<ConversationId>,
    ) -> Result<Vec<SessionSummaryResult>, MemoryError> {
        let Some(qdrant) = &self.qdrant else {
            return Ok(Vec::new());
        };
        if !self.provider.supports_embeddings() {
            return Ok(Vec::new());
        }

        let vector = self.provider.embed(query).await?;
        let vector_size = u64::try_from(vector.len()).unwrap_or(896);
        qdrant
            .ensure_named_collection(SESSION_SUMMARIES_COLLECTION, vector_size)
            .await?;

        let filter = exclude_conversation_id.map(|cid| VectorFilter {
            must: vec![],
            must_not: vec![FieldCondition {
                field: "conversation_id".into(),
                value: FieldValue::Integer(cid.0),
            }],
        });

        let points = qdrant
            .search_collection(SESSION_SUMMARIES_COLLECTION, &vector, limit, filter)
            .await?;

        let results = points
            .into_iter()
            .filter_map(|point| {
                let summary_text = point.payload.get("summary_text")?.as_str()?.to_owned();
                let conversation_id =
                    ConversationId(point.payload.get("conversation_id")?.as_i64()?);
                Some(SessionSummaryResult {
                    summary_text,
                    score: point.score,
                    conversation_id,
                })
            })
            .collect();

        Ok(results)
    }

    /// Access the underlying `SqliteStore` for operations that don't involve semantics.
    #[must_use]
    pub fn sqlite(&self) -> &SqliteStore {
        &self.sqlite
    }

    /// Check if the vector store backend is reachable.
    ///
    /// Performs a real health check (Qdrant gRPC ping or `SQLite` query)
    /// instead of just checking whether the client was created.
    pub async fn is_vector_store_connected(&self) -> bool {
        match self.qdrant.as_ref() {
            Some(store) => store.health_check().await,
            None => false,
        }
    }

    /// Check if a vector store client is configured (may not be connected).
    #[must_use]
    pub fn has_vector_store(&self) -> bool {
        self.qdrant.is_some()
    }

    /// Return a reference to the embedding store, if configured.
    #[must_use]
    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
        self.qdrant.as_ref()
    }

    /// Count messages in a conversation.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails.
    pub async fn message_count(&self, conversation_id: ConversationId) -> Result<i64, MemoryError> {
        self.sqlite.count_messages(conversation_id).await
    }

    /// Count messages not yet covered by any summary.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails.
    pub async fn unsummarized_message_count(
        &self,
        conversation_id: ConversationId,
    ) -> Result<i64, MemoryError> {
        let after_id = self
            .sqlite
            .latest_summary_last_message_id(conversation_id)
            .await?
            .unwrap_or(MessageId(0));
        self.sqlite
            .count_messages_after(conversation_id, after_id)
            .await
    }

    /// Load all summaries for a conversation.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails.
    pub async fn load_summaries(
        &self,
        conversation_id: ConversationId,
    ) -> Result<Vec<Summary>, MemoryError> {
        let rows = self.sqlite.load_summaries(conversation_id).await?;
        let summaries = rows
            .into_iter()
            .map(
                |(
                    id,
                    conversation_id,
                    content,
                    first_message_id,
                    last_message_id,
                    token_estimate,
                )| {
                    Summary {
                        id,
                        conversation_id,
                        content,
                        first_message_id,
                        last_message_id,
                        token_estimate,
                    }
                },
            )
            .collect();
        Ok(summaries)
    }

    /// Generate a summary of the oldest unsummarized messages.
    ///
    /// Returns `Ok(None)` if there are not enough messages to summarize.
    ///
    /// # Errors
    ///
    /// Returns an error if LLM call or database operation fails.
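    ///
    /// A minimal usage sketch (illustrative only):
    ///
    /// ```ignore
    /// // Summarize once more than 50 messages have accumulated.
    /// if let Some(summary_id) = memory.summarize(conversation_id, 50).await? {
    ///     tracing::info!(summary_id, "created summary");
    /// }
    /// ```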
1091    pub async fn summarize(
1092        &self,
1093        conversation_id: ConversationId,
1094        message_count: usize,
1095    ) -> Result<Option<i64>, MemoryError> {
1096        let total = self.sqlite.count_messages(conversation_id).await?;
1097
1098        if total <= i64::try_from(message_count)? {
1099            return Ok(None);
1100        }
1101
1102        let after_id = self
1103            .sqlite
1104            .latest_summary_last_message_id(conversation_id)
1105            .await?
1106            .unwrap_or(MessageId(0));
1107
1108        let messages = self
1109            .sqlite
1110            .load_messages_range(conversation_id, after_id, message_count)
1111            .await?;
1112
1113        if messages.is_empty() {
1114            return Ok(None);
1115        }
1116
1117        let prompt = build_summarization_prompt(&messages);
1118        let chat_messages = vec![Message {
1119            role: Role::User,
1120            content: prompt,
1121            parts: vec![],
1122            metadata: MessageMetadata::default(),
1123        }];
1124
1125        let structured = match self
1126            .provider
1127            .chat_typed_erased::<StructuredSummary>(&chat_messages)
1128            .await
1129        {
1130            Ok(s) => s,
1131            Err(e) => {
1132                tracing::warn!(
1133                    "structured summarization failed, falling back to plain text: {e:#}"
1134                );
1135                let plain = self.provider.chat(&chat_messages).await?;
1136                StructuredSummary {
1137                    summary: plain,
1138                    key_facts: vec![],
1139                    entities: vec![],
1140                }
1141            }
1142        };
1143        let summary_text = &structured.summary;
1144
1145        let token_estimate = i64::try_from(self.token_counter.count_tokens(summary_text))?;
1146        let first_message_id = messages[0].0;
1147        let last_message_id = messages[messages.len() - 1].0;
1148
1149        let summary_id = self
1150            .sqlite
1151            .save_summary(
1152                conversation_id,
1153                summary_text,
1154                first_message_id,
1155                last_message_id,
1156                token_estimate,
1157            )
1158            .await?;
1159
1160        if let Some(qdrant) = &self.qdrant
1161            && self.provider.supports_embeddings()
1162        {
1163            match self.provider.embed(summary_text).await {
1164                Ok(vector) => {
1165                    // Ensure collection exists before storing
1166                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
1167                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
1168                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
1169                    } else if let Err(e) = qdrant
1170                        .store(
1171                            MessageId(summary_id),
1172                            conversation_id,
1173                            "system",
1174                            vector,
1175                            MessageKind::Summary,
1176                            &self.embedding_model,
1177                        )
1178                        .await
1179                    {
1180                        tracing::warn!("Failed to embed summary: {e:#}");
1181                    }
1182                }
1183                Err(e) => {
1184                    tracing::warn!("Failed to generate summary embedding: {e:#}");
1185                }
1186            }
1187        }
1188
1189        // Store key facts as individual Qdrant points
1190        if !structured.key_facts.is_empty() {
1191            self.store_key_facts(conversation_id, summary_id, &structured.key_facts)
1192                .await;
1193        }
1194
1195        Ok(Some(summary_id))
1196    }
1197
1198    async fn store_key_facts(
1199        &self,
1200        conversation_id: ConversationId,
1201        source_summary_id: i64,
1202        key_facts: &[String],
1203    ) {
1204        let Some(qdrant) = &self.qdrant else {
1205            return;
1206        };
1207        if !self.provider.supports_embeddings() {
1208            return;
1209        }
1210
1211        let Some(first_fact) = key_facts.first() else {
1212            return;
1213        };
1214        let first_vector = match self.provider.embed(first_fact).await {
1215            Ok(v) => v,
1216            Err(e) => {
1217                tracing::warn!("Failed to embed key fact: {e:#}");
1218                return;
1219            }
1220        };
1221        let vector_size = u64::try_from(first_vector.len()).unwrap_or(896);
1222        if let Err(e) = qdrant
1223            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
1224            .await
1225        {
1226            tracing::warn!("Failed to ensure key_facts collection: {e:#}");
1227            return;
1228        }
1229
1230        let first_payload = serde_json::json!({
1231            "conversation_id": conversation_id.0,
1232            "fact_text": first_fact,
1233            "source_summary_id": source_summary_id,
1234        });
1235        if let Err(e) = qdrant
1236            .store_to_collection(KEY_FACTS_COLLECTION, first_payload, first_vector)
1237            .await
1238        {
1239            tracing::warn!("Failed to store key fact: {e:#}");
1240        }
1241
1242        for fact in &key_facts[1..] {
1243            match self.provider.embed(fact).await {
1244                Ok(vector) => {
1245                    let payload = serde_json::json!({
1246                        "conversation_id": conversation_id.0,
1247                        "fact_text": fact,
1248                        "source_summary_id": source_summary_id,
1249                    });
1250                    if let Err(e) = qdrant
1251                        .store_to_collection(KEY_FACTS_COLLECTION, payload, vector)
1252                        .await
1253                    {
1254                        tracing::warn!("Failed to store key fact: {e:#}");
1255                    }
1256                }
1257                Err(e) => {
1258                    tracing::warn!("Failed to embed key fact: {e:#}");
1259                }
1260            }
1261        }
1262    }
1263
1264    /// Search key facts extracted from conversation summaries.
1265    ///
1266    /// # Errors
1267    ///
1268    /// Returns an error if embedding or Qdrant search fails.
1269    pub async fn search_key_facts(
1270        &self,
1271        query: &str,
1272        limit: usize,
1273    ) -> Result<Vec<String>, MemoryError> {
1274        let Some(qdrant) = &self.qdrant else {
1275            return Ok(Vec::new());
1276        };
1277        if !self.provider.supports_embeddings() {
1278            return Ok(Vec::new());
1279        }
1280
1281        let vector = self.provider.embed(query).await?;
1282        let vector_size = u64::try_from(vector.len()).unwrap_or(896);
1283        qdrant
1284            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
1285            .await?;
1286
1287        let points = qdrant
1288            .search_collection(KEY_FACTS_COLLECTION, &vector, limit, None)
1289            .await?;
1290
1291        let facts = points
1292            .into_iter()
1293            .filter_map(|p| p.payload.get("fact_text")?.as_str().map(String::from))
1294            .collect();
1295
1296        Ok(facts)
1297    }
1298
1299    /// Search a named document collection by semantic similarity.
1300    ///
1301    /// Returns up to `limit` scored vector points whose payloads contain ingested document chunks.
1302    /// Returns an empty vec when Qdrant is unavailable, the collection does not exist,
1303    /// or the provider does not support embeddings.
1304    ///
1305    /// # Errors
1306    ///
1307    /// Returns an error if embedding generation or Qdrant search fails.
1308    pub async fn search_document_collection(
1309        &self,
1310        collection: &str,
1311        query: &str,
1312        limit: usize,
1313    ) -> Result<Vec<crate::ScoredVectorPoint>, MemoryError> {
1314        let Some(qdrant) = &self.qdrant else {
1315            return Ok(Vec::new());
1316        };
1317        if !self.provider.supports_embeddings() {
1318            return Ok(Vec::new());
1319        }
1320        if !qdrant.collection_exists(collection).await? {
1321            return Ok(Vec::new());
1322        }
1323        let vector = self.provider.embed(query).await?;
1324        qdrant
1325            .search_collection(collection, &vector, limit, None)
1326            .await
1327    }
1328
1329    /// Store an embedding for a user correction in the vector store.
1330    ///
1331    /// Silently skips if no vector store is configured or embeddings are unsupported.
1332    ///
1333    /// # Errors
1334    ///
1335    /// Returns an error if embedding generation or vector store write fails.
1336    pub async fn store_correction_embedding(
1337        &self,
1338        correction_id: i64,
1339        correction_text: &str,
1340    ) -> Result<(), MemoryError> {
1341        let Some(ref store) = self.qdrant else {
1342            return Ok(());
1343        };
1344        if !self.provider.supports_embeddings() {
1345            return Ok(());
1346        }
1347        let embedding = self
1348            .provider
1349            .embed(correction_text)
1350            .await
1351            .map_err(|e| MemoryError::Other(e.to_string()))?;
1352        let payload = serde_json::json!({ "correction_id": correction_id });
1353        store
1354            .store_to_collection(CORRECTIONS_COLLECTION, payload, embedding)
1355            .await?;
1356        Ok(())
1357    }
1358
1359    /// Retrieve corrections semantically similar to `query`.
1360    ///
1361    /// Returns up to `limit` corrections scoring above `min_score`.
1362    /// Returns an empty vec if no vector store is configured.
1363    ///
1364    /// # Errors
1365    ///
1366    /// Returns an error if embedding generation or vector search fails.
1367    pub async fn retrieve_similar_corrections(
1368        &self,
1369        query: &str,
1370        limit: usize,
1371        min_score: f32,
1372    ) -> Result<Vec<crate::sqlite::corrections::UserCorrectionRow>, MemoryError> {
1373        let Some(ref store) = self.qdrant else {
1374            return Ok(vec![]);
1375        };
1376        if !self.provider.supports_embeddings() {
1377            return Ok(vec![]);
1378        }
1379        let embedding = self
1380            .provider
1381            .embed(query)
1382            .await
1383            .map_err(|e| MemoryError::Other(e.to_string()))?;
1384        let scored = store
1385            .search_collection(CORRECTIONS_COLLECTION, &embedding, limit, None)
1386            .await
1387            .unwrap_or_default();
1388
1389        let mut results = Vec::new();
1390        for point in scored {
1391            if point.score < min_score {
1392                continue;
1393            }
1394            if let Some(id_val) = point.payload.get("correction_id")
1395                && let Some(id) = id_val.as_i64()
1396            {
1397                let rows = self.sqlite.load_corrections_for_id(id).await?;
1398                results.extend(rows);
1399            }
1400        }
1401        Ok(results)
1402    }
1403
1404    /// Spawn background graph extraction for a message. Fire-and-forget — never blocks.
1405    ///
1406    /// Extraction runs in a separate tokio task with a timeout. Any error or timeout is
1407    /// logged and the task exits silently; the agent response is never blocked.
1408    #[cfg(feature = "graph-memory")]
1409    pub fn spawn_graph_extraction(
1410        &self,
1411        content: String,
1412        context_messages: Vec<String>,
1413        config: GraphExtractionConfig,
1414    ) {
1415        let pool = self.sqlite.pool().clone();
1416        let provider = self.provider.clone();
1417        let failure_counter = self.community_detection_failures.clone();
1418        let extraction_count = self.graph_extraction_count.clone();
1419        let extraction_failures = self.graph_extraction_failures.clone();
1420
1421        tokio::spawn(async move {
1422            let timeout_dur = std::time::Duration::from_secs(config.extraction_timeout_secs);
1423            let extraction_ok = match tokio::time::timeout(
1424                timeout_dur,
1425                extract_and_store(
1426                    content,
1427                    context_messages,
1428                    provider.clone(),
1429                    pool.clone(),
1430                    config.clone(),
1431                ),
1432            )
1433            .await
1434            {
1435                Ok(Ok(stats)) => {
1436                    tracing::debug!(
1437                        entities = stats.entities_upserted,
1438                        edges = stats.edges_inserted,
1439                        "graph extraction completed"
1440                    );
1441                    extraction_count.fetch_add(1, Ordering::Relaxed);
1442                    true
1443                }
1444                Ok(Err(e)) => {
1445                    tracing::warn!("graph extraction failed: {e:#}");
1446                    extraction_failures.fetch_add(1, Ordering::Relaxed);
1447                    false
1448                }
1449                Err(_elapsed) => {
1450                    tracing::warn!("graph extraction timed out");
1451                    extraction_failures.fetch_add(1, Ordering::Relaxed);
1452                    false
1453                }
1454            };
1455
1456            if extraction_ok && config.community_refresh_interval > 0 {
1457                use crate::graph::GraphStore;
1458
1459                let store = GraphStore::new(pool.clone());
1460                let extraction_count = store.extraction_count().await.unwrap_or(0);
1461                if extraction_count > 0
1462                    && i64::try_from(config.community_refresh_interval)
1463                        .is_ok_and(|interval| extraction_count % interval == 0)
1464                {
1465                    tracing::info!(extraction_count, "triggering community detection refresh");
1466                    let store2 = GraphStore::new(pool);
1467                    let provider2 = provider;
1468                    let retention_days = config.expired_edge_retention_days;
1469                    let max_cap = config.max_entities_cap;
1470                    tokio::spawn(async move {
1471                        match crate::graph::community::detect_communities(&store2, &provider2).await
1472                        {
1473                            Ok(count) => {
1474                                tracing::info!(communities = count, "community detection complete");
1475                            }
1476                            Err(e) => {
1477                                tracing::warn!("community detection failed: {e:#}");
1478                                failure_counter.fetch_add(1, Ordering::Relaxed);
1479                            }
1480                        }
1481                        match crate::graph::community::run_graph_eviction(
1482                            &store2,
1483                            retention_days,
1484                            max_cap,
1485                        )
1486                        .await
1487                        {
1488                            Ok(stats) => {
1489                                tracing::info!(
1490                                    expired_edges = stats.expired_edges_deleted,
1491                                    orphan_entities = stats.orphan_entities_deleted,
1492                                    capped_entities = stats.capped_entities_deleted,
1493                                    "graph eviction complete"
1494                                );
1495                            }
1496                            Err(e) => {
1497                                tracing::warn!("graph eviction failed: {e:#}");
1498                            }
1499                        }
1500                    });
1501                }
1502            }
1503        });
1504    }
1505}
1506
1507/// Config for the spawned background extraction task.
1508///
1509/// Owned clone of the relevant fields from `GraphConfig` — no references, safe to send to
1510/// spawned tasks.
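///
/// A construction sketch with illustrative values (not defaults taken from
/// `GraphConfig`):
///
/// ```ignore
/// let cfg = GraphExtractionConfig {
///     max_entities: 32,
///     max_edges: 64,
///     extraction_timeout_secs: 30,
///     community_refresh_interval: 10,
///     expired_edge_retention_days: 7,
///     max_entities_cap: 10_000,
/// };
/// ```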
1511#[cfg(feature = "graph-memory")]
1512#[derive(Debug, Clone, Default)]
1513pub struct GraphExtractionConfig {
1514    pub max_entities: usize,
1515    pub max_edges: usize,
1516    pub extraction_timeout_secs: u64,
1517    pub community_refresh_interval: usize,
1518    pub expired_edge_retention_days: u32,
1519    pub max_entities_cap: usize,
1520}
1521
1522/// Stats returned from a completed extraction.
1523#[cfg(feature = "graph-memory")]
1524#[derive(Debug, Default)]
1525pub struct ExtractionStats {
1526    pub entities_upserted: usize,
1527    pub edges_inserted: usize,
1528}
1529
1530/// Extract entities and edges from `content` and persist them to the graph store.
1531///
1532/// This function runs inside a spawned task — it receives owned data only.
1533///
1534/// # Errors
1535///
1536/// Returns an error if the database query fails or LLM extraction fails.
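///
/// A sketch of a call site, mirroring the timeout wrapping used by the spawned
/// task above (variable names illustrative):
///
/// ```ignore
/// let outcome = tokio::time::timeout(
///     std::time::Duration::from_secs(config.extraction_timeout_secs),
///     extract_and_store(content, context_messages, provider, pool, config),
/// )
/// .await; // Ok(Ok(stats)) | Ok(Err(memory_err)) | Err(elapsed)
/// ```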
1537#[cfg(feature = "graph-memory")]
1538pub async fn extract_and_store(
1539    content: String,
1540    context_messages: Vec<String>,
1541    provider: AnyProvider,
1542    pool: sqlx::SqlitePool,
1543    config: GraphExtractionConfig,
1544) -> Result<ExtractionStats, MemoryError> {
1545    use crate::graph::{EntityResolver, GraphExtractor, GraphStore};
1546
1547    let extractor = GraphExtractor::new(provider, config.max_entities, config.max_edges);
1548    let ctx_refs: Vec<&str> = context_messages.iter().map(String::as_str).collect();
1549
1550    let store = GraphStore::new(pool);
1551
    // Increment the persisted attempt counter before extraction so it counts
    // every attempt, regardless of whether the LLM returns parseable results
    // (S1 fix).
1554    let pool = store.pool();
1555    sqlx::query(
1556        "INSERT INTO graph_metadata (key, value) VALUES ('extraction_count', '0')
1557         ON CONFLICT(key) DO NOTHING",
1558    )
1559    .execute(pool)
1560    .await?;
1561    sqlx::query(
1562        "UPDATE graph_metadata
1563         SET value = CAST(CAST(value AS INTEGER) + 1 AS TEXT)
1564         WHERE key = 'extraction_count'",
1565    )
1566    .execute(pool)
1567    .await?;
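    // The two statements above could equivalently be a single SQLite upsert:
    //   INSERT INTO graph_metadata (key, value) VALUES ('extraction_count', '1')
    //   ON CONFLICT(key) DO UPDATE SET value = CAST(CAST(value AS INTEGER) + 1 AS TEXT);
    // They are kept separate here so the seed row and the increment read clearly.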
1568
1569    let Some(result) = extractor.extract(&content, &ctx_refs).await? else {
1570        return Ok(ExtractionStats::default());
1571    };
1572
1573    let resolver = EntityResolver::new(&store);
1574
1575    let mut entities_upserted = 0usize;
1576    let mut entity_ids: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
1577
1578    for entity in &result.entities {
1579        match resolver
1580            .resolve(&entity.name, &entity.entity_type, entity.summary.as_deref())
1581            .await
1582        {
1583            Ok((id, _outcome)) => {
1584                entity_ids.insert(entity.name.clone(), id);
1585                entities_upserted += 1;
1586            }
1587            Err(e) => {
1588                tracing::debug!("graph: skipping entity {:?}: {e:#}", entity.name);
1589            }
1590        }
1591    }
1592
1593    let mut edges_inserted = 0usize;
1594    for edge in &result.edges {
1595        let (Some(&src_id), Some(&tgt_id)) =
1596            (entity_ids.get(&edge.source), entity_ids.get(&edge.target))
1597        else {
1598            tracing::debug!(
1599                "graph: skipping edge {:?}->{:?}: entity not resolved",
1600                edge.source,
1601                edge.target
1602            );
1603            continue;
1604        };
1605        match resolver
1606            .resolve_edge(src_id, tgt_id, &edge.relation, &edge.fact, 0.8, None)
1607            .await
1608        {
1609            Ok(Some(_)) => edges_inserted += 1,
1610            Ok(None) => {} // deduplicated
1611            Err(e) => {
1612                tracing::debug!("graph: skipping edge: {e:#}");
1613            }
1614        }
1615    }
1616
1617    Ok(ExtractionStats {
1618        entities_upserted,
1619        edges_inserted,
1620    })
1621}
1622
1623#[cfg(test)]
1624mod tests {
1625    use zeph_llm::mock::MockProvider;
1626    use zeph_llm::provider::Role;
1627
1628    use super::*;
1629
1630    fn test_provider() -> AnyProvider {
1631        AnyProvider::Mock(MockProvider::default())
1632    }
1633
    async fn test_semantic_memory(supports_embeddings: bool) -> SemanticMemory {
        // Mirror the flag on the mock so tests that pass `true` actually get a
        // provider that reports embedding support.
        let mut mock = MockProvider::default();
        mock.supports_embeddings = supports_embeddings;
        let provider = AnyProvider::Mock(mock);
1636        let sqlite = SqliteStore::new(":memory:").await.unwrap();
1637
1638        SemanticMemory {
1639            sqlite,
1640            qdrant: None,
1641            provider,
1642            embedding_model: "test-model".into(),
1643            vector_weight: 0.7,
1644            keyword_weight: 0.3,
1645            temporal_decay_enabled: false,
1646            temporal_decay_half_life_days: 30,
1647            mmr_enabled: false,
1648            mmr_lambda: 0.7,
1649            token_counter: Arc::new(TokenCounter::new()),
1650            #[cfg(feature = "graph-memory")]
1651            graph_store: None,
1652            #[cfg(feature = "graph-memory")]
1653            community_detection_failures: Arc::new(AtomicU64::new(0)),
1654            #[cfg(feature = "graph-memory")]
1655            graph_extraction_count: Arc::new(AtomicU64::new(0)),
1656            #[cfg(feature = "graph-memory")]
1657            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
1658        }
1659    }
1660
1661    #[tokio::test]
1662    async fn remember_saves_to_sqlite() {
1663        let memory = test_semantic_memory(false).await;
1664
1665        let cid = memory.sqlite.create_conversation().await.unwrap();
1666        let msg_id = memory.remember(cid, "user", "hello").await.unwrap();
1667
1668        assert_eq!(msg_id, MessageId(1));
1669
1670        let history = memory.sqlite.load_history(cid, 50).await.unwrap();
1671        assert_eq!(history.len(), 1);
1672        assert_eq!(history[0].role, Role::User);
1673        assert_eq!(history[0].content, "hello");
1674    }
1675
1676    #[tokio::test]
1677    async fn remember_with_parts_saves_parts_json() {
1678        let memory = test_semantic_memory(false).await;
1679        let cid = memory.sqlite.create_conversation().await.unwrap();
1680
1681        let parts_json =
1682            r#"[{"kind":"ToolOutput","tool_name":"shell","body":"hello","compacted_at":null}]"#;
1683        let (msg_id, _embedding_stored) = memory
1684            .remember_with_parts(cid, "assistant", "tool output", parts_json)
1685            .await
1686            .unwrap();
1687        assert!(msg_id > MessageId(0));
1688
1689        let history = memory.sqlite.load_history(cid, 50).await.unwrap();
1690        assert_eq!(history.len(), 1);
1691        assert_eq!(history[0].content, "tool output");
1692    }
1693
1694    #[tokio::test]
1695    async fn recall_returns_empty_without_qdrant() {
1696        let memory = test_semantic_memory(true).await;
1697
1698        let recalled = memory.recall("test", 5, None).await.unwrap();
1699        assert!(recalled.is_empty());
1700    }
1701
1702    #[tokio::test]
1703    async fn has_embedding_without_qdrant() {
1704        let memory = test_semantic_memory(true).await;
1705
1706        let has_embedding = memory.has_embedding(MessageId(1)).await.unwrap();
1707        assert!(!has_embedding);
1708    }
1709
1710    #[tokio::test]
1711    async fn embed_missing_without_qdrant() {
1712        let memory = test_semantic_memory(true).await;
1713
1714        let count = memory.embed_missing().await.unwrap();
1715        assert_eq!(count, 0);
1716    }
1717
1718    #[tokio::test]
1719    async fn sqlite_accessor() {
1720        let memory = test_semantic_memory(false).await;
1721
1722        let cid = memory.sqlite().create_conversation().await.unwrap();
1723        assert_eq!(cid, ConversationId(1));
1724
1725        memory
1726            .sqlite()
1727            .save_message(cid, "user", "test")
1728            .await
1729            .unwrap();
1730
1731        let history = memory.sqlite().load_history(cid, 50).await.unwrap();
1732        assert_eq!(history.len(), 1);
1733    }
1734
1735    #[tokio::test]
1736    async fn has_vector_store_returns_false_when_unavailable() {
1737        let memory = test_semantic_memory(false).await;
1738        assert!(!memory.has_vector_store());
1739    }
1740
1741    #[tokio::test]
1742    async fn is_vector_store_connected_returns_false_when_unavailable() {
1743        let memory = test_semantic_memory(false).await;
1744        assert!(!memory.is_vector_store_connected().await);
1745    }
1746
1747    #[tokio::test]
1748    async fn recall_returns_empty_when_embeddings_not_supported() {
1749        let memory = test_semantic_memory(false).await;
1750
1751        let recalled = memory.recall("test", 5, None).await.unwrap();
1752        assert!(recalled.is_empty());
1753    }
1754
1755    #[tokio::test]
1756    async fn embed_missing_returns_zero_when_embeddings_not_supported() {
1757        let memory = test_semantic_memory(false).await;
1758
1759        let cid = memory.sqlite().create_conversation().await.unwrap();
1760        memory
1761            .sqlite()
1762            .save_message(cid, "user", "test")
1763            .await
1764            .unwrap();
1765
1766        let count = memory.embed_missing().await.unwrap();
1767        assert_eq!(count, 0);
1768    }
1769
1770    #[tokio::test]
1771    async fn message_count_empty_conversation() {
1772        let memory = test_semantic_memory(false).await;
1773        let cid = memory.sqlite().create_conversation().await.unwrap();
1774
1775        let count = memory.message_count(cid).await.unwrap();
1776        assert_eq!(count, 0);
1777    }
1778
1779    #[tokio::test]
1780    async fn message_count_after_saves() {
1781        let memory = test_semantic_memory(false).await;
1782        let cid = memory.sqlite().create_conversation().await.unwrap();
1783
1784        memory.remember(cid, "user", "msg1").await.unwrap();
1785        memory.remember(cid, "assistant", "msg2").await.unwrap();
1786
1787        let count = memory.message_count(cid).await.unwrap();
1788        assert_eq!(count, 2);
1789    }
1790
1791    #[tokio::test]
1792    async fn unsummarized_count_decreases_after_summary() {
1793        let memory = test_semantic_memory(false).await;
1794        let cid = memory.sqlite().create_conversation().await.unwrap();
1795
1796        for i in 0..10 {
1797            memory
1798                .remember(cid, "user", &format!("msg{i}"))
1799                .await
1800                .unwrap();
1801        }
1802        assert_eq!(memory.unsummarized_message_count(cid).await.unwrap(), 10);
1803
1804        memory.summarize(cid, 5).await.unwrap();
1805
1806        assert!(memory.unsummarized_message_count(cid).await.unwrap() < 10);
1807        assert_eq!(memory.message_count(cid).await.unwrap(), 10);
1808    }
1809
1810    #[tokio::test]
1811    async fn load_summaries_empty() {
1812        let memory = test_semantic_memory(false).await;
1813        let cid = memory.sqlite().create_conversation().await.unwrap();
1814
1815        let summaries = memory.load_summaries(cid).await.unwrap();
1816        assert!(summaries.is_empty());
1817    }
1818
1819    #[tokio::test]
1820    async fn load_summaries_ordered() {
1821        let memory = test_semantic_memory(false).await;
1822        let cid = memory.sqlite().create_conversation().await.unwrap();
1823
1824        let msg_id1 = memory.remember(cid, "user", "m1").await.unwrap();
1825        let msg_id2 = memory.remember(cid, "assistant", "m2").await.unwrap();
1826        let msg_id3 = memory.remember(cid, "user", "m3").await.unwrap();
1827
1828        let s1 = memory
1829            .sqlite()
1830            .save_summary(cid, "summary1", msg_id1, msg_id2, 3)
1831            .await
1832            .unwrap();
1833        let s2 = memory
1834            .sqlite()
1835            .save_summary(cid, "summary2", msg_id2, msg_id3, 3)
1836            .await
1837            .unwrap();
1838
1839        let summaries = memory.load_summaries(cid).await.unwrap();
1840        assert_eq!(summaries.len(), 2);
1841        assert_eq!(summaries[0].id, s1);
1842        assert_eq!(summaries[0].content, "summary1");
1843        assert_eq!(summaries[1].id, s2);
1844        assert_eq!(summaries[1].content, "summary2");
1845    }
1846
1847    #[tokio::test]
1848    async fn summarize_below_threshold() {
1849        let memory = test_semantic_memory(false).await;
1850        let cid = memory.sqlite().create_conversation().await.unwrap();
1851
1852        memory.remember(cid, "user", "hello").await.unwrap();
1853
1854        let result = memory.summarize(cid, 10).await.unwrap();
1855        assert!(result.is_none());
1856    }
1857
1858    #[tokio::test]
1859    async fn summarize_stores_summary() {
1860        let memory = test_semantic_memory(false).await;
1861        let cid = memory.sqlite().create_conversation().await.unwrap();
1862
1863        for i in 0..5 {
1864            memory
1865                .remember(cid, "user", &format!("message {i}"))
1866                .await
1867                .unwrap();
1868        }
1869
1870        let summary_id = memory.summarize(cid, 3).await.unwrap();
1871        assert!(summary_id.is_some());
1872
1873        let summaries = memory.load_summaries(cid).await.unwrap();
1874        assert_eq!(summaries.len(), 1);
1875        assert_eq!(summaries[0].id, summary_id.unwrap());
1876        assert!(!summaries[0].content.is_empty());
1877    }
1878
1879    #[tokio::test]
1880    async fn summarize_respects_previous_summaries() {
1881        let memory = test_semantic_memory(false).await;
1882        let cid = memory.sqlite().create_conversation().await.unwrap();
1883
1884        for i in 0..10 {
1885            memory
1886                .remember(cid, "user", &format!("message {i}"))
1887                .await
1888                .unwrap();
1889        }
1890
1891        let s1 = memory.summarize(cid, 3).await.unwrap();
1892        assert!(s1.is_some());
1893
1894        let s2 = memory.summarize(cid, 3).await.unwrap();
1895        assert!(s2.is_some());
1896
1897        let summaries = memory.load_summaries(cid).await.unwrap();
1898        assert_eq!(summaries.len(), 2);
1899        assert!(summaries[0].last_message_id < summaries[1].first_message_id);
1900    }
1901
1902    #[tokio::test]
1903    async fn remember_multiple_messages_increments_ids() {
1904        let memory = test_semantic_memory(false).await;
1905        let cid = memory.sqlite.create_conversation().await.unwrap();
1906
1907        let id1 = memory.remember(cid, "user", "first").await.unwrap();
1908        let id2 = memory.remember(cid, "assistant", "second").await.unwrap();
1909        let id3 = memory.remember(cid, "user", "third").await.unwrap();
1910
1911        assert!(id1 < id2);
1912        assert!(id2 < id3);
1913    }
1914
1915    #[tokio::test]
1916    async fn message_count_across_conversations() {
1917        let memory = test_semantic_memory(false).await;
1918        let cid1 = memory.sqlite().create_conversation().await.unwrap();
1919        let cid2 = memory.sqlite().create_conversation().await.unwrap();
1920
1921        memory.remember(cid1, "user", "msg1").await.unwrap();
1922        memory.remember(cid1, "user", "msg2").await.unwrap();
1923        memory.remember(cid2, "user", "msg3").await.unwrap();
1924
1925        assert_eq!(memory.message_count(cid1).await.unwrap(), 2);
1926        assert_eq!(memory.message_count(cid2).await.unwrap(), 1);
1927    }
1928
1929    #[tokio::test]
1930    async fn summarize_exact_threshold_returns_none() {
1931        let memory = test_semantic_memory(false).await;
1932        let cid = memory.sqlite().create_conversation().await.unwrap();
1933
1934        for i in 0..3 {
1935            memory
1936                .remember(cid, "user", &format!("msg {i}"))
1937                .await
1938                .unwrap();
1939        }
1940
1941        let result = memory.summarize(cid, 3).await.unwrap();
1942        assert!(result.is_none());
1943    }
1944
1945    #[tokio::test]
1946    async fn summarize_one_above_threshold_produces_summary() {
1947        let memory = test_semantic_memory(false).await;
1948        let cid = memory.sqlite().create_conversation().await.unwrap();
1949
1950        for i in 0..4 {
1951            memory
1952                .remember(cid, "user", &format!("msg {i}"))
1953                .await
1954                .unwrap();
1955        }
1956
1957        let result = memory.summarize(cid, 3).await.unwrap();
1958        assert!(result.is_some());
1959    }
1960
1961    #[tokio::test]
1962    async fn summary_fields_populated() {
1963        let memory = test_semantic_memory(false).await;
1964        let cid = memory.sqlite().create_conversation().await.unwrap();
1965
1966        for i in 0..5 {
1967            memory
1968                .remember(cid, "user", &format!("msg {i}"))
1969                .await
1970                .unwrap();
1971        }
1972
1973        memory.summarize(cid, 3).await.unwrap();
1974        let summaries = memory.load_summaries(cid).await.unwrap();
1975        let s = &summaries[0];
1976
1977        assert_eq!(s.conversation_id, cid);
1978        assert!(s.first_message_id > MessageId(0));
1979        assert!(s.last_message_id >= s.first_message_id);
1980        assert!(s.token_estimate >= 0);
1981        assert!(!s.content.is_empty());
1982    }
1983
1984    #[test]
1985    fn build_summarization_prompt_format() {
1986        let messages = vec![
1987            (MessageId(1), "user".into(), "Hello".into()),
1988            (MessageId(2), "assistant".into(), "Hi there".into()),
1989        ];
1990        let prompt = build_summarization_prompt(&messages);
1991        assert!(prompt.contains("user: Hello"));
1992        assert!(prompt.contains("assistant: Hi there"));
1993        assert!(prompt.contains("key_facts"));
1994    }
1995
1996    #[test]
1997    fn build_summarization_prompt_empty() {
1998        let messages: Vec<(MessageId, String, String)> = vec![];
1999        let prompt = build_summarization_prompt(&messages);
2000        assert!(prompt.contains("key_facts"));
2001    }
2002
2003    #[test]
2004    fn structured_summary_deserialize() {
2005        let json = r#"{"summary":"s","key_facts":["f1","f2"],"entities":["e1"]}"#;
2006        let ss: StructuredSummary = serde_json::from_str(json).unwrap();
2007        assert_eq!(ss.summary, "s");
2008        assert_eq!(ss.key_facts.len(), 2);
2009        assert_eq!(ss.entities.len(), 1);
2010    }
2011
2012    #[test]
2013    fn structured_summary_empty_facts() {
2014        let json = r#"{"summary":"s","key_facts":[],"entities":[]}"#;
2015        let ss: StructuredSummary = serde_json::from_str(json).unwrap();
2016        assert!(ss.key_facts.is_empty());
2017        assert!(ss.entities.is_empty());
2018    }
2019
2020    #[tokio::test]
2021    async fn search_key_facts_no_qdrant_empty() {
2022        let memory = test_semantic_memory(false).await;
2023        let facts = memory.search_key_facts("query", 5).await.unwrap();
2024        assert!(facts.is_empty());
2025    }
2026
2027    #[test]
2028    fn recalled_message_debug() {
2029        let recalled = RecalledMessage {
2030            message: Message {
2031                role: Role::User,
2032                content: "test".into(),
2033                parts: vec![],
2034                metadata: MessageMetadata::default(),
2035            },
2036            score: 0.95,
2037        };
2038        let dbg = format!("{recalled:?}");
2039        assert!(dbg.contains("RecalledMessage"));
2040        assert!(dbg.contains("0.95"));
2041    }
2042
2043    #[test]
2044    fn summary_clone() {
2045        let summary = Summary {
2046            id: 1,
2047            conversation_id: ConversationId(2),
2048            content: "test summary".into(),
2049            first_message_id: MessageId(1),
2050            last_message_id: MessageId(5),
2051            token_estimate: 10,
2052        };
2053        let cloned = summary.clone();
2054        assert_eq!(summary.id, cloned.id);
2055        assert_eq!(summary.content, cloned.content);
2056    }
2057
2058    #[tokio::test]
2059    async fn remember_preserves_role_mapping() {
2060        let memory = test_semantic_memory(false).await;
2061        let cid = memory.sqlite.create_conversation().await.unwrap();
2062
2063        memory.remember(cid, "user", "u").await.unwrap();
2064        memory.remember(cid, "assistant", "a").await.unwrap();
2065        memory.remember(cid, "system", "s").await.unwrap();
2066
2067        let history = memory.sqlite.load_history(cid, 50).await.unwrap();
2068        assert_eq!(history.len(), 3);
2069        assert_eq!(history[0].role, Role::User);
2070        assert_eq!(history[1].role, Role::Assistant);
2071        assert_eq!(history[2].role, Role::System);
2072    }
2073
2074    #[tokio::test]
2075    async fn new_with_invalid_qdrant_url_graceful() {
2076        let mut mock = MockProvider::default();
2077        mock.supports_embeddings = true;
2078        let provider = AnyProvider::Mock(mock);
2079        let result =
2080            SemanticMemory::new(":memory:", "http://127.0.0.1:1", provider, "test-model").await;
2081        assert!(result.is_ok());
2082    }
2083
2084    #[tokio::test]
2085    async fn test_semantic_memory_sqlite_remember_recall_roundtrip() {
2086        // Build SemanticMemory with EmbeddingStore backed by SQLite instead of Qdrant
2087        let mut mock = MockProvider::default();
2088        mock.supports_embeddings = true;
        // MockProvider.embed returns the same fixed vector for every input, so
        // every stored embedding has cosine similarity 1.0 with the query.
2091        let provider = AnyProvider::Mock(mock);
2092
2093        let sqlite = SqliteStore::new(":memory:").await.unwrap();
2094        let pool = sqlite.pool().clone();
2095        let qdrant = Some(Arc::new(
2096            crate::embedding_store::EmbeddingStore::new_sqlite(pool),
2097        ));
2098
2099        let memory = SemanticMemory {
2100            sqlite,
2101            qdrant,
2102            provider,
2103            embedding_model: "test-model".into(),
2104            vector_weight: 0.7,
2105            keyword_weight: 0.3,
2106            temporal_decay_enabled: false,
2107            temporal_decay_half_life_days: 30,
2108            mmr_enabled: false,
2109            mmr_lambda: 0.7,
2110            token_counter: Arc::new(TokenCounter::new()),
2111            #[cfg(feature = "graph-memory")]
2112            graph_store: None,
2113            #[cfg(feature = "graph-memory")]
2114            community_detection_failures: Arc::new(AtomicU64::new(0)),
2115            #[cfg(feature = "graph-memory")]
2116            graph_extraction_count: Arc::new(AtomicU64::new(0)),
2117            #[cfg(feature = "graph-memory")]
2118            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
2119        };
2120
2121        let cid = memory.sqlite().create_conversation().await.unwrap();
2122
2123        // remember → stores in SQLite + SQLite vector store
2124        let id1 = memory
2125            .remember(cid, "user", "rust async programming")
2126            .await
2127            .unwrap();
2128        let id2 = memory
2129            .remember(cid, "assistant", "use tokio for async")
2130            .await
2131            .unwrap();
2132        assert!(id1 < id2);
2133
2134        // recall → should return results via FTS5 keyword search
2135        let recalled = memory.recall("rust", 5, None).await.unwrap();
2136        assert!(
2137            !recalled.is_empty(),
2138            "recall must return at least one result"
2139        );
2140
2141        // Verify history is accessible
2142        let history = memory.sqlite().load_history(cid, 50).await.unwrap();
2143        assert_eq!(history.len(), 2);
2144        assert_eq!(history[0].content, "rust async programming");
2145    }
2146
2147    #[tokio::test]
2148    async fn remember_with_embeddings_supported_but_no_qdrant() {
2149        let memory = test_semantic_memory(true).await;
2150        let cid = memory.sqlite.create_conversation().await.unwrap();
2151
2152        let msg_id = memory.remember(cid, "user", "hello embed").await.unwrap();
2153        assert!(msg_id > MessageId(0));
2154
2155        let history = memory.sqlite.load_history(cid, 50).await.unwrap();
2156        assert_eq!(history.len(), 1);
2157        assert_eq!(history[0].content, "hello embed");
2158    }
2159
2160    #[tokio::test]
2161    async fn remember_verifies_content_via_load_history() {
2162        let memory = test_semantic_memory(false).await;
2163        let cid = memory.sqlite.create_conversation().await.unwrap();
2164
2165        memory.remember(cid, "user", "alpha").await.unwrap();
2166        memory.remember(cid, "assistant", "beta").await.unwrap();
2167        memory.remember(cid, "user", "gamma").await.unwrap();
2168
2169        let history = memory.sqlite().load_history(cid, 50).await.unwrap();
2170        assert_eq!(history.len(), 3);
2171        assert_eq!(history[0].content, "alpha");
2172        assert_eq!(history[1].content, "beta");
2173        assert_eq!(history[2].content, "gamma");
2174    }
2175
2176    #[tokio::test]
2177    async fn message_count_multiple_conversations_isolated() {
2178        let memory = test_semantic_memory(false).await;
2179        let cid1 = memory.sqlite().create_conversation().await.unwrap();
2180        let cid2 = memory.sqlite().create_conversation().await.unwrap();
2181        let cid3 = memory.sqlite().create_conversation().await.unwrap();
2182
2183        for _ in 0..5 {
2184            memory.remember(cid1, "user", "msg").await.unwrap();
2185        }
2186        for _ in 0..3 {
2187            memory.remember(cid2, "user", "msg").await.unwrap();
2188        }
2189
2190        assert_eq!(memory.message_count(cid1).await.unwrap(), 5);
2191        assert_eq!(memory.message_count(cid2).await.unwrap(), 3);
2192        assert_eq!(memory.message_count(cid3).await.unwrap(), 0);
2193    }
2194
2195    #[tokio::test]
2196    async fn summarize_empty_messages_range_returns_none() {
2197        let memory = test_semantic_memory(false).await;
2198        let cid = memory.sqlite().create_conversation().await.unwrap();
2199
2200        for i in 0..6 {
2201            memory
2202                .remember(cid, "user", &format!("msg {i}"))
2203                .await
2204                .unwrap();
2205        }
2206
2207        memory.summarize(cid, 3).await.unwrap();
2208        memory.summarize(cid, 3).await.unwrap();
2209
2210        let summaries = memory.load_summaries(cid).await.unwrap();
2211        assert_eq!(summaries.len(), 2);
2212    }
2213
2214    #[tokio::test]
2215    async fn summarize_token_estimate_populated() {
2216        let memory = test_semantic_memory(false).await;
2217        let cid = memory.sqlite().create_conversation().await.unwrap();
2218
2219        for i in 0..5 {
2220            memory
2221                .remember(cid, "user", &format!("message {i}"))
2222                .await
2223                .unwrap();
2224        }
2225
2226        memory.summarize(cid, 3).await.unwrap();
2227        let summaries = memory.load_summaries(cid).await.unwrap();
2228        let token_est = summaries[0].token_estimate;
2229        assert!(token_est > 0);
2230    }
2231
2232    #[tokio::test]
2233    async fn summarize_fails_when_provider_chat_fails() {
2234        let sqlite = SqliteStore::new(":memory:").await.unwrap();
2235        let provider = AnyProvider::Ollama(zeph_llm::ollama::OllamaProvider::new(
2236            "http://127.0.0.1:1",
2237            "test".into(),
2238            "embed".into(),
2239        ));
2240        let memory = SemanticMemory {
2241            sqlite,
2242            qdrant: None,
2243            provider,
2244            embedding_model: "test".into(),
2245            vector_weight: 0.7,
2246            keyword_weight: 0.3,
2247            temporal_decay_enabled: false,
2248            temporal_decay_half_life_days: 30,
2249            mmr_enabled: false,
2250            mmr_lambda: 0.7,
2251            token_counter: Arc::new(TokenCounter::new()),
2252            #[cfg(feature = "graph-memory")]
2253            graph_store: None,
2254            #[cfg(feature = "graph-memory")]
2255            community_detection_failures: Arc::new(AtomicU64::new(0)),
2256            #[cfg(feature = "graph-memory")]
2257            graph_extraction_count: Arc::new(AtomicU64::new(0)),
2258            #[cfg(feature = "graph-memory")]
2259            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
2260        };
2261        let cid = memory.sqlite().create_conversation().await.unwrap();
2262
2263        for i in 0..5 {
2264            memory
2265                .remember(cid, "user", &format!("msg {i}"))
2266                .await
2267                .unwrap();
2268        }
2269
2270        let result = memory.summarize(cid, 3).await;
2271        assert!(result.is_err());
2272    }
2273
2274    #[tokio::test]
2275    async fn embed_missing_without_embedding_support_returns_zero() {
2276        let memory = test_semantic_memory(false).await;
2277        let cid = memory.sqlite().create_conversation().await.unwrap();
2278        memory
2279            .sqlite()
2280            .save_message(cid, "user", "test message")
2281            .await
2282            .unwrap();
2283
2284        let count = memory.embed_missing().await.unwrap();
2285        assert_eq!(count, 0);
2286    }
2287
2288    #[tokio::test]
2289    async fn has_embedding_returns_false_when_no_qdrant() {
2290        let memory = test_semantic_memory(false).await;
2291        let cid = memory.sqlite.create_conversation().await.unwrap();
2292        let msg_id = memory.remember(cid, "user", "test").await.unwrap();
2293        assert!(!memory.has_embedding(msg_id).await.unwrap());
2294    }
2295
2296    #[tokio::test]
2297    async fn recall_empty_without_qdrant_regardless_of_filter() {
2298        let memory = test_semantic_memory(true).await;
2299        let filter = SearchFilter {
2300            conversation_id: Some(ConversationId(1)),
2301            role: None,
2302        };
2303        let recalled = memory.recall("query", 10, Some(filter)).await.unwrap();
2304        assert!(recalled.is_empty());
2305    }
2306
2307    #[tokio::test]
2308    async fn summarize_message_range_bounds() {
2309        let memory = test_semantic_memory(false).await;
2310        let cid = memory.sqlite().create_conversation().await.unwrap();
2311
2312        for i in 0..8 {
2313            memory
2314                .remember(cid, "user", &format!("msg {i}"))
2315                .await
2316                .unwrap();
2317        }
2318
2319        let summary_id = memory.summarize(cid, 4).await.unwrap().unwrap();
2320        let summaries = memory.load_summaries(cid).await.unwrap();
2321        assert_eq!(summaries.len(), 1);
2322        assert_eq!(summaries[0].id, summary_id);
2323        assert!(summaries[0].first_message_id >= MessageId(1));
2324        assert!(summaries[0].last_message_id >= summaries[0].first_message_id);
2325    }
2326
2327    #[test]
2328    fn build_summarization_prompt_preserves_order() {
2329        let messages = vec![
2330            (MessageId(1), "user".into(), "first".into()),
2331            (MessageId(2), "assistant".into(), "second".into()),
2332            (MessageId(3), "user".into(), "third".into()),
2333        ];
2334        let prompt = build_summarization_prompt(&messages);
2335        let first_pos = prompt.find("user: first").unwrap();
2336        let second_pos = prompt.find("assistant: second").unwrap();
2337        let third_pos = prompt.find("user: third").unwrap();
2338        assert!(first_pos < second_pos);
2339        assert!(second_pos < third_pos);
2340    }
2341
2342    #[test]
2343    fn summary_debug() {
2344        let summary = Summary {
2345            id: 1,
2346            conversation_id: ConversationId(2),
2347            content: "test".into(),
2348            first_message_id: MessageId(1),
2349            last_message_id: MessageId(5),
2350            token_estimate: 10,
2351        };
2352        let dbg = format!("{summary:?}");
2353        assert!(dbg.contains("Summary"));
2354    }
2355
2356    #[tokio::test]
2357    async fn message_count_nonexistent_conversation() {
2358        let memory = test_semantic_memory(false).await;
2359        let count = memory.message_count(ConversationId(999)).await.unwrap();
2360        assert_eq!(count, 0);
2361    }
2362
2363    #[tokio::test]
2364    async fn load_summaries_nonexistent_conversation() {
2365        let memory = test_semantic_memory(false).await;
2366        let summaries = memory.load_summaries(ConversationId(999)).await.unwrap();
2367        assert!(summaries.is_empty());
2368    }
2369
2370    #[tokio::test]
2371    async fn store_session_summary_no_qdrant_noop() {
2372        let memory = test_semantic_memory(true).await;
2373        let result = memory
2374            .store_session_summary(ConversationId(1), "test summary")
2375            .await;
2376        assert!(result.is_ok());
2377    }
2378
2379    #[tokio::test]
2380    async fn store_session_summary_no_embeddings_noop() {
2381        let memory = test_semantic_memory(false).await;
2382        let result = memory
2383            .store_session_summary(ConversationId(1), "test summary")
2384            .await;
2385        assert!(result.is_ok());
2386    }
2387
2388    #[tokio::test]
2389    async fn search_session_summaries_no_qdrant_empty() {
2390        let memory = test_semantic_memory(true).await;
2391        let results = memory
2392            .search_session_summaries("query", 5, None)
2393            .await
2394            .unwrap();
2395        assert!(results.is_empty());
2396    }
2397
2398    #[tokio::test]
2399    async fn search_session_summaries_no_embeddings_empty() {
2400        let memory = test_semantic_memory(false).await;
2401        let results = memory
2402            .search_session_summaries("query", 5, Some(ConversationId(1)))
2403            .await
2404            .unwrap();
2405        assert!(results.is_empty());
2406    }
2407
2408    #[test]
2409    fn session_summary_result_debug() {
2410        let result = SessionSummaryResult {
2411            summary_text: "test".into(),
2412            score: 0.9,
2413            conversation_id: ConversationId(1),
2414        };
2415        let dbg = format!("{result:?}");
2416        assert!(dbg.contains("SessionSummaryResult"));
2417    }
2418
2419    #[test]
2420    fn session_summary_result_clone() {
2421        let result = SessionSummaryResult {
2422            summary_text: "test".into(),
2423            score: 0.9,
2424            conversation_id: ConversationId(1),
2425        };
2426        let cloned = result.clone();
2427        assert_eq!(result.summary_text, cloned.summary_text);
2428        assert_eq!(result.conversation_id, cloned.conversation_id);
2429    }
2430
2431    #[tokio::test]
2432    async fn recall_fts5_fallback_without_qdrant() {
2433        let memory = test_semantic_memory(false).await;
2434        let cid = memory.sqlite.create_conversation().await.unwrap();
2435
2436        memory
2437            .remember(cid, "user", "rust programming guide")
2438            .await
2439            .unwrap();
2440        memory
2441            .remember(cid, "assistant", "python tutorial")
2442            .await
2443            .unwrap();
2444        memory
2445            .remember(cid, "user", "advanced rust patterns")
2446            .await
2447            .unwrap();
2448
2449        let recalled = memory.recall("rust", 5, None).await.unwrap();
2450        assert_eq!(recalled.len(), 2);
2451        assert!(recalled[0].score >= recalled[1].score);
2452    }
2453
2454    #[tokio::test]
2455    async fn recall_fts5_fallback_with_filter() {
2456        let memory = test_semantic_memory(false).await;
2457        let cid1 = memory.sqlite.create_conversation().await.unwrap();
2458        let cid2 = memory.sqlite.create_conversation().await.unwrap();
2459
2460        memory.remember(cid1, "user", "hello world").await.unwrap();
2461        memory
2462            .remember(cid2, "user", "hello universe")
2463            .await
2464            .unwrap();
2465
2466        let filter = SearchFilter {
2467            conversation_id: Some(cid1),
2468            role: None,
2469        };
2470        let recalled = memory.recall("hello", 5, Some(filter)).await.unwrap();
2471        assert_eq!(recalled.len(), 1);
2472    }
2473
2474    #[tokio::test]
2475    async fn recall_fts5_no_matches_returns_empty() {
2476        let memory = test_semantic_memory(false).await;
2477        let cid = memory.sqlite.create_conversation().await.unwrap();
2478
2479        memory.remember(cid, "user", "hello world").await.unwrap();
2480
2481        let recalled = memory.recall("nonexistent", 5, None).await.unwrap();
2482        assert!(recalled.is_empty());
2483    }
2484
2485    #[tokio::test]
2486    async fn recall_fts5_respects_limit() {
2487        let memory = test_semantic_memory(false).await;
2488        let cid = memory.sqlite.create_conversation().await.unwrap();
2489
2490        for i in 0..10 {
2491            memory
2492                .remember(cid, "user", &format!("test message number {i}"))
2493                .await
2494                .unwrap();
2495        }
2496
2497        let recalled = memory.recall("test", 3, None).await.unwrap();
2498        assert_eq!(recalled.len(), 3);
2499    }
2500
2501    // Priority 2: summarize fallback path
2502
2503    #[tokio::test]
2504    async fn summarize_fallback_to_plain_text_when_structured_fails() {
        // MockProvider returns non-JSON plain text, so chat_typed (chat() plus
        // JSON parsing) fails to parse, retries, and ends with a
        // StructuredParse error; summarize() then falls back to plain chat(),
        // which succeeds with the same text.
2512        let sqlite = SqliteStore::new(":memory:").await.unwrap();
2513        let mut mock = MockProvider::default();
2514        // First two calls go to chat_typed (attempt + retry), third call is the plain fallback
2515        mock.default_response = "plain text summary".into();
2516        let provider = AnyProvider::Mock(mock);
2517
2518        let memory = SemanticMemory {
2519            sqlite,
2520            qdrant: None,
2521            provider,
2522            embedding_model: "test".into(),
2523            vector_weight: 0.7,
2524            keyword_weight: 0.3,
2525            temporal_decay_enabled: false,
2526            temporal_decay_half_life_days: 30,
2527            mmr_enabled: false,
2528            mmr_lambda: 0.7,
2529            token_counter: Arc::new(TokenCounter::new()),
2530            #[cfg(feature = "graph-memory")]
2531            graph_store: None,
2532            #[cfg(feature = "graph-memory")]
2533            community_detection_failures: Arc::new(AtomicU64::new(0)),
2534            #[cfg(feature = "graph-memory")]
2535            graph_extraction_count: Arc::new(AtomicU64::new(0)),
2536            #[cfg(feature = "graph-memory")]
2537            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
2538        };
2539
2540        let cid = memory.sqlite().create_conversation().await.unwrap();
2541        for i in 0..5 {
2542            memory
2543                .remember(cid, "user", &format!("msg {i}"))
2544                .await
2545                .unwrap();
2546        }
2547
2548        let result = memory.summarize(cid, 3).await;
        // chat_typed fails to parse the non-JSON response, so summarize() falls
        // back to plain chat(), which returns "plain text summary"; the result
        // must be Ok with one summary stored.
2554        assert!(result.is_ok());
2555        let summaries = memory.load_summaries(cid).await.unwrap();
2556        assert_eq!(summaries.len(), 1);
2557        assert!(!summaries[0].content.is_empty());
2558    }
2559
2560    // Temporal decay tests
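    // Model under test (apply_temporal_decay): score *= exp(-lambda * age_days)
    // with lambda = ln(2) / half_life_days, i.e. score *= 2^(-age_days / half_life_days).
    // Worked factors: age = half_life → 0.5; age 7d at half-life 1d → 2^-7 ≈ 0.0078;
    // age 3650d at half-life 30d → 2^(-121.7), effectively zero.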
2561
2562    #[test]
2563    fn temporal_decay_disabled_leaves_scores_unchanged() {
2564        let mut ranked = vec![(MessageId(1), 1.0f64), (MessageId(2), 0.5f64)];
2565        let timestamps = std::collections::HashMap::new();
2566        apply_temporal_decay(&mut ranked, &timestamps, 30);
2567        assert!((ranked[0].1 - 1.0).abs() < f64::EPSILON);
2568        assert!((ranked[1].1 - 0.5).abs() < f64::EPSILON);
2569    }
2570
2571    #[test]
2572    fn temporal_decay_zero_age_preserves_score() {
2573        let now = std::time::SystemTime::now()
2574            .duration_since(std::time::UNIX_EPOCH)
2575            .unwrap_or_default()
2576            .as_secs()
2577            .cast_signed();
2578        let mut ranked = vec![(MessageId(1), 1.0f64)];
2579        let mut timestamps = std::collections::HashMap::new();
2580        timestamps.insert(MessageId(1), now);
2581        apply_temporal_decay(&mut ranked, &timestamps, 30);
2582        // age = 0 days, exp(0) = 1.0 → no change
2583        assert!((ranked[0].1 - 1.0).abs() < 0.01);
2584    }
2585
2586    #[test]
2587    fn temporal_decay_half_life_halves_score() {
2588        // Age exactly half_life_days → score should be halved
2589        let half_life = 30u32;
2590        let age_secs = i64::from(half_life) * 86400;
2591        let now = std::time::SystemTime::now()
2592            .duration_since(std::time::UNIX_EPOCH)
2593            .unwrap_or_default()
2594            .as_secs()
2595            .cast_signed();
2596        let ts = now - age_secs;
2597        let mut ranked = vec![(MessageId(1), 1.0f64)];
2598        let mut timestamps = std::collections::HashMap::new();
2599        timestamps.insert(MessageId(1), ts);
2600        apply_temporal_decay(&mut ranked, &timestamps, half_life);
2601        // exp(-ln2) = 0.5
2602        assert!(
2603            (ranked[0].1 - 0.5).abs() < 0.01,
2604            "score was {}",
2605            ranked[0].1
2606        );
2607    }
2608
2609    // MMR tests
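    // Selection rule under test (apply_mmr): the highest-relevance item is
    // seeded first; each later pick maximizes
    //   lambda * relevance - (1 - lambda) * max_cosine_sim_to_selected.
    // Candidates with no stored vector remain selectable (their similarity term
    // contributes nothing), as mmr_without_vectors_picks_by_relevance shows.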
2610
2611    #[test]
2612    fn mmr_empty_input_returns_empty() {
2613        let ranked = vec![];
2614        let vectors = std::collections::HashMap::new();
2615        let result = apply_mmr(&ranked, &vectors, 0.7, 5);
2616        assert!(result.is_empty());
2617    }
2618
2619    #[test]
2620    fn mmr_returns_up_to_limit() {
2621        let ranked = vec![
2622            (MessageId(1), 1.0f64),
2623            (MessageId(2), 0.9f64),
2624            (MessageId(3), 0.8f64),
2625        ];
2626        let mut vectors = std::collections::HashMap::new();
2627        vectors.insert(MessageId(1), vec![1.0f32, 0.0]);
2628        vectors.insert(MessageId(2), vec![0.0f32, 1.0]);
2629        vectors.insert(MessageId(3), vec![1.0f32, 0.0]);
2630        let result = apply_mmr(&ranked, &vectors, 0.7, 2);
2631        assert_eq!(result.len(), 2);
2632    }
2633
2634    #[test]
2635    fn mmr_without_vectors_picks_by_relevance() {
2636        let ranked = vec![(MessageId(1), 1.0f64), (MessageId(2), 0.5f64)];
2637        let vectors = std::collections::HashMap::new();
2638        let result = apply_mmr(&ranked, &vectors, 0.7, 2);
2639        assert_eq!(result.len(), 2);
2640        assert_eq!(result[0].0, MessageId(1));
2641    }
2642
2643    #[test]
2644    fn mmr_prefers_diverse_over_redundant() {
2645        // Two candidates with same relevance but msg 2 is orthogonal (more diverse)
2646        let ranked = vec![
2647            (MessageId(1), 1.0f64), // selected first
2648            (MessageId(2), 0.9f64), // orthogonal to 1
2649            (MessageId(3), 0.9f64), // parallel to 1 (redundant)
2650        ];
2651        let mut vectors = std::collections::HashMap::new();
2652        vectors.insert(MessageId(1), vec![1.0f32, 0.0]);
2653        vectors.insert(MessageId(2), vec![0.0f32, 1.0]); // orthogonal
2654        vectors.insert(MessageId(3), vec![1.0f32, 0.0]); // same as 1
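        // After 1 is selected: mmr(2) = 0.5*0.9 - 0.5*0.0 = 0.45,
        // mmr(3) = 0.5*0.9 - 0.5*1.0 = -0.05 → 2 wins.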
2655        let result = apply_mmr(&ranked, &vectors, 0.5, 2);
2656        assert_eq!(result.len(), 2);
2657        assert_eq!(result[0].0, MessageId(1));
2658        // msg 2 should be preferred over msg 3 (diverse)
2659        assert_eq!(result[1].0, MessageId(2));
2660    }
2661
2662    #[test]
2663    fn temporal_decay_half_life_zero_is_noop() {
2664        let now = std::time::SystemTime::now()
2665            .duration_since(std::time::UNIX_EPOCH)
2666            .unwrap_or_default()
2667            .as_secs()
2668            .cast_signed();
2669        let age_secs = 30i64 * 86400;
2670        let ts = now - age_secs;
2671        let mut ranked = vec![(MessageId(1), 1.0f64)];
2672        let mut timestamps = std::collections::HashMap::new();
2673        timestamps.insert(MessageId(1), ts);
2674        // half_life=0 → guard returns early, score must remain 1.0
2675        apply_temporal_decay(&mut ranked, &timestamps, 0);
2676        assert!(
2677            (ranked[0].1 - 1.0).abs() < f64::EPSILON,
2678            "score was {}",
2679            ranked[0].1
2680        );
2681    }
2682
2683    #[test]
2684    fn temporal_decay_huge_age_near_zero() {
2685        let now = std::time::SystemTime::now()
2686            .duration_since(std::time::UNIX_EPOCH)
2687            .unwrap_or_default()
2688            .as_secs()
2689            .cast_signed();
2690        // 10 years = ~3650 days
2691        let age_secs = 3650i64 * 86400;
2692        let ts = now - age_secs;
2693        let mut ranked = vec![(MessageId(1), 1.0f64)];
2694        let mut timestamps = std::collections::HashMap::new();
2695        timestamps.insert(MessageId(1), ts);
2696        apply_temporal_decay(&mut ranked, &timestamps, 30);
2697        // After 3650 days with half_life=30, score should be essentially 0
2698        assert!(ranked[0].1 < 0.001, "score was {}", ranked[0].1);
2699    }
2700
2701    #[test]
2702    fn temporal_decay_small_half_life() {
2703        // Very small half_life (1 day), age = 7 days → 2^(-7) ≈ 0.0078
2704        let now = std::time::SystemTime::now()
2705            .duration_since(std::time::UNIX_EPOCH)
2706            .unwrap_or_default()
2707            .as_secs()
2708            .cast_signed();
2709        let ts = now - 7 * 86400i64;
2710        let mut ranked = vec![(MessageId(1), 1.0f64)];
2711        let mut timestamps = std::collections::HashMap::new();
2712        timestamps.insert(MessageId(1), ts);
2713        apply_temporal_decay(&mut ranked, &timestamps, 1);
2714        assert!(ranked[0].1 < 0.01, "score was {}", ranked[0].1);
2715    }
2716
2717    #[test]
2718    fn mmr_lambda_zero_max_diversity() {
2719        // lambda=0 → pure diversity: second item should be most dissimilar
2720        let ranked = vec![
2721            (MessageId(1), 1.0f64),  // selected first (always highest relevance)
2722            (MessageId(2), 0.9f64),  // orthogonal to 1
2723            (MessageId(3), 0.85f64), // parallel to 1 (max_sim=1.0)
2724        ];
2725        let mut vectors = std::collections::HashMap::new();
2726        vectors.insert(MessageId(1), vec![1.0f32, 0.0]);
2727        vectors.insert(MessageId(2), vec![0.0f32, 1.0]); // orthogonal
2728        vectors.insert(MessageId(3), vec![1.0f32, 0.0]); // same direction
2729        let result = apply_mmr(&ranked, &vectors, 0.0, 3);
2730        assert_eq!(result.len(), 3);
2731        // After 1 is selected: mmr(2) = 0 - (1-0)*0 = 0, mmr(3) = 0 - 1*1 = -1 → 2 wins
2732        assert_eq!(result[1].0, MessageId(2));
2733    }
2734
2735    #[test]
2736    fn mmr_lambda_one_pure_relevance() {
2737        // lambda=1 → pure relevance, should pick in relevance order
2738        let ranked = vec![
2739            (MessageId(1), 1.0f64),
2740            (MessageId(2), 0.8f64),
2741            (MessageId(3), 0.6f64),
2742        ];
2743        let mut vectors = std::collections::HashMap::new();
2744        vectors.insert(MessageId(1), vec![1.0f32, 0.0]);
2745        vectors.insert(MessageId(2), vec![0.0f32, 1.0]);
2746        vectors.insert(MessageId(3), vec![0.5f32, 0.5]);
2747        let result = apply_mmr(&ranked, &vectors, 1.0, 3);
2748        assert_eq!(result.len(), 3);
2749        assert_eq!(result[0].0, MessageId(1));
2750        assert_eq!(result[1].0, MessageId(2));
2751        assert_eq!(result[2].0, MessageId(3));
2752    }
2753
2754    #[test]
2755    fn mmr_limit_zero_returns_empty() {
2756        let ranked = vec![(MessageId(1), 1.0f64), (MessageId(2), 0.8f64)];
2757        let mut vectors = std::collections::HashMap::new();
2758        vectors.insert(MessageId(1), vec![1.0f32, 0.0]);
2759        vectors.insert(MessageId(2), vec![0.0f32, 1.0]);
2760        let result = apply_mmr(&ranked, &vectors, 0.7, 0);
2761        assert!(result.is_empty());
2762    }
2763
2764    #[test]
2765    fn mmr_duplicate_vectors_penalizes_second() {
2766        // Two items with identical embeddings: second should be heavily penalized
2767        let ranked = vec![
2768            (MessageId(1), 1.0f64),
2769            (MessageId(2), 1.0f64), // same relevance, same direction
2770            (MessageId(3), 0.9f64), // orthogonal, lower relevance
2771        ];
2772        let mut vectors = std::collections::HashMap::new();
2773        vectors.insert(MessageId(1), vec![1.0f32, 0.0]);
2774        vectors.insert(MessageId(2), vec![1.0f32, 0.0]); // duplicate
2775        vectors.insert(MessageId(3), vec![0.0f32, 1.0]); // orthogonal
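        // After 1 is selected: mmr(2) = 0.5*1.0 - 0.5*1.0 = 0.0,
        // mmr(3) = 0.5*0.9 - 0.5*0.0 = 0.45 → 3 wins despite lower relevance.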
2776        let result = apply_mmr(&ranked, &vectors, 0.5, 3);
2777        assert_eq!(result.len(), 3);
2778        assert_eq!(result[0].0, MessageId(1));
2779        // msg3 (orthogonal) should be preferred over msg2 (duplicate) with lambda=0.5
2780        assert_eq!(result[1].0, MessageId(3));
2781    }
2782
2783    // recall_routed() tests (#1162 — tester gap coverage)
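    // Routes exercised below (HeuristicRouter): snake_case / code-like queries
    // → Keyword (FTS5 only); long natural-language questions → Semantic
    // (vector search only); short multi-word phrases → Hybrid (FTS5 merged
    // with vector results).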
2784
2785    #[tokio::test]
2786    async fn recall_routed_keyword_route_returns_fts5_results() {
2787        use crate::{HeuristicRouter, MemoryRoute, MemoryRouter};
2788
2789        let memory = test_semantic_memory(false).await;
2790        let cid = memory.sqlite.create_conversation().await.unwrap();
2791
2792        memory
2793            .remember(cid, "user", "rust programming guide")
2794            .await
2795            .unwrap();
2796        memory
2797            .remember(cid, "assistant", "python tutorial")
2798            .await
2799            .unwrap();
2800
2801        // "rust_guide" is pure snake_case → routes Keyword
2802        let router = HeuristicRouter;
2803        assert_eq!(router.route("rust_guide"), MemoryRoute::Keyword);
2804
2805        let recalled = memory
2806            .recall_routed("rust_guide", 5, None, &router)
2807            .await
2808            .unwrap();
        // FTS5 can match at most the two stored messages; the exact count
        // depends on how the tokenizer splits "rust_guide".
2810        assert!(recalled.len() <= 2);
2811    }
2812
2813    #[tokio::test]
2814    async fn recall_routed_semantic_route_without_qdrant_returns_empty_vectors() {
2815        use crate::{HeuristicRouter, MemoryRoute, MemoryRouter};
2816
2817        let memory = test_semantic_memory(false).await;
2818        let cid = memory.sqlite.create_conversation().await.unwrap();
2819
2820        memory
2821            .remember(cid, "user", "how does the agent loop work")
2822            .await
2823            .unwrap();
2824
2825        // Long natural language question → routes Semantic
2826        let router = HeuristicRouter;
2827        assert_eq!(
2828            router.route("how does the agent loop work"),
2829            MemoryRoute::Semantic
2830        );
2831
2832        // Without Qdrant, vector results are empty; recall_routed returns empty vec
2833        let recalled = memory
2834            .recall_routed("how does the agent loop work", 5, None, &router)
2835            .await
2836            .unwrap();
2837        assert!(recalled.is_empty(), "no Qdrant → empty semantic recall");
2838    }
2839
2840    #[tokio::test]
2841    async fn recall_routed_hybrid_route_falls_back_to_fts5_on_no_qdrant() {
2842        use crate::{HeuristicRouter, MemoryRoute, MemoryRouter};
2843
2844        let memory = test_semantic_memory(false).await;
2845        let cid = memory.sqlite.create_conversation().await.unwrap();
2846
2847        memory
2848            .remember(cid, "user", "context window token budget")
2849            .await
2850            .unwrap();
2851
2852        // 4-word non-question, no code pattern → routes Hybrid
2853        let router = HeuristicRouter;
2854        assert_eq!(
2855            router.route("context window token budget"),
2856            MemoryRoute::Hybrid
2857        );
2858
2859        // Hybrid: FTS5 succeeds, vectors empty (no Qdrant) → merged result
2860        let recalled = memory
2861            .recall_routed("context window token budget", 5, None, &router)
2862            .await
2863            .unwrap();
2864        // FTS5 finds the message; merged result should be non-empty
2865        assert!(!recalled.is_empty(), "FTS5 should find the stored message");
2866    }
2867
2868    // graph-memory tests
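    // The tests below exercise recall_graph(query, limit, depth): facts seed
    // from entities matching the query, are capped at `limit`, and come back
    // sorted by composite_score. (Parameter roles are inferred from the
    // assertions that follow.)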
2869
2870    #[cfg(feature = "graph-memory")]
2871    mod graph_extraction_tests {
2872        use super::*;
2873        use crate::graph::{EntityType, GraphStore};
2874
2875        async fn graph_memory() -> SemanticMemory {
2876            let mem = test_semantic_memory(false).await;
2877            let store = std::sync::Arc::new(GraphStore::new(mem.sqlite.pool().clone()));
2878            mem.with_graph_store(store)
2879        }
2880
2881        #[tokio::test]
2882        async fn recall_graph_returns_empty_when_no_entities() {
2883            let memory = graph_memory().await;
2884            let facts = memory.recall_graph("rust", 10, 2).await.unwrap();
2885            assert!(facts.is_empty(), "empty graph must return empty vec");
2886        }
2887
2888        #[tokio::test]
2889        async fn recall_graph_returns_facts_for_known_entity() {
2890            let memory = graph_memory().await;
2891            let store = GraphStore::new(memory.sqlite.pool().clone());
2892
2893            let rust_id = store
2894                .upsert_entity("rust", "rust", EntityType::Language, Some("a language"))
2895                .await
2896                .unwrap();
2897            let tokio_id = store
2898                .upsert_entity("tokio", "tokio", EntityType::Tool, Some("async runtime"))
2899                .await
2900                .unwrap();
2901            store
2902                .insert_edge(
2903                    rust_id,
2904                    tokio_id,
2905                    "uses",
2906                    "Rust uses tokio for async",
2907                    0.9,
2908                    None,
2909                )
2910                .await
2911                .unwrap();
2912
2913            let facts = memory.recall_graph("rust", 10, 2).await.unwrap();
2914            assert!(!facts.is_empty(), "should return at least one fact");
2915            assert_eq!(facts[0].entity_name, "rust");
2916            assert_eq!(facts[0].relation, "uses");
2917        }
2918
2919        #[tokio::test]
2920        async fn recall_graph_sorted_by_composite_score() {
2921            let memory = graph_memory().await;
2922            let store = GraphStore::new(memory.sqlite.pool().clone());
2923
2924            let a_id = store
2925                .upsert_entity("entity_a", "entity_a", EntityType::Concept, None)
2926                .await
2927                .unwrap();
2928            let b_id = store
2929                .upsert_entity("entity_b", "entity_b", EntityType::Concept, None)
2930                .await
2931                .unwrap();
2932            let c_id = store
2933                .upsert_entity("entity_c", "entity_c", EntityType::Concept, None)
2934                .await
2935                .unwrap();
2936            store
2937                .insert_edge(a_id, b_id, "relates", "a relates b", 0.9, None)
2938                .await
2939                .unwrap();
2940            store
2941                .insert_edge(a_id, c_id, "relates", "a relates c", 0.5, None)
2942                .await
2943                .unwrap();
2944
2945            let facts = memory.recall_graph("entity_a", 10, 1).await.unwrap();
2946            if facts.len() >= 2 {
2947                assert!(
2948                    facts[0].composite_score() >= facts[1].composite_score(),
2949                    "facts must be sorted descending by composite score"
2950                );
2951            }
2952        }
2953
2954        #[tokio::test]
2955        async fn extract_and_store_returns_zero_stats_for_empty_content() {
2956            let memory = graph_memory().await;
2957            let pool = memory.sqlite.pool().clone();
2958            let provider = test_provider();
2959
2960            let stats = extract_and_store(
2961                String::new(),
2962                vec![],
2963                provider,
2964                pool,
2965                GraphExtractionConfig {
2966                    max_entities: 10,
2967                    max_edges: 10,
2968                    extraction_timeout_secs: 5,
2969                    ..Default::default()
2970                },
2971            )
2972            .await
2973            .unwrap();
2974            assert_eq!(stats.entities_upserted, 0);
2975            assert_eq!(stats.edges_inserted, 0);
2976        }
2977
2978        #[tokio::test]
2979        async fn extraction_count_increments_atomically() {
2980            let memory = graph_memory().await;
2981            let pool = memory.sqlite.pool().clone();
2982            let provider = test_provider();
2983
2984            // Run two extractions sequentially to verify count increments
2985            for _ in 0..2 {
2986                let _ = extract_and_store(
2987                    "I use Rust for systems programming".to_owned(),
2988                    vec![],
2989                    provider.clone(),
2990                    pool.clone(),
2991                    GraphExtractionConfig {
2992                        max_entities: 5,
2993                        max_edges: 5,
2994                        extraction_timeout_secs: 5,
2995                        ..Default::default()
2996                    },
2997                )
2998                .await;
2999            }
3000
3001            let store = GraphStore::new(pool);
3002            let count = store.get_metadata("extraction_count").await.unwrap();
3003            // R-SUG-02: assert exact value "2" — two extractions must each increment the counter.
3004            assert_eq!(
3005                count.as_deref(),
3006                Some("2"),
3007                "extraction_count must be exactly 2 after two extraction attempts"
3008            );
3009        }
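
        // A minimal sketch of the increment pattern the test above relies on.
        // Assumption: the extraction counter is an AtomicU64 bumped with
        // fetch_add once per attempt before being persisted to graph
        // metadata; the exact field is internal to this crate.
        #[test]
        fn atomic_fetch_add_counts_every_attempt() {
            use std::sync::atomic::{AtomicU64, Ordering};
            let counter = AtomicU64::new(0);
            for _ in 0..2 {
                // fetch_add increments atomically and returns the old value.
                counter.fetch_add(1, Ordering::Relaxed);
            }
            assert_eq!(counter.load(Ordering::Relaxed), 2);
        }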

        #[tokio::test]
        async fn recall_graph_truncates_to_limit() {
            let memory = graph_memory().await;
            let store = GraphStore::new(memory.sqlite.pool().clone());

            let root_id = store
                .upsert_entity("root", "root", EntityType::Concept, None)
                .await
                .unwrap();
            for i in 0..5 {
                let name = format!("target_{i}");
                let tid = store
                    .upsert_entity(&name, &name, EntityType::Concept, None)
                    .await
                    .unwrap();
                store
                    .insert_edge(
                        root_id,
                        tid,
                        "links",
                        &format!("root links {name}"),
                        0.7,
                        None,
                    )
                    .await
                    .unwrap();
            }

            let facts = memory.recall_graph("root", 3, 1).await.unwrap();
            assert!(facts.len() <= 3, "recall_graph must respect limit");
        }

        // R-SUG-05: multi-hop BFS test.
        #[tokio::test]
        async fn recall_graph_multi_hop_traverses_two_hops() {
            // Chain: A -[knows]-> B -[uses]-> C
            // recall_graph("a", max_hops=2) must return facts for both hops.
            let memory = graph_memory().await;
            let store = GraphStore::new(memory.sqlite.pool().clone());

            let a_id = store
                .upsert_entity("a_entity", "a_entity", EntityType::Person, None)
                .await
                .unwrap();
            let b_id = store
                .upsert_entity("b_entity", "b_entity", EntityType::Person, None)
                .await
                .unwrap();
            let c_id = store
                .upsert_entity("c_entity", "c_entity", EntityType::Concept, None)
                .await
                .unwrap();

            store
                .insert_edge(a_id, b_id, "knows", "a knows b", 0.9, None)
                .await
                .unwrap();
            store
                .insert_edge(b_id, c_id, "uses", "b uses c", 0.8, None)
                .await
                .unwrap();

            // max_hops=1: BFS stops at A's direct neighbors → should find the A-B edge
            let facts_1hop = memory.recall_graph("a_entity", 10, 1).await.unwrap();
            assert!(!facts_1hop.is_empty(), "hop=1 must find direct edge");

            // max_hops=2: BFS reaches B then C → A-B and B-C edges both visible
            let facts_2hop = memory.recall_graph("a_entity", 10, 2).await.unwrap();
            assert!(
                facts_2hop.len() >= facts_1hop.len(),
                "hop=2 must find at least as many facts as hop=1"
            );
            let has_bc = facts_2hop.iter().any(|f| {
                (f.entity_name.contains("b_entity") || f.target_name.contains("b_entity"))
                    && (f.entity_name.contains("c_entity") || f.target_name.contains("c_entity"))
            });
            assert!(has_bc, "hop=2 BFS must traverse to c_entity via b_entity");
        }
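
        // A complementary sketch for the multi-hop test above. Assumption
        // (matching that test's own comments): with max_hops=1 the BFS from
        // a_entity never reaches the B-C edge, so it must not be reported.
        #[tokio::test]
        async fn recall_graph_one_hop_excludes_second_hop_edges() {
            let memory = graph_memory().await;
            let store = GraphStore::new(memory.sqlite.pool().clone());

            let a_id = store
                .upsert_entity("a_entity", "a_entity", EntityType::Person, None)
                .await
                .unwrap();
            let b_id = store
                .upsert_entity("b_entity", "b_entity", EntityType::Person, None)
                .await
                .unwrap();
            let c_id = store
                .upsert_entity("c_entity", "c_entity", EntityType::Concept, None)
                .await
                .unwrap();
            store
                .insert_edge(a_id, b_id, "knows", "a knows b", 0.9, None)
                .await
                .unwrap();
            store
                .insert_edge(b_id, c_id, "uses", "b uses c", 0.8, None)
                .await
                .unwrap();

            let facts = memory.recall_graph("a_entity", 10, 1).await.unwrap();
            let has_bc = facts
                .iter()
                .any(|f| f.entity_name.contains("c_entity") || f.target_name.contains("c_entity"));
            assert!(!has_bc, "hop=1 must not surface the second-hop B-C edge");
        }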

        // R-SUG-05: timeout degradation — zero-second timeout returns empty stats, no panic.
        #[tokio::test]
        async fn spawn_graph_extraction_zero_timeout_returns_without_panic() {
            let memory = graph_memory().await;
            let cfg = GraphExtractionConfig {
                max_entities: 5,
                max_edges: 5,
                extraction_timeout_secs: 0,
                ..Default::default()
            };
            // spawn fires and forgets — must not panic regardless of timeout value.
            memory.spawn_graph_extraction(
                "I use Rust for systems programming".to_owned(),
                vec![],
                cfg,
            );
            // Brief wait for the task to settle.
            tokio::time::sleep(std::time::Duration::from_millis(100)).await;
            // No assertion on count: with 0s timeout the task may or may not complete.
            // The test verifies there is no panic.
        }
    }

    // Priority 3: proptest

    use proptest::prelude::*;

    proptest! {
        #[test]
        fn count_tokens_never_panics(s in ".*") {
            let counter = crate::token_counter::TokenCounter::new();
            let _ = counter.count_tokens(&s);
        }
    }
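
    // A second property sketch, grounded in this file's cosine_similarity
    // guard: length-mismatched inputs short-circuit to exactly 0.0. The
    // disjoint length ranges below are arbitrary choices for illustration.
    proptest! {
        #[test]
        fn cosine_similarity_mismatched_lengths_is_zero(
            a in proptest::collection::vec(-1.0f32..1.0, 1..8),
            b in proptest::collection::vec(-1.0f32..1.0, 9..16),
        ) {
            prop_assert_eq!(super::cosine_similarity(&a, &b), 0.0);
        }
    }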
}