zeph_memory/semantic/mod.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! High-level semantic memory orchestrator.
//!
//! [`SemanticMemory`] is the primary entry point used by `zeph-core`.  It wires
//! together [`crate::store::SqliteStore`] (relational persistence) and
//! [`crate::embedding_store::EmbeddingStore`] (Qdrant vector index) into a single
//! object with `remember` / `recall` / `summarize` operations.
//!
//! # Construction
//!
//! Use [`SemanticMemory::new`] for the default 0.7/0.3 vector/keyword weights, or
//! [`SemanticMemory::with_qdrant_ops`] inside `AppBuilder` to share a single gRPC
//! channel across all subsystems.
//!
//! # Hybrid recall
//!
//! Recall uses reciprocal-rank fusion of BM25 (`SQLite` FTS5) and cosine-similarity
//! (Qdrant) results, with optional temporal decay, MMR diversity reranking, and
//! per-tier score boosts.
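//!
//! # Example
//!
//! A minimal construction sketch using [`SemanticMemory::new`]; the database path,
//! Qdrant URL, and embedding-model name below are placeholders, not project defaults.
//!
//! ```ignore
//! let memory = SemanticMemory::new(
//!     "zeph.db",                  // SQLite path
//!     "http://localhost:6334",    // Qdrant gRPC endpoint
//!     None,                       // Qdrant API key
//!     provider,                   // zeph_llm::any::AnyProvider
//!     "nomic-embed-text",         // embedding model
//! )
//! .await?;
//! ```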

mod algorithms;
mod corrections;
mod cross_session;
mod graph;
pub(crate) mod importance;
pub mod persona;
mod recall;
mod summarization;
pub mod trajectory;
pub mod tree_consolidation;
pub(crate) mod write_buffer;

#[cfg(test)]
mod tests;

use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::AtomicU64;
use std::time::Instant;

use tokio::sync::RwLock;
use zeph_llm::any::AnyProvider;
use zeph_llm::provider::LlmProvider as _;

use crate::admission::AdmissionControl;
use crate::embedding_store::EmbeddingStore;
use crate::error::MemoryError;
use crate::store::SqliteStore;
use crate::token_counter::TokenCounter;

pub(crate) const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";
pub(crate) const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";
pub(crate) const CORRECTIONS_COLLECTION: &str = "zeph_corrections";

/// Progress state for embed backfill.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BackfillProgress {
    /// Number of messages processed so far (including failures).
    pub done: usize,
    /// Total number of unembedded messages at backfill start.
    pub total: usize,
}

pub use algorithms::{apply_mmr, apply_temporal_decay};
pub use cross_session::SessionSummaryResult;
pub use graph::{
    ExtractionResult, ExtractionStats, GraphExtractionConfig, LinkingStats, NoteLinkingConfig,
    PostExtractValidator, extract_and_store, link_memory_notes,
};
pub use persona::{
    PersonaExtractionConfig, contains_self_referential_language, extract_persona_facts,
};
pub use recall::{EmbedContext, RecalledMessage};
pub use summarization::{StructuredSummary, Summary, build_summarization_prompt};
pub use trajectory::{TrajectoryEntry, TrajectoryExtractionConfig, extract_trajectory_entries};
pub use tree_consolidation::{
    TreeConsolidationConfig, TreeConsolidationResult, run_tree_consolidation_sweep,
    start_tree_consolidation_loop,
};
pub use write_buffer::{BufferedWrite, WriteBuffer};

/// Cached profile centroid for query-bias correction (MM-F3, #3341).
///
/// Stored inside `SemanticMemory::profile_centroid` under an `RwLock`. Expires after
/// `profile_centroid_ttl_secs` seconds; a miss is non-sticky (next call retries).
#[derive(Debug, Clone)]
pub(crate) struct CachedCentroid {
    /// The centroid vector (unweighted mean of persona-fact embeddings).
    pub vector: Vec<f32>,
    /// Wall-clock instant when this centroid was computed.
    pub computed_at: Instant,
}

/// Whether temporal decay is applied to recall scores.
///
/// When `Enabled`, older memories receive lower scores based on the configured
/// half-life. When `Disabled`, all memories are scored equally regardless of age.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TemporalDecay {
    /// Apply exponential decay: older memories score lower.
    Enabled,
    /// No age-based score reduction.
    #[default]
    Disabled,
}

impl TemporalDecay {
    /// Returns `true` when the variant is `Enabled`.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        self == Self::Enabled
    }
}

impl From<bool> for TemporalDecay {
    fn from(b: bool) -> Self {
        if b { Self::Enabled } else { Self::Disabled }
    }
}
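
// The Enabled/Disabled toggles below (and `TemporalDecay` above) each implement
// `From<bool>`, so config booleans map directly onto the explicit variants.
// Illustrative only:
//
//     let decay: TemporalDecay = true.into();        // TemporalDecay::Enabled
//     let mmr = MmrReranking::from(false);           // MmrReranking::Disabled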

/// Whether Maximal Marginal Relevance (MMR) diversity re-ranking is applied.
///
/// When `Enabled`, recall results are re-ranked to balance relevance and
/// diversity using the configured lambda parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MmrReranking {
    /// Apply MMR diversity re-ranking after initial vector search.
    Enabled,
    /// Return results in raw cosine-similarity order.
    #[default]
    Disabled,
}

impl MmrReranking {
    /// Returns `true` when the variant is `Enabled`.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        self == Self::Enabled
    }
}

impl From<bool> for MmrReranking {
    fn from(b: bool) -> Self {
        if b { Self::Enabled } else { Self::Disabled }
    }
}

/// Whether write-time importance scoring influences recall ranking.
///
/// When `Enabled`, each stored message receives an importance score that
/// is blended into the recall ranking with the configured weight.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ImportanceScoring {
    /// Blend importance scores into recall ranking.
    Enabled,
    /// Recall ranking uses only hybrid search scores.
    #[default]
    Disabled,
}

impl ImportanceScoring {
    /// Returns `true` when the variant is `Enabled`.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        self == Self::Enabled
    }
}

impl From<bool> for ImportanceScoring {
    fn from(b: bool) -> Self {
        if b { Self::Enabled } else { Self::Disabled }
    }
}

/// Whether query-bias correction shifts first-person queries toward the user profile centroid.
///
/// When `Enabled`, queries containing first-person language are biased towards
/// the stored user profile centroid to improve personalised recall.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum QueryBiasCorrection {
    /// Shift first-person query embeddings towards the user profile centroid.
    #[default]
    Enabled,
    /// Pass query embeddings through unchanged.
    Disabled,
}

impl QueryBiasCorrection {
    /// Returns `true` when the variant is `Enabled`.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        self == Self::Enabled
    }
}

impl From<bool> for QueryBiasCorrection {
    fn from(b: bool) -> Self {
        if b { Self::Enabled } else { Self::Disabled }
    }
}

/// Whether Hebbian edge-weight reinforcement is active.
///
/// When `Enabled`, each graph edge traversed during recall receives a small
/// weight increment (`hebbian_lr`), strengthening frequently-used associations.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HebbianReinforcement {
    /// Increment edge weights after each recall traversal.
    Enabled,
    /// Edge weights remain unchanged after recall.
    #[default]
    Disabled,
}

impl HebbianReinforcement {
    /// Returns `true` when the variant is `Enabled`.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        self == Self::Enabled
    }
}

impl From<bool> for HebbianReinforcement {
    fn from(b: bool) -> Self {
        if b { Self::Enabled } else { Self::Disabled }
    }
}

/// Classification of a user query's self-referential intent (MM-F3, #3341).
///
/// Used to decide whether query-bias correction should shift the embedding
/// towards the user's profile centroid.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum QueryIntent {
    /// Query contains first-person language — likely about the user themselves.
    FirstPerson,
    /// Query is about an external topic; no bias shift applied.
    Other,
}

/// HL-F5 runtime wiring for spreading activation (mirror of `[memory.hebbian]` spread fields).
///
/// Built from config at bootstrap and attached via [`SemanticMemory::with_hebbian_spread`].
#[derive(Debug, Clone, Default)]
pub struct HelaSpreadRuntime {
    /// `true` when `[memory.hebbian] enabled = true` AND `spreading_activation = true`.
    pub enabled: bool,
    /// BFS hops, already clamped to `[1, 6]` by the caller.
    pub depth: u32,
    /// Soft upper bound on the visited-node set.
    pub max_visited: usize,
    /// MAGMA edge-type filter for BFS traversal.
    pub edge_types: Vec<crate::graph::EdgeType>,
    /// Per-step circuit-breaker duration.
    pub step_budget: Option<std::time::Duration>,
}
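
// A minimal construction sketch for the spread runtime; the values are illustrative,
// not configuration defaults, and would normally be derived from `[memory.hebbian]`:
//
//     let spread = HelaSpreadRuntime {
//         enabled: true,
//         depth: 2,                          // caller clamps to [1, 6]
//         max_visited: 256,
//         edge_types: Vec::new(),            // MAGMA edge-type filter for BFS
//         step_budget: Some(std::time::Duration::from_millis(50)),
//     };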

/// High-level semantic memory orchestrator combining `SQLite` and Qdrant.
///
/// Instantiate via [`SemanticMemory::new`] or the `AppBuilder` integration.
/// All fields are `pub(crate)` — callers interact through the inherent method API.
pub struct SemanticMemory {
    pub(crate) sqlite: SqliteStore,
    pub(crate) qdrant: Option<Arc<EmbeddingStore>>,
    pub(crate) provider: AnyProvider,
    /// Dedicated provider for batch embedding calls (backfill, write-path embedding).
    ///
    /// When `Some`, all embedding I/O is routed through this provider instead of `provider`.
    /// This prevents `embed_backfill` from saturating the main provider and causing guardrail
    /// timeouts. When `None`, falls back to `provider`.
    pub(crate) embed_provider: Option<AnyProvider>,
    pub(crate) embedding_model: String,
    pub(crate) vector_weight: f64,
    pub(crate) keyword_weight: f64,
    pub(crate) temporal_decay: TemporalDecay,
    pub(crate) temporal_decay_half_life_days: u32,
    pub(crate) mmr_reranking: MmrReranking,
    pub(crate) mmr_lambda: f32,
    pub(crate) importance_scoring: ImportanceScoring,
    pub(crate) importance_weight: f64,
    /// Multiplicative score boost for semantic-tier messages in recall ranking.
    /// Default: `1.3`. Disabled when set to `1.0`.
    pub(crate) tier_boost_semantic: f64,
    pub token_counter: Arc<TokenCounter>,
    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
    /// Experience store for tool-outcome telemetry and per-turn evolution sweeps.
    ///
    /// `Some` when `memory.graph.experience.enabled = true` at bootstrap.
    pub experience: Option<Arc<crate::graph::experience::ExperienceStore>>,
    /// `ReasoningBank` store for distilled reasoning strategies (#3342).
    ///
    /// `Some` when `memory.reasoning.enabled = true` at bootstrap.
    pub reasoning: Option<Arc<crate::reasoning::ReasoningMemory>>,
    pub(crate) community_detection_failures: Arc<AtomicU64>,
    pub(crate) graph_extraction_count: Arc<AtomicU64>,
    pub(crate) graph_extraction_failures: Arc<AtomicU64>,
    pub(crate) last_qdrant_warn: Arc<AtomicU64>,
    /// A-MAC admission control gate. When `Some`, each `remember()` call is evaluated.
    pub(crate) admission_control: Option<Arc<AdmissionControl>>,
    /// Write quality gate. When `Some`, evaluated in `remember()`/`remember_with_parts()`
    /// after A-MAC admission and before persistence.
    pub(crate) quality_gate: Option<Arc<crate::quality_gate::QualityGate>>,
    /// Cosine similarity threshold for skipping near-duplicate key facts (0.0–1.0).
    /// When a new fact's nearest neighbour in `zeph_key_facts` has score >= this value,
    /// the fact is considered a duplicate and not inserted.  Default: `0.95`.
    pub(crate) key_facts_dedup_threshold: f32,
    /// Bounded set of in-flight background embed tasks.
    ///
    /// Guarded by a `Mutex` because `SemanticMemory` is shared via `Arc` and
    /// `JoinSet` requires `&mut self` for `spawn`. Capacity is capped at
    /// `MAX_EMBED_BG_TASKS`; tasks that exceed the limit are dropped with a debug log.
    pub(crate) embed_tasks: Mutex<tokio::task::JoinSet<()>>,
    /// ANN candidate count fetched from the vector store before reranking (MM-F1, #3340).
    ///
    /// `0` = legacy behavior (`recall_limit * 2`). `≥ 1` = direct count.
    pub(crate) retrieval_depth: u32,
    /// Template applied to raw user queries before embedding (MM-F2, #3340).
    ///
    /// Empty string = identity (pass raw query through). Applied at query-side embed sites only;
    /// never applied to stored content (summaries, documents).
    pub(crate) search_prompt_template: String,
    /// Fires `tracing::warn!` once per instance when `retrieval_depth < recall_limit`.
    pub(crate) depth_below_limit_warned: Arc<std::sync::atomic::AtomicBool>,
    /// Fires `tracing::warn!` once per instance when `search_prompt_template` has no `{query}`.
    pub(crate) missing_placeholder_warned: Arc<std::sync::atomic::AtomicBool>,
    /// Query-bias correction towards the user profile centroid (MM-F3, #3341).
    pub(crate) query_bias_correction: QueryBiasCorrection,
    /// Blend weight for query-bias correction (MM-F3, #3341). Clamped to `[0.0, 1.0]`.
    pub(crate) query_bias_profile_weight: f32,
    /// Cached profile centroid computed from persona-fact embeddings (MM-F3, #3341).
    ///
    /// Protected by `RwLock` to allow concurrent reads. Never holds the lock across `.await`
    /// (await-discipline rule #4). TTL-bounded; miss is non-sticky.
    pub(crate) profile_centroid: RwLock<Option<CachedCentroid>>,
    /// Time-to-live for the profile centroid cache in seconds (MM-F3, #3341). Default: 300.
    pub(crate) profile_centroid_ttl_secs: u64,
    /// Opt-in master switch for Hebbian edge-weight reinforcement (HL-F2, #3344).
    pub(crate) hebbian_reinforcement: HebbianReinforcement,
    /// Weight increment applied per recall traversal when `hebbian_reinforcement` is `Enabled` (HL-F2, #3344).
    pub(crate) hebbian_lr: f32,
    /// HL-F5 spreading activation runtime config (#3346).
    pub(crate) hebbian_spread: HelaSpreadRuntime,
}

impl SemanticMemory {
    /// Create a new `SemanticMemory` instance with default hybrid search weights (0.7/0.3).
    ///
    /// Qdrant connection is best-effort: if unavailable, semantic search is disabled.
    ///
    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
    /// a single gRPC channel across all subsystems.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn new(
        sqlite_path: &str,
        qdrant_url: &str,
        api_key: Option<&str>,
        provider: AnyProvider,
        embedding_model: &str,
    ) -> Result<Self, MemoryError> {
        Self::with_weights(
            sqlite_path,
            qdrant_url,
            api_key,
            provider,
            embedding_model,
            0.7,
            0.3,
        )
        .await
    }

    /// Create a new `SemanticMemory` with custom vector/keyword weights for hybrid search.
    ///
    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
    /// a single gRPC channel across all subsystems.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_weights(
        sqlite_path: &str,
        qdrant_url: &str,
        api_key: Option<&str>,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
    ) -> Result<Self, MemoryError> {
        Self::with_weights_and_pool_size(
            sqlite_path,
            qdrant_url,
            api_key,
            provider,
            embedding_model,
            vector_weight,
            keyword_weight,
            5,
        )
        .await
    }

    /// Create a new `SemanticMemory` with custom weights and configurable pool size.
    ///
    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
    /// a single gRPC channel across all subsystems.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    #[allow(clippy::too_many_arguments)]
    pub async fn with_weights_and_pool_size(
        sqlite_path: &str,
        qdrant_url: &str,
        api_key: Option<&str>,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
        pool_size: u32,
    ) -> Result<Self, MemoryError> {
        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
        let pool = sqlite.pool().clone();

        let qdrant = match EmbeddingStore::new(qdrant_url, api_key, pool) {
            Ok(store) => Some(Arc::new(store)),
            Err(e) => {
                tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
                None
            }
        };

        Ok(Self {
            sqlite,
            qdrant,
            provider,
            embed_provider: None,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay: TemporalDecay::Disabled,
            temporal_decay_half_life_days: 30,
            mmr_reranking: MmrReranking::Disabled,
            mmr_lambda: 0.7,
            importance_scoring: ImportanceScoring::Disabled,
            importance_weight: 0.15,
            tier_boost_semantic: 1.3,
            token_counter: Arc::new(TokenCounter::new()),
            graph_store: None,
            experience: None,
            reasoning: None,
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
            last_qdrant_warn: Arc::new(AtomicU64::new(0)),
            admission_control: None,
            quality_gate: None,
            key_facts_dedup_threshold: 0.95,
            embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
            retrieval_depth: 0,
            search_prompt_template: String::new(),
            depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            query_bias_correction: QueryBiasCorrection::Enabled,
            query_bias_profile_weight: 0.25,
            profile_centroid: RwLock::new(None),
            profile_centroid_ttl_secs: 300,
            hebbian_reinforcement: HebbianReinforcement::Disabled,
            hebbian_lr: 0.1,
            hebbian_spread: HelaSpreadRuntime::default(),
        })
    }

    /// Create a `SemanticMemory` from a pre-built `QdrantOps` instance.
    ///
    /// Use this at bootstrap to share one `QdrantOps` (and thus one gRPC channel)
    /// across all subsystems. The `ops` is consumed and wrapped inside `EmbeddingStore`.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_qdrant_ops(
        sqlite_path: &str,
        ops: crate::QdrantOps,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
        pool_size: u32,
    ) -> Result<Self, MemoryError> {
        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
        let pool = sqlite.pool().clone();
        let store = EmbeddingStore::with_store(Box::new(ops), pool);

        Ok(Self {
            sqlite,
            qdrant: Some(Arc::new(store)),
            provider,
            embed_provider: None,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay: TemporalDecay::Disabled,
            temporal_decay_half_life_days: 30,
            mmr_reranking: MmrReranking::Disabled,
            mmr_lambda: 0.7,
            importance_scoring: ImportanceScoring::Disabled,
            importance_weight: 0.15,
            tier_boost_semantic: 1.3,
            token_counter: Arc::new(TokenCounter::new()),
            graph_store: None,
            experience: None,
            reasoning: None,
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
            last_qdrant_warn: Arc::new(AtomicU64::new(0)),
            admission_control: None,
            quality_gate: None,
            key_facts_dedup_threshold: 0.95,
            embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
            retrieval_depth: 0,
            search_prompt_template: String::new(),
            depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            query_bias_correction: QueryBiasCorrection::Enabled,
            query_bias_profile_weight: 0.25,
            profile_centroid: RwLock::new(None),
            profile_centroid_ttl_secs: 300,
            hebbian_reinforcement: HebbianReinforcement::Disabled,
            hebbian_lr: 0.1,
            hebbian_spread: HelaSpreadRuntime::default(),
        })
    }

    /// Attach a `GraphStore` for graph-aware retrieval.
    ///
    /// When set, `recall_graph` traverses the graph starting from entities
    /// matched by the query.
    #[must_use]
    pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
        self.graph_store = Some(store);
        self
    }

    /// Attach an [`ExperienceStore`](crate::graph::experience::ExperienceStore) for tool-outcome
    /// telemetry and per-turn evolution sweeps.
    ///
    /// When set, the agent records one row per tool invocation in `experience_nodes` and
    /// periodically runs `evolution_sweep` to prune low-confidence and self-loop edges.
    #[must_use]
    pub fn with_experience_store(
        mut self,
        store: Arc<crate::graph::experience::ExperienceStore>,
    ) -> Self {
        self.experience = Some(store);
        self
    }

    /// Attach a [`ReasoningMemory`](crate::reasoning::ReasoningMemory) store for
    /// distilled reasoning strategy storage and retrieval (#3342).
    ///
    /// When set, [`SemanticMemory::retrieve_reasoning_strategies`] uses this store for
    /// embedding-similarity lookups. When `None`, retrieval returns an empty vec.
    #[must_use]
    pub fn with_reasoning(mut self, store: Arc<crate::reasoning::ReasoningMemory>) -> Self {
        self.reasoning = Some(store);
        self
    }

    /// Returns the cumulative count of community detection failures since startup.
    #[must_use]
    pub fn community_detection_failures(&self) -> u64 {
        use std::sync::atomic::Ordering;
        self.community_detection_failures.load(Ordering::Relaxed)
    }

    /// Returns the cumulative count of successful graph extractions since startup.
    #[must_use]
    pub fn graph_extraction_count(&self) -> u64 {
        use std::sync::atomic::Ordering;
        self.graph_extraction_count.load(Ordering::Relaxed)
    }

    /// Returns the cumulative count of failed graph extractions since startup.
    #[must_use]
    pub fn graph_extraction_failures(&self) -> u64 {
        use std::sync::atomic::Ordering;
        self.graph_extraction_failures.load(Ordering::Relaxed)
    }

    /// Configure temporal decay and MMR re-ranking options.
    #[must_use]
    pub fn with_ranking_options(
        mut self,
        temporal_decay: TemporalDecay,
        temporal_decay_half_life_days: u32,
        mmr_reranking: MmrReranking,
        mmr_lambda: f32,
    ) -> Self {
        self.temporal_decay = temporal_decay;
        self.temporal_decay_half_life_days = temporal_decay_half_life_days;
        self.mmr_reranking = mmr_reranking;
        self.mmr_lambda = mmr_lambda;
        self
    }

    /// Configure write-time importance scoring for memory retrieval.
    #[must_use]
    pub fn with_importance_options(mut self, scoring: ImportanceScoring, weight: f64) -> Self {
        self.importance_scoring = scoring;
        self.importance_weight = weight;
        self
    }

    /// Configure the multiplicative score boost applied to semantic-tier messages during recall.
    ///
    /// Set to `1.0` to disable the boost. Default: `1.3`.
    #[must_use]
    pub fn with_tier_boost(mut self, boost: f64) -> Self {
        self.tier_boost_semantic = boost;
        self
    }

    /// Attach an A-MAC admission controller.
    ///
    /// When set, `remember()` and `remember_with_parts()` evaluate each message before persisting.
    /// Messages below the admission threshold return `Ok(None)` without incrementing counts.
    #[must_use]
    pub fn with_admission_control(mut self, control: AdmissionControl) -> Self {
        self.admission_control = Some(Arc::new(control));
        self
    }

    /// Attach a write quality gate that scores each `remember()` call before persisting.
    ///
    /// When set, the gate is evaluated after A-MAC admission. A `Some(reason)` result from
    /// [`crate::quality_gate::QualityGate::evaluate`] causes the write to be skipped
    /// and `Ok(None)` / `Ok((None, false))` to be returned.
    #[must_use]
    pub fn with_quality_gate(mut self, gate: Arc<crate::quality_gate::QualityGate>) -> Self {
        self.quality_gate = Some(gate);
        self
    }

    /// Set the cosine similarity threshold used to skip near-duplicate key facts on insert.
    ///
    /// When a candidate fact's nearest neighbour in `zeph_key_facts` has a score ≥ this value,
    /// the fact is not stored.  Default: `0.95`.
    #[must_use]
    pub fn with_key_facts_dedup_threshold(mut self, threshold: f32) -> Self {
        self.key_facts_dedup_threshold = threshold;
        self
    }

    /// Configure query-bias correction (MM-F3, #3341).
    ///
    /// When `correction` is [`QueryBiasCorrection::Enabled`], first-person queries are biased
    /// towards the user profile centroid. `profile_weight` controls the blend strength and is
    /// clamped to `[0.0, 1.0]`. `centroid_ttl_secs` controls how long the centroid cache stays
    /// valid.
    #[must_use]
    pub fn with_query_bias(
        mut self,
        correction: QueryBiasCorrection,
        profile_weight: f32,
        centroid_ttl_secs: u64,
    ) -> Self {
        self.query_bias_correction = correction;
        self.query_bias_profile_weight = profile_weight.clamp(0.0, 1.0);
        self.profile_centroid_ttl_secs = centroid_ttl_secs;
        self
    }

    /// Configure HL-F5 spreading activation runtime parameters (HL-F5, #3346).
    ///
    /// Has no effect when `hebbian_spread.enabled = false` (the default).
    /// Call this after `with_graph_store` and `with_hebbian` during bootstrap.
    #[must_use]
    pub fn with_hebbian_spread(mut self, runtime: HelaSpreadRuntime) -> Self {
        self.hebbian_spread = runtime;
        self
    }

    /// Configure Hebbian edge-weight reinforcement (HL-F2, #3344).
    ///
    /// When `reinforcement` is [`HebbianReinforcement::Enabled`], `lr` is added to the `weight`
    /// column of each traversed edge after every recall. `lr = 0.0` with `Enabled` logs a WARN.
    #[must_use]
    pub fn with_hebbian(mut self, reinforcement: HebbianReinforcement, lr: f32) -> Self {
        let lr = lr.max(0.0);
        if reinforcement.is_enabled() && lr == 0.0 {
            tracing::warn!("hebbian enabled with lr=0.0 — no reinforcement will occur");
        }
        self.hebbian_reinforcement = reinforcement;
        self.hebbian_lr = lr;
        self
    }
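
    // Bootstrap ordering sketch (illustrative; see the `with_hebbian_spread` doc above):
    //
    //     let memory = memory
    //         .with_graph_store(graph_store)
    //         .with_hebbian(HebbianReinforcement::Enabled, 0.1)
    //         .with_hebbian_spread(spread_runtime);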

    /// Classify a query's intent for query-bias correction (MM-F3, #3341).
    ///
    /// Returns [`QueryIntent::FirstPerson`] when the query contains self-referential language
    /// (first-person pronouns). Otherwise returns [`QueryIntent::Other`].
    pub(crate) fn classify_query_intent(query: &str) -> QueryIntent {
        if persona::contains_self_referential_language(query) {
            QueryIntent::FirstPerson
        } else {
            QueryIntent::Other
        }
    }

    /// Apply query-bias correction to an embedding (MM-F3, #3341).
    ///
    /// Returns the embedding unchanged if `query_bias_correction` is [`QueryBiasCorrection::Disabled`],
    /// if the query is not first-person, or if the profile centroid is unavailable.
    /// Logs a single WARN on dimension mismatch and returns the original embedding.
    #[tracing::instrument(name = "memory.query_bias.apply", skip(self, embedding), fields(query_len = query.len()))]
    pub(crate) async fn apply_query_bias(&self, query: &str, embedding: Vec<f32>) -> Vec<f32> {
        if !self.query_bias_correction.is_enabled() {
            tracing::debug!(reason = "disabled", "query-bias: skipping");
            return embedding;
        }
        if Self::classify_query_intent(query) != QueryIntent::FirstPerson {
            tracing::debug!(reason = "not_first_person", "query-bias: skipping");
            return embedding;
        }
        let Some(centroid) = self.profile_centroid_cached().await else {
            tracing::debug!(reason = "no_centroid", "query-bias: skipping");
            return embedding;
        };
        if centroid.len() != embedding.len() {
            tracing::warn!(
                centroid_dim = centroid.len(),
                query_dim = embedding.len(),
                reason = "dim_mismatch",
                "query-bias: dimension mismatch between profile centroid and query embedding — skipping bias"
            );
            return embedding;
        }
        let w = self.query_bias_profile_weight;
        tracing::debug!(
            intent = "first_person",
            centroid_dim = centroid.len(),
            weight = w,
            "query-bias: applying profile bias"
        );
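        // Blend each component as (1 - w) * query + w * centroid. Illustrative numbers:
        // with w = 0.25, q = 0.8, c = 0.4 the blended value is 0.75 * 0.8 + 0.25 * 0.4 = 0.7.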
        embedding
            .iter()
            .zip(centroid.iter())
            .map(|(&q, &c)| (1.0 - w) * q + w * c)
            .collect()
    }

    /// Return the cached profile centroid, recomputing if stale or absent (MM-F3, #3341).
    ///
    /// Holds the read lock only to check freshness; releases it before any `.await`.
    /// On compute failure, preserves the previous cache value (non-sticky miss).
    #[tracing::instrument(name = "memory.query_bias.centroid", skip(self))]
    pub(crate) async fn profile_centroid_cached(&self) -> Option<Vec<f32>> {
        // Fast path: check freshness under read lock without holding it across await.
        {
            let guard = self.profile_centroid.read().await;
            if let Some(c) = &*guard
                && c.computed_at.elapsed().as_secs() < self.profile_centroid_ttl_secs
            {
                let ttl_remaining = self
                    .profile_centroid_ttl_secs
                    .saturating_sub(c.computed_at.elapsed().as_secs());
                tracing::debug!(
                    centroid_dim = c.vector.len(),
                    ttl_remaining_secs = ttl_remaining,
                    "query-bias: centroid cache hit"
                );
                return Some(c.vector.clone());
            }
        }
        // Slow path: recompute. Guard is dropped before this point.
        let computed = self.compute_profile_centroid().await;
        let mut guard = self.profile_centroid.write().await;
        match computed {
            Some(v) => {
                tracing::debug!(centroid_dim = v.len(), "query-bias: centroid computed");
                *guard = Some(CachedCentroid {
                    vector: v.clone(),
                    computed_at: Instant::now(),
                });
                Some(v)
            }
            None => {
                // Do not overwrite a valid (but stale) cache on failure — serve stale over nothing.
                guard.as_ref().map(|c| c.vector.clone())
            }
        }
    }

    /// Compute the profile centroid from persona-fact embeddings (MM-F3, #3341).
    ///
    /// Returns `None` when the persona table is empty or embedding fails.
    /// Uses `load_persona_facts(0.0)` (all non-superseded facts) for the centroid basis.
    async fn compute_profile_centroid(&self) -> Option<Vec<f32>> {
        let facts = match self.sqlite.load_persona_facts(0.0).await {
            Ok(f) => f,
            Err(e) => {
                tracing::warn!(error = %e, "query-bias: failed to load persona facts");
                return None;
            }
        };
        if facts.is_empty() {
            return None;
        }
        let provider = self.effective_embed_provider();
        let texts: Vec<String> = facts.iter().map(|f| f.content.clone()).collect();
        let mut embeddings: Vec<Vec<f32>> = Vec::with_capacity(texts.len());
        for text in &texts {
            match provider.embed(text).await {
                Ok(v) => embeddings.push(v),
                Err(e) => {
                    tracing::warn!(error = %e, "query-bias: failed to embed persona fact — skipping");
                }
            }
        }
        if embeddings.is_empty() {
            return None;
        }
        let dim = embeddings[0].len();
        let mut centroid = vec![0.0f32; dim];
        for emb in &embeddings {
            if emb.len() != dim {
                tracing::warn!(
                    expected = dim,
                    got = emb.len(),
                    "query-bias: persona embedding dimension mismatch — skipping fact"
                );
                continue;
            }
            for (c, &v) in centroid.iter_mut().zip(emb.iter()) {
                *c += v;
            }
        }
        #[allow(clippy::cast_precision_loss)]
        let n = embeddings.len() as f32;
        for c in &mut centroid {
            *c /= n;
        }
        Some(centroid)
    }

    /// Configure retrieval depth and search prompt template (MM-F1/F2, #3340).
    ///
    /// `depth` is the number of ANN candidates fetched from the vector store before keyword merge
    /// and MMR re-ranking.  `0` = legacy behavior (`recall_limit * 2`).  `≥ 1` = exact count.
    ///
    /// `search_prompt_template` is applied to the raw user query before embedding.  Supports a
    /// single `{query}` placeholder.  Empty string = identity.
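    ///
    /// ```ignore
    /// // Hypothetical values: fetch 32 ANN candidates and prefix queries with a search prompt.
    /// let memory = memory.with_retrieval_options(32, "search_query: {query}");
    /// ```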
    #[must_use]
    pub fn with_retrieval_options(
        mut self,
        depth: u32,
        search_prompt_template: impl Into<String>,
    ) -> Self {
        self.retrieval_depth = depth;
        self.search_prompt_template = search_prompt_template.into();
        self
    }

    /// Effective ANN candidate count for a given requested final limit (MM-F1, #3340).
    ///
    /// - `retrieval_depth == 0`: legacy behavior, returns `limit * 2`.
    /// - `retrieval_depth >= 1`: returns the configured depth directly.
    ///
    /// When `retrieval_depth < limit`, a one-shot WARN fires because the ANN pool cannot
    /// saturate the requested top-k.  When `limit <= retrieval_depth < limit * 2`, an INFO
    /// fires per call noting the smaller-than-legacy pool.
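    ///
    /// Illustrative values: with `retrieval_depth = 0` and `limit = 8` this returns 16;
    /// with `retrieval_depth = 32` it returns 32 regardless of `limit`.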
    pub(crate) fn effective_depth(&self, limit: usize) -> usize {
        use std::sync::atomic::Ordering;

        let depth = self.retrieval_depth as usize;
        if depth == 0 {
            return limit.saturating_mul(2);
        }
        if depth < limit {
            if !self.depth_below_limit_warned.swap(true, Ordering::Relaxed) {
                tracing::warn!(
                    retrieval_depth = depth,
                    recall_limit = limit,
                    "memory.retrieval.depth < recall_limit; ANN pool cannot saturate top-k — consider raising depth"
                );
            }
        } else if depth < limit.saturating_mul(2) {
            tracing::info!(
                retrieval_depth = depth,
                recall_limit = limit,
                legacy_default = limit.saturating_mul(2),
                "memory.retrieval.depth is below legacy limit*2; ANN pool will be smaller than pre-#3340"
            );
        } else {
            tracing::debug!(
                retrieval_depth = depth,
                recall_limit = limit,
                "recall: using configured ANN depth"
            );
        }
        depth
    }

    /// Apply the configured search prompt template to a raw query (MM-F2, #3340).
    ///
    /// Returns `query` as-is when the template is empty or has no `{query}` placeholder.
    /// A one-shot WARN fires when the template is non-empty but missing the placeholder.
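    ///
    /// For example, with a hypothetical template `"search_query: {query}"`, the raw query
    /// `"favorite editor"` becomes `"search_query: favorite editor"`.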
    pub(crate) fn apply_search_prompt(&self, query: &str) -> String {
        use std::sync::atomic::Ordering;

        let template = &self.search_prompt_template;
        if template.is_empty() {
            return query.to_owned();
        }
        if !template.contains("{query}") {
            if !self
                .missing_placeholder_warned
                .swap(true, Ordering::Relaxed)
            {
                tracing::warn!(
                    template = template.as_str(),
                    "memory.retrieval.search_prompt_template has no {{query}} placeholder — \
                     using raw query as-is"
                );
            }
            return query.to_owned();
        }
        template.replace("{query}", query)
    }

    /// Attach a dedicated embedding provider for write-path and backfill operations.
    ///
    /// When set, all batch embedding calls (backfill, `remember`) route through this provider
    /// instead of the main `provider`. This prevents `embed_backfill` from saturating the main
    /// provider and causing guardrail timeouts due to rate-limit contention or Ollama model-lock.
    #[must_use]
    pub fn with_embed_provider(mut self, embed_provider: AnyProvider) -> Self {
        self.embed_provider = Some(embed_provider);
        self
    }

    /// Returns the provider to use for embedding calls.
    ///
    /// Returns the dedicated embed provider when configured, falling back to the main provider.
    pub fn effective_embed_provider(&self) -> &AnyProvider {
        self.embed_provider.as_ref().unwrap_or(&self.provider)
    }

    /// Construct a `SemanticMemory` from pre-built parts.
    ///
    /// Intended for tests that need full control over the backing stores.
    #[must_use]
    pub fn from_parts(
        sqlite: SqliteStore,
        qdrant: Option<Arc<EmbeddingStore>>,
        provider: AnyProvider,
        embedding_model: impl Into<String>,
        vector_weight: f64,
        keyword_weight: f64,
        token_counter: Arc<TokenCounter>,
    ) -> Self {
        Self {
            sqlite,
            qdrant,
            provider,
            embed_provider: None,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay: TemporalDecay::Disabled,
            temporal_decay_half_life_days: 30,
            mmr_reranking: MmrReranking::Disabled,
            mmr_lambda: 0.7,
            importance_scoring: ImportanceScoring::Disabled,
            importance_weight: 0.15,
            tier_boost_semantic: 1.3,
            token_counter,
            graph_store: None,
            experience: None,
            reasoning: None,
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
            last_qdrant_warn: Arc::new(AtomicU64::new(0)),
            admission_control: None,
            quality_gate: None,
            key_facts_dedup_threshold: 0.95,
            embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
            retrieval_depth: 0,
            search_prompt_template: String::new(),
            depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            query_bias_correction: QueryBiasCorrection::Enabled,
            query_bias_profile_weight: 0.25,
            profile_centroid: RwLock::new(None),
            profile_centroid_ttl_secs: 300,
            hebbian_reinforcement: HebbianReinforcement::Disabled,
            hebbian_lr: 0.1,
            hebbian_spread: HelaSpreadRuntime::default(),
        }
    }

    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_sqlite_backend(
        sqlite_path: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
    ) -> Result<Self, MemoryError> {
        Self::with_sqlite_backend_and_pool_size(
            sqlite_path,
            provider,
            embedding_model,
            vector_weight,
            keyword_weight,
            5,
        )
        .await
    }

    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend with configurable pool size.
    ///
    /// # Errors
    ///
    /// Returns an error if `SQLite` cannot be initialized.
    pub async fn with_sqlite_backend_and_pool_size(
        sqlite_path: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
        pool_size: u32,
    ) -> Result<Self, MemoryError> {
        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
        let pool = sqlite.pool().clone();
        let store = EmbeddingStore::new_sqlite(pool);

        Ok(Self {
            sqlite,
            qdrant: Some(Arc::new(store)),
            provider,
            embed_provider: None,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            temporal_decay: TemporalDecay::Disabled,
            temporal_decay_half_life_days: 30,
            mmr_reranking: MmrReranking::Disabled,
            mmr_lambda: 0.7,
            importance_scoring: ImportanceScoring::Disabled,
            importance_weight: 0.15,
            tier_boost_semantic: 1.3,
            token_counter: Arc::new(TokenCounter::new()),
            graph_store: None,
            experience: None,
            reasoning: None,
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
            last_qdrant_warn: Arc::new(AtomicU64::new(0)),
            admission_control: None,
            quality_gate: None,
            key_facts_dedup_threshold: 0.95,
            embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
            retrieval_depth: 0,
            search_prompt_template: String::new(),
            depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            query_bias_correction: QueryBiasCorrection::Enabled,
            query_bias_profile_weight: 0.25,
            profile_centroid: RwLock::new(None),
            profile_centroid_ttl_secs: 300,
            hebbian_reinforcement: HebbianReinforcement::Disabled,
            hebbian_lr: 0.1,
            hebbian_spread: HelaSpreadRuntime::default(),
        })
    }

    /// Access the underlying `SqliteStore` for operations that don't involve semantics.
    #[must_use]
    pub fn sqlite(&self) -> &SqliteStore {
        &self.sqlite
    }

    /// Check if the vector store backend is reachable.
    ///
    /// Performs a real health check (Qdrant gRPC ping or `SQLite` query)
    /// instead of just checking whether the client was created.
    pub async fn is_vector_store_connected(&self) -> bool {
        match self.qdrant.as_ref() {
            Some(store) => store.health_check().await,
            None => false,
        }
    }

    /// Check if a vector store client is configured (may not be connected).
    #[must_use]
    pub fn has_vector_store(&self) -> bool {
        self.qdrant.is_some()
    }

    /// Return a reference to the embedding store, if configured.
    #[must_use]
    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
        self.qdrant.as_ref()
    }

    /// Return a reference to the underlying LLM provider (used for RPE embedding).
    pub fn provider(&self) -> &AnyProvider {
        &self.provider
    }

    /// Count messages in a conversation.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails.
    pub async fn message_count(
        &self,
        conversation_id: crate::types::ConversationId,
    ) -> Result<i64, MemoryError> {
        self.sqlite.count_messages(conversation_id).await
    }

    /// Count messages not yet covered by any summary.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails.
    pub async fn unsummarized_message_count(
        &self,
        conversation_id: crate::types::ConversationId,
    ) -> Result<i64, MemoryError> {
        let after_id = self
            .sqlite
            .latest_summary_last_message_id(conversation_id)
            .await?
            .unwrap_or(crate::types::MessageId(0));
        self.sqlite
            .count_messages_after(conversation_id, after_id)
            .await
    }

    /// Load recent episodic messages for the promotion-scan window.
    ///
    /// Returns up to `max_items` of the most recent non-deleted messages across all
    /// conversations, with their `conversation_id` for session-count heuristics.
    ///
    /// # Embedding note
    ///
    /// When Qdrant is configured, embeddings are populated by fetching `chunk_index = 0`
    /// vectors from the vector store via [`EmbeddingStore::get_vectors_for_messages`].
    /// Messages whose vector cannot be retrieved are still returned with `embedding: None`;
    /// the promotion engine skips those rows rather than re-embedding on the hot path.
    ///
    /// When Qdrant is not configured, all inputs carry `embedding: None`.
    ///
    /// Vectors whose dimension disagrees with the first non-empty vector in the batch
    /// are dropped with a single `WARN` log and treated as missing.
    ///
    /// # Errors
    ///
    /// Returns [`MemoryError`] if the underlying `SQLite` query or vector store fetch fails.
    pub async fn load_promotion_window(
        &self,
        max_items: usize,
    ) -> Result<Vec<crate::compression::promotion::PromotionInput>, MemoryError> {
        use zeph_db::sql;

        let limit = i64::try_from(max_items).unwrap_or(i64::MAX);
        let rows: Vec<(
            crate::types::MessageId,
            crate::types::ConversationId,
            String,
        )> = zeph_db::query_as(sql!(
            "SELECT id, conversation_id, content \
                 FROM messages \
                 WHERE deleted_at IS NULL \
                 ORDER BY id DESC \
                 LIMIT ?"
        ))
        .bind(limit)
        .fetch_all(self.sqlite.pool())
        .await?;

        let mut vectors = if let Some(qdrant) = &self.qdrant {
            let ids: Vec<_> = rows.iter().map(|(id, _, _)| *id).collect();
            let mut raw = qdrant.get_vectors_for_messages(&ids).await?;

            // Dimension validation: find reference dim from the first non-empty vector.
            let ref_dim = raw.values().find(|v| !v.is_empty()).map(Vec::len);
            if let Some(ref_dim) = ref_dim {
                let mismatched: Vec<_> = raw
                    .iter()
                    .filter(|(_, v)| v.len() != ref_dim)
                    .map(|(id, v)| (*id, v.len()))
                    .collect();
                if !mismatched.is_empty() {
                    tracing::warn!(
                        expected_dim = ref_dim,
                        dropped_count = mismatched.len(),
                        "load_promotion_window: dimension mismatch — dropping mismatched vectors"
                    );
                    for (id, _) in mismatched {
                        raw.remove(&id);
                    }
                }
            }
            raw
        } else {
            std::collections::HashMap::new()
        };

        Ok(rows
            .into_iter()
            .map(|(message_id, conversation_id, content)| {
                crate::compression::promotion::PromotionInput {
                    message_id,
                    conversation_id,
                    content,
                    embedding: vectors.remove(&message_id),
                }
            })
            .collect())
    }

    /// Retrieve top-k reasoning strategies by embedding similarity to `query`.
    ///
    /// Returns an empty vec when reasoning memory is not attached, Qdrant is unavailable,
    /// or the provider does not support embeddings.
    ///
    /// This method is **pure** — it does not increment `use_count` or `last_used_at`.
    /// Call [`crate::reasoning::ReasoningMemory::mark_used`] with the ids of strategies
    /// actually injected into the prompt (after budget truncation).
    ///
    /// # Errors
    ///
    /// Returns an error if embedding generation or the vector search fails.
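    ///
    /// ```ignore
    /// // Sketch: retrieve, inject into the prompt after budget truncation, then mark
    /// // only the strategies actually used via `mark_used` (call shape assumed here).
    /// let strategies = memory.retrieve_reasoning_strategies("refactor the parser", 3).await?;
    /// ```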
    pub async fn retrieve_reasoning_strategies(
        &self,
        query: &str,
        limit: usize,
    ) -> Result<Vec<crate::reasoning::ReasoningStrategy>, MemoryError> {
        let Some(reasoning) = &self.reasoning else {
            return Ok(Vec::new());
        };
        if !self.effective_embed_provider().supports_embeddings() {
            return Ok(Vec::new());
        }
        let embedding = self.effective_embed_provider().embed(query).await?;
        reasoning
            .retrieve_by_embedding(&embedding, limit as u64)
            .await
    }
}