1mod algorithms;
24mod corrections;
25mod cross_session;
26mod graph;
27pub(crate) mod importance;
28pub mod persona;
29mod recall;
30mod summarization;
31pub mod trajectory;
32pub mod tree_consolidation;
33pub(crate) mod write_buffer;
34
35#[cfg(test)]
36mod tests;
37
38use std::sync::Arc;
39use std::sync::Mutex;
40use std::sync::atomic::AtomicU64;
41use std::time::Instant;
42
43use tokio::sync::RwLock;
44use zeph_llm::any::AnyProvider;
45use zeph_llm::provider::LlmProvider as _;
46
47use crate::admission::AdmissionControl;
48use crate::embedding_store::EmbeddingStore;
49use crate::error::MemoryError;
50use crate::store::SqliteStore;
51use crate::token_counter::TokenCounter;
52
// Embedding-store collection names for derived memory artifacts.
// Per-session summary embeddings.
pub(crate) const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";
// Extracted durable key facts.
pub(crate) const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";
// User-correction records.
pub(crate) const CORRECTIONS_COLLECTION: &str = "zeph_corrections";
56
/// Progress snapshot for a backfill run: `done` of `total` items processed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BackfillProgress {
    // Items processed so far.
    pub done: usize,
    // Total items scheduled for the backfill.
    pub total: usize,
}
65
66pub use algorithms::{apply_mmr, apply_temporal_decay};
67pub use cross_session::SessionSummaryResult;
68pub use graph::{
69 ExtractionResult, ExtractionStats, GraphExtractionConfig, LinkingStats, NoteLinkingConfig,
70 PostExtractValidator, extract_and_store, link_memory_notes,
71};
72pub use persona::{
73 PersonaExtractionConfig, contains_self_referential_language, extract_persona_facts,
74};
75pub use recall::{EmbedContext, RecalledMessage};
76pub use summarization::{StructuredSummary, Summary, build_summarization_prompt};
77pub use trajectory::{TrajectoryEntry, TrajectoryExtractionConfig, extract_trajectory_entries};
78pub use tree_consolidation::{
79 TreeConsolidationConfig, TreeConsolidationResult, run_tree_consolidation_sweep,
80 start_tree_consolidation_loop,
81};
82pub use write_buffer::{BufferedWrite, WriteBuffer};
83
/// A persona-profile centroid embedding plus the instant it was computed,
/// used for TTL-based cache invalidation in `profile_centroid_cached`.
#[derive(Debug, Clone)]
pub(crate) struct CachedCentroid {
    // Mean embedding over persona facts.
    pub vector: Vec<f32>,
    // Compared against `profile_centroid_ttl_secs` to decide staleness.
    pub computed_at: Instant,
}
95
/// Toggle for time-based decay of recall scores (half-life configured via
/// `temporal_decay_half_life_days`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TemporalDecay {
    /// Older results are progressively down-weighted.
    Enabled,
    /// Scores are left untouched (the default).
    #[default]
    Disabled,
}

impl TemporalDecay {
    /// `true` when decay is active.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
}

impl From<bool> for TemporalDecay {
    fn from(flag: bool) -> Self {
        match flag {
            true => Self::Enabled,
            false => Self::Disabled,
        }
    }
}
123
/// Toggle for maximal-marginal-relevance re-ranking of recall results
/// (diversity/relevance trade-off controlled by `mmr_lambda`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MmrReranking {
    /// MMR re-ranking is applied after scoring.
    Enabled,
    /// Results keep their raw ranking (the default).
    #[default]
    Disabled,
}

impl MmrReranking {
    /// `true` when MMR re-ranking is active.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
}

impl From<bool> for MmrReranking {
    fn from(flag: bool) -> Self {
        match flag {
            true => Self::Enabled,
            false => Self::Disabled,
        }
    }
}
151
/// Toggle for blending per-item importance into recall scores
/// (blend strength controlled by `importance_weight`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ImportanceScoring {
    /// Importance contributes to the final score.
    Enabled,
    /// Importance is ignored (the default).
    #[default]
    Disabled,
}

impl ImportanceScoring {
    /// `true` when importance scoring is active.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
}

impl From<bool> for ImportanceScoring {
    fn from(flag: bool) -> Self {
        match flag {
            true => Self::Enabled,
            false => Self::Disabled,
        }
    }
}
179
/// Toggle for biasing first-person query embeddings toward the persona
/// centroid. Unlike the other feature toggles, this one defaults to
/// `Enabled`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum QueryBiasCorrection {
    /// Bias is applied to first-person queries (the default).
    #[default]
    Enabled,
    /// Query embeddings are used as-is.
    Disabled,
}

impl QueryBiasCorrection {
    /// `true` when bias correction is active.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
}

impl From<bool> for QueryBiasCorrection {
    fn from(flag: bool) -> Self {
        match flag {
            true => Self::Enabled,
            false => Self::Disabled,
        }
    }
}
207
/// Toggle for Hebbian reinforcement of graph edges on recall
/// (learning rate controlled by `hebbian_lr`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HebbianReinforcement {
    /// Edge weights are reinforced on use.
    Enabled,
    /// No reinforcement (the default).
    #[default]
    Disabled,
}

impl HebbianReinforcement {
    /// `true` when reinforcement is active.
    #[must_use]
    #[inline]
    pub fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
}

impl From<bool> for HebbianReinforcement {
    fn from(flag: bool) -> Self {
        match flag {
            true => Self::Enabled,
            false => Self::Disabled,
        }
    }
}
235
/// Coarse classification of a recall query, used to decide whether
/// persona-profile bias should be applied (see `apply_query_bias`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum QueryIntent {
    // Query contains self-referential language, per
    // `persona::contains_self_referential_language`.
    FirstPerson,
    // Everything else — no bias applied.
    Other,
}
247
/// Runtime parameters for Hebbian spreading activation over the memory
/// graph.
#[derive(Debug, Clone, Default)]
pub struct HelaSpreadRuntime {
    // Master switch — presumably the other fields are ignored when false;
    // confirm at the call site.
    pub enabled: bool,
    // Maximum traversal depth from seed nodes.
    pub depth: u32,
    // Upper bound on nodes visited per spread.
    pub max_visited: usize,
    // Edge types eligible for traversal.
    pub edge_types: Vec<crate::graph::EdgeType>,
    // Optional per-step time budget.
    pub step_budget: Option<std::time::Duration>,
}
264
/// Central semantic-memory facade: SQLite persistence plus optional vector
/// store, knowledge-graph/experience/reasoning sub-stores, and all
/// recall-ranking configuration.
pub struct SemanticMemory {
    // Relational store for messages, summaries, and facts.
    pub(crate) sqlite: SqliteStore,
    // Vector store; `None` disables semantic search.
    pub(crate) qdrant: Option<Arc<EmbeddingStore>>,
    // Primary LLM provider (also used for embeddings unless overridden).
    pub(crate) provider: AnyProvider,
    // Optional dedicated embedding provider; see `effective_embed_provider`.
    pub(crate) embed_provider: Option<AnyProvider>,
    pub(crate) embedding_model: String,
    // Hybrid-search blend weights (vector vs keyword score).
    pub(crate) vector_weight: f64,
    pub(crate) keyword_weight: f64,
    // Recency decay of recall scores; half-life in days when enabled.
    pub(crate) temporal_decay: TemporalDecay,
    pub(crate) temporal_decay_half_life_days: u32,
    // MMR re-ranking toggle and its relevance/diversity lambda.
    pub(crate) mmr_reranking: MmrReranking,
    pub(crate) mmr_lambda: f32,
    // Importance-weighted scoring toggle and blend weight.
    pub(crate) importance_scoring: ImportanceScoring,
    pub(crate) importance_weight: f64,
    // Score multiplier for the semantic tier — assumed; confirm in ranking code.
    pub(crate) tier_boost_semantic: f64,
    pub token_counter: Arc<TokenCounter>,
    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
    pub experience: Option<Arc<crate::graph::experience::ExperienceStore>>,
    pub reasoning: Option<Arc<crate::reasoning::ReasoningMemory>>,
    // Lock-free diagnostic counters (read via the accessor methods).
    pub(crate) community_detection_failures: Arc<AtomicU64>,
    pub(crate) graph_extraction_count: Arc<AtomicU64>,
    pub(crate) graph_extraction_failures: Arc<AtomicU64>,
    // Rate-limit gate for Qdrant warnings — presumably a timestamp; verify.
    pub(crate) last_qdrant_warn: Arc<AtomicU64>,
    pub(crate) admission_control: Option<Arc<AdmissionControl>>,
    pub(crate) quality_gate: Option<Arc<crate::quality_gate::QualityGate>>,
    // Similarity threshold above which key facts are considered duplicates.
    pub(crate) key_facts_dedup_threshold: f32,
    // Background embedding tasks. NOTE(review): std Mutex — must not be
    // held across an .await.
    pub(crate) embed_tasks: Mutex<tokio::task::JoinSet<()>>,
    // ANN candidate-pool size; 0 = legacy default (limit * 2), see
    // `effective_depth`.
    pub(crate) retrieval_depth: u32,
    // Template wrapping recall queries; expected to contain `{query}`.
    pub(crate) search_prompt_template: String,
    // Warn-once latches for misconfiguration diagnostics.
    pub(crate) depth_below_limit_warned: Arc<std::sync::atomic::AtomicBool>,
    pub(crate) missing_placeholder_warned: Arc<std::sync::atomic::AtomicBool>,
    // First-person query-bias correction toward the persona centroid.
    pub(crate) query_bias_correction: QueryBiasCorrection,
    pub(crate) query_bias_profile_weight: f32,
    // TTL-cached persona centroid; see `profile_centroid_cached`.
    pub(crate) profile_centroid: RwLock<Option<CachedCentroid>>,
    pub(crate) profile_centroid_ttl_secs: u64,
    // Hebbian reinforcement settings and spread runtime.
    pub(crate) hebbian_reinforcement: HebbianReinforcement,
    pub(crate) hebbian_lr: f32,
    pub(crate) hebbian_spread: HelaSpreadRuntime,
}
351
352impl SemanticMemory {
    /// Builds a memory instance with default hybrid-search weights
    /// (vector 0.7, keyword 0.3) and the default SQLite pool size.
    ///
    /// # Errors
    /// Returns [`MemoryError`] if the SQLite store cannot be opened.
    pub async fn new(
        sqlite_path: &str,
        qdrant_url: &str,
        api_key: Option<&str>,
        provider: AnyProvider,
        embedding_model: &str,
    ) -> Result<Self, MemoryError> {
        Self::with_weights(
            sqlite_path,
            qdrant_url,
            api_key,
            provider,
            embedding_model,
            0.7,
            0.3,
        )
        .await
    }
381
    /// Builds a memory instance with explicit hybrid-search weights and the
    /// default SQLite connection-pool size (5).
    ///
    /// # Errors
    /// Returns [`MemoryError`] if the SQLite store cannot be opened.
    pub async fn with_weights(
        sqlite_path: &str,
        qdrant_url: &str,
        api_key: Option<&str>,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
    ) -> Result<Self, MemoryError> {
        Self::with_weights_and_pool_size(
            sqlite_path,
            qdrant_url,
            api_key,
            provider,
            embedding_model,
            vector_weight,
            keyword_weight,
            5,
        )
        .await
    }
411
412 #[allow(clippy::too_many_arguments)]
421 pub async fn with_weights_and_pool_size(
422 sqlite_path: &str,
423 qdrant_url: &str,
424 api_key: Option<&str>,
425 provider: AnyProvider,
426 embedding_model: &str,
427 vector_weight: f64,
428 keyword_weight: f64,
429 pool_size: u32,
430 ) -> Result<Self, MemoryError> {
431 let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
432 let pool = sqlite.pool().clone();
433
434 let qdrant = match EmbeddingStore::new(qdrant_url, api_key, pool) {
435 Ok(store) => Some(Arc::new(store)),
436 Err(e) => {
437 tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
438 None
439 }
440 };
441
442 Ok(Self {
443 sqlite,
444 qdrant,
445 provider,
446 embed_provider: None,
447 embedding_model: embedding_model.into(),
448 vector_weight,
449 keyword_weight,
450 temporal_decay: TemporalDecay::Disabled,
451 temporal_decay_half_life_days: 30,
452 mmr_reranking: MmrReranking::Disabled,
453 mmr_lambda: 0.7,
454 importance_scoring: ImportanceScoring::Disabled,
455 importance_weight: 0.15,
456 tier_boost_semantic: 1.3,
457 token_counter: Arc::new(TokenCounter::new()),
458 graph_store: None,
459 experience: None,
460 reasoning: None,
461 community_detection_failures: Arc::new(AtomicU64::new(0)),
462 graph_extraction_count: Arc::new(AtomicU64::new(0)),
463 graph_extraction_failures: Arc::new(AtomicU64::new(0)),
464 last_qdrant_warn: Arc::new(AtomicU64::new(0)),
465 admission_control: None,
466 quality_gate: None,
467 key_facts_dedup_threshold: 0.95,
468 embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
469 retrieval_depth: 0,
470 search_prompt_template: String::new(),
471 depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
472 missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
473 query_bias_correction: QueryBiasCorrection::Enabled,
474 query_bias_profile_weight: 0.25,
475 profile_centroid: RwLock::new(None),
476 profile_centroid_ttl_secs: 300,
477 hebbian_reinforcement: HebbianReinforcement::Disabled,
478 hebbian_lr: 0.1,
479 hebbian_spread: HelaSpreadRuntime::default(),
480 })
481 }
482
483 pub async fn with_qdrant_ops(
492 sqlite_path: &str,
493 ops: crate::QdrantOps,
494 provider: AnyProvider,
495 embedding_model: &str,
496 vector_weight: f64,
497 keyword_weight: f64,
498 pool_size: u32,
499 ) -> Result<Self, MemoryError> {
500 let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
501 let pool = sqlite.pool().clone();
502 let store = EmbeddingStore::with_store(Box::new(ops), pool);
503
504 Ok(Self {
505 sqlite,
506 qdrant: Some(Arc::new(store)),
507 provider,
508 embed_provider: None,
509 embedding_model: embedding_model.into(),
510 vector_weight,
511 keyword_weight,
512 temporal_decay: TemporalDecay::Disabled,
513 temporal_decay_half_life_days: 30,
514 mmr_reranking: MmrReranking::Disabled,
515 mmr_lambda: 0.7,
516 importance_scoring: ImportanceScoring::Disabled,
517 importance_weight: 0.15,
518 tier_boost_semantic: 1.3,
519 token_counter: Arc::new(TokenCounter::new()),
520 graph_store: None,
521 experience: None,
522 reasoning: None,
523 community_detection_failures: Arc::new(AtomicU64::new(0)),
524 graph_extraction_count: Arc::new(AtomicU64::new(0)),
525 graph_extraction_failures: Arc::new(AtomicU64::new(0)),
526 last_qdrant_warn: Arc::new(AtomicU64::new(0)),
527 admission_control: None,
528 quality_gate: None,
529 key_facts_dedup_threshold: 0.95,
530 embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
531 retrieval_depth: 0,
532 search_prompt_template: String::new(),
533 depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
534 missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
535 query_bias_correction: QueryBiasCorrection::Enabled,
536 query_bias_profile_weight: 0.25,
537 profile_centroid: RwLock::new(None),
538 profile_centroid_ttl_secs: 300,
539 hebbian_reinforcement: HebbianReinforcement::Disabled,
540 hebbian_lr: 0.1,
541 hebbian_spread: HelaSpreadRuntime::default(),
542 })
543 }
544
545 #[must_use]
550 pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
551 self.graph_store = Some(store);
552 self
553 }
554
555 #[must_use]
561 pub fn with_experience_store(
562 mut self,
563 store: Arc<crate::graph::experience::ExperienceStore>,
564 ) -> Self {
565 self.experience = Some(store);
566 self
567 }
568
569 #[must_use]
575 pub fn with_reasoning(mut self, store: Arc<crate::reasoning::ReasoningMemory>) -> Self {
576 self.reasoning = Some(store);
577 self
578 }
579
580 #[must_use]
582 pub fn community_detection_failures(&self) -> u64 {
583 use std::sync::atomic::Ordering;
584 self.community_detection_failures.load(Ordering::Relaxed)
585 }
586
587 #[must_use]
589 pub fn graph_extraction_count(&self) -> u64 {
590 use std::sync::atomic::Ordering;
591 self.graph_extraction_count.load(Ordering::Relaxed)
592 }
593
594 #[must_use]
596 pub fn graph_extraction_failures(&self) -> u64 {
597 use std::sync::atomic::Ordering;
598 self.graph_extraction_failures.load(Ordering::Relaxed)
599 }
600
601 #[must_use]
603 pub fn with_ranking_options(
604 mut self,
605 temporal_decay: TemporalDecay,
606 temporal_decay_half_life_days: u32,
607 mmr_reranking: MmrReranking,
608 mmr_lambda: f32,
609 ) -> Self {
610 self.temporal_decay = temporal_decay;
611 self.temporal_decay_half_life_days = temporal_decay_half_life_days;
612 self.mmr_reranking = mmr_reranking;
613 self.mmr_lambda = mmr_lambda;
614 self
615 }
616
617 #[must_use]
619 pub fn with_importance_options(mut self, scoring: ImportanceScoring, weight: f64) -> Self {
620 self.importance_scoring = scoring;
621 self.importance_weight = weight;
622 self
623 }
624
625 #[must_use]
629 pub fn with_tier_boost(mut self, boost: f64) -> Self {
630 self.tier_boost_semantic = boost;
631 self
632 }
633
634 #[must_use]
639 pub fn with_admission_control(mut self, control: AdmissionControl) -> Self {
640 self.admission_control = Some(Arc::new(control));
641 self
642 }
643
644 #[must_use]
650 pub fn with_quality_gate(mut self, gate: Arc<crate::quality_gate::QualityGate>) -> Self {
651 self.quality_gate = Some(gate);
652 self
653 }
654
655 #[must_use]
660 pub fn with_key_facts_dedup_threshold(mut self, threshold: f32) -> Self {
661 self.key_facts_dedup_threshold = threshold;
662 self
663 }
664
665 #[must_use]
672 pub fn with_query_bias(
673 mut self,
674 correction: QueryBiasCorrection,
675 profile_weight: f32,
676 centroid_ttl_secs: u64,
677 ) -> Self {
678 self.query_bias_correction = correction;
679 self.query_bias_profile_weight = profile_weight.clamp(0.0, 1.0);
680 self.profile_centroid_ttl_secs = centroid_ttl_secs;
681 self
682 }
683
684 #[must_use]
689 pub fn with_hebbian_spread(mut self, runtime: HelaSpreadRuntime) -> Self {
690 self.hebbian_spread = runtime;
691 self
692 }
693
694 #[must_use]
699 pub fn with_hebbian(mut self, reinforcement: HebbianReinforcement, lr: f32) -> Self {
700 let lr = lr.max(0.0);
701 if reinforcement.is_enabled() && lr == 0.0 {
702 tracing::warn!("hebbian enabled with lr=0.0 — no reinforcement will occur");
703 }
704 self.hebbian_reinforcement = reinforcement;
705 self.hebbian_lr = lr;
706 self
707 }
708
709 pub(crate) fn classify_query_intent(query: &str) -> QueryIntent {
714 if persona::contains_self_referential_language(query) {
715 QueryIntent::FirstPerson
716 } else {
717 QueryIntent::Other
718 }
719 }
720
721 #[tracing::instrument(name = "memory.query_bias.apply", skip(self, embedding), fields(query_len = query.len()))]
727 pub(crate) async fn apply_query_bias(&self, query: &str, embedding: Vec<f32>) -> Vec<f32> {
728 if !self.query_bias_correction.is_enabled() {
729 tracing::debug!(reason = "disabled", "query-bias: skipping");
730 return embedding;
731 }
732 if Self::classify_query_intent(query) != QueryIntent::FirstPerson {
733 tracing::debug!(reason = "not_first_person", "query-bias: skipping");
734 return embedding;
735 }
736 let Some(centroid) = self.profile_centroid_cached().await else {
737 tracing::debug!(reason = "no_centroid", "query-bias: skipping");
738 return embedding;
739 };
740 if centroid.len() != embedding.len() {
741 tracing::warn!(
742 centroid_dim = centroid.len(),
743 query_dim = embedding.len(),
744 reason = "dim_mismatch",
745 "query-bias: dimension mismatch between profile centroid and query embedding — skipping bias"
746 );
747 return embedding;
748 }
749 let w = self.query_bias_profile_weight;
750 tracing::debug!(
751 intent = "first_person",
752 centroid_dim = centroid.len(),
753 weight = w,
754 "query-bias: applying profile bias"
755 );
756 embedding
757 .iter()
758 .zip(centroid.iter())
759 .map(|(&q, &c)| (1.0 - w) * q + w * c)
760 .collect()
761 }
762
    /// Returns the persona-profile centroid, served from a TTL-bounded
    /// cache when fresh.
    ///
    /// On a cache miss the centroid is recomputed; if recomputation fails,
    /// any previously cached (stale) value is returned instead of `None`,
    /// without refreshing its timestamp — so the next call retries.
    /// NOTE(review): the write lock is taken only after computing, so
    /// concurrent callers may recompute simultaneously; last writer wins.
    #[tracing::instrument(name = "memory.query_bias.centroid", skip(self))]
    pub(crate) async fn profile_centroid_cached(&self) -> Option<Vec<f32>> {
        {
            // Fast path: read lock only, serve from cache while within TTL.
            let guard = self.profile_centroid.read().await;
            if let Some(c) = &*guard
                && c.computed_at.elapsed().as_secs() < self.profile_centroid_ttl_secs
            {
                let ttl_remaining = self
                    .profile_centroid_ttl_secs
                    .saturating_sub(c.computed_at.elapsed().as_secs());
                tracing::debug!(
                    centroid_dim = c.vector.len(),
                    ttl_remaining_secs = ttl_remaining,
                    "query-bias: centroid cache hit"
                );
                return Some(c.vector.clone());
            }
        }
        // Slow path: recompute with no lock held, then publish under the
        // write lock.
        let computed = self.compute_profile_centroid().await;
        let mut guard = self.profile_centroid.write().await;
        match computed {
            Some(v) => {
                tracing::debug!(centroid_dim = v.len(), "query-bias: centroid computed");
                *guard = Some(CachedCentroid {
                    vector: v.clone(),
                    computed_at: Instant::now(),
                });
                Some(v)
            }
            None => {
                // Recompute failed: fall back to the stale cached value
                // rather than dropping it.
                guard.as_ref().map(|c| c.vector.clone())
            }
        }
    }
804
805 async fn compute_profile_centroid(&self) -> Option<Vec<f32>> {
810 let facts = match self.sqlite.load_persona_facts(0.0).await {
811 Ok(f) => f,
812 Err(e) => {
813 tracing::warn!(error = %e, "query-bias: failed to load persona facts");
814 return None;
815 }
816 };
817 if facts.is_empty() {
818 return None;
819 }
820 let provider = self.effective_embed_provider();
821 let texts: Vec<String> = facts.iter().map(|f| f.content.clone()).collect();
822 let mut embeddings: Vec<Vec<f32>> = Vec::with_capacity(texts.len());
823 for text in &texts {
824 match provider.embed(text).await {
825 Ok(v) => embeddings.push(v),
826 Err(e) => {
827 tracing::warn!(error = %e, "query-bias: failed to embed persona fact — skipping");
828 }
829 }
830 }
831 if embeddings.is_empty() {
832 return None;
833 }
834 let dim = embeddings[0].len();
835 let mut centroid = vec![0.0f32; dim];
836 for emb in &embeddings {
837 if emb.len() != dim {
838 tracing::warn!(
839 expected = dim,
840 got = emb.len(),
841 "query-bias: persona embedding dimension mismatch — skipping fact"
842 );
843 continue;
844 }
845 for (c, &v) in centroid.iter_mut().zip(emb.iter()) {
846 *c += v;
847 }
848 }
849 #[allow(clippy::cast_precision_loss)]
850 let n = embeddings.len() as f32;
851 for c in &mut centroid {
852 *c /= n;
853 }
854 Some(centroid)
855 }
856
857 #[must_use]
865 pub fn with_retrieval_options(
866 mut self,
867 depth: u32,
868 search_prompt_template: impl Into<String>,
869 ) -> Self {
870 self.retrieval_depth = depth;
871 self.search_prompt_template = search_prompt_template.into();
872 self
873 }
874
875 pub(crate) fn effective_depth(&self, limit: usize) -> usize {
884 use std::sync::atomic::Ordering;
885
886 let depth = self.retrieval_depth as usize;
887 if depth == 0 {
888 return limit.saturating_mul(2);
889 }
890 if depth < limit {
891 if !self.depth_below_limit_warned.swap(true, Ordering::Relaxed) {
892 tracing::warn!(
893 retrieval_depth = depth,
894 recall_limit = limit,
895 "memory.retrieval.depth < recall_limit; ANN pool cannot saturate top-k — consider raising depth"
896 );
897 }
898 } else if depth < limit.saturating_mul(2) {
899 tracing::info!(
900 retrieval_depth = depth,
901 recall_limit = limit,
902 legacy_default = limit.saturating_mul(2),
903 "memory.retrieval.depth is below legacy limit*2; ANN pool will be smaller than pre-#3340"
904 );
905 } else {
906 tracing::debug!(
907 retrieval_depth = depth,
908 recall_limit = limit,
909 "recall: using configured ANN depth"
910 );
911 }
912 depth
913 }
914
915 pub(crate) fn apply_search_prompt(&self, query: &str) -> String {
920 use std::sync::atomic::Ordering;
921
922 let template = &self.search_prompt_template;
923 if template.is_empty() {
924 return query.to_owned();
925 }
926 if !template.contains("{query}") {
927 if !self
928 .missing_placeholder_warned
929 .swap(true, Ordering::Relaxed)
930 {
931 tracing::warn!(
932 template = template.as_str(),
933 "memory.retrieval.search_prompt_template has no {{query}} placeholder — \
934 using raw query as-is"
935 );
936 }
937 return query.to_owned();
938 }
939 template.replace("{query}", query)
940 }
941
942 #[must_use]
948 pub fn with_embed_provider(mut self, embed_provider: AnyProvider) -> Self {
949 self.embed_provider = Some(embed_provider);
950 self
951 }
952
953 pub fn effective_embed_provider(&self) -> &AnyProvider {
957 self.embed_provider.as_ref().unwrap_or(&self.provider)
958 }
959
    /// Assembles a `SemanticMemory` from pre-built components, applying the
    /// default tuning values. This is the canonical place where field
    /// defaults live; the async constructors build their stores and then
    /// hand off here.
    #[must_use]
    pub fn from_parts(
        sqlite: SqliteStore,
        qdrant: Option<Arc<EmbeddingStore>>,
        provider: AnyProvider,
        embedding_model: impl Into<String>,
        vector_weight: f64,
        keyword_weight: f64,
        token_counter: Arc<TokenCounter>,
    ) -> Self {
        Self {
            sqlite,
            qdrant,
            provider,
            embed_provider: None,
            embedding_model: embedding_model.into(),
            vector_weight,
            keyword_weight,
            // Ranking features default off; enable via the builder methods.
            temporal_decay: TemporalDecay::Disabled,
            temporal_decay_half_life_days: 30,
            mmr_reranking: MmrReranking::Disabled,
            mmr_lambda: 0.7,
            importance_scoring: ImportanceScoring::Disabled,
            importance_weight: 0.15,
            tier_boost_semantic: 1.3,
            token_counter,
            // Optional sub-stores start detached.
            graph_store: None,
            experience: None,
            reasoning: None,
            // Diagnostic counters start at zero.
            community_detection_failures: Arc::new(AtomicU64::new(0)),
            graph_extraction_count: Arc::new(AtomicU64::new(0)),
            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
            last_qdrant_warn: Arc::new(AtomicU64::new(0)),
            admission_control: None,
            quality_gate: None,
            key_facts_dedup_threshold: 0.95,
            embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
            // 0 = legacy default pool size (limit * 2) in `effective_depth`.
            retrieval_depth: 0,
            search_prompt_template: String::new(),
            depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            // Query-bias correction is the one feature enabled by default.
            query_bias_correction: QueryBiasCorrection::Enabled,
            query_bias_profile_weight: 0.25,
            profile_centroid: RwLock::new(None),
            profile_centroid_ttl_secs: 300,
            hebbian_reinforcement: HebbianReinforcement::Disabled,
            hebbian_lr: 0.1,
            hebbian_spread: HelaSpreadRuntime::default(),
        }
    }
1013
    /// Builds a memory instance that uses SQLite for both relational storage
    /// and vector search (no external Qdrant), with the default pool size (5).
    ///
    /// # Errors
    /// Returns [`MemoryError`] if the SQLite store cannot be opened.
    pub async fn with_sqlite_backend(
        sqlite_path: &str,
        provider: AnyProvider,
        embedding_model: &str,
        vector_weight: f64,
        keyword_weight: f64,
    ) -> Result<Self, MemoryError> {
        Self::with_sqlite_backend_and_pool_size(
            sqlite_path,
            provider,
            embedding_model,
            vector_weight,
            keyword_weight,
            5,
        )
        .await
    }
1036
1037 pub async fn with_sqlite_backend_and_pool_size(
1043 sqlite_path: &str,
1044 provider: AnyProvider,
1045 embedding_model: &str,
1046 vector_weight: f64,
1047 keyword_weight: f64,
1048 pool_size: u32,
1049 ) -> Result<Self, MemoryError> {
1050 let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
1051 let pool = sqlite.pool().clone();
1052 let store = EmbeddingStore::new_sqlite(pool);
1053
1054 Ok(Self {
1055 sqlite,
1056 qdrant: Some(Arc::new(store)),
1057 provider,
1058 embed_provider: None,
1059 embedding_model: embedding_model.into(),
1060 vector_weight,
1061 keyword_weight,
1062 temporal_decay: TemporalDecay::Disabled,
1063 temporal_decay_half_life_days: 30,
1064 mmr_reranking: MmrReranking::Disabled,
1065 mmr_lambda: 0.7,
1066 importance_scoring: ImportanceScoring::Disabled,
1067 importance_weight: 0.15,
1068 tier_boost_semantic: 1.3,
1069 token_counter: Arc::new(TokenCounter::new()),
1070 graph_store: None,
1071 experience: None,
1072 reasoning: None,
1073 community_detection_failures: Arc::new(AtomicU64::new(0)),
1074 graph_extraction_count: Arc::new(AtomicU64::new(0)),
1075 graph_extraction_failures: Arc::new(AtomicU64::new(0)),
1076 last_qdrant_warn: Arc::new(AtomicU64::new(0)),
1077 admission_control: None,
1078 quality_gate: None,
1079 key_facts_dedup_threshold: 0.95,
1080 embed_tasks: std::sync::Mutex::new(tokio::task::JoinSet::new()),
1081 retrieval_depth: 0,
1082 search_prompt_template: String::new(),
1083 depth_below_limit_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
1084 missing_placeholder_warned: Arc::new(std::sync::atomic::AtomicBool::new(false)),
1085 query_bias_correction: QueryBiasCorrection::Enabled,
1086 query_bias_profile_weight: 0.25,
1087 profile_centroid: RwLock::new(None),
1088 profile_centroid_ttl_secs: 300,
1089 hebbian_reinforcement: HebbianReinforcement::Disabled,
1090 hebbian_lr: 0.1,
1091 hebbian_spread: HelaSpreadRuntime::default(),
1092 })
1093 }
1094
    /// Borrow the underlying relational store.
    #[must_use]
    pub fn sqlite(&self) -> &SqliteStore {
        &self.sqlite
    }
1100
1101 pub async fn is_vector_store_connected(&self) -> bool {
1106 match self.qdrant.as_ref() {
1107 Some(store) => store.health_check().await,
1108 None => false,
1109 }
1110 }
1111
    /// Whether a vector store is configured (does not check connectivity;
    /// see `is_vector_store_connected`).
    #[must_use]
    pub fn has_vector_store(&self) -> bool {
        self.qdrant.is_some()
    }
1117
    /// Borrow the vector store, if configured.
    #[must_use]
    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
        self.qdrant.as_ref()
    }
1123
    /// Borrow the main LLM provider.
    pub fn provider(&self) -> &AnyProvider {
        &self.provider
    }
1128
    /// Total number of messages stored for `conversation_id`.
    ///
    /// # Errors
    /// Returns [`MemoryError`] on storage failures.
    pub async fn message_count(
        &self,
        conversation_id: crate::types::ConversationId,
    ) -> Result<i64, MemoryError> {
        self.sqlite.count_messages(conversation_id).await
    }
1140
1141 pub async fn unsummarized_message_count(
1147 &self,
1148 conversation_id: crate::types::ConversationId,
1149 ) -> Result<i64, MemoryError> {
1150 let after_id = self
1151 .sqlite
1152 .latest_summary_last_message_id(conversation_id)
1153 .await?
1154 .unwrap_or(crate::types::MessageId(0));
1155 self.sqlite
1156 .count_messages_after(conversation_id, after_id)
1157 .await
1158 }
1159
1160 pub async fn load_promotion_window(
1181 &self,
1182 max_items: usize,
1183 ) -> Result<Vec<crate::compression::promotion::PromotionInput>, MemoryError> {
1184 use zeph_db::sql;
1185
1186 let limit = i64::try_from(max_items).unwrap_or(i64::MAX);
1187 let rows: Vec<(
1188 crate::types::MessageId,
1189 crate::types::ConversationId,
1190 String,
1191 )> = zeph_db::query_as(sql!(
1192 "SELECT id, conversation_id, content \
1193 FROM messages \
1194 WHERE deleted_at IS NULL \
1195 ORDER BY id DESC \
1196 LIMIT ?"
1197 ))
1198 .bind(limit)
1199 .fetch_all(self.sqlite.pool())
1200 .await?;
1201
1202 let mut vectors = if let Some(qdrant) = &self.qdrant {
1203 let ids: Vec<_> = rows.iter().map(|(id, _, _)| *id).collect();
1204 let mut raw = qdrant.get_vectors_for_messages(&ids).await?;
1205
1206 let ref_dim = raw.values().next().map(Vec::len);
1208 if let Some(ref_dim) = ref_dim {
1209 let mismatched: Vec<_> = raw
1210 .iter()
1211 .filter(|(_, v)| v.len() != ref_dim)
1212 .map(|(id, v)| (*id, v.len()))
1213 .collect();
1214 if !mismatched.is_empty() {
1215 tracing::warn!(
1216 expected_dim = ref_dim,
1217 dropped_count = mismatched.len(),
1218 "load_promotion_window: dimension mismatch — dropping mismatched vectors"
1219 );
1220 for (id, _) in mismatched {
1221 raw.remove(&id);
1222 }
1223 }
1224 }
1225 raw
1226 } else {
1227 std::collections::HashMap::new()
1228 };
1229
1230 Ok(rows
1231 .into_iter()
1232 .map(|(message_id, conversation_id, content)| {
1233 crate::compression::promotion::PromotionInput {
1234 message_id,
1235 conversation_id,
1236 content,
1237 embedding: vectors.remove(&message_id),
1238 }
1239 })
1240 .collect())
1241 }
1242
1243 pub async fn retrieve_reasoning_strategies(
1256 &self,
1257 query: &str,
1258 limit: usize,
1259 ) -> Result<Vec<crate::reasoning::ReasoningStrategy>, MemoryError> {
1260 let Some(reasoning) = &self.reasoning else {
1261 return Ok(Vec::new());
1262 };
1263 if !self.effective_embed_provider().supports_embeddings() {
1264 return Ok(Vec::new());
1265 }
1266 let embedding = self.effective_embed_provider().embed(query).await?;
1267 reasoning
1268 .retrieve_by_embedding(&embedding, limit as u64)
1269 .await
1270 }
1271}