1use anyhow::{Result, anyhow};
37use serde::{Deserialize, Serialize};
38use serde_json::Value;
39use std::collections::HashMap;
40use std::sync::Arc;
41use tokio::sync::Mutex;
42use tracing::{debug, info};
43
44use crate::embeddings::{DEFAULT_REQUIRED_DIMENSION, EmbeddingClient, EmbeddingConfig};
45use crate::rag::{SearchOptions, SearchResult, SliceLayer};
46use crate::search::{
47 BM25Config, BM25Index, HybridConfig, HybridSearchResult, HybridSearcher, SearchMode,
48};
49use crate::storage::{ChromaDocument, StorageManager};
50
51pub use crate::rag::SearchResult as Document;
53
/// Configuration used to construct a [`MemexEngine`].
///
/// Only `app_name` and `namespace` are required when deserializing; every
/// other field carries a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemexConfig {
    /// Application name; also used to derive the default database and BM25 paths.
    pub app_name: String,
    /// Namespace the engine passes to storage and search calls.
    pub namespace: String,
    /// Explicit database path. When `None`, `effective_db_path()` derives one
    /// from `app_name`.
    #[serde(default)]
    pub db_path: Option<String>,
    /// Embedding dimension; defaults to `DEFAULT_REQUIRED_DIMENSION`.
    /// Reconciled with `embedding_config.required_dimension` when the engine is built.
    #[serde(default = "default_dimension")]
    pub dimension: usize,
    /// Settings handed to the embedding client.
    #[serde(default)]
    pub embedding_config: EmbeddingConfig,
    /// Requests a BM25 index even when hybrid search would not need one.
    #[serde(default)]
    pub enable_bm25: bool,
    /// Explicit BM25 settings; takes precedence over the BM25 section of `hybrid_config`.
    #[serde(default)]
    pub bm25_config: Option<BM25Config>,
    /// Whether a hybrid searcher is created; defaults to `true`.
    #[serde(default = "default_enable_hybrid")]
    pub enable_hybrid: bool,
    /// Hybrid search settings; `HybridConfig::default()` is used when `None`.
    #[serde(default)]
    pub hybrid_config: Option<HybridConfig>,
}
85
/// Serde default for `MemexConfig::enable_hybrid`: hybrid search is on unless
/// the config explicitly turns it off.
fn default_enable_hybrid() -> bool {
    true
}
89
/// Serde default for `MemexConfig::dimension`; returns `DEFAULT_REQUIRED_DIMENSION`.
fn default_dimension() -> usize {
    DEFAULT_REQUIRED_DIMENSION
}
93
94impl Default for MemexConfig {
95 fn default() -> Self {
96 Self {
97 app_name: "memex".to_string(),
98 namespace: "default".to_string(),
99 db_path: None,
100 dimension: default_dimension(),
101 embedding_config: EmbeddingConfig::default(),
102 enable_bm25: false,
103 bm25_config: None,
104 enable_hybrid: default_enable_hybrid(),
105 hybrid_config: None,
106 }
107 }
108}
109
110impl MemexConfig {
111 pub fn new(app_name: impl Into<String>, namespace: impl Into<String>) -> Self {
113 Self {
114 app_name: app_name.into(),
115 namespace: namespace.into(),
116 ..Default::default()
117 }
118 }
119
120 pub fn with_db_path(mut self, path: impl Into<String>) -> Self {
122 self.db_path = Some(path.into());
123 self
124 }
125
126 pub fn with_dimension(mut self, dimension: usize) -> Self {
128 self.dimension = dimension;
129 self.embedding_config.required_dimension = dimension;
130 self
131 }
132
133 pub fn with_embedding_config(mut self, config: EmbeddingConfig) -> Self {
135 self.dimension = config.required_dimension;
136 self.embedding_config = config;
137 self
138 }
139
140 fn sync_dimension_fields(&mut self) -> Result<()> {
141 if self.dimension == self.embedding_config.required_dimension {
142 return Ok(());
143 }
144
145 let default_dim = default_dimension();
146 if self.dimension == default_dim {
147 self.dimension = self.embedding_config.required_dimension;
148 return Ok(());
149 }
150
151 if self.embedding_config.required_dimension == default_dim {
152 self.embedding_config.required_dimension = self.dimension;
153 return Ok(());
154 }
155
156 Err(anyhow!(
157 "MemexConfig.dimension={} conflicts with embedding_config.required_dimension={}. \
158 Set them to the same value or use with_dimension()/with_embedding_config() so one source of truth updates both.",
159 self.dimension,
160 self.embedding_config.required_dimension
161 ))
162 }
163
164 pub fn with_bm25(mut self, config: BM25Config) -> Self {
166 self.enable_bm25 = true;
167 self.bm25_config = Some(config);
168 self
169 }
170
171 pub fn effective_db_path(&self) -> String {
173 self.db_path
174 .clone()
175 .unwrap_or_else(|| format!("~/.rmcp-servers/{}/lancedb", self.app_name))
176 }
177
178 pub fn effective_bm25_path(&self) -> String {
180 self.bm25_config
181 .as_ref()
182 .map(|c| c.index_path.clone())
183 .unwrap_or_else(|| format!("~/.rmcp-servers/{}/bm25", self.app_name))
184 }
185
186 fn hybrid_uses_bm25(&self) -> bool {
187 self.enable_hybrid
188 && self.hybrid_config.clone().unwrap_or_default().mode != SearchMode::Vector
189 }
190
191 fn normalize_bm25_config(&self, mut config: BM25Config) -> BM25Config {
192 if config.index_path == BM25Config::default().index_path {
193 config.index_path = self.effective_bm25_path();
194 }
195 config
196 }
197
198 fn resolved_bm25_config(&self) -> Option<BM25Config> {
199 if !self.enable_bm25 && !self.hybrid_uses_bm25() {
200 return None;
201 }
202
203 let config = self
204 .bm25_config
205 .clone()
206 .or_else(|| {
207 self.hybrid_config
208 .as_ref()
209 .filter(|cfg| cfg.mode != SearchMode::Vector)
210 .map(|cfg| cfg.bm25.clone())
211 })
212 .unwrap_or_default();
213
214 Some(self.normalize_bm25_config(config))
215 }
216
217 fn resolved_hybrid_config(&self) -> HybridConfig {
218 let mut config = self.hybrid_config.clone().unwrap_or_default();
219 if let Some(bm25) = self.resolved_bm25_config() {
220 config.bm25 = bm25;
221 }
222 config
223 }
224}
225
/// Metadata predicate evaluated by `MetaFilter::matches`.
///
/// Unset fields impose no constraint; all set fields must hold.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MetaFilter {
    /// Required exact string value of the `patient_id` metadata field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub patient_id: Option<String>,
    /// Required exact string value of the `visit_id` metadata field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub visit_id: Option<String>,
    /// Required exact string value of the `doc_type` metadata field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub doc_type: Option<String>,
    /// Inclusive lower bound for the `date` metadata field, compared as a string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date_from: Option<String>,
    /// Inclusive upper bound for the `date` metadata field, compared as a string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date_to: Option<String>,
    /// Additional `(key, value)` pairs that must each match exactly.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub custom: Vec<(String, String)>,
}
251
252impl MetaFilter {
253 pub fn for_patient(patient_id: impl Into<String>) -> Self {
255 Self {
256 patient_id: Some(patient_id.into()),
257 ..Default::default()
258 }
259 }
260
261 pub fn for_visit(visit_id: impl Into<String>) -> Self {
263 Self {
264 visit_id: Some(visit_id.into()),
265 ..Default::default()
266 }
267 }
268
269 pub fn with_custom(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
271 self.custom.push((key.into(), value.into()));
272 self
273 }
274
275 pub fn matches(&self, metadata: &Value) -> bool {
277 if let Some(ref patient_id) = self.patient_id
278 && metadata.get("patient_id").and_then(|v| v.as_str()) != Some(patient_id)
279 {
280 return false;
281 }
282
283 if let Some(ref visit_id) = self.visit_id
284 && metadata.get("visit_id").and_then(|v| v.as_str()) != Some(visit_id)
285 {
286 return false;
287 }
288
289 if let Some(ref doc_type) = self.doc_type
290 && metadata.get("doc_type").and_then(|v| v.as_str()) != Some(doc_type)
291 {
292 return false;
293 }
294
295 if let Some(ref date_from) = self.date_from
297 && let Some(doc_date) = metadata.get("date").and_then(|v| v.as_str())
298 && doc_date < date_from.as_str()
299 {
300 return false;
301 }
302
303 if let Some(ref date_to) = self.date_to
304 && let Some(doc_date) = metadata.get("date").and_then(|v| v.as_str())
305 && doc_date > date_to.as_str()
306 {
307 return false;
308 }
309
310 for (key, value) in &self.custom {
312 if metadata.get(key).and_then(|v| v.as_str()) != Some(value) {
313 return false;
314 }
315 }
316
317 true
318 }
319}
320
/// One document submitted to `MemexEngine::store_batch`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoreItem {
    /// Document identifier.
    pub id: String,
    /// Text that is embedded and stored.
    pub text: String,
    /// Arbitrary JSON metadata; falls back to the serde default when omitted.
    #[serde(default)]
    pub metadata: Value,
}
332
333impl StoreItem {
334 pub fn new(id: impl Into<String>, text: impl Into<String>) -> Self {
336 Self {
337 id: id.into(),
338 text: text.into(),
339 metadata: Value::Object(serde_json::Map::new()),
340 }
341 }
342
343 pub fn with_metadata(mut self, metadata: Value) -> Self {
345 self.metadata = metadata;
346 self
347 }
348}
349
/// Outcome summary of a batch store operation.
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Number of items reported as stored.
    pub success_count: usize,
    /// Number of items reported as failed.
    pub failure_count: usize,
    /// Identifiers of the failed items.
    pub failed_ids: Vec<String>,
}
360
/// Aggregate statistics over a set of search results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerStats {
    /// Number of results the statistics were computed from.
    pub total_chunks: usize,
    /// Arithmetic mean of the result scores (`0.0` when there are none).
    pub avg_score: f32,
    /// Most frequent keywords across the results, most frequent first.
    pub top_keywords: Vec<String>,
}
371
372impl LayerStats {
373 pub fn empty() -> Self {
375 Self {
376 total_chunks: 0,
377 avg_score: 0.0,
378 top_keywords: vec![],
379 }
380 }
381
382 pub fn from_results(results: &[SearchResult]) -> Self {
384 if results.is_empty() {
385 return Self::empty();
386 }
387
388 let total_chunks = results.len();
389 let avg_score = results.iter().map(|r| r.score).sum::<f32>() / total_chunks as f32;
390
391 let mut keyword_counts: HashMap<String, usize> = HashMap::new();
393 for result in results {
394 for keyword in &result.keywords {
395 *keyword_counts.entry(keyword.clone()).or_insert(0) += 1;
396 }
397 }
398
399 let mut keywords: Vec<_> = keyword_counts.into_iter().collect();
401 keywords.sort_by_key(|b| std::cmp::Reverse(b.1));
402 let top_keywords = keywords.into_iter().take(10).map(|(k, _)| k).collect();
403
404 Self {
405 total_chunks,
406 avg_score,
407 top_keywords,
408 }
409 }
410}
411
/// Search results for a single slice layer together with their statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiveResult {
    /// Layer these results belong to.
    pub layer: SliceLayer,
    /// Results found for the layer.
    pub results: Vec<SearchResult>,
    /// Statistics describing `results`.
    pub layer_stats: LayerStats,
}
422
/// Namespace-scoped document store with vector search and optional
/// BM25 / hybrid search.
pub struct MemexEngine {
    /// Shared handle to the document storage backend.
    storage: Arc<StorageManager>,
    /// Embedding client, guarded by an async mutex.
    embeddings: Arc<Mutex<EmbeddingClient>>,
    /// BM25 index; present only when the configuration resolves BM25 settings.
    bm25: Option<Arc<BM25Index>>,
    /// Hybrid searcher; present only when `enable_hybrid` is set.
    hybrid_searcher: Option<HybridSearcher>,
    /// Namespace applied to every storage and search call.
    namespace: String,
    /// Configuration the engine was built from (after dimension reconciliation).
    config: MemexConfig,
}
435
436impl MemexEngine {
437 pub async fn new(mut config: MemexConfig) -> Result<Self> {
447 config.sync_dimension_fields()?;
448 let db_path = config.effective_db_path();
449
450 info!(
451 "Initializing MemexEngine: app={}, namespace={}, db={}",
452 config.app_name, config.namespace, db_path
453 );
454
455 let storage = StorageManager::new_lance_only(&db_path).await?;
457 storage.ensure_collection().await?;
458
459 let embeddings = EmbeddingClient::new(&config.embedding_config).await?;
461
462 info!(
463 "Connected to embedding provider: {} (dim={})",
464 embeddings.connected_to(),
465 embeddings.required_dimension()
466 );
467
468 let bm25 = config
470 .resolved_bm25_config()
471 .map(|bm25_config| BM25Index::new(&bm25_config).map(Arc::new))
472 .transpose()?;
473
474 let storage_arc = Arc::new(storage);
475
476 let hybrid_searcher = if config.enable_hybrid {
478 let hybrid_config = config.resolved_hybrid_config();
479 Some(if let Some(ref bm25_index) = bm25 {
480 HybridSearcher::with_bm25_index(
481 storage_arc.clone(),
482 bm25_index.clone(),
483 hybrid_config,
484 )
485 } else {
486 HybridSearcher::new(storage_arc.clone(), hybrid_config).await?
487 })
488 } else {
489 None
490 };
491
492 Ok(Self {
493 storage: storage_arc,
494 embeddings: Arc::new(Mutex::new(embeddings)),
495 bm25,
496 hybrid_searcher,
497 namespace: config.namespace.clone(),
498 config,
499 })
500 }
501
502 pub async fn for_app(app_name: &str, namespace: &str) -> Result<Self> {
512 let config = MemexConfig::new(app_name, namespace);
513 Self::new(config).await
514 }
515
516 pub async fn for_vista() -> Result<Self> {
526 use crate::embeddings::ProviderConfig;
527
528 let config = MemexConfig {
529 app_name: "vista".to_string(),
530 namespace: "default".to_string(),
531 db_path: Some("~/.rmcp-servers/vista/lancedb".to_string()),
532 dimension: 1024,
533 embedding_config: EmbeddingConfig {
534 required_dimension: 1024,
535 providers: vec![ProviderConfig {
536 name: "ollama-vista".to_string(),
537 base_url: "http://localhost:11434".to_string(),
538 model: "qwen3-embedding:0.6b".to_string(),
539 priority: 1,
540 endpoint: "/v1/embeddings".to_string(),
541 }],
542 ..EmbeddingConfig::default()
543 },
544 enable_bm25: false,
545 bm25_config: None,
546 enable_hybrid: true, hybrid_config: None,
548 };
549 Self::new(config).await
550 }
551
552 pub fn namespace(&self) -> &str {
554 &self.namespace
555 }
556
    /// Configuration the engine was built from.
    pub fn config(&self) -> &MemexConfig {
        &self.config
    }
561
562 pub fn storage(&self) -> Arc<StorageManager> {
564 self.storage.clone()
565 }
566
567 pub async fn store(&self, id: &str, text: &str, metadata: Value) -> Result<()> {
590 debug!("Storing document: id={}, text_len={}", id, text.len());
591
592 let embedding = self.embeddings.lock().await.embed(text).await?;
594
595 let doc = ChromaDocument::new_flat(
597 id.to_string(),
598 self.namespace.clone(),
599 embedding,
600 metadata.clone(),
601 text.to_string(),
602 );
603
604 self.storage.add_to_store(vec![doc]).await?;
606
607 if let Some(ref bm25) = self.bm25 {
609 bm25.add_documents(&[(id.to_string(), self.namespace.clone(), text.to_string())])
610 .await?;
611 }
612
613 debug!("Stored document: id={}", id);
614 Ok(())
615 }
616
617 pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
634 debug!("Searching: query='{}', limit={}", query, limit);
635
636 let query_embedding = self.embeddings.lock().await.embed(query).await?;
638
639 let candidates = self
641 .storage
642 .search_store(Some(&self.namespace), query_embedding, limit)
643 .await?;
644
645 let results: Vec<SearchResult> = candidates
647 .into_iter()
648 .enumerate()
649 .map(|(idx, doc)| {
650 let score = 1.0 - (idx as f32 / (limit as f32 + 1.0));
652 let layer = doc.slice_layer();
653 SearchResult {
654 id: doc.id,
655 namespace: doc.namespace,
656 text: doc.document,
657 score,
658 metadata: doc.metadata,
659 layer,
660 parent_id: doc.parent_id,
661 children_ids: doc.children_ids,
662 keywords: doc.keywords,
663 }
664 })
665 .collect();
666
667 debug!("Search returned {} results", results.len());
668 Ok(results)
669 }
670
671 pub async fn search_hybrid(
686 &self,
687 query: &str,
688 limit: usize,
689 ) -> Result<Vec<HybridSearchResult>> {
690 debug!("Hybrid search: query='{}', limit={}", query, limit);
691
692 let hybrid = self.hybrid_searcher.as_ref().ok_or_else(|| {
693 anyhow!("Hybrid search not enabled. Set enable_hybrid: true in MemexConfig.")
694 })?;
695
696 let query_embedding = self.embeddings.lock().await.embed(query).await?;
698
699 let results = hybrid
701 .search(
702 query,
703 query_embedding,
704 Some(&self.namespace),
705 limit,
706 SearchOptions::default(),
707 )
708 .await?;
709
710 debug!("Hybrid search returned {} results", results.len());
711 Ok(results)
712 }
713
714 pub async fn search_with_mode(
727 &self,
728 query: &str,
729 limit: usize,
730 mode: SearchMode,
731 ) -> Result<Vec<HybridSearchResult>> {
732 debug!("Search with mode: query='{}', mode={:?}", query, mode);
733
734 match mode {
735 SearchMode::Vector => {
736 let results = self.search(query, limit).await?;
738 Ok(results
739 .into_iter()
740 .map(|r| HybridSearchResult {
741 id: r.id,
742 namespace: r.namespace,
743 document: r.text,
744 combined_score: r.score,
745 vector_score: Some(r.score),
746 bm25_score: None,
747 metadata: r.metadata,
748 layer: r.layer,
749 parent_id: r.parent_id,
750 children_ids: r.children_ids,
751 keywords: r.keywords,
752 })
753 .collect())
754 }
755 SearchMode::Keyword | SearchMode::Hybrid => {
756 self.search_hybrid(query, limit).await
758 }
759 }
760 }
761
762 pub async fn get(&self, id: &str) -> Result<Option<SearchResult>> {
772 debug!("Getting document: id={}", id);
773
774 if let Some(doc) = self.storage.get_document(&self.namespace, id).await? {
775 let layer = doc.slice_layer();
776 return Ok(Some(SearchResult {
777 id: doc.id,
778 namespace: doc.namespace,
779 text: doc.document,
780 score: 1.0,
781 metadata: doc.metadata,
782 layer,
783 parent_id: doc.parent_id,
784 children_ids: doc.children_ids,
785 keywords: doc.keywords,
786 }));
787 }
788
789 Ok(None)
790 }
791
792 pub async fn delete(&self, id: &str) -> Result<bool> {
804 debug!("Deleting document: id={}", id);
805
806 let deleted = self.storage.delete_document(&self.namespace, id).await?;
807
808 if let Some(ref bm25) = self.bm25 {
810 bm25.delete_documents(&[id.to_string()]).await?;
811 }
812
813 Ok(deleted > 0)
814 }
815
816 pub async fn store_batch(&self, items: Vec<StoreItem>) -> Result<BatchResult> {
836 if items.is_empty() {
837 return Ok(BatchResult {
838 success_count: 0,
839 failure_count: 0,
840 failed_ids: vec![],
841 });
842 }
843
844 info!("Batch storing {} documents", items.len());
845
846 let texts: Vec<String> = items.iter().map(|i| i.text.clone()).collect();
848
849 let embeddings = self.embeddings.lock().await.embed_batch(&texts).await?;
851
852 let mut docs = Vec::with_capacity(items.len());
854 let mut bm25_docs = Vec::new();
855
856 for (item, embedding) in items.iter().zip(embeddings) {
857 let doc = ChromaDocument::new_flat(
858 item.id.clone(),
859 self.namespace.clone(),
860 embedding,
861 item.metadata.clone(),
862 item.text.clone(),
863 );
864 docs.push(doc);
865
866 if self.bm25.is_some() {
867 bm25_docs.push((item.id.clone(), self.namespace.clone(), item.text.clone()));
868 }
869 }
870
871 self.storage.add_to_store(docs).await?;
873
874 if let Some(ref bm25) = self.bm25 {
876 bm25.add_documents(&bm25_docs).await?;
877 }
878
879 Ok(BatchResult {
880 success_count: items.len(),
881 failure_count: 0,
882 failed_ids: vec![],
883 })
884 }
885
886 pub async fn search_filtered(
901 &self,
902 query: &str,
903 filter: MetaFilter,
904 limit: usize,
905 ) -> Result<Vec<SearchResult>> {
906 let candidates = self.search(query, limit * 3).await?;
908
909 let filtered: Vec<SearchResult> = candidates
911 .into_iter()
912 .filter(|r| filter.matches(&r.metadata))
913 .take(limit)
914 .collect();
915
916 debug!(
917 "Filtered search: query='{}', filter={:?}, results={}",
918 query,
919 filter,
920 filtered.len()
921 );
922
923 Ok(filtered)
924 }
925
926 pub async fn delete_by_filter(&self, filter: MetaFilter) -> Result<usize> {
939 info!("Deleting documents by filter: {:?}", filter);
940
941 let mut deleted_ids = Vec::new();
949
950 const BATCH_SIZE: usize = 1000;
954 let mut offset = 0;
955
956 loop {
957 let candidates = self
958 .storage
959 .all_documents_page(Some(&self.namespace), offset, BATCH_SIZE)
960 .await?;
961
962 if candidates.is_empty() {
963 break;
964 }
965
966 let page_len = candidates.len();
967 for doc in candidates {
968 if filter.matches(&doc.metadata) {
969 deleted_ids.push(doc.id);
970 }
971 }
972
973 if page_len < BATCH_SIZE {
974 break;
975 }
976
977 offset += page_len;
978 }
979
980 for id in &deleted_ids {
981 self.storage.delete_document(&self.namespace, id).await?;
982 }
983
984 if let Some(ref bm25) = self.bm25
986 && !deleted_ids.is_empty()
987 {
988 bm25.delete_documents(&deleted_ids).await?;
989 }
990
991 let deleted_count = deleted_ids.len();
992 info!("Deleted {} documents by filter", deleted_count);
993 Ok(deleted_count)
994 }
995
996 pub async fn purge_namespace(&self) -> Result<usize> {
1000 info!("Purging namespace: {}", self.namespace);
1001
1002 let deleted = self
1003 .storage
1004 .delete_namespace_documents(&self.namespace)
1005 .await?;
1006
1007 if let Some(ref bm25) = self.bm25 {
1008 bm25.delete_namespace_term(&self.namespace).await?;
1009 }
1010
1011 Ok(deleted)
1012 }
1013
1014 #[deprecated(
1027 since = "0.3.1",
1028 note = "Use search_hybrid() with HybridSearcher instead"
1029 )]
1030 pub async fn search_bm25_fusion(
1031 &self,
1032 query: &str,
1033 limit: usize,
1034 bm25_weight: f32,
1035 ) -> Result<Vec<SearchResult>> {
1036 let bm25 = self
1037 .bm25
1038 .as_ref()
1039 .ok_or_else(|| anyhow!("BM25 not enabled. Set enable_bm25: true in MemexConfig."))?;
1040
1041 let bm25_results = bm25.search(query, Some(&self.namespace), limit * 2)?;
1043 let bm25_max_score = bm25_results.first().map(|(_, _, s)| *s).unwrap_or(1.0);
1044
1045 let vector_results = self.search(query, limit * 2).await?;
1047
1048 use std::collections::HashMap;
1050 let mut scores: HashMap<String, (f32, Option<SearchResult>)> = HashMap::new();
1051
1052 for (id, _namespace, score) in bm25_results {
1054 let normalized = score / bm25_max_score.max(0.001);
1055 scores.insert(id, (normalized * bm25_weight, None));
1056 }
1057
1058 let vector_weight = 1.0 - bm25_weight;
1060 for result in vector_results {
1061 let entry = scores.entry(result.id.clone()).or_insert((0.0, None));
1062 entry.0 += result.score * vector_weight;
1063 entry.1 = Some(result);
1064 }
1065
1066 let mut combined: Vec<_> = scores
1068 .into_iter()
1069 .filter_map(|(_id, (score, result))| {
1070 result.map(|mut r| {
1072 r.score = score;
1073 r
1074 })
1075 })
1076 .collect();
1077
1078 combined.sort_by(|a, b| {
1079 b.score
1080 .partial_cmp(&a.score)
1081 .unwrap_or(std::cmp::Ordering::Equal)
1082 });
1083 combined.truncate(limit);
1084
1085 Ok(combined)
1086 }
1087}
1088
1089#[cfg(test)]
1090mod tests {
1091 use super::*;
1092
1093 #[test]
1094 fn test_meta_filter_matches() {
1095 let filter = MetaFilter::for_patient("P-123");
1096
1097 let matching = serde_json::json!({
1098 "patient_id": "P-123",
1099 "visit_id": "V-456"
1100 });
1101 assert!(filter.matches(&matching));
1102
1103 let not_matching = serde_json::json!({
1104 "patient_id": "P-999",
1105 "visit_id": "V-456"
1106 });
1107 assert!(!filter.matches(¬_matching));
1108 }
1109
1110 #[test]
1111 fn test_meta_filter_custom() {
1112 let filter = MetaFilter::default()
1113 .with_custom("doc_type", "soap_note")
1114 .with_custom("status", "active");
1115
1116 let matching = serde_json::json!({
1117 "doc_type": "soap_note",
1118 "status": "active"
1119 });
1120 assert!(filter.matches(&matching));
1121
1122 let missing_field = serde_json::json!({
1123 "doc_type": "soap_note"
1124 });
1125 assert!(!filter.matches(&missing_field));
1126 }
1127
1128 #[test]
1129 fn test_memex_config_defaults() {
1130 let config = MemexConfig::default();
1131 assert_eq!(config.dimension, DEFAULT_REQUIRED_DIMENSION);
1132 assert_eq!(
1133 config.embedding_config.required_dimension,
1134 DEFAULT_REQUIRED_DIMENSION
1135 );
1136 assert_eq!(config.namespace, "default");
1137 assert_eq!(config.effective_db_path(), "~/.rmcp-servers/memex/lancedb");
1138 }
1139
1140 #[test]
1141 fn test_memex_config_builder() {
1142 let config = MemexConfig::new("vista", "patients")
1143 .with_dimension(1024)
1144 .with_db_path("/custom/path/db");
1145
1146 assert_eq!(config.app_name, "vista");
1147 assert_eq!(config.namespace, "patients");
1148 assert_eq!(config.dimension, 1024);
1149 assert_eq!(config.embedding_config.required_dimension, 1024);
1150 assert_eq!(config.effective_db_path(), "/custom/path/db");
1151 }
1152
1153 #[test]
1154 fn test_memex_config_with_embedding_config_syncs_dimension() {
1155 let embedding_config = EmbeddingConfig {
1156 required_dimension: 768,
1157 ..EmbeddingConfig::default()
1158 };
1159
1160 let config = MemexConfig::new("sync-test", "ns").with_embedding_config(embedding_config);
1161
1162 assert_eq!(config.dimension, 768);
1163 assert_eq!(config.embedding_config.required_dimension, 768);
1164 }
1165
1166 #[test]
1167 fn test_memex_config_sync_dimension_fields_uses_non_default_embedding_dimension() {
1168 let mut config = MemexConfig::default();
1169 config.embedding_config.required_dimension = 1024;
1170
1171 config.sync_dimension_fields().unwrap();
1172
1173 assert_eq!(config.dimension, 1024);
1174 assert_eq!(config.embedding_config.required_dimension, 1024);
1175 }
1176
1177 #[test]
1178 fn test_memex_config_sync_dimension_fields_rejects_true_conflict() {
1179 let mut config = MemexConfig {
1180 dimension: 768,
1181 ..MemexConfig::default()
1182 };
1183 config.embedding_config.required_dimension = 1024;
1184
1185 let err = config.sync_dimension_fields().unwrap_err().to_string();
1186 assert!(err.contains("conflicts with embedding_config.required_dimension"));
1187 }
1188
1189 #[test]
1190 fn test_store_item() {
1191 let item = StoreItem::new("doc-1", "Hello world")
1192 .with_metadata(serde_json::json!({"type": "greeting"}));
1193
1194 assert_eq!(item.id, "doc-1");
1195 assert_eq!(item.text, "Hello world");
1196 assert_eq!(item.metadata["type"], "greeting");
1197 }
1198
1199 #[test]
1200 fn test_store_item_default_metadata() {
1201 let item = StoreItem::new("doc-1", "Hello world");
1202
1203 assert_eq!(item.id, "doc-1");
1204 assert_eq!(item.text, "Hello world");
1205 assert!(item.metadata.is_object());
1206 assert!(item.metadata.as_object().unwrap().is_empty());
1207 }
1208
1209 #[test]
1210 fn test_meta_filter_empty_matches_all() {
1211 let filter = MetaFilter::default();
1212
1213 let any_metadata = serde_json::json!({
1215 "patient_id": "P-123",
1216 "visit_id": "V-456",
1217 "random_field": "value"
1218 });
1219 assert!(filter.matches(&any_metadata));
1220
1221 let empty = serde_json::json!({});
1223 assert!(filter.matches(&empty));
1224 }
1225
1226 #[test]
1227 fn test_meta_filter_date_range() {
1228 let filter = MetaFilter {
1229 date_from: Some("2024-01-01".to_string()),
1230 date_to: Some("2024-12-31".to_string()),
1231 ..Default::default()
1232 };
1233
1234 let in_range = serde_json::json!({
1236 "date": "2024-06-15"
1237 });
1238 assert!(filter.matches(&in_range));
1239
1240 let before = serde_json::json!({
1242 "date": "2023-12-31"
1243 });
1244 assert!(!filter.matches(&before));
1245
1246 let after = serde_json::json!({
1248 "date": "2025-01-01"
1249 });
1250 assert!(!filter.matches(&after));
1251
1252 let no_date = serde_json::json!({
1254 "patient_id": "P-123"
1255 });
1256 assert!(filter.matches(&no_date));
1257 }
1258
1259 #[test]
1260 fn test_meta_filter_for_visit() {
1261 let filter = MetaFilter::for_visit("V-789");
1262
1263 let matching = serde_json::json!({
1264 "visit_id": "V-789",
1265 "patient_id": "P-123"
1266 });
1267 assert!(filter.matches(&matching));
1268
1269 let not_matching = serde_json::json!({
1270 "visit_id": "V-other",
1271 "patient_id": "P-123"
1272 });
1273 assert!(!filter.matches(¬_matching));
1274 }
1275
1276 #[test]
1277 fn test_meta_filter_combined() {
1278 let filter = MetaFilter {
1279 patient_id: Some("P-123".to_string()),
1280 doc_type: Some("soap_note".to_string()),
1281 ..Default::default()
1282 };
1283
1284 let both_match = serde_json::json!({
1286 "patient_id": "P-123",
1287 "doc_type": "soap_note"
1288 });
1289 assert!(filter.matches(&both_match));
1290
1291 let wrong_type = serde_json::json!({
1293 "patient_id": "P-123",
1294 "doc_type": "prescription"
1295 });
1296 assert!(!filter.matches(&wrong_type));
1297
1298 let missing = serde_json::json!({
1300 "patient_id": "P-123"
1301 });
1302 assert!(!filter.matches(&missing));
1303 }
1304
1305 #[test]
1306 fn test_batch_result_struct() {
1307 let result = BatchResult {
1308 success_count: 10,
1309 failure_count: 2,
1310 failed_ids: vec!["doc-5".to_string(), "doc-8".to_string()],
1311 };
1312
1313 assert_eq!(result.success_count, 10);
1314 assert_eq!(result.failure_count, 2);
1315 assert_eq!(result.failed_ids.len(), 2);
1316 assert!(result.failed_ids.contains(&"doc-5".to_string()));
1317 }
1318
1319 #[test]
1320 fn test_memex_config_with_bm25() {
1321 use crate::search::BM25Config;
1322
1323 let bm25_config = BM25Config::default();
1324 let config = MemexConfig::new("test-app", "docs").with_bm25(bm25_config);
1325
1326 assert!(config.enable_bm25);
1327 assert!(config.bm25_config.is_some());
1328 }
1329
1330 #[test]
1331 fn test_memex_config_effective_bm25_path() {
1332 let config = MemexConfig::new("my-app", "docs");
1333 assert_eq!(config.effective_bm25_path(), "~/.rmcp-servers/my-app/bm25");
1334 }
1335
1336 #[test]
1337 fn test_resolved_bm25_config_uses_app_specific_path_for_hybrid_defaults() {
1338 let config = MemexConfig::new("my-app", "docs");
1339 let bm25 = config
1340 .resolved_bm25_config()
1341 .expect("hybrid defaults should provision BM25");
1342
1343 assert_eq!(bm25.index_path, "~/.rmcp-servers/my-app/bm25");
1344 }
1345
1346 #[test]
1347 fn test_resolved_hybrid_config_reuses_resolved_bm25_path() {
1348 let config = MemexConfig::new("my-app", "docs");
1349 let hybrid = config.resolved_hybrid_config();
1350
1351 assert_eq!(hybrid.bm25.index_path, "~/.rmcp-servers/my-app/bm25");
1352 }
1353
1354 #[test]
1355 fn test_meta_filter_serialization() {
1356 let filter = MetaFilter::for_patient("P-123").with_custom("status", "active");
1357
1358 let json = serde_json::to_string(&filter).unwrap();
1359 let deserialized: MetaFilter = serde_json::from_str(&json).unwrap();
1360
1361 assert_eq!(deserialized.patient_id, Some("P-123".to_string()));
1362 assert_eq!(deserialized.custom.len(), 1);
1363 assert_eq!(
1364 deserialized.custom[0],
1365 ("status".to_string(), "active".to_string())
1366 );
1367 }
1368
1369 #[test]
1370 fn test_memex_config_serialization() {
1371 let config = MemexConfig::new("test", "ns")
1372 .with_dimension(512)
1373 .with_db_path("/tmp/test");
1374
1375 let json = serde_json::to_string(&config).unwrap();
1376 let deserialized: MemexConfig = serde_json::from_str(&json).unwrap();
1377
1378 assert_eq!(deserialized.app_name, "test");
1379 assert_eq!(deserialized.namespace, "ns");
1380 assert_eq!(deserialized.dimension, 512);
1381 assert_eq!(deserialized.embedding_config.required_dimension, 512);
1382 assert_eq!(deserialized.db_path, Some("/tmp/test".to_string()));
1383 }
1384
1385 #[test]
1386 fn test_store_item_serialization() {
1387 let item =
1388 StoreItem::new("id-1", "content").with_metadata(serde_json::json!({"key": "value"}));
1389
1390 let json = serde_json::to_string(&item).unwrap();
1391 let deserialized: StoreItem = serde_json::from_str(&json).unwrap();
1392
1393 assert_eq!(deserialized.id, "id-1");
1394 assert_eq!(deserialized.text, "content");
1395 assert_eq!(deserialized.metadata["key"], "value");
1396 }
1397}