1use anyhow::{Result, anyhow};
37use serde::{Deserialize, Serialize};
38use serde_json::Value;
39use std::collections::HashMap;
40use std::sync::Arc;
41use tokio::sync::Mutex;
42use tracing::{debug, info};
43
44use crate::embeddings::{DEFAULT_REQUIRED_DIMENSION, EmbeddingClient, EmbeddingConfig};
45use crate::rag::{SearchOptions, SearchResult, SliceLayer};
46use crate::search::{
47 BM25Config, BM25Index, HybridConfig, HybridSearchResult, HybridSearcher, SearchMode,
48};
49use crate::storage::{ChromaDocument, StorageManager};
50
51pub use crate::rag::SearchResult as Document;
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct MemexConfig {
59 pub app_name: String,
61 pub namespace: String,
63 #[serde(default)]
65 pub db_path: Option<String>,
66 #[serde(default = "default_dimension")]
68 pub dimension: usize,
69 #[serde(default)]
71 pub embedding_config: EmbeddingConfig,
72 #[serde(default)]
74 pub enable_bm25: bool,
75 #[serde(default)]
77 pub bm25_config: Option<BM25Config>,
78 #[serde(default = "default_enable_hybrid")]
80 pub enable_hybrid: bool,
81 #[serde(default)]
83 pub hybrid_config: Option<HybridConfig>,
84}
85
/// Serde fallback for `MemexConfig::enable_hybrid`: hybrid search is on
/// unless the configuration explicitly turns it off.
fn default_enable_hybrid() -> bool {
    true
}
89
/// Serde fallback for `MemexConfig::dimension`; returns
/// `DEFAULT_REQUIRED_DIMENSION` from the embeddings module.
fn default_dimension() -> usize {
    DEFAULT_REQUIRED_DIMENSION
}
93
94impl Default for MemexConfig {
95 fn default() -> Self {
96 Self {
97 app_name: "memex".to_string(),
98 namespace: "default".to_string(),
99 db_path: None,
100 dimension: default_dimension(),
101 embedding_config: EmbeddingConfig::default(),
102 enable_bm25: false,
103 bm25_config: None,
104 enable_hybrid: default_enable_hybrid(),
105 hybrid_config: None,
106 }
107 }
108}
109
110impl MemexConfig {
111 pub fn new(app_name: impl Into<String>, namespace: impl Into<String>) -> Self {
113 Self {
114 app_name: app_name.into(),
115 namespace: namespace.into(),
116 ..Default::default()
117 }
118 }
119
120 pub fn with_db_path(mut self, path: impl Into<String>) -> Self {
122 self.db_path = Some(path.into());
123 self
124 }
125
126 pub fn with_dimension(mut self, dimension: usize) -> Self {
128 self.dimension = dimension;
129 self.embedding_config.required_dimension = dimension;
130 self
131 }
132
133 pub fn with_embedding_config(mut self, config: EmbeddingConfig) -> Self {
135 self.dimension = config.required_dimension;
136 self.embedding_config = config;
137 self
138 }
139
140 fn sync_dimension_fields(&mut self) -> Result<()> {
141 if self.dimension == self.embedding_config.required_dimension {
142 return Ok(());
143 }
144
145 let default_dim = default_dimension();
146 if self.dimension == default_dim {
147 self.dimension = self.embedding_config.required_dimension;
148 return Ok(());
149 }
150
151 if self.embedding_config.required_dimension == default_dim {
152 self.embedding_config.required_dimension = self.dimension;
153 return Ok(());
154 }
155
156 Err(anyhow!(
157 "MemexConfig.dimension={} conflicts with embedding_config.required_dimension={}. \
158 Set them to the same value or use with_dimension()/with_embedding_config() so one source of truth updates both.",
159 self.dimension,
160 self.embedding_config.required_dimension
161 ))
162 }
163
164 pub fn with_bm25(mut self, config: BM25Config) -> Self {
166 self.enable_bm25 = true;
167 self.bm25_config = Some(config);
168 self
169 }
170
171 pub fn effective_db_path(&self) -> String {
173 self.db_path
174 .clone()
175 .unwrap_or_else(|| format!("~/.rmcp-servers/{}/lancedb", self.app_name))
176 }
177
178 pub fn effective_bm25_path(&self) -> String {
180 self.bm25_config
181 .as_ref()
182 .map(|c| c.index_path.clone())
183 .unwrap_or_else(|| format!("~/.rmcp-servers/{}/bm25", self.app_name))
184 }
185
186 fn hybrid_uses_bm25(&self) -> bool {
187 self.enable_hybrid
188 && self.hybrid_config.clone().unwrap_or_default().mode != SearchMode::Vector
189 }
190
191 fn normalize_bm25_config(&self, mut config: BM25Config) -> BM25Config {
192 if config.index_path == BM25Config::default().index_path {
193 config.index_path = self.effective_bm25_path();
194 }
195 config
196 }
197
198 fn resolved_bm25_config(&self) -> Option<BM25Config> {
199 if !self.enable_bm25 && !self.hybrid_uses_bm25() {
200 return None;
201 }
202
203 let config = self
204 .bm25_config
205 .clone()
206 .or_else(|| {
207 self.hybrid_config
208 .as_ref()
209 .filter(|cfg| cfg.mode != SearchMode::Vector)
210 .map(|cfg| cfg.bm25.clone())
211 })
212 .unwrap_or_default();
213
214 Some(self.normalize_bm25_config(config))
215 }
216
217 fn resolved_hybrid_config(&self) -> HybridConfig {
218 let mut config = self.hybrid_config.clone().unwrap_or_default();
219 if let Some(bm25) = self.resolved_bm25_config() {
220 config.bm25 = bm25;
221 }
222 config
223 }
224}
225
226#[derive(Debug, Clone, Default, Serialize, Deserialize)]
231pub struct MetaFilter {
232 #[serde(skip_serializing_if = "Option::is_none")]
234 pub patient_id: Option<String>,
235 #[serde(skip_serializing_if = "Option::is_none")]
237 pub visit_id: Option<String>,
238 #[serde(skip_serializing_if = "Option::is_none")]
240 pub doc_type: Option<String>,
241 #[serde(skip_serializing_if = "Option::is_none")]
243 pub date_from: Option<String>,
244 #[serde(skip_serializing_if = "Option::is_none")]
246 pub date_to: Option<String>,
247 #[serde(default, skip_serializing_if = "Vec::is_empty")]
249 pub custom: Vec<(String, String)>,
250}
251
252impl MetaFilter {
253 pub fn for_patient(patient_id: impl Into<String>) -> Self {
255 Self {
256 patient_id: Some(patient_id.into()),
257 ..Default::default()
258 }
259 }
260
261 pub fn for_visit(visit_id: impl Into<String>) -> Self {
263 Self {
264 visit_id: Some(visit_id.into()),
265 ..Default::default()
266 }
267 }
268
269 pub fn with_custom(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
271 self.custom.push((key.into(), value.into()));
272 self
273 }
274
275 pub fn matches(&self, metadata: &Value) -> bool {
277 if let Some(ref patient_id) = self.patient_id
278 && metadata.get("patient_id").and_then(|v| v.as_str()) != Some(patient_id)
279 {
280 return false;
281 }
282
283 if let Some(ref visit_id) = self.visit_id
284 && metadata.get("visit_id").and_then(|v| v.as_str()) != Some(visit_id)
285 {
286 return false;
287 }
288
289 if let Some(ref doc_type) = self.doc_type
290 && metadata.get("doc_type").and_then(|v| v.as_str()) != Some(doc_type)
291 {
292 return false;
293 }
294
295 if let Some(ref date_from) = self.date_from
297 && let Some(doc_date) = metadata.get("date").and_then(|v| v.as_str())
298 && doc_date < date_from.as_str()
299 {
300 return false;
301 }
302
303 if let Some(ref date_to) = self.date_to
304 && let Some(doc_date) = metadata.get("date").and_then(|v| v.as_str())
305 && doc_date > date_to.as_str()
306 {
307 return false;
308 }
309
310 for (key, value) in &self.custom {
312 if metadata.get(key).and_then(|v| v.as_str()) != Some(value) {
313 return false;
314 }
315 }
316
317 true
318 }
319}
320
321#[derive(Debug, Clone, Serialize, Deserialize)]
323pub struct StoreItem {
324 pub id: String,
326 pub text: String,
328 #[serde(default)]
330 pub metadata: Value,
331}
332
333impl StoreItem {
334 pub fn new(id: impl Into<String>, text: impl Into<String>) -> Self {
336 Self {
337 id: id.into(),
338 text: text.into(),
339 metadata: Value::Object(serde_json::Map::new()),
340 }
341 }
342
343 pub fn with_metadata(mut self, metadata: Value) -> Self {
345 self.metadata = metadata;
346 self
347 }
348}
349
/// Outcome of a batch store operation.
///
/// `Default` yields the all-zero result (nothing stored, nothing failed), and
/// `PartialEq`/`Eq` allow whole results to be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BatchResult {
    /// Number of documents stored successfully.
    pub success_count: usize,
    /// Number of documents that could not be stored.
    pub failure_count: usize,
    /// Identifiers of the documents that failed.
    pub failed_ids: Vec<String>,
}
360
361#[derive(Debug, Clone, Serialize, Deserialize)]
363pub struct LayerStats {
364 pub total_chunks: usize,
366 pub avg_score: f32,
368 pub top_keywords: Vec<String>,
370}
371
372impl LayerStats {
373 pub fn empty() -> Self {
375 Self {
376 total_chunks: 0,
377 avg_score: 0.0,
378 top_keywords: vec![],
379 }
380 }
381
382 pub fn from_results(results: &[SearchResult]) -> Self {
384 if results.is_empty() {
385 return Self::empty();
386 }
387
388 let total_chunks = results.len();
389 let avg_score = results.iter().map(|r| r.score).sum::<f32>() / total_chunks as f32;
390
391 let mut keyword_counts: HashMap<String, usize> = HashMap::new();
393 for result in results {
394 for keyword in &result.keywords {
395 *keyword_counts.entry(keyword.clone()).or_insert(0) += 1;
396 }
397 }
398
399 let mut keywords: Vec<_> = keyword_counts.into_iter().collect();
401 keywords.sort_by(|a, b| b.1.cmp(&a.1));
402 let top_keywords = keywords.into_iter().take(10).map(|(k, _)| k).collect();
403
404 Self {
405 total_chunks,
406 avg_score,
407 top_keywords,
408 }
409 }
410}
411
412#[derive(Debug, Clone, Serialize, Deserialize)]
414pub struct DiveResult {
415 pub layer: SliceLayer,
417 pub results: Vec<SearchResult>,
419 pub layer_stats: LayerStats,
421}
422
/// Facade over document storage, an embedding client and optional BM25 /
/// hybrid search, bound to a single namespace.
pub struct MemexEngine {
    /// Shared handle to the document store.
    storage: Arc<StorageManager>,
    /// Embedding client behind an async mutex; locked for each embed call.
    embeddings: Arc<Mutex<EmbeddingClient>>,
    /// Keyword index; `None` when the configuration resolves to no BM25 index.
    bm25: Option<Arc<BM25Index>>,
    /// Hybrid searcher; `None` when `enable_hybrid` is off.
    hybrid_searcher: Option<HybridSearcher>,
    /// Namespace used for every read and write; copied from the config.
    namespace: String,
    /// Configuration the engine was built from, after dimension syncing.
    config: MemexConfig,
}
435
436impl MemexEngine {
437 pub async fn new(mut config: MemexConfig) -> Result<Self> {
447 config.sync_dimension_fields()?;
448 let db_path = config.effective_db_path();
449
450 info!(
451 "Initializing MemexEngine: app={}, namespace={}, db={}",
452 config.app_name, config.namespace, db_path
453 );
454
455 let storage = StorageManager::new_lance_only(&db_path).await?;
457 storage.ensure_collection().await?;
458
459 let embeddings = EmbeddingClient::new(&config.embedding_config).await?;
461
462 info!(
463 "Connected to embedding provider: {} (dim={})",
464 embeddings.connected_to(),
465 embeddings.required_dimension()
466 );
467
468 let bm25 = config
470 .resolved_bm25_config()
471 .map(|bm25_config| BM25Index::new(&bm25_config).map(Arc::new))
472 .transpose()?;
473
474 let storage_arc = Arc::new(storage);
475
476 let hybrid_searcher = if config.enable_hybrid {
478 let hybrid_config = config.resolved_hybrid_config();
479 Some(if let Some(ref bm25_index) = bm25 {
480 HybridSearcher::with_bm25_index(
481 storage_arc.clone(),
482 bm25_index.clone(),
483 hybrid_config,
484 )
485 } else {
486 HybridSearcher::new(storage_arc.clone(), hybrid_config).await?
487 })
488 } else {
489 None
490 };
491
492 Ok(Self {
493 storage: storage_arc,
494 embeddings: Arc::new(Mutex::new(embeddings)),
495 bm25,
496 hybrid_searcher,
497 namespace: config.namespace.clone(),
498 config,
499 })
500 }
501
502 pub async fn for_app(app_name: &str, namespace: &str) -> Result<Self> {
512 let config = MemexConfig::new(app_name, namespace);
513 Self::new(config).await
514 }
515
516 pub async fn for_vista() -> Result<Self> {
526 use crate::embeddings::ProviderConfig;
527
528 let config = MemexConfig {
529 app_name: "vista".to_string(),
530 namespace: "default".to_string(),
531 db_path: Some("~/.rmcp-servers/vista/lancedb".to_string()),
532 dimension: 1024,
533 embedding_config: EmbeddingConfig {
534 required_dimension: 1024,
535 providers: vec![ProviderConfig {
536 name: "ollama-vista".to_string(),
537 base_url: "http://localhost:11434".to_string(),
538 model: "qwen3-embedding:0.6b".to_string(),
539 priority: 1,
540 endpoint: "/v1/embeddings".to_string(),
541 }],
542 ..EmbeddingConfig::default()
543 },
544 enable_bm25: false,
545 bm25_config: None,
546 enable_hybrid: true, hybrid_config: None,
548 };
549 Self::new(config).await
550 }
551
552 pub fn namespace(&self) -> &str {
554 &self.namespace
555 }
556
557 pub fn config(&self) -> &MemexConfig {
559 &self.config
560 }
561
562 pub fn storage(&self) -> Arc<StorageManager> {
564 self.storage.clone()
565 }
566
567 pub async fn store(&self, id: &str, text: &str, metadata: Value) -> Result<()> {
590 debug!("Storing document: id={}, text_len={}", id, text.len());
591
592 let embedding = self.embeddings.lock().await.embed(text).await?;
594
595 let doc = ChromaDocument::new_flat(
597 id.to_string(),
598 self.namespace.clone(),
599 embedding,
600 metadata.clone(),
601 text.to_string(),
602 );
603
604 self.storage.add_to_store(vec![doc]).await?;
606
607 if let Some(ref bm25) = self.bm25 {
609 bm25.add_documents(&[(id.to_string(), self.namespace.clone(), text.to_string())])
610 .await?;
611 }
612
613 debug!("Stored document: id={}", id);
614 Ok(())
615 }
616
617 pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
634 debug!("Searching: query='{}', limit={}", query, limit);
635
636 let query_embedding = self.embeddings.lock().await.embed(query).await?;
638
639 let candidates = self
641 .storage
642 .search_store(Some(&self.namespace), query_embedding, limit)
643 .await?;
644
645 let results: Vec<SearchResult> = candidates
647 .into_iter()
648 .enumerate()
649 .map(|(idx, doc)| {
650 let score = 1.0 - (idx as f32 / (limit as f32 + 1.0));
652 let layer = doc.slice_layer();
653 SearchResult {
654 id: doc.id,
655 namespace: doc.namespace,
656 text: doc.document,
657 score,
658 metadata: doc.metadata,
659 layer,
660 parent_id: doc.parent_id,
661 children_ids: doc.children_ids,
662 keywords: doc.keywords,
663 }
664 })
665 .collect();
666
667 debug!("Search returned {} results", results.len());
668 Ok(results)
669 }
670
671 pub async fn search_hybrid(
686 &self,
687 query: &str,
688 limit: usize,
689 ) -> Result<Vec<HybridSearchResult>> {
690 debug!("Hybrid search: query='{}', limit={}", query, limit);
691
692 let hybrid = self.hybrid_searcher.as_ref().ok_or_else(|| {
693 anyhow!("Hybrid search not enabled. Set enable_hybrid: true in MemexConfig.")
694 })?;
695
696 let query_embedding = self.embeddings.lock().await.embed(query).await?;
698
699 let results = hybrid
701 .search(
702 query,
703 query_embedding,
704 Some(&self.namespace),
705 limit,
706 SearchOptions::default(),
707 )
708 .await?;
709
710 debug!("Hybrid search returned {} results", results.len());
711 Ok(results)
712 }
713
714 pub async fn search_with_mode(
727 &self,
728 query: &str,
729 limit: usize,
730 mode: SearchMode,
731 ) -> Result<Vec<HybridSearchResult>> {
732 debug!("Search with mode: query='{}', mode={:?}", query, mode);
733
734 match mode {
735 SearchMode::Vector => {
736 let results = self.search(query, limit).await?;
738 Ok(results
739 .into_iter()
740 .map(|r| HybridSearchResult {
741 id: r.id,
742 namespace: r.namespace,
743 document: r.text,
744 combined_score: r.score,
745 vector_score: Some(r.score),
746 bm25_score: None,
747 metadata: r.metadata,
748 layer: r.layer,
749 parent_id: r.parent_id,
750 children_ids: r.children_ids,
751 keywords: r.keywords,
752 })
753 .collect())
754 }
755 SearchMode::Keyword | SearchMode::Hybrid => {
756 self.search_hybrid(query, limit).await
758 }
759 }
760 }
761
762 pub async fn get(&self, id: &str) -> Result<Option<SearchResult>> {
772 debug!("Getting document: id={}", id);
773
774 if let Some(doc) = self.storage.get_document(&self.namespace, id).await? {
775 let layer = doc.slice_layer();
776 return Ok(Some(SearchResult {
777 id: doc.id,
778 namespace: doc.namespace,
779 text: doc.document,
780 score: 1.0,
781 metadata: doc.metadata,
782 layer,
783 parent_id: doc.parent_id,
784 children_ids: doc.children_ids,
785 keywords: doc.keywords,
786 }));
787 }
788
789 Ok(None)
790 }
791
792 pub async fn delete(&self, id: &str) -> Result<bool> {
804 debug!("Deleting document: id={}", id);
805
806 let deleted = self.storage.delete_document(&self.namespace, id).await?;
807
808 if let Some(ref bm25) = self.bm25 {
810 bm25.delete_documents(&[id.to_string()]).await?;
811 }
812
813 Ok(deleted > 0)
814 }
815
816 pub async fn store_batch(&self, items: Vec<StoreItem>) -> Result<BatchResult> {
836 if items.is_empty() {
837 return Ok(BatchResult {
838 success_count: 0,
839 failure_count: 0,
840 failed_ids: vec![],
841 });
842 }
843
844 info!("Batch storing {} documents", items.len());
845
846 let texts: Vec<String> = items.iter().map(|i| i.text.clone()).collect();
848
849 let embeddings = self.embeddings.lock().await.embed_batch(&texts).await?;
851
852 let mut docs = Vec::with_capacity(items.len());
854 let mut bm25_docs = Vec::new();
855
856 for (item, embedding) in items.iter().zip(embeddings.into_iter()) {
857 let doc = ChromaDocument::new_flat(
858 item.id.clone(),
859 self.namespace.clone(),
860 embedding,
861 item.metadata.clone(),
862 item.text.clone(),
863 );
864 docs.push(doc);
865
866 if self.bm25.is_some() {
867 bm25_docs.push((item.id.clone(), self.namespace.clone(), item.text.clone()));
868 }
869 }
870
871 self.storage.add_to_store(docs).await?;
873
874 if let Some(ref bm25) = self.bm25 {
876 bm25.add_documents(&bm25_docs).await?;
877 }
878
879 Ok(BatchResult {
880 success_count: items.len(),
881 failure_count: 0,
882 failed_ids: vec![],
883 })
884 }
885
886 pub async fn search_filtered(
901 &self,
902 query: &str,
903 filter: MetaFilter,
904 limit: usize,
905 ) -> Result<Vec<SearchResult>> {
906 let candidates = self.search(query, limit * 3).await?;
908
909 let filtered: Vec<SearchResult> = candidates
911 .into_iter()
912 .filter(|r| filter.matches(&r.metadata))
913 .take(limit)
914 .collect();
915
916 debug!(
917 "Filtered search: query='{}', filter={:?}, results={}",
918 query,
919 filter,
920 filtered.len()
921 );
922
923 Ok(filtered)
924 }
925
926 pub async fn delete_by_filter(&self, filter: MetaFilter) -> Result<usize> {
939 info!("Deleting documents by filter: {:?}", filter);
940
941 let mut deleted_count = 0;
949 let mut deleted_ids = Vec::new();
950
951 const BATCH_SIZE: usize = 1000;
954
955 let candidates = self
956 .storage
957 .all_documents(Some(&self.namespace), BATCH_SIZE)
958 .await?;
959
960 for doc in candidates {
961 if filter.matches(&doc.metadata) {
962 self.storage
963 .delete_document(&self.namespace, &doc.id)
964 .await?;
965 deleted_ids.push(doc.id);
966 deleted_count += 1;
967 }
968 }
969
970 if let Some(ref bm25) = self.bm25
972 && !deleted_ids.is_empty()
973 {
974 bm25.delete_documents(&deleted_ids).await?;
975 }
976
977 info!("Deleted {} documents by filter", deleted_count);
978 Ok(deleted_count)
979 }
980
981 pub async fn purge_namespace(&self) -> Result<usize> {
985 info!("Purging namespace: {}", self.namespace);
986
987 let deleted = self
988 .storage
989 .delete_namespace_documents(&self.namespace)
990 .await?;
991
992 if let Some(ref bm25) = self.bm25 {
993 bm25.delete_namespace_term(&self.namespace).await?;
994 }
995
996 Ok(deleted)
997 }
998
999 #[deprecated(
1012 since = "0.3.1",
1013 note = "Use search_hybrid() with HybridSearcher instead"
1014 )]
1015 pub async fn search_bm25_fusion(
1016 &self,
1017 query: &str,
1018 limit: usize,
1019 bm25_weight: f32,
1020 ) -> Result<Vec<SearchResult>> {
1021 let bm25 = self
1022 .bm25
1023 .as_ref()
1024 .ok_or_else(|| anyhow!("BM25 not enabled. Set enable_bm25: true in MemexConfig."))?;
1025
1026 let bm25_results = bm25.search(query, Some(&self.namespace), limit * 2)?;
1028 let bm25_max_score = bm25_results.first().map(|(_, _, s)| *s).unwrap_or(1.0);
1029
1030 let vector_results = self.search(query, limit * 2).await?;
1032
1033 use std::collections::HashMap;
1035 let mut scores: HashMap<String, (f32, Option<SearchResult>)> = HashMap::new();
1036
1037 for (id, _namespace, score) in bm25_results {
1039 let normalized = score / bm25_max_score.max(0.001);
1040 scores.insert(id, (normalized * bm25_weight, None));
1041 }
1042
1043 let vector_weight = 1.0 - bm25_weight;
1045 for result in vector_results {
1046 let entry = scores.entry(result.id.clone()).or_insert((0.0, None));
1047 entry.0 += result.score * vector_weight;
1048 entry.1 = Some(result);
1049 }
1050
1051 let mut combined: Vec<_> = scores
1053 .into_iter()
1054 .filter_map(|(_id, (score, result))| {
1055 result.map(|mut r| {
1057 r.score = score;
1058 r
1059 })
1060 })
1061 .collect();
1062
1063 combined.sort_by(|a, b| {
1064 b.score
1065 .partial_cmp(&a.score)
1066 .unwrap_or(std::cmp::Ordering::Equal)
1067 });
1068 combined.truncate(limit);
1069
1070 Ok(combined)
1071 }
1072}
1073
// Unit tests for the configuration, filter and item types. None of them
// construct a `MemexEngine`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_meta_filter_matches() {
        let filter = MetaFilter::for_patient("P-123");

        let matching = serde_json::json!({
            "patient_id": "P-123",
            "visit_id": "V-456"
        });
        assert!(filter.matches(&matching));

        let not_matching = serde_json::json!({
            "patient_id": "P-999",
            "visit_id": "V-456"
        });
        assert!(!filter.matches(&not_matching));
    }

    #[test]
    fn test_meta_filter_custom() {
        let filter = MetaFilter::default()
            .with_custom("doc_type", "soap_note")
            .with_custom("status", "active");

        let matching = serde_json::json!({
            "doc_type": "soap_note",
            "status": "active"
        });
        assert!(filter.matches(&matching));

        let missing_field = serde_json::json!({
            "doc_type": "soap_note"
        });
        assert!(!filter.matches(&missing_field));
    }

    #[test]
    fn test_memex_config_defaults() {
        let config = MemexConfig::default();
        assert_eq!(config.dimension, DEFAULT_REQUIRED_DIMENSION);
        assert_eq!(
            config.embedding_config.required_dimension,
            DEFAULT_REQUIRED_DIMENSION
        );
        assert_eq!(config.namespace, "default");
        assert_eq!(config.effective_db_path(), "~/.rmcp-servers/memex/lancedb");
    }

    #[test]
    fn test_memex_config_builder() {
        let config = MemexConfig::new("vista", "patients")
            .with_dimension(1024)
            .with_db_path("/custom/path/db");

        assert_eq!(config.app_name, "vista");
        assert_eq!(config.namespace, "patients");
        assert_eq!(config.dimension, 1024);
        assert_eq!(config.embedding_config.required_dimension, 1024);
        assert_eq!(config.effective_db_path(), "/custom/path/db");
    }

    #[test]
    fn test_memex_config_with_embedding_config_syncs_dimension() {
        let embedding_config = EmbeddingConfig {
            required_dimension: 768,
            ..EmbeddingConfig::default()
        };

        let config = MemexConfig::new("sync-test", "ns").with_embedding_config(embedding_config);

        assert_eq!(config.dimension, 768);
        assert_eq!(config.embedding_config.required_dimension, 768);
    }

    #[test]
    fn test_memex_config_sync_dimension_fields_uses_non_default_embedding_dimension() {
        let mut config = MemexConfig::default();
        config.embedding_config.required_dimension = 1024;

        config.sync_dimension_fields().unwrap();

        assert_eq!(config.dimension, 1024);
        assert_eq!(config.embedding_config.required_dimension, 1024);
    }

    #[test]
    fn test_memex_config_sync_dimension_fields_rejects_true_conflict() {
        let mut config = MemexConfig {
            dimension: 768,
            ..MemexConfig::default()
        };
        config.embedding_config.required_dimension = 1024;

        let err = config.sync_dimension_fields().unwrap_err().to_string();
        assert!(err.contains("conflicts with embedding_config.required_dimension"));
    }

    #[test]
    fn test_store_item() {
        let item = StoreItem::new("doc-1", "Hello world")
            .with_metadata(serde_json::json!({"type": "greeting"}));

        assert_eq!(item.id, "doc-1");
        assert_eq!(item.text, "Hello world");
        assert_eq!(item.metadata["type"], "greeting");
    }

    #[test]
    fn test_store_item_default_metadata() {
        let item = StoreItem::new("doc-1", "Hello world");

        assert_eq!(item.id, "doc-1");
        assert_eq!(item.text, "Hello world");
        assert!(item.metadata.is_object());
        assert!(item.metadata.as_object().unwrap().is_empty());
    }

    #[test]
    fn test_meta_filter_empty_matches_all() {
        let filter = MetaFilter::default();

        // A filter with nothing set accepts populated metadata...
        let any_metadata = serde_json::json!({
            "patient_id": "P-123",
            "visit_id": "V-456",
            "random_field": "value"
        });
        assert!(filter.matches(&any_metadata));

        // ...and an empty object.
        let empty = serde_json::json!({});
        assert!(filter.matches(&empty));
    }

    #[test]
    fn test_meta_filter_date_range() {
        let filter = MetaFilter {
            date_from: Some("2024-01-01".to_string()),
            date_to: Some("2024-12-31".to_string()),
            ..Default::default()
        };

        let in_range = serde_json::json!({
            "date": "2024-06-15"
        });
        assert!(filter.matches(&in_range));

        let before = serde_json::json!({
            "date": "2023-12-31"
        });
        assert!(!filter.matches(&before));

        let after = serde_json::json!({
            "date": "2025-01-01"
        });
        assert!(!filter.matches(&after));

        // Metadata without a date is not rejected by the date bounds.
        let no_date = serde_json::json!({
            "patient_id": "P-123"
        });
        assert!(filter.matches(&no_date));
    }

    #[test]
    fn test_meta_filter_for_visit() {
        let filter = MetaFilter::for_visit("V-789");

        let matching = serde_json::json!({
            "visit_id": "V-789",
            "patient_id": "P-123"
        });
        assert!(filter.matches(&matching));

        let not_matching = serde_json::json!({
            "visit_id": "V-other",
            "patient_id": "P-123"
        });
        assert!(!filter.matches(&not_matching));
    }

    #[test]
    fn test_meta_filter_combined() {
        let filter = MetaFilter {
            patient_id: Some("P-123".to_string()),
            doc_type: Some("soap_note".to_string()),
            ..Default::default()
        };

        let both_match = serde_json::json!({
            "patient_id": "P-123",
            "doc_type": "soap_note"
        });
        assert!(filter.matches(&both_match));

        let wrong_type = serde_json::json!({
            "patient_id": "P-123",
            "doc_type": "prescription"
        });
        assert!(!filter.matches(&wrong_type));

        // A required field that is absent counts as a mismatch.
        let missing = serde_json::json!({
            "patient_id": "P-123"
        });
        assert!(!filter.matches(&missing));
    }

    #[test]
    fn test_batch_result_struct() {
        let result = BatchResult {
            success_count: 10,
            failure_count: 2,
            failed_ids: vec!["doc-5".to_string(), "doc-8".to_string()],
        };

        assert_eq!(result.success_count, 10);
        assert_eq!(result.failure_count, 2);
        assert_eq!(result.failed_ids.len(), 2);
        assert!(result.failed_ids.contains(&"doc-5".to_string()));
    }

    #[test]
    fn test_memex_config_with_bm25() {
        use crate::search::BM25Config;

        let bm25_config = BM25Config::default();
        let config = MemexConfig::new("test-app", "docs").with_bm25(bm25_config);

        assert!(config.enable_bm25);
        assert!(config.bm25_config.is_some());
    }

    #[test]
    fn test_memex_config_effective_bm25_path() {
        let config = MemexConfig::new("my-app", "docs");
        assert_eq!(config.effective_bm25_path(), "~/.rmcp-servers/my-app/bm25");
    }

    #[test]
    fn test_resolved_bm25_config_uses_app_specific_path_for_hybrid_defaults() {
        let config = MemexConfig::new("my-app", "docs");
        let bm25 = config
            .resolved_bm25_config()
            .expect("hybrid defaults should provision BM25");

        assert_eq!(bm25.index_path, "~/.rmcp-servers/my-app/bm25");
    }

    #[test]
    fn test_resolved_hybrid_config_reuses_resolved_bm25_path() {
        let config = MemexConfig::new("my-app", "docs");
        let hybrid = config.resolved_hybrid_config();

        assert_eq!(hybrid.bm25.index_path, "~/.rmcp-servers/my-app/bm25");
    }

    #[test]
    fn test_meta_filter_serialization() {
        let filter = MetaFilter::for_patient("P-123").with_custom("status", "active");

        let json = serde_json::to_string(&filter).unwrap();
        let deserialized: MetaFilter = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.patient_id, Some("P-123".to_string()));
        assert_eq!(deserialized.custom.len(), 1);
        assert_eq!(
            deserialized.custom[0],
            ("status".to_string(), "active".to_string())
        );
    }

    #[test]
    fn test_memex_config_serialization() {
        let config = MemexConfig::new("test", "ns")
            .with_dimension(512)
            .with_db_path("/tmp/test");

        let json = serde_json::to_string(&config).unwrap();
        let deserialized: MemexConfig = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.app_name, "test");
        assert_eq!(deserialized.namespace, "ns");
        assert_eq!(deserialized.dimension, 512);
        assert_eq!(deserialized.embedding_config.required_dimension, 512);
        assert_eq!(deserialized.db_path, Some("/tmp/test".to_string()));
    }

    #[test]
    fn test_store_item_serialization() {
        let item =
            StoreItem::new("id-1", "content").with_metadata(serde_json::json!({"key": "value"}));

        let json = serde_json::to_string(&item).unwrap();
        let deserialized: StoreItem = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.id, "id-1");
        assert_eq!(deserialized.text, "content");
        assert_eq!(deserialized.metadata["key"], "value");
    }
}