1pub mod adaptive;
7pub mod bm25;
9pub mod explained;
11mod types;
13pub use types::*;
14pub mod causal_analysis;
16pub mod enriched;
18#[cfg(feature = "pagerank")]
20pub mod hipporag_ppr;
21pub mod hybrid;
23pub mod pagerank_retrieval;
24pub mod symbolic_anchoring;
26
27#[cfg(feature = "parallel-processing")]
28use crate::parallel::ParallelProcessor;
29use crate::{
30 config::Config,
31 core::{ChunkId, EntityId, KnowledgeGraph},
32 summarization::DocumentTree,
33 vector::{EmbeddingGenerator, VectorUtils},
34 Result,
35};
36use std::collections::{HashMap, HashSet};
37
38pub use bm25::{BM25Result, BM25Retriever, Document as BM25Document};
39pub use enriched::{EnrichedRetrievalConfig, EnrichedRetriever};
40pub use explained::{ExplainedAnswer, ReasoningStep, SourceReference, SourceType};
41pub use hybrid::{FusionMethod, HybridConfig, HybridRetriever, HybridSearchResult};
42
43#[cfg(feature = "pagerank")]
44pub use pagerank_retrieval::{PageRankRetrievalSystem, ScoredResult};
45
46#[cfg(feature = "pagerank")]
47pub use hipporag_ppr::{Fact, HippoRAGConfig, HippoRAGRetriever};
48
49use crate::vector::store::VectorStore;
50
/// Retrieval front-end that combines a vector store, an embedding generator
/// and several optional, feature-gated retrievers.
pub struct RetrievalSystem {
    /// Shared handle to the vector store used for similarity search.
    vector_store: std::sync::Arc<dyn VectorStore>,
    /// Generator used to embed query text and graph content.
    embedding_generator: EmbeddingGenerator,
    /// Retrieval tuning parameters (top-k, similarity threshold, weights).
    config: RetrievalConfig,
    /// Optional parallel processor (only with the `parallel-processing` feature).
    #[cfg(feature = "parallel-processing")]
    parallel_processor: Option<ParallelProcessor>,
    /// Optional PageRank-based retriever (only with the `pagerank` feature).
    #[cfg(feature = "pagerank")]
    pagerank_retriever: Option<PageRankRetrievalSystem>,
    /// Optional metadata-aware retriever; `None` until one is installed.
    enriched_retriever: Option<EnrichedRetriever>,
    /// Concept-filtering flag (only with the `lazygraphrag` feature).
    // NOTE(review): no reader of this flag is visible in this part of the file.
    #[cfg(feature = "lazygraphrag")]
    concept_filtering_enabled: bool,
}
64
65impl RetrievalSystem {
66 pub fn new(config: &Config) -> Result<Self> {
68 let retrieval_config = RetrievalConfig {
69 top_k: config.retrieval.top_k,
70 similarity_threshold: 0.35,
71 max_expansion_depth: 2,
72 entity_weight: 0.4,
73 chunk_weight: 0.4,
74 graph_weight: 0.2,
75 #[cfg(feature = "lazygraphrag")]
76 use_concept_filtering: false,
77 #[cfg(feature = "lazygraphrag")]
78 concept_top_k: 20,
79 };
80
81 let vector_store =
84 std::sync::Arc::new(crate::vector::memory_store::MemoryVectorStore::new());
85
86 Ok(Self {
87 vector_store,
88 embedding_generator: EmbeddingGenerator::new(128), config: retrieval_config,
90 #[cfg(feature = "parallel-processing")]
91 parallel_processor: None,
92 #[cfg(feature = "pagerank")]
93 pagerank_retriever: None,
94 enriched_retriever: None,
95 #[cfg(feature = "lazygraphrag")]
96 concept_filtering_enabled: false,
97 })
98 }
99}
100
101impl RetrievalSystem {
102 #[cfg(feature = "parallel-processing")]
104 pub fn with_parallel_processing(
105 vector_store: std::sync::Arc<dyn VectorStore>,
106 embedding_generator: EmbeddingGenerator,
107 parallel_processor: ParallelProcessor,
108 ) -> Result<Self> {
109 let retrieval_config = RetrievalConfig::default();
114
115 Ok(Self {
116 vector_store,
117 embedding_generator,
118 config: retrieval_config,
119 parallel_processor: Some(parallel_processor),
120 #[cfg(feature = "pagerank")]
121 pagerank_retriever: None,
122 enriched_retriever: None,
123 #[cfg(feature = "lazygraphrag")]
124 concept_filtering_enabled: false,
125 })
126 }
127
128 pub async fn index_graph(&self, graph: &KnowledgeGraph) -> Result<()> {
130 for entity in graph.entities() {
132 if let Some(embedding) = &entity.embedding {
133 let id = format!("entity:{}", entity.id);
134 self.vector_store
136 .add_vector(&id, embedding.clone(), HashMap::new())
137 .await?;
138 }
139 }
140
141 for chunk in graph.chunks() {
143 if let Some(embedding) = &chunk.embedding {
144 let id = format!("chunk:{}", chunk.id);
145 self.vector_store
146 .add_vector(&id, embedding.clone(), HashMap::new())
147 .await?;
148 }
149 }
150
151 self.vector_store.initialize().await?;
153
154 Ok(())
155 }
156
157 #[cfg(feature = "pagerank")]
159 pub fn initialize_pagerank(&mut self, graph: &KnowledgeGraph) -> Result<()> {
160 use crate::graph::pagerank::{PageRankConfig, ScoreWeights};
161
162 #[cfg(feature = "tracing")]
163 tracing::debug!("Initializing high-performance PageRank retrieval system...");
164
165 let pagerank_config = PageRankConfig {
166 damping_factor: 0.85,
167 max_iterations: 50, tolerance: 1e-5, personalized: true,
170 #[cfg(feature = "parallel-processing")]
171 parallel_enabled: self.parallel_processor.is_some(),
172 #[cfg(not(feature = "parallel-processing"))]
173 parallel_enabled: false,
174 cache_size: 2000, sparse_threshold: 500,
176 incremental_updates: true,
177 simd_block_size: 64, };
179
180 let score_weights = ScoreWeights {
181 vector_weight: 0.3,
182 pagerank_weight: 0.5, chunk_weight: 0.15,
184 relationship_weight: 0.05,
185 };
186
187 let mut pagerank_retriever = PageRankRetrievalSystem::new(self.config.top_k)
188 .with_pagerank_config(pagerank_config)
189 .with_score_weights(score_weights)
190 .with_incremental_mode(true)
191 .with_min_threshold(0.05);
192
193 pagerank_retriever.precompute_global_pagerank(graph)?;
198
199 self.pagerank_retriever = Some(pagerank_retriever);
200
201 #[cfg(feature = "tracing")]
202 tracing::debug!("PageRank retrieval system initialized with 27x performance optimizations");
203 Ok(())
204 }
205
206 pub fn initialize_enriched(&mut self, config: Option<EnrichedRetrievalConfig>) -> Result<()> {
208 #[cfg(feature = "tracing")]
209 tracing::debug!("Initializing enriched metadata-aware retrieval system...");
210
211 let enriched_config = config.unwrap_or_default();
212 let enriched_retriever = EnrichedRetriever::with_config(enriched_config);
213
214 self.enriched_retriever = Some(enriched_retriever);
215
216 #[cfg(feature = "tracing")]
217 tracing::debug!("Enriched retrieval system initialized with metadata boosting");
218 Ok(())
219 }
220
221 #[cfg(feature = "pagerank")]
223 pub fn pagerank_query(
224 &self,
225 query: &str,
226 graph: &KnowledgeGraph,
227 max_results: Option<usize>,
228 ) -> Result<Vec<ScoredResult>> {
229 if let Some(pagerank_retriever) = &self.pagerank_retriever {
230 pagerank_retriever.search_with_pagerank(query, graph, max_results)
231 } else {
232 Err(crate::core::GraphRAGError::Retrieval {
233 message: "PageRank retriever not initialized. Call initialize_pagerank() first."
234 .to_string(),
235 })
236 }
237 }
238
239 #[cfg(feature = "pagerank")]
241 pub fn pagerank_batch_query(
242 &self,
243 queries: &[&str],
244 graph: &KnowledgeGraph,
245 max_results_per_query: Option<usize>,
246 ) -> Result<Vec<Vec<ScoredResult>>> {
247 if let Some(pagerank_retriever) = &self.pagerank_retriever {
248 pagerank_retriever.batch_search(queries, graph, max_results_per_query)
249 } else {
250 Err(crate::core::GraphRAGError::Retrieval {
251 message: "PageRank retriever not initialized. Call initialize_pagerank() first."
252 .to_string(),
253 })
254 }
255 }
256
257 pub fn query(&self, query: &str) -> Result<Vec<String>> {
259 Ok(vec![format!("Results for query: {}", query)])
267 }
268
269 pub async fn hybrid_query(
271 &mut self,
272 query: &str,
273 graph: &KnowledgeGraph,
274 ) -> Result<Vec<SearchResult>> {
275 self.hybrid_query_with_trees(query, graph, &HashMap::new())
276 .await
277 }
278
279 pub async fn hybrid_query_with_trees(
281 &mut self,
282 query: &str,
283 graph: &KnowledgeGraph,
284 document_trees: &HashMap<crate::core::DocumentId, DocumentTree>,
285 ) -> Result<Vec<SearchResult>> {
286 let analysis = self.analyze_query(query, graph)?;
288
289 let query_embedding = self.embedding_generator.generate_embedding(query);
291
292 let mut results = self
294 .execute_adaptive_retrieval(query, &query_embedding, graph, document_trees, &analysis)
295 .await?;
296
297 if let Some(enriched_retriever) = &self.enriched_retriever {
299 results = enriched_retriever.boost_with_metadata(results, query, graph)?;
301
302 results = enriched_retriever.filter_by_structure(query, results, graph)?;
304 }
305
306 Ok(results)
307 }
308
309 pub async fn legacy_hybrid_query(
311 &mut self,
312 query: &str,
313 graph: &KnowledgeGraph,
314 ) -> Result<Vec<SearchResult>> {
315 let query_embedding = self.embedding_generator.generate_embedding(query);
317
318 let results = self.comprehensive_search(&query_embedding, graph).await?;
320
321 Ok(results)
322 }
323
324 pub async fn add_embeddings_to_graph(&mut self, graph: &mut KnowledgeGraph) -> Result<()> {
326 #[cfg(feature = "parallel-processing")]
327 if let Some(processor) = self.parallel_processor.clone() {
328 return self.add_embeddings_parallel(graph, &processor).await;
329 }
330
331 self.add_embeddings_sequential(graph).await
332 }
333
334 #[cfg(feature = "parallel-processing")]
336 async fn add_embeddings_parallel(
337 &mut self,
338 graph: &mut KnowledgeGraph,
339 processor: &ParallelProcessor,
340 ) -> Result<()> {
341 let mut chunk_texts = Vec::new();
343 let mut entity_texts = Vec::new();
344
345 for chunk in graph.chunks() {
347 if chunk.embedding.is_none() {
348 chunk_texts.push((chunk.id.clone(), chunk.content.clone()));
349 }
350 }
351
352 for entity in graph.entities() {
354 if entity.embedding.is_none() {
355 let entity_text = format!("{} {}", entity.name, entity.entity_type);
356 entity_texts.push((entity.id.clone(), entity_text));
357 }
358 }
359
360 let total_items = chunk_texts.len() + entity_texts.len();
365 if processor.should_use_parallel(total_items) {
366 #[cfg(feature = "tracing")]
367 tracing::debug!(
368 "Processing {total_items} embeddings with enhanced sequential approach"
369 );
370 }
371
372 for (chunk_id, text) in chunk_texts {
374 let embedding = self.embedding_generator.generate_embedding(&text);
375 if let Some(chunk) = graph.get_chunk_mut(&chunk_id) {
376 chunk.embedding = Some(embedding);
377 }
378 }
379
380 for (entity_id, text) in entity_texts {
382 let embedding = self.embedding_generator.generate_embedding(&text);
383 if let Some(entity) = graph.get_entity_mut(&entity_id) {
384 entity.embedding = Some(embedding);
385 }
386 }
387
388 self.index_graph(graph).await?;
390
391 Ok(())
392 }
393
394 #[cfg_attr(not(feature = "tracing"), allow(unused_assignments, unused_variables))]
396 async fn add_embeddings_sequential(&mut self, graph: &mut KnowledgeGraph) -> Result<()> {
397 let _total_chunks = graph.chunks().count();
399 let _total_entities = graph.entities().count();
400 let mut chunk_count = 0;
404 for chunk in graph.chunks_mut() {
405 if chunk.embedding.is_none() {
406 let embedding = self.embedding_generator.generate_embedding(&chunk.content);
407 chunk.embedding = Some(embedding);
408 chunk_count += 1;
409 }
410 }
411
412 let mut entity_count = 0;
414 for entity in graph.entities_mut() {
415 if entity.embedding.is_none() {
416 let entity_text = format!("{} {}", entity.name, entity.entity_type);
418 let embedding = self.embedding_generator.generate_embedding(&entity_text);
419 entity.embedding = Some(embedding);
420 entity_count += 1;
421 }
422 }
423
424 #[cfg(feature = "tracing")]
425 tracing::debug!(
426 "Generated embeddings for {chunk_count} chunks and {entity_count} entities"
427 );
428
429 self.index_graph(graph).await?;
432
433 Ok(())
434 }
435
436 #[cfg(feature = "parallel-processing")]
439 pub async fn batch_query(
440 &mut self,
441 queries: &[&str],
442 graph: &KnowledgeGraph,
443 ) -> Result<Vec<Vec<SearchResult>>> {
444 let processor =
445 self.parallel_processor
446 .as_ref()
447 .ok_or_else(|| crate::core::GraphRAGError::Config {
448 message: "Parallel processor not initialized".to_string(),
449 })?;
450
451 if !processor.should_use_parallel(queries.len()) {
452 let mut results = Vec::new();
453 for &query in queries {
454 results.push(self.hybrid_query(query, graph).await?);
455 }
456 return Ok(results);
457 }
458
459 let chunk_size = processor.config().chunk_batch_size.min(queries.len());
460 #[cfg(feature = "tracing")]
461 tracing::debug!(
462 "Processing {} queries with enhanced sequential approach (chunk size: {})",
463 queries.len(),
464 chunk_size
465 );
466
467 let mut all_results = Vec::new();
468 for &query in queries {
469 match self.hybrid_query(query, graph).await {
470 Ok(results) => all_results.push(results),
471 Err(e) => {
472 #[cfg(feature = "tracing")]
473 tracing::warn!("Error processing query '{query}': {e}");
474 all_results.push(Vec::new());
475 },
476 }
477 }
478
479 Ok(all_results)
480 }
481
482 #[cfg(not(feature = "parallel-processing"))]
484 pub async fn batch_query(
485 &mut self,
486 queries: &[&str],
487 graph: &KnowledgeGraph,
488 ) -> Result<Vec<Vec<SearchResult>>> {
489 let mut results = Vec::new();
490 for &query in queries {
491 results.push(self.hybrid_query(query, graph).await?);
492 }
493 Ok(results)
494 }
495
496 pub fn analyze_query(&self, query: &str, graph: &KnowledgeGraph) -> Result<QueryAnalysis> {
498 let query_lower = query.to_lowercase();
499 let words: Vec<&str> = query_lower.split_whitespace().collect();
500
501 let mut key_entities = Vec::new();
503 for entity in graph.entities() {
504 let entity_name_lower = entity.name.to_lowercase();
505 if words
506 .iter()
507 .any(|&word| entity_name_lower.contains(word) || word.contains(&entity_name_lower))
508 {
509 key_entities.push(entity.name.clone());
510 }
511 }
512
513 let concepts: Vec<String> = words
515 .iter()
516 .filter(|&&word| word.len() > 3 && !self.is_stop_word(word))
517 .filter(|&&word| {
518 !key_entities.iter().any(|entity| {
519 entity.to_lowercase().contains(word) || word.contains(&entity.to_lowercase())
520 })
521 })
522 .map(|&word| word.to_string())
523 .collect();
524
525 let query_type = if !key_entities.is_empty() && key_entities.len() > 1 {
527 QueryType::Relationship
528 } else if !key_entities.is_empty() {
529 QueryType::EntityFocused
530 } else if self.has_abstract_concepts(&words) {
531 QueryType::Conceptual
532 } else if self.has_question_words(&words) {
533 QueryType::Exploratory
534 } else {
535 QueryType::Factual
536 };
537
538 let intent = if words
540 .iter()
541 .any(|&w| ["overview", "summary", "general", "about"].contains(&w))
542 {
543 QueryIntent::Overview
544 } else if words
545 .iter()
546 .any(|&w| ["detailed", "specific", "exactly", "precise"].contains(&w))
547 {
548 QueryIntent::Detailed
549 } else if words
550 .iter()
551 .any(|&w| ["compare", "vs", "versus", "between", "difference"].contains(&w))
552 {
553 QueryIntent::Comparative
554 } else if words
555 .iter()
556 .any(|&w| ["cause", "why", "because", "lead", "result"].contains(&w))
557 {
558 QueryIntent::Causal
559 } else if words
560 .iter()
561 .any(|&w| ["when", "time", "before", "after", "during"].contains(&w))
562 {
563 QueryIntent::Temporal
564 } else {
565 QueryIntent::Detailed
566 };
567
568 let complexity_score = (words.len() as f32 * 0.1
570 + key_entities.len() as f32 * 0.3
571 + concepts.len() as f32 * 0.2)
572 .min(1.0);
573
574 Ok(QueryAnalysis {
575 query_type,
576 key_entities,
577 concepts,
578 intent,
579 complexity_score,
580 })
581 }
582
583 pub async fn execute_adaptive_retrieval(
585 &mut self,
586 query: &str,
587 query_embedding: &[f32],
588 graph: &KnowledgeGraph,
589 document_trees: &HashMap<crate::core::DocumentId, DocumentTree>,
590 analysis: &QueryAnalysis,
591 ) -> Result<Vec<SearchResult>> {
592 let mut all_results = Vec::new();
593
594 let (vector_weight, graph_weight, hierarchical_weight) =
596 self.calculate_strategy_weights(analysis);
597
598 if vector_weight > 0.0 {
600 let mut vector_results = self
601 .vector_similarity_search(query_embedding, graph)
602 .await?;
603 for result in &mut vector_results {
604 result.score *= vector_weight;
605 }
606 all_results.extend(vector_results);
607 }
608
609 if graph_weight > 0.0 {
611 let mut graph_results = match analysis.query_type {
612 QueryType::EntityFocused | QueryType::Relationship => {
613 self.entity_centric_search(query_embedding, graph, &analysis.key_entities)?
614 },
615 _ => self.entity_based_search(query_embedding, graph)?,
616 };
617 for result in &mut graph_results {
618 result.score *= graph_weight;
619 }
620 all_results.extend(graph_results);
621 }
622
623 if hierarchical_weight > 0.0 && !document_trees.is_empty() {
625 let mut hierarchical_results =
626 self.hierarchical_search(query, document_trees, analysis)?;
627 for result in &mut hierarchical_results {
628 result.score *= hierarchical_weight;
629 }
630 all_results.extend(hierarchical_results);
631 }
632
633 if analysis.complexity_score > 0.7 {
635 let traversal_results =
636 self.advanced_graph_traversal(query_embedding, graph, analysis)?;
637 all_results.extend(traversal_results);
638 }
639
640 let fusion_results = self.cross_strategy_fusion(&all_results, analysis)?;
642 all_results.extend(fusion_results);
643
644 let final_results = self.adaptive_rank_and_deduplicate(all_results, analysis)?;
646
647 Ok(final_results.into_iter().take(self.config.top_k).collect())
648 }
649
650 pub async fn comprehensive_search(
652 &self,
653 query_embedding: &[f32],
654 graph: &KnowledgeGraph,
655 ) -> Result<Vec<SearchResult>> {
656 let mut all_results = Vec::new();
657
658 let vector_results = self
660 .vector_similarity_search(query_embedding, graph)
661 .await?;
662 all_results.extend(vector_results);
663
664 let entity_results = self.entity_based_search(query_embedding, graph)?;
666 all_results.extend(entity_results);
667
668 let graph_results = self.graph_traversal_search(query_embedding, graph)?;
670 all_results.extend(graph_results);
671
672 let final_results = self.rank_and_deduplicate(all_results)?;
674
675 Ok(final_results.into_iter().take(self.config.top_k).collect())
676 }
677
678 async fn vector_similarity_search(
680 &self,
681 query_embedding: &[f32],
682 graph: &KnowledgeGraph,
683 ) -> Result<Vec<SearchResult>> {
684 let mut results = Vec::new();
685
686 let similar_vectors = self
690 .vector_store
691 .search(query_embedding, self.config.top_k * 2)
692 .await?;
693
694 for store_result in similar_vectors {
695 let id = store_result.id;
696 let similarity = store_result.score;
697 if similarity >= self.config.similarity_threshold {
698 let result = if id.starts_with("entity:") {
699 let entity_id = EntityId::new(
700 id.strip_prefix("entity:")
701 .expect("prefix checked")
702 .to_string(),
703 );
704 graph.get_entity(&entity_id).map(|entity| SearchResult {
705 id: entity.id.to_string(),
706 content: entity.name.clone(),
707 score: similarity * self.config.entity_weight,
708 result_type: ResultType::Entity,
709 entities: vec![entity.name.clone()],
710 source_chunks: entity
711 .mentions
712 .iter()
713 .map(|m| m.chunk_id.to_string())
714 .collect(),
715 })
716 } else if id.starts_with("chunk:") {
717 let chunk_id = ChunkId::new(
718 id.strip_prefix("chunk:")
719 .expect("prefix checked")
720 .to_string(),
721 );
722 if let Some(chunk) = graph.get_chunk(&chunk_id) {
723 let entity_names: Vec<String> = chunk
724 .entities
725 .iter()
726 .filter_map(|eid| graph.get_entity(eid))
727 .map(|e| e.name.clone())
728 .collect();
729
730 Some(SearchResult {
731 id: chunk.id.to_string(),
732 content: chunk.content.clone(),
733 score: similarity * self.config.chunk_weight,
734 result_type: ResultType::Chunk,
735 entities: entity_names,
736 source_chunks: vec![chunk.id.to_string()],
737 })
738 } else {
739 None
740 }
741 } else {
742 None
743 };
744
745 if let Some(search_result) = result {
746 results.push(search_result);
747 }
748 }
749 }
750
751 Ok(results)
752 }
753
754 fn entity_based_search(
756 &self,
757 query_embedding: &[f32],
758 graph: &KnowledgeGraph,
759 ) -> Result<Vec<SearchResult>> {
760 let mut results = Vec::new();
761 let mut visited = HashSet::new();
762
763 let entity_similarities = self.find_relevant_entities(query_embedding, graph)?;
765
766 for (entity_id, similarity) in entity_similarities.into_iter().take(5) {
767 if visited.contains(&entity_id) {
768 continue;
769 }
770
771 let expanded_entities = self.expand_through_relationships(
773 &entity_id,
774 graph,
775 self.config.max_expansion_depth,
776 &mut visited,
777 )?;
778
779 for expanded_entity_id in expanded_entities {
780 if let Some(entity) = graph.get_entity(&expanded_entity_id) {
781 let expansion_penalty = if expanded_entity_id == entity_id {
782 1.0
783 } else {
784 0.8
785 };
786
787 results.push(SearchResult {
788 id: entity.id.to_string(),
789 content: format!("{} ({})", entity.name, entity.entity_type),
790 score: similarity * expansion_penalty * self.config.entity_weight,
791 result_type: ResultType::Entity,
792 entities: vec![entity.name.clone()],
793 source_chunks: entity
794 .mentions
795 .iter()
796 .map(|m| m.chunk_id.to_string())
797 .collect(),
798 });
799 }
800 }
801 }
802
803 Ok(results)
804 }
805
806 fn calculate_strategy_weights(&self, analysis: &QueryAnalysis) -> (f32, f32, f32) {
808 match (&analysis.query_type, &analysis.intent) {
809 (QueryType::EntityFocused, _) => (0.5, 0.4, 0.1),
812 (QueryType::Relationship, _) => (0.3, 0.6, 0.1),
813 (QueryType::Conceptual, QueryIntent::Overview) => (0.2, 0.2, 0.6),
814 (QueryType::Conceptual, _) => (0.4, 0.3, 0.3),
815 (QueryType::Exploratory, QueryIntent::Overview) => (0.3, 0.2, 0.5),
816 (QueryType::Exploratory, _) => (0.4, 0.4, 0.2),
817 (QueryType::Factual, _) => (0.6, 0.3, 0.1),
818 }
819 }
820
821 fn entity_centric_search(
823 &mut self,
824 query_embedding: &[f32],
825 graph: &KnowledgeGraph,
826 key_entities: &[String],
827 ) -> Result<Vec<SearchResult>> {
828 let mut results = Vec::new();
829 let mut visited = HashSet::new();
830
831 for entity_name in key_entities {
832 if let Some(entity) = graph
834 .entities()
835 .find(|e| e.name.eq_ignore_ascii_case(entity_name))
836 {
837 results.push(SearchResult {
839 id: entity.id.to_string(),
840 content: format!("{} ({})", entity.name, entity.entity_type),
841 score: 0.9, result_type: ResultType::Entity,
843 entities: vec![entity.name.clone()],
844 source_chunks: entity
845 .mentions
846 .iter()
847 .map(|m| m.chunk_id.to_string())
848 .collect(),
849 });
850
851 let neighbors = graph.get_neighbors(&entity.id);
853 for (neighbor, relationship) in neighbors {
854 if !visited.contains(&neighbor.id) {
855 visited.insert(neighbor.id.clone());
856
857 let rel_embedding = self
859 .embedding_generator
860 .generate_embedding(&relationship.relation_type);
861 let rel_similarity =
862 VectorUtils::cosine_similarity(query_embedding, &rel_embedding);
863
864 results.push(SearchResult {
865 id: neighbor.id.to_string(),
866 content: format!("{} ({})", neighbor.name, neighbor.entity_type),
867 score: 0.7 * relationship.confidence * (1.0 + rel_similarity),
868 result_type: ResultType::Entity,
869 entities: vec![neighbor.name.clone()],
870 source_chunks: neighbor
871 .mentions
872 .iter()
873 .map(|m| m.chunk_id.to_string())
874 .collect(),
875 });
876 }
877 }
878 }
879 }
880
881 Ok(results)
882 }
883
884 fn hierarchical_search(
886 &self,
887 query: &str,
888 document_trees: &HashMap<crate::core::DocumentId, DocumentTree>,
889 analysis: &QueryAnalysis,
890 ) -> Result<Vec<SearchResult>> {
891 let mut results = Vec::new();
892 let max_results_per_tree = match analysis.intent {
893 QueryIntent::Overview => 3,
894 QueryIntent::Detailed => 8,
895 _ => 5,
896 };
897
898 for (doc_id, tree) in document_trees.iter() {
899 let tree_summaries = tree.query(query, max_results_per_tree)?;
900
901 for (idx, summary) in tree_summaries.iter().enumerate() {
902 let level_bonus = match analysis.intent {
904 QueryIntent::Overview => 0.3,
905 QueryIntent::Detailed => 0.2,
906 _ => 0.0,
907 };
908
909 results.push(SearchResult {
910 id: format!("{}:summary:{}", doc_id, idx),
911 content: summary.summary.clone(),
912 score: summary.score + level_bonus,
913 result_type: ResultType::HierarchicalSummary,
914 entities: Vec::new(),
915 source_chunks: vec![doc_id.to_string()],
916 });
917 }
918 }
919
920 Ok(results)
921 }
922
923 fn advanced_graph_traversal(
925 &self,
926 query_embedding: &[f32],
927 graph: &KnowledgeGraph,
928 analysis: &QueryAnalysis,
929 ) -> Result<Vec<SearchResult>> {
930 let mut results = Vec::new();
931
932 if analysis.query_type == QueryType::Relationship && analysis.key_entities.len() >= 2 {
933 results.extend(self.find_entity_paths(graph, &analysis.key_entities)?);
935 }
936
937 if analysis.complexity_score > 0.8 {
938 results.extend(self.community_based_search(query_embedding, graph)?);
940 }
941
942 Ok(results)
943 }
944
945 fn cross_strategy_fusion(
947 &self,
948 all_results: &[SearchResult],
949 _analysis: &QueryAnalysis,
950 ) -> Result<Vec<SearchResult>> {
951 let mut fusion_results = Vec::new();
952
953 let mut content_groups: HashMap<String, Vec<&SearchResult>> = HashMap::new();
955
956 for result in all_results {
957 let content_key = Self::safe_truncate(&result.content, 50);
958
959 content_groups.entry(content_key).or_default().push(result);
960 }
961
962 for (content_key, group) in content_groups {
964 if group.len() > 1 {
965 let types: HashSet<_> = group.iter().map(|r| &r.result_type).collect();
966 if types.len() > 1 {
967 let avg_score = group.iter().map(|r| r.score).sum::<f32>() / group.len() as f32;
969 let boost = 0.2 * (types.len() - 1) as f32;
970
971 let all_entities: HashSet<_> =
972 group.iter().flat_map(|r| r.entities.iter()).collect();
973
974 let all_chunks: HashSet<_> =
975 group.iter().flat_map(|r| r.source_chunks.iter()).collect();
976
977 fusion_results.push(SearchResult {
978 id: format!(
979 "fusion_{}",
980 content_key.chars().take(10).collect::<String>()
981 ),
982 content: group[0].content.clone(),
983 score: (avg_score + boost).min(1.0),
984 result_type: ResultType::Hybrid,
985 entities: all_entities.into_iter().cloned().collect(),
986 source_chunks: all_chunks.into_iter().cloned().collect(),
987 });
988 }
989 }
990 }
991
992 Ok(fusion_results)
993 }
994
995 fn adaptive_rank_and_deduplicate(
997 &self,
998 mut results: Vec<SearchResult>,
999 analysis: &QueryAnalysis,
1000 ) -> Result<Vec<SearchResult>> {
1001 for result in &mut results {
1003 match analysis.query_type {
1004 QueryType::EntityFocused if result.result_type == ResultType::Entity => {
1005 result.score *= 1.2;
1006 },
1007 QueryType::Conceptual if result.result_type == ResultType::HierarchicalSummary => {
1008 result.score *= 1.1;
1009 },
1010 QueryType::Relationship if result.entities.len() > 1 => {
1011 result.score *= 1.15;
1012 },
1013 _ => {},
1014 }
1015
1016 for entity in &analysis.key_entities {
1018 if result
1019 .entities
1020 .iter()
1021 .any(|e| e.eq_ignore_ascii_case(entity))
1022 {
1023 result.score *= 1.1;
1024 }
1025 }
1026 }
1027
1028 results.sort_by(|a, b| {
1030 b.score
1031 .partial_cmp(&a.score)
1032 .unwrap_or(std::cmp::Ordering::Equal)
1033 });
1034
1035 let mut deduplicated = Vec::new();
1037 let mut seen_content = HashSet::new();
1038 let mut type_counts: HashMap<ResultType, usize> = HashMap::new();
1039
1040 for result in results {
1041 let content_signature = self.create_content_signature(&result.content);
1042
1043 if !seen_content.contains(&content_signature) {
1044 let type_count = type_counts.get(&result.result_type).unwrap_or(&0);
1045
1046 let max_per_type = match result.result_type {
1048 ResultType::Entity => self.config.top_k / 3,
1049 ResultType::Chunk => self.config.top_k / 2,
1050 ResultType::HierarchicalSummary => self.config.top_k / 4,
1051 ResultType::Hybrid => self.config.top_k / 4,
1052 ResultType::GraphPath => self.config.top_k / 5,
1053 };
1054
1055 if *type_count < max_per_type {
1056 seen_content.insert(content_signature);
1057 *type_counts.entry(result.result_type.clone()).or_insert(0) += 1;
1058 deduplicated.push(result);
1059 }
1060 }
1061 }
1062
1063 Ok(deduplicated)
1064 }
1065
1066 fn find_entity_paths(
1068 &self,
1069 graph: &KnowledgeGraph,
1070 key_entities: &[String],
1071 ) -> Result<Vec<SearchResult>> {
1072 let mut results = Vec::new();
1073
1074 if key_entities.len() < 2 {
1075 return Ok(results);
1076 }
1077
1078 if let (Some(source), Some(target)) = (
1080 graph
1081 .entities()
1082 .find(|e| e.name.eq_ignore_ascii_case(&key_entities[0])),
1083 graph
1084 .entities()
1085 .find(|e| e.name.eq_ignore_ascii_case(&key_entities[1])),
1086 ) {
1087 let path_description =
1088 format!("Connection between {} and {}", source.name, target.name);
1089 let neighbors_source = graph.get_neighbors(&source.id);
1090 let neighbors_target = graph.get_neighbors(&target.id);
1091
1092 if neighbors_source
1094 .iter()
1095 .any(|(neighbor, _)| neighbor.id == target.id)
1096 {
1097 results.push(SearchResult {
1098 id: format!("path_{}_{}", source.id, target.id),
1099 content: format!("Direct relationship: {path_description}"),
1100 score: 0.8,
1101 result_type: ResultType::GraphPath,
1102 entities: vec![source.name.clone(), target.name.clone()],
1103 source_chunks: Vec::new(),
1104 });
1105 }
1106
1107 for (neighbor_s, rel_s) in &neighbors_source {
1109 for (neighbor_t, rel_t) in &neighbors_target {
1110 if neighbor_s.id == neighbor_t.id {
1111 results.push(SearchResult {
1112 id: format!("path_{}_{}_{}", source.id, neighbor_s.id, target.id),
1113 content: format!(
1114 "Indirect relationship via {}: {} -> {} -> {}",
1115 neighbor_s.name, source.name, neighbor_s.name, target.name
1116 ),
1117 score: 0.6 * rel_s.confidence * rel_t.confidence,
1118 result_type: ResultType::GraphPath,
1119 entities: vec![
1120 source.name.clone(),
1121 neighbor_s.name.clone(),
1122 target.name.clone(),
1123 ],
1124 source_chunks: Vec::new(),
1125 });
1126 }
1127 }
1128 }
1129 }
1130
1131 Ok(results)
1132 }
1133
1134 fn community_based_search(
1136 &self,
1137 query_embedding: &[f32],
1138 graph: &KnowledgeGraph,
1139 ) -> Result<Vec<SearchResult>> {
1140 let mut results = Vec::new();
1141 let mut entity_scores: HashMap<String, f32> = HashMap::new();
1142
1143 for entity in graph.entities() {
1145 let neighbors = graph.get_neighbors(&entity.id);
1146 let centrality_score = neighbors.len() as f32 * 0.1;
1147
1148 if let Some(embedding) = &entity.embedding {
1150 let similarity = VectorUtils::cosine_similarity(query_embedding, embedding);
1151 entity_scores.insert(entity.id.to_string(), centrality_score + similarity);
1152 }
1153 }
1154
1155 let mut sorted_entities: Vec<_> = entity_scores.iter().collect();
1157 sorted_entities.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
1158
1159 for (entity_id, score) in sorted_entities.iter().take(3) {
1160 if let Some(entity) = graph.entities().find(|e| e.id.to_string() == **entity_id) {
1161 let mut entity_context = String::new();
1163 for mention in entity.mentions.iter().take(2) {
1164 if let Some(chunk) = graph.chunks().find(|c| c.id == mention.chunk_id) {
1165 let chunk_excerpt = if chunk.content.len() > 200 {
1166 format!("{}...", &chunk.content[..200])
1167 } else {
1168 chunk.content.clone()
1169 };
1170 entity_context.push_str(&chunk_excerpt);
1171 entity_context.push(' ');
1172 }
1173 }
1174
1175 if entity_context.is_empty() {
1177 entity_context = format!(
1178 "{} is a {} character in the story.",
1179 entity.name, entity.entity_type
1180 );
1181 }
1182
1183 results.push(SearchResult {
1184 id: entity.id.to_string(),
1185 content: entity_context,
1186 score: **score,
1187 result_type: ResultType::Entity,
1188 entities: vec![entity.name.clone()],
1189 source_chunks: entity
1190 .mentions
1191 .iter()
1192 .map(|m| m.chunk_id.to_string())
1193 .collect(),
1194 });
1195 }
1196 }
1197
1198 Ok(results)
1199 }
1200
1201 fn has_abstract_concepts(&self, words: &[&str]) -> bool {
1203 const ABSTRACT_INDICATORS: &[&str] = &[
1204 "concept",
1205 "idea",
1206 "theory",
1207 "principle",
1208 "philosophy",
1209 "meaning",
1210 "understanding",
1211 "knowledge",
1212 "wisdom",
1213 "truth",
1214 "beauty",
1215 "justice",
1216 ];
1217 words
1218 .iter()
1219 .any(|&word| ABSTRACT_INDICATORS.contains(&word))
1220 }
1221
1222 fn has_question_words(&self, words: &[&str]) -> bool {
1224 const QUESTION_WORDS: &[&str] = &[
1225 "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
1226 ];
1227 words.iter().any(|&word| QUESTION_WORDS.contains(&word))
1228 }
1229
1230 fn create_content_signature(&self, content: &str) -> String {
1232 let prefix = Self::safe_truncate(content, 50);
1234 format!(
1235 "{}_{}",
1236 prefix
1237 .chars()
1238 .filter(|c| c.is_alphanumeric())
1239 .collect::<String>(),
1240 content.len()
1241 )
1242 }
1243
1244 fn graph_traversal_search(
1246 &self,
1247 _query_embedding: &[f32],
1248 _graph: &KnowledgeGraph,
1249 ) -> Result<Vec<SearchResult>> {
1250 Ok(Vec::new())
1258 }
1259
1260 fn find_relevant_entities(
1262 &self,
1263 query_embedding: &[f32],
1264 graph: &KnowledgeGraph,
1265 ) -> Result<Vec<(EntityId, f32)>> {
1266 let mut similarities = Vec::new();
1267
1268 for entity in graph.entities() {
1269 if let Some(embedding) = &entity.embedding {
1270 let similarity = VectorUtils::cosine_similarity(query_embedding, embedding);
1271 if similarity >= self.config.similarity_threshold {
1272 similarities.push((entity.id.clone(), similarity));
1273 }
1274 }
1275 }
1276
1277 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1279
1280 Ok(similarities)
1281 }
1282
1283 fn expand_through_relationships(
1285 &self,
1286 start_entity: &EntityId,
1287 graph: &KnowledgeGraph,
1288 max_depth: usize,
1289 visited: &mut HashSet<EntityId>,
1290 ) -> Result<Vec<EntityId>> {
1291 let mut results = Vec::new();
1292 let mut current_level = vec![start_entity.clone()];
1293 visited.insert(start_entity.clone());
1294
1295 for _depth in 0..max_depth {
1296 let mut next_level = Vec::new();
1297
1298 for entity_id in ¤t_level {
1299 results.push(entity_id.clone());
1300
1301 let neighbors = graph.get_neighbors(entity_id);
1303 for (neighbor_entity, _relationship) in neighbors {
1304 if !visited.contains(&neighbor_entity.id) {
1305 visited.insert(neighbor_entity.id.clone());
1306 next_level.push(neighbor_entity.id.clone());
1307 }
1308 }
1309 }
1310
1311 if next_level.is_empty() {
1312 break;
1313 }
1314
1315 current_level = next_level;
1316 }
1317
1318 Ok(results)
1319 }
1320
1321 fn is_stop_word(&self, word: &str) -> bool {
1323 const STOP_WORDS: &[&str] = &[
1324 "the", "be", "to", "of", "and", "a", "in", "that", "have", "i", "it", "for", "not",
1325 "on", "with", "he", "as", "you", "do", "at", "this", "but", "his", "by", "from",
1326 "they", "we", "say", "her", "she", "or", "an", "will", "my", "one", "all", "would",
1327 "there", "their", "what", "so", "up", "out", "if", "about", "who", "get", "which",
1328 "go", "me",
1329 ];
1330 STOP_WORDS.contains(&word)
1331 }
1332
1333 fn rank_and_deduplicate(&self, mut results: Vec<SearchResult>) -> Result<Vec<SearchResult>> {
1335 results.sort_by(|a, b| {
1337 b.score
1338 .partial_cmp(&a.score)
1339 .unwrap_or(std::cmp::Ordering::Equal)
1340 });
1341
1342 let mut seen_ids = HashSet::new();
1344 let mut deduplicated = Vec::new();
1345
1346 for result in results {
1347 if !seen_ids.contains(&result.id) {
1348 seen_ids.insert(result.id.clone());
1349 deduplicated.push(result);
1350 }
1351 }
1352
1353 Ok(deduplicated)
1354 }
1355
1356 pub async fn vector_search(
1358 &mut self,
1359 query: &str,
1360 max_results: usize,
1361 ) -> Result<Vec<SearchResult>> {
1362 let query_embedding = self.embedding_generator.generate_embedding(query);
1363 let similar_vectors = self
1364 .vector_store
1365 .search(&query_embedding, max_results)
1366 .await?;
1367
1368 let mut results = Vec::new();
1369 for store_result in similar_vectors {
1370 results.push(SearchResult {
1371 id: store_result.id.clone(),
1372 content: format!("Vector result for: {}", store_result.id),
1373 score: store_result.score,
1374 result_type: ResultType::Chunk,
1375 entities: Vec::new(),
1376 source_chunks: vec![store_result.id],
1377 });
1378 }
1379
1380 Ok(results)
1381 }
1382
1383 pub fn graph_search(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
1385 let mut results = Vec::new();
1387 results.push(SearchResult {
1388 id: format!("graph_result_{}", query.len()),
1389 content: format!("Graph-based result for: {query}"),
1390 score: 0.7,
1391 result_type: ResultType::GraphPath,
1392 entities: Vec::new(),
1393 source_chunks: Vec::new(),
1394 });
1395
1396 Ok(results.into_iter().take(max_results).collect())
1397 }
1398
1399 pub fn public_hierarchical_search(
1401 &self,
1402 query: &str,
1403 max_results: usize,
1404 ) -> Result<Vec<SearchResult>> {
1405 let mut results = Vec::new();
1407 results.push(SearchResult {
1408 id: format!("hierarchical_result_{}", query.len()),
1409 content: format!("Hierarchical result for: {query}"),
1410 score: 0.8,
1411 result_type: ResultType::HierarchicalSummary,
1412 entities: Vec::new(),
1413 source_chunks: Vec::new(),
1414 });
1415
1416 Ok(results.into_iter().take(max_results).collect())
1417 }
1418
1419 pub fn bm25_search(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
1421 let mut results = Vec::new();
1423 results.push(SearchResult {
1424 id: format!("bm25_result_{}", query.len()),
1425 content: format!("BM25 result for: {query}"),
1426 score: 0.75,
1427 result_type: ResultType::Chunk,
1428 entities: Vec::new(),
1429 source_chunks: Vec::new(),
1430 });
1431
1432 Ok(results.into_iter().take(max_results).collect())
1433 }
1434
1435 pub fn get_statistics(&self) -> RetrievalStatistics {
1437 RetrievalStatistics {
1440 indexed_vectors: 0, vector_dimension: 0, index_built: false, config: self.config.clone(),
1444 }
1445 }
1446
/// Returns at most the first `max_bytes` bytes of `s` as an owned string,
/// never splitting a UTF-8 character.
///
/// When `max_bytes` falls inside a multi-byte character the cut is moved
/// back to the nearest preceding character boundary, so the result may be
/// shorter than `max_bytes` (and is empty if no whole character fits).
fn safe_truncate(s: &str, max_bytes: usize) -> String {
    if max_bytes >= s.len() {
        return s.to_owned();
    }

    // Index 0 is always a character boundary, so the search cannot fail;
    // the fallback only exists to avoid an unwrap.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    s[..cut].to_owned()
}
1461
1462 pub fn save_state_to_json(&self, file_path: &str) -> Result<()> {
1464 use std::fs;
1465
1466 let mut json_data = json::JsonValue::new_object();
1467
1468 json_data["metadata"] = json::object! {
1470 "format_version" => "1.0",
1471 "created_at" => chrono::Utc::now().to_rfc3339(),
1472 "config" => json::object! {
1473 "top_k" => self.config.top_k,
1474 "similarity_threshold" => self.config.similarity_threshold,
1475 "max_expansion_depth" => self.config.max_expansion_depth,
1476 "entity_weight" => self.config.entity_weight,
1477 "chunk_weight" => self.config.chunk_weight,
1478 "graph_weight" => self.config.graph_weight
1479 }
1480 };
1481
1482 json_data["vector_index"] = json::object! {
1485 "vector_count" => 0, "dimension" => 0, "index_built" => false, "min_norm" => 0.0, "max_norm" => 0.0, "avg_norm" => 0.0 };
1492
1493 json_data["embedding_generator"] = json::object! {
1495 "dimension" => self.embedding_generator.dimension(),
1496 "cached_words" => self.embedding_generator.cached_words()
1497 };
1498
1499 #[cfg(feature = "parallel-processing")]
1501 {
1502 json_data["parallel_enabled"] = self.parallel_processor.is_some().into();
1503 }
1504 #[cfg(not(feature = "parallel-processing"))]
1505 {
1506 json_data["parallel_enabled"] = false.into();
1507 }
1508
1509 fs::write(file_path, json_data.dump())?;
1511 #[cfg(feature = "tracing")]
1512 tracing::info!("Retrieval system state saved to {file_path}");
1513
1514 Ok(())
1515 }
1516}
1517
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{config::Config, core::KnowledgeGraph};

    /// Turns a list of string literals into the owned `Vec<String>` used by
    /// the `entities` and `source_chunks` fields of `SearchResult`.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_owned()).collect()
    }

    /// `query` must succeed and its first result must contain the query text.
    #[test]
    fn test_query_placeholder() {
        let config = Config::default();
        let system = RetrievalSystem::new(&config).unwrap();

        let outcome = system.query("test query");
        assert!(outcome.is_ok());

        let answers = outcome.unwrap();
        assert!(!answers.is_empty());
        assert!(answers[0].contains("test query"));
    }

    /// Indexing an empty knowledge graph must succeed.
    #[tokio::test]
    async fn test_graph_indexing() {
        let config = Config::default();
        let system = RetrievalSystem::new(&config).unwrap();
        let empty_graph = KnowledgeGraph::new();

        let outcome = system.index_graph(&empty_graph).await;
        assert!(outcome.is_ok());
    }

    /// An explained answer built from two chunk results has an answer, a
    /// confidence in (0, 1], sources, and reasoning steps.
    #[test]
    fn test_explained_answer_creation() {
        let first = SearchResult {
            id: String::from("chunk_1"),
            content: String::from("This is the first relevant chunk about climate change."),
            score: 0.85,
            result_type: ResultType::Chunk,
            entities: owned(&["climate", "environment"]),
            source_chunks: owned(&["doc1_chunk1"]),
        };
        let second = SearchResult {
            id: String::from("chunk_2"),
            content: String::from("Another chunk discussing environmental policies."),
            score: 0.72,
            result_type: ResultType::Chunk,
            entities: owned(&["policy", "environment"]),
            source_chunks: owned(&["doc1_chunk2"]),
        };
        let search_results = vec![first, second];

        let explained = ExplainedAnswer::from_results(
            String::from("Climate change is a major environmental concern."),
            &search_results,
            "What is climate change?",
        );

        assert!(!explained.answer.is_empty());
        assert!(explained.confidence > 0.0 && explained.confidence <= 1.0);
        assert!(!explained.sources.is_empty());
        assert!(!explained.reasoning_steps.is_empty());
    }

    /// With no search results the confidence is zero and there are no
    /// sources, but reasoning steps are still produced.
    #[test]
    fn test_explained_answer_empty_results() {
        let explained = ExplainedAnswer::from_results(
            String::from("No relevant information found."),
            &[],
            "What is something unknown?",
        );

        assert_eq!(explained.confidence, 0.0);
        assert!(explained.sources.is_empty());
        assert!(!explained.reasoning_steps.is_empty());
    }

    /// The display format contains all four section headings.
    #[test]
    fn test_explained_answer_format_display() {
        let search_results = vec![SearchResult {
            id: String::from("test_chunk"),
            content: String::from("Test content about technology."),
            score: 0.9,
            result_type: ResultType::Chunk,
            entities: owned(&["technology"]),
            source_chunks: owned(&["doc1_chunk1"]),
        }];

        let explained = ExplainedAnswer::from_results(
            String::from("Technology is important."),
            &search_results,
            "Why is technology important?",
        );

        let rendered = explained.format_display();

        assert!(rendered.contains("**Answer:**"));
        assert!(rendered.contains("**Confidence:**"));
        assert!(rendered.contains("**Reasoning:**"));
        assert!(rendered.contains("**Sources:**"));
    }

    /// Reasoning steps are numbered from 1 without gaps, are described, and
    /// carry a confidence within [0, 1].
    #[test]
    fn test_reasoning_steps_structure() {
        let search_results = vec![SearchResult {
            id: String::from("entity_1"),
            content: String::from("Entity description"),
            score: 0.8,
            result_type: ResultType::Entity,
            entities: owned(&["person", "organization"]),
            source_chunks: Vec::new(),
        }];

        let explained = ExplainedAnswer::from_results(
            String::from("Answer text"),
            &search_results,
            "Who are the key people?",
        );

        for (index, step) in explained.reasoning_steps.iter().enumerate() {
            assert_eq!(step.step_number as usize, index + 1);
            assert!(!step.description.is_empty());
            assert!(step.confidence >= 0.0 && step.confidence <= 1.0);
        }
    }

    /// Chunk, entity, and graph-path results map to the `TextChunk`,
    /// `Entity`, and `Relationship` source types respectively.
    #[test]
    fn test_source_reference_types() {
        let search_results = vec![
            SearchResult {
                id: String::from("chunk"),
                content: String::from("Chunk content"),
                score: 0.7,
                result_type: ResultType::Chunk,
                entities: Vec::new(),
                source_chunks: Vec::new(),
            },
            SearchResult {
                id: String::from("entity"),
                content: String::from("Entity content"),
                score: 0.6,
                result_type: ResultType::Entity,
                entities: Vec::new(),
                source_chunks: Vec::new(),
            },
            SearchResult {
                id: String::from("path"),
                content: String::from("Graph path content"),
                score: 0.5,
                result_type: ResultType::GraphPath,
                entities: Vec::new(),
                source_chunks: Vec::new(),
            },
        ];

        let explained =
            ExplainedAnswer::from_results(String::from("Answer"), &search_results, "Query");

        let has_source_type = |wanted: SourceType| {
            explained
                .sources
                .iter()
                .any(|source| source.source_type == wanted)
        };

        assert!(has_source_type(SourceType::TextChunk));
        assert!(has_source_type(SourceType::Entity));
        assert!(has_source_type(SourceType::Relationship));
    }
}