use async_trait::async_trait;
use crate::error::Result;
use crate::types::{Candidate, ContextPack, ContextChunk};
use crate::{
    embeddings::EmbeddingService,
    retrieval::{DocumentRepository, HybridRetrievalService, HybridRetrievalConfig, Bm25SearchService},
    hyde::{HydeService, LlmService, HydeExpansion},
    query_understanding::{QueryUnderstandingService, QueryUnderstanding},
    ml_prediction::{MLPredictionService, RetrievalStrategy, MLPredictionResult},
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::collections::HashMap;

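/// Tuning knobs for the enhanced query pipeline: feature flags for HyDE,
/// query understanding, and ML strategy prediction, plus candidate and
/// reranking limits.
///
/// A config with reranking disabled can be built with struct-update syntax
/// (illustrative sketch):
///
/// ```ignore
/// let config = PipelineConfig { rerank_enabled: false, ..Default::default() };
/// ```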
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineConfig {
    pub enable_hyde: bool,
    pub enable_query_understanding: bool,
    pub enable_ml_prediction: bool,
    pub max_candidates: usize,
    pub rerank_enabled: bool,
    pub rerank_top_k: usize,
    pub timeout_seconds: u64,
}

impl Default for PipelineConfig {
    fn default() -> Self {
        Self {
            enable_hyde: true,
            enable_query_understanding: true,
            enable_ml_prediction: true,
            max_candidates: 50,
            rerank_enabled: true,
            rerank_top_k: 20,
            timeout_seconds: 30,
        }
    }
}

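/// Per-request options for [`EnhancedQueryPipeline::process_query`].
///
/// A minimal sketch of forcing a specific retrieval strategy (field names as
/// defined below):
///
/// ```ignore
/// let options = EnhancedQueryOptions {
///     k: 5,
///     override_strategy: Some(RetrievalStrategy::VectorOnly),
///     ..Default::default()
/// };
/// ```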
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedQueryOptions {
    pub session_id: String,
    pub k: usize,
    pub include_metadata: bool,
    pub enable_hyde: Option<bool>,
    pub override_strategy: Option<RetrievalStrategy>,
    pub context: Option<HashMap<String, serde_json::Value>>,
}

impl Default for EnhancedQueryOptions {
    fn default() -> Self {
        Self {
            session_id: "default".to_string(),
            k: 10,
            include_metadata: true,
            enable_hyde: None,
            override_strategy: None,
            context: None,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedQueryResult {
    pub candidates: Vec<Candidate>,
    pub context_pack: ContextPack,
    pub query_understanding: Option<QueryUnderstanding>,
    pub ml_prediction: Option<MLPredictionResult>,
    pub hyde_expansion: Option<HydeExpansion>,
    pub strategy_used: RetrievalStrategy,
    pub processing_time_ms: u64,
    pub total_candidates_found: usize,
}

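/// Re-orders retrieved candidates for a query; implementations are pluggable.
///
/// A minimal sketch of an implementation that simply sorts by the existing
/// retrieval score (assumes `Candidate` is `Clone`, as it is used elsewhere in
/// this module):
///
/// ```ignore
/// struct ScoreSortReranker;
///
/// #[async_trait]
/// impl RerankingService for ScoreSortReranker {
///     async fn rerank(&self, _query: &str, candidates: &[Candidate]) -> Result<Vec<Candidate>> {
///         let mut out = candidates.to_vec();
///         out.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
///         Ok(out)
///     }
/// }
/// ```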
#[async_trait]
pub trait RerankingService: Send + Sync {
    async fn rerank(&self, query: &str, candidates: &[Candidate]) -> Result<Vec<Candidate>>;
}

pub struct EnhancedQueryPipeline {
    config: PipelineConfig,
    document_repository: Arc<dyn DocumentRepository>,
    embedding_service: Arc<dyn EmbeddingService>,
    hybrid_retrieval: HybridRetrievalService,
    hyde_service: Option<Arc<HydeService>>,
    query_understanding: QueryUnderstandingService,
    ml_prediction: MLPredictionService,
    reranking_service: Option<Arc<dyn RerankingService>>,
}

impl Clone for EnhancedQueryPipeline {
    fn clone(&self) -> Self {
        Self {
            config: self.config.clone(),
            document_repository: self.document_repository.clone(),
            embedding_service: self.embedding_service.clone(),
            // Stateless helpers are rebuilt with default settings rather than cloned.
            hybrid_retrieval: HybridRetrievalService::new(
                self.embedding_service.clone(),
                HybridRetrievalConfig::default(),
            ),
            hyde_service: self.hyde_service.clone(),
            query_understanding: QueryUnderstandingService::new(),
            ml_prediction: MLPredictionService::default(),
            reranking_service: self.reranking_service.clone(),
        }
    }
}

impl EnhancedQueryPipeline {
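    /// Builds a pipeline from its collaborating services. HyDE is only wired up
    /// when both `config.enable_hyde` is true and an LLM service is supplied.
    ///
    /// A construction sketch (the repository and embedding service can be any
    /// implementations of the respective traits):
    ///
    /// ```ignore
    /// let pipeline = EnhancedQueryPipeline::new(
    ///     PipelineConfig::default(),
    ///     doc_repo,           // Arc<dyn DocumentRepository>
    ///     embedding_service,  // Arc<dyn EmbeddingService>
    ///     None,               // no LLM service, so HyDE stays disabled
    ///     None,               // no reranker, candidates keep retrieval order
    /// );
    /// ```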
    pub fn new(
        config: PipelineConfig,
        document_repository: Arc<dyn DocumentRepository>,
        embedding_service: Arc<dyn EmbeddingService>,
        llm_service: Option<Arc<dyn LlmService>>,
        reranking_service: Option<Arc<dyn RerankingService>>,
    ) -> Self {
        let hybrid_config = HybridRetrievalConfig::default();
        let hybrid_retrieval = HybridRetrievalService::new(
            embedding_service.clone(),
            hybrid_config,
        );

        let hyde_service = if config.enable_hyde {
            llm_service.map(|llm| {
                Arc::new(HydeService::new(
                    llm,
                    embedding_service.clone(),
                    Default::default(),
                ))
            })
        } else {
            None
        };

        Self {
            config,
            document_repository,
            embedding_service,
            hybrid_retrieval,
            hyde_service,
            query_understanding: QueryUnderstandingService::new(),
            ml_prediction: MLPredictionService::default(),
            reranking_service,
        }
    }

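    /// Runs the full pipeline for one query: understanding, strategy prediction,
    /// optional HyDE expansion, retrieval, optional reranking, result limiting,
    /// and context-pack assembly.
    ///
    /// A usage sketch:
    ///
    /// ```ignore
    /// let options = EnhancedQueryOptions { k: 5, ..Default::default() };
    /// let result = pipeline.process_query("what is hybrid retrieval?", &options).await?;
    /// println!("{} candidates in {} ms", result.candidates.len(), result.processing_time_ms);
    /// ```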
    pub async fn process_query(
        &self,
        query: &str,
        options: &EnhancedQueryOptions,
    ) -> Result<EnhancedQueryResult> {
        let start_time = std::time::Instant::now();

        let query_understanding = self.phase_query_understanding(query).await?;
        let ml_prediction = self.phase_ml_prediction(&query_understanding).await?;
        let strategy = self.phase_strategy_selection(options, &ml_prediction);
        let hyde_expansion = self.phase_hyde_expansion(query, &strategy, options).await?;
        let candidates = self.phase_retrieval(query, &strategy, options, hyde_expansion.as_ref()).await?;
        let reranked_candidates = self.phase_reranking(query, candidates).await?;
        let final_candidates = self.phase_result_limiting(reranked_candidates, options.k);
        let context_pack = self.phase_context_creation(&final_candidates, options).await?;

        self.create_final_result(
            final_candidates,
            context_pack,
            query_understanding,
            ml_prediction,
            hyde_expansion,
            strategy,
            start_time,
        )
    }

    async fn phase_query_understanding(&self, query: &str) -> Result<Option<QueryUnderstanding>> {
        if self.config.enable_query_understanding {
            Ok(Some(self.query_understanding.understand_query(query)?))
        } else {
            Ok(None)
        }
    }

    async fn phase_ml_prediction(
        &self,
        query_understanding: &Option<QueryUnderstanding>,
    ) -> Result<Option<MLPredictionResult>> {
        match query_understanding {
            Some(understanding) if self.config.enable_ml_prediction => {
                Ok(Some(self.ml_prediction.predict_strategy(understanding)?))
            }
            _ => Ok(None),
        }
    }

    fn phase_strategy_selection(
        &self,
        options: &EnhancedQueryOptions,
        ml_prediction: &Option<MLPredictionResult>,
    ) -> RetrievalStrategy {
        options.override_strategy.clone()
            .or_else(|| ml_prediction.as_ref().map(|p| p.prediction.strategy.clone()))
            .unwrap_or(RetrievalStrategy::Hybrid)
    }

    async fn phase_hyde_expansion(
        &self,
        query: &str,
        strategy: &RetrievalStrategy,
        options: &EnhancedQueryOptions,
    ) -> Result<Option<HydeExpansion>> {
        if self.should_use_hyde(strategy, options) {
            if let Some(ref hyde_service) = self.hyde_service {
                Ok(Some(hyde_service.expand_query(query).await?))
            } else {
                Ok(None)
            }
        } else {
            Ok(None)
        }
    }

    async fn phase_retrieval(
        &self,
        query: &str,
        strategy: &RetrievalStrategy,
        options: &EnhancedQueryOptions,
        hyde_expansion: Option<&HydeExpansion>,
    ) -> Result<Vec<Candidate>> {
        self.execute_retrieval_strategy(query, strategy, options, hyde_expansion).await
    }

    async fn phase_reranking(&self, query: &str, candidates: Vec<Candidate>) -> Result<Vec<Candidate>> {
        if self.config.rerank_enabled && candidates.len() > 1 {
            if let Some(ref reranker) = self.reranking_service {
                let top_candidates = candidates
                    .iter()
                    .take(self.config.rerank_top_k)
                    .cloned()
                    .collect::<Vec<_>>();
                reranker.rerank(query, &top_candidates).await
            } else {
                Ok(candidates)
            }
        } else {
            Ok(candidates)
        }
    }

    fn phase_result_limiting(&self, candidates: Vec<Candidate>, k: usize) -> Vec<Candidate> {
        candidates.into_iter().take(k).collect()
    }

    async fn phase_context_creation(
        &self,
        candidates: &[Candidate],
        options: &EnhancedQueryOptions,
    ) -> Result<ContextPack> {
        self.create_context_pack(candidates, options).await
    }

    fn create_final_result(
        &self,
        final_candidates: Vec<Candidate>,
        context_pack: ContextPack,
        query_understanding: Option<QueryUnderstanding>,
        ml_prediction: Option<MLPredictionResult>,
        hyde_expansion: Option<HydeExpansion>,
        strategy: RetrievalStrategy,
        start_time: std::time::Instant,
    ) -> Result<EnhancedQueryResult> {
        let total_candidates_found = final_candidates.len();
        let processing_time = start_time.elapsed();

        Ok(EnhancedQueryResult {
            candidates: final_candidates,
            context_pack,
            query_understanding,
            ml_prediction,
            hyde_expansion,
            strategy_used: strategy,
            processing_time_ms: processing_time.as_millis() as u64,
            total_candidates_found,
        })
    }

    async fn execute_retrieval_strategy(
        &self,
        query: &str,
        strategy: &RetrievalStrategy,
        options: &EnhancedQueryOptions,
        hyde_expansion: Option<&HydeExpansion>,
    ) -> Result<Vec<Candidate>> {
        match strategy {
            RetrievalStrategy::BM25Only => {
                Bm25SearchService::search(
                    &*self.document_repository,
                    &[query.to_string()],
                    &options.session_id,
                    self.config.max_candidates as i32,
                ).await
            }
            RetrievalStrategy::VectorOnly => {
                let query_embedding = self.embedding_service.embed(&[query.to_string()]).await?;
                let query_embedding = query_embedding.into_iter().next().unwrap();
                self.document_repository.vector_search(&query_embedding, self.config.max_candidates as i32).await
            }
            RetrievalStrategy::Hybrid => {
                self.hybrid_retrieval.retrieve(
                    &*self.document_repository,
                    &[query.to_string()],
                    &options.session_id,
                ).await
            }
            RetrievalStrategy::HydeEnhanced => {
                if let Some(expansion) = hyde_expansion {
                    self.execute_hyde_enhanced_search(query, expansion).await
                } else {
                    self.hybrid_retrieval.retrieve(
                        &*self.document_repository,
                        &[query.to_string()],
                        &options.session_id,
                    ).await
                }
            }
            RetrievalStrategy::MultiStep => {
                self.execute_multi_step_retrieval(query, options).await
            }
            RetrievalStrategy::Adaptive => {
                self.execute_adaptive_retrieval(query, options).await
            }
        }
    }

    async fn execute_hyde_enhanced_search(
        &self,
        query: &str,
        expansion: &HydeExpansion,
    ) -> Result<Vec<Candidate>> {
        if let Some(ref combined_embedding) = expansion.combined_embedding {
            self.document_repository.vector_search(combined_embedding, self.config.max_candidates as i32).await
        } else {
            let mut all_candidates = Vec::new();

            for hyp_doc in &expansion.hypothetical_documents {
                if let Some(ref embedding) = hyp_doc.embedding {
                    let candidates = self.document_repository
                        .vector_search(embedding, (self.config.max_candidates / expansion.hypothetical_documents.len()) as i32)
                        .await?;
                    all_candidates.extend(candidates);
                }
            }

            let original_candidates = self.hybrid_retrieval
                .retrieve(
                    &*self.document_repository,
                    &[query.to_string()],
                    "default",
                )
                .await?;
            all_candidates.extend(original_candidates);

            self.deduplicate_and_sort_candidates(all_candidates)
        }
    }

    async fn execute_multi_step_retrieval(
        &self,
        query: &str,
        options: &EnhancedQueryOptions,
    ) -> Result<Vec<Candidate>> {
        let initial_candidates = self.hybrid_retrieval
            .retrieve(
                &*self.document_repository,
                &[query.to_string()],
                &options.session_id,
            )
            .await?;

        if initial_candidates.len() < 5 {
            let query_embedding = self.embedding_service.embed(&[query.to_string()]).await?;
            let query_embedding = query_embedding.into_iter().next().unwrap();
            self.document_repository.vector_search(&query_embedding, self.config.max_candidates as i32).await
        } else {
            Ok(initial_candidates.into_iter().take(self.config.max_candidates).collect())
        }
    }

    async fn execute_adaptive_retrieval(
        &self,
        query: &str,
        options: &EnhancedQueryOptions,
    ) -> Result<Vec<Candidate>> {
        let hybrid_candidates = self.hybrid_retrieval
            .retrieve(
                &*self.document_repository,
                &[query.to_string()],
                &options.session_id,
            )
            .await?;

        if hybrid_candidates.len() < 5 {
            let query_embedding = self.embedding_service.embed(&[query.to_string()]).await?;
            let query_embedding = query_embedding.into_iter().next().unwrap();
            self.document_repository.vector_search(&query_embedding, self.config.max_candidates as i32).await
        } else if hybrid_candidates.iter().all(|c| c.score < 0.5) {
            Bm25SearchService::search(
                &*self.document_repository,
                &[query.to_string()],
                &options.session_id,
                self.config.max_candidates as i32,
            ).await
        } else {
            Ok(hybrid_candidates)
        }
    }

    fn should_use_hyde(&self, strategy: &RetrievalStrategy, options: &EnhancedQueryOptions) -> bool {
        if let Some(enable_hyde) = options.enable_hyde {
            enable_hyde && self.hyde_service.is_some()
        } else {
            matches!(strategy, RetrievalStrategy::HydeEnhanced) &&
                self.config.enable_hyde &&
                self.hyde_service.is_some()
        }
    }

    async fn create_context_pack(
        &self,
        candidates: &[Candidate],
        options: &EnhancedQueryOptions,
    ) -> Result<ContextPack> {
        let chunks: Vec<ContextChunk> = candidates.iter().map(|candidate| {
            ContextChunk {
                id: candidate.doc_id.clone(),
                score: candidate.score,
                kind: candidate.kind.clone().unwrap_or_else(|| "text".to_string()),
                text: candidate.text.clone().unwrap_or_default(),
            }
        }).collect();

        let context_pack = ContextPack {
            id: uuid::Uuid::new_v4().to_string(),
            session_id: options.session_id.clone(),
            query: "query_placeholder".to_string(),
            created_at: chrono::Utc::now(),
            summary: "Generated context pack".to_string(),
            key_entities: Vec::new(),
            claims: Vec::new(),
            contradictions: Vec::new(),
            chunks,
            citations: Vec::new(),
        };

        Ok(context_pack)
    }

    fn deduplicate_and_sort_candidates(&self, mut candidates: Vec<Candidate>) -> Result<Vec<Candidate>> {
        let mut seen = std::collections::HashSet::new();
        candidates.retain(|c| seen.insert(c.doc_id.clone()));

        candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));

        candidates.truncate(self.config.max_candidates);

        Ok(candidates)
    }
}

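/// Convenience constructors for [`EnhancedQueryPipeline`].
///
/// A usage sketch of the default factory (no LLM or reranker configured):
///
/// ```ignore
/// let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
/// ```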
pub struct PipelineFactory;

impl PipelineFactory {
    pub fn create_pipeline(
        config: PipelineConfig,
        document_repository: Arc<dyn DocumentRepository>,
        embedding_service: Arc<dyn EmbeddingService>,
        llm_service: Option<Arc<dyn LlmService>>,
        reranking_service: Option<Arc<dyn RerankingService>>,
    ) -> EnhancedQueryPipeline {
        EnhancedQueryPipeline::new(
            config,
            document_repository,
            embedding_service,
            llm_service,
            reranking_service,
        )
    }

    pub fn create_default_pipeline(
        document_repository: Arc<dyn DocumentRepository>,
        embedding_service: Arc<dyn EmbeddingService>,
    ) -> EnhancedQueryPipeline {
        EnhancedQueryPipeline::new(
            PipelineConfig::default(),
            document_repository,
            embedding_service,
            None,
            None,
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EmbeddingVector;
    use crate::types::{Chunk, DfIdf};

    struct MockDocumentRepository;

    #[async_trait]
    impl DocumentRepository for MockDocumentRepository {
        async fn get_chunks_by_session(&self, _session_id: &str) -> Result<Vec<Chunk>> {
            Ok(vec![])
        }

        async fn get_dfidf_by_session(&self, _session_id: &str) -> Result<Vec<DfIdf>> {
            Ok(vec![])
        }

        async fn get_chunk_by_id(&self, _chunk_id: &str) -> Result<Option<Chunk>> {
            Ok(None)
        }

        async fn vector_search(&self, _query_vector: &EmbeddingVector, _k: i32) -> Result<Vec<Candidate>> {
            Ok(vec![Candidate {
                doc_id: "test-1".to_string(),
                score: 0.9,
                text: Some("Test document 1".to_string()),
                kind: Some("text".to_string()),
            }])
        }
    }
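
    // A minimal reranker sketch, included to illustrate how a `RerankingService`
    // implementation could be plugged into the pipeline in tests. It is not used
    // by the existing tests (hence the allow) and simply preserves retrieval order.
    #[allow(dead_code)]
    struct MockReranker;

    #[async_trait]
    impl RerankingService for MockReranker {
        async fn rerank(&self, _query: &str, candidates: &[Candidate]) -> Result<Vec<Candidate>> {
            // A real reranker would re-score candidates against the query here.
            Ok(candidates.to_vec())
        }
    }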

    #[tokio::test]
    async fn test_pipeline_creation() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        assert!(pipeline.config.enable_query_understanding);
        assert!(pipeline.config.enable_ml_prediction);
    }

    #[tokio::test]
    async fn test_basic_query_processing() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let options = EnhancedQueryOptions::default();

        let result = pipeline.process_query("What is machine learning?", &options).await.unwrap();

        assert!(!result.candidates.is_empty());
        assert!(result.query_understanding.is_some());
        assert!(result.ml_prediction.is_some());
        assert!(result.processing_time_ms > 0);
    }

    #[tokio::test]
    async fn test_strategy_override() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let mut options = EnhancedQueryOptions::default();
        options.override_strategy = Some(RetrievalStrategy::VectorOnly);

        let result = pipeline.process_query("test query", &options).await.unwrap();

        assert_eq!(result.strategy_used, RetrievalStrategy::VectorOnly);
    }

    #[tokio::test]
    async fn test_pipeline_different_strategies() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut bm25_options = EnhancedQueryOptions::default();
        bm25_options.override_strategy = Some(RetrievalStrategy::BM25Only);
        let bm25_result = pipeline.process_query("test query", &bm25_options).await.unwrap();
        assert_eq!(bm25_result.strategy_used, RetrievalStrategy::BM25Only);

        let mut vector_options = EnhancedQueryOptions::default();
        vector_options.override_strategy = Some(RetrievalStrategy::VectorOnly);
        let vector_result = pipeline.process_query("test query", &vector_options).await.unwrap();
        assert_eq!(vector_result.strategy_used, RetrievalStrategy::VectorOnly);

        let mut hybrid_options = EnhancedQueryOptions::default();
        hybrid_options.override_strategy = Some(RetrievalStrategy::Hybrid);
        let hybrid_result = pipeline.process_query("test query", &hybrid_options).await.unwrap();
        assert_eq!(hybrid_result.strategy_used, RetrievalStrategy::Hybrid);

        let mut adaptive_options = EnhancedQueryOptions::default();
        adaptive_options.override_strategy = Some(RetrievalStrategy::Adaptive);
        let adaptive_result = pipeline.process_query("test query", &adaptive_options).await.unwrap();
        assert_eq!(adaptive_result.strategy_used, RetrievalStrategy::Adaptive);
    }

    #[tokio::test]
    async fn test_query_options_limits() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut options = EnhancedQueryOptions::default();
        options.k = 5;

        let result = pipeline.process_query("test query", &options).await.unwrap();

        assert!(result.candidates.len() <= 5);
        assert!(result.processing_time_ms >= 0);
    }

    #[tokio::test]
    async fn test_query_understanding_integration() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let options = EnhancedQueryOptions::default();

        let technical_result = pipeline.process_query("How to debug JavaScript function?", &options).await.unwrap();
        assert!(technical_result.query_understanding.is_some());

        let analytical_result = pipeline.process_query("What are the benefits?", &options).await.unwrap();
        assert!(analytical_result.query_understanding.is_some());

        let code_result = pipeline.process_query("function myFunc() { return 42; }", &options).await.unwrap();
        assert!(code_result.query_understanding.is_some());
    }

    #[tokio::test]
    async fn test_ml_prediction_integration() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let options = EnhancedQueryOptions::default();

        let result = pipeline.process_query("complex analytical question about machine learning", &options).await.unwrap();

        assert!(result.ml_prediction.is_some());
        let prediction = result.ml_prediction.unwrap();
        assert!(prediction.prediction.confidence > 0.0);
        assert!(!prediction.explanation.is_empty());
        assert!(!prediction.feature_importance.is_empty());
    }

    #[tokio::test]
    async fn test_error_handling() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let options = EnhancedQueryOptions::default();

        let empty_result = pipeline.process_query("", &options).await.unwrap();
        assert!(empty_result.candidates.len() >= 0);

        let whitespace_result = pipeline.process_query(" ", &options).await.unwrap();
        assert!(whitespace_result.candidates.len() >= 0);

        let unicode_result = pipeline.process_query("测试 🚀 тест", &options).await.unwrap();
        assert!(unicode_result.processing_time_ms >= 0);
    }

    #[test]
    fn test_enhanced_query_options_default() {
        let options = EnhancedQueryOptions::default();

        assert_eq!(options.k, 10);
        assert!(options.override_strategy.is_none());
        assert_eq!(options.include_metadata, true);
        assert_eq!(options.session_id, "default");
    }

    #[test]
    fn test_enhanced_query_options_builder() {
        let mut options = EnhancedQueryOptions::default();
        options.k = 10;
        options.override_strategy = Some(RetrievalStrategy::Hybrid);
        options.include_metadata = false;
        options.session_id = "test-session".to_string();

        assert_eq!(options.k, 10);
        assert_eq!(options.override_strategy, Some(RetrievalStrategy::Hybrid));
        assert_eq!(options.include_metadata, false);
        assert_eq!(options.session_id, "test-session");
    }

    #[tokio::test]
    async fn test_pipeline_factory_different_configurations() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(256));

        let default_pipeline = PipelineFactory::create_default_pipeline(doc_repo.clone(), embedding_service.clone());
        let result1 = default_pipeline.process_query("test", &EnhancedQueryOptions::default()).await.unwrap();

        assert!(!result1.candidates.is_empty());

        let embedding_dim = embedding_service.dimension();
        assert_eq!(embedding_dim, 256);
    }

    #[tokio::test]
    async fn test_query_result_completeness() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let options = EnhancedQueryOptions::default();

        let result = pipeline.process_query("comprehensive test query", &options).await.unwrap();

        assert!(matches!(result.strategy_used,
            RetrievalStrategy::BM25Only |
            RetrievalStrategy::VectorOnly |
            RetrievalStrategy::Hybrid |
            RetrievalStrategy::HydeEnhanced |
            RetrievalStrategy::MultiStep |
            RetrievalStrategy::Adaptive
        ));
        assert!(result.candidates.len() >= 0);
        assert!(result.processing_time_ms >= 0);
        assert!(result.query_understanding.is_some());
        assert!(result.ml_prediction.is_some());

        let understanding = result.query_understanding.unwrap();
        assert!(!understanding.original_query.is_empty());
        assert!(understanding.confidence > 0.0);
        assert!(!understanding.keywords.is_empty());

        let prediction = result.ml_prediction.unwrap();
        assert!(prediction.prediction.confidence > 0.0);
        assert!(!prediction.explanation.is_empty());
        assert!(!prediction.feature_importance.is_empty());
    }

    struct MockDocumentRepositoryWithData;

    #[async_trait]
    impl DocumentRepository for MockDocumentRepositoryWithData {
        async fn get_chunks_by_session(&self, _session_id: &str) -> Result<Vec<Chunk>> {
            Ok(vec![
                Chunk {
                    id: "chunk1".to_string(),
                    message_id: uuid::Uuid::new_v4(),
                    session_id: "session1".to_string(),
                    offset_start: 0,
                    offset_end: 100,
                    kind: "text".to_string(),
                    text: "This is a test chunk about machine learning.".to_string(),
                    tokens: 10,
                },
                Chunk {
                    id: "chunk2".to_string(),
                    message_id: uuid::Uuid::new_v4(),
                    session_id: "session1".to_string(),
                    offset_start: 100,
                    offset_end: 200,
                    kind: "code".to_string(),
                    text: "function processData() { return 'processed'; }".to_string(),
                    tokens: 8,
                }
            ])
        }

        async fn get_dfidf_by_session(&self, _session_id: &str) -> Result<Vec<DfIdf>> {
            Ok(vec![
                DfIdf {
                    term: "machine".to_string(),
                    session_id: "session1".to_string(),
                    df: 1,
                    idf: 2.5,
                },
                DfIdf {
                    term: "learning".to_string(),
                    session_id: "session1".to_string(),
                    df: 1,
                    idf: 2.3,
                },
            ])
        }

        async fn get_chunk_by_id(&self, chunk_id: &str) -> Result<Option<Chunk>> {
            if chunk_id == "chunk1" || chunk_id == "chunk2" {
                self.get_chunks_by_session("session1").await.map(|chunks| {
                    chunks.into_iter().find(|c| c.id == chunk_id)
                })
            } else {
                Ok(None)
            }
        }

        async fn vector_search(&self, _query_vector: &EmbeddingVector, _k: i32) -> Result<Vec<Candidate>> {
            Ok(vec![
                Candidate {
                    doc_id: "chunk1".to_string(),
                    score: 0.95,
                    text: Some("This is a test chunk about machine learning.".to_string()),
                    kind: Some("text".to_string()),
                },
                Candidate {
                    doc_id: "chunk2".to_string(),
                    score: 0.85,
                    text: Some("function processData() { return 'processed'; }".to_string()),
                    kind: Some("code".to_string()),
                },
            ])
        }
    }

    #[tokio::test]
    async fn test_pipeline_with_real_data() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);
        let options = EnhancedQueryOptions::default();

        let result = pipeline.process_query("machine learning function", &options).await.unwrap();

        assert!(!result.candidates.is_empty());
        assert!(result.candidates.len() <= 2);

        for candidate in &result.candidates {
            assert!(!candidate.doc_id.is_empty());
            assert!(candidate.score > 0.0);
            assert!(candidate.text.is_some());
            assert!(candidate.kind.is_some());
        }
    }

    #[tokio::test]
    async fn test_pipeline_complex_workflow_orchestration() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "complex_session_123".to_string();
        options.k = 25;
        options.include_metadata = true;

        let result = pipeline.process_query("optimize neural network training", &options).await.unwrap();

        assert!(!result.candidates.is_empty());
        assert!(result.candidates.len() <= 25);
        assert!(result.processing_time_ms >= 0);

        for candidate in &result.candidates {
            assert!(!candidate.doc_id.is_empty());
            assert!(candidate.score > 0.0);
            assert!(candidate.text.is_some());
            assert!(candidate.kind.is_some());
        }

        assert!(result.query_understanding.is_some());

        assert!(result.ml_prediction.is_some());
    }

    #[tokio::test]
    async fn test_pipeline_configuration_variations() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut minimal_options = EnhancedQueryOptions::default();
        minimal_options.session_id = "minimal_test".to_string();
        minimal_options.k = 3;
        minimal_options.include_metadata = false;

        let minimal_result = pipeline.process_query("simple query", &minimal_options).await.unwrap();
        assert!(!minimal_result.candidates.is_empty());
        assert!(minimal_result.candidates.len() <= 3);

        let mut max_options = EnhancedQueryOptions::default();
        max_options.session_id = "max_test".to_string();
        max_options.k = 50;
        max_options.include_metadata = true;

        let max_result = pipeline.process_query("complex analysis query", &max_options).await.unwrap();
        assert!(!max_result.candidates.is_empty());
        assert!(max_result.candidates.len() <= 50);
        assert!(max_result.processing_time_ms >= 0);
    }

    #[tokio::test]
    async fn test_pipeline_async_and_concurrency() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = Arc::new(PipelineFactory::create_default_pipeline(doc_repo, embedding_service));

        let mut handles = Vec::new();

        for i in 0..10 {
            let pipeline_clone = pipeline.clone();
            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("concurrent_session_{}", i);
            options.k = 5;
            options.include_metadata = true;

            let handle = tokio::spawn(async move {
                pipeline_clone.process_query(&format!("query {}", i), &options).await
            });
            handles.push(handle);
        }

        let mut successful_results = 0;
        for handle in handles {
            if let Ok(Ok(_query_result)) = handle.await {
                successful_results += 1;
            }
        }

        assert!(successful_results >= 5);
    }

    #[tokio::test]
    async fn test_pipeline_error_handling_comprehensive() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut empty_options = EnhancedQueryOptions::default();
        empty_options.session_id = "error_test".to_string();
        empty_options.k = 5;
        empty_options.include_metadata = true;

        let empty_result = pipeline.process_query("", &empty_options).await.unwrap();
        assert!(empty_result.candidates.len() >= 0);

        let whitespace_result = pipeline.process_query(" \t\n ", &empty_options).await.unwrap();
        assert!(whitespace_result.candidates.len() >= 0);

        let unicode_result = pipeline.process_query("测试 🦀 émojis ånd spëciæl chärs", &empty_options).await.unwrap();
        assert!(unicode_result.processing_time_ms >= 0);

        let long_query = "a".repeat(10000);
        let long_result = pipeline.process_query(&long_query, &empty_options).await.unwrap();
        assert!(long_result.processing_time_ms >= 0);

        let mut zero_options = EnhancedQueryOptions::default();
        zero_options.session_id = "zero_test".to_string();
        zero_options.k = 0;
        zero_options.include_metadata = false;

        let zero_result = pipeline.process_query("test", &zero_options).await.unwrap();
        assert!(zero_result.candidates.len() >= 0);
    }

    #[tokio::test]
    async fn test_pipeline_performance_and_metrics() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "performance_test".to_string();
        options.k = 10;
        options.include_metadata = true;

        let start_time = std::time::Instant::now();
        let result = pipeline.process_query("performance test query", &options).await.unwrap();
        let total_time = start_time.elapsed();

        assert!(result.processing_time_ms >= 0);
        assert!(result.processing_time_ms <= total_time.as_millis() as u64);

        let mut times = Vec::new();
        for i in 0..5 {
            let start = std::time::Instant::now();
            let _result = pipeline.process_query(&format!("consistency test {}", i), &options).await.unwrap();
            times.push(start.elapsed().as_millis());
        }

        let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
        for time in times {
            assert!((time as f64 - avg_time).abs() < avg_time * 10.0);
        }
    }

    #[tokio::test]
    async fn test_pipeline_session_and_context_handling() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut session1_options = EnhancedQueryOptions::default();
        session1_options.session_id = "session_1".to_string();
        session1_options.k = 5;
        session1_options.include_metadata = true;

        let mut session2_options = EnhancedQueryOptions::default();
        session2_options.session_id = "session_2".to_string();
        session2_options.k = 5;
        session2_options.include_metadata = true;

        let result1 = pipeline.process_query("query from session 1", &session1_options).await.unwrap();
        let result2 = pipeline.process_query("query from session 2", &session2_options).await.unwrap();

        assert!(!result1.candidates.is_empty());
        assert!(!result2.candidates.is_empty());

        let mut complex_context = HashMap::new();
        complex_context.insert("nested".to_string(), serde_json::json!({
            "level1": {
                "level2": {
                    "data": [1, 2, 3, 4, 5]
                }
            }
        }));
        complex_context.insert("array".to_string(), serde_json::json!([
            {"type": "filter", "value": "rust"},
            {"type": "sort", "value": "relevance"}
        ]));
        complex_context.insert("null_value".to_string(), serde_json::Value::Null);
        complex_context.insert("boolean".to_string(), serde_json::Value::Bool(true));
        complex_context.insert("number".to_string(), serde_json::Value::Number(serde_json::Number::from(42)));

        let mut context_options = EnhancedQueryOptions::default();
        context_options.session_id = "context_session".to_string();
        context_options.k = 10;
        context_options.include_metadata = true;
        context_options.context = Some(complex_context);

        let context_result = pipeline.process_query("context-aware query", &context_options).await.unwrap();
        assert!(!context_result.candidates.is_empty());

        let mut long_session_options = EnhancedQueryOptions::default();
        long_session_options.session_id = "a".repeat(1000);
        long_session_options.k = 5;
        long_session_options.include_metadata = false;

        let long_session_result = pipeline.process_query("long session test", &long_session_options).await.unwrap();
        assert!(long_session_result.processing_time_ms >= 0);
    }

    #[tokio::test]
    async fn test_enhanced_query_options_comprehensive() {
        let mut builder_options = EnhancedQueryOptions::default();

        builder_options.session_id = "builder_test".to_string();
        builder_options.k = 15;
        builder_options.include_metadata = true;
        builder_options.enable_hyde = Some(false);
        builder_options.override_strategy = Some(RetrievalStrategy::Hybrid);

        let mut context = HashMap::new();
        context.insert("builder".to_string(), serde_json::Value::String("test".to_string()));
        builder_options.context = Some(context);

        let serialized = serde_json::to_string(&builder_options).unwrap();
        let deserialized: EnhancedQueryOptions = serde_json::from_str(&serialized).unwrap();

        assert_eq!(builder_options.session_id, deserialized.session_id);
        assert_eq!(builder_options.k, deserialized.k);
        assert_eq!(builder_options.include_metadata, deserialized.include_metadata);
        assert_eq!(builder_options.enable_hyde, deserialized.enable_hyde);
        assert_eq!(builder_options.override_strategy, deserialized.override_strategy);
        assert!(builder_options.context.is_some());
        assert!(deserialized.context.is_some());
    }

    #[tokio::test]
    async fn test_pipeline_config_comprehensive() {
        let default_config = PipelineConfig::default();
        assert_eq!(default_config.enable_hyde, true);
        assert_eq!(default_config.enable_query_understanding, true);
        assert_eq!(default_config.enable_ml_prediction, true);
        assert_eq!(default_config.max_candidates, 50);
        assert_eq!(default_config.rerank_enabled, true);
        assert_eq!(default_config.rerank_top_k, 20);
        assert_eq!(default_config.timeout_seconds, 30);

        let serialized = serde_json::to_string(&default_config).unwrap();
        let deserialized: PipelineConfig = serde_json::from_str(&serialized).unwrap();

        assert_eq!(default_config.enable_hyde, deserialized.enable_hyde);
        assert_eq!(default_config.max_candidates, deserialized.max_candidates);
        assert_eq!(default_config.timeout_seconds, deserialized.timeout_seconds);

        let cloned_config = default_config.clone();
        assert_eq!(default_config.enable_hyde, cloned_config.enable_hyde);

        let debug_str = format!("{:?}", default_config);
        assert!(debug_str.contains("PipelineConfig"));
        assert!(debug_str.contains("enable_hyde"));
    }

    #[tokio::test]
    async fn test_pipeline_stage_by_stage_processing() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "stage_test".to_string();
        options.k = 10;
        options.include_metadata = true;

        let qu_result = pipeline.process_query("technical machine learning optimization", &options).await.unwrap();
        assert!(qu_result.candidates.len() >= 0);

        let ml_result = pipeline.process_query("algorithm performance analysis", &options).await.unwrap();
        assert!(ml_result.candidates.len() >= 0);

        let hyde_result = pipeline.process_query("neural network architecture", &options).await.unwrap();
        assert!(hyde_result.candidates.len() >= 0);

        let retrieval_result = pipeline.process_query("code function search", &options).await.unwrap();
        assert!(retrieval_result.candidates.len() >= 0);

        for candidate in &retrieval_result.candidates {
            assert!(candidate.score > 0.0);
        }
    }

    #[tokio::test]
    async fn test_pipeline_edge_cases_and_boundaries() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut large_k_options = EnhancedQueryOptions::default();
        large_k_options.session_id = "large_k_test".to_string();
        large_k_options.k = usize::MAX;
        large_k_options.include_metadata = true;

        let large_k_result = pipeline.process_query("test query", &large_k_options).await.unwrap();
        assert!(large_k_result.candidates.len() >= 0);

        let mut min_k_options = EnhancedQueryOptions::default();
        min_k_options.session_id = "min_k_test".to_string();
        min_k_options.k = 1;
        min_k_options.include_metadata = false;

        let min_k_result = pipeline.process_query("single result query", &min_k_options).await.unwrap();
        assert!(min_k_result.candidates.len() >= 0);

        let mut empty_session_options = EnhancedQueryOptions::default();
        empty_session_options.session_id = String::new();
        empty_session_options.k = 5;
        empty_session_options.include_metadata = true;

        let empty_session_result = pipeline.process_query("empty session test", &empty_session_options).await.unwrap();
        assert!(empty_session_result.processing_time_ms >= 0);

        let patterns = ["short", "medium length query", "very long query with many complex technical terms and concepts"];
        for (i, pattern) in patterns.iter().enumerate() {
            let mut pattern_options = EnhancedQueryOptions::default();
            pattern_options.session_id = format!("pattern_test_{}", i);
            pattern_options.k = 5;
            pattern_options.include_metadata = true;

            let pattern_result = pipeline.process_query(pattern, &pattern_options).await.unwrap();
            assert!(pattern_result.processing_time_ms >= 0);
        }
    }

    #[tokio::test]
    async fn test_complex_strategy_execution_paths() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut multi_step_options = EnhancedQueryOptions::default();
        multi_step_options.override_strategy = Some(RetrievalStrategy::MultiStep);
        multi_step_options.session_id = "multi_step_test".to_string();
        multi_step_options.k = 15;

        let multi_step_result = pipeline.process_query("complex multi step query", &multi_step_options).await.unwrap();
        assert_eq!(multi_step_result.strategy_used, RetrievalStrategy::MultiStep);
        assert!(!multi_step_result.candidates.is_empty());

        let mut adaptive_options = EnhancedQueryOptions::default();
        adaptive_options.override_strategy = Some(RetrievalStrategy::Adaptive);
        adaptive_options.session_id = "adaptive_test".to_string();
        adaptive_options.k = 20;

        let adaptive_result = pipeline.process_query("adaptive strategy test", &adaptive_options).await.unwrap();
        assert_eq!(adaptive_result.strategy_used, RetrievalStrategy::Adaptive);

        let mut hyde_options = EnhancedQueryOptions::default();
        hyde_options.override_strategy = Some(RetrievalStrategy::HydeEnhanced);
        hyde_options.session_id = "hyde_test".to_string();
        hyde_options.k = 10;

        let hyde_result = pipeline.process_query("hyde enhanced query", &hyde_options).await.unwrap();
        assert!(matches!(hyde_result.strategy_used, RetrievalStrategy::HydeEnhanced));
    }

    #[tokio::test]
    async fn test_pipeline_resource_management_and_cleanup() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut large_options = EnhancedQueryOptions::default();
        large_options.session_id = "memory_test".to_string();
        large_options.k = 1000;
        large_options.include_metadata = true;

        let large_result = pipeline.process_query("memory intensive query", &large_options).await.unwrap();
        assert!(large_result.processing_time_ms >= 0);

        drop(large_result);

        let mut concurrent_handles = Vec::new();
        for i in 0..20 {
            let pipeline_clone = Arc::new(pipeline.clone());
            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("resource_test_{}", i);
            options.k = 50;

            let handle = tokio::spawn(async move {
                pipeline_clone.process_query(&format!("resource query {}", i), &options).await
            });
            concurrent_handles.push(handle);
        }

        let mut successful_operations = 0;
        for handle in concurrent_handles {
            if let Ok(Ok(_)) = handle.await {
                successful_operations += 1;
            }
        }
        assert!(successful_operations >= 15);
    }

    #[tokio::test]
    async fn test_pipeline_complex_error_recovery() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let error_queries = [
            "",
            "\0\0\0",
            &"🔥".repeat(1000),
            "SELECT * FROM users; DROP TABLE users;",
            &"\n".repeat(100),
            &"a".repeat(100000),
        ];

        for (i, error_query) in error_queries.iter().enumerate() {
            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("error_recovery_{}", i);
            options.k = 5;
            options.include_metadata = true;

            let result = pipeline.process_query(error_query, &options).await;
            assert!(result.is_ok(), "Failed on query: {}", error_query);

            let result = result.unwrap();
            assert!(result.processing_time_ms >= 0);
        }
    }

    #[tokio::test]
    async fn test_pipeline_configuration_feature_flags() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let mut disabled_config = PipelineConfig::default();
        disabled_config.enable_hyde = false;
        disabled_config.enable_query_understanding = false;
        disabled_config.enable_ml_prediction = false;
        disabled_config.rerank_enabled = false;
        disabled_config.max_candidates = 5;

        let disabled_pipeline = PipelineFactory::create_pipeline(
            disabled_config,
            doc_repo.clone(),
            embedding_service.clone(),
            None,
            None,
        );

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "disabled_test".to_string();
        options.k = 3;

        let disabled_result = disabled_pipeline.process_query("test query", &options).await.unwrap();
        assert!(disabled_result.query_understanding.is_none());
        assert!(disabled_result.ml_prediction.is_none());
        assert!(disabled_result.hyde_expansion.is_none());

        let mut selective_config = PipelineConfig::default();
        selective_config.enable_hyde = false;
        selective_config.enable_query_understanding = true;
        selective_config.enable_ml_prediction = false;
        selective_config.rerank_enabled = true;
        selective_config.rerank_top_k = 3;

        let selective_pipeline = PipelineFactory::create_pipeline(
            selective_config,
            doc_repo.clone(),
            embedding_service.clone(),
            None,
            None,
        );

        let selective_result = selective_pipeline.process_query("selective test", &options).await.unwrap();
        assert!(selective_result.query_understanding.is_some());
        assert!(selective_result.ml_prediction.is_none());
        assert!(selective_result.hyde_expansion.is_none());
    }

    #[tokio::test]
    async fn test_pipeline_deduplication_and_sorting_complex() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut dedup_options = EnhancedQueryOptions::default();
        dedup_options.session_id = "dedup_test".to_string();
        dedup_options.k = 10;
        dedup_options.include_metadata = true;

        let dedup_result = pipeline.process_query("deduplication test query", &dedup_options).await.unwrap();

        let mut seen_ids = std::collections::HashSet::new();
        for candidate in &dedup_result.candidates {
            assert!(seen_ids.insert(candidate.doc_id.clone()), "Duplicate doc_id found: {}", candidate.doc_id);
        }

        for window in dedup_result.candidates.windows(2) {
            assert!(window[0].score >= window[1].score, "Candidates not sorted by score");
        }

        let strategies = [
            RetrievalStrategy::BM25Only,
            RetrievalStrategy::VectorOnly,
            RetrievalStrategy::Hybrid,
            RetrievalStrategy::MultiStep,
            RetrievalStrategy::Adaptive,
        ];

        for strategy in &strategies {
            let mut strategy_options = EnhancedQueryOptions::default();
            strategy_options.override_strategy = Some(strategy.clone());
            strategy_options.session_id = "strategy_dedup_test".to_string();
            strategy_options.k = 15;

            let strategy_result = pipeline.process_query("strategy specific query", &strategy_options).await.unwrap();
            assert_eq!(strategy_result.strategy_used, *strategy);

            let mut seen_strategy_ids = std::collections::HashSet::new();
            for candidate in &strategy_result.candidates {
                assert!(seen_strategy_ids.insert(candidate.doc_id.clone()));
            }
        }
    }

    #[tokio::test]
    async fn test_pipeline_context_pack_creation_complex() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut context_options = EnhancedQueryOptions::default();
        context_options.session_id = "context_complex_test".to_string();
        context_options.k = 20;
        context_options.include_metadata = true;

        let mut complex_context = HashMap::new();
        complex_context.insert("user_preferences".to_string(), serde_json::json!({
            "language": "rust",
            "experience_level": "advanced",
            "preferred_patterns": ["async", "generics", "traits"]
        }));
        complex_context.insert("search_filters".to_string(), serde_json::json!({
            "date_range": {
                "start": "2024-01-01",
                "end": "2024-12-31"
            },
            "content_types": ["code", "documentation", "examples"],
            "complexity": ["medium", "high"]
        }));
        context_options.context = Some(complex_context);

        let context_result = pipeline.process_query("complex context query", &context_options).await.unwrap();

        assert!(!context_result.context_pack.id.is_empty());
        assert_eq!(context_result.context_pack.session_id, "context_complex_test");
        assert!(!context_result.context_pack.chunks.is_empty());

        for chunk in &context_result.context_pack.chunks {
            assert!(!chunk.id.is_empty());
            assert!(chunk.score > 0.0);
            assert!(!chunk.kind.is_empty());
        }

        let mut empty_context_options = EnhancedQueryOptions::default();
        empty_context_options.session_id = "empty_context_test".to_string();
        empty_context_options.k = 0;
        empty_context_options.include_metadata = false;

        let empty_context_result = pipeline.process_query("empty context query", &empty_context_options).await.unwrap();
        assert!(!empty_context_result.context_pack.id.is_empty());
        assert_eq!(empty_context_result.context_pack.session_id, "empty_context_test");
    }

    #[tokio::test]
    async fn test_pipeline_hyde_expansion_complex_paths() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut hyde_options = EnhancedQueryOptions::default();
        hyde_options.enable_hyde = Some(true);
        hyde_options.override_strategy = Some(RetrievalStrategy::HydeEnhanced);
        hyde_options.session_id = "hyde_fallback_test".to_string();
        hyde_options.k = 10;

        let hyde_result = pipeline.process_query("hyde test query", &hyde_options).await.unwrap();
        assert!(hyde_result.hyde_expansion.is_none());
        assert!(!hyde_result.candidates.is_empty());

        let mut hyde_disabled_options = EnhancedQueryOptions::default();
        hyde_disabled_options.enable_hyde = Some(false);
        hyde_disabled_options.session_id = "hyde_disabled_test".to_string();
        hyde_disabled_options.k = 5;

        let hyde_disabled_result = pipeline.process_query("no hyde query", &hyde_disabled_options).await.unwrap();
        assert!(hyde_disabled_result.hyde_expansion.is_none());
    }

    #[tokio::test]
    async fn test_pipeline_reranking_complex_scenarios() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let mut no_rerank_config = PipelineConfig::default();
        no_rerank_config.rerank_enabled = false;

        let no_rerank_pipeline = PipelineFactory::create_pipeline(
            no_rerank_config,
            doc_repo.clone(),
            embedding_service.clone(),
            None,
            None,
        );

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "no_rerank_test".to_string();
        options.k = 10;

        let no_rerank_result = no_rerank_pipeline.process_query("reranking test", &options).await.unwrap();
        assert!(!no_rerank_result.candidates.is_empty());

        let mut single_config = PipelineConfig::default();
        single_config.rerank_enabled = true;
        single_config.rerank_top_k = 1;

        let single_pipeline = PipelineFactory::create_pipeline(
            single_config,
            doc_repo.clone(),
            embedding_service.clone(),
            None,
            None,
        );

        let mut single_options = EnhancedQueryOptions::default();
        single_options.session_id = "single_rerank_test".to_string();
        single_options.k = 1;

        let single_result = single_pipeline.process_query("single candidate test", &single_options).await.unwrap();
        assert!(single_result.candidates.len() <= 1);

        let rerank_configs = [1, 3, 5, 10, 20];
        for top_k in &rerank_configs {
            let mut config = PipelineConfig::default();
            config.rerank_enabled = true;
            config.rerank_top_k = *top_k;

            let rerank_pipeline = PipelineFactory::create_pipeline(
                config,
                doc_repo.clone(),
                embedding_service.clone(),
                None,
                None,
            );

            let mut rerank_options = EnhancedQueryOptions::default();
            rerank_options.session_id = format!("rerank_top_k_{}", top_k);
            rerank_options.k = 15;

            let rerank_result = rerank_pipeline.process_query("rerank top k test", &rerank_options).await.unwrap();
            assert!(rerank_result.processing_time_ms >= 0);
        }
    }

    #[tokio::test]
    async fn test_pipeline_ml_prediction_integration_complex() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo.clone(), embedding_service.clone());

        let query_types = [
            ("technical algorithm query about neural networks", "technical"),
            ("how to implement quicksort in rust", "code"),
            ("what are the benefits of async programming", "conceptual"),
            ("debug this function: fn test() { panic!(); }", "debugging"),
            ("performance optimization for web servers", "performance"),
        ];

        for (query, query_type) in &query_types {
            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("ml_prediction_{}", query_type);
            options.k = 10;
            options.include_metadata = true;

            let result = pipeline.process_query(query, &options).await.unwrap();

            assert!(result.ml_prediction.is_some());
            let prediction = result.ml_prediction.unwrap();
            assert!(prediction.prediction.confidence > 0.0);
            assert!(prediction.prediction.confidence <= 1.0);
            assert!(!prediction.explanation.is_empty());
            assert!(!prediction.feature_importance.is_empty());

            for (feature, importance) in &prediction.feature_importance {
                assert!(!feature.is_empty());
                assert!(*importance >= 0.0);
            }
        }

        let mut ml_disabled_config = PipelineConfig::default();
        ml_disabled_config.enable_ml_prediction = false;

        let ml_disabled_pipeline = PipelineFactory::create_pipeline(
            ml_disabled_config,
            doc_repo,
            embedding_service,
            None,
            None,
        );

        let mut disabled_options = EnhancedQueryOptions::default();
        disabled_options.session_id = "ml_disabled_test".to_string();
        disabled_options.k = 5;

        let disabled_result = ml_disabled_pipeline.process_query("test query", &disabled_options).await.unwrap();
        assert!(disabled_result.ml_prediction.is_none());
    }
1702
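    // Sweeps timeout and max-candidate settings to check that queries complete
    // within a generous bound of the configured timeout and that returned
    // candidate counts never exceed the configured limit.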
    #[tokio::test]
    async fn test_pipeline_timeout_and_performance_boundaries() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let timeout_configs = [1, 5, 10, 30, 60];

        for timeout in &timeout_configs {
            let mut config = PipelineConfig::default();
            config.timeout_seconds = *timeout;
            config.max_candidates = 100;

            let timeout_pipeline = PipelineFactory::create_pipeline(
                config,
                doc_repo.clone(),
                embedding_service.clone(),
                None,
                None,
            );

            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("timeout_test_{}", timeout);
            options.k = 50;
            options.include_metadata = true;

            let start_time = std::time::Instant::now();
            let result = timeout_pipeline.process_query("timeout performance test", &options).await.unwrap();
            let elapsed = start_time.elapsed();

            assert!(result.processing_time_ms >= 0);
            assert!(elapsed.as_secs() < (*timeout + 5));
            assert!(!result.candidates.is_empty());
        }

        let candidate_limits = [1, 10, 50, 100, 500];

        for limit in &candidate_limits {
            let mut config = PipelineConfig::default();
            config.max_candidates = *limit;
            config.timeout_seconds = 30;

            let limit_pipeline = PipelineFactory::create_pipeline(
                config,
                doc_repo.clone(),
                embedding_service.clone(),
                None,
                None,
            );

            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("limit_test_{}", limit);
            options.k = (*limit).min(20);

            let limit_result = limit_pipeline.process_query("candidate limit test", &options).await.unwrap();

            assert!(limit_result.candidates.len() <= *limit);
            assert!(limit_result.processing_time_ms >= 0);
        }
    }

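    // Drives the HyDE enable/disable decision through explicit options and
    // strategy overrides; since the default pipeline is built without an LLM
    // service, HydeEnhanced runs are expected to complete without an expansion.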
    #[tokio::test]
    async fn test_pipeline_internal_method_coverage() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let hyde_scenarios = [
            (Some(true), RetrievalStrategy::HydeEnhanced, "should_enable_hyde_explicit"),
            (Some(false), RetrievalStrategy::HydeEnhanced, "should_disable_hyde_explicit"),
            (None, RetrievalStrategy::HydeEnhanced, "should_enable_hyde_strategy"),
            (None, RetrievalStrategy::Hybrid, "should_disable_hyde_hybrid"),
            (None, RetrievalStrategy::BM25Only, "should_disable_hyde_bm25"),
            (None, RetrievalStrategy::VectorOnly, "should_disable_hyde_vector"),
        ];

        for (enable_hyde, strategy, test_name) in &hyde_scenarios {
            let mut options = EnhancedQueryOptions::default();
            options.enable_hyde = *enable_hyde;
            options.override_strategy = Some(strategy.clone());
            options.session_id = test_name.to_string();
            options.k = 5;

            let result = pipeline.process_query("hyde scenario test", &options).await.unwrap();
            assert_eq!(result.strategy_used, *strategy);

            if matches!(strategy, RetrievalStrategy::HydeEnhanced) {
                assert!(result.hyde_expansion.is_none());
            }
        }
    }

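    // Confirms that multi-step and adaptive retrieval return candidates with
    // unique doc_ids, sorted by descending score.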
    #[tokio::test]
    async fn test_pipeline_deduplicate_and_sort_candidates_method() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let test_strategies = [
            RetrievalStrategy::MultiStep,
            RetrievalStrategy::Adaptive,
        ];

        for strategy in &test_strategies {
            let mut options = EnhancedQueryOptions::default();
            options.override_strategy = Some(strategy.clone());
            options.session_id = format!("dedup_sort_test_{:?}", strategy);
            options.k = 20;
            options.include_metadata = true;

            let result = pipeline.process_query("deduplication and sorting test", &options).await.unwrap();

            let mut seen_ids = std::collections::HashSet::new();
            for candidate in &result.candidates {
                assert!(seen_ids.insert(candidate.doc_id.clone()),
                    "Found duplicate doc_id: {} in strategy: {:?}", candidate.doc_id, strategy);
            }

            for window in result.candidates.windows(2) {
                assert!(window[0].score >= window[1].score,
                    "Candidates not properly sorted by score in strategy: {:?}", strategy);
            }
        }
    }

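    // Exercises multi-step retrieval with a large k to confirm the strategy is
    // honored and candidates are still produced.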
    #[tokio::test]
    async fn test_pipeline_multi_step_retrieval_edge_cases() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut multi_step_options = EnhancedQueryOptions::default();
        multi_step_options.override_strategy = Some(RetrievalStrategy::MultiStep);
        multi_step_options.session_id = "multi_step_edge_test".to_string();
        multi_step_options.k = 50;

        let multi_step_result = pipeline.process_query("comprehensive multi-step test", &multi_step_options).await.unwrap();
        assert_eq!(multi_step_result.strategy_used, RetrievalStrategy::MultiStep);

        assert!(multi_step_result.processing_time_ms >= 0);
        assert!(!multi_step_result.candidates.is_empty());
    }

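    // Exercises the adaptive strategy end to end and checks that the reported
    // strategy matches the override.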
    #[tokio::test]
    async fn test_pipeline_adaptive_retrieval_score_conditions() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let mut adaptive_options = EnhancedQueryOptions::default();
        adaptive_options.override_strategy = Some(RetrievalStrategy::Adaptive);
        adaptive_options.session_id = "adaptive_edge_test".to_string();
        adaptive_options.k = 25;

        let adaptive_result = pipeline.process_query("adaptive retrieval edge case test", &adaptive_options).await.unwrap();
        assert_eq!(adaptive_result.strategy_used, RetrievalStrategy::Adaptive);

        assert!(adaptive_result.processing_time_ms >= 0);
    }

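    // Checks that the number of returned candidates never exceeds the requested
    // k, including the k = 0 edge case, and that the context pack carries the
    // expected session id.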
    #[tokio::test]
    async fn test_pipeline_result_limiting_and_truncation() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let k_values = [0, 1, 2, 5, 10, 100, 1000];

        for k in &k_values {
            let mut options = EnhancedQueryOptions::default();
            options.session_id = format!("result_limiting_test_{}", k);
            options.k = *k;
            options.include_metadata = true;

            let result = pipeline.process_query("result limiting test", &options).await.unwrap();

            assert!(result.candidates.len() <= *k, "Result count {} exceeds k limit {}", result.candidates.len(), k);

            assert!(!result.context_pack.id.is_empty());
            assert_eq!(result.context_pack.session_id, format!("result_limiting_test_{}", k));
        }
    }

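    // Verifies context-pack construction with metadata both enabled and
    // disabled: ids, timestamps, and per-chunk fields should always be populated.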
    #[tokio::test]
    async fn test_pipeline_context_creation_metadata_handling() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(doc_repo, embedding_service);

        let metadata_scenarios = [
            (true, "metadata_enabled"),
            (false, "metadata_disabled"),
        ];

        for (include_metadata, test_name) in &metadata_scenarios {
            let mut options = EnhancedQueryOptions::default();
            options.session_id = test_name.to_string();
            options.k = 10;
            options.include_metadata = *include_metadata;

            let result = pipeline.process_query("metadata handling test", &options).await.unwrap();

            assert!(!result.context_pack.id.is_empty());
            assert_eq!(result.context_pack.session_id, *test_name);
            assert!(result.context_pack.created_at <= chrono::Utc::now());

            for chunk in &result.context_pack.chunks {
                assert!(!chunk.id.is_empty());
                assert!(chunk.score >= 0.0);
                assert!(!chunk.kind.is_empty());
            }
        }
    }

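    // Covers reranking boundary settings (top_k of 0 and 1, and reranking
    // disabled) and checks that results remain sorted by score.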
    #[tokio::test]
    async fn test_pipeline_reranking_boundary_conditions() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let rerank_scenarios = [
            (true, 0, "rerank_enabled_zero_candidates"),
            (true, 1, "rerank_enabled_one_candidate"),
            (false, 10, "rerank_disabled_many_candidates"),
        ];

        for (rerank_enabled, rerank_top_k, test_name) in &rerank_scenarios {
            let mut config = PipelineConfig::default();
            config.rerank_enabled = *rerank_enabled;
            config.rerank_top_k = *rerank_top_k;

            let rerank_pipeline = PipelineFactory::create_pipeline(
                config,
                doc_repo.clone(),
                embedding_service.clone(),
                None,
                None,
            );

            let mut options = EnhancedQueryOptions::default();
            options.session_id = test_name.to_string();
            options.k = 15;

            let result = rerank_pipeline.process_query("reranking boundary test", &options).await.unwrap();

            assert!(result.processing_time_ms >= 0);
            assert!(result.candidates.len() >= 0);

            for window in result.candidates.windows(2) {
                assert!(window[0].score >= window[1].score);
            }
        }
    }

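    // Ensures the pipeline tolerates extreme configurations, from all-zero
    // limits to usize::MAX / u64::MAX values, without panicking.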
    #[tokio::test]
    async fn test_pipeline_configuration_validation_and_defaults() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let mut extreme_config = PipelineConfig::default();
        extreme_config.max_candidates = 0;
        extreme_config.rerank_top_k = 0;
        extreme_config.timeout_seconds = 0;

        let extreme_pipeline = PipelineFactory::create_pipeline(
            extreme_config,
            doc_repo.clone(),
            embedding_service.clone(),
            None,
            None,
        );

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "extreme_config_test".to_string();
        options.k = 5;

        let result = extreme_pipeline.process_query("extreme config test", &options).await.unwrap();
        assert!(result.processing_time_ms >= 0);

        let mut large_config = PipelineConfig::default();
        large_config.max_candidates = usize::MAX;
        large_config.rerank_top_k = usize::MAX;
        large_config.timeout_seconds = u64::MAX;

        let large_pipeline = PipelineFactory::create_pipeline(
            large_config,
            doc_repo.clone(),
            embedding_service.clone(),
            None,
            None,
        );

        let large_result = large_pipeline.process_query("large config test", &options).await.unwrap();
        assert!(large_result.processing_time_ms >= 0);
    }

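    // Builds pipelines through the factory with and without the optional LLM
    // and reranking services and confirms each combination still answers queries.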
    #[tokio::test]
    async fn test_pipeline_factory_comprehensive_scenarios() {
        let doc_repo = Arc::new(MockDocumentRepository);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let factory_scenarios = [
            ("no_optional_services", None, None),
            ("with_llm_only", Some(Arc::new(MockLlmService) as Arc<dyn LlmService>), None),
            ("with_reranker_only", None, Some(Arc::new(MockRerankingService) as Arc<dyn RerankingService>)),
        ];

        for (test_name, llm_service, reranker_service) in factory_scenarios {
            let config = PipelineConfig {
                enable_hyde: llm_service.is_some(),
                enable_query_understanding: true,
                enable_ml_prediction: true,
                max_candidates: 20,
                rerank_enabled: reranker_service.is_some(),
                rerank_top_k: 10,
                timeout_seconds: 30,
            };

            let factory_pipeline = PipelineFactory::create_pipeline(
                config,
                doc_repo.clone(),
                embedding_service.clone(),
                llm_service,
                reranker_service,
            );

            let mut options = EnhancedQueryOptions::default();
            options.session_id = test_name.to_string();
            options.k = 10;

            let result = factory_pipeline.process_query("factory scenario test", &options).await.unwrap();
            assert!(result.processing_time_ms >= 0);
            assert!(!result.candidates.is_empty());

            if test_name == "with_llm_only" {
                assert!(result.processing_time_ms >= 0);
            }

            if test_name == "with_reranker_only" {
                assert!(result.processing_time_ms >= 0);
            }
        }
    }

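    // Forces the HydeEnhanced strategy on a default pipeline to confirm the
    // fallback path still yields candidates.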
    #[tokio::test]
    async fn test_hyde_enhanced_fallback_scenarios() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(
            doc_repo.clone(),
            embedding_service.clone(),
        );

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "hyde_test".to_string();
        options.k = 5;
        options.override_strategy = Some(RetrievalStrategy::HydeEnhanced);

        let result = pipeline.process_query("test query for hyde fallback", &options).await.unwrap();
        assert!(result.processing_time_ms >= 0);
        assert!(!result.candidates.is_empty());
    }

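    // Requests a single result to exercise reranking with a minimal candidate set.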
    #[tokio::test]
    async fn test_reranking_edge_cases() {
        let doc_repo = Arc::new(MockDocumentRepositoryWithData);
        let embedding_service = Arc::new(crate::embeddings::FallbackEmbeddingService::new(384));

        let pipeline = PipelineFactory::create_default_pipeline(
            doc_repo.clone(),
            embedding_service.clone(),
        );

        let mut options = EnhancedQueryOptions::default();
        options.session_id = "single_candidate".to_string();
        options.k = 1;

        let result = pipeline.process_query("single result test", &options).await.unwrap();
        assert!(result.processing_time_ms >= 0);
        assert!(!result.candidates.is_empty());
    }

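    // Minimal LLM stub used by the factory scenarios: returns fixed
    // hypothetical documents for HyDE expansion.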
    struct MockLlmService;

    #[async_trait]
    impl LlmService for MockLlmService {
        async fn generate_text(&self, _prompt: &str, _config: &crate::hyde::HydeConfig) -> Result<Vec<String>> {
            Ok(vec![
                "Mock hypothetical document 1 for testing".to_string(),
                "Mock hypothetical document 2 for testing".to_string(),
                "Mock hypothetical document 3 for testing".to_string(),
            ])
        }
    }

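    // Reranking stub that reverses the candidate order, which makes it easy to
    // detect whether reranking was applied.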
    struct MockRerankingService;

    #[async_trait]
    impl RerankingService for MockRerankingService {
        async fn rerank(&self, _query: &str, candidates: &[Candidate]) -> Result<Vec<Candidate>> {
            let mut reranked = candidates.to_vec();
            reranked.reverse();
            Ok(reranked)
        }
    }
}