1use super::{
6 algorithms::PageRankConfig, entity::EntityExtractionConfig, query_expansion::ExpansionConfig,
7 storage::GraphStorageConfig,
8};
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct GraphConfig {
14 pub entity_extraction: EntityExtractionConfig,
16
17 pub storage: GraphStorageConfig,
19
20 pub query_expansion: ExpansionConfig,
22
23 pub algorithms: AlgorithmConfig,
25
26 pub performance: PerformanceConfig,
28
29 pub features: FeatureFlags,
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct AlgorithmConfig {
36 pub pagerank: PageRankConfig,
38
39 pub traversal: TraversalConfig,
41
42 pub similarity: SimilarityConfig,
44
45 pub pathfinding: PathFindingConfig,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct TraversalConfig {
52 pub max_depth: usize,
54
55 pub max_nodes: usize,
57
58 pub max_distance: f32,
60
61 pub enable_early_termination: bool,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct SimilarityConfig {
68 pub default_metric: SimilarityMetric,
70
71 pub similarity_threshold: f32,
73
74 pub enable_embedding_similarity: bool,
76
77 pub enable_structural_similarity: bool,
79
80 pub similarity_weights: SimilarityWeights,
82}
83
84#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct PathFindingConfig {
87 pub max_path_length: usize,
89
90 pub max_paths: usize,
92
93 pub min_path_score: f32,
95
96 pub enable_bidirectional_search: bool,
98
99 pub scoring_method: PathScoringMethod,
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct PerformanceConfig {
106 pub enable_parallel_processing: bool,
108
109 pub num_workers: usize,
111
112 pub batch_size: usize,
114
115 pub cache_limits: CacheLimits,
117
118 pub memory_limits: MemoryLimits,
120
121 pub timeouts: TimeoutConfig,
123}
124
125#[derive(Debug, Clone, Serialize, Deserialize)]
127pub struct FeatureFlags {
128 pub entity_extraction: bool,
130
131 pub relationship_extraction: bool,
133
134 pub query_expansion: bool,
136
137 pub pagerank_scoring: bool,
139
140 pub path_based_retrieval: bool,
142
143 pub result_diversification: bool,
145
146 pub semantic_search: bool,
148
149 pub graph_reranking: bool,
151
152 pub incremental_updates: bool,
154
155 pub distributed_processing: bool,
157}
158
159#[derive(Debug, Clone, Serialize, Deserialize)]
161pub enum SimilarityMetric {
162 Cosine,
164
165 Euclidean,
167
168 Jaccard,
170
171 Dice,
173
174 Custom(String),
176}
177
178#[derive(Debug, Clone, Serialize, Deserialize)]
180pub struct SimilarityWeights {
181 pub content: f32,
183
184 pub structural: f32,
186
187 pub semantic: f32,
189
190 pub temporal: f32,
192
193 pub metadata: f32,
195}
196
197#[derive(Debug, Clone, Serialize, Deserialize)]
199pub enum PathScoringMethod {
200 Length,
202
203 EdgeWeight,
205
206 PageRank,
208
209 Combined(Vec<PathScoringFactor>),
211}
212
213#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct PathScoringFactor {
216 pub factor_type: PathFactorType,
218
219 pub weight: f32,
221}
222
223#[derive(Debug, Clone, Serialize, Deserialize)]
225pub enum PathFactorType {
226 Length,
228
229 AverageEdgeWeight,
231
232 MinEdgeWeight,
234
235 NodePageRank,
237
238 SemanticCoherence,
240}
241
242#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct CacheLimits {
245 pub max_cached_queries: usize,
247
248 pub max_cached_pagerank: usize,
250
251 pub max_cached_embeddings: usize,
253
254 pub max_cached_paths: usize,
256
257 pub cache_ttl_seconds: u64,
259}
260
261#[derive(Debug, Clone, Serialize, Deserialize)]
263pub struct MemoryLimits {
264 pub max_graph_size_mb: usize,
266
267 pub max_nodes: usize,
269
270 pub max_edges: usize,
272
273 pub cleanup_threshold_mb: usize,
275}
276
277#[derive(Debug, Clone, Serialize, Deserialize)]
279pub struct TimeoutConfig {
280 pub query_timeout_seconds: u64,
282
283 pub extraction_timeout_seconds: u64,
285
286 pub traversal_timeout_seconds: u64,
288
289 pub pagerank_timeout_seconds: u64,
291}
292
293impl Default for GraphConfig {
295 fn default() -> Self {
296 Self {
297 entity_extraction: EntityExtractionConfig::default(),
298 storage: GraphStorageConfig::default(),
299 query_expansion: ExpansionConfig::default(),
300 algorithms: AlgorithmConfig::default(),
301 performance: PerformanceConfig::default(),
302 features: FeatureFlags::default(),
303 }
304 }
305}
306
307impl Default for AlgorithmConfig {
308 fn default() -> Self {
309 Self {
310 pagerank: PageRankConfig::default(),
311 traversal: TraversalConfig::default(),
312 similarity: SimilarityConfig::default(),
313 pathfinding: PathFindingConfig::default(),
314 }
315 }
316}
317
318impl Default for TraversalConfig {
319 fn default() -> Self {
320 Self {
321 max_depth: 5,
322 max_nodes: 1000,
323 max_distance: 10.0,
324 enable_early_termination: true,
325 }
326 }
327}
328
329impl Default for SimilarityConfig {
330 fn default() -> Self {
331 Self {
332 default_metric: SimilarityMetric::Cosine,
333 similarity_threshold: 0.7,
334 enable_embedding_similarity: true,
335 enable_structural_similarity: true,
336 similarity_weights: SimilarityWeights::default(),
337 }
338 }
339}
340
341impl Default for SimilarityWeights {
342 fn default() -> Self {
343 Self {
344 content: 0.4,
345 structural: 0.2,
346 semantic: 0.3,
347 temporal: 0.05,
348 metadata: 0.05,
349 }
350 }
351}
352
353impl Default for PathFindingConfig {
354 fn default() -> Self {
355 Self {
356 max_path_length: 6,
357 max_paths: 10,
358 min_path_score: 0.1,
359 enable_bidirectional_search: true,
360 scoring_method: PathScoringMethod::Combined(vec![
361 PathScoringFactor {
362 factor_type: PathFactorType::Length,
363 weight: 0.3,
364 },
365 PathScoringFactor {
366 factor_type: PathFactorType::AverageEdgeWeight,
367 weight: 0.4,
368 },
369 PathScoringFactor {
370 factor_type: PathFactorType::NodePageRank,
371 weight: 0.3,
372 },
373 ]),
374 }
375 }
376}
377
378impl Default for PerformanceConfig {
379 fn default() -> Self {
380 Self {
381 enable_parallel_processing: true,
382 num_workers: num_cpus::get(),
383 batch_size: 100,
384 cache_limits: CacheLimits::default(),
385 memory_limits: MemoryLimits::default(),
386 timeouts: TimeoutConfig::default(),
387 }
388 }
389}
390
391impl Default for FeatureFlags {
392 fn default() -> Self {
393 Self {
394 entity_extraction: true,
395 relationship_extraction: true,
396 query_expansion: true,
397 pagerank_scoring: true,
398 path_based_retrieval: true,
399 result_diversification: true,
400 semantic_search: true,
401 graph_reranking: true,
402 incremental_updates: false,
403 distributed_processing: false,
404 }
405 }
406}
407
408impl Default for CacheLimits {
409 fn default() -> Self {
410 Self {
411 max_cached_queries: 1000,
412 max_cached_pagerank: 1,
413 max_cached_embeddings: 10000,
414 max_cached_paths: 5000,
415 cache_ttl_seconds: 3600, }
417 }
418}
419
420impl Default for MemoryLimits {
421 fn default() -> Self {
422 Self {
423 max_graph_size_mb: 1024, max_nodes: 1_000_000,
425 max_edges: 5_000_000,
426 cleanup_threshold_mb: 800,
427 }
428 }
429}
430
431impl Default for TimeoutConfig {
432 fn default() -> Self {
433 Self {
434 query_timeout_seconds: 30,
435 extraction_timeout_seconds: 300, traversal_timeout_seconds: 60,
437 pagerank_timeout_seconds: 600, }
439 }
440}
441
442pub struct GraphConfigBuilder {
444 config: GraphConfig,
445}
446
447impl GraphConfigBuilder {
448 pub fn new() -> Self {
450 Self {
451 config: GraphConfig::default(),
452 }
453 }
454
455 pub fn with_entity_extraction(mut self, enabled: bool) -> Self {
457 self.config.features.entity_extraction = enabled;
458 self
459 }
460
461 pub fn with_entity_confidence_threshold(mut self, threshold: f32) -> Self {
463 self.config.entity_extraction.min_confidence = threshold;
464 self
465 }
466
467 pub fn with_query_expansion(mut self, enabled: bool) -> Self {
469 self.config.features.query_expansion = enabled;
470 self
471 }
472
473 pub fn with_max_expansion_terms(mut self, max_terms: usize) -> Self {
475 self.config.query_expansion.max_expansion_terms = max_terms;
476 self
477 }
478
479 pub fn with_pagerank_scoring(mut self, enabled: bool) -> Self {
481 self.config.features.pagerank_scoring = enabled;
482 self
483 }
484
485 pub fn with_pagerank_damping_factor(mut self, damping_factor: f32) -> Self {
487 self.config.algorithms.pagerank.damping_factor = damping_factor;
488 self
489 }
490
491 pub fn with_traversal_limits(mut self, max_depth: usize, max_nodes: usize) -> Self {
493 self.config.algorithms.traversal.max_depth = max_depth;
494 self.config.algorithms.traversal.max_nodes = max_nodes;
495 self
496 }
497
498 pub fn with_similarity_threshold(mut self, threshold: f32) -> Self {
500 self.config.algorithms.similarity.similarity_threshold = threshold;
501 self
502 }
503
504 pub fn with_parallel_processing(mut self, enabled: bool) -> Self {
506 self.config.performance.enable_parallel_processing = enabled;
507 self
508 }
509
510 pub fn with_num_workers(mut self, num_workers: usize) -> Self {
512 self.config.performance.num_workers = num_workers;
513 self
514 }
515
516 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
518 self.config.performance.batch_size = batch_size;
519 self
520 }
521
522 pub fn with_memory_limits(
524 mut self,
525 max_graph_size_mb: usize,
526 max_nodes: usize,
527 max_edges: usize,
528 ) -> Self {
529 self.config.performance.memory_limits.max_graph_size_mb = max_graph_size_mb;
530 self.config.performance.memory_limits.max_nodes = max_nodes;
531 self.config.performance.memory_limits.max_edges = max_edges;
532 self
533 }
534
535 pub fn with_query_timeout(mut self, timeout_seconds: u64) -> Self {
537 self.config.performance.timeouts.query_timeout_seconds = timeout_seconds;
538 self
539 }
540
541 pub fn with_all_features(mut self) -> Self {
543 self.config.features = FeatureFlags {
544 entity_extraction: true,
545 relationship_extraction: true,
546 query_expansion: true,
547 pagerank_scoring: true,
548 path_based_retrieval: true,
549 result_diversification: true,
550 semantic_search: true,
551 graph_reranking: true,
552 incremental_updates: true,
553 distributed_processing: false, };
555 self
556 }
557
558 pub fn with_minimal_features(mut self) -> Self {
560 self.config.features = FeatureFlags {
561 entity_extraction: true,
562 relationship_extraction: false,
563 query_expansion: true,
564 pagerank_scoring: false,
565 path_based_retrieval: false,
566 result_diversification: false,
567 semantic_search: true,
568 graph_reranking: false,
569 incremental_updates: false,
570 distributed_processing: false,
571 };
572 self
573 }
574
575 pub fn build(self) -> GraphConfig {
577 self.config
578 }
579}
580
581impl Default for GraphConfigBuilder {
582 fn default() -> Self {
583 Self::new()
584 }
585}
586
587impl GraphConfig {
589 pub fn validate(&self) -> Result<Vec<String>, Vec<String>> {
591 let mut warnings = Vec::new();
592 let mut errors = Vec::new();
593
594 if self.features.entity_extraction {
596 if self.entity_extraction.min_confidence < 0.0
597 || self.entity_extraction.min_confidence > 1.0
598 {
599 errors.push("Entity extraction confidence must be between 0.0 and 1.0".to_string());
600 }
601
602 if self.entity_extraction.max_entity_length == 0 {
603 errors.push("Maximum entity length must be greater than 0".to_string());
604 }
605 }
606
607 if self.features.query_expansion {
609 if self.query_expansion.max_expansion_terms == 0 {
610 warnings.push(
611 "Maximum expansion terms is 0, query expansion will be ineffective".to_string(),
612 );
613 }
614 }
615
616 if self.algorithms.pagerank.damping_factor < 0.0
618 || self.algorithms.pagerank.damping_factor > 1.0
619 {
620 errors.push("PageRank damping factor must be between 0.0 and 1.0".to_string());
621 }
622
623 if self.algorithms.traversal.max_depth == 0 {
624 errors.push("Maximum traversal depth must be greater than 0".to_string());
625 }
626
627 if self.algorithms.similarity.similarity_threshold < 0.0
628 || self.algorithms.similarity.similarity_threshold > 1.0
629 {
630 errors.push("Similarity threshold must be between 0.0 and 1.0".to_string());
631 }
632
633 if self.performance.num_workers == 0 {
635 errors.push("Number of workers must be greater than 0".to_string());
636 }
637
638 if self.performance.batch_size == 0 {
639 errors.push("Batch size must be greater than 0".to_string());
640 }
641
642 if self.performance.memory_limits.max_nodes == 0 {
644 errors.push("Maximum number of nodes must be greater than 0".to_string());
645 }
646
647 if self.performance.memory_limits.max_edges == 0 {
648 errors.push("Maximum number of edges must be greater than 0".to_string());
649 }
650
651 if !self.features.entity_extraction && self.features.relationship_extraction {
653 warnings.push(
654 "Relationship extraction requires entity extraction to be enabled".to_string(),
655 );
656 }
657
658 if !self.features.pagerank_scoring
659 && self.algorithms.pathfinding.scoring_method.uses_pagerank()
660 {
661 warnings
662 .push("Path scoring uses PageRank but PageRank scoring is disabled".to_string());
663 }
664
665 if errors.is_empty() {
666 Ok(warnings)
667 } else {
668 Err(errors)
669 }
670 }
671}
672
673impl PathScoringMethod {
674 pub fn uses_pagerank(&self) -> bool {
676 match self {
677 PathScoringMethod::PageRank => true,
678 PathScoringMethod::Combined(factors) => factors
679 .iter()
680 .any(|f| matches!(f.factor_type, PathFactorType::NodePageRank)),
681 _ => false,
682 }
683 }
684}
685
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration enables the core features and carries sane values.
    #[test]
    fn test_default_config() {
        let defaults = GraphConfig::default();
        let flags = &defaults.features;

        assert!(flags.entity_extraction);
        assert!(flags.query_expansion);
        assert!(flags.pagerank_scoring);

        let damping = defaults.algorithms.pagerank.damping_factor;
        assert!(damping > 0.0);
        assert!(damping < 1.0);
        assert!(defaults.algorithms.traversal.max_depth > 0);
        assert!(defaults.performance.batch_size > 0);
    }

    /// Every builder setter lands in the corresponding config field.
    #[test]
    fn test_config_builder() {
        let builder = GraphConfigBuilder::new()
            .with_entity_extraction(true)
            .with_entity_confidence_threshold(0.8)
            .with_query_expansion(true)
            .with_max_expansion_terms(15)
            .with_pagerank_scoring(true)
            .with_pagerank_damping_factor(0.9)
            .with_parallel_processing(true)
            .with_num_workers(4)
            .with_batch_size(50);
        let built = builder.build();

        // Feature flags.
        assert!(built.features.entity_extraction);
        assert!(built.features.query_expansion);
        assert!(built.features.pagerank_scoring);

        // Section values.
        assert_eq!(built.entity_extraction.min_confidence, 0.8);
        assert_eq!(built.query_expansion.max_expansion_terms, 15);
        assert_eq!(built.algorithms.pagerank.damping_factor, 0.9);

        // Performance settings.
        assert!(built.performance.enable_parallel_processing);
        assert_eq!(built.performance.num_workers, 4);
        assert_eq!(built.performance.batch_size, 50);
    }

    /// Validation accepts the defaults and rejects out-of-range values.
    #[test]
    fn test_config_validation() {
        let mut config = GraphConfig::default();

        // Defaults must validate.
        assert!(config.validate().is_ok());

        // A damping factor above 1.0 is an error.
        config.algorithms.pagerank.damping_factor = 1.5;
        assert!(config.validate().is_err());

        // Restore the damping factor, then break the worker count instead.
        config.algorithms.pagerank.damping_factor = 0.85;
        config.performance.num_workers = 0;
        assert!(config.validate().is_err());
    }

    /// The feature presets switch the expected flags.
    #[test]
    fn test_minimal_and_full_features() {
        let minimal = GraphConfigBuilder::new()
            .with_minimal_features()
            .build()
            .features;

        assert!(minimal.entity_extraction);
        assert!(!minimal.relationship_extraction);
        assert!(!minimal.pagerank_scoring);

        let full = GraphConfigBuilder::new()
            .with_all_features()
            .build()
            .features;

        assert!(full.entity_extraction);
        assert!(full.relationship_extraction);
        assert!(full.pagerank_scoring);
        assert!(full.incremental_updates);
    }
}