1#![allow(dead_code)]
7
8use super::dna_structures::DnaDataStructure;
9use crate::model::Triple;
10use crate::OxirsError;
11use serde::{Deserialize, Serialize};
12use std::time::{Duration, Instant};
13
14pub struct GeneticGraphOptimizer {
16 population: Vec<GraphStructure>,
18 fitness_function: Box<dyn Fn(&GraphStructure) -> f64 + Send + Sync>,
20 mutation_rate: f64,
22 crossover_rate: f64,
24 generations: usize,
26 population_size: usize,
28 elite_size: usize,
30 current_generation: usize,
32 best_fitness: f64,
34 evolution_history: Vec<GenerationStats>,
36 best_individual: Option<GraphStructure>,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct GraphStructure {
43 pub dna: DnaDataStructure,
45 pub indexing_genes: IndexingGenes,
47 pub storage_genes: StorageGenes,
49 pub access_genes: AccessGenes,
51 pub fitness: f64,
53 pub age: usize,
55 pub mutations: Vec<MutationType>,
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct IndexingGenes {
62 pub primary_index: IndexGene,
64 pub secondary_indexes: Vec<IndexGene>,
66 pub compression: CompressionGene,
68 pub adaptive_triggers: Vec<AdaptiveTrigger>,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct IndexGene {
75 pub index_type: String,
77 pub parameters: Vec<f64>,
79 pub enabled: bool,
81 pub priority: u8,
83}
84
85#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct StorageGenes {
88 pub block_size: u32,
90 pub clustering: ClusteringGene,
92 pub partitioning: PartitioningGene,
94 pub caching: CachingGene,
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct ClusteringGene {
101 pub algorithm: String,
103 pub target_size: u32,
105 pub similarity_threshold: f64,
107 pub rebalance_frequency: u32,
109}
110
111#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct PartitioningGene {
114 pub method: String,
116 pub partition_count: u32,
118 pub load_balance_factor: f64,
120 pub hot_data_threshold: f64,
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize)]
126pub struct CachingGene {
127 pub cache_size_mb: u32,
129 pub eviction_policy: String,
131 pub prefetch_strategy: String,
133 pub write_policy: String,
135}
136
137#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct CompressionGene {
140 pub algorithm: String,
142 pub level: u8,
144 pub block_size: u32,
146 pub dictionary_size: u32,
148}
149
150#[derive(Debug, Clone, Serialize, Deserialize)]
152pub struct AccessGenes {
153 pub read_patterns: Vec<AccessPattern>,
155 pub write_patterns: Vec<AccessPattern>,
157 pub query_preferences: QueryPreferences,
159 pub concurrency: ConcurrencyGene,
161}
162
163#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct AccessPattern {
166 pub pattern_id: String,
168 pub frequency: f64,
170 pub optimization: String,
172 pub buffer_size: u32,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize)]
178pub struct QueryPreferences {
179 pub join_algorithm: String,
181 pub index_selection: String,
183 pub result_caching: String,
185 pub parallel_execution: bool,
187}
188
189#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct ConcurrencyGene {
192 pub max_readers: u32,
194 pub max_writers: u32,
196 pub lock_timeout_ms: u32,
198 pub thread_pool_size: u32,
200}
201
202#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct AdaptiveTrigger {
205 pub condition: String,
207 pub threshold: f64,
209 pub action: String,
211 pub cooldown_seconds: u32,
213}
214
215#[derive(Debug, Clone, Serialize, Deserialize)]
217pub enum MutationType {
218 IndexMutation {
220 index_id: String,
221 old_value: String,
222 new_value: String,
223 },
224 StorageMutation {
226 parameter: String,
227 old_value: f64,
228 new_value: f64,
229 },
230 AccessMutation {
232 pattern_id: String,
233 modification: String,
234 },
235 GeneDeletion { gene_type: String, gene_id: String },
237 GeneDuplication { gene_type: String, gene_id: String },
239 ChromosomalRearrangement {
241 old_order: Vec<String>,
242 new_order: Vec<String>,
243 },
244}
245
246#[derive(Debug, Clone)]
248pub struct GenerationStats {
249 pub generation: usize,
251 pub best_fitness: f64,
253 pub average_fitness: f64,
255 pub worst_fitness: f64,
257 pub diversity: f64,
259 pub mutations: usize,
261 pub crossovers: usize,
263 pub evolution_time: Duration,
265}
266
267impl GeneticGraphOptimizer {
268 pub fn best_fitness(&self) -> f64 {
270 self.best_fitness
271 }
272
273 pub fn best_individual(&self) -> Option<&GraphStructure> {
275 self.best_individual.as_ref()
276 }
277
278 fn update_best_individual(&mut self, individual: &GraphStructure) {
280 if individual.fitness > self.best_fitness {
281 self.best_fitness = individual.fitness;
282 self.best_individual = Some(individual.clone());
283 }
284 }
285 pub fn new(
287 population_size: usize,
288 fitness_function: Box<dyn Fn(&GraphStructure) -> f64 + Send + Sync>,
289 ) -> Self {
290 Self {
291 population: Vec::new(),
292 fitness_function,
293 mutation_rate: 0.1,
294 crossover_rate: 0.8,
295 generations: 100,
296 population_size,
297 elite_size: population_size / 10,
298 current_generation: 0,
299 best_fitness: 0.0,
300 evolution_history: Vec::new(),
301 best_individual: None,
302 }
303 }
304
305 pub fn set_parameters(&mut self, mutation_rate: f64, crossover_rate: f64, generations: usize) {
307 self.mutation_rate = mutation_rate;
308 self.crossover_rate = crossover_rate;
309 self.generations = generations;
310 }
311
312 pub fn initialize_population(&mut self, base_triples: &[Triple]) -> Result<(), OxirsError> {
314 self.population.clear();
315
316 for _ in 0..self.population_size {
317 let mut structure = self.create_random_structure(base_triples)?;
318 structure.fitness = (self.fitness_function)(&structure);
319 self.population.push(structure);
320 }
321
322 self.population.sort_by(|a, b| {
324 b.fitness
325 .partial_cmp(&a.fitness)
326 .unwrap_or(std::cmp::Ordering::Equal)
327 });
328 self.best_fitness = self.population[0].fitness;
329
330 Ok(())
331 }
332
333 fn create_random_structure(
335 &self,
336 base_triples: &[Triple],
337 ) -> Result<GraphStructure, OxirsError> {
338 let mut dna = DnaDataStructure::new();
339
340 for triple in base_triples {
342 dna.encode_triple(triple)?;
343 }
344
345 let indexing_genes = self.generate_random_indexing_genes();
347 let storage_genes = self.generate_random_storage_genes();
348 let access_genes = self.generate_random_access_genes();
349
350 Ok(GraphStructure {
351 dna,
352 indexing_genes,
353 storage_genes,
354 access_genes,
355 fitness: 0.0,
356 age: 0,
357 mutations: Vec::new(),
358 })
359 }
360
361 fn generate_random_indexing_genes(&self) -> IndexingGenes {
363 IndexingGenes {
364 primary_index: IndexGene {
365 index_type: "SPO".to_string(),
366 parameters: vec![fastrand::f64(), fastrand::f64(), fastrand::f64()],
367 enabled: true,
368 priority: 10,
369 },
370 secondary_indexes: vec![
371 IndexGene {
372 index_type: "POS".to_string(),
373 parameters: vec![fastrand::f64(), fastrand::f64()],
374 enabled: fastrand::bool(),
375 priority: fastrand::u8(..10),
376 },
377 IndexGene {
378 index_type: "OSP".to_string(),
379 parameters: vec![fastrand::f64(), fastrand::f64()],
380 enabled: fastrand::bool(),
381 priority: fastrand::u8(..10),
382 },
383 ],
384 compression: CompressionGene {
385 algorithm: "LZ4".to_string(),
386 level: fastrand::u8(1..10),
387 block_size: 4096 * fastrand::u32(1..17),
388 dictionary_size: 1024 * fastrand::u32(1..65),
389 },
390 adaptive_triggers: vec![AdaptiveTrigger {
391 condition: "high_load".to_string(),
392 threshold: 0.8 + fastrand::f64() * 0.2,
393 action: "create_index".to_string(),
394 cooldown_seconds: 60 + fastrand::u32(..300),
395 }],
396 }
397 }
398
399 fn generate_random_storage_genes(&self) -> StorageGenes {
401 StorageGenes {
402 block_size: 4096 * fastrand::u32(1..33),
403 clustering: ClusteringGene {
404 algorithm: "k-means".to_string(),
405 target_size: 100 + fastrand::u32(..900),
406 similarity_threshold: 0.1 + fastrand::f64() * 0.8,
407 rebalance_frequency: 1000 + fastrand::u32(..9000),
408 },
409 partitioning: PartitioningGene {
410 method: "hash".to_string(),
411 partition_count: 4 + fastrand::u32(..28),
412 load_balance_factor: 0.1 + fastrand::f64() * 0.4,
413 hot_data_threshold: 0.8 + fastrand::f64() * 0.2,
414 },
415 caching: CachingGene {
416 cache_size_mb: 64 + fastrand::u32(..1936),
417 eviction_policy: "LRU".to_string(),
418 prefetch_strategy: "sequential".to_string(),
419 write_policy: "write-back".to_string(),
420 },
421 }
422 }
423
424 fn generate_random_access_genes(&self) -> AccessGenes {
426 AccessGenes {
427 read_patterns: vec![
428 AccessPattern {
429 pattern_id: "sequential".to_string(),
430 frequency: fastrand::f64(),
431 optimization: "prefetch".to_string(),
432 buffer_size: 1024 + fastrand::u32(..7168),
433 },
434 AccessPattern {
435 pattern_id: "random".to_string(),
436 frequency: fastrand::f64(),
437 optimization: "cache".to_string(),
438 buffer_size: 512 + fastrand::u32(..3584),
439 },
440 ],
441 write_patterns: vec![AccessPattern {
442 pattern_id: "batch".to_string(),
443 frequency: fastrand::f64(),
444 optimization: "buffer".to_string(),
445 buffer_size: 2048 + fastrand::u32(..14336),
446 }],
447 query_preferences: QueryPreferences {
448 join_algorithm: "hash_join".to_string(),
449 index_selection: "cost_based".to_string(),
450 result_caching: "enabled".to_string(),
451 parallel_execution: fastrand::bool(),
452 },
453 concurrency: ConcurrencyGene {
454 max_readers: 10 + fastrand::u32(..90),
455 max_writers: 1 + fastrand::u32(..9),
456 lock_timeout_ms: 100 + fastrand::u32(..1900),
457 thread_pool_size: 4 + fastrand::u32(..28),
458 },
459 }
460 }
461
462 pub fn evolve_generation(&mut self) -> Result<GenerationStats, OxirsError> {
464 let start_time = Instant::now();
465 let mut mutations = 0;
466 let mut crossovers = 0;
467
468 let mut new_population = Vec::new();
470
471 for i in 0..self.elite_size {
473 let mut elite = self.population[i].clone();
474 elite.age += 1;
475 new_population.push(elite);
476 }
477
478 while new_population.len() < self.population_size {
480 let parent1 = self.tournament_selection();
482 let parent2 = self.tournament_selection();
483
484 let mut offspring = if fastrand::f64() < self.crossover_rate {
486 crossovers += 1;
487 self.crossover(&parent1, &parent2)?
488 } else {
489 parent1.clone()
490 };
491
492 if fastrand::f64() < self.mutation_rate {
494 mutations += 1;
495 self.mutate(&mut offspring)?;
496 }
497
498 offspring.fitness = (self.fitness_function)(&offspring);
500 offspring.age = 0;
501
502 new_population.push(offspring);
503 }
504
505 self.population = new_population;
507
508 self.population.sort_by(|a, b| {
510 b.fitness
511 .partial_cmp(&a.fitness)
512 .unwrap_or(std::cmp::Ordering::Equal)
513 });
514
515 if self.population[0].fitness > self.best_fitness {
517 self.best_fitness = self.population[0].fitness;
518 }
519
520 let stats = GenerationStats {
522 generation: self.current_generation,
523 best_fitness: self.population[0].fitness,
524 average_fitness: self.population.iter().map(|s| s.fitness).sum::<f64>()
525 / self.population.len() as f64,
526 worst_fitness: self
527 .population
528 .last()
529 .expect("collection validated to be non-empty")
530 .fitness,
531 diversity: self.calculate_diversity(),
532 mutations,
533 crossovers,
534 evolution_time: start_time.elapsed(),
535 };
536
537 self.evolution_history.push(stats.clone());
538 self.current_generation += 1;
539
540 Ok(stats)
541 }
542
543 fn tournament_selection(&self) -> GraphStructure {
545 let tournament_size = 3;
546 let mut best_fitness = -1.0;
547 let mut best_individual = self.population[0].clone();
548
549 for _ in 0..tournament_size {
550 let candidate = &self.population[fastrand::usize(..self.population.len())];
551 if candidate.fitness > best_fitness {
552 best_fitness = candidate.fitness;
553 best_individual = candidate.clone();
554 }
555 }
556
557 best_individual
558 }
559
560 fn crossover(
562 &self,
563 parent1: &GraphStructure,
564 parent2: &GraphStructure,
565 ) -> Result<GraphStructure, OxirsError> {
566 let mut offspring = parent1.clone();
567
568 if fastrand::bool() {
570 offspring.indexing_genes.compression = parent2.indexing_genes.compression.clone();
571 }
572
573 if fastrand::bool() {
574 offspring.indexing_genes.secondary_indexes =
575 parent2.indexing_genes.secondary_indexes.clone();
576 }
577
578 if fastrand::bool() {
580 offspring.storage_genes.clustering = parent2.storage_genes.clustering.clone();
581 }
582
583 if fastrand::bool() {
584 offspring.storage_genes.caching = parent2.storage_genes.caching.clone();
585 }
586
587 if fastrand::bool() {
589 offspring.access_genes.query_preferences =
590 parent2.access_genes.query_preferences.clone();
591 }
592
593 Ok(offspring)
594 }
595
596 fn mutate(&self, structure: &mut GraphStructure) -> Result<(), OxirsError> {
598 let mutation_types = ["storage_parameter", "compression_level", "cache_size"];
599
600 let mutation_type = &mutation_types[fastrand::usize(..mutation_types.len())];
601
602 match *mutation_type {
603 "storage_parameter" => {
604 let old_value = structure.storage_genes.block_size as f64;
605 structure.storage_genes.block_size = 4096 * fastrand::u32(1..33);
606
607 structure.mutations.push(MutationType::StorageMutation {
608 parameter: "block_size".to_string(),
609 old_value,
610 new_value: structure.storage_genes.block_size as f64,
611 });
612 }
613 "compression_level" => {
614 let old_level = structure.indexing_genes.compression.level;
615 structure.indexing_genes.compression.level = fastrand::u8(1..10);
616
617 structure.mutations.push(MutationType::StorageMutation {
618 parameter: "compression_level".to_string(),
619 old_value: old_level as f64,
620 new_value: structure.indexing_genes.compression.level as f64,
621 });
622 }
623 "cache_size" => {
624 let old_size = structure.storage_genes.caching.cache_size_mb;
625 structure.storage_genes.caching.cache_size_mb = 64 + fastrand::u32(..1936);
626
627 structure.mutations.push(MutationType::StorageMutation {
628 parameter: "cache_size_mb".to_string(),
629 old_value: old_size as f64,
630 new_value: structure.storage_genes.caching.cache_size_mb as f64,
631 });
632 }
633 _ => {
634 let old_size = structure.storage_genes.caching.cache_size_mb;
636 structure.storage_genes.caching.cache_size_mb = 64 + fastrand::u32(..1936);
637
638 structure.mutations.push(MutationType::StorageMutation {
639 parameter: "cache_size_mb".to_string(),
640 old_value: old_size as f64,
641 new_value: structure.storage_genes.caching.cache_size_mb as f64,
642 });
643 }
644 }
645
646 Ok(())
647 }
648
649 fn calculate_diversity(&self) -> f64 {
651 let avg_fitness =
653 self.population.iter().map(|s| s.fitness).sum::<f64>() / self.population.len() as f64;
654 let variance = self
655 .population
656 .iter()
657 .map(|s| (s.fitness - avg_fitness).powi(2))
658 .sum::<f64>()
659 / self.population.len() as f64;
660
661 variance.sqrt()
662 }
663
664 pub fn evolve(&mut self) -> Result<GraphStructure, OxirsError> {
666 for generation in 0..self.generations {
667 let stats = self.evolve_generation()?;
668
669 if generation % 10 == 0 {
671 println!(
672 "Generation {}: Best={:.4}, Avg={:.4}, Diversity={:.4}",
673 stats.generation, stats.best_fitness, stats.average_fitness, stats.diversity
674 );
675 }
676
677 if generation > 50 && self.evolution_history.len() >= 20 {
679 let recent_best = self
680 .evolution_history
681 .iter()
682 .rev()
683 .take(20)
684 .map(|s| s.best_fitness)
685 .fold(0.0, f64::max);
686
687 if (recent_best - self.best_fitness).abs() < 0.001 {
688 println!("Early termination: No improvement in 20 generations");
689 break;
690 }
691 }
692 }
693
694 Ok(self.population[0].clone())
695 }
696
697 pub fn get_best_structure(&self) -> Option<&GraphStructure> {
699 self.population.first()
700 }
701
702 pub fn get_evolution_history(&self) -> &[GenerationStats] {
704 &self.evolution_history
705 }
706}
707
708pub fn default_fitness_function(structure: &GraphStructure) -> f64 {
710 let mut fitness = 0.0;
711
712 fitness += structure.indexing_genes.secondary_indexes.len() as f64 * 0.1;
714
715 let cache_mb = structure.storage_genes.caching.cache_size_mb as f64;
717 fitness += if (128.0..=512.0).contains(&cache_mb) {
718 0.2
719 } else {
720 0.0
721 };
722
723 let readers = structure.access_genes.concurrency.max_readers as f64;
725 let writers = structure.access_genes.concurrency.max_writers as f64;
726 fitness += if readers >= 10.0 && writers >= 2.0 && readers / writers <= 50.0 {
727 0.2
728 } else {
729 0.0
730 };
731
732 let compression_level = structure.indexing_genes.compression.level as f64;
734 fitness += if (3.0..=7.0).contains(&compression_level) {
735 0.1
736 } else {
737 0.0
738 };
739
740 let block_size = structure.storage_genes.block_size as f64;
742 fitness += if (8192.0..=65536.0).contains(&block_size) {
743 0.1
744 } else {
745 0.0
746 };
747
748 fitness += fastrand::f64() * 0.1;
750
751 fitness
752}
753
754#[cfg(test)]
755mod tests {
756 use super::*;
757 use crate::model::{Literal, NamedNode};
758
759 #[test]
760 fn test_genetic_optimizer_creation() {
761 let fitness_fn = Box::new(default_fitness_function);
762 let optimizer = GeneticGraphOptimizer::new(20, fitness_fn);
763
764 assert_eq!(optimizer.population_size, 20);
765 assert_eq!(optimizer.elite_size, 2);
766 }
767
768 #[test]
769 fn test_random_structure_generation() {
770 let fitness_fn = Box::new(default_fitness_function);
771 let optimizer = GeneticGraphOptimizer::new(10, fitness_fn);
772
773 let triples = vec![Triple::new(
774 NamedNode::new("http://example.org/subject").expect("valid IRI"),
775 NamedNode::new("http://example.org/predicate").expect("valid IRI"),
776 Literal::new("object"),
777 )];
778
779 let structure = optimizer.create_random_structure(&triples);
780 assert!(structure.is_ok());
781
782 let structure = structure.expect("structure should be available");
783 assert!(structure.fitness >= 0.0);
784 assert!(!structure.indexing_genes.secondary_indexes.is_empty());
785 }
786
787 #[test]
788 fn test_mutation() {
789 let fitness_fn = Box::new(default_fitness_function);
790 let optimizer = GeneticGraphOptimizer::new(10, fitness_fn);
791
792 let triples = vec![Triple::new(
793 NamedNode::new("http://example.org/subject").expect("valid IRI"),
794 NamedNode::new("http://example.org/predicate").expect("valid IRI"),
795 Literal::new("object"),
796 )];
797
798 let mut structure = optimizer
799 .create_random_structure(&triples)
800 .expect("operation should succeed");
801 let _old_block_size = structure.storage_genes.block_size;
802
803 optimizer
804 .mutate(&mut structure)
805 .expect("operation should succeed");
806
807 assert!(!structure.mutations.is_empty());
809 }
810
811 #[test]
812 fn test_fitness_function() {
813 let fitness_fn = Box::new(default_fitness_function);
814 let optimizer = GeneticGraphOptimizer::new(10, fitness_fn);
815
816 let triples = vec![Triple::new(
817 NamedNode::new("http://example.org/subject").expect("valid IRI"),
818 NamedNode::new("http://example.org/predicate").expect("valid IRI"),
819 Literal::new("object"),
820 )];
821
822 let structure = optimizer
823 .create_random_structure(&triples)
824 .expect("operation should succeed");
825 let fitness = default_fitness_function(&structure);
826
827 assert!(fitness >= 0.0);
828 assert!(fitness <= 1.0); }
830}