#![allow(dead_code)]
use super::dna_structures::DnaDataStructure;
use crate::model::Triple;
use crate::OxirsError;
use serde::{Deserialize, Serialize};
use std::time::{Duration, Instant};
pub struct GeneticGraphOptimizer {
population: Vec<GraphStructure>,
fitness_function: Box<dyn Fn(&GraphStructure) -> f64 + Send + Sync>,
mutation_rate: f64,
crossover_rate: f64,
generations: usize,
population_size: usize,
elite_size: usize,
current_generation: usize,
best_fitness: f64,
evolution_history: Vec<GenerationStats>,
best_individual: Option<GraphStructure>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphStructure {
pub dna: DnaDataStructure,
pub indexing_genes: IndexingGenes,
pub storage_genes: StorageGenes,
pub access_genes: AccessGenes,
pub fitness: f64,
pub age: usize,
pub mutations: Vec<MutationType>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexingGenes {
pub primary_index: IndexGene,
pub secondary_indexes: Vec<IndexGene>,
pub compression: CompressionGene,
pub adaptive_triggers: Vec<AdaptiveTrigger>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexGene {
pub index_type: String,
pub parameters: Vec<f64>,
pub enabled: bool,
pub priority: u8,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageGenes {
pub block_size: u32,
pub clustering: ClusteringGene,
pub partitioning: PartitioningGene,
pub caching: CachingGene,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusteringGene {
pub algorithm: String,
pub target_size: u32,
pub similarity_threshold: f64,
pub rebalance_frequency: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartitioningGene {
pub method: String,
pub partition_count: u32,
pub load_balance_factor: f64,
pub hot_data_threshold: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachingGene {
pub cache_size_mb: u32,
pub eviction_policy: String,
pub prefetch_strategy: String,
pub write_policy: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressionGene {
pub algorithm: String,
pub level: u8,
pub block_size: u32,
pub dictionary_size: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccessGenes {
pub read_patterns: Vec<AccessPattern>,
pub write_patterns: Vec<AccessPattern>,
pub query_preferences: QueryPreferences,
pub concurrency: ConcurrencyGene,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccessPattern {
pub pattern_id: String,
pub frequency: f64,
pub optimization: String,
pub buffer_size: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryPreferences {
pub join_algorithm: String,
pub index_selection: String,
pub result_caching: String,
pub parallel_execution: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConcurrencyGene {
pub max_readers: u32,
pub max_writers: u32,
pub lock_timeout_ms: u32,
pub thread_pool_size: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdaptiveTrigger {
pub condition: String,
pub threshold: f64,
pub action: String,
pub cooldown_seconds: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MutationType {
IndexMutation {
index_id: String,
old_value: String,
new_value: String,
},
StorageMutation {
parameter: String,
old_value: f64,
new_value: f64,
},
AccessMutation {
pattern_id: String,
modification: String,
},
GeneDeletion { gene_type: String, gene_id: String },
GeneDuplication { gene_type: String, gene_id: String },
ChromosomalRearrangement {
old_order: Vec<String>,
new_order: Vec<String>,
},
}
#[derive(Debug, Clone)]
pub struct GenerationStats {
pub generation: usize,
pub best_fitness: f64,
pub average_fitness: f64,
pub worst_fitness: f64,
pub diversity: f64,
pub mutations: usize,
pub crossovers: usize,
pub evolution_time: Duration,
}
impl GeneticGraphOptimizer {
pub fn best_fitness(&self) -> f64 {
self.best_fitness
}
pub fn best_individual(&self) -> Option<&GraphStructure> {
self.best_individual.as_ref()
}
fn update_best_individual(&mut self, individual: &GraphStructure) {
if individual.fitness > self.best_fitness {
self.best_fitness = individual.fitness;
self.best_individual = Some(individual.clone());
}
}
pub fn new(
population_size: usize,
fitness_function: Box<dyn Fn(&GraphStructure) -> f64 + Send + Sync>,
) -> Self {
Self {
population: Vec::new(),
fitness_function,
mutation_rate: 0.1,
crossover_rate: 0.8,
generations: 100,
population_size,
elite_size: population_size / 10,
current_generation: 0,
best_fitness: 0.0,
evolution_history: Vec::new(),
best_individual: None,
}
}
pub fn set_parameters(&mut self, mutation_rate: f64, crossover_rate: f64, generations: usize) {
self.mutation_rate = mutation_rate;
self.crossover_rate = crossover_rate;
self.generations = generations;
}
pub fn initialize_population(&mut self, base_triples: &[Triple]) -> Result<(), OxirsError> {
self.population.clear();
for _ in 0..self.population_size {
let mut structure = self.create_random_structure(base_triples)?;
structure.fitness = (self.fitness_function)(&structure);
self.population.push(structure);
}
self.population.sort_by(|a, b| {
b.fitness
.partial_cmp(&a.fitness)
.unwrap_or(std::cmp::Ordering::Equal)
});
self.best_fitness = self.population[0].fitness;
Ok(())
}
fn create_random_structure(
&self,
base_triples: &[Triple],
) -> Result<GraphStructure, OxirsError> {
let mut dna = DnaDataStructure::new();
for triple in base_triples {
dna.encode_triple(triple)?;
}
let indexing_genes = self.generate_random_indexing_genes();
let storage_genes = self.generate_random_storage_genes();
let access_genes = self.generate_random_access_genes();
Ok(GraphStructure {
dna,
indexing_genes,
storage_genes,
access_genes,
fitness: 0.0,
age: 0,
mutations: Vec::new(),
})
}
fn generate_random_indexing_genes(&self) -> IndexingGenes {
IndexingGenes {
primary_index: IndexGene {
index_type: "SPO".to_string(),
parameters: vec![fastrand::f64(), fastrand::f64(), fastrand::f64()],
enabled: true,
priority: 10,
},
secondary_indexes: vec![
IndexGene {
index_type: "POS".to_string(),
parameters: vec![fastrand::f64(), fastrand::f64()],
enabled: fastrand::bool(),
priority: fastrand::u8(..10),
},
IndexGene {
index_type: "OSP".to_string(),
parameters: vec![fastrand::f64(), fastrand::f64()],
enabled: fastrand::bool(),
priority: fastrand::u8(..10),
},
],
compression: CompressionGene {
algorithm: "LZ4".to_string(),
level: fastrand::u8(1..10),
block_size: 4096 * fastrand::u32(1..17),
dictionary_size: 1024 * fastrand::u32(1..65),
},
adaptive_triggers: vec![AdaptiveTrigger {
condition: "high_load".to_string(),
threshold: 0.8 + fastrand::f64() * 0.2,
action: "create_index".to_string(),
cooldown_seconds: 60 + fastrand::u32(..300),
}],
}
}
fn generate_random_storage_genes(&self) -> StorageGenes {
StorageGenes {
block_size: 4096 * fastrand::u32(1..33),
clustering: ClusteringGene {
algorithm: "k-means".to_string(),
target_size: 100 + fastrand::u32(..900),
similarity_threshold: 0.1 + fastrand::f64() * 0.8,
rebalance_frequency: 1000 + fastrand::u32(..9000),
},
partitioning: PartitioningGene {
method: "hash".to_string(),
partition_count: 4 + fastrand::u32(..28),
load_balance_factor: 0.1 + fastrand::f64() * 0.4,
hot_data_threshold: 0.8 + fastrand::f64() * 0.2,
},
caching: CachingGene {
cache_size_mb: 64 + fastrand::u32(..1936),
eviction_policy: "LRU".to_string(),
prefetch_strategy: "sequential".to_string(),
write_policy: "write-back".to_string(),
},
}
}
fn generate_random_access_genes(&self) -> AccessGenes {
AccessGenes {
read_patterns: vec![
AccessPattern {
pattern_id: "sequential".to_string(),
frequency: fastrand::f64(),
optimization: "prefetch".to_string(),
buffer_size: 1024 + fastrand::u32(..7168),
},
AccessPattern {
pattern_id: "random".to_string(),
frequency: fastrand::f64(),
optimization: "cache".to_string(),
buffer_size: 512 + fastrand::u32(..3584),
},
],
write_patterns: vec![AccessPattern {
pattern_id: "batch".to_string(),
frequency: fastrand::f64(),
optimization: "buffer".to_string(),
buffer_size: 2048 + fastrand::u32(..14336),
}],
query_preferences: QueryPreferences {
join_algorithm: "hash_join".to_string(),
index_selection: "cost_based".to_string(),
result_caching: "enabled".to_string(),
parallel_execution: fastrand::bool(),
},
concurrency: ConcurrencyGene {
max_readers: 10 + fastrand::u32(..90),
max_writers: 1 + fastrand::u32(..9),
lock_timeout_ms: 100 + fastrand::u32(..1900),
thread_pool_size: 4 + fastrand::u32(..28),
},
}
}
pub fn evolve_generation(&mut self) -> Result<GenerationStats, OxirsError> {
let start_time = Instant::now();
let mut mutations = 0;
let mut crossovers = 0;
let mut new_population = Vec::new();
for i in 0..self.elite_size {
let mut elite = self.population[i].clone();
elite.age += 1;
new_population.push(elite);
}
while new_population.len() < self.population_size {
let parent1 = self.tournament_selection();
let parent2 = self.tournament_selection();
let mut offspring = if fastrand::f64() < self.crossover_rate {
crossovers += 1;
self.crossover(&parent1, &parent2)?
} else {
parent1.clone()
};
if fastrand::f64() < self.mutation_rate {
mutations += 1;
self.mutate(&mut offspring)?;
}
offspring.fitness = (self.fitness_function)(&offspring);
offspring.age = 0;
new_population.push(offspring);
}
self.population = new_population;
self.population.sort_by(|a, b| {
b.fitness
.partial_cmp(&a.fitness)
.unwrap_or(std::cmp::Ordering::Equal)
});
if self.population[0].fitness > self.best_fitness {
self.best_fitness = self.population[0].fitness;
}
let stats = GenerationStats {
generation: self.current_generation,
best_fitness: self.population[0].fitness,
average_fitness: self.population.iter().map(|s| s.fitness).sum::<f64>()
/ self.population.len() as f64,
worst_fitness: self
.population
.last()
.expect("collection validated to be non-empty")
.fitness,
diversity: self.calculate_diversity(),
mutations,
crossovers,
evolution_time: start_time.elapsed(),
};
self.evolution_history.push(stats.clone());
self.current_generation += 1;
Ok(stats)
}
fn tournament_selection(&self) -> GraphStructure {
let tournament_size = 3;
let mut best_fitness = -1.0;
let mut best_individual = self.population[0].clone();
for _ in 0..tournament_size {
let candidate = &self.population[fastrand::usize(..self.population.len())];
if candidate.fitness > best_fitness {
best_fitness = candidate.fitness;
best_individual = candidate.clone();
}
}
best_individual
}
fn crossover(
&self,
parent1: &GraphStructure,
parent2: &GraphStructure,
) -> Result<GraphStructure, OxirsError> {
let mut offspring = parent1.clone();
if fastrand::bool() {
offspring.indexing_genes.compression = parent2.indexing_genes.compression.clone();
}
if fastrand::bool() {
offspring.indexing_genes.secondary_indexes =
parent2.indexing_genes.secondary_indexes.clone();
}
if fastrand::bool() {
offspring.storage_genes.clustering = parent2.storage_genes.clustering.clone();
}
if fastrand::bool() {
offspring.storage_genes.caching = parent2.storage_genes.caching.clone();
}
if fastrand::bool() {
offspring.access_genes.query_preferences =
parent2.access_genes.query_preferences.clone();
}
Ok(offspring)
}
fn mutate(&self, structure: &mut GraphStructure) -> Result<(), OxirsError> {
let mutation_types = ["storage_parameter", "compression_level", "cache_size"];
let mutation_type = &mutation_types[fastrand::usize(..mutation_types.len())];
match *mutation_type {
"storage_parameter" => {
let old_value = structure.storage_genes.block_size as f64;
structure.storage_genes.block_size = 4096 * fastrand::u32(1..33);
structure.mutations.push(MutationType::StorageMutation {
parameter: "block_size".to_string(),
old_value,
new_value: structure.storage_genes.block_size as f64,
});
}
"compression_level" => {
let old_level = structure.indexing_genes.compression.level;
structure.indexing_genes.compression.level = fastrand::u8(1..10);
structure.mutations.push(MutationType::StorageMutation {
parameter: "compression_level".to_string(),
old_value: old_level as f64,
new_value: structure.indexing_genes.compression.level as f64,
});
}
"cache_size" => {
let old_size = structure.storage_genes.caching.cache_size_mb;
structure.storage_genes.caching.cache_size_mb = 64 + fastrand::u32(..1936);
structure.mutations.push(MutationType::StorageMutation {
parameter: "cache_size_mb".to_string(),
old_value: old_size as f64,
new_value: structure.storage_genes.caching.cache_size_mb as f64,
});
}
_ => {
let old_size = structure.storage_genes.caching.cache_size_mb;
structure.storage_genes.caching.cache_size_mb = 64 + fastrand::u32(..1936);
structure.mutations.push(MutationType::StorageMutation {
parameter: "cache_size_mb".to_string(),
old_value: old_size as f64,
new_value: structure.storage_genes.caching.cache_size_mb as f64,
});
}
}
Ok(())
}
fn calculate_diversity(&self) -> f64 {
let avg_fitness =
self.population.iter().map(|s| s.fitness).sum::<f64>() / self.population.len() as f64;
let variance = self
.population
.iter()
.map(|s| (s.fitness - avg_fitness).powi(2))
.sum::<f64>()
/ self.population.len() as f64;
variance.sqrt()
}
pub fn evolve(&mut self) -> Result<GraphStructure, OxirsError> {
for generation in 0..self.generations {
let stats = self.evolve_generation()?;
if generation % 10 == 0 {
println!(
"Generation {}: Best={:.4}, Avg={:.4}, Diversity={:.4}",
stats.generation, stats.best_fitness, stats.average_fitness, stats.diversity
);
}
if generation > 50 && self.evolution_history.len() >= 20 {
let recent_best = self
.evolution_history
.iter()
.rev()
.take(20)
.map(|s| s.best_fitness)
.fold(0.0, f64::max);
if (recent_best - self.best_fitness).abs() < 0.001 {
println!("Early termination: No improvement in 20 generations");
break;
}
}
}
Ok(self.population[0].clone())
}
pub fn get_best_structure(&self) -> Option<&GraphStructure> {
self.population.first()
}
pub fn get_evolution_history(&self) -> &[GenerationStats] {
&self.evolution_history
}
}
pub fn default_fitness_function(structure: &GraphStructure) -> f64 {
let mut fitness = 0.0;
fitness += structure.indexing_genes.secondary_indexes.len() as f64 * 0.1;
let cache_mb = structure.storage_genes.caching.cache_size_mb as f64;
fitness += if (128.0..=512.0).contains(&cache_mb) {
0.2
} else {
0.0
};
let readers = structure.access_genes.concurrency.max_readers as f64;
let writers = structure.access_genes.concurrency.max_writers as f64;
fitness += if readers >= 10.0 && writers >= 2.0 && readers / writers <= 50.0 {
0.2
} else {
0.0
};
let compression_level = structure.indexing_genes.compression.level as f64;
fitness += if (3.0..=7.0).contains(&compression_level) {
0.1
} else {
0.0
};
let block_size = structure.storage_genes.block_size as f64;
fitness += if (8192.0..=65536.0).contains(&block_size) {
0.1
} else {
0.0
};
fitness += fastrand::f64() * 0.1;
fitness
}
#[cfg(test)]
mod tests {
use super::*;
use crate::model::{Literal, NamedNode};
#[test]
fn test_genetic_optimizer_creation() {
let fitness_fn = Box::new(default_fitness_function);
let optimizer = GeneticGraphOptimizer::new(20, fitness_fn);
assert_eq!(optimizer.population_size, 20);
assert_eq!(optimizer.elite_size, 2);
}
#[test]
fn test_random_structure_generation() {
let fitness_fn = Box::new(default_fitness_function);
let optimizer = GeneticGraphOptimizer::new(10, fitness_fn);
let triples = vec![Triple::new(
NamedNode::new("http://example.org/subject").expect("valid IRI"),
NamedNode::new("http://example.org/predicate").expect("valid IRI"),
Literal::new("object"),
)];
let structure = optimizer.create_random_structure(&triples);
assert!(structure.is_ok());
let structure = structure.expect("structure should be available");
assert!(structure.fitness >= 0.0);
assert!(!structure.indexing_genes.secondary_indexes.is_empty());
}
#[test]
fn test_mutation() {
let fitness_fn = Box::new(default_fitness_function);
let optimizer = GeneticGraphOptimizer::new(10, fitness_fn);
let triples = vec![Triple::new(
NamedNode::new("http://example.org/subject").expect("valid IRI"),
NamedNode::new("http://example.org/predicate").expect("valid IRI"),
Literal::new("object"),
)];
let mut structure = optimizer
.create_random_structure(&triples)
.expect("operation should succeed");
let _old_block_size = structure.storage_genes.block_size;
optimizer
.mutate(&mut structure)
.expect("operation should succeed");
assert!(!structure.mutations.is_empty());
}
#[test]
fn test_fitness_function() {
let fitness_fn = Box::new(default_fitness_function);
let optimizer = GeneticGraphOptimizer::new(10, fitness_fn);
let triples = vec![Triple::new(
NamedNode::new("http://example.org/subject").expect("valid IRI"),
NamedNode::new("http://example.org/predicate").expect("valid IRI"),
Literal::new("object"),
)];
let structure = optimizer
.create_random_structure(&triples)
.expect("operation should succeed");
let fitness = default_fitness_function(&structure);
assert!(fitness >= 0.0);
assert!(fitness <= 1.0); }
}