use crate::error::Result;
use scirs2_core::random::prelude::*;
use std::collections::{HashMap, VecDeque};
use std::time::Instant;
/// Tabular Q-learning optimizer that chooses discrete parameter adjustments.
///
/// Q-value estimates are kept per `(state, action)` pair in a nested map and
/// refined by `update_q_values`; actions are picked epsilon-greedily by
/// `select_action`.
#[derive(Debug)]
pub struct RLParameterOptimizer {
/// Learned Q-value estimates, keyed first by state and then by action.
/// Entries are created lazily (starting at `0.0`) by `update_q_values`.
q_table: HashMap<StateDiscrete, HashMap<ActionDiscrete, f64>>,
/// Set to `StateDiscrete::default()` by `new`.
current_state: StateDiscrete,
/// Hyperparameters for the update rule and the exploration schedule.
learning_params: RLLearningParams,
/// Every action the optimizer can choose from, as built by
/// `create_action_space`. Index 0 is the fallback greedy action.
pub action_space: Vec<ActionDiscrete>,
/// Every discrete state enumerated by `create_state_space`.
state_space: Vec<StateDiscrete>,
/// Transitions recorded by `update_q_values` and sampled by
/// `experience_replay`; trimmed from the front once it exceeds 10000 entries.
experience_buffer: VecDeque<Experience>,
/// Buffer for performance samples, preallocated for 1000 entries by `new`.
performance_history: VecDeque<PerformanceMetric>,
}
/// Discretised system state used as the key of the Q-table.
///
/// Every field is a bucket index. `metrics_to_state` and `create_state_space`
/// both use five buckets per dimension, i.e. indices `0..=4`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct StateDiscrete {
/// Bucket index of the measured latency.
pub latency_bucket: usize,
/// Bucket index of the measured CPU usage.
pub cpu_bucket: usize,
/// Bucket index of the measured memory usage.
pub memory_bucket: usize,
/// Bucket index of the quality score.
pub quality_bucket: usize,
/// Bucket index of the workload complexity (`metrics_to_state` always
/// writes `2` here).
pub complexity_bucket: usize,
}
/// A single discrete action: which parameter to touch and how to adjust it.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct ActionDiscrete {
/// The parameter this action targets.
pub param_type: ParameterType,
/// The direction and magnitude of the adjustment.
pub adjustment: AdjustmentAction,
}
/// The tunable parameters an action can target.
///
/// NOTE(review): how each variant maps onto a concrete setting is not defined
/// in this part of the file; confirm against the code that applies actions.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum ParameterType {
BlurSigma,
EdgeThreshold,
ThreadCount,
BufferSize,
SimdMode,
QualityLevel,
}
/// Direction and coarse magnitude of a parameter adjustment.
///
/// NOTE(review): the concrete step sizes behind "small" and "large" are not
/// defined in this part of the file.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum AdjustmentAction {
DecreaseLarge,
DecreaseSmall,
NoChange,
IncreaseSmall,
IncreaseLarge,
}
/// Hyperparameters of the Q-learning update and the epsilon-greedy policy.
#[derive(Debug, Clone)]
pub struct RLLearningParams {
/// Step size applied to the temporal-difference error in `update_q_values`.
pub learning_rate: f64,
/// Weight of the best next-state Q-value in the update target.
pub discount_factor: f64,
/// Probability threshold below which `select_action` picks a random action.
pub epsilon: f64,
/// Factor `epsilon` is multiplied by after every `update_q_values` call.
pub epsilon_decay: f64,
/// Lower bound that `epsilon` is never decayed below.
pub epsilon_min: f64,
}
impl Default for RLLearningParams {
fn default() -> Self {
Self {
learning_rate: 0.1,
discount_factor: 0.95,
epsilon: 1.0,
epsilon_decay: 0.995,
epsilon_min: 0.01,
}
}
}
/// One recorded transition, consumed by `update_q_values`.
#[derive(Debug, Clone)]
pub struct Experience {
/// State in which the action was taken.
pub state: StateDiscrete,
/// Action that was taken.
pub action: ActionDiscrete,
/// Reward observed for the transition.
pub reward: f64,
/// State reached after the action.
pub next_state: StateDiscrete,
/// When `true`, `update_q_values` uses `0.0` instead of the next state's
/// best Q-value in the update target.
pub done: bool,
}
/// A raw performance sample that can be bucketed into a `StateDiscrete` and
/// scored by `calculate_reward`.
///
/// NOTE(review): units are not stated in this file; the ranges below are the
/// normalisation bounds used by `metrics_to_state` / `calculate_reward`.
#[derive(Debug, Clone)]
pub struct PerformanceMetric {
/// Latency; normalised against `100.0`.
pub latency: f64,
/// CPU usage; normalised against `100.0`.
pub cpu_usage: f64,
/// Memory usage; normalised against `2000.0`.
pub memory_usage: f64,
/// Quality score; bucketed over `0.0..=1.0` and used directly as a reward term.
pub quality_score: f64,
/// Energy consumption; normalised against `10.0` in the reward.
pub energy_consumption: f64,
/// Time associated with the sample.
pub timestamp: Instant,
}
impl Default for RLParameterOptimizer {
fn default() -> Self {
Self::new()
}
}
impl RLParameterOptimizer {
/// Creates an optimizer with an empty Q-table, default learning parameters,
/// the full action and state spaces, and preallocated (empty) buffers.
pub fn new() -> Self {
    // Initial capacities only; they do not limit how large the buffers may grow.
    const EXPERIENCE_CAPACITY: usize = 10000;
    const HISTORY_CAPACITY: usize = 1000;

    Self {
        q_table: HashMap::new(),
        current_state: StateDiscrete::default(),
        learning_params: RLLearningParams::default(),
        action_space: Self::create_action_space(),
        state_space: Self::create_state_space(),
        experience_buffer: VecDeque::with_capacity(EXPERIENCE_CAPACITY),
        performance_history: VecDeque::with_capacity(HISTORY_CAPACITY),
    }
}
/// Builds the full action space: every `ParameterType` paired with every
/// `AdjustmentAction` (6 x 5 = 30 actions).
///
/// The order is parameter-major, adjustment-minor, so element 0 is
/// `(BlurSigma, DecreaseLarge)`. `get_best_action` relies on element 0 as its
/// fallback, so the ordering must stay stable.
fn create_action_space() -> Vec<ActionDiscrete> {
    let param_types = [
        ParameterType::BlurSigma,
        ParameterType::EdgeThreshold,
        ParameterType::ThreadCount,
        ParameterType::BufferSize,
        ParameterType::SimdMode,
        ParameterType::QualityLevel,
    ];
    let adjustments = [
        AdjustmentAction::DecreaseLarge,
        AdjustmentAction::DecreaseSmall,
        AdjustmentAction::NoChange,
        AdjustmentAction::IncreaseSmall,
        AdjustmentAction::IncreaseLarge,
    ];

    // Cartesian product, outer loop over parameters.
    param_types
        .iter()
        .flat_map(|param_type| {
            adjustments.iter().map(move |adjustment| ActionDiscrete {
                param_type: param_type.clone(),
                adjustment: adjustment.clone(),
            })
        })
        .collect()
}
/// Enumerates every discrete state: five buckets in each of the five
/// dimensions, 5^5 = 3125 states in total.
///
/// States are produced in lexicographic order with `latency_bucket` varying
/// slowest and `complexity_bucket` varying fastest.
fn create_state_space() -> Vec<StateDiscrete> {
    const BUCKETS: usize = 5;
    const DIMENSIONS: u32 = 5;

    (0..BUCKETS.pow(DIMENSIONS))
        .map(|index| {
            // Read `index` as a base-`BUCKETS` number; `place` 0 is the
            // least-significant (fastest-changing) digit.
            let digit = |place: u32| (index / BUCKETS.pow(place)) % BUCKETS;
            StateDiscrete {
                latency_bucket: digit(4),
                cpu_bucket: digit(3),
                memory_bucket: digit(2),
                quality_bucket: digit(1),
                complexity_bucket: digit(0),
            }
        })
        .collect()
}
/// Chooses an action for `state` with an epsilon-greedy policy.
///
/// A uniform random draw is compared against the current `epsilon`: below it,
/// a uniformly random member of `action_space` is returned; otherwise the
/// greedy choice from `get_best_action` is returned.
///
/// `action_space` is expected to be non-empty; both branches index into it.
pub fn select_action(&mut self, state: &StateDiscrete) -> ActionDiscrete {
    let mut rng = thread_rng();
    let explore = rng.random::<f64>() < self.learning_params.epsilon;

    // Exploit: defer to the learned Q-values.
    if !explore {
        return self.get_best_action(state);
    }

    // Explore: pick any action uniformly at random.
    let pick = rng.random_range(0..self.action_space.len());
    self.action_space[pick].clone()
}
/// Returns the action with the highest recorded Q-value for `state`.
///
/// Falls back to the first entry of `action_space` when the state has no
/// recorded Q-values. Values that cannot be ordered (NaN) compare as equal;
/// ties are resolved by the map's iteration order, which is unspecified.
fn get_best_action(&self, state: &StateDiscrete) -> ActionDiscrete {
    use std::cmp::Ordering;

    self.q_table
        .get(state)
        .and_then(|action_values| {
            action_values
                .iter()
                .max_by(|lhs, rhs| lhs.1.partial_cmp(rhs.1).unwrap_or(Ordering::Equal))
        })
        .map(|(best_action, _)| best_action.clone())
        .unwrap_or_else(|| self.action_space[0].clone())
}
/// Applies one Q-learning update for `experience`, records it, and decays
/// the exploration rate.
///
/// The update is `Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))`,
/// where the bootstrap term is `0.0` for terminal transitions and for next
/// states without recorded Q-values. Missing `(s, a)` entries start at `0.0`.
///
/// Afterwards the experience is appended to the replay buffer (dropping the
/// oldest entry once the buffer exceeds 10000 items) and `epsilon` is
/// multiplied by `epsilon_decay`, bounded below by `epsilon_min`.
pub fn update_q_values(&mut self, experience: Experience) {
    use std::cmp::Ordering;

    let step_size = self.learning_params.learning_rate;
    let discount = self.learning_params.discount_factor;

    // Best known value of the successor state; terminal transitions do not bootstrap.
    let bootstrap = match self.q_table.get(&experience.next_state) {
        Some(next_values) if !experience.done => next_values
            .values()
            .copied()
            .max_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap_or(Ordering::Equal))
            .unwrap_or(0.0),
        _ => 0.0,
    };
    let target = experience.reward + discount * bootstrap;

    let q_entry = self
        .q_table
        .entry(experience.state.clone())
        .or_default()
        .entry(experience.action.clone())
        .or_insert(0.0);
    *q_entry += step_size * (target - *q_entry);

    // Remember the transition, evicting the oldest one when over capacity.
    self.experience_buffer.push_back(experience);
    if self.experience_buffer.len() > 10000 {
        self.experience_buffer.pop_front();
    }

    // Anneal exploration, never dropping below the configured floor.
    let decayed = self.learning_params.epsilon * self.learning_params.epsilon_decay;
    self.learning_params.epsilon = decayed.max(self.learning_params.epsilon_min);
}
/// Converts a raw performance sample into its discrete state.
///
/// Latency and CPU usage are bucketed over `0.0..100.0`, memory usage over
/// `0.0..2000.0` and quality over `0.0..1.0`, each into five buckets.
/// The complexity bucket is not derived from `metrics`; it is fixed at `2`.
pub fn metrics_to_state(&self, metrics: &PerformanceMetric) -> StateDiscrete {
    const BUCKETS: usize = 5;
    let bucket_of = |value: f64, upper: f64| Self::bucket_value(value, 0.0, upper, BUCKETS);

    StateDiscrete {
        latency_bucket: bucket_of(metrics.latency, 100.0),
        cpu_bucket: bucket_of(metrics.cpu_usage, 100.0),
        memory_bucket: bucket_of(metrics.memory_usage, 2000.0),
        quality_bucket: bucket_of(metrics.quality_score, 1.0),
        complexity_bucket: 2,
    }
}
/// Maps `value` onto one of `numbuckets` equal-width buckets spanning
/// `[min_val, max_val]` and returns the bucket index.
///
/// Edge cases:
/// * values below `min_val` land in bucket `0`, values at or above `max_val`
///   in the last bucket;
/// * a NaN position (NaN input, or `value == min_val == max_val`) yields `0`;
/// * `numbuckets == 0` yields `0` instead of underflowing on `numbuckets - 1`.
fn bucket_value(value: f64, min_val: f64, max_val: f64, numbuckets: usize) -> usize {
    // `saturating_sub` keeps a zero bucket count from underflowing.
    let last_bucket = numbuckets.saturating_sub(1);

    let normalized = (value - min_val) / (max_val - min_val);
    if normalized.is_nan() {
        return 0;
    }

    // Clamp before casting so out-of-range inputs are handled explicitly
    // rather than through the saturating behaviour of `as usize`.
    let scaled = (normalized.clamp(0.0, 1.0) * numbuckets as f64).floor();
    (scaled as usize).min(last_bucket)
}
/// Scores a performance sample as a weighted sum of per-metric rewards.
///
/// Latency, CPU, memory and energy are cost metrics: each is divided by its
/// normalisation bound (`100.0`, `100.0`, `2000.0`, `10.0`), capped at `1.0`
/// and turned into a reward of `1.0 - cost`, so exceeding the bound yields
/// `0.0` rather than a negative term. The quality score is used as-is.
///
/// Weights: latency `0.3`, CPU `0.2`, memory `0.2`, quality `0.2`,
/// energy `0.1`.
pub fn calculate_reward(&self, metrics: &PerformanceMetric) -> f64 {
    // Shared shape of every cost term; the cap keeps the CPU term consistent
    // with the others when usage is reported above 100.
    let cost_reward = |value: f64, bound: f64| 1.0 - (value / bound).min(1.0);

    let latency_reward = cost_reward(metrics.latency, 100.0);
    let cpu_reward = cost_reward(metrics.cpu_usage, 100.0);
    let memory_reward = cost_reward(metrics.memory_usage, 2000.0);
    let quality_reward = metrics.quality_score;
    let energy_reward = cost_reward(metrics.energy_consumption, 10.0);

    0.3 * latency_reward
        + 0.2 * cpu_reward
        + 0.2 * memory_reward
        + 0.2 * quality_reward
        + 0.1 * energy_reward
}
pub fn experience_replay(&mut self, batchsize: usize) {
if self.experience_buffer.len() < batchsize {
return;
}
let mut rng = thread_rng();
let sample_indices: Vec<usize> = (0..batchsize)
.map(|_| rng.random_range(0..self.experience_buffer.len()))
.collect();
for &idx in &sample_indices {
if let Some(experience) = self.experience_buffer.get(idx) {
self.update_q_values(experience.clone());
}
}
}
pub async fn initialize_rl_optimizer(&mut self) -> Result<()> {
self.experience_buffer.clear();
self.q_table = HashMap::new();
self.learning_params = RLLearningParams::default();
Ok(())
}
}
impl Default for StateDiscrete {
fn default() -> Self {
Self {
latency_bucket: 2,
cpu_bucket: 2,
memory_bucket: 2,
quality_bucket: 2,
complexity_bucket: 2,
}
}
}