// Source file: torsh_functional/optimization/adaptive.rs
1//! Adaptive algorithm selection for optimization
2//!
3//! This module provides intelligent algorithm selection based on tensor characteristics
4//! and historical performance data to automatically choose the best optimization method.
5
6use torsh_core::Result as TorshResult;
7use torsh_tensor::Tensor;
8
/// Tensor characteristics for algorithm selection.
///
/// Summary statistics produced by [`TensorCharacteristics::analyze`] and
/// consumed by [`AdaptiveAlgorithmSelector`] to pick an optimization method.
#[derive(Debug, Clone)]
pub struct TensorCharacteristics {
    /// Number of elements in the tensor
    pub size: usize,
    /// Condition number estimate: ratio of largest to smallest non-negligible
    /// |value| — a cheap heuristic, not a true singular-value ratio
    pub condition_number: f32,
    /// Sparsity ratio: fraction of elements with |x| < 1e-12
    pub sparsity: f32,
    /// Signal-to-noise style score: |mean| / standard deviation of the data
    pub numerical_precision: f32,
    /// Memory layout efficiency score in (0, 1]; 1.0 for 1-D, lower for higher rank
    pub memory_layout_score: f32,
    /// Computational complexity estimate (element count × rank)
    pub computational_complexity: f32,
}
25
26impl TensorCharacteristics {
27    /// Analyze tensor characteristics
28    pub fn analyze(tensor: &Tensor) -> TorshResult<Self> {
29        let data = tensor.data()?;
30        let size = data.len();
31
32        // Calculate sparsity
33        let zero_count = data.iter().filter(|&&x| x.abs() < 1e-12).count();
34        let sparsity = zero_count as f32 / size as f32;
35
36        // Estimate condition number using simple heuristic
37        let max_val = data.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b.abs()));
38        let min_val = data.iter().fold(f32::INFINITY, |a, &b| {
39            if b.abs() > 1e-12 {
40                a.min(b.abs())
41            } else {
42                a
43            }
44        });
45        let condition_number = if min_val > 0.0 {
46            max_val / min_val
47        } else {
48            f32::INFINITY
49        };
50
51        // Estimate numerical precision based on dynamic range
52        let mean_val = data.iter().sum::<f32>() / size as f32;
53        let std_dev =
54            (data.iter().map(|&x| (x - mean_val).powi(2)).sum::<f32>() / size as f32).sqrt();
55        let numerical_precision = if std_dev > 0.0 {
56            mean_val.abs() / std_dev
57        } else {
58            1.0
59        };
60
61        // Simple memory layout score based on tensor shape
62        let shape_binding = tensor.shape();
63        let shape = shape_binding.dims();
64        let memory_layout_score = match shape.len() {
65            1 => 1.0, // Linear - optimal
66            2 => 0.8, // Matrix - good
67            3 => 0.6, // 3D - fair
68            _ => 0.4, // Higher dimensional - poor
69        };
70
71        // Computational complexity estimate based on size and dimensionality
72        let computational_complexity = size as f32 * shape.len() as f32;
73
74        Ok(TensorCharacteristics {
75            size,
76            condition_number,
77            sparsity,
78            numerical_precision,
79            memory_layout_score,
80            computational_complexity,
81        })
82    }
83}
84
/// Algorithm recommendation based on tensor characteristics.
///
/// Field-less, so it derives `Copy`; `PartialEq`/`Eq`/`Hash` let callers
/// compare or key on variants directly instead of going through the
/// `Debug` string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationAlgorithm {
    /// Gradient descent (for well-conditioned, dense problems)
    GradientDescent,
    /// Momentum gradient descent (for noisy or ill-conditioned problems)
    MomentumGradientDescent,
    /// Adam optimizer (for sparse or noisy gradients)
    Adam,
    /// L-BFGS (for smooth, deterministic problems)
    LBFGS,
    /// Conjugate gradient (for large, sparse, positive definite systems)
    ConjugateGradient,
}
99
/// Adaptive algorithm selector.
///
/// Combines rule-based selection on [`TensorCharacteristics`] with a running
/// per-algorithm performance history so repeated use can steer towards the
/// algorithms that have actually performed well.
pub struct AdaptiveAlgorithmSelector {
    /// Performance history per algorithm, keyed by the algorithm's `Debug`
    /// name; only the most recent 100 samples per algorithm are retained
    performance_history: std::collections::HashMap<String, Vec<f32>>,
    /// Learning rate for adaptation (reserved for future use; currently unread)
    #[allow(dead_code)]
    adaptation_rate: f32,
}
108
109impl AdaptiveAlgorithmSelector {
110    /// Create new adaptive algorithm selector
111    pub fn new() -> Self {
112        Self {
113            performance_history: std::collections::HashMap::new(),
114            adaptation_rate: 0.1,
115        }
116    }
117
118    /// Select best optimization algorithm based on tensor characteristics
119    pub fn select_algorithm(
120        &self,
121        characteristics: &TensorCharacteristics,
122    ) -> OptimizationAlgorithm {
123        // Rule-based selection with learned adjustments
124
125        // For very sparse problems, prefer specialized sparse algorithms
126        if characteristics.sparsity > 0.8 {
127            return OptimizationAlgorithm::ConjugateGradient;
128        }
129
130        // For large problems, use memory-efficient methods
131        if characteristics.size > 1_000_000 {
132            return match characteristics.condition_number {
133                cn if cn > 1000.0 => OptimizationAlgorithm::Adam,
134                _ => OptimizationAlgorithm::LBFGS,
135            };
136        }
137
138        // For well-conditioned, smooth problems
139        if characteristics.condition_number < 100.0 && characteristics.numerical_precision > 0.1 {
140            return OptimizationAlgorithm::LBFGS;
141        }
142
143        // For ill-conditioned problems
144        if characteristics.condition_number > 1000.0 {
145            return OptimizationAlgorithm::Adam;
146        }
147
148        // For moderately sized, general problems
149        if characteristics.size < 10_000 {
150            OptimizationAlgorithm::MomentumGradientDescent
151        } else {
152            OptimizationAlgorithm::Adam
153        }
154    }
155
156    /// Record performance for algorithm adaptation
157    pub fn record_performance(&mut self, algorithm: &OptimizationAlgorithm, performance: f32) {
158        let key = format!("{:?}", algorithm);
159        self.performance_history
160            .entry(key)
161            .or_insert_with(Vec::new)
162            .push(performance);
163
164        // Keep only recent history (last 100 entries)
165        if let Some(history) = self
166            .performance_history
167            .get_mut(&format!("{:?}", algorithm))
168        {
169            if history.len() > 100 {
170                history.drain(0..history.len() - 100);
171            }
172        }
173    }
174
175    /// Get performance score for an algorithm
176    pub fn get_algorithm_score(&self, algorithm: &OptimizationAlgorithm) -> f32 {
177        let key = format!("{:?}", algorithm);
178        if let Some(history) = self.performance_history.get(&key) {
179            if !history.is_empty() {
180                history.iter().sum::<f32>() / history.len() as f32
181            } else {
182                0.0
183            }
184        } else {
185            0.0
186        }
187    }
188
189    /// Adaptive selection that learns from performance history
190    pub fn select_algorithm_adaptive(
191        &self,
192        characteristics: &TensorCharacteristics,
193    ) -> OptimizationAlgorithm {
194        let base_selection = self.select_algorithm(characteristics);
195
196        // Check if we have enough history to make adaptive decisions
197        let base_score = self.get_algorithm_score(&base_selection);
198
199        // Try alternative algorithms if base algorithm performs poorly
200        if base_score < 0.5 && self.performance_history.len() > 3 {
201            let alternatives = vec![
202                OptimizationAlgorithm::Adam,
203                OptimizationAlgorithm::MomentumGradientDescent,
204                OptimizationAlgorithm::LBFGS,
205            ];
206
207            let best_alternative = alternatives.iter().max_by(|a, b| {
208                self.get_algorithm_score(a)
209                    .partial_cmp(&self.get_algorithm_score(b))
210                    .unwrap_or(std::cmp::Ordering::Equal)
211            });
212
213            if let Some(best) = best_alternative {
214                if self.get_algorithm_score(best) > base_score + 0.1 {
215                    return best.clone();
216                }
217            }
218        }
219
220        base_selection
221    }
222}
223
impl Default for AdaptiveAlgorithmSelector {
    /// Equivalent to [`AdaptiveAlgorithmSelector::new`]: empty history,
    /// default adaptation rate.
    fn default() -> Self {
        Self::new()
    }
}
229
230/// Analyze problem characteristics and recommend optimization strategy
231pub fn analyze_optimization_problem(
232    objective_values: &[f32],
233    gradient_norms: &[f32],
234    tensor_characteristics: &TensorCharacteristics,
235) -> (OptimizationAlgorithm, String) {
236    let mut recommendations = Vec::new();
237
238    // Analyze convergence pattern
239    if objective_values.len() > 5 {
240        let recent_improvement = objective_values
241            .windows(2)
242            .rev()
243            .take(5)
244            .map(|w| w[0] - w[1])
245            .sum::<f32>()
246            / 5.0;
247
248        if recent_improvement < 1e-8 {
249            recommendations.push(
250                "Problem appears to have slow convergence - consider Adam or momentum methods",
251            );
252        }
253    }
254
255    // Analyze gradient characteristics
256    if gradient_norms.len() > 3 {
257        let gradient_variance = {
258            let mean = gradient_norms.iter().sum::<f32>() / gradient_norms.len() as f32;
259            gradient_norms
260                .iter()
261                .map(|&x| (x - mean).powi(2))
262                .sum::<f32>()
263                / gradient_norms.len() as f32
264        };
265
266        if gradient_variance > 1.0 {
267            recommendations.push("High gradient variance detected - Adam optimizer recommended");
268        }
269    }
270
271    // Use selector for algorithm choice
272    let selector = AdaptiveAlgorithmSelector::new();
273    let algorithm = selector.select_algorithm(tensor_characteristics);
274
275    // Generate recommendation summary
276    let recommendation_text = if recommendations.is_empty() {
277        format!(
278            "Recommended algorithm: {:?} based on tensor characteristics",
279            algorithm
280        )
281    } else {
282        format!(
283            "Recommended algorithm: {:?}. Additional notes: {}",
284            algorithm,
285            recommendations.join("; ")
286        )
287    };
288
289    (algorithm, recommendation_text)
290}
291
292/// Auto-configure optimization parameters based on problem characteristics
293pub fn auto_configure_optimization(
294    characteristics: &TensorCharacteristics,
295    algorithm: &OptimizationAlgorithm,
296) -> TorshResult<String> {
297    let config = match algorithm {
298        OptimizationAlgorithm::GradientDescent => {
299            let lr = if characteristics.condition_number > 100.0 {
300                0.001 // Conservative for ill-conditioned problems
301            } else {
302                0.01 // Standard rate for well-conditioned problems
303            };
304            format!(
305                "GradientDescentParams {{ learning_rate: {}, max_iter: {}, tolerance: {} }}",
306                lr, 1000, 1e-6
307            )
308        }
309
310        OptimizationAlgorithm::MomentumGradientDescent => {
311            let lr = if characteristics.size > 100_000 {
312                0.001
313            } else {
314                0.01
315            };
316            let momentum = if characteristics.condition_number > 100.0 {
317                0.95
318            } else {
319                0.9
320            };
321            format!(
322                "MomentumParams {{ learning_rate: {}, momentum: {}, max_iter: {}, tolerance: {} }}",
323                lr, momentum, 1000, 1e-6
324            )
325        }
326
327        OptimizationAlgorithm::Adam => {
328            let lr = if characteristics.sparsity > 0.5 {
329                0.001
330            } else {
331                0.01
332            };
333            format!("AdamParams {{ learning_rate: {}, beta1: 0.9, beta2: 0.999, eps: 1e-8, max_iter: {}, tolerance: {} }}",
334                    lr, 2000, 1e-6)
335        }
336
337        OptimizationAlgorithm::LBFGS => {
338            let m = if characteristics.size > 10_000 { 5 } else { 10 };
339            format!(
340                "BFGSParams {{ memory_size: {}, max_iter: {}, tolerance: {} }}",
341                m, 500, 1e-6
342            )
343        }
344
345        OptimizationAlgorithm::ConjugateGradient => {
346            format!(
347                "ConjugateGradientParams {{ max_iter: {}, tolerance: {}, restart_frequency: {} }}",
348                1000,
349                1e-6,
350                characteristics.size.min(50)
351            )
352        }
353    };
354
355    Ok(config)
356}