// torsh_functional/optimization/adaptive.rs

use torsh_core::Result as TorshResult;
use torsh_tensor::Tensor;

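/// Summary statistics of a tensor, used to drive optimizer selection.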
#[derive(Debug, Clone)]
pub struct TensorCharacteristics {
    /// Number of elements in the tensor.
    pub size: usize,
    /// Ratio of the largest to the smallest nonzero absolute value
    /// (a crude conditioning proxy, not a true matrix condition number).
    pub condition_number: f32,
    /// Fraction of elements with near-zero magnitude.
    pub sparsity: f32,
    /// |mean| / standard deviation (inverse coefficient of variation).
    pub numerical_precision: f32,
    /// Heuristic score favoring lower-dimensional layouts.
    pub memory_layout_score: f32,
    /// Rough work estimate: element count times rank.
    pub computational_complexity: f32,
}

impl TensorCharacteristics {
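    /// Derives the characteristics from the tensor's raw data and shape.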
    pub fn analyze(tensor: &Tensor) -> TorshResult<Self> {
        let data = tensor.data()?;
        let size = data.len();

        // Sparsity: fraction of values with magnitude below a small threshold.
        let zero_count = data.iter().filter(|&&x| x.abs() < 1e-12).count();
        let sparsity = zero_count as f32 / size as f32;

        // Conditioning proxy: largest absolute value over the smallest
        // nonzero absolute value.
        let max_val = data.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b.abs()));
        let min_val = data.iter().fold(f32::INFINITY, |a, &b| {
            if b.abs() > 1e-12 {
                a.min(b.abs())
            } else {
                a
            }
        });
        let condition_number = if min_val > 0.0 {
            max_val / min_val
        } else {
            f32::INFINITY
        };

        // Inverse coefficient of variation: |mean| / standard deviation.
        let mean_val = data.iter().sum::<f32>() / size as f32;
        let std_dev =
            (data.iter().map(|&x| (x - mean_val).powi(2)).sum::<f32>() / size as f32).sqrt();
        let numerical_precision = if std_dev > 0.0 {
            mean_val.abs() / std_dev
        } else {
            1.0
        };

        // Lower-rank tensors score higher for memory-layout friendliness.
        let shape_binding = tensor.shape();
        let shape = shape_binding.dims();
        let memory_layout_score = match shape.len() {
            1 => 1.0,
            2 => 0.8,
            3 => 0.6,
            _ => 0.4,
        };

        let computational_complexity = size as f32 * shape.len() as f32;

        Ok(TensorCharacteristics {
            size,
            condition_number,
            sparsity,
            numerical_precision,
            memory_layout_score,
            computational_complexity,
        })
    }
}

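/// Candidate optimization algorithms the selector can recommend.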
#[derive(Debug, Clone)]
pub enum OptimizationAlgorithm {
    GradientDescent,
    MomentumGradientDescent,
    Adam,
    LBFGS,
    ConjugateGradient,
}

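/// Selects an optimization algorithm from tensor characteristics, optionally
/// refined by recorded per-algorithm performance history.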
pub struct AdaptiveAlgorithmSelector {
    /// Recent performance samples keyed by the algorithm's `Debug` name.
    performance_history: std::collections::HashMap<String, Vec<f32>>,
    #[allow(dead_code)]
    adaptation_rate: f32,
}

impl AdaptiveAlgorithmSelector {
    pub fn new() -> Self {
        Self {
            performance_history: std::collections::HashMap::new(),
            adaptation_rate: 0.1,
        }
    }

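    /// Rule-based selection: checks sparsity, then problem size, then
    /// conditioning, and falls through to a size-based default.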
    pub fn select_algorithm(
        &self,
        characteristics: &TensorCharacteristics,
    ) -> OptimizationAlgorithm {
        // Very sparse problems: conjugate gradient handles sparsity well.
        if characteristics.sparsity > 0.8 {
            return OptimizationAlgorithm::ConjugateGradient;
        }

        // Large problems: choose by conditioning.
        if characteristics.size > 1_000_000 {
            return match characteristics.condition_number {
                cn if cn > 1000.0 => OptimizationAlgorithm::Adam,
                _ => OptimizationAlgorithm::LBFGS,
            };
        }

        // Well-conditioned, numerically stable problems suit L-BFGS.
        if characteristics.condition_number < 100.0 && characteristics.numerical_precision > 0.1 {
            return OptimizationAlgorithm::LBFGS;
        }

        // Ill-conditioned problems benefit from Adam's per-parameter scaling.
        if characteristics.condition_number > 1000.0 {
            return OptimizationAlgorithm::Adam;
        }

        // Default: momentum for small problems, Adam otherwise.
        if characteristics.size < 10_000 {
            OptimizationAlgorithm::MomentumGradientDescent
        } else {
            OptimizationAlgorithm::Adam
        }
    }

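    /// Appends a performance sample for `algorithm`, retaining at most the
    /// 100 most recent samples.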
    pub fn record_performance(&mut self, algorithm: &OptimizationAlgorithm, performance: f32) {
        let key = format!("{:?}", algorithm);
        let history = self.performance_history.entry(key).or_default();
        history.push(performance);

        // Trim from the front so only the newest 100 samples remain.
        if history.len() > 100 {
            let excess = history.len() - 100;
            history.drain(0..excess);
        }
    }

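    /// Mean recorded performance for `algorithm`, or 0.0 if none exists.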
    pub fn get_algorithm_score(&self, algorithm: &OptimizationAlgorithm) -> f32 {
        let key = format!("{:?}", algorithm);
        match self.performance_history.get(&key) {
            Some(history) if !history.is_empty() => {
                history.iter().sum::<f32>() / history.len() as f32
            }
            _ => 0.0,
        }
    }

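    /// Like `select_algorithm`, but overrides the rule-based choice when a
    /// historically better-scoring alternative beats it by more than 0.1.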
    pub fn select_algorithm_adaptive(
        &self,
        characteristics: &TensorCharacteristics,
    ) -> OptimizationAlgorithm {
        let base_selection = self.select_algorithm(characteristics);
        let base_score = self.get_algorithm_score(&base_selection);

        // Only second-guess the rules once the base choice scores poorly and
        // several algorithms have accumulated history to compare against.
        if base_score < 0.5 && self.performance_history.len() > 3 {
            let alternatives = [
                OptimizationAlgorithm::Adam,
                OptimizationAlgorithm::MomentumGradientDescent,
                OptimizationAlgorithm::LBFGS,
            ];

            let best_alternative = alternatives.iter().max_by(|a, b| {
                self.get_algorithm_score(a)
                    .partial_cmp(&self.get_algorithm_score(b))
                    .unwrap_or(std::cmp::Ordering::Equal)
            });

            if let Some(best) = best_alternative {
                if self.get_algorithm_score(best) > base_score + 0.1 {
                    return best.clone();
                }
            }
        }

        base_selection
    }
}

impl Default for AdaptiveAlgorithmSelector {
    fn default() -> Self {
        Self::new()
    }
}

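/// Inspects convergence history and gradient noise, then pairs the selected
/// algorithm with a human-readable recommendation.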
pub fn analyze_optimization_problem(
    objective_values: &[f32],
    gradient_norms: &[f32],
    tensor_characteristics: &TensorCharacteristics,
) -> (OptimizationAlgorithm, String) {
    let mut recommendations = Vec::new();

    if objective_values.len() > 5 {
        // Average per-step decrease over the last five iterations
        // (positive means the objective is still improving).
        let recent_improvement = objective_values
            .windows(2)
            .rev()
            .take(5)
            .map(|w| w[0] - w[1])
            .sum::<f32>()
            / 5.0;

        if recent_improvement < 1e-8 {
            recommendations.push(
                "Problem appears to have slow convergence - consider Adam or momentum methods",
            );
        }
    }

    if gradient_norms.len() > 3 {
        // Population variance of the gradient norms.
        let gradient_variance = {
            let mean = gradient_norms.iter().sum::<f32>() / gradient_norms.len() as f32;
            gradient_norms
                .iter()
                .map(|&x| (x - mean).powi(2))
                .sum::<f32>()
                / gradient_norms.len() as f32
        };

        if gradient_variance > 1.0 {
            recommendations.push("High gradient variance detected - Adam optimizer recommended");
        }
    }

    let selector = AdaptiveAlgorithmSelector::new();
    let algorithm = selector.select_algorithm(tensor_characteristics);

    let recommendation_text = if recommendations.is_empty() {
        format!(
            "Recommended algorithm: {:?} based on tensor characteristics",
            algorithm
        )
    } else {
        format!(
            "Recommended algorithm: {:?}. Additional notes: {}",
            algorithm,
            recommendations.join("; ")
        )
    };

    (algorithm, recommendation_text)
}

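/// Renders a tuned parameter-struct literal (as a string) for the chosen
/// algorithm, adapting learning rates and memory sizes to the problem.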
pub fn auto_configure_optimization(
    characteristics: &TensorCharacteristics,
    algorithm: &OptimizationAlgorithm,
) -> TorshResult<String> {
    let config = match algorithm {
        OptimizationAlgorithm::GradientDescent => {
            // Smaller steps for ill-conditioned problems.
            let lr = if characteristics.condition_number > 100.0 {
                0.001
            } else {
                0.01
            };
            format!(
                "GradientDescentParams {{ learning_rate: {}, max_iter: {}, tolerance: {} }}",
                lr, 1000, 1e-6
            )
        }

        OptimizationAlgorithm::MomentumGradientDescent => {
            let lr = if characteristics.size > 100_000 {
                0.001
            } else {
                0.01
            };
            // Heavier momentum for ill-conditioned problems.
            let momentum = if characteristics.condition_number > 100.0 {
                0.95
            } else {
                0.9
            };
            format!(
                "MomentumParams {{ learning_rate: {}, momentum: {}, max_iter: {}, tolerance: {} }}",
                lr, momentum, 1000, 1e-6
            )
        }

        OptimizationAlgorithm::Adam => {
            let lr = if characteristics.sparsity > 0.5 {
                0.001
            } else {
                0.01
            };
            format!(
                "AdamParams {{ learning_rate: {}, beta1: 0.9, beta2: 0.999, eps: 1e-8, max_iter: {}, tolerance: {} }}",
                lr, 2000, 1e-6
            )
        }

        OptimizationAlgorithm::LBFGS => {
            // Shorter history on large problems to bound memory use.
            let m = if characteristics.size > 10_000 { 5 } else { 10 };
            format!(
                "BFGSParams {{ memory_size: {}, max_iter: {}, tolerance: {} }}",
                m, 500, 1e-6
            )
        }

        OptimizationAlgorithm::ConjugateGradient => {
            format!(
                "ConjugateGradientParams {{ max_iter: {}, tolerance: {}, restart_frequency: {} }}",
                1000,
                1e-6,
                characteristics.size.min(50)
            )
        }
    };

    Ok(config)
}
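
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal usage sketch: characteristics are built by hand here (rather
    // than via `TensorCharacteristics::analyze`) so the selector's pure logic
    // can be exercised without a tensor backend. The values chosen below are
    // illustrative, not taken from real workloads.
    fn dense_small() -> TensorCharacteristics {
        TensorCharacteristics {
            size: 1_000,
            condition_number: 10.0,
            sparsity: 0.0,
            numerical_precision: 1.0,
            memory_layout_score: 1.0,
            computational_complexity: 1_000.0,
        }
    }

    #[test]
    fn sparse_problems_select_conjugate_gradient() {
        // Sparsity above 0.8 should trigger the first selection rule.
        let characteristics = TensorCharacteristics {
            sparsity: 0.9,
            ..dense_small()
        };
        let selector = AdaptiveAlgorithmSelector::new();
        assert!(matches!(
            selector.select_algorithm(&characteristics),
            OptimizationAlgorithm::ConjugateGradient
        ));
    }

    #[test]
    fn scores_average_recorded_performance() {
        let mut selector = AdaptiveAlgorithmSelector::new();
        selector.record_performance(&OptimizationAlgorithm::Adam, 0.4);
        selector.record_performance(&OptimizationAlgorithm::Adam, 0.6);
        // The score is the mean of the recorded samples.
        assert!((selector.get_algorithm_score(&OptimizationAlgorithm::Adam) - 0.5).abs() < 1e-6);
    }
}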