optirs-core 0.3.1

OptiRS core optimization algorithms and utilities
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
//! # OptiRS Core - Advanced ML Optimization Built on SciRS2
//!
//! **Version:** 0.3.1
//!
//! **Status:** Stable Release - Production Ready
//!
//! `optirs-core` provides state-of-the-art optimization algorithms for machine learning,
//! built exclusively on the [SciRS2](https://github.com/cool-japan/scirs) scientific computing ecosystem.
//!
//! ## Dependencies
//!
//! - `scirs2-core` 0.1.1 - Required foundation
//!
//! ## Quick Start
//!
//! ```rust
//! use optirs_core::optimizers::{Adam, Optimizer};
//! use scirs2_core::ndarray::Array1;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Create optimizer
//! let mut optimizer = Adam::new(0.001);
//!
//! // Prepare parameters and gradients
//! let params = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
//! let grads = Array1::from_vec(vec![0.1, 0.2, 0.15, 0.08]);
//!
//! // Perform optimization step
//! let updated_params = optimizer.step(&params, &grads)?;
//! # Ok(())
//! # }
//! ```
//!
//! ## Features
//!
//! ### 20 State-of-the-Art Optimizers
//!
//! **First-Order Methods:**
//! - **SGD** - Stochastic Gradient Descent with optional momentum
//! - **SimdSGD** - SIMD-accelerated SGD (2-4x faster)
//! - **Adam** - Adaptive Moment Estimation
//! - **AdamW** - Adam with decoupled weight decay
//! - **AdaDelta** - Adaptive LR without manual tuning ⭐ NEW!
//! - **AdaBound** - Smooth Adam→SGD transition ⭐ NEW!
//! - **Ranger** - RAdam + Lookahead combination ⭐ NEW!
//! - **RMSprop** - Root Mean Square Propagation
//! - **Adagrad** - Adaptive Gradient Algorithm
//! - **LAMB** - Layer-wise Adaptive Moments for Batch training
//! - **LARS** - Layer-wise Adaptive Rate Scaling
//! - **Lion** - Evolved Sign Momentum
//! - **Lookahead** - Look ahead optimizer wrapper
//! - **RAdam** - Rectified Adam
//! - **SAM** - Sharpness-Aware Minimization
//! - **SparseAdam** - Adam optimized for sparse gradients
//! - **GroupedAdam** - Adam with parameter groups
//!
//! **Second-Order Methods:**
//! - **L-BFGS** - Limited-memory BFGS
//! - **K-FAC** - Kronecker-Factored Approximate Curvature
//! - **Newton-CG** - Newton Conjugate Gradient ⭐ NEW!
//!
//! ### Performance Optimizations (Phase 2 Complete)
//!
//! #### SIMD Acceleration (2-4x speedup)
//! ```rust
//! use optirs_core::optimizers::{Optimizer, SimdSGD};
//! use scirs2_core::ndarray::Array1;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let params = Array1::from_elem(100_000, 1.0f32);
//! let grads = Array1::from_elem(100_000, 0.001f32);
//!
//! let mut optimizer = SimdSGD::new(0.01f32);
//! let updated = optimizer.step(&params, &grads)?;
//! # Ok(())
//! # }
//! ```
//!
//! #### Parallel Processing (4-8x speedup)
//! ```rust
//! use optirs_core::optimizers::{Adam, Optimizer};
//! use optirs_core::parallel_optimizer::parallel_step_array1;
//! use scirs2_core::ndarray::Array1;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let params_list = vec![
//!     Array1::from_elem(10_000, 1.0),
//!     Array1::from_elem(20_000, 1.0),
//! ];
//! let grads_list = vec![
//!     Array1::from_elem(10_000, 0.01),
//!     Array1::from_elem(20_000, 0.01),
//! ];
//!
//! let mut optimizer = Adam::new(0.001);
//! let results = parallel_step_array1(&mut optimizer, &params_list, &grads_list)?;
//! # Ok(())
//! # }
//! ```
//!
//! #### Memory-Efficient Operations
//! ```rust
//! use optirs_core::memory_efficient_optimizer::GradientAccumulator;
//! use scirs2_core::ndarray::Array1;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut accumulator = GradientAccumulator::<f32>::new(1000);
//!
//! // Accumulate gradients from micro-batches
//! for _ in 0..4 {
//!     let micro_grads = Array1::from_elem(1000, 0.1);
//!     accumulator.accumulate(&micro_grads.view())?;
//! }
//!
//! let avg_grads = accumulator.average()?;
//! # Ok(())
//! # }
//! ```
//!
//! #### Production Metrics & Monitoring
//! ```rust
//! use optirs_core::optimizer_metrics::{MetricsCollector, MetricsReporter};
//! use optirs_core::optimizers::{Adam, Optimizer};
//! use scirs2_core::ndarray::Array1;
//! use std::time::Instant;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut collector = MetricsCollector::new();
//! collector.register_optimizer("adam");
//!
//! let mut optimizer = Adam::new(0.001);
//! let params = Array1::from_elem(1000, 1.0);
//! let grads = Array1::from_elem(1000, 0.01);
//!
//! let params_before = params.clone();
//! let start = Instant::now();
//! let params = optimizer.step(&params, &grads)?;
//! let duration = start.elapsed();
//!
//! collector.update(
//!     "adam",
//!     duration,
//!     0.001,
//!     &grads.view(),
//!     &params_before.view(),
//!     &params.view(),
//! )?;
//!
//! println!("{}", collector.summary_report());
//! # Ok(())
//! # }
//! ```
//!
//! ### Learning Rate Schedulers
//!
//! - **ExponentialDecay** - Exponential learning rate decay
//! - **StepDecay** - Step-wise reduction
//! - **CosineAnnealing** - Cosine annealing schedule
//! - **LinearWarmupDecay** - Linear warmup with decay
//! - **OneCycle** - One cycle learning rate policy
//!
//! ### Advanced Features
//!
//! - **Parameter Groups** - Different learning rates per layer
//! - **Gradient Accumulation** - Micro-batch training for large models
//! - **Gradient Clipping** - Prevent exploding gradients
//! - **Regularization** - L1, L2, weight decay
//! - **Privacy-Preserving** - Differential privacy support
//! - **Distributed Training** - Multi-GPU and TPU coordination
//! - **Neural Architecture Search** - Automated architecture optimization
//!
//! ## Architecture
//!
//! ### SciRS2 Foundation
//!
//! OptiRS-Core is built **exclusively** on the SciRS2 ecosystem:
//!
//! - **Arrays**: Uses `scirs2_core::ndarray` (NOT direct ndarray)
//! - **Random**: Uses `scirs2_core::random` (NOT direct rand)
//! - **SIMD**: Uses `scirs2_core::simd_ops` for vectorization
//! - **Parallel**: Uses `scirs2_core::parallel_ops` for multi-core
//! - **GPU**: Built on `scirs2_core::gpu` abstractions
//! - **Metrics**: Uses `scirs2_core::metrics` for monitoring
//! - **Error Handling**: Uses `scirs2_core::error::Result`
//!
//! This integration ensures:
//! - Type safety across the ecosystem
//! - Consistent performance optimizations
//! - Unified error handling
//! - Simplified dependency management
//!
//! ### Module Organization
//!
//! - [`optimizers`] - Core optimizer implementations
//! - [`schedulers`] - Learning rate scheduling
//! - [`simd_optimizer`] - SIMD-accelerated optimizers
//! - [`parallel_optimizer`] - Multi-core processing
//! - [`memory_efficient_optimizer`] - Memory optimization
//! - [`gpu_optimizer`] - GPU acceleration
//! - [`optimizer_metrics`] - Performance monitoring
//! - [`gradient_processing`] - Gradient manipulation
//! - [`regularizers`] - Regularization techniques
//! - [`second_order`] - Second-order methods
//! - [`distributed`] - Distributed training
//! - [`privacy`] - Privacy-preserving optimization
//!
//! ## Performance
//!
//! ### Benchmarks
//!
//! All benchmarks use [Criterion.rs](https://github.com/bheisler/criterion.rs) with statistical analysis:
//!
//! - **optimizer_benchmarks** - Compare the optimizer implementations
//! - **simd_benchmarks** - SIMD vs scalar performance
//! - **parallel_benchmarks** - Multi-core scaling
//! - **memory_efficient_benchmarks** - Memory optimization impact
//! - **gpu_benchmarks** - GPU vs CPU comparison
//! - **metrics_benchmarks** - Monitoring overhead
//!
//! Run benchmarks:
//! ```bash
//! cargo bench --package optirs-core
//! ```
//!
//! ### Test Coverage
//!
//! - **549 unit tests** - Core functionality
//! - **54 doc tests** - Documentation examples
//! - **603 total tests** - All passing
//! - **Zero clippy warnings** - Production quality
//!
//! ## Examples
//!
//! See the `examples/` directory for comprehensive examples:
//!
//! - `basic_optimization.rs` - Getting started
//! - `advanced_optimization.rs` - Schedulers, regularization, clipping
//! - `performance_optimization.rs` - SIMD, parallel, GPU acceleration
//! - `production_monitoring.rs` - Metrics and convergence detection
//!
//! ## Contributing
//!
//! When contributing, ensure:
//! - **100% SciRS2 usage** - No direct ndarray/rand/rayon imports
//! - **Zero clippy warnings** - Run `cargo clippy`
//! - **All tests pass** - Run `cargo test`
//! - **Documentation** - Add examples to public APIs
//!
//! ## License
//!
//! Licensed under Apache-2.0.

// Crate-wide lint suppressions: each `#![allow(...)]` below silences the named
// lint for every module in this crate.
// NOTE(review): the reason for each suppression is not recorded here; confirm
// each one is still required and, where possible, narrow it to the specific
// items that trigger the lint.
#![allow(deprecated)] // uses of items marked `#[deprecated]`
#![allow(unreachable_code)] // code that can never be executed
#![allow(unused_mut)] // `mut` bindings that are never mutated
#![allow(unused_parens)] // redundant parentheses
#![allow(for_loops_over_fallibles)] // `for` loops over `Option`/`Result` values
#![allow(unexpected_cfgs)] // `cfg` names/values not known to the compiler
#![allow(unused_attributes)] // attributes that have no effect
#![allow(missing_docs)] // public items without documentation

// Public module tree of the crate, in alphabetical order.
//
// A module preceded by `#[cfg(not(target_arch = "wasm32"))]` is compiled on
// every target except `wasm32`; modules without that attribute are always
// compiled.
pub mod adaptive_selection;
pub mod benchmarking;
#[cfg(not(target_arch = "wasm32"))]
pub mod coordination;
pub mod curriculum_optimization;
#[cfg(not(target_arch = "wasm32"))]
pub mod distributed;
#[cfg(not(target_arch = "wasm32"))]
pub mod domain_specific;
pub mod error;
pub mod gpu_optimizer;
pub mod gradient_accumulation;
pub mod gradient_flow;
pub mod gradient_processing;
#[cfg(not(target_arch = "wasm32"))]
pub mod hardware_aware;
pub mod loss_landscape;
#[cfg(not(target_arch = "wasm32"))]
pub mod memory_efficient;
pub mod memory_efficient_optimizer;
pub mod metrics;
pub mod neural_integration;
#[cfg(not(target_arch = "wasm32"))]
pub mod neuromorphic;
pub mod online_learning;
pub mod optimizer_composition;
pub mod optimizer_metrics;
pub mod optimizers;
#[cfg(not(target_arch = "wasm32"))]
pub mod parallel_optimizer;
pub mod parameter_groups;
#[cfg(not(target_arch = "wasm32"))]
pub mod plugin;
#[cfg(not(target_arch = "wasm32"))]
pub mod privacy;
pub mod regularizers;
#[cfg(not(target_arch = "wasm32"))]
pub mod research;
pub mod schedulers;
pub mod second_order;
pub mod self_tuning;
pub mod simd_optimizer;
#[cfg(not(target_arch = "wasm32"))]
pub mod streaming;
pub mod training_stabilization;
pub mod unified_api;
pub mod utils;
pub mod visualization;

// Crate-root re-exports of the most commonly used items: the error types, a
// glob of everything public in the optimizer, parameter-group, regularizer and
// scheduler modules, and the main types of the unified API.
pub use error::{OptimError, OptimizerError, Result};
pub use optimizers::*;
pub use parameter_groups::*;
pub use regularizers::*;
pub use schedulers::*;
pub use unified_api::{OptimizerConfig, OptimizerFactory, Parameter, UnifiedOptimizer};

// Selected items from the remaining modules, re-exported at the crate root.
// A re-export from a module that is only compiled off `wasm32` carries the
// same `#[cfg(not(target_arch = "wasm32"))]` gate as the module itself.
pub use adaptive_selection::{
    AdaptiveOptimizerSelector, OptimizerStatistics, OptimizerType, PerformanceMetrics,
    ProblemCharacteristics, ProblemType, SelectionNetwork, SelectionStrategy,
};
pub use curriculum_optimization::{
    AdaptiveCurriculum, AdversarialAttack, AdversarialConfig, CurriculumManager, CurriculumState,
    CurriculumStrategy, ImportanceWeightingStrategy,
};
#[cfg(not(target_arch = "wasm32"))]
pub use distributed::{
    AveragingStrategy, CommunicationResult, CompressedGradient, CompressionStrategy,
    DistributedCoordinator, GradientCompressor, ParameterAverager, ParameterServer,
};
#[cfg(not(target_arch = "wasm32"))]
pub use domain_specific::{
    CrossDomainKnowledge, DomainOptimizationConfig, DomainPerformanceMetrics, DomainRecommendation,
    DomainSpecificSelector, DomainStrategy, LearningRateScheduleType, OptimizationContext,
    RecommendationType, RegularizationApproach, ResourceConstraints, TrainingConfiguration,
};
pub use gpu_optimizer::{GpuConfig, GpuMemoryStats, GpuOptimizer, GpuUtils};
// `gradient_accumulation` and `memory_efficient_optimizer` (below) both export
// a type named `GradientAccumulator`; each is re-exported under a distinct
// alias so that both are reachable from the crate root.
pub use gradient_accumulation::{
    AccumulationMode, GradientAccumulator as GradAccumulator, MicroBatchTrainer,
    VariableAccumulator,
};
// Glob re-export: every public item of `gradient_processing`.
pub use gradient_processing::*;
pub use memory_efficient_optimizer::{
    ChunkedOptimizer, GradientAccumulator as MemoryEfficientGradientAccumulator,
    MemoryUsageEstimator,
};
pub use neural_integration::architecture_aware::{
    ArchitectureAwareOptimizer, ArchitectureStrategy,
};
pub use neural_integration::forward_backward::{BackwardHook, ForwardHook, NeuralIntegration};
pub use neural_integration::{
    LayerArchitecture, LayerId, OptimizationConfig, ParamId, ParameterManager, ParameterMetadata,
    ParameterOptimizer, ParameterType,
};
pub use online_learning::{
    ColumnGrowthStrategy, LearningRateAdaptation, LifelongOptimizer, LifelongStats,
    LifelongStrategy, MemoryExample, MemoryUpdateStrategy, MirrorFunction, OnlineLearningStrategy,
    OnlineOptimizer, OnlinePerformanceMetrics, SharedKnowledge, TaskGraph,
};
pub use optimizer_metrics::{
    ConvergenceMetrics, GradientStatistics, MetricsCollector, MetricsReporter, OptimizerMetrics,
    ParameterStatistics,
};
#[cfg(not(target_arch = "wasm32"))]
pub use parallel_optimizer::{
    parallel_step, parallel_step_array1, ParallelBatchProcessor, ParallelOptimizer,
};
#[cfg(not(target_arch = "wasm32"))]
pub use plugin::core::{
    create_basic_capabilities, create_plugin_info, OptimizerPluginFactory, PluginCategory,
    PluginInfo,
};
#[cfg(not(target_arch = "wasm32"))]
pub use plugin::sdk::{BaseOptimizerPlugin, PluginTester};
#[cfg(not(target_arch = "wasm32"))]
pub use plugin::{
    OptimizerPlugin, PluginCapabilities, PluginLoader, PluginRegistry, PluginValidationFramework,
};
#[cfg(not(target_arch = "wasm32"))]
pub use privacy::{
    AccountingMethod, ClippingStats, DifferentialPrivacyConfig, DifferentiallyPrivateOptimizer,
    MomentsAccountant, NoiseMechanism, PrivacyBudget, PrivacyValidation,
};
// NOTE(review): `LBFGS` is re-exported as `SecondOrderLBFGS`, presumably to
// avoid clashing with an `LBFGS` brought in by `optimizers::*` - confirm.
pub use second_order::{
    HessianInfo, Newton, NewtonCG, SecondOrderOptimizer, LBFGS as SecondOrderLBFGS,
};
pub use self_tuning::{
    OptimizerInfo, OptimizerTrait, PerformanceStats, SelfTuningConfig, SelfTuningOptimizer,
    SelfTuningStatistics, TargetMetric,
};
pub use simd_optimizer::{should_use_simd, SimdOptimizer};
// `LearningRateAdaptation` is already re-exported from `online_learning`
// above, so the streaming type of the same name is exposed under an alias.
#[cfg(not(target_arch = "wasm32"))]
pub use streaming::{
    LearningRateAdaptation as StreamingLearningRateAdaptation, StreamingConfig, StreamingDataPoint,
    StreamingHealthStatus, StreamingMetrics, StreamingOptimizer,
};
pub use training_stabilization::{AveragingMethod, ModelEnsemble, PolyakAverager, WeightAverager};
// NOTE(review): `MemoryStats` is aliased to `VisualizationMemoryStats`,
// presumably to disambiguate it from other memory-statistics types at the
// crate root - confirm.
pub use visualization::{
    ColorScheme, ConvergenceInfo, DataSeries, MemoryStats as VisualizationMemoryStats,
    OptimizationMetric, OptimizationVisualizer, OptimizerComparison, PlotType, VisualizationConfig,
};

// Everything public in `metrics` is re-exported only when the
// `metrics_integration` Cargo feature is enabled.
#[cfg(feature = "metrics_integration")]
pub use metrics::*;