//! Optimization algorithms and training utilities.
//!
//! Submodules cover first-order optimizers (Adam and its many variants, SGD,
//! Lion, LAMB, Adan, ...), second-order methods, learning-rate schedulers,
//! distributed and federated training (ZeRO, FSDP, parameter servers),
//! quantized and memory-efficient optimizer state, and compatibility layers
//! for PyTorch, TensorFlow, and JAX. Public items are re-exported below so
//! they can be used without naming the submodule.

#![allow(
    clippy::needless_range_loop,
    clippy::manual_memcpy,
    clippy::vec_init_then_push,
    clippy::borrowed_box,
    clippy::result_large_err,
    clippy::too_many_arguments,
    clippy::type_complexity,
    clippy::excessive_nesting
)]
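
// One module per optimizer family or feature area, in alphabetical order.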
pub mod adafactor_new;
pub mod adafisher_simple;
pub mod adam;
pub mod adam_v2;
pub mod adamax_plus;
pub mod adan;
pub mod adaptive;
pub mod ademamix;
pub mod advanced_2025_research;
pub mod advanced_distributed_features;
pub mod advanced_features;
pub mod amacp;
pub mod async_optim;
pub mod averaged_adam;
pub mod bge_adam;
pub mod bge_adam_optimized;
pub mod cache_friendly;
pub mod came;
pub mod common;
pub mod compression;
pub mod continual_learning;
pub mod convergence;
pub mod cpu_offload;
pub mod cross_framework;
pub mod cyclic_decay;
pub mod deep_distributed_qp;
pub mod enhanced_distributed_training;
pub mod eva;
pub mod federated;
pub mod fsdp;
pub mod fusion;
pub mod genie_stub;
pub mod gradient_processing;
pub mod hardware_aware;
pub mod hierarchical_aggregation;
pub mod hn_adam;
pub mod hyperparameter_tuning;
pub mod jax_compat;
pub mod kernel_fusion;
pub mod lamb;
pub mod lancbio;
pub mod lazy_state;
pub mod lion;
pub mod lookahead;
pub mod lora;
pub mod lora_rite_stub;
pub mod lr_finder;
pub mod memory_layout;
pub mod microadam;
pub mod monitoring;
pub mod multinode;
pub mod muon;
pub mod novograd;
pub mod onnx_export;
pub mod optimizer;
pub mod optimizer_surgery;
pub mod parallel;
pub mod pde_aware;
pub mod per_layer_quant;
pub mod performance_validation;
pub mod prodigy;
pub mod pytorch_compat;
pub mod quantized;
pub mod quantized_advanced;
pub mod quantum_inspired;
pub mod schedule_free;
pub mod scheduler;
pub mod second_order;
pub mod sgd;
pub mod simd_optimizations;
pub mod sofo_stub;
pub mod sophia;
pub mod sparse;
pub mod task_specific;
pub mod tensorflow_compat;
pub mod traits;
pub mod zero;
#[cfg(test)]
pub mod tests;
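
// Flat re-exports of the public API, alphabetical by module; colliding names
// (`CheckpointConfig`, `PerformanceMetrics`, ...) are disambiguated with `as` renames.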
pub use adafactor_new::{AdaFactor, AdaFactorConfig};
pub use adafisher_simple::{AdaFisher, AdaFisherConfig};
pub use adam::{AdaBelief, Adam, AdamW, NAdam, RAdam};
pub use adam_v2::{AdamConfig, StandardizedAdam, StandardizedAdamW};
pub use adamax_plus::{AdaMaxPlus, AdaMaxPlusConfig};
pub use adan::{Adan, AdanConfig};
pub use adaptive::{create_ranger, create_ranger_with_config, AMSBound, AdaBound, Ranger};
pub use ademamix::{AdEMAMix, AdEMAMixConfig};
pub use advanced_2025_research::{AdaWin, AdaWinConfig, DiWo, DiWoConfig, MeZOV2, MeZOV2Config};
pub use advanced_distributed_features::{
AutoScaler, AutoScalerConfig, CheckpointConfig as AdvancedCheckpointConfig, CheckpointInfo,
CostOptimizer, MLOptimizerConfig, OptimizationResult, OptimizationType, PerformanceMLOptimizer,
ScalingDecision, ScalingStrategy, SmartCheckpointManager, WorkloadPredictor,
};
pub use advanced_features::{
CheckpointConfig, FusedOptimizer, MemoryBandwidthOptimizer, MultiOptimizerStats,
MultiOptimizerTrainer, ResourceUtilization, WarmupOptimizer, WarmupStrategy,
};
pub use amacp::{AMacP, AMacPConfig, AMacPStats};
pub use async_optim::{
AsyncSGD, AsyncSGDConfig, DelayCompensationMethod, DelayedGradient, DelayedGradientConfig,
ElasticAveraging, ElasticAveragingConfig, Hogwild, HogwildConfig, ParameterServer,
};
pub use averaged_adam::{AveragedAdam, AveragedAdamConfig};
pub use bge_adam::{BGEAdam, BGEAdamConfig};
pub use bge_adam_optimized::{OptimizedBGEAdam, OptimizedBGEAdamConfig};
pub use cache_friendly::{
CacheConfig, CacheFriendlyAdam, CacheFriendlyState, CacheStats, ParameterMetadata,
};
pub use came::{
    came_update, CAMEConfig, CameConfig, CameOptimizer, CameParamState,
    OptimError as CameOptimError, CAME,
};
pub use common::{
BiasCorrection, GradientProcessor, OptimizerState, ParameterIds, ParameterUpdate,
StateMemoryStats, WeightDecayMode,
};
pub use compression::{
CompressedAllReduce, CompressedGradient, CompressionMethod, GradientCompressor,
};
pub use continual_learning::{
AllocationStrategy, EWCConfig, FisherMethod, L2Regularization, L2RegularizationConfig,
MemoryReplay, MemoryReplayConfig, MemorySelectionStrategy, PackNet, PackNetConfig,
UpdateStrategy, EWC,
};
pub use convergence::{
AggMo, AggMoConfig, FISTAConfig, HeavyBall, HeavyBallConfig, NesterovAcceleratedGradient,
NesterovAcceleratedGradientConfig, QHMConfig, VarianceReduction, VarianceReductionConfig,
VarianceReductionMethod, FISTA, QHM,
};
pub use cpu_offload::{
create_cpu_offloaded_adam, create_cpu_offloaded_adamw, create_cpu_offloaded_sgd,
CPUOffloadConfig, CPUOffloadStats, CPUOffloadedOptimizer,
};
pub use cross_framework::{
ConfigSource, ConfigTarget, CrossFrameworkConverter, Framework, JAXOptimizerConfig,
PyTorchOptimizerConfig, TrustformeRSOptimizerConfig, UniversalOptimizerConfig,
UniversalOptimizerState,
};
pub use cyclic_decay::{
    AnnealStrategy, CyclicLrConfig, CyclicLrMode, CyclicLrScheduler, OneCycleLrScheduler,
};
pub use deep_distributed_qp::{DeepDistributedQP, DeepDistributedQPConfig};
pub use enhanced_distributed_training::{
Bottleneck, CompressionConfig, CompressionType, DistributedConfig, DistributedTrainingStats,
DynamicBatchingConfig, EnhancedDistributedTrainer, FaultToleranceConfig,
MemoryOptimizationConfig, MonitoringConfig as DistributedMonitoringConfig,
PerformanceMetrics as DistributedPerformanceMetrics, PerformanceTrend, TrainingStepResult,
};
pub use eva::{EVAConfig, EVA};
pub use federated::{
ClientInfo, ClientSelectionStrategy, DifferentialPrivacy, DifferentialPrivacyConfig, FedAvg,
FedAvgConfig, FedProx, FedProxConfig, NoiseMechanism, SecureAggregation,
};
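// SIMD kernels are re-exported only on x86_64 targets.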
#[cfg(target_arch = "x86_64")]
pub use fusion::simd;
pub use fusion::{FusedOperation, FusedOptimizerState, FusionConfig, FusionStats};
pub use genie_stub::{DomainStats, GENIEConfig, GENIEStats, GENIE};
pub use gradient_processing::{
AdaptiveClippingConfig, GradientProcessedOptimizer, GradientProcessingConfig,
HessianApproximationType, HessianPreconditioningConfig, NoiseInjectionConfig, NoiseType,
SmoothingConfig,
};
pub use hardware_aware::{
create_edge_optimizer, create_gpu_adam, create_mobile_optimizer, create_tpu_optimizer,
CompressionRatio, EdgeOptimizer, GPUAdam, HardwareAwareConfig, HardwareTarget, MobileOptimizer,
TPUOptimizer, TPUVersion,
};
pub use hierarchical_aggregation::{
AggregationStats, AggregationStrategy, ButterflyStructure, CommunicationGroups, FaultDetector,
HierarchicalAggregator, HierarchicalConfig, NodeTopology, RecoveryStrategy, RingStructure,
TreeStructure,
};
pub use hn_adam::{HNAdam, HNAdamConfig};
pub use hyperparameter_tuning::{
BayesianOptimizer, HyperparameterSample, HyperparameterSpace, HyperparameterTuner,
MultiObjectiveOptimizer, OptimizationTask, OptimizerType,
PerformanceMetrics as HyperparameterPerformanceMetrics, TaskType as HyperparameterTaskType,
};
pub use jax_compat::{
JAXAdam, JAXAdamW, JAXChain, JAXCosineDecay, JAXCosineDecaySchedule, JAXExponentialDecay,
JAXGradientTransformation, JAXLearningRateSchedule, JAXOptState, JAXOptimizerFactory,
JAXOptimizerState, JAXWarmupCosineDecay, JAXSGD,
};
pub use kernel_fusion::{
CoalescingLevel, FusedGPUState, GPUMemoryStats, KernelFusedAdam, KernelFusionConfig,
};
pub use lamb::LAMB;
pub use lancbio::{LancBiO, LancBiOConfig};
pub use lazy_state::{LazyAdam, LazyOptimizerStats, LazyParamState};
pub use lion::{Lion, LionConfig};
pub use lookahead::{
Lookahead, LookaheadAdam, LookaheadAdamW, LookaheadNAdam, LookaheadRAdam, LookaheadSGD,
};
pub use lora::{
create_lora_adam, create_lora_adamw, create_lora_sgd, LoRAAdapter, LoRAConfig, LoRAOptimizer,
};
pub use lora_rite_stub::{LoRARITE, LoRARITEConfig, LoRARITEStats, TransformationStats};
pub use lr_finder::{
    find_optimal_lr, LrFinder, LrFinderAction, LrFinderConfig, LrFinderResult, LrStopReason,
};
pub use memory_layout::{
AlignedAllocator, AlignmentConfig, LayoutOptimizedAdam, LayoutStats, SoAOptimizerState,
};
pub use microadam::{MicroAdam, MicroAdamConfig};
pub use monitoring::{
ConvergenceIndicators, ConvergenceSpeed, HyperparameterSensitivity,
HyperparameterSensitivityConfig, HyperparameterSensitivityMetrics, MemoryStats, MemoryUsage,
MetricStats, MonitoringConfig, OptimizerMetrics, OptimizerMonitor, OptimizerRecommendation,
OptimizerSelector, PerformanceStats, PerformanceTier,
};
pub use multinode::{MultiNodeConfig, MultiNodeStats, MultiNodeTrainer};
pub use muon::{Muon, MuonConfig};
pub use novograd::{MemoryEfficiencyStats, NovoGrad, NovoGradConfig, NovoGradStats};
pub use onnx_export::{
    ONNXExportConfig, ONNXGraph, ONNXModel, ONNXNode, ONNXOptimizerExporter, ONNXOptimizerMetadata,
    OptimizerConfig,
};
pub use parallel::{BatchUpdate, ParallelAdam, ParallelConfig, ParallelStats};
pub use pde_aware::{PDEAwareConfig, PDEAwareOptimizer, PDEAwareStats};
pub use performance_validation::{
    BenchmarkScenario, ConvergenceAnalysisResults, CorrectnessResults,
    DistributedValidationResults, MathematicalProperty, MathematicalTestCase,
    MemoryValidationResults, PerformanceBenchmarkResults, PerformanceValidator,
    RegressionAnalysisResults, StatisticalMetrics, ValidationConfig, ValidationResults,
};
pub use prodigy::{Prodigy, ProdigyConfig};
pub use pytorch_compat::{
PyTorchAdam, PyTorchAdamW, PyTorchLRScheduler, PyTorchOptimizer, PyTorchOptimizerFactory,
PyTorchOptimizerState, PyTorchParamGroup, PyTorchSGD,
};
pub use quantized::{Adam8bit, AdamW8bit, QuantizationConfig, QuantizedState};
pub use quantized_advanced::{
Adam4bit, Adam4bitOptimizerConfig, AdvancedQuantizationConfig, GradientStatistics,
QuantizationMethod, QuantizationUtils, QuantizedTensor,
};
pub use quantum_inspired::{
QuantumAnnealingConfig, QuantumAnnealingOptimizer, QuantumAnnealingStats,
};
pub use schedule_free::{
ScheduleFreeAdam, ScheduleFreeAdamConfig, ScheduleFreeSGD, ScheduleFreeSGDConfig,
};
pub use scheduler::{
AdaptiveScheduler, CompositeScheduler, ConstantWithWarmupScheduler, CosineScheduler,
CosineWithRestartsScheduler, CyclicalMode, CyclicalScheduler, DynamicScheduler,
ExponentialScheduler, LRScheduler, LinearScheduler, OneCycleScheduler, Phase,
PhaseBasedScheduler, PolynomialScheduler, StepScheduler, SwitchCondition,
TaskSpecificScheduler, TaskType as SchedulerTaskType,
};
pub use second_order::{
LineSearchMethod, NewtonCG, SSBFGSConfig, SSBFGSStats, SSBroyden, SSBroydenConfig, LBFGS,
SSBFGS,
};
pub use sgd::SGD;
pub use simd_optimizations::{SIMDConfig, SIMDOptimizer, SIMDPerformanceInfo};
pub use sofo_stub::{
ForwardModeStats, MemoryStats as SOFOMemoryStats, SOFOConfig, SOFOStats, SOFO,
};
pub use sophia::{
    hutchinson_hessian_estimate, sophia_update, Sophia, SophiaConfig, SophiaError,
    SophiaLegacyConfig, SophiaOptimizer, SophiaParamState,
};
pub use sparse::{SparseAdam, SparseConfig, SparseMomentumState, SparseSGD};
pub use task_specific::{
create_bert_optimizer, create_gan_optimizer, create_maml_optimizer, create_ppo_optimizer,
BERTOptimizer, GANOptimizer, MetaOptimizer as TaskMetaOptimizer, RLOptimizer,
};
pub use tensorflow_compat::{
TensorFlowAdam, TensorFlowAdamW, TensorFlowCosineDecay, TensorFlowExponentialDecay,
TensorFlowLearningRateSchedule, TensorFlowOptimizer, TensorFlowOptimizerConfig,
TensorFlowOptimizerFactory,
};
pub use traits::{
AdaptiveMomentumOptimizer, AsyncOptimizer, ClassicalMomentumOptimizer, CompositeOptimizer,
DistributedOptimizer, FederatedOptimizer, GPUOptimizer, GradientCompressionOptimizer,
HardwareOptimizer, HardwareStats, LookaheadOptimizer, MetaOptimizer, MomentumOptimizer,
OptimizerFactory, ScheduledOptimizer, SecondOrderOptimizer, SerializableOptimizer,
StalenessCompensation, StatefulOptimizer,
};
pub use zero::{
all_gather_gradients, gather_parameters, partition_gradients, partition_parameters,
reduce_scatter_gradients, GradientBuffer, ParameterGroup, ParameterPartition, ZeROConfig,
ZeROImplementationStage, ZeROMemoryStats, ZeROOptimizer, ZeROStage, ZeROStage1, ZeROStage2,
ZeROStage3, ZeROState,
};