1#![cfg_attr(not(feature = "std"), no_std)]
214#![allow(clippy::result_large_err)]
215#![allow(clippy::needless_borrow)]
217#![allow(clippy::redundant_closure)]
218#![allow(clippy::collapsible_if)]
219#![allow(clippy::manual_range_contains)]
220#![allow(clippy::match_like_matches_macro)]
221#![allow(clippy::upper_case_acronyms)]
222
223pub mod adaptive_tuning;
224#[cfg(feature = "gpu")]
225pub mod async_gpu_optimizations;
226pub mod buffer;
227pub mod checkpointing;
228pub mod collective;
229pub mod complex;
230pub mod context;
231pub mod cross_platform_optimization;
232pub mod deployment;
233pub mod deterministic;
234pub mod device;
235pub mod dispatch_init;
236pub mod dispatch_registry;
237pub mod dispatch_registry_examples;
238pub mod dispatch_registry_extended;
239pub mod dtype;
240pub mod eager_execution;
241pub mod error;
242pub mod fallback;
243pub mod gpu_memory_metrics;
244pub mod gpu_stub;
245pub mod gradient_clipping;
246pub mod gradient_coverage_audit;
247pub mod gradient_validation_framework;
248pub mod graph;
249pub mod half_precision;
250pub mod integration;
251pub mod large_model_optimization;
252pub mod layout;
253pub mod memory;
254pub mod memory_tensorflow_comparison;
255pub mod mixed_precision;
256pub mod monitoring;
257pub mod neural_optimization;
258pub mod numerical_gradient;
259pub mod onnx_interop;
260pub mod ops;
261pub mod performance_benchmarks;
262pub mod performance_gates;
263pub mod production_benchmarks;
264pub mod production_performance_monitoring;
265pub mod quantization;
266#[cfg(feature = "serialize")]
267pub mod serialization;
268#[cfg(feature = "serialize")]
269pub mod serialization_onnx;
270pub mod session;
271pub mod shape;
272pub mod shape_error_taxonomy;
273pub mod simd;
274pub mod simplified_benchmarks;
275pub mod strided;
276pub mod structured_arrays;
277pub mod system_health;
278pub mod tensor;
279pub mod tensor_view;
280pub mod ultra_performance_profiler;
281pub mod wasm;
282pub mod wasm_optimization;
283pub use complex::{Complex32, Complex64};
286pub use device::Device;
287pub use dtype::{dtype_from_type, DType};
288pub use error::{Result, TensorError};
289pub use fallback::{
290 cleanup_memory_and_retry, execute_binary_op_with_fallback, execute_unary_op_with_fallback,
291 get_fallback_config, is_auto_fallback_enabled, set_auto_fallback_enabled, set_fallback_config,
292 FallbackConfig, FallbackWrapper,
293};
294pub use half_precision::{
295 bf16, f16, HalfPrecision, MixedPrecisionConfig as HalfMixedPrecisionConfig,
296};
297pub use integration::{
298 BaselinePerformance, OptimizationBreakdown, PerformanceTargets, UltraPerformanceValidator,
299 ValidationReport, ValidationResult, ValidationTestSuite,
300};
301pub use layout::{convert_layout, infer_layout, DataLayout, LayoutOptimizer, OperationType};
302pub use quantization::{
303 dequantize, dynamic_quantize, fake_quantize, per_channel_quantize, quantize, QuantizationParams,
304};
305pub use shape::Shape;
306pub use shape_error_taxonomy::{
307 validate_broadcast_shapes, validate_elementwise_shapes, validate_matmul_shapes,
308 validate_reduction_axis, validate_reshape, ShapeErrorBuilder, ShapeErrorCategory,
309 ShapeErrorUtils,
310};
311#[cfg(feature = "simd")]
312pub use simd::{benchmarks::Benchmarks as simd_benchmarks, SimdCapabilities, SimdOptimizer};
313pub use simd::{
314 global_simd_engine, AdvancedKernelRegistry, CacheFriendlyMatMul, CacheOptimizedTensorOps,
315 ConvolutionParams, CpuFeatures, ElementWiseOp, KernelOptimizationStrategy, MemoryAccessPattern,
316 ReductionOp as SimdReductionOp, SimdEngineConfig, SpecializedKernel, UltraSimdEngine,
317};
318pub use tensor::Tensor;
319pub use adaptive_tuning::{
321 execute_with_adaptive_tuning, AdaptiveTuner, ExecutionStrategy, OperationMetrics,
322 PerformancePredictor, GLOBAL_TUNER,
323};
324#[cfg(feature = "gpu")]
325pub use async_gpu_optimizations::{
326 utils as async_gpu_utils, AccessPattern, AsyncGpuOperation, AsyncGpuScheduler,
327 AsyncMatMulOperation, ComputeIntensity, OperationPriority,
328 PerformanceMetrics as AsyncPerformanceMetrics,
329};
330pub use collective::{
331 all_gather, all_reduce, broadcast, create_process_group, init_collective, CollectiveManager,
332 CollectiveOp, CommunicationGroup, ReductionOp,
333};
334pub use context::{get_context, set_context, Context};
335pub use cross_platform_optimization::{
336 get_global_optimizer, get_optimal_configuration, initialize_cross_platform_optimizer,
337 CrossPlatformOptimizer, OptimalConfiguration, TargetArchitecture, TargetPlatform,
338};
339pub use deterministic::{
340 clear_operation_log, get_global_seed, get_operation_log, get_operation_seed,
341 get_state_snapshot, is_deterministic_mode, is_strict_mode, mark_non_deterministic,
342 reset_operation_counter, restore_state_snapshot, set_deterministic_mode, set_global_seed,
343 set_strict_mode, should_use_deterministic_gpu_ops, DeterministicConfig, DeterministicScope,
344 DeterministicSnapshot, DeterministicState,
345};
346pub use dispatch_init::ensure_initialized as ensure_dispatch_initialized;
347pub use dispatch_registry::{
348 get_registry, BackendType, BinaryKernelFn, DispatchBenchmarkResult, DispatchRegistry,
349 KernelImplementation, OperationDescriptor, UnaryKernelFn, F32_REGISTRY, F64_REGISTRY,
350 I32_REGISTRY,
351};
352pub use eager_execution::{
353 CacheStatistics, EagerExecutionConfig, EagerExecutionEngine, EagerPerformanceReport,
354 ExecutionMetrics, EAGER_ENGINE,
355};
356pub use gpu_memory_metrics::{
357 generate_memory_report, get_gpu_memory_snapshot, get_gpu_memory_usage, get_gpu_peak_memory,
358 print_memory_report, reset_gpu_memory_metrics, GpuMemoryMetrics, GpuMemoryReport,
359 GpuMemorySnapshot, GPU_MEMORY_METRICS,
360};
361pub use gradient_clipping::{
362 GradientClipper, GradientClippingConfig, GradientStatistics, NormType,
363};
364pub use graph::{
365 AttributeValue, AttributeValueDef, EdgeId, Graph, GraphDef, GraphEdge, GraphNode, NodeDef,
366 NodeId, NodeType,
367};
368pub use large_model_optimization::{
369 LargeModelConfig, LargeModelOptimizationReport, LargeModelOptimizer, MemoryOptimizationStats,
370 ModelExecutionPlan, LARGE_MODEL_OPTIMIZER,
371};
372#[cfg(feature = "gpu")]
373pub use memory::DiagnosticMemoryPool;
374pub use memory::{
375 global_monitor, global_monitor_arc, IntegratedDiagnosticReport, KernelOccupancyStats,
376 MemoryAliasDetector, MemoryPool, MemoryPoolStats, MultiStreamMemoryManager, OperationTimer,
377 OptimizationResult, PerformanceMonitor, PoolHealthMetrics, PoolHealthStatus,
378 PoolOptimizationConfig, StridedView,
379};
380pub use memory_tensorflow_comparison::{
381 MemoryComparisonReport, MemoryOptimizationSuggestion, MemoryProfilingConfig, MemorySnapshot,
382 TensorFlowMemoryProfiler, MEMORY_PROFILER,
383};
384pub use mixed_precision::{
385 disable_autocast, enable_autocast, enable_autocast_bfloat16, from_bfloat16_f32,
386 from_bfloat16_f64, from_half, from_half_f32, from_half_f64, to_bfloat16_f32, to_bfloat16_f64,
387 to_half, to_half_f32, to_half_f64, AutocastContext, GradientScaler, MixedPrecisionConfig,
388 MixedPrecisionState,
389};
390pub use monitoring::{
391 AlertSeverity,
392 BottleneckType,
394 MonitoringConfig as UltraMonitoringConfig,
395 MonitoringReport,
396 OperationMetrics as MonitoringOperationMetrics,
398 OptimizationOpportunity,
399 PerformanceAlert,
400 PerformanceDashboard,
401
402 PerformancePrediction,
403
404 PerformancePredictor as MonitoringPerformancePredictor,
406
407 PerformanceSnapshot,
408 SystemBottleneck,
409 SystemMetrics,
410 TrendDirection,
411 TrendType,
412 UltraPerformanceMonitor,
414};
415pub use neural_optimization::{
416 LayerPerformanceMetrics, NetworkPerformanceReport,
417 OptimizationBreakdown as NeuralOptimizationBreakdown, UltraOptimizedActivations,
418 UltraOptimizedDenseLayer, UltraOptimizedNeuralNetwork,
419};
420pub use onnx_interop::{
421 OnnxConfig,
422 OnnxExporter,
423 OnnxImporter,
424 OnnxModel,
425 };
427pub use ops::{
428 execute_fused_graph, get_fusion_stats, infer_binary_elementwise,
429 infer_binary_elementwise_validated, infer_concat, infer_conv2d, infer_matmul, infer_reduction,
430 infer_reshape, print_framework_comparison_results, print_fusion_report,
431 record_fusion_opportunity, reset_fusion_stats, run_framework_comparison_benchmark,
432 BroadcastableConstraint, ElementwiseOpType, ExactShapeConstraint, FrameworkBenchmarkConfig,
433 FrameworkComparisonResult, FusionGraph, FusionNode, FusionPassBuilder, FusionStats,
434 MatMulCompatibleConstraint, MinRankConstraint, RankConstraint, ShapeConstraint, ShapeContext,
435 ShapeValidator,
436};
437pub use performance_gates::{
438 get_baseline, list_baselines, register_baseline, OperationBaseline, PerformanceGate,
439 PerformanceGateSuite, PerformanceMeasurement,
440};
441pub use production_benchmarks::{
442 run_comprehensive_production_benchmarks, BenchmarkConfig, BenchmarkResult,
443 BenchmarkSummary as ProductionBenchmarkSummary,
444 OptimizationBreakdown as ProductionOptimizationBreakdown, ProblemSize,
445 ProductionBenchmarkReport, ProductionBenchmarkSuite, QualityMetrics,
446};
447pub use production_performance_monitoring::{
448 get_global_monitor, initialize_performance_monitoring, record_performance_event,
449 AlertThresholds, MonitoringConfig, PerformanceEvent, PerformanceMetrics,
450 ProductionPerformanceMonitor,
451};
452pub use session::{create_session, DefaultSession, FeedDict, FetchSpec, Session, SessionConfig};
453pub use simplified_benchmarks::{
454 run_simple_benchmarks, validate_optimizations, BenchmarkReport, BenchmarkSummary,
455 SimpleBenchmarkConfig, SimpleBenchmarkResult, SimpleBenchmarkSuite,
456};
457pub use strided::{SliceParams, StridedLayout};
458pub use structured_arrays::{FieldDescriptor, FieldValue, StructuredArray};
459pub use system_health::{
460 run_quick_health_check, run_system_health_check, FeaturesInfo, GpuMemoryInfo,
461 HealthCheckConfig, HealthStatus, MemoryInfo, PerformanceBenchmarks, SystemHealthChecker,
462 SystemInfo,
463};
464pub use tensor_view::{MemoryStats, TensorView, TensorViewOps};
465pub use wasm::{utils as wasm_utils, WasmContext};
466#[cfg(target_arch = "wasm32")]
467pub use wasm::{WasmContextWithGpu, WasmWebGpuContext, WebGpuBackend, WebGpuLimits};
468#[cfg(feature = "wasm")]
469pub use wasm_optimization::{
470 WasmBundleOptimizer, WasmEdgeInference, WasmMemoryManager, WasmOptimizationConfig,
471 WasmOptimizedTensor, WasmTensorOperations,
472};
473
474#[cfg(feature = "gpu")]
475pub use gpu_profiler::{
476 disable_gpu_profiling, enable_gpu_profiling, generate_gpu_profiling_report,
477 get_gpu_profiling_stats, global_profiler, GpuProfiler, OperationProfile, ProfileStats,
478};
479
480#[cfg(feature = "gpu")]
481pub use gpu::memory_diagnostics::{
482 check_gpu_memory_leaks, print_gpu_diagnostics, run_gpu_diagnostics, DiagnosticReport,
483 DiagnosticsConfig, FragmentationAnalysis, GpuMemoryDiagnostics, LeakDetectionResult,
484 OperationProfile as MemoryOperationProfile, GLOBAL_GPU_DIAGNOSTICS,
485};
486
487#[cfg(feature = "gpu")]
488pub use gpu::memory_tracing::{
489 current_gpu_memory_usage, generate_gpu_memory_report, peak_gpu_memory_usage,
490 print_gpu_memory_report, record_gpu_allocation, record_gpu_deallocation, AllocationInfo,
491 GpuMemoryTracker, MemoryReport, MemoryTracingConfig, GLOBAL_GPU_MEMORY_TRACKER,
492};
493
494#[cfg(feature = "gpu")]
495pub mod gpu;
496
497#[cfg(feature = "gpu")]
498pub mod gpu_profiler;
499
/// Crate-level smoke tests exercising the re-exported public API.
#[cfg(test)]
mod tests {
    use super::*;

    /// A zero-initialised `f32` tensor built from the dimensions `[2, 3]`
    /// must report a shape equal to `Shape::from_slice(&[2, 3])`.
    #[test]
    fn test_basic_tensor_creation() {
        let zeros = Tensor::<f32>::zeros(&[2, 3]);
        let expected_shape = Shape::from_slice(&[2, 3]);

        assert_eq!(zeros.shape(), &expected_shape);
    }
}
510pub mod shape_inference_helpers;