#![cfg_attr(not(feature = "std"), no_std)]
// Crate-wide Clippy suppressions. NOTE(review): blanket `allow`s hide lint
// findings across the whole crate; prefer fixing or justifying each at the
// offending site. Kept as found.
#![allow(clippy::result_large_err)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::redundant_closure)]
#![allow(clippy::collapsible_if)]
#![allow(clippy::manual_range_contains)]
#![allow(clippy::match_like_matches_macro)]
#![allow(clippy::upper_case_acronyms)]
//! Crate root: declares the public module tree and flattens the most commonly
//! used items to the crate root via the `pub use` re-exports further down.
//! Modules gated on Cargo features carry a `#[cfg(feature = "...")]` attribute.
pub mod adaptive_tuning;
#[cfg(feature = "gpu")]
pub mod async_gpu_optimizations;
pub mod buffer;
pub mod checkpointing;
pub mod collective;
pub mod complex;
pub mod context;
pub mod cross_platform_optimization;
pub mod deployment;
pub mod deterministic;
pub mod device;
pub mod dispatch_init;
pub mod dispatch_registry;
pub mod dispatch_registry_examples;
pub mod dispatch_registry_extended;
pub mod dtype;
pub mod eager_execution;
pub mod error;
pub mod fallback;
pub mod gpu_memory_metrics;
pub mod gpu_stub;
pub mod gradient_clipping;
pub mod gradient_coverage_audit;
pub mod gradient_validation_framework;
pub mod graph;
pub mod half_precision;
pub mod integration;
pub mod large_model_optimization;
pub mod layout;
pub mod memory;
pub mod memory_tensorflow_comparison;
pub mod mixed_precision;
pub mod monitoring;
pub mod neural_optimization;
pub mod numerical_gradient;
pub mod onnx_interop;
pub mod ops;
pub mod performance_benchmarks;
pub mod performance_gates;
pub mod production_benchmarks;
pub mod production_performance_monitoring;
pub mod quantization;
// Serialization support is opt-in behind the `serialize` feature.
#[cfg(feature = "serialize")]
pub mod serialization;
#[cfg(feature = "serialize")]
pub mod serialization_onnx;
pub mod session;
pub mod shape;
pub mod shape_error_taxonomy;
pub mod simd;
pub mod simplified_benchmarks;
pub mod strided;
pub mod structured_arrays;
pub mod system_health;
pub mod tensor;
pub mod tensor_view;
pub mod ultra_performance_profiler;
// NOTE(review): `wasm` and `wasm_optimization` are declared unconditionally
// here, while some of their re-exports below are gated on
// `target_arch = "wasm32"` / `feature = "wasm"` — presumably the gating lives
// inside the modules; confirm.
pub mod wasm;
pub mod wasm_optimization;
pub use complex::{Complex32, Complex64};
pub use device::Device;
pub use dtype::{dtype_from_type, DType};
pub use error::{Result, TensorError};
pub use fallback::{
cleanup_memory_and_retry, execute_binary_op_with_fallback, execute_unary_op_with_fallback,
get_fallback_config, is_auto_fallback_enabled, set_auto_fallback_enabled, set_fallback_config,
FallbackConfig, FallbackWrapper,
};
pub use half_precision::{
bf16, f16, HalfPrecision, MixedPrecisionConfig as HalfMixedPrecisionConfig,
};
pub use integration::{
BaselinePerformance, OptimizationBreakdown, PerformanceTargets, UltraPerformanceValidator,
ValidationReport, ValidationResult, ValidationTestSuite,
};
pub use layout::{convert_layout, infer_layout, DataLayout, LayoutOptimizer, OperationType};
pub use quantization::{
dequantize, dynamic_quantize, fake_quantize, per_channel_quantize, quantize, QuantizationParams,
};
pub use shape::Shape;
pub use shape_error_taxonomy::{
validate_broadcast_shapes, validate_elementwise_shapes, validate_matmul_shapes,
validate_reduction_axis, validate_reshape, ShapeErrorBuilder, ShapeErrorCategory,
ShapeErrorUtils,
};
// NOTE(review): the benchmark re-export alone is gated on the `simd` feature,
// while `pub mod simd;` and the engine re-exports below are unconditional —
// presumably `simd::benchmarks` is feature-gated inside the module; confirm.
// Also note `simd_benchmarks` binds a type (`Benchmarks`) to a snake_case
// name — confirm this casing is intentional.
#[cfg(feature = "simd")]
pub use simd::{benchmarks::Benchmarks as simd_benchmarks, SimdCapabilities, SimdOptimizer};
pub use simd::{
global_simd_engine, AdvancedKernelRegistry, CacheFriendlyMatMul, CacheOptimizedTensorOps,
ConvolutionParams, CpuFeatures, ElementWiseOp, KernelOptimizationStrategy, MemoryAccessPattern,
ReductionOp as SimdReductionOp, SimdEngineConfig, SpecializedKernel, UltraSimdEngine,
};
pub use tensor::Tensor;
pub use adaptive_tuning::{
execute_with_adaptive_tuning, AdaptiveTuner, ExecutionStrategy, OperationMetrics,
PerformancePredictor, GLOBAL_TUNER,
};
#[cfg(feature = "gpu")]
pub use async_gpu_optimizations::{
utils as async_gpu_utils, AccessPattern, AsyncGpuOperation, AsyncGpuScheduler,
AsyncMatMulOperation, ComputeIntensity, OperationPriority,
PerformanceMetrics as AsyncPerformanceMetrics,
};
pub use collective::{
all_gather, all_reduce, broadcast, create_process_group, init_collective, CollectiveManager,
CollectiveOp, CommunicationGroup, ReductionOp,
};
pub use context::{get_context, set_context, Context};
pub use cross_platform_optimization::{
get_global_optimizer, get_optimal_configuration, initialize_cross_platform_optimizer,
CrossPlatformOptimizer, OptimalConfiguration, TargetArchitecture, TargetPlatform,
};
pub use deterministic::{
clear_operation_log, get_global_seed, get_operation_log, get_operation_seed,
get_state_snapshot, is_deterministic_mode, is_strict_mode, mark_non_deterministic,
reset_operation_counter, restore_state_snapshot, set_deterministic_mode, set_global_seed,
set_strict_mode, should_use_deterministic_gpu_ops, DeterministicConfig, DeterministicScope,
DeterministicSnapshot, DeterministicState,
};
pub use dispatch_init::ensure_initialized as ensure_dispatch_initialized;
pub use dispatch_registry::{
get_registry, BackendType, BinaryKernelFn, DispatchBenchmarkResult, DispatchRegistry,
KernelImplementation, OperationDescriptor, UnaryKernelFn, F32_REGISTRY, F64_REGISTRY,
I32_REGISTRY,
};
pub use eager_execution::{
CacheStatistics, EagerExecutionConfig, EagerExecutionEngine, EagerPerformanceReport,
ExecutionMetrics, EAGER_ENGINE,
};
pub use gpu_memory_metrics::{
generate_memory_report, get_gpu_memory_snapshot, get_gpu_memory_usage, get_gpu_peak_memory,
print_memory_report, reset_gpu_memory_metrics, GpuMemoryMetrics, GpuMemoryReport,
GpuMemorySnapshot, GPU_MEMORY_METRICS,
};
pub use gradient_clipping::{
GradientClipper, GradientClippingConfig, GradientStatistics, NormType,
};
pub use graph::{
AttributeValue, AttributeValueDef, EdgeId, Graph, GraphDef, GraphEdge, GraphNode, NodeDef,
NodeId, NodeType,
};
pub use large_model_optimization::{
LargeModelConfig, LargeModelOptimizationReport, LargeModelOptimizer, MemoryOptimizationStats,
ModelExecutionPlan, LARGE_MODEL_OPTIMIZER,
};
#[cfg(feature = "gpu")]
pub use memory::DiagnosticMemoryPool;
pub use memory::{
global_monitor, global_monitor_arc, IntegratedDiagnosticReport, KernelOccupancyStats,
MemoryAliasDetector, MemoryPool, MemoryPoolStats, MultiStreamMemoryManager, OperationTimer,
OptimizationResult, PerformanceMonitor, PoolHealthMetrics, PoolHealthStatus,
PoolOptimizationConfig, StridedView,
};
pub use memory_tensorflow_comparison::{
MemoryComparisonReport, MemoryOptimizationSuggestion, MemoryProfilingConfig, MemorySnapshot,
TensorFlowMemoryProfiler, MEMORY_PROFILER,
};
pub use mixed_precision::{
disable_autocast, enable_autocast, enable_autocast_bfloat16, from_bfloat16_f32,
from_bfloat16_f64, from_half, from_half_f32, from_half_f64, to_bfloat16_f32, to_bfloat16_f64,
to_half, to_half_f32, to_half_f64, AutocastContext, GradientScaler, MixedPrecisionConfig,
MixedPrecisionState,
};
pub use monitoring::{
AlertSeverity,
BottleneckType,
MonitoringConfig as UltraMonitoringConfig,
MonitoringReport,
OperationMetrics as MonitoringOperationMetrics,
OptimizationOpportunity,
PerformanceAlert,
PerformanceDashboard,
PerformancePrediction,
PerformancePredictor as MonitoringPerformancePredictor,
PerformanceSnapshot,
SystemBottleneck,
SystemMetrics,
TrendDirection,
TrendType,
UltraPerformanceMonitor,
};
pub use neural_optimization::{
LayerPerformanceMetrics, NetworkPerformanceReport,
OptimizationBreakdown as NeuralOptimizationBreakdown, UltraOptimizedActivations,
UltraOptimizedDenseLayer, UltraOptimizedNeuralNetwork,
};
pub use onnx_interop::{
OnnxConfig,
OnnxExporter,
OnnxImporter,
OnnxModel,
};
pub use ops::{
execute_fused_graph, get_fusion_stats, infer_binary_elementwise,
infer_binary_elementwise_validated, infer_concat, infer_conv2d, infer_matmul, infer_reduction,
infer_reshape, print_framework_comparison_results, print_fusion_report,
record_fusion_opportunity, reset_fusion_stats, run_framework_comparison_benchmark,
BroadcastableConstraint, ElementwiseOpType, ExactShapeConstraint, FrameworkBenchmarkConfig,
FrameworkComparisonResult, FusionGraph, FusionNode, FusionPassBuilder, FusionStats,
MatMulCompatibleConstraint, MinRankConstraint, RankConstraint, ShapeConstraint, ShapeContext,
ShapeValidator,
};
pub use performance_gates::{
get_baseline, list_baselines, register_baseline, OperationBaseline, PerformanceGate,
PerformanceGateSuite, PerformanceMeasurement,
};
pub use production_benchmarks::{
run_comprehensive_production_benchmarks, BenchmarkConfig, BenchmarkResult,
BenchmarkSummary as ProductionBenchmarkSummary,
OptimizationBreakdown as ProductionOptimizationBreakdown, ProblemSize,
ProductionBenchmarkReport, ProductionBenchmarkSuite, QualityMetrics,
};
pub use production_performance_monitoring::{
get_global_monitor, initialize_performance_monitoring, record_performance_event,
AlertThresholds, MonitoringConfig, PerformanceEvent, PerformanceMetrics,
ProductionPerformanceMonitor,
};
pub use session::{create_session, DefaultSession, FeedDict, FetchSpec, Session, SessionConfig};
pub use simplified_benchmarks::{
run_simple_benchmarks, validate_optimizations, BenchmarkReport, BenchmarkSummary,
SimpleBenchmarkConfig, SimpleBenchmarkResult, SimpleBenchmarkSuite,
};
pub use strided::{SliceParams, StridedLayout};
pub use structured_arrays::{FieldDescriptor, FieldValue, StructuredArray};
pub use system_health::{
run_quick_health_check, run_system_health_check, FeaturesInfo, GpuMemoryInfo,
HealthCheckConfig, HealthStatus, MemoryInfo, PerformanceBenchmarks, SystemHealthChecker,
SystemInfo,
};
pub use tensor_view::{MemoryStats, TensorView, TensorViewOps};
pub use wasm::{utils as wasm_utils, WasmContext};
// WebGPU-backed context types are only available when compiling for wasm32.
#[cfg(target_arch = "wasm32")]
pub use wasm::{WasmContextWithGpu, WasmWebGpuContext, WebGpuBackend, WebGpuLimits};
#[cfg(feature = "wasm")]
pub use wasm_optimization::{
WasmBundleOptimizer, WasmEdgeInference, WasmMemoryManager, WasmOptimizationConfig,
WasmOptimizedTensor, WasmTensorOperations,
};
// GPU-only re-exports. The `gpu` and `gpu_profiler` modules they draw from are
// declared at the end of this group — Rust does not require a module to be
// declared before paths referencing it, so this ordering is legal, though
// grouping the `pub mod` lines with the others at the top would read better.
#[cfg(feature = "gpu")]
pub use gpu_profiler::{
disable_gpu_profiling, enable_gpu_profiling, generate_gpu_profiling_report,
get_gpu_profiling_stats, global_profiler, GpuProfiler, OperationProfile, ProfileStats,
};
#[cfg(feature = "gpu")]
pub use gpu::memory_diagnostics::{
check_gpu_memory_leaks, print_gpu_diagnostics, run_gpu_diagnostics, DiagnosticReport,
DiagnosticsConfig, FragmentationAnalysis, GpuMemoryDiagnostics, LeakDetectionResult,
OperationProfile as MemoryOperationProfile, GLOBAL_GPU_DIAGNOSTICS,
};
#[cfg(feature = "gpu")]
pub use gpu::memory_tracing::{
current_gpu_memory_usage, generate_gpu_memory_report, peak_gpu_memory_usage,
print_gpu_memory_report, record_gpu_allocation, record_gpu_deallocation, AllocationInfo,
GpuMemoryTracker, MemoryReport, MemoryTracingConfig, GLOBAL_GPU_MEMORY_TRACKER,
};
#[cfg(feature = "gpu")]
pub mod gpu;
#[cfg(feature = "gpu")]
pub mod gpu_profiler;
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a zero-initialized 2x3 tensor reports exactly the shape it
    /// was constructed with.
    #[test]
    fn test_basic_tensor_creation() {
        let zeros = Tensor::<f32>::zeros(&[2, 3]);
        let expected = Shape::from_slice(&[2, 3]);
        assert_eq!(zeros.shape(), &expected);
    }
}
pub mod shape_inference_helpers;