List of all items
Structs
- CudaRust
- backend::backend_trait::BackendCapabilities
- backend::native_gpu::NativeGPUBackend
- backend::wasm_runtime::WasmRuntime
- backend::webgpu::WebGPUBackend
- backend::webgpu_optimized::AutoTuneResult
- backend::webgpu_optimized::BackendStats
- backend::webgpu_optimized::CachedKernel
- backend::webgpu_optimized::OptimizedWebGPUBackend
- backend::webgpu_optimized::WebGPUConfig
- kernel::shared_memory::BankConflictDetector
- kernel::shared_memory::DynamicSharedMemory
- kernel::shared_memory::SharedMemory
- kernel::warp::WarpState
- memory::SharedMemory
- memory::device_memory::DeviceBuffer
- memory::device_memory::DevicePtr
- memory::host_memory::HostBuffer
- memory::memory_pool::KernelMemoryManager
- memory::memory_pool::MemoryPool
- memory::memory_pool::PoolConfig
- memory::memory_pool::PoolStats
- memory::texture_memory::TextureDescriptor
- memory::texture_memory::TextureMemory
- memory::unified_memory::ManagedMemory
- memory::unified_memory::UnifiedMemory
- neural_integration::BatchProcessor
- neural_integration::BridgeConfig
- neural_integration::BufferHandle
- neural_integration::CompiledKernel
- neural_integration::DeviceInfo
- neural_integration::MemoryHandle
- neural_integration::MemoryStats
- neural_integration::NeuralBridge
- neural_integration::OperationHandle
- neural_integration::OperationStats
- neural_integration::PerformanceDegradation
- neural_integration::PerformanceStats
- neural_integration::SystemCapabilities
- neural_integration::benchmarks::BenchmarkResult
- neural_integration::benchmarks::BenchmarkSuite
- neural_integration::bridge::WebGpuBackend
- neural_integration::cuda_kernels::KernelConfig
- neural_integration::cuda_kernels::LaunchParams
- neural_integration::cuda_kernels::OptimizedKernels
- neural_integration::memory_manager::HybridMemoryManager
- neural_integration::memory_manager::NoOpMemoryManager
- neural_integration::performance_monitor::BottleneckAnalysis
- neural_integration::performance_monitor::MonitorConfig
- neural_integration::performance_monitor::NoOpMonitor
- neural_integration::performance_monitor::PerformanceTrend
- neural_integration::performance_monitor::RealTimeMonitor
- nutanix::config::DeploymentConfig
- nutanix::config::GpuClusterSummary
- nutanix::config::GpuInfo
- nutanix::config::GpuNode
- nutanix::config::HostCapabilities
- nutanix::config::NutanixConfig
- nutanix::deployment::DeploymentGenerator
- nutanix::discovery::NutanixClient
- nutanix::discovery::PrismBuildInfo
- nutanix::discovery::PrismClusterConfig
- nutanix::discovery::PrismClusterEntity
- nutanix::discovery::PrismClusterResources
- nutanix::discovery::PrismClusterStatus
- nutanix::discovery::PrismControllerVm
- nutanix::discovery::PrismEntityMetadata
- nutanix::discovery::PrismGpuInfo
- nutanix::discovery::PrismHostEntity
- nutanix::discovery::PrismHostResources
- nutanix::discovery::PrismHostStatus
- nutanix::discovery::PrismHypervisor
- nutanix::discovery::PrismListResponse
- nutanix::discovery::PrismMetadata
- nutanix::discovery::PrismReference
- nutanix::monitoring::Alert
- nutanix::monitoring::CapacityForecast
- nutanix::monitoring::GpuMetrics
- nutanix::monitoring::GpuMonitor
- nutanix::monitoring::NodeHealth
- nutanix::nc2::CostEstimate
- nutanix::nc2::Nc2Client
- nutanix::nc2::Nc2Cluster
- nutanix::nc2::WorkloadPlacement
- nutanix::vgpu_scheduler::MigrationPlan
- nutanix::vgpu_scheduler::ScheduleResult
- nutanix::vgpu_scheduler::VgpuScheduler
- nutanix::vgpu_scheduler::WorkloadRequest
- parser::ast::Ast
- parser::ast::Block
- parser::ast::FunctionDef
- parser::ast::GlobalVar
- parser::ast::KernelDef
- parser::ast::Parameter
- parser::ast::TextureType
- parser::ast::TypeDef
- parser::ast::VectorType
- parser::cuda_parser::CudaParser
- parser::kernel_extractor::KernelInfo
- parser::lexer::SpannedToken
- parser::ptx_parser::PtxFunction
- parser::ptx_parser::PtxInstruction
- parser::ptx_parser::PtxModule
- parser::ptx_parser::PtxPredicate
- parser::ptx_parser::PtxRegDecl
- parser::ptx_parser::PtxVariable
- profiling::GlobalProfiler
- profiling::PerformanceCounter
- profiling::ProfileMetrics
- profiling::ScopedTimer
- profiling::kernel_profiler::KernelComparison
- profiling::kernel_profiler::KernelProfiler
- profiling::kernel_profiler::KernelStats
- profiling::kernel_profiler::KernelTimer
- profiling::kernel_profiler::RooflineAnalysis
- profiling::kernel_profiler::RooflineModel
- profiling::memory_profiler::AllocationEvent
- profiling::memory_profiler::FragmentationAnalysis
- profiling::memory_profiler::MemoryPressureEvent
- profiling::memory_profiler::MemoryPressureMonitor
- profiling::memory_profiler::MemoryProfiler
- profiling::performance_monitor::CounterStats
- profiling::performance_monitor::Measurement
- profiling::performance_monitor::MonitorConfig
- profiling::performance_monitor::PerformanceMonitor
- profiling::performance_monitor::PerformanceReport
- profiling::performance_monitor::Timer
- profiling::runtime_profiler::BottleneckAnalysis
- profiling::runtime_profiler::OperationEvent
- profiling::runtime_profiler::OperationStats
- profiling::runtime_profiler::OperationTimer
- profiling::runtime_profiler::OptimizationSuggestions
- profiling::runtime_profiler::RuntimeProfiler
- profiling::runtime_profiler::Suggestion
- runtime::KernelContext
- runtime::Runtime
- runtime::async_pipeline::AsyncPipeline
- runtime::async_pipeline::PipelineBatch
- runtime::async_pipeline::PipelineEvent
- runtime::async_pipeline::PipelineOp
- runtime::async_pipeline::PipelineTimeline
- runtime::async_pipeline::PipelineUtilization
- runtime::benchmark::BenchmarkResult
- runtime::benchmark::BenchmarkRunner
- runtime::benchmark::BenchmarkSuite
- runtime::bfloat16::BFloat16
- runtime::coalescing::AccessRecorder
- runtime::coalescing::AccessSummary
- runtime::coalescing::CoalescingReport
- runtime::coalescing::MemoryAccess
- runtime::cooperative_groups::CooperativeGroup
- runtime::cooperative_groups::GridGroup
- runtime::cooperative_groups::ThreadBlockGroup
- runtime::cooperative_groups::TiledPartition
- runtime::cuda_graph::CudaGraph
- runtime::cuda_graph::GraphExec
- runtime::cuda_graph::GraphExecResult
- runtime::cuda_graph::GraphNode
- runtime::cuda_graph::NodeExecResult
- runtime::device::Device
- runtime::device::DeviceProperties
- runtime::dynamic_parallelism::ChildLaunch
- runtime::dynamic_parallelism::DynamicParallelismContext
- runtime::event::Event
- runtime::flash_attention::FlashAttention
- runtime::flash_attention::FlashAttentionConfig
- runtime::flash_attention::FlashAttentionOutput
- runtime::flash_attention::MemorySavings
- runtime::grid::Block
- runtime::grid::Dim3
- runtime::grid::Grid
- runtime::half::Half
- runtime::kernel::LaunchConfig
- runtime::kernel::ThreadContext
- runtime::kernel_fusion::FusedKernel
- runtime::kernel_fusion::FusionAnalyzer
- runtime::kernel_fusion::FusionNode
- runtime::kernel_fusion::FusionResult
- runtime::multi_gpu::DeviceRange
- runtime::multi_gpu::MultiGpuContext
- runtime::occupancy::BlockSizeSuggestion
- runtime::occupancy::GpuArchSpec
- runtime::occupancy::KernelResources
- runtime::occupancy::OccupancyResult
- runtime::quantization::QuantError
- runtime::quantization::QuantParams
- runtime::stream::Stream
- runtime::tensor_ops::Fragment
- runtime::tensor_ops::FragmentShape
- runtime::tensor_ops::GemmStats
- runtime::tensor_ops::TensorCoreEngine
- simd::detection::SimdCapabilities
- transpiler::CudaTranspiler
- transpiler::Transpiler
- transpiler::ast::Function
- transpiler::ast::Program
- transpiler::builtin_functions::BuiltinMapper
- transpiler::code_generator::CodeGenerator
- transpiler::kernel_translator::KernelTranslator
- transpiler::memory_mapper::MemoryMapper
- transpiler::memory_mapper::MemoryMapping
- transpiler::type_converter::TypeConverter
- transpiler::wgsl::WgslGenerator
Enums
- backend::backend_trait::MemcpyKind
- backend::native_gpu::GpuApi
- error::CudaRustError
- memory::texture_memory::AddressMode
- memory::texture_memory::FilterMode
- neural_integration::ActivationFunction
- neural_integration::BindingType
- neural_integration::GpuDevice
- neural_integration::NeuralIntegrationError
- neural_integration::NeuralOperation
- neural_integration::Precision
- neural_integration::performance_monitor::BottleneckType
- nutanix::config::GpuModel
- nutanix::config::GpuVendor
- nutanix::monitoring::AlertSeverity
- nutanix::monitoring::HealthStatus
- nutanix::nc2::CloudProvider
- nutanix::nc2::ClusterStatus
- nutanix::nc2::MigrationStatus
- nutanix::vgpu_scheduler::SchedulingPolicy
- nutanix::vgpu_scheduler::VgpuProfile
- parser::ast::BinaryOp
- parser::ast::Dimension
- parser::ast::Expression
- parser::ast::FloatType
- parser::ast::FunctionQualifier
- parser::ast::IntType
- parser::ast::Item
- parser::ast::KernelAttribute
- parser::ast::Literal
- parser::ast::ParamQualifier
- parser::ast::Statement
- parser::ast::StorageClass
- parser::ast::TextureDim
- parser::ast::Type
- parser::ast::UnaryOp
- parser::ast::WarpOp
- parser::lexer::Token
- parser::ptx_parser::PtxDirective
- parser::ptx_parser::PtxOperand
- parser::ptx_parser::PtxSpace
- parser::ptx_parser::PtxStatement
- parser::ptx_parser::PtxType
- profiling::memory_profiler::AllocationType
- profiling::memory_profiler::PressureLevel
- profiling::performance_monitor::CounterType
- profiling::runtime_profiler::OperationType
- profiling::runtime_profiler::SuggestionCategory
- profiling::runtime_profiler::SuggestionSeverity
- runtime::async_pipeline::PipelineStage
- runtime::coalescing::AccessPattern
- runtime::cuda_graph::MemcpyDirection
- runtime::cuda_graph::NodeKind
- runtime::cuda_graph::NodeState
- runtime::device::BackendType
- runtime::kernel_fusion::BinaryOp
- runtime::kernel_fusion::FusableOp
- runtime::kernel_fusion::MemOp
- runtime::kernel_fusion::PrecisionType
- runtime::kernel_fusion::ReduceOp
- runtime::kernel_fusion::UnaryOp
- runtime::occupancy::LimitingFactor
- runtime::quantization::QuantBits
- runtime::quantization::QuantScheme
- runtime::tensor_ops::MmaPrecision
- simd::detection::SimdLevel
- transpiler::ast::Expr
- transpiler::ast::ForInit
- transpiler::ast::Stmt
- transpiler::kernel_translator::KernelPattern
Traits
- backend::backend_trait::BackendTrait
- neural_integration::GpuBackendTrait
- neural_integration::MemoryManagerTrait
- neural_integration::PerformanceMonitorTrait
- runtime::dynamic_parallelism::ChildKernel
- runtime::kernel::KernelFunction
Macros
- kernel::kernel_function
- kernel_function
- memory_error
- parse_error
- profile_scope
- runtime_error
- time_block
- translation_error
Functions
- backend::get_backend
- backend::native_gpu::is_cuda_available
- backend::native_gpu::is_rocm_available
- backend::native_gpu::is_vulkan_available
- init
- memory::memory_pool::allocate
- memory::memory_pool::deallocate
- memory::memory_pool::global_pool
- memory::memory_pool::global_stats
- memory::unified_memory::allocate_unified
- neural_integration::benchmarks::run_quick_benchmark
- neural_integration::bridge::execute_cpu_fallback
- neural_integration::bridge::extract_wgsl_from_rust
- neural_integration::examples::batch_processing_example
- neural_integration::examples::custom_kernel_example
- neural_integration::examples::error_handling_example
- neural_integration::examples::matrix_operations_example
- neural_integration::examples::neural_network_example
- neural_integration::examples::performance_benchmark_example
- neural_integration::examples::run_all_examples
- neural_integration::examples::training_simulation_example
- neural_integration::get_capabilities
- neural_integration::gpu_neural_ops::execute_operation
- neural_integration::gpu_neural_ops::process_batch
- neural_integration::initialize
- nutanix::deployment::gpu_resource_key
- parser::kernel_extractor::extract_device_functions
- parser::kernel_extractor::extract_kernel_by_name
- parser::kernel_extractor::extract_kernels
- parser::lexer::tokenize
- parser::parse
- parser::ptx_parser::parse_ptx
- parser::ptx_parser::ptx_to_ast
- profiling::performance_monitor::global_monitor
- profiling::performance_monitor::global_report
- profiling::performance_monitor::record_measurement
- profiling::performance_monitor::time_operation
- runtime::benchmark::run_builtin_benchmarks
- runtime::bfloat16::bf16_dot
- runtime::bfloat16::bf16_gemm
- runtime::bfloat16::bf16_gemv
- runtime::bfloat16::bf16_to_f32_slice
- runtime::bfloat16::f32_to_bf16_slice
- runtime::block::dim
- runtime::block::index
- runtime::clear_kernel_context
- runtime::coalescing::analyze_warp_access
- runtime::coalescing::simulate_column_access
- runtime::coalescing::simulate_linear_access
- runtime::cooperative_groups::this_grid
- runtime::cooperative_groups::this_thread_block
- runtime::grid_dim::dim
- runtime::half::f32_to_half_slice
- runtime::half::half_dot
- runtime::half::half_gemv
- runtime::half::half_to_f32_slice
- runtime::kernel::launch_kernel
- runtime::memory::allocate
- runtime::memory::copy
- runtime::memory::free
- runtime::multi_gpu::memcpy_peer
- runtime::occupancy::calculate_occupancy
- runtime::occupancy::suggest_block_size
- runtime::quantization::dequantize_int4
- runtime::quantization::dequantize_int8
- runtime::quantization::quantization_error
- runtime::quantization::quantize_int4
- runtime::quantization::quantize_int8
- runtime::quantization::quantized_gemm_int8
- runtime::set_kernel_context
- runtime::sync_threads
- runtime::thread::index
- runtime::warp_intrinsics::all_sync
- runtime::warp_intrinsics::any_sync
- runtime::warp_intrinsics::ballot_sync
- runtime::warp_intrinsics::clz
- runtime::warp_intrinsics::exclusive_scan_sync
- runtime::warp_intrinsics::ffs
- runtime::warp_intrinsics::inclusive_scan_sync
- runtime::warp_intrinsics::lanemask_gt
- runtime::warp_intrinsics::lanemask_le
- runtime::warp_intrinsics::lanemask_lt
- runtime::warp_intrinsics::match_all_sync
- runtime::warp_intrinsics::match_any_sync
- runtime::warp_intrinsics::popc
- runtime::warp_intrinsics::reduce_add_sync
- runtime::warp_intrinsics::reduce_and_sync
- runtime::warp_intrinsics::reduce_max_sync
- runtime::warp_intrinsics::reduce_min_sync
- runtime::warp_intrinsics::reduce_or_sync
- runtime::warp_intrinsics::reduce_xor_sync
- runtime::with_kernel_context
- simd::matrix_ops::matrix_multiply_f32
- simd::vector_ops::vector_add_f32
- simd::vector_ops::vector_dot_f32
- simd::vector_ops::vector_mul_f32
- simd::vector_ops::vector_reduce_sum_f32
- simd::vector_ops::vector_scale_f32
- utils::calculate_block_size
- utils::calculate_grid_size
- utils::round_up
Type Aliases
- error::Result
- memory::texture_memory::SharedTexture
- memory::unified_memory::SharedUnifiedMemory
- neural_integration::NeuralResult
- neural_integration::wasm_types::WasmBridgeConfig
- runtime::cuda_graph::NodeId