1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
//! Pre-built optimized GPU kernels.
//!
//! This module provides a collection of commonly used GPU kernels that are optimized
//! and ready to use. These kernels cover essential operations like tensor creation,
//! type conversion, and linear algebra.
//!
//! ## Available Kernel Modules
//!
//! - [`linalg`] - Linear algebra operations (GEMM, matrix-vector multiplication)
//! - [`creation`] - Tensor creation and initialization (`full`, `arange`)
//! - [`conversion`] - Type conversion operations between tensor element types
//!
//! ## Usage
//!
//! All kernels follow the same usage pattern with `_apply` launcher functions:
//!
//! ```rust,ignore
//! use cutile::api;
//! use cutile::kernels::creation::full_apply;
//!
//! // Create a tensor filled with a value
//! let val = 42.0f32;
//! let tensor = api::zeros([1024]).partition([128]);
//! let result = value((val, tensor))
//!     .apply(full_apply)
//!     .unpartition()
//!     .await;
//! ```
pub use conversion;
pub use creation;
pub use linalg;