pub struct KernelBuilder { /* private fields */ }
Expand description
Main builder for CUDA kernel compilation.
Implementations§
Source§impl KernelBuilder
impl KernelBuilder
Sourcepub fn source_dir<P: AsRef<Path>>(self, dir: P) -> Self
pub fn source_dir<P: AsRef<Path>>(self, dir: P) -> Self
Add a directory to search for .cu files (recursive).
KernelBuilder::new().source_dir("src/kernels");
Sourcepub fn source_files<I, P>(self, files: I) -> Self
pub fn source_files<I, P>(self, files: I) -> Self
Add specific kernel files.
KernelBuilder::new().source_files(["src/kernels/hello.cu", "src/kernels/world.cu"]);
Sourcepub fn source_glob(self, pattern: &str) -> Self
pub fn source_glob(self, pattern: &str) -> Self
Add kernel files matching a glob pattern.
KernelBuilder::new().source_glob("src/**/*.cu");
Sourcepub fn compute_cap(self, cap: usize) -> Self
pub fn compute_cap(self, cap: usize) -> Self
Set the default compute capability (numeric, auto-selects suffix for sm_90+).
Sourcepub fn compute_cap_arch(self, arch: &str) -> Self
pub fn compute_cap_arch(self, arch: &str) -> Self
Set the default compute capability with explicit arch string (e.g., "90a", "100a").
Sourcepub fn with_compute_override(self, pattern: &str, cap: usize) -> Self
pub fn with_compute_override(self, pattern: &str, cap: usize) -> Self
Set compute cap override for specific kernels (numeric).
Pattern can use wildcards: "sm90_*.cu", "*_hopper.cu".
KernelBuilder::new()
.source_glob("src/**/*.cu")
.with_compute_override("sm90_*.cu", 90) // Hopper kernels
.with_compute_override("sm80_*.cu", 80); // Ampere kernels
Sourcepub fn with_compute_override_arch(self, pattern: &str, arch: &str) -> Self
pub fn with_compute_override_arch(self, pattern: &str, arch: &str) -> Self
Set compute cap override with explicit arch string.
Sourcepub fn get_compute_cap(&self) -> Option<usize>
pub fn get_compute_cap(&self) -> Option<usize>
Get the current default compute capability (base number only).
Sourcepub fn set_compute_cap(&mut self, cap: usize)
pub fn set_compute_cap(&mut self, cap: usize)
Set compute capability (mutable reference version).
Sourcepub fn require_explicit_compute_cap(self) -> Result<Self>
pub fn require_explicit_compute_cap(self) -> Result<Self>
Require explicit compute capability (fail fast if not set).
Use this for Docker builds or CI environments where nvidia-smi is
unavailable. The build fails immediately if CUDA_COMPUTE_CAP is not
set and no compute capability was explicitly configured.
// In a Docker build, fail at build time if CUDA_COMPUTE_CAP wasn't
// baked into the image:
KernelBuilder::new()
.require_explicit_compute_cap()?
.source_dir("src/kernels")
.build_lib("libkernels.a")?;
Sourcepub fn with_cutlass(self, commit: Option<&str>) -> Self
pub fn with_cutlass(self, commit: Option<&str>) -> Self
Add CUTLASS dependency.
commit pins a specific CUTLASS commit hash. Pass None to use the
built-in default. When the consuming crate also depends on
baracuda-cutlass-sys, that crate’s pinned version wins automatically
via cargo’s links mechanism — forge then skips its own git fetch.
KernelBuilder::new()
.source_dir("src/kernels")
.with_cutlass(None)
.arg("-DUSE_CUTLASS")
.build_lib("libkernels.a")?;
Sourcepub fn with_git_dependency(
self,
name: &str,
repo: &str,
commit: &str,
include_paths: Vec<&str>,
extra_paths: Vec<&str>,
recurse_submodules: bool,
) -> Self
pub fn with_git_dependency( self, name: &str, repo: &str, commit: &str, include_paths: Vec<&str>, extra_paths: Vec<&str>, recurse_submodules: bool, ) -> Self
Add a custom git dependency.
If recurse_submodules is false, clone/fetch adds --no-recurse-submodules.
Sourcepub fn fetch_git_dependency(&self, name: &str) -> Result<PathBuf>
pub fn fetch_git_dependency(&self, name: &str) -> Result<PathBuf>
Fetch a configured git dependency and return its checkout root.
Sourcepub fn include_path<P: Into<PathBuf>>(self, path: P) -> Self
pub fn include_path<P: Into<PathBuf>>(self, path: P) -> Self
Add a local include path.
Sourcepub fn thread_percentage(self, percentage: f32) -> Self
pub fn thread_percentage(self, percentage: f32) -> Self
Set the percentage of available threads to use (0.0 - 1.0).
Sourcepub fn max_threads(self, max: usize) -> Self
pub fn max_threads(self, max: usize) -> Self
Set the maximum number of threads.
Sourcepub fn nvcc_thread_patterns<S: AsRef<str>>(
self,
patterns: &[S],
num_nvcc_threads: usize,
) -> Self
pub fn nvcc_thread_patterns<S: AsRef<str>>( self, patterns: &[S], num_nvcc_threads: usize, ) -> Self
Set patterns for files that should use nvcc’s --threads=N flag.
Sourcepub fn no_incremental(self) -> Self
pub fn no_incremental(self) -> Self
Disable incremental builds.
Sourcepub fn cpp_std(self, standard: &str) -> Self
pub fn cpp_std(self, standard: &str) -> Self
Set the C++ standard passed to nvcc as -std=<standard>.
Pass values like "c++17", "c++20". When unset (the default), the
builder selects automatically from the detected toolkit version:
c++20 for CUDA >= 12.0, c++17 otherwise.
If your extra_args already contains a -std= argument, this method’s
value is ignored (your explicit -std= wins).
// Force c++17 even on CUDA 12+, e.g. for code that must compile
// against both 11.x and 12.x toolkits:
KernelBuilder::new().cpp_std("c++17");
Sourcepub fn build_lib<P: Into<PathBuf>>(&self, out_file: P) -> Result<()>
pub fn build_lib<P: Into<PathBuf>>(&self, out_file: P) -> Result<()>
Build a static library from all kernel sources.
out_file is typically format!("{}/libkernels.a", env!("OUT_DIR")).
Pair with cargo:rustc-link-search and cargo:rustc-link-lib to wire
the library into the resulting Rust binary.
let out_dir = std::env::var("OUT_DIR").unwrap();
KernelBuilder::new()
.source_dir("src/kernels")
.arg("-O3")
.build_lib(format!("{out_dir}/libkernels.a"))
.unwrap();
println!("cargo:rustc-link-search={out_dir}");
println!("cargo:rustc-link-lib=kernels");
Sourcepub fn build_ptx(&self) -> Result<PtxOutput>
pub fn build_ptx(&self) -> Result<PtxOutput>
Build PTX files from all kernel sources.
Each .cu source produces a <stem>.ptx text file in the configured
out_dir. The returned PtxOutput can write a Rust source file
that exposes each PTX as a pub const &str for runtime loading via
baracuda-driver’s Module::load_ptx.
let output = KernelBuilder::new()
.source_glob("src/**/*.cu")
.build_ptx()?;
output.write("src/kernels.rs")?;
Trait Implementations§
Source§impl Debug for KernelBuilder
impl Debug for KernelBuilder
Auto Trait Implementations§
impl Freeze for KernelBuilder
impl RefUnwindSafe for KernelBuilder
impl Send for KernelBuilder
impl Sync for KernelBuilder
impl Unpin for KernelBuilder
impl UnsafeUnpin for KernelBuilder
impl UnwindSafe for KernelBuilder
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more