baracuda_runtime/init.rs
1//! Runtime-API initialization helpers.
2//!
3//! The CUDA Runtime API initializes lazily on first use (typically when you
4//! call `cudaSetDevice`), so there's no explicit `init()` you _must_ call.
5//! The helpers here exist for fail-fast setup: version queries, `cudaInitDevice`
6//! for pre-warming the primary context without making it current, etc.
7
8use baracuda_cuda_sys::runtime::runtime;
9use baracuda_types::CudaVersion;
10
11use crate::error::{check, Result};
12
13/// CUDA Runtime version linked via `libcudart`.
14pub fn runtime_version() -> Result<CudaVersion> {
15 let r = runtime()?;
16 let cu = r.cuda_runtime_get_version()?;
17 let mut raw: core::ffi::c_int = 0;
18 check(unsafe { cu(&mut raw) })?;
19 Ok(CudaVersion::from_raw(raw as u32))
20}
21
22/// CUDA driver version (latest supported by the installed `libcuda`).
23pub fn driver_version() -> Result<CudaVersion> {
24 let r = runtime()?;
25 let cu = r.cuda_driver_get_version()?;
26 let mut raw: core::ffi::c_int = 0;
27 check(unsafe { cu(&mut raw) })?;
28 Ok(CudaVersion::from_raw(raw as u32))
29}
30
31/// Block the calling host thread until all work on the current device has
32/// completed. Equivalent to `cudaDeviceSynchronize`.
33pub fn device_synchronize() -> Result<()> {
34 let r = runtime()?;
35 let cu = r.cuda_device_synchronize()?;
36 check(unsafe { cu() })
37}
38
39/// Retrieve and clear the per-thread "sticky" error from the runtime.
40///
41/// This is how the Runtime API reports async kernel failures — failed
42/// launches are latched into a thread-local slot that persists across
43/// unrelated calls until this function (or `cudaPeekAtLastError`) reads it.
44pub fn last_error() -> Result<()> {
45 let r = runtime()?;
46 let cu = r.cuda_get_last_error()?;
47 check(unsafe { cu() })
48}
49
50/// As [`last_error`] but doesn't clear the sticky slot.
51pub fn peek_last_error() -> Result<()> {
52 let r = runtime()?;
53 let cu = r.cuda_peek_at_last_error()?;
54 check(unsafe { cu() })
55}
56
57/// Set the process's device-level scheduling/map flags. Typically called
58/// before the first CUDA call on the current thread — the flags bind
59/// when the primary context is created. Passes are flags from
60/// [`baracuda_cuda_sys::runtime::types::cudaDeviceScheduleFlags`].
61pub fn set_device_flags(flags: u32) -> Result<()> {
62 let r = runtime()?;
63 let cu = r.cuda_set_device_flags()?;
64 check(unsafe { cu(flags) })
65}
66
67/// Query current device-scheduling flags.
68pub fn get_device_flags() -> Result<u32> {
69 let r = runtime()?;
70 let cu = r.cuda_get_device_flags()?;
71 let mut flags: core::ffi::c_uint = 0;
72 check(unsafe { cu(&mut flags) })?;
73 Ok(flags)
74}