// rustacuda/context.rs
//! # CUDA context management
//!
//! Most CUDA functions require a context. A CUDA context is analogous to a CPU process - it's
//! an isolated container for all runtime state, including configuration settings and the
//! device/unified/page-locked memory allocations. Each context has a separate memory space, and
//! pointers from one context do not work in another. Each context is associated with a single
//! device. Although it is possible to have multiple contexts associated with a single device, this
//! is strongly discouraged as it can cause a significant loss of performance.
//!
//! CUDA keeps a thread-local stack of contexts which the programmer can push to or pop from.
//! The top context in that stack is known as the "current" context and it is used in most CUDA
//! API calls. One context can be safely made current in multiple CPU threads.
//!
//! # Safety
//!
//! The CUDA context management API does not fit easily into Rust's safety guarantees.
//!
//! The thread-local stack (as well as the fact that any context can be on the stack for any number
//! of threads) means there is no clear owner for a CUDA context, but it still has to be cleaned up.
//! Also, the fact that a context can be current to multiple threads at once means that there can be
//! multiple implicit references to a context which are not controlled by Rust.
//!
//! RustaCUDA handles ownership by providing an owning [`Context`](struct.Context.html) struct and
//! a non-owning [`UnownedContext`](struct.UnownedContext.html). When the `Context` is dropped, the
//! backing context is destroyed. The context could be current on other threads, though. In this
//! case, the context is still destroyed, and attempts to access the context on other threads will
//! fail with an error. This is (mostly) safe, if a bit inconvenient. It's only mostly safe because
//! other threads could be accessing that context while the destructor is running on this thread,
//! which could result in undefined behavior.
//!
//! In short, Rust's thread-safety guarantees cannot fully protect use of the context management
//! functions. The programmer must ensure that no other OS threads are using the `Context` when it
//! is dropped.
//!
//! # Examples
//!
//! For most common uses (one device, one OS thread) it should suffice to create a single context:
//!
//! ```
//! use rustacuda::device::Device;
//! use rustacuda::context::{Context, ContextFlags};
//! # use std::error::Error;
//! # fn main () -> Result<(), Box<dyn Error>> {
//!
//! rustacuda::init(rustacuda::CudaFlags::empty())?;
//! let device = Device::get_device(0)?;
//! let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
//! // call RustaCUDA functions which use the context
//!
//! // The context will be destroyed when it is dropped or falls out of scope.
//! drop(context);
//! # Ok(())
//! # }
//! ```
//!
//! If you have multiple OS threads that each submit work to the same device, you can get a handle
//! to the single context and pass it to each thread.
//!
//! ```
//! # use rustacuda::context::{Context, ContextFlags, CurrentContext};
//! # use rustacuda::device::Device;
//! # use std::error::Error;
//! # fn main() -> Result<(), Box<dyn Error>> {
//! # rustacuda::init(rustacuda::CudaFlags::empty())?;
//! # let device = Device::get_device(0)?;
//! // As before
//! let context =
//!     Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
//! let mut join_handles = vec![];
//!
//! for _ in 0..4 {
//!     let unowned = context.get_unowned();
//!     let join_handle = std::thread::spawn(move || {
//!         CurrentContext::set_current(&unowned).unwrap();
//!         // Call RustaCUDA functions which use the context
//!     });
//!     join_handles.push(join_handle);
//! }
//! // We must ensure that the other threads are not using the context when it's destroyed.
//! for handle in join_handles {
//!     handle.join().unwrap();
//! }
//! // Now it's safe to drop the context.
//! drop(context);
//! # Ok(())
//! # }
//! ```
//!
//! If you have multiple devices, each device needs its own context.
//!
//! ```
//! # use rustacuda::device::Device;
//! # use rustacuda::context::{Context, ContextStack, ContextFlags, CurrentContext};
//! # use std::error::Error;
//! #
//! # fn main() -> Result<(), Box<dyn Error>> {
//! # rustacuda::init(rustacuda::CudaFlags::empty())?;
//! // Create and pop contexts for each device
//! let mut contexts = vec![];
//! for device in Device::devices()? {
//!     let device = device?;
//!     let ctx =
//!         Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
//!     ContextStack::pop()?;
//!     contexts.push(ctx);
//! }
//! CurrentContext::set_current(&contexts[0])?;
//!
//! // Call RustaCUDA functions which will use the context
//!
//! # Ok(())
//! # }
//! ```

use crate::device::Device;
use crate::error::{CudaResult, DropResult, ToResult};
use crate::private::Sealed;
use crate::CudaApiVersion;
use cuda_driver_sys::CUcontext;
use std::mem::{self, transmute};
use std::ptr;

123
/// This enumeration represents configuration settings for devices which share hardware resources
/// between L1 cache and shared memory.
///
/// Note that this is only a preference - the driver will use the requested configuration if
/// possible, but it is free to choose a different configuration if required to execute functions.
///
/// See
/// [CurrentContext::get_cache_config](struct.CurrentContext.html#method.get_cache_config) and
/// [CurrentContext::set_cache_config](struct.CurrentContext.html#method.set_cache_config) to get
/// and set the cache config for the current context.
// NOTE: repr(u32) with explicit discriminants keeps this layout-compatible with the driver's
// CUfunc_cache values; the FFI wrappers below cast/transmute between the two directly.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum CacheConfig {
    /// No preference for shared memory or L1 (default)
    PreferNone = 0,
    /// Prefer larger shared memory and smaller L1 cache
    PreferShared = 1,
    /// Prefer larger L1 cache and smaller shared memory
    PreferL1 = 2,
    /// Prefer equal-sized L1 cache and shared memory
    PreferEqual = 3,

    // Hidden variant so adding future variants is not a breaking change; never construct it.
    #[doc(hidden)]
    __Nonexhaustive,
}
149
/// This enumeration represents the limited resources which can be accessed through
/// [CurrentContext::get_resource_limit](struct.CurrentContext.html#method.get_resource_limit) and
/// [CurrentContext::set_resource_limit](struct.CurrentContext.html#method.set_resource_limit).
// NOTE: repr(u32) with explicit discriminants matches the driver's CUlimit values; the FFI
// wrappers transmute this enum directly into that type.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum ResourceLimit {
    /// The size in bytes of each GPU thread stack
    StackSize = 0,
    /// The size in bytes of the FIFO used by the `printf()` device system call.
    PrintfFifoSize = 1,
    /// The size in bytes of the heap used by the `malloc()` and `free()` device system calls.
    ///
    /// Note that this is used for memory allocated within a kernel launch; it is not related to the
    /// device memory allocated by the host.
    MallocHeapSize = 2,
    /// The maximum nesting depth of a grid at which a thread can safely call
    /// `cudaDeviceSynchronize()` to wait on child grid launches to complete.
    DeviceRuntimeSynchronizeDepth = 3,
    /// The maximum number of outstanding device runtime launches that can be made from the current
    /// context.
    DeviceRuntimePendingLaunchCount = 4,
    /// L2 cache fetch granularity
    MaxL2FetchGranularity = 5,

    // Hidden variant so adding future variants is not a breaking change; never construct it.
    #[doc(hidden)]
    __Nonexhaustive,
}
177
/// This enumeration represents the options for configuring the shared memory bank size.
///
/// See
/// [CurrentContext::get_shared_memory_config](struct.CurrentContext.html#method.get_shared_memory_config) and
/// [CurrentContext::set_shared_memory_config](struct.CurrentContext.html#method.set_shared_memory_config) to get
/// and set the shared memory config for the current context.
// NOTE: repr(u32) with explicit discriminants matches the driver's CUsharedconfig values; the
// FFI wrappers cast/transmute this enum directly into that type.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum SharedMemoryConfig {
    /// Set shared-memory bank size to the default.
    DefaultBankSize = 0,
    /// Set shared-memory bank width to four bytes
    FourByteBankSize = 1,
    /// Set shared-memory bank width to eight bytes
    EightByteBankSize = 2,

    // Hidden variant so adding future variants is not a breaking change; never construct it.
    #[doc(hidden)]
    __Nonexhaustive,
}
197
bitflags! {
    /// Bit flags for initializing the CUDA context.
    ///
    /// If you're not sure which flags to use, `MAP_HOST | SCHED_AUTO` is a good default.
    pub struct ContextFlags: u32 {
        /// Instructs CUDA to actively spin when waiting for results from the GPU. This can decrease
        /// latency when waiting for the GPU, but may lower the performance of other CPU threads
        /// if they are performing work in parallel with the CUDA thread.
        const SCHED_SPIN = 0x01;

        /// Instructs CUDA to yield its thread when waiting for results from the GPU. This can
        /// increase latency when waiting for the GPU, but can increase the performance of CPU
        /// threads performing work in parallel with the GPU.
        const SCHED_YIELD = 0x02;

        /// Instructs CUDA to block the CPU thread on a synchronization primitive when waiting for
        /// the GPU to finish work.
        const SCHED_BLOCKING_SYNC = 0x04;

        /// Instructs CUDA to automatically choose whether to yield to other OS threads while waiting
        /// for the GPU, or to spin the OS thread. This is the default.
        const SCHED_AUTO = 0x00;

        /// Instructs CUDA to support mapped pinned allocations. This flag must be set in order to
        /// use page-locked memory (see [LockedBuffer](../memory/struct.LockedBuffer.html)).
        const MAP_HOST = 0x08;

        /// Instruct CUDA not to reduce local memory after resizing local memory for a kernel. This
        /// can prevent thrashing by local memory allocations when launching many kernels with high
        /// local memory usage at the cost of potentially increased memory usage.
        const LMEM_RESIZE_TO_MAX = 0x10;
    }
}
231
/// Owned handle to a CUDA context.
///
/// The context will be destroyed when this goes out of scope. If this is the current context on
/// the current OS thread, the next context on the stack (if any) will be made current. Note that
/// the context will be destroyed even if other threads are still using it. Attempts to access the
/// destroyed context from another thread will return an error.
#[derive(Debug)]
pub struct Context {
    // Raw driver handle. Set to null once the context has been destroyed (see `Context::drop`
    // and the `Drop` impl) so that destruction only ever happens once.
    inner: CUcontext,
}
impl Context {
    /// Create a CUDA context for the given device.
    ///
    /// The new context is also pushed onto the thread-local context stack, making it the
    /// current context for the calling OS thread.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn create_and_push(flags: ContextFlags, device: Device) -> CudaResult<Context> {
        unsafe {
            // cuCtxCreate both creates the context and pushes it onto the calling thread's
            // context stack, so no separate push is needed here.
            let mut ctx: CUcontext = ptr::null_mut();
            cuda_driver_sys::cuCtxCreate_v2(
                &mut ctx as *mut CUcontext,
                flags.bits(),
                device.into_inner(),
            )
            .to_result()?;
            Ok(Context { inner: ctx })
        }
    }

    /// Get the API version used to create this context.
    ///
    /// This is not necessarily the latest version supported by the driver.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let version = context.get_api_version()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
        unsafe {
            let mut api_version = 0u32;
            cuda_driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32)
                .to_result()?;
            Ok(CudaApiVersion {
                version: api_version as i32,
            })
        }
    }

    /// Returns a non-owning handle to this context.
    ///
    /// This is useful for sharing a single context between threads (though see the module-level
    /// documentation for safety details!).
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = context.get_unowned();
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_unowned(&self) -> UnownedContext {
        UnownedContext { inner: self.inner }
    }

    /// Destroy a `Context`, returning an error.
    ///
    /// Destroying a context can return errors from previous asynchronous work. This function
    /// destroys the given context and returns the error and the un-destroyed context on failure.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// match Context::drop(context) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((e, ctx)) => {
    ///         println!("Failed to destroy context: {:?}", e);
    ///         // Do something with ctx
    ///     },
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn drop(mut ctx: Context) -> DropResult<Context> {
        // Already destroyed - nothing to do.
        if ctx.inner.is_null() {
            return Ok(());
        }

        unsafe {
            // Null out the handle first so that ctx's Drop impl cannot destroy it a second
            // time, whichever branch we take below.
            let inner = mem::replace(&mut ctx.inner, ptr::null_mut());
            match cuda_driver_sys::cuCtxDestroy_v2(inner).to_result() {
                Ok(()) => {
                    // The handle is gone; skip running Drop on the (now empty) wrapper.
                    mem::forget(ctx);
                    Ok(())
                }
                // On failure, hand the caller back a live owning wrapper around the handle.
                Err(e) => Err((e, Context { inner })),
            }
        }
    }
}
372impl Drop for Context {
373 fn drop(&mut self) {
374 if self.inner.is_null() {
375 return;
376 }
377
378 unsafe {
379 let inner = mem::replace(&mut self.inner, ptr::null_mut());
380 // No choice but to panic here.
381 cuda_driver_sys::cuCtxDestroy_v2(inner)
382 .to_result()
383 .expect("Failed to destroy context");
384 }
385 }
386}
387
/// Sealed trait for `Context` and `UnownedContext`. Not intended for use outside of RustaCUDA.
pub trait ContextHandle: Sealed {
    /// Returns the raw driver handle. Internal to RustaCUDA; hidden from docs.
    #[doc(hidden)]
    fn get_inner(&self) -> CUcontext;
}
impl Sealed for Context {}
// Allows generic functions to accept an owned `Context` wherever a context handle is needed.
impl ContextHandle for Context {
    fn get_inner(&self) -> CUcontext {
        self.inner
    }
}
impl Sealed for UnownedContext {}
// Allows generic functions to accept an `UnownedContext` wherever a context handle is needed.
impl ContextHandle for UnownedContext {
    fn get_inner(&self) -> CUcontext {
        self.inner
    }
}
405
/// Non-owning handle to a CUDA context.
#[derive(Debug, Clone)]
pub struct UnownedContext {
    // Raw driver handle. This struct does NOT destroy the context when dropped; ownership
    // stays with the corresponding `Context`.
    inner: CUcontext,
}
// SAFETY: `UnownedContext` is just a copyable raw handle with no owned resources. Per the
// module docs, one CUDA context can safely be made current on multiple CPU threads, so the
// handle may be sent to and shared between threads. Destruction races are the owning
// `Context`'s concern (see the module-level Safety section).
unsafe impl Send for UnownedContext {}
unsafe impl Sync for UnownedContext {}
413impl UnownedContext {
414 /// Get the API version used to create this context.
415 ///
416 /// This is not necessarily the latest version supported by the driver.
417 ///
418 /// # Example
419 ///
420 /// ```
421 /// # use rustacuda::device::Device;
422 /// # use rustacuda::context::{Context, ContextFlags};
423 /// # use std::error::Error;
424 /// #
425 /// # fn main () -> Result<(), Box<dyn Error>> {
426 /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
427 /// # let device = Device::get_device(0)?;
428 /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
429 /// let unowned = context.get_unowned();
430 /// let version = unowned.get_api_version()?;
431 /// #
432 /// # Ok(())
433 /// # }
434 /// ```
435 pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
436 unsafe {
437 let mut api_version = 0u32;
438 cuda_driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32)
439 .to_result()?;
440 Ok(CudaApiVersion {
441 version: api_version as i32,
442 })
443 }
444 }
445}
446
/// Type used to represent the thread-local context stack.
// Zero-sized namespace type: all functionality is provided via associated functions.
#[derive(Debug)]
pub struct ContextStack;
450impl ContextStack {
451 /// Pop the current context off the stack and return the handle. That context may then be made
452 /// current again (perhaps on a different CPU thread) by calling [push](#method.push).
453 ///
454 /// # Example
455 ///
456 /// ```
457 /// # use rustacuda::device::Device;
458 /// # use rustacuda::context::{Context, ContextFlags, ContextStack};
459 /// # use std::error::Error;
460 /// #
461 /// # fn main () -> Result<(), Box<dyn Error>> {
462 /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
463 /// # let device = Device::get_device(0)?;
464 /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
465 /// let unowned = ContextStack::pop()?;
466 /// #
467 /// # Ok(())
468 /// # }
469 /// ```
470 pub fn pop() -> CudaResult<UnownedContext> {
471 unsafe {
472 let mut ctx: CUcontext = ptr::null_mut();
473 cuda_driver_sys::cuCtxPopCurrent_v2(&mut ctx as *mut CUcontext).to_result()?;
474 Ok(UnownedContext { inner: ctx })
475 }
476 }
477
478 /// Push the given context to the top of the stack
479 ///
480 /// # Example
481 ///
482 /// ```
483 /// # use rustacuda::device::Device;
484 /// # use rustacuda::context::{Context, ContextFlags, ContextStack};
485 /// # use std::error::Error;
486 /// #
487 /// # fn main () -> Result<(), Box<dyn Error>> {
488 /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
489 /// # let device = Device::get_device(0)?;
490 /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
491 /// let unowned = ContextStack::pop()?;
492 /// ContextStack::push(&unowned)?;
493 /// # Ok(())
494 /// # }
495 /// ```
496 pub fn push<C: ContextHandle>(ctx: &C) -> CudaResult<()> {
497 unsafe {
498 cuda_driver_sys::cuCtxPushCurrent_v2(ctx.get_inner()).to_result()?;
499 Ok(())
500 }
501 }
502}
503
/// Struct representing a range of stream priorities.
///
/// By convention, lower numbers imply greater priorities. The range of meaningful stream priorities
/// is given by `[greatest, least]` - that is (numerically), `greatest <= least`.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct StreamPriorityRange {
    /// The least stream priority
    pub least: i32,
    /// The greatest stream priority
    pub greatest: i32,
}
515
/// Type representing the top context in the thread-local stack.
// Zero-sized namespace type: all functionality is provided via associated functions that
// operate on whichever context is current for the calling thread.
#[derive(Debug)]
pub struct CurrentContext;
impl CurrentContext {
    /// Returns the preferred cache configuration for the current context.
    ///
    /// On devices where the L1 cache and shared memory use the same hardware resources, this
    /// function returns the preferred cache configuration for the current context. For devices
    /// where the size of the L1 cache and shared memory are fixed, this will always return
    /// `CacheConfig::PreferNone`.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let cache_config = CurrentContext::get_cache_config()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_cache_config() -> CudaResult<CacheConfig> {
        unsafe {
            let mut config = CacheConfig::PreferNone;
            // The pointer cast is sound because CacheConfig is repr(u32) with discriminants
            // chosen to match the driver's CUfunc_cache values.
            cuda_driver_sys::cuCtxGetCacheConfig(
                &mut config as *mut CacheConfig as *mut cuda_driver_sys::CUfunc_cache,
            )
            .to_result()?;
            Ok(config)
        }
    }

    /// Return the device ID for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let device = CurrentContext::get_device()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_device() -> CudaResult<Device> {
        unsafe {
            let mut device = Device { device: 0 };
            cuda_driver_sys::cuCtxGetDevice(&mut device.device as *mut cuda_driver_sys::CUdevice)
                .to_result()?;
            Ok(device)
        }
    }

    /// Return the context flags for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let flags = CurrentContext::get_flags()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_flags() -> CudaResult<ContextFlags> {
        unsafe {
            let mut flags = 0u32;
            cuda_driver_sys::cuCtxGetFlags(&mut flags as *mut u32).to_result()?;
            // Truncate rather than error on flag bits this wrapper doesn't know about.
            Ok(ContextFlags::from_bits_truncate(flags))
        }
    }

    /// Return resource limits for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let stack_size = CurrentContext::get_resource_limit(ResourceLimit::StackSize)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_resource_limit(resource: ResourceLimit) -> CudaResult<usize> {
        unsafe {
            let mut limit: usize = 0;
            // The transmute relies on ResourceLimit being repr(u32) with discriminants that
            // match the driver's CUlimit values.
            cuda_driver_sys::cuCtxGetLimit(&mut limit as *mut usize, transmute(resource))
                .to_result()?;
            Ok(limit)
        }
    }

    /// Return the current shared memory configuration for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let shared_mem_config = CurrentContext::get_shared_memory_config()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_shared_memory_config() -> CudaResult<SharedMemoryConfig> {
        unsafe {
            let mut cfg = SharedMemoryConfig::DefaultBankSize;
            // The pointer cast is sound because SharedMemoryConfig is repr(u32) with
            // discriminants chosen to match the driver's CUsharedconfig values.
            cuda_driver_sys::cuCtxGetSharedMemConfig(
                &mut cfg as *mut SharedMemoryConfig as *mut cuda_driver_sys::CUsharedconfig,
            )
            .to_result()?;
            Ok(cfg)
        }
    }

    /// Return the least and greatest stream priorities.
    ///
    /// If the program attempts to create a stream with a priority outside of this range, it will be
    /// automatically clamped to within the valid range. If the device does not support stream
    /// priorities, the returned range will contain zeroes.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let priority_range = CurrentContext::get_stream_priority_range()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_stream_priority_range() -> CudaResult<StreamPriorityRange> {
        unsafe {
            let mut range = StreamPriorityRange {
                least: 0,
                greatest: 0,
            };
            cuda_driver_sys::cuCtxGetStreamPriorityRange(
                &mut range.least as *mut i32,
                &mut range.greatest as *mut i32,
            )
            .to_result()?;
            Ok(range)
        }
    }

    /// Sets the preferred cache configuration for the current context.
    ///
    /// On devices where L1 cache and shared memory use the same hardware resources, this sets the
    /// preferred cache configuration for the current context. This is only a preference. The
    /// driver will use the requested configuration if possible, but is free to choose a different
    /// configuration if required to execute the function.
    ///
    /// This setting does nothing on devices where the size of the L1 cache and shared memory are
    /// fixed.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, CacheConfig };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_cache_config(CacheConfig::PreferL1)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_cache_config(cfg: CacheConfig) -> CudaResult<()> {
        // Transmute relies on CacheConfig being repr(u32) matching CUfunc_cache.
        unsafe { cuda_driver_sys::cuCtxSetCacheConfig(transmute(cfg)).to_result() }
    }

    /// Sets a requested resource limit for the current context.
    ///
    /// Note that this is only a request; the driver is free to modify the requested value to meet
    /// hardware requirements. Each limit has some specific restrictions.
    ///
    /// * `StackSize`: Controls the stack size in bytes for each GPU thread
    /// * `PrintfFifoSize`: Controls the size in bytes of the FIFO used by the `printf()` device
    ///   system call. This cannot be changed after a kernel has been launched which uses the
    ///   `printf()` function.
    /// * `MallocHeapSize`: Controls the size in bytes of the heap used by the `malloc()` and `free()`
    ///   device system calls. This cannot be changed after a kernel has been launched which uses the
    ///   `malloc()` and `free()` system calls.
    /// * `DeviceRuntimeSynchronizeDepth`: Controls the maximum nesting depth of a grid at which a
    ///   thread can safely call `cudaDeviceSynchronize()`. This cannot be changed after a kernel has
    ///   been launched which uses the device runtime. When setting this limit, keep in mind that
    ///   additional levels of sync depth require the driver to reserve large amounts of device
    ///   memory which can no longer be used for device allocations.
    /// * `DeviceRuntimePendingLaunchCount`: Controls the maximum number of outstanding device
    ///   runtime launches that can be made from the current context. A grid is outstanding from
    ///   the point of the launch up until the grid is known to have completed. Keep in mind that
    ///   increasing this limit will require the driver to reserve larger amounts of device memory
    ///   which can no longer be used for device allocations.
    /// * `MaxL2FetchGranularity`: Controls the L2 fetch granularity. This is purely a performance
    ///   hint and it can be ignored or clamped depending on the platform.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_resource_limit(ResourceLimit::StackSize, 2048)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_resource_limit(resource: ResourceLimit, limit: usize) -> CudaResult<()> {
        unsafe {
            // Transmute relies on ResourceLimit being repr(u32) matching CUlimit.
            cuda_driver_sys::cuCtxSetLimit(transmute(resource), limit).to_result()?;
            Ok(())
        }
    }

    /// Sets the preferred shared memory configuration for the current context.
    ///
    /// On devices with configurable shared memory banks, this function will set the context's
    /// shared memory bank size which is used for subsequent kernel launches.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, SharedMemoryConfig };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_shared_memory_config(SharedMemoryConfig::DefaultBankSize)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_shared_memory_config(cfg: SharedMemoryConfig) -> CudaResult<()> {
        // Transmute relies on SharedMemoryConfig being repr(u32) matching CUsharedconfig.
        unsafe { cuda_driver_sys::cuCtxSetSharedMemConfig(transmute(cfg)).to_result() }
    }

    /// Returns a non-owning handle to the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = CurrentContext::get_current()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_current() -> CudaResult<UnownedContext> {
        unsafe {
            let mut ctx: CUcontext = ptr::null_mut();
            cuda_driver_sys::cuCtxGetCurrent(&mut ctx as *mut CUcontext).to_result()?;
            Ok(UnownedContext { inner: ctx })
        }
    }

    /// Set the given context as the current context for this thread.
    ///
    /// If there is no context set for this thread, this pushes the given context onto the stack.
    /// If there is a context set for this thread, this replaces the top context on the stack with
    /// the given context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_current(&context)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_current<C: ContextHandle>(c: &C) -> CudaResult<()> {
        unsafe {
            cuda_driver_sys::cuCtxSetCurrent(c.get_inner()).to_result()?;
            Ok(())
        }
    }

    /// Block to wait for a context's tasks to complete.
    pub fn synchronize() -> CudaResult<()> {
        unsafe {
            cuda_driver_sys::cuCtxSynchronize().to_result()?;
            Ok(())
        }
    }
}